@@ -153,3 +153,122 @@ jobs:
153153 if : always()
154154 run : |
155155 helm uninstall "$RELEASE_NAME" || true
156+
# Deploys eoAPI with its monitoring stack onto a K3s cluster started inside
# the job, then runs the observability and autoscaling pytest suites.
observability-tests:
  name: Observability Tests
  # Runs only when the pull request's head repository is this repository.
  if: github.event.pull_request.head.repo.full_name == github.repository
  permissions:
    contents: read
    # NOTE(review): no step visible in this job requests an OIDC token;
    # confirm that `id-token: write` is actually required.
    id-token: write
  needs: integration
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    - name: Start K3s cluster
      uses: jupyterhub/action-k3s-helm@v4
      with:
        k3s-channel: latest
        helm-version: ${{ env.HELM_VERSION }}
        metrics-enabled: false
        docker-enabled: true

    # Release name is "eoapi-" plus the first 8 characters of the commit SHA;
    # writing it to GITHUB_ENV makes it available to every later step.
    - name: Set release name
      run: |
        echo "RELEASE_NAME=eoapi-$(echo "${{ github.sha }}" | cut -c1-8)" >> "$GITHUB_ENV"

    - name: Wait for K3s to be fully ready
      run: |
        echo "=== Waiting for K3s to be fully ready ==="
        kubectl wait --for=condition=Ready pod -l k8s-app=kube-dns -n kube-system --timeout=300s
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=traefik -n kube-system --timeout=300s
        kubectl get nodes
        kubectl get pods --all-namespaces
        sleep 10
        echo "✅ K3s is ready"

    - name: Deploy eoAPI with monitoring
      run: |
        echo "=== Deploying eoAPI with monitoring stack ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export PGO_VERSION="${{ env.PGO_VERSION }}"
        export GITHUB_SHA="${{ github.sha }}"
        ./scripts/deploy.sh --ci

        # Enable monitoring components.
        # --reuse-values keeps the values already stored on the release by
        # the deploy script; without it, an upgrade that passes --set
        # recomputes values from chart defaults plus only the flags below.
        helm upgrade "$RELEASE_NAME" ./charts/eoapi \
          --reuse-values \
          --set monitoring.prometheus.enabled=true \
          --set monitoring.prometheusAdapter.enabled=true \
          --set monitoring.kube-state-metrics.enabled=true \
          --set monitoring.prometheus-node-exporter.enabled=true \
          --set observability.grafana.enabled=true \
          --set stac.autoscaling.enabled=true \
          --set raster.autoscaling.enabled=true \
          --set vector.autoscaling.enabled=true \
          --namespace eoapi \
          --wait --timeout=10m

    # Best-effort waits: a component that is not Ready only logs a message
    # here, and the step continues on to the status dump below.
    - name: Wait for monitoring stack to be ready
      run: |
        echo "=== Waiting for monitoring components ==="

        # Wait for Prometheus
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus -n eoapi --timeout=300s || echo "Prometheus not ready"

        # Wait for Grafana
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=grafana -n eoapi --timeout=300s || echo "Grafana not ready"

        # Wait for prometheus-adapter
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus-adapter -n eoapi --timeout=300s || echo "prometheus-adapter not ready"

        # Wait for HPA to be created
        sleep 30

        echo "=== Final monitoring stack status ==="
        kubectl get pods -n eoapi -l 'app.kubernetes.io/component in (server,grafana,prometheus-adapter)' || true
        kubectl get hpa -n eoapi || true

    - name: Run observability tests
      run: |
        echo "=== Running observability test suite ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export NAMESPACE="eoapi"

        # Install python dependencies for testing
        python -m pip install --upgrade pip
        pip install pytest requests

        # Run observability tests
        python -m pytest .github/workflows/tests/test_observability.py -v --tb=short

        # Run autoscaling tests (tests marked "slow" are deselected)
        python -m pytest .github/workflows/tests/test_autoscaling.py -v --tb=short -m "not slow"

    # Diagnostics only: every command is allowed to fail so that the full
    # dump is attempted even when parts of the stack are missing.
    - name: Debug observability stack on failure
      if: failure()
      run: |
        echo "=== Observability Debug Information ==="

        echo "=== Monitoring Pods Status ==="
        kubectl get pods -n eoapi -l 'app.kubernetes.io/name in (prometheus,grafana,prometheus-adapter)' -o wide || true

        echo "=== HPA Status ==="
        kubectl get hpa -n eoapi -o wide || true
        kubectl describe hpa -n eoapi || true

        echo "=== Custom Metrics API ==="
        kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" || true

        echo "=== Pod Metrics ==="
        kubectl top pods -n eoapi || true

        echo "=== Recent Events ==="
        kubectl get events -n eoapi --sort-by='.lastTimestamp' | tail -20 || true

        echo "=== Component Logs ==="
        kubectl logs -l app.kubernetes.io/name=prometheus-adapter -n eoapi --tail=50 || true
        kubectl logs -l app.kubernetes.io/name=grafana -n eoapi --tail=30 || true

    # Always runs. The namespace matches the one passed to `helm upgrade`
    # above so the uninstall targets the namespace the release lives in.
    - name: Cleanup observability test
      if: always()
      run: |
        helm uninstall "$RELEASE_NAME" --namespace eoapi || true
0 commit comments