From 7e5663e2cff34ecd73c115a763af471884991d42 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Thu, 16 May 2024 16:06:48 +0200 Subject: [PATCH 1/3] add runner resources monitoring --- .github/workflows/manual-benchmark.yaml | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/manual-benchmark.yaml b/.github/workflows/manual-benchmark.yaml index cc708a9c..f3b2a853 100644 --- a/.github/workflows/manual-benchmark.yaml +++ b/.github/workflows/manual-benchmark.yaml @@ -20,6 +20,23 @@ jobs: - uses: webfactory/ssh-agent@v0.8.0 with: ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} + - name: Setup monitoring script + run: | + echo '#!/bin/bash' > monitor.sh + echo 'while true; do' >> monitor.sh + echo ' echo "CPU usage at $(date):" >> system_resources.log' >> monitor.sh + echo ' top -b -n 1 | head -n 20 >> system_resources.log' >> monitor.sh + echo ' echo "" >> system_resources.log' >> monitor.sh + echo ' echo "Memory usage at $(date):" >> system_resources.log' >> monitor.sh + echo ' free -h >> system_resources.log' >> monitor.sh + echo ' echo "" >> system_resources.log' >> monitor.sh + echo ' echo "Disk usage at $(date):" >> system_resources.log' >> monitor.sh + echo ' df -h >> system_resources.log' >> monitor.sh + echo ' sleep 10' >> monitor.sh + echo 'done' >> monitor.sh + chmod +x monitor.sh + - name: Start monitoring in background + run: ./monitor.sh & - name: Benches run: | export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }} @@ -30,3 +47,13 @@ jobs: export POSTGRES_TABLE=benchmark_manual bash -x tools/setup_ci.sh bash -x tools/run_ci.sh + - name: Stop monitoring + run: | + pkill -f monitor.sh + - name: Display resource log + run: cat system_resources.log + - name: Upload system resource log + uses: actions/upload-artifact@v2 + with: + name: system-resources-log + path: system_resources.log From 07c5661adfcc9edd958caf5633bc47b0ee96c5e6 Mon Sep 17 00:00:00 2001 From: tellet-q Date: Fri, 17 May 2024 10:20:48 +0200 Subject: [PATCH 2/3] Revert "add runner resources monitoring" This reverts commit 7e5663e2cff34ecd73c115a763af471884991d42. --- .github/workflows/manual-benchmark.yaml | 27 ------------------------- 1 file changed, 27 deletions(-) diff --git a/.github/workflows/manual-benchmark.yaml b/.github/workflows/manual-benchmark.yaml index f3b2a853..cc708a9c 100644 --- a/.github/workflows/manual-benchmark.yaml +++ b/.github/workflows/manual-benchmark.yaml @@ -20,23 +20,6 @@ jobs: - uses: webfactory/ssh-agent@v0.8.0 with: ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} - - name: Setup monitoring script - run: | - echo '#!/bin/bash' > monitor.sh - echo 'while true; do' >> monitor.sh - echo ' echo "CPU usage at $(date):" >> system_resources.log' >> monitor.sh - echo ' top -b -n 1 | head -n 20 >> system_resources.log' >> monitor.sh - echo ' echo "" >> system_resources.log' >> monitor.sh - echo ' echo "Memory usage at $(date):" >> system_resources.log' >> monitor.sh - echo ' free -h >> system_resources.log' >> monitor.sh - echo ' echo "" >> system_resources.log' >> monitor.sh - echo ' echo "Disk usage at $(date):" >> system_resources.log' >> monitor.sh - echo ' df -h >> system_resources.log' >> monitor.sh - echo ' sleep 10' >> monitor.sh - echo 'done' >> monitor.sh - chmod +x monitor.sh - - name: Start monitoring in background - run: ./monitor.sh & - name: Benches run: | export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }} @@ -47,13 +30,3 @@ jobs: export POSTGRES_TABLE=benchmark_manual bash -x tools/setup_ci.sh bash -x tools/run_ci.sh - - name: Stop monitoring - run: | - pkill -f monitor.sh - - name: Display resource log - run: cat system_resources.log - - name: Upload system resource log - uses: actions/upload-artifact@v2 - with: - name: system-resources-log - path: system_resources.log From 3678fe6bf965c74c85bacd851d5181332c84889d Mon Sep 17 00:00:00 2001 From: tellet-q Date: Fri, 17 May 2024 10:50:25 +0200 Subject: [PATCH 3/3] add ServerAliveInterval and ServerAliveCountMax --- tools/run_client_script.sh | 2 +- tools/run_server_container.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/run_client_script.sh b/tools/run_client_script.sh index da082ee3..783ae793 100644 --- a/tools/run_client_script.sh +++ b/tools/run_client_script.sh @@ -23,7 +23,7 @@ PRIVATE_IP_OF_THE_SERVER=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_private_ip.sh" RUN_EXPERIMENT="ENGINE_NAME=${ENGINE_NAME} DATASETS=${DATASETS} PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} bash ~/run_experiment.sh" -ssh -tt "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}" +ssh -tt -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}" SEARCH_RESULT_FILE=$(ssh "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "ls -t results/*-search-*.json | head -n 1") UPLOAD_RESULT_FILE=$(ssh "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "ls -t results/*-upload-*.json | head -n 1") diff --git a/tools/run_server_container.sh b/tools/run_server_container.sh index 4875e83e..d04f1199 100644 --- a/tools/run_server_container.sh +++ b/tools/run_server_container.sh @@ -34,7 +34,7 @@ if [[ ${QDRANT_VERSION} == docker/* ]] || [[ ${QDRANT_VERSION} == ghcr/* ]]; the fi DOCKER_COMPOSE="export QDRANT_VERSION=${QDRANT_VERSION}; export CONTAINER_REGISTRY=${CONTAINER_REGISTRY}; docker compose down; pkill qdrant ; docker rmi ${CONTAINER_REGISTRY}/qdrant/qdrant:${QDRANT_VERSION} || true ; docker compose up -d; docker container ls" - ssh -t "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "cd ./projects/vector-db-benchmark/engine/servers/${CONTAINER_NAME} ; $DOCKER_COMPOSE" + ssh -t -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "cd ./projects/vector-db-benchmark/engine/servers/${CONTAINER_NAME} ; $DOCKER_COMPOSE" else echo "Error: unknown version ${QDRANT_VERSION}. Version name should start with 'docker/' or 'ghcr/'" exit 1