From c250d1924be18f2b5a426e48bb77bfa93ee6b57f Mon Sep 17 00:00:00 2001 From: Santiago Mola Date: Wed, 2 Jul 2025 08:36:10 +0200 Subject: [PATCH 1/6] Find PR base branch without GitHub API --- .gitlab/find-gh-base-ref.sh | 100 ++++++++++++------------------------ 1 file changed, 34 insertions(+), 66 deletions(-) diff --git a/.gitlab/find-gh-base-ref.sh b/.gitlab/find-gh-base-ref.sh index e007ab796a2..6d61efb51c1 100755 --- a/.gitlab/find-gh-base-ref.sh +++ b/.gitlab/find-gh-base-ref.sh @@ -2,6 +2,9 @@ # Determines the base branch for the current PR (if we are running in a PR). set -euo pipefail +# DEBUG: List all branches (to stderr) +git branch -a --sort=committerdate --format='%(refname:short)' >&2 + CURRENT_HEAD_SHA="$(git rev-parse HEAD)" if [[ -z "${CURRENT_HEAD_SHA:-}" ]]; then echo "Failed to determine current HEAD SHA" >&2 @@ -40,74 +43,39 @@ if [[ $(git log --pretty=oneline origin/master..HEAD | wc -l) -eq 1 ]]; then exit 0 fi -# In GitLab: we have no reference to the base branch or even the PR number. -# We have to find it from the current branch name, which is defined in -# CI_COMMIT_REF_NAME. -if [[ -z "${CI_COMMIT_REF_NAME}" ]]; then - echo "CI_COMMIT_REF_NAME is not set, not running in GitLab CI?" >&2 - exit 1 -fi - -# In GitLab, CI_PROJECT_NAME is set, otherwise, set it for testing. -export CI_PROJECT_NAME="${CI_PROJECT_NAME:-dd-trace-java}" +get_distance_from_merge_base() { + local candidate_base="$1" + merge_base_sha=$(git merge-base "$candidate_base" HEAD) + distance=$(git log --pretty=oneline "$merge_base_sha".."$CURRENT_HEAD_SHA" | wc -l) + echo "Distance from $candidate_base is $distance" >&2 + echo "$distance" +} -if [[ -z "${GITHUB_TOKEN:-}" ]]; then - echo "GITHUB_TOKEN is not set, fetching from AWS SSM" >&2 - if ! command -v aws >/dev/null 2>&1; then - echo "aws is not installed, please install it" >&2 - exit 1 +# Find the best base ref: the master/release branch whose merge base is closest to HEAD. +# If there are multiple candidates (e.g. 
immediately after a release branch is created), we cannot +# disambiguate and return an error. +# NOTE: GitHub API is more robust for this task, but we hit rate limits. +BEST_CANDIDATES=(origin/master) +BEST_DISTANCE=$(get_distance_from_merge_base origin/master) +mapfile -t CANDIDATE_BASES < <(git branch -a --sort=committerdate --format='%(refname:short)' --list 'origin/release/v*' | tac) +for candidate_base in "${CANDIDATE_BASES[@]}"; do + distance=$(get_distance_from_merge_base "$candidate_base") + if [[ $distance -lt $BEST_DISTANCE ]]; then + BEST_DISTANCE=$distance + BEST_CANDIDATES=("$candidate_base") + elif [[ $distance -eq $BEST_DISTANCE ]]; then + BEST_CANDIDATES+=("$candidate_base") fi - set +e - GITHUB_TOKEN=$(aws ssm get-parameter --name "ci.$CI_PROJECT_NAME.gh_release_token" --with-decryption --query "Parameter.Value" --output text) - set -e - if [[ -z "${GITHUB_TOKEN:-}" ]]; then - echo "Failed to fetch GITHUB_TOKEN from AWS SSM" >&2 - exit 1 - fi - export GITHUB_TOKEN -fi - -if ! command -v curl >/dev/null 2>&1; then - echo "curl is not installed, please install it" >&2 - exit 1 -fi +done -if ! command -v jq >/dev/null 2>&1; then - echo "jq is not installed, please install it" >&2 - exit 1 +if [[ ${#BEST_CANDIDATES[@]} -eq 1 ]]; then + # Remove the origin/ prefix + base_ref="${BEST_CANDIDATES[0]#origin/}" + echo "Base ref is ${base_ref}" >&2 + save_cache "${base_ref}" "$CURRENT_HEAD_SHA" + echo "${base_ref}" + exit 0 fi -while true; do - set +e - PR_DATA=$(curl \ - -XGET \ - --silent \ - --include \ - --fail-with-body \ - -H 'Accept: application/vnd.github+json' \ - -H "Authorization: Bearer ${GITHUB_TOKEN}" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "https://api.github.com/repos/datadog/dd-trace-java/pulls?head=DataDog:${CI_COMMIT_REF_NAME}&sort=updated&direction=desc") - exit_code=$? 
- set -e - if [[ ${exit_code} -eq 0 ]]; then - PR_NUMBER=$(echo "$PR_DATA" | sed '1,/^[[:space:]]*$/d' | jq -r '.[].number') - PR_BASE_REF=$(echo "$PR_DATA" | sed '1,/^[[:space:]]*$/d' | jq -r '.[].base.ref') - if [[ -n "${PR_BASE_REF:-}" ]]; then - echo "PR is https://github.com/datadog/dd-trace-java/pull/${PR_NUMBER} and base ref is ${PR_BASE_REF}">&2 - save_cache "${PR_BASE_REF}" "$CURRENT_HEAD_SHA" - echo "${PR_BASE_REF}" - exit 0 - fi - fi - if echo "$PR_DATA" | grep -q "^x-ratelimit-reset:"; then - reset_timestamp=$(echo -n "$PR_DATA" | grep "^x-ratelimit-reset:" | sed -e 's/^x-ratelimit-reset: //' -e 's/\r//') - now=$(date +%s) - sleep_time=$((reset_timestamp - now + 1)) - echo "GitHub rate limit exceeded, sleeping for ${sleep_time} seconds" >&2 - sleep "${sleep_time}" - continue - fi - echo -e "GitHub request failed for an unknown reason:\n$(echo "$PR_DATA" | sed '/^$/q')" >&2 - exit 1 -done +echo "Base ref is ambiguous, candidates are: ${BEST_CANDIDATES[*]}" >&2 +exit 1 From d405fdff1c465ffa645d97ce68a5447bdbde5517 Mon Sep 17 00:00:00 2001 From: Santiago Mola Date: Wed, 2 Jul 2025 09:15:52 +0200 Subject: [PATCH 2/6] Enable git-based skipping again --- .gitlab-ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c01b08f7bcb..aadc0a593c5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -119,8 +119,7 @@ default: .gitlab_base_ref_params: &gitlab_base_ref_params - | - # FIXME: Disabled until we find a way to not hit GitHub API rate limit - if false && [[ ! $CI_COMMIT_BRANCH =~ ^(master|release/.*)$ ]]; then + if [[ ! 
$CI_COMMIT_BRANCH =~ ^(master|release/.*)$ ]]; then export GIT_BASE_REF=$(.gitlab/find-gh-base-ref.sh) if [[ -n "$GIT_BASE_REF" ]]; then export GRADLE_PARAMS="$GRADLE_PARAMS -PgitBaseRef=origin/$GIT_BASE_REF" From 3c91d4d2a983664a7ccc4a44ee3ad6aa1a1a42a0 Mon Sep 17 00:00:00 2001 From: Santiago Mola Date: Wed, 2 Jul 2025 09:38:37 +0200 Subject: [PATCH 3/6] Clarifying comments --- .gitlab/find-gh-base-ref.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitlab/find-gh-base-ref.sh b/.gitlab/find-gh-base-ref.sh index 6d61efb51c1..7bb6f9dd0ca 100755 --- a/.gitlab/find-gh-base-ref.sh +++ b/.gitlab/find-gh-base-ref.sh @@ -2,9 +2,6 @@ # Determines the base branch for the current PR (if we are running in a PR). set -euo pipefail -# DEBUG: List all branches (to stderr) -git branch -a --sort=committerdate --format='%(refname:short)' >&2 - CURRENT_HEAD_SHA="$(git rev-parse HEAD)" if [[ -z "${CURRENT_HEAD_SHA:-}" ]]; then echo "Failed to determine current HEAD SHA" >&2 @@ -77,5 +74,10 @@ if [[ ${#BEST_CANDIDATES[@]} -eq 1 ]]; then exit 0 fi +# If base ref is ambiguous, we cannot determine the correct one. +# Example: a release branch is created, and a PR is opened starting from the +# commit where the release branch was created. The distance to the merge base +# for both master and the release branch is the same. In this case, we bail +# out, and make no assumption on which is the correct base ref. 
echo "Base ref is ambiguous, candidates are: ${BEST_CANDIDATES[*]}" >&2 exit 1 From b958dbc62585cec7a4e03eeae208da3c2bca7ddc Mon Sep 17 00:00:00 2001 From: Santiago Mola Date: Wed, 9 Jul 2025 09:43:34 +0200 Subject: [PATCH 4/6] Use rev-list instead of log --- .gitlab/find-gh-base-ref.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/find-gh-base-ref.sh b/.gitlab/find-gh-base-ref.sh index 7bb6f9dd0ca..f6a9fa9c7ee 100755 --- a/.gitlab/find-gh-base-ref.sh +++ b/.gitlab/find-gh-base-ref.sh @@ -33,7 +33,7 @@ if [[ -f $CACHE_PATH ]]; then fi # Happy path: if we're just one commit away from master, base ref is master. -if [[ $(git log --pretty=oneline origin/master..HEAD | wc -l) -eq 1 ]]; then +if [[ $(git rev-list --count origin/master..HEAD) -eq 1 ]]; then echo "We are just one commit away from master, base ref is master" >&2 save_cache "master" "$CURRENT_HEAD_SHA" echo "master" @@ -43,7 +43,7 @@ fi get_distance_from_merge_base() { local candidate_base="$1" merge_base_sha=$(git merge-base "$candidate_base" HEAD) - distance=$(git log --pretty=oneline "$merge_base_sha".."$CURRENT_HEAD_SHA" | wc -l) + distance=$(git rev-list --count "$merge_base_sha".."$CURRENT_HEAD_SHA") echo "Distance from $candidate_base is $distance" >&2 echo "$distance" } From a4c9d0bfacff8fc3accd18aecccc024b8177b1eb Mon Sep 17 00:00:00 2001 From: Santiago Mola Date: Wed, 9 Jul 2025 09:45:04 +0200 Subject: [PATCH 5/6] Use locals --- .gitlab/find-gh-base-ref.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab/find-gh-base-ref.sh b/.gitlab/find-gh-base-ref.sh index f6a9fa9c7ee..69e43181018 100755 --- a/.gitlab/find-gh-base-ref.sh +++ b/.gitlab/find-gh-base-ref.sh @@ -42,6 +42,8 @@ fi get_distance_from_merge_base() { local candidate_base="$1" + local merge_base_sha + local distance merge_base_sha=$(git merge-base "$candidate_base" HEAD) distance=$(git rev-list --count "$merge_base_sha".."$CURRENT_HEAD_SHA") echo "Distance from $candidate_base is $distance" >&2 
From 0ca758d26b00e7b9f464a185059d6a5bf4b071dd Mon Sep 17 00:00:00 2001 From: Santiago Mola Date: Wed, 9 Jul 2025 10:08:48 +0200 Subject: [PATCH 6/6] Support project branches, move further logic within find-gh-base-ref.sh --- .gitlab-ci.yml | 12 +++++------- .gitlab/find-gh-base-ref.sh | 22 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index aadc0a593c5..27840d96a49 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -119,13 +119,11 @@ default: .gitlab_base_ref_params: &gitlab_base_ref_params - | - if [[ ! $CI_COMMIT_BRANCH =~ ^(master|release/.*)$ ]]; then - export GIT_BASE_REF=$(.gitlab/find-gh-base-ref.sh) - if [[ -n "$GIT_BASE_REF" ]]; then - export GRADLE_PARAMS="$GRADLE_PARAMS -PgitBaseRef=origin/$GIT_BASE_REF" - else - echo "Failed to find base ref for PR" >&2 - fi + export GIT_BASE_REF=$(.gitlab/find-gh-base-ref.sh) + if [[ -n "$GIT_BASE_REF" ]]; then + export GRADLE_PARAMS="$GRADLE_PARAMS -PgitBaseRef=origin/$GIT_BASE_REF" + else + echo "Failed to find base ref for PR" >&2 fi .gradle_build: &gradle_build diff --git a/.gitlab/find-gh-base-ref.sh b/.gitlab/find-gh-base-ref.sh index 69e43181018..fe85ac0e4aa 100755 --- a/.gitlab/find-gh-base-ref.sh +++ b/.gitlab/find-gh-base-ref.sh @@ -2,6 +2,18 @@ # Determines the base branch for the current PR (if we are running in a PR). set -euo pipefail +if [[ -n "${CI_COMMIT_BRANCH:-}" ]]; then + echo "CI_COMMIT_BRANCH is set to $CI_COMMIT_BRANCH" >&2 +else + echo "CI_COMMIT_BRANCH is not set, skipping base ref detection" >&2 + exit 1 +fi + +if [[ $CI_COMMIT_BRANCH =~ ^(master|release/.*)$ ]]; then + echo "CI_COMMIT_BRANCH is a master or release branch, skipping base ref detection" >&2 + exit 1 +fi + CURRENT_HEAD_SHA="$(git rev-parse HEAD)" if [[ -z "${CURRENT_HEAD_SHA:-}" ]]; then echo "Failed to determine current HEAD SHA" >&2 @@ -56,7 +68,15 @@ get_distance_from_merge_base() { # NOTE: GitHub API is more robust for this task, but we hit rate limits. 
BEST_CANDIDATES=(origin/master) BEST_DISTANCE=$(get_distance_from_merge_base origin/master) -mapfile -t CANDIDATE_BASES < <(git branch -a --sort=committerdate --format='%(refname:short)' --list 'origin/release/v*' | tac) + +# If the current branch is not a project/ branch, project/ branches are candidates. +# This accounts for the case when the project/ branch is being merged to master. +if [[ ! "$CI_COMMIT_BRANCH" =~ ^project/.*$ ]]; then + mapfile -t CANDIDATE_BASES < <(git branch -a --sort=committerdate --format='%(refname:short)' --list 'origin/release/v*' --list 'origin/project/*' | tac) +else + mapfile -t CANDIDATE_BASES < <(git branch -a --sort=committerdate --format='%(refname:short)' --list 'origin/release/v*' | tac) +fi + for candidate_base in "${CANDIDATE_BASES[@]}"; do distance=$(get_distance_from_merge_base "$candidate_base") if [[ $distance -lt $BEST_DISTANCE ]]; then