From 85a1d814a7e6cba15de59505c82bf96798807eaf Mon Sep 17 00:00:00 2001
From: Jerop Kipruto
Date: Wed, 2 Jul 2025 22:45:34 -0400
Subject: [PATCH] refactor(ci): improve pr triage (#3082)

---
Reviewer note: pr-triage.sh below was revised after this patch was generated,
so the blob id on its "index" line no longer matches the file content.

 .github/scripts/pr-triage.sh                  | 209 ++++++++++++++++++
 .../workflows/gemini-automated-pr-triage.yml  |  84 -------
 .../workflows/gemini-scheduled-pr-triage.yml  |  72 +-----
 3 files changed, 219 insertions(+), 146 deletions(-)
 create mode 100755 .github/scripts/pr-triage.sh
 delete mode 100644 .github/workflows/gemini-automated-pr-triage.yml

diff --git a/.github/scripts/pr-triage.sh b/.github/scripts/pr-triage.sh
new file mode 100755
index 00000000..be86e393
--- /dev/null
+++ b/.github/scripts/pr-triage.sh
@@ -0,0 +1,209 @@
+#!/bin/bash
+#
+# Triage pull requests: check that each PR references an issue and mirror the
+# labels of that issue onto the PR.
+#
+# Environment:
+#   GITHUB_REPOSITORY  owner/name of the repository to triage (required)
+#   GITHUB_OUTPUT      file that receives the step outputs (required)
+#   PR_NUMBER          triage only this PR; when unset or empty, every open
+#                      PR is triaged
+#
+# Outputs appended to $GITHUB_OUTPUT:
+#   needs_comment        true/false, written once per processed PR
+#   prs_needing_comment  [n1,n2,...] PRs that have no issue reference
+set -euo pipefail
+
+: "${GITHUB_REPOSITORY:?GITHUB_REPOSITORY must be set}"
+: "${GITHUB_OUTPUT:?GITHUB_OUTPUT must be set}"
+
+# Initialize a comma-separated string to hold PR numbers that need a comment
+PRS_NEEDING_COMMENT=""
+
+# Succeed when $1 is exactly equal to one of the remaining arguments.
+# Labels are compared as whole strings because a label name may contain
+# spaces.
+contains_label() {
+  local needle=$1 candidate
+  shift
+  for candidate in "$@"; do
+    if [ "$candidate" = "$needle" ]; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Join all arguments with commas, the list format passed to
+# gh pr edit --add-label / --remove-label below.
+join_labels() {
+  local IFS=,
+  printf '%s' "$*"
+}
+
+# Read text on stdin and print the number of the first issue it references,
+# or nothing when there is no reference. A closing keyword such as
+# "Fixes #12" takes precedence over a bare "#12" mention.
+find_issue_number() {
+  local body match
+  body=$(cat)
+  match=$(grep -oiE '(^|[^[:alnum:]_])(close[sd]?|fix(e[sd])?|resolve[sd]?):?[[:space:]]+#[0-9]+' <<<"$body" | head -n 1 || true)
+  if [ -z "$match" ]; then
+    match=$(grep -oE '#[0-9]+' <<<"$body" | head -n 1 || true)
+  fi
+  # Keep only the digits that follow the last '#'.
+  printf '%s' "${match##*#}"
+}
+
+# Triage a single pull request.
+# Globals:   GITHUB_REPOSITORY, GITHUB_OUTPUT (read); PRS_NEEDING_COMMENT
+#            (extended when the PR has no issue reference)
+# Arguments: $1 - pull request number
+# Returns:   1 when the PR cannot be fetched, 0 otherwise
+process_pr() {
+  local PR_NUMBER=$1
+  echo "🔄 Processing PR #$PR_NUMBER"
+
+  # Get PR body with error handling
+  local PR_BODY
+  if ! PR_BODY=$(gh pr view "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --json body -q .body 2>/dev/null); then
+    echo " ⚠️ Could not fetch PR #$PR_NUMBER details"
+    return 1
+  fi
+
+  # Look for an issue reference in the PR body
+  local ISSUE_NUMBER
+  ISSUE_NUMBER=$(find_issue_number <<<"$PR_BODY")
+
+  if [ -z "$ISSUE_NUMBER" ]; then
+    echo "⚠️ No linked issue found for PR #$PR_NUMBER, adding status/need-issue label"
+    if ! gh pr edit "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --add-label "status/need-issue" 2>/dev/null; then
+      echo " ⚠️ Failed to add label (may already exist or have permission issues)"
+    fi
+    # Add PR number to the list
+    PRS_NEEDING_COMMENT="${PRS_NEEDING_COMMENT:+$PRS_NEEDING_COMMENT,}$PR_NUMBER"
+    echo "needs_comment=true" >> "$GITHUB_OUTPUT"
+    return 0
+  fi
+
+  echo "🔗 Found linked issue #$ISSUE_NUMBER"
+  echo "needs_comment=false" >> "$GITHUB_OUTPUT"
+
+  # Remove status/need-issue label if present
+  if ! gh pr edit "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --remove-label "status/need-issue" 2>/dev/null; then
+    echo " status/need-issue label not present or could not be removed"
+  fi
+
+  # Get issue labels. A failed lookup is not the same as "the issue has no
+  # labels": treating it that way would strip every label from the PR.
+  echo "📥 Fetching labels from issue #$ISSUE_NUMBER"
+  local ISSUE_LABELS
+  if ! ISSUE_LABELS=$(gh issue view "$ISSUE_NUMBER" --repo "$GITHUB_REPOSITORY" --json labels -q '.labels[].name' 2>/dev/null); then
+    echo " ⚠️ Could not fetch issue #$ISSUE_NUMBER (may not exist or be in different repo), skipping label sync"
+    return 0
+  fi
+
+  # Get PR labels
+  echo "📥 Fetching labels from PR #$PR_NUMBER"
+  local PR_LABELS
+  if ! PR_LABELS=$(gh pr view "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --json labels -q '.labels[].name' 2>/dev/null); then
+    echo " ⚠️ Could not fetch PR labels, skipping label sync"
+    return 0
+  fi
+
+  # gh prints one label per line; collect the non-empty lines into arrays
+  local ISSUE_LABEL_ARRAY=() PR_LABEL_ARRAY=() label
+  while IFS= read -r label; do
+    if [ -n "$label" ]; then
+      ISSUE_LABEL_ARRAY+=("$label")
+    fi
+  done <<<"$ISSUE_LABELS"
+  while IFS= read -r label; do
+    if [ -n "$label" ]; then
+      PR_LABEL_ARRAY+=("$label")
+    fi
+  done <<<"$PR_LABELS"
+
+  echo " Issue labels: $(join_labels "${ISSUE_LABEL_ARRAY[@]}")"
+  echo " PR labels: $(join_labels "${PR_LABEL_ARRAY[@]}")"
+
+  # Find labels to add (on issue but not on PR)
+  local ADD_ARRAY=() REMOVE_ARRAY=()
+  for label in "${ISSUE_LABEL_ARRAY[@]}"; do
+    if ! contains_label "$label" "${PR_LABEL_ARRAY[@]}"; then
+      ADD_ARRAY+=("$label")
+    fi
+  done
+
+  # Find labels to remove (on PR but not on issue)
+  for label in "${PR_LABEL_ARRAY[@]}"; do
+    # Don't remove status/need-issue since we already handled it
+    if [ "$label" = "status/need-issue" ]; then
+      continue
+    fi
+    if ! contains_label "$label" "${ISSUE_LABEL_ARRAY[@]}"; then
+      REMOVE_ARRAY+=("$label")
+    fi
+  done
+
+  # Apply label changes
+  local LABELS_TO_ADD LABELS_TO_REMOVE
+  LABELS_TO_ADD=$(join_labels "${ADD_ARRAY[@]}")
+  LABELS_TO_REMOVE=$(join_labels "${REMOVE_ARRAY[@]}")
+
+  if [ -n "$LABELS_TO_ADD" ]; then
+    echo "➕ Adding labels: $LABELS_TO_ADD"
+    if ! gh pr edit "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --add-label "$LABELS_TO_ADD" 2>/dev/null; then
+      echo " ⚠️ Failed to add some labels"
+    fi
+  fi
+
+  if [ -n "$LABELS_TO_REMOVE" ]; then
+    echo "➖ Removing labels: $LABELS_TO_REMOVE"
+    if ! gh pr edit "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --remove-label "$LABELS_TO_REMOVE" 2>/dev/null; then
+      echo " ⚠️ Failed to remove some labels"
+    fi
+  fi
+
+  if [ -z "$LABELS_TO_ADD" ] && [ -z "$LABELS_TO_REMOVE" ]; then
+    echo "✅ Labels already synchronized"
+  fi
+}
+
+# If PR_NUMBER is set, process only that PR
+if [ -n "${PR_NUMBER:-}" ]; then
+  if ! process_pr "$PR_NUMBER"; then
+    echo "❌ Failed to process PR #$PR_NUMBER"
+    exit 1
+  fi
+else
+  # Otherwise, get all open PRs and process them
+  # The script logic will determine which ones need issue linking or label sync
+  echo "📥 Getting all open pull requests..."
+  if ! PR_NUMBERS=$(gh pr list --repo "$GITHUB_REPOSITORY" --state open --limit 1000 --json number -q '.[].number' 2>/dev/null); then
+    echo "❌ Failed to fetch PR list"
+    exit 1
+  fi
+
+  if [ -z "$PR_NUMBERS" ]; then
+    echo "✅ No open PRs found"
+  else
+    # Count the number of PRs
+    PR_COUNT=$(echo "$PR_NUMBERS" | wc -w | tr -d ' ')
+    echo "📊 Found $PR_COUNT open PRs to process"
+
+    # PR_NUMBERS is a whitespace-separated list of integers, so the
+    # unquoted expansion (word splitting) is intentional.
+    for pr_number in $PR_NUMBERS; do
+      if ! process_pr "$pr_number"; then
+        echo "⚠️ Failed to process PR #$pr_number, continuing with next PR..."
+        continue
+      fi
+    done
+  fi
+fi
+
+# Ensure output is always set, even if empty
+echo "prs_needing_comment=[$PRS_NEEDING_COMMENT]" >> "$GITHUB_OUTPUT"
+
+echo "✅ PR triage completed"
diff --git a/.github/workflows/gemini-automated-pr-triage.yml b/.github/workflows/gemini-automated-pr-triage.yml
deleted file mode 100644
index f9697125..00000000
--- a/.github/workflows/gemini-automated-pr-triage.yml
+++ /dev/null
@@ -1,84 +0,0 @@
-name: Gemini Automated PR Triage 🛳️
-
-on:
-  pull_request:
-    types: [opened, edited, synchronize, reopened]
-
-jobs:
-  triage-pr:
-    timeout-minutes: 5
-    permissions:
-      contents: read
-      id-token: write
-      issues: write
-      pull-requests: write
-    concurrency:
-      group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
-      cancel-in-progress: true
-    runs-on: ubuntu-latest
-    steps:
-      - name: Generate GitHub App Token
-        id: generate_token
-        uses: actions/create-github-app-token@v1
-        with:
-          app-id: ${{ secrets.APP_ID }}
-          private-key: ${{ secrets.PRIVATE_KEY }}
-
-      - name: Run Gemini PR Triage
-        uses: google-gemini/gemini-cli-action@41c0f1b3cbd1a0b284251bd1aac034edd07a3a2f
-        env:
-          GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }}
-        with:
-          version: 0.1.8-rc.0
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          OTLP_GCP_WIF_PROVIDER: ${{ secrets.OTLP_GCP_WIF_PROVIDER }}
-          OTLP_GCP_SERVICE_ACCOUNT: ${{ secrets.OTLP_GCP_SERVICE_ACCOUNT }}
-          OTLP_GOOGLE_CLOUD_PROJECT: ${{ secrets.OTLP_GOOGLE_CLOUD_PROJECT }}
-          settings_json: |
-            {
-              "coreTools": [
-                "run_shell_command(gh pr list)",
-                "run_shell_command(gh pr view)",
-                "run_shell_command(gh pr edit)",
-                "run_shell_command(gh issue view)"
-              ]
-            }
-          prompt: |
-            You are a PR triage assistant. Your task is to ensure every pull request is linked to an issue and to perform a full synchronization of labels from the issue to the PR.
-
-            The current pull request is #${{ github.event.pull_request.number }} in the repository ${{ github.repository }}.
-
-            Follow these steps:
-            1. **Check for a linked issue in the PR body.**
-               `gh pr view ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --json body -q .body`
-
-            2. **Scan the body for an issue reference** (e.g., `#123`, `Closes #456`).
-
-            3. **Process the result:**
-               - If **no issue reference is found**, add the `status/need-issue` label to the PR:
-                 `gh pr edit ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --add-label "status/need-issue"`
-
-               - If **an issue reference is found**, perform a full label synchronization:
-                 a. **Remove the `status/need-issue` label** (if present):
-                    `gh pr edit ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --remove-label "status/need-issue"`
-
-                 b. **Extract the issue number** from the reference.
-
-                 c. **Fetch all labels for the issue** and store them.
-                    `gh issue view --repo ${{ github.repository }} --json labels`
-
-                 d. **Fetch all labels for the PR** and store them.
-                    `gh pr view ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --json labels`
-
-                 e. **Compare the two sets of labels.**
-                    - Identify labels to **add**: those on the issue but not on the PR.
-                    - Identify labels to **remove**: those on the PR but not on the issue.
-
-                 f. **Execute the synchronization.**
-                    - If there are labels to add, run: `gh pr edit ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --add-label "label1,label2,..."`
-                    - If there are labels to remove, run: `gh pr edit ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --remove-label "label3,label4,..."`
-
-            **Strictly follow these rules:**
-            - Only use the `gh` commands provided.
-            - Do not add any comments to the PR.
-            - The final set of labels on the PR must exactly match the labels on the linked issue.
diff --git a/.github/workflows/gemini-scheduled-pr-triage.yml b/.github/workflows/gemini-scheduled-pr-triage.yml
index a4c5a239..90d87815 100644
--- a/.github/workflows/gemini-scheduled-pr-triage.yml
+++ b/.github/workflows/gemini-scheduled-pr-triage.yml
@@ -2,7 +2,7 @@ name: Gemini Scheduled PR Triage 🚀
 
 on:
   schedule:
-    - cron: '0 * * * *' # Runs at the beginning of every hour
+    - cron: '*/15 * * * *' # Runs every 15 minutes
   workflow_dispatch: {}
 
 jobs:
@@ -14,7 +14,12 @@ jobs:
       issues: write
       pull-requests: write
     runs-on: ubuntu-latest
+    outputs:
+      prs_needing_comment: ${{ steps.run_triage.outputs.prs_needing_comment }}
     steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
       - name: Generate GitHub App Token
         id: generate_token
         uses: actions/create-github-app-token@v1
@@ -22,66 +27,9 @@ jobs:
           app-id: ${{ secrets.APP_ID }}
           private-key: ${{ secrets.PRIVATE_KEY }}
 
-      - name: Run Gemini Scheduled PR Triage
-        uses: google-gemini/gemini-cli-action@41c0f1b3cbd1a0b284251bd1aac034edd07a3a2f
+      - name: Run PR Triage Script
+        id: run_triage
         env:
           GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }}
-        with:
-          version: 0.1.8-rc.0
-          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          OTLP_GCP_WIF_PROVIDER: ${{ secrets.OTLP_GCP_WIF_PROVIDER }}
-          OTLP_GCP_SERVICE_ACCOUNT: ${{ secrets.OTLP_GCP_SERVICE_ACCOUNT }}
-          OTLP_GOOGLE_CLOUD_PROJECT: ${{ secrets.OTLP_GOOGLE_CLOUD_PROJECT }}
-          settings_json: |
-            {
-              "coreTools": [
-                "run_shell_command(gh pr list)",
-                "run_shell_command(gh pr view)",
-                "run_shell_command(gh pr edit)",
-                "run_shell_command(gh issue view)"
-              ]
-            }
-          prompt: |
-            You are a PR auditing assistant. Your task is to scan all open pull requests in the repository and ensure they are correctly triaged by performing a full label synchronization from the linked issue.
-
-            The repository is ${{ github.repository }}.
-
-            Follow these steps for each open pull request:
-            1. **Get all open pull requests.**
-               `gh pr list --repo ${{ github.repository }} --state open --json number -q '.[].number'`
-
-            2. **Iterate through each PR number** and perform the following triage steps:
-               a. **Check for a linked issue in the PR body.**
-                  `gh pr view --repo ${{ github.repository }} --json body -q .body`
-
-               b. **Scan the body for an issue reference** (e.g., `#123`, `Closes #456`).
-
-               c. **Process the result:**
-                  - If **no issue reference is found**, add the `status/need-issue` label to the PR:
-                    `gh pr edit --repo ${{ github.repository }} --add-label "status/need-issue"`
-
-                  - If **an issue reference is found**, perform a full label synchronization:
-                    i. **Remove the `status/need-issue` label** (if present):
-                       `gh pr edit --repo ${{ github.repository }} --remove-label "status/need-issue"`
-
-                    ii. **Extract the issue number** from the reference.
-
-                    iii. **Fetch all labels for the issue** and store them.
-                       `gh issue view --repo ${{ github.repository }} --json labels`
-
-                    iv. **Fetch all labels for the PR** and store them.
-                       `gh pr view --repo ${{ github.repository }} --json labels`
-
-                    v. **Compare the two sets of labels.**
-                       - Identify labels to **add**: those on the issue but not on the PR.
-                       - Identify labels to **remove**: those on the PR but not on the issue.
-
-                    vi. **Execute the synchronization.**
-                       - If there are labels to add, run: `gh pr edit --repo ${{ github.repository }} --add-label "label1,label2,..."`
-                       - If there are labels to remove, run: `gh pr edit --repo ${{ github.repository }} --remove-label "label3,label4,..."`
-
-            **Strictly follow these rules:**
-            - Execute the logic for every open pull request.
-            - Only use the `gh` commands provided.
-            - Do not add any comments.
-            - The final set of labels on each PR must exactly match the labels on its linked issue.
+          GITHUB_REPOSITORY: ${{ github.repository }}
+        run: ./.github/scripts/pr-triage.sh