From 6390b81646e324bfbb86ccd73247f5d3e2a423f8 Mon Sep 17 00:00:00 2001 From: Gaurav <39389231+gsquared94@users.noreply.github.com> Date: Mon, 11 Aug 2025 11:48:57 -0700 Subject: [PATCH] update: issue triage workflows tags duplicate issues (#5868) --- .../gemini-automated-issue-triage.yml | 147 ++++++++++++++++++ .../gemini-scheduled-issue-triage.yml | 103 ++++++++++++ 2 files changed, 250 insertions(+) diff --git a/.github/workflows/gemini-automated-issue-triage.yml b/.github/workflows/gemini-automated-issue-triage.yml index 74904d4c..950d11e8 100644 --- a/.github/workflows/gemini-automated-issue-triage.yml +++ b/.github/workflows/gemini-automated-issue-triage.yml @@ -28,6 +28,7 @@ permissions: id-token: 'write' issues: 'write' statuses: 'write' + packages: 'read' jobs: triage-issue: @@ -201,3 +202,149 @@ jobs: issue_number: '${{ github.event.issue.number }}', body: 'There is a problem with the Gemini CLI issue triaging. Please check the [action logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.' 
}) + + deduplicate-issues: + if: > + github.repository == 'google-gemini/gemini-cli' && + vars.TRIAGE_DEDUPLICATE_ISSUES != '' && + (github.event_name == 'issues' || + github.event_name == 'workflow_dispatch' || + (github.event_name == 'issue_comment' && + contains(github.event.comment.body, '@gemini-cli /deduplicate') && + (github.event.comment.author_association == 'OWNER' || + github.event.comment.author_association == 'MEMBER' || + github.event.comment.author_association == 'COLLABORATOR'))) + + timeout-minutes: 20 + runs-on: 'ubuntu-latest' + steps: + - name: 'Checkout repository' + uses: 'actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683' + + - name: 'Generate GitHub App Token' + id: 'generate_token' + uses: 'actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e' + with: + app-id: '${{ secrets.APP_ID }}' + private-key: '${{ secrets.PRIVATE_KEY }}' + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 'Run Gemini Issue Deduplication' + uses: 'google-github-actions/run-gemini-cli@20351b5ea2b4179431f1ae8918a246a0808f8747' + id: 'gemini_issue_deduplication' + env: + GITHUB_TOKEN: '${{ steps.generate_token.outputs.token }}' + ISSUE_TITLE: '${{ github.event.issue.title }}' + ISSUE_BODY: '${{ github.event.issue.body }}' + ISSUE_NUMBER: '${{ github.event.issue.number }}' + REPOSITORY: '${{ github.repository }}' + FIRESTORE_PROJECT: '${{ vars.FIRESTORE_PROJECT }}' + with: + gcp_workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}' + gcp_project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}' + gcp_location: '${{ vars.GOOGLE_CLOUD_LOCATION }}' + gcp_service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}' + gemini_api_key: '${{ secrets.GEMINI_API_KEY }}' + use_vertex_ai: '${{ vars.GOOGLE_GENAI_USE_VERTEXAI }}' + use_gemini_code_assist: '${{ vars.GOOGLE_GENAI_USE_GCA }}' + settings: |- + { + "mcpServers": { + 
"issue_deduplication": { + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "--network", "host", + "-e", "GITHUB_TOKEN", + "-e", "GEMINI_API_KEY", + "-e", "DATABASE_TYPE", + "-e", "FIRESTORE_DATABASE_ID", + "-e", "GCP_PROJECT", + "-e", "GOOGLE_APPLICATION_CREDENTIALS=/app/gcp-credentials.json", + "-v", "${GOOGLE_APPLICATION_CREDENTIALS}:/app/gcp-credentials.json", + "ghcr.io/google-gemini/gemini-cli-issue-triage@sha256:e3de1523f6c83aabb3c54b76d08940a2bf42febcb789dd2da6f95169641f94d3" + ], + "env": { + "GITHUB_TOKEN": "${GITHUB_TOKEN}", + "GEMINI_API_KEY": "${{ secrets.GEMINI_API_KEY }}", + "DATABASE_TYPE":"firestore", + "GCP_PROJECT": "${FIRESTORE_PROJECT}", + "FIRESTORE_DATABASE_ID": "(default)", + "GOOGLE_APPLICATION_CREDENTIALS": "${GOOGLE_APPLICATION_CREDENTIALS}" + }, + "enabled": true, + "timeout": 600000 + } + }, + "maxSessionTurns": 25, + "coreTools": [ + "run_shell_command(echo)", + "run_shell_command(gh issue comment)", + "run_shell_command(gh issue view)", + "run_shell_command(gh issue edit)" + ], + "telemetry": { + "enabled": true, + "target": "gcp" + } + } + prompt: |- + ## Role + You are an issue de-duplication assistant. Your goal is to find + duplicate issues, label the current issue as a duplicate, and notify + the user by commenting on the current issue, while avoiding + duplicate comments. + ## Steps + 1. **Find Potential Duplicates:** + - The repository is ${{ github.repository }} and the issue number is ${{ github.event.issue.number }}. + - Use the `duplicates` tool with the `repo` and `issue_number` to find potential duplicates for the current issue. Do not use the `threshold` parameter. + - If no duplicates are found, you are done. + - Print the JSON output from the `duplicates` tool to the logs. + 2. **Refine Duplicates List (if necessary):** + - If the `duplicates` tool returns between 1 and 14 results, you must refine the list. 
+ - For each potential duplicate issue, run `gh issue view <issue-number> --json title,body,comments` to fetch its content. + - Also fetch the content of the original issue: `gh issue view "${ISSUE_NUMBER}" --json title,body,comments`. + - Carefully analyze the content (title, body, comments) of the original issue and all potential duplicates. + - It is very important that, if the comments on either issue mention that they are not duplicates of each other, you treat them as not duplicates. + - Based on your analysis, create a final list containing only the issues you are highly confident are actual duplicates. + - If your final list is empty, you are done. + - Print to the logs if you omitted any potential duplicates based on your analysis. + - If the `duplicates` tool returned 15+ results, use the top 15 matches (based on descending similarity score value) to perform this step. + 3. **Format Final Duplicates List:** + Format the final list of duplicates into a markdown string. + The format should be: + "Found possible duplicate issues:\n\n- #${issue_number}\n\nIf you believe this is not a duplicate, please remove the `status/possible-duplicate` label." + Add an HTML comment to the end for identification: `<!-- gemini-cli-deduplication -->` + 4. **Check for Existing Comment:** + - Run `gh issue view "${ISSUE_NUMBER}" --json comments` to get all + comments on the issue. + - Look for a comment made by a bot (the author's login often ends in `[bot]`) that contains `<!-- gemini-cli-deduplication -->`. + - If you find such a comment, store its `id` and `body`. + 5. **Decide Action:** + - **If an existing comment is found:** + - Compare the new list of duplicate issues with the list from the existing comment's body. + - If they are the same, do nothing. + - If they are different, edit the existing comment. Use + `gh issue comment "${ISSUE_NUMBER}" --edit-comment <comment-id> --body "..."`. + The new body should be the new list of duplicates, but with the header "Found possible duplicate issues (updated):". 
+ - **If no existing comment is found:** + - Create a new comment with the list of duplicates. + - Use `gh issue comment "${ISSUE_NUMBER}" --body "..."`. + 6. **Add Duplicate Label:** + - If you created or updated a comment in the previous step, add the `status/possible-duplicate` label to the current issue. + - Use `gh issue edit "${ISSUE_NUMBER}" --add-label "status/possible-duplicate"`. + ## Guidelines + - Only use the `duplicates` and `run_shell_command` tools. + - The `run_shell_command` tool can be used with `gh issue view`, `gh issue comment`, and `gh issue edit`. + - Do not download or read media files like images, videos, or links. The `--json` flag for `gh issue view` will prevent this. + - Do not modify the issue content or status. + - Only comment on and label the current issue. + - Reference all shell variables as "${VAR}" (with quotes and braces). diff --git a/.github/workflows/gemini-scheduled-issue-triage.yml b/.github/workflows/gemini-scheduled-issue-triage.yml index 97a81332..c6553706 100644 --- a/.github/workflows/gemini-scheduled-issue-triage.yml +++ b/.github/workflows/gemini-scheduled-issue-triage.yml @@ -18,6 +18,7 @@ permissions: id-token: 'write' issues: 'write' statuses: 'write' + packages: 'read' jobs: triage-issues: @@ -223,3 +224,105 @@ jobs: - could also pertain to latency, - other general software performance like, memory usage, CPU consumption, and algorithmic efficiency. - Switching models from one to the other unexpectedly. 
+ + refresh-embeddings: + if: |- + ${{ vars.TRIAGE_DEDUPLICATE_ISSUES != '' && github.repository == 'google-gemini/gemini-cli' }} + timeout-minutes: 20 + runs-on: 'ubuntu-latest' + steps: + - name: 'Checkout repository' + uses: 'actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683' + + - name: 'Generate GitHub App Token' + id: 'generate_token' + uses: 'actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e' + with: + app-id: '${{ secrets.APP_ID }}' + private-key: '${{ secrets.PRIVATE_KEY }}' + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 'Run Gemini Issue Deduplication Refresh' + uses: 'google-github-actions/run-gemini-cli@20351b5ea2b4179431f1ae8918a246a0808f8747' + id: 'gemini_refresh_embeddings' + env: + GITHUB_TOKEN: '${{ steps.generate_token.outputs.token }}' + ISSUE_TITLE: '${{ github.event.issue.title }}' + ISSUE_BODY: '${{ github.event.issue.body }}' + ISSUE_NUMBER: '${{ github.event.issue.number }}' + REPOSITORY: '${{ github.repository }}' + FIRESTORE_PROJECT: '${{ vars.FIRESTORE_PROJECT }}' + with: + gcp_workload_identity_provider: '${{ vars.GCP_WIF_PROVIDER }}' + gcp_project_id: '${{ vars.GOOGLE_CLOUD_PROJECT }}' + gcp_location: '${{ vars.GOOGLE_CLOUD_LOCATION }}' + gcp_service_account: '${{ vars.SERVICE_ACCOUNT_EMAIL }}' + gemini_api_key: '${{ secrets.GEMINI_API_KEY }}' + use_vertex_ai: '${{ vars.GOOGLE_GENAI_USE_VERTEXAI }}' + use_gemini_code_assist: '${{ vars.GOOGLE_GENAI_USE_GCA }}' + settings: |- + { + "mcpServers": { + "issue_deduplication": { + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "--network", "host", + "-e", "GITHUB_TOKEN", + "-e", "GEMINI_API_KEY", + "-e", "DATABASE_TYPE", + "-e", "FIRESTORE_DATABASE_ID", + "-e", "GCP_PROJECT", + "-e", "GOOGLE_APPLICATION_CREDENTIALS=/app/gcp-credentials.json", + "-v", 
"${GOOGLE_APPLICATION_CREDENTIALS}:/app/gcp-credentials.json", + "ghcr.io/google-gemini/gemini-cli-issue-triage@sha256:e3de1523f6c83aabb3c54b76d08940a2bf42febcb789dd2da6f95169641f94d3" + ], + "env": { + "GITHUB_TOKEN": "${GITHUB_TOKEN}", + "GEMINI_API_KEY": "${{ secrets.GEMINI_API_KEY }}", + "DATABASE_TYPE":"firestore", + "GCP_PROJECT": "${FIRESTORE_PROJECT}", + "FIRESTORE_DATABASE_ID": "(default)", + "GOOGLE_APPLICATION_CREDENTIALS": "${GOOGLE_APPLICATION_CREDENTIALS}" + }, + "enabled": true, + "timeout": 600000 + } + }, + "maxSessionTurns": 25, + "coreTools": [ + "run_shell_command(echo)" + ], + "telemetry": { + "enabled": true, + "target": "gcp" + } + } + prompt: |- + ## Role + + You are a database maintenance assistant for a GitHub issue deduplication system. + + ## Goal + + Your sole responsibility is to refresh the embeddings for all open issues in the repository to ensure the deduplication database is up-to-date. + + ## Steps + + 1. **Extract Repository Information:** The repository is ${{ github.repository }}. + 2. **Refresh Embeddings:** Call the `refresh` tool with the correct `repo`. Do not use the `force` parameter. + 3. **Log Output:** Print the JSON output from the `refresh` tool to the logs. + + ## Guidelines + + - Only use the `refresh` tool. + - Do not attempt to find duplicates or modify any issues. + - Your only task is to call the `refresh` tool and log its output.