From f320ec4867403ed321505d56b8bcb2353bbeb93a Mon Sep 17 00:00:00 2001
From: contrueCT
Date: Thu, 19 Mar 2026 21:18:08 +0800
Subject: [PATCH 1/6] ci: add automatic rerun controller for flaky workflows

---
Note: adds a workflow_run-triggered workflow. When one of the four listed
workflows completes with conclusion 'failure' on its first attempt
(run_attempt < 2), it calls `gh run rerun <run id> --failed`.

 .github/workflows/rerun-ci.yml | 36 ++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 .github/workflows/rerun-ci.yml

diff --git a/.github/workflows/rerun-ci.yml b/.github/workflows/rerun-ci.yml
new file mode 100644
index 0000000000..4bfde45dee
--- /dev/null
+++ b/.github/workflows/rerun-ci.yml
@@ -0,0 +1,36 @@
+name: "Rerun CI"
+
+on:
+  workflow_run:
+    workflows:
+      - "HugeGraph-Server CI"
+      - "HugeGraph-Commons CI"
+      - "HugeGraph-PD & Store & Hstore CI"
+      - "Cluster Test CI"
+    types:
+      - completed
+
+permissions:
+  actions: write
+  contents: read
+
+jobs:
+  rerun-failed-jobs:
+    if: >-
+      github.event.workflow_run.conclusion == 'failure' &&
+      fromJSON(github.event.workflow_run.run_attempt) < 2
+    runs-on: ubuntu-latest
+    steps:
+      - name: Show rerun target
+        run: |
+          echo "Workflow: ${{ github.event.workflow_run.name }}"
+          echo "Run ID: ${{ github.event.workflow_run.id }}"
+          echo "Run attempt: ${{ github.event.workflow_run.run_attempt }}"
+          echo "Conclusion: ${{ github.event.workflow_run.conclusion }}"
+
+      - name: Rerun failed jobs
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GH_REPO: ${{ github.repository }}
+        run: |
+          gh run rerun ${{ github.event.workflow_run.id }} --failed

From af40258a8682be7ed1c90fc16ee76b7b64458662 Mon Sep 17 00:00:00 2001
From: contrueCT
Date: Sat, 11 Apr 2026 00:26:44 +0800
Subject: [PATCH 2/6] ci: refine auto rerun controller policy

---
Note: replaces the job-level `if` with a shell "decision" step. A rerun is
chosen only when the conclusion is 'failure', the source event is push or
pull_request, and RUN_ATTEMPT <= MAX_RERUNS ('2'). The job then sleeps
RETRY_DELAY_SECONDS before rerunning. Because the job-level `if` is removed,
the job now starts for every completed run and records a "skip" decision in
the step summary for non-failures. Also adds a branches filter.

 .github/workflows/rerun-ci.yml | 65 +++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/rerun-ci.yml b/.github/workflows/rerun-ci.yml
index 4bfde45dee..22a855b6a2 100644
--- a/.github/workflows/rerun-ci.yml
+++ b/.github/workflows/rerun-ci.yml
@@ -9,26 +9,75 @@ on:
       - "Cluster Test CI"
     types:
       - completed
+    branches:
+      - master
+      - 'release-*'
+      - 'test-*'
 
 permissions:
   actions: write
   contents: read
 
+env:
+  MAX_RERUNS: '2'
+  RETRY_DELAY_SECONDS: '180'
+
 jobs:
   rerun-failed-jobs:
-    if: >-
-      github.event.workflow_run.conclusion == 'failure' &&
-      fromJSON(github.event.workflow_run.run_attempt) < 2
     runs-on: ubuntu-latest
     steps:
-      - name: Show rerun target
+      - name: Decide rerun action
+        id: decision
+        env:
+          WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
+          RUN_ID: ${{ github.event.workflow_run.id }}
+          RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
+          CONCLUSION: ${{ github.event.workflow_run.conclusion }}
+          EVENT_NAME: ${{ github.event.workflow_run.event }}
+          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
+        run: |
+          set -euo pipefail
+
+          action="skip"
+          reason="non-failure"
+
+          if [[ "$CONCLUSION" == "failure" ]]; then
+            if [[ "$EVENT_NAME" != "push" && "$EVENT_NAME" != "pull_request" ]]; then
+              reason="unsupported event: $EVENT_NAME"
+            elif (( RUN_ATTEMPT > MAX_RERUNS )); then
+              reason="retry limit reached"
+            else
+              action="rerun"
+              reason="within retry limit"
+            fi
+          fi
+
+          {
+            echo "action=$action"
+            echo "reason=$reason"
+          } >> "$GITHUB_OUTPUT"
+
+          {
+            echo "### Rerun CI decision"
+            echo ""
+            echo "- Workflow: $WORKFLOW_NAME"
+            echo "- Source event: $EVENT_NAME"
+            echo "- Head branch: $HEAD_BRANCH"
+            echo "- Run ID: $RUN_ID"
+            echo "- Current attempt: $RUN_ATTEMPT"
+            echo "- Max automatic reruns: $MAX_RERUNS"
+            echo "- Delay seconds: $RETRY_DELAY_SECONDS"
+            echo "- Action: $action"
+            echo "- Reason: $reason"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Wait before rerun
+        if: steps.decision.outputs.action == 'rerun'
         run: |
-          echo "Workflow: ${{ github.event.workflow_run.name }}"
-          echo "Run ID: ${{ github.event.workflow_run.id }}"
-          echo "Run attempt: ${{ github.event.workflow_run.run_attempt }}"
-          echo "Conclusion: ${{ github.event.workflow_run.conclusion }}"
+          sleep "$RETRY_DELAY_SECONDS"
 
       - name: Rerun failed jobs
+        if: steps.decision.outputs.action == 'rerun'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           GH_REPO: ${{ github.repository }}

From cff1eaef5b13b4551856b2be935eb8482eca24c7 Mon Sep 17 00:00:00 2001
From: contrueCT
Date: Sat, 11 Apr 2026 17:14:58 +0800
Subject: [PATCH 3/6] ci: follow source workflow scope for reruns

---
Note: removes the branches filter (master, 'release-*', 'test-*') that
patch 2/6 added to the workflow_run trigger.

 .github/workflows/rerun-ci.yml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/rerun-ci.yml b/.github/workflows/rerun-ci.yml
index 22a855b6a2..f10ffd1bb3 100644
--- a/.github/workflows/rerun-ci.yml
+++ b/.github/workflows/rerun-ci.yml
@@ -9,10 +9,6 @@ on:
       - "Cluster Test CI"
     types:
       - completed
-    branches:
-      - master
-      - 'release-*'
-      - 'test-*'
 
 permissions:
   actions: write

From 7daedbcdf7b91b7d70b31ba59696421c0da2323b Mon Sep 17 00:00:00 2001
From: contrueCT
Date: Sat, 11 Apr 2026 17:27:31 +0800
Subject: [PATCH 4/6] ci: narrow rerun workflow write scope

---
Note: sets workflow-level permissions to {} and splits the single job in two.
decide-rerun-action exposes the decision as the job output `action`.
rerun-failed-jobs needs that job, runs only when the output is 'rerun', and
is the only job granted `actions: write` / `contents: read`. The step-level
`if` conditions are dropped in favour of the job-level one.

 .github/workflows/rerun-ci.yml | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/rerun-ci.yml b/.github/workflows/rerun-ci.yml
index f10ffd1bb3..d2282e0092 100644
--- a/.github/workflows/rerun-ci.yml
+++ b/.github/workflows/rerun-ci.yml
@@ -10,17 +10,17 @@ on:
     types:
       - completed
 
-permissions:
-  actions: write
-  contents: read
+permissions: {}
 
 env:
   MAX_RERUNS: '2'
   RETRY_DELAY_SECONDS: '180'
 
 jobs:
-  rerun-failed-jobs:
+  decide-rerun-action:
     runs-on: ubuntu-latest
+    outputs:
+      action: ${{ steps.decision.outputs.action }}
     steps:
       - name: Decide rerun action
         id: decision
@@ -67,13 +67,19 @@ jobs:
             echo "- Reason: $reason"
           } >> "$GITHUB_STEP_SUMMARY"
 
+  rerun-failed-jobs:
+    needs: decide-rerun-action
+    if: needs.decide-rerun-action.outputs.action == 'rerun'
+    permissions:
+      actions: write
+      contents: read
+    runs-on: ubuntu-latest
+    steps:
       - name: Wait before rerun
-        if: steps.decision.outputs.action == 'rerun'
         run: |
           sleep "$RETRY_DELAY_SECONDS"
 
       - name: Rerun failed jobs
-        if: steps.decision.outputs.action == 'rerun'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           GH_REPO: ${{ github.repository }}

From 16c6d85fbb7b103943783e87b9e61e6fe4b9130c Mon Sep 17 00:00:00 2001
From: contrueCT
Date: Sat, 11 Apr 2026 17:35:56 +0800
Subject: [PATCH 5/6] ci: reduce retry delay for rerun jobs from 180 to 60 seconds

---
Note: changes RETRY_DELAY_SECONDS from '180' to '60'. Patch 6/6 changes it
back to '180'.

 .github/workflows/rerun-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rerun-ci.yml b/.github/workflows/rerun-ci.yml
index d2282e0092..4e1494908d 100644
--- a/.github/workflows/rerun-ci.yml
+++ b/.github/workflows/rerun-ci.yml
@@ -14,7 +14,7 @@ permissions: {}
 
 env:
   MAX_RERUNS: '2'
-  RETRY_DELAY_SECONDS: '180'
+  RETRY_DELAY_SECONDS: '60'
 
 jobs:
   decide-rerun-action:

From 48d6dda8b2b4a4cb135da46535f1cfde664f2e72 Mon Sep 17 00:00:00 2001
From: contrueCT
Date: Sat, 11 Apr 2026 22:53:42 +0800
Subject: [PATCH 6/6] ci: increase retry delay for rerun jobs from 60 to 180 seconds

---
Note: restores RETRY_DELAY_SECONDS to '180'. The resulting blob (d2282e0092)
is the same blob produced by patch 4/6, so patches 5/6 and 6/6 cancel out.

 .github/workflows/rerun-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rerun-ci.yml b/.github/workflows/rerun-ci.yml
index 4e1494908d..d2282e0092 100644
--- a/.github/workflows/rerun-ci.yml
+++ b/.github/workflows/rerun-ci.yml
@@ -14,7 +14,7 @@ permissions: {}
 
 env:
   MAX_RERUNS: '2'
-  RETRY_DELAY_SECONDS: '60'
+  RETRY_DELAY_SECONDS: '180'
 
 jobs:
   decide-rerun-action: