From ca85d36761b64b85d801ba145c32a9a564a09a5d Mon Sep 17 00:00:00 2001 From: Hiroshi Nishio Date: Wed, 22 Apr 2026 19:07:57 -0700 Subject: [PATCH] Label CI logs with runtime ownership --- pyproject.toml | 2 +- services/webhook/check_suite_handler.py | 26 +++-- services/webhook/test_check_suite_handler.py | 37 +++++++ utils/logs/label_log_source.py | 34 +++++++ utils/logs/test_label_log_source.py | 100 +++++++++++++++++++ uv.lock | 2 +- 6 files changed, 192 insertions(+), 9 deletions(-) create mode 100644 utils/logs/label_log_source.py create mode 100644 utils/logs/test_label_log_source.py diff --git a/pyproject.toml b/pyproject.toml index 0dc88599a..916bb1e49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "GitAuto" -version = "1.55.5" +version = "1.57.0" requires-python = ">=3.14" dependencies = [ "annotated-doc==0.0.4", diff --git a/services/webhook/check_suite_handler.py b/services/webhook/check_suite_handler.py index fa426f4e1..c1a54d602 100644 --- a/services/webhook/check_suite_handler.py +++ b/services/webhook/check_suite_handler.py @@ -78,6 +78,7 @@ from utils.logging.logging_config import logger, set_pr_number from utils.logs.clean_logs import clean_logs from utils.logs.detect_infra_failure import detect_infra_failure +from utils.logs.label_log_source import label_log_source from utils.logs.normalize_log_for_hashing import normalize_log_for_hashing from utils.logs.save_ci_log_to_file import ( CI_LOG_PATH, @@ -116,8 +117,8 @@ async def handle_check_suite( logger.info("Duplicate check_suite_id=%s ignored", check_suite_id) return - # Check if this is a GitAuto PR by branch name (early return) - # head_branch can be None when: + # Check if this is a GitAuto PR by branch name. Return early when it is not. 
+ # `head_branch` can be None when: # - Check suite runs on a tag push (tags don't have branches) # - Check suite runs on a deleted branch # - Check suite runs on a direct commit without PR @@ -478,14 +479,21 @@ async def handle_check_suite( len(validation_result.errors), ) pre_existing_errors = "\n".join(validation_result.errors) + pre_existing_errors = label_log_source( + pre_existing_errors, + "ours", + "GitAuto validation (AWS Lambda, Amazon Linux 2023)", + ) logger.warning("Remaining errors:\n%s", pre_existing_errors) fixes_applied = validation_result.fixes_applied - # Get the error log from the workflow run + # Get the error log from the workflow run. + ci_log_source = "" if is_circleci: logger.info( "check_suite using CircleCI path for workflow %s", circleci_workflow_id ) + ci_log_source = f"CircleCI for {owner_name}/{repo_name}" circleci_token = get_circleci_token(owner_id) if not circleci_token: @@ -547,6 +555,7 @@ async def handle_check_suite( ) elif is_codecov: logger.info("check_suite using Codecov path for check_run=%s", check_run_name) + ci_log_source = f"Codecov coverage report for {owner_name}/{repo_name}" # still "theirs" — coverage for the customer's repo # See payloads/github/check_run/codecov_check_run_output.json output = check_run.get("output", {}) title = output.get("title", "") @@ -609,6 +618,7 @@ async def handle_check_suite( logger.info( "check_suite using GitHub Actions path for run_id=%s", github_run_id ) + ci_log_source = f"GitHub Actions for {owner_name}/{repo_name}" # GitHub Actions log retrieval (existing logic) error_log = get_workflow_run_logs( owner=owner_name, repo=repo_name, run_id=github_run_id, token=token @@ -860,6 +870,9 @@ async def handle_check_suite( ) ci_log_value = minimized_log + # Tag the customer CI log with its source so the agent doesn't confuse it with errors produced inside our own Lambda. 
+ ci_log_value = label_log_source(ci_log_value, "theirs", ci_log_source) + # Truncate patch — it sits in the first message and repeats in every LLM call max_patch_chars = 1000 for f in changed_files: @@ -1054,10 +1067,9 @@ async def handle_check_suite( update_comment(body=comment_body, base_args=base_args) pushed = create_empty_commit(base_args=base_args) - # Update final comment. Do NOT include CHECK_RUN_FAILED_MESSAGE here — that marker - # is a concurrency lock set at line 317 when processing starts. Clearing it here - # releases the lock so the next check_suite webhook can proceed. The error hash - # dedup at line 557 prevents re-attempting the same error. + # Update final comment. Do NOT include CHECK_RUN_FAILED_MESSAGE here — that marker is a concurrency lock set at line 317 when processing starts. + # Clearing it here releases the lock so the next check_suite webhook can proceed. + # The error hash dedup at line 557 prevents re-attempting the same error. if not pushed: logger.warning( "Final empty commit skipped on %s: concurrent push detected; posting respect-the-push message", diff --git a/services/webhook/test_check_suite_handler.py b/services/webhook/test_check_suite_handler.py index 4a6c44ff0..1b3e55758 100644 --- a/services/webhook/test_check_suite_handler.py +++ b/services/webhook/test_check_suite_handler.py @@ -17,6 +17,7 @@ from services.agents.verify_task_is_complete import VerifyTaskIsCompleteResult from services.chat_with_agent import AgentResult from services.webhook.check_suite_handler import handle_check_suite +from utils.logs.label_log_source import label_log_source @pytest.fixture(autouse=True) @@ -1767,6 +1768,42 @@ def test_patch_truncation_in_changed_files(): assert serialized.count("5,000 chars total") == 1 +def test_check_suite_handler_imports_label_log_source(): + # Smoke test: the handler must import label_log_source or the runtime-provenance tagging isn't actually wired. 
def test_ci_log_source_strings_produce_expected_agent_input():
    # Pins the exact header text the agent sees for each log origin.
    # The source templates below are the same inline f-strings the handler assigns to `ci_log_source` in its CircleCI / Codecov / GitHub Actions branches (ownership "theirs"), plus the fixed Lambda validation label (ownership "ours").
    # A format drift in either the ownership phrase or the header layout fails here instead of silently confusing the agent in production.
    owner = "Foxquilt"
    repo = "foxden-version-controller"
    raw_log = "FAIL test/spec/create-express-server.spec.ts"

    customer_phrase = "CUSTOMER infrastructure (their runtime/CI)"
    gitauto_phrase = "OUR infrastructure (GitAuto-controlled)"

    # (ownership argument, ownership phrase expected in the header, source string)
    cases = [
        ("theirs", customer_phrase, f"CircleCI for {owner}/{repo}"),
        ("theirs", customer_phrase, f"Codecov coverage report for {owner}/{repo}"),
        ("theirs", customer_phrase, f"GitHub Actions for {owner}/{repo}"),
        ("ours", gitauto_phrase, "GitAuto validation (AWS Lambda, Amazon Linux 2023)"),
    ]

    for ownership, phrase, source in cases:
        expected = f"[log source: {phrase} — {source}]\n{raw_log}"
        assert label_log_source(raw_log, ownership, source) == expected
@handle_exceptions(default_return_value="", raise_on_error=False)
def label_log_source(log: str, ownership: LogOwnership, source: str) -> str:
    """Prepend a single-line runtime-provenance header to a log.

    The header says outright whether the log came from OUR or CUSTOMER infrastructure so the agent can't confuse our environment with a customer's.
    Real incident: Foxquilt/foxden-version-controller PR #203 — an unlabelled Lambda error got misread as a CircleCI problem, and the agent pushed `MONGOMS_DISTRO=ubuntu-22.04` into the customer's `.circleci/config.yml`.
    Saying just "CircleCI" or "Lambda" isn't enough — the agent can still flip the mapping.

    Args:
        log: Raw log text. An empty log is returned as "" with no header.
        ownership: "ours" or "theirs"; selects the ownership phrase from `_OWNERSHIP_HEADERS`.
        source: Human-readable origin, e.g. "CircleCI for owner/repo". Whitespace runs (including newlines) are collapsed to single spaces so the header stays on one line. If nothing remains, the header carries the ownership phrase alone.

    Returns:
        `[log source: <ownership phrase> — <source>]`, a newline, then the untouched log; or "" when `log` is empty.
    """
    # NOTE(review): the decorator is configured with default_return_value="" — presumably any exception raised below (e.g. an unknown ownership key) yields "" instead of propagating; confirm against handle_exceptions.
    if not log:
        logger.info("label_log_source: empty log, returning empty string")
        return ""

    owner_label = _OWNERSHIP_HEADERS[ownership]

    # Collapse whitespace so a stray newline in `source` can't split the header across lines.
    source_label = " ".join(source.split())
    if source_label:
        header = f"[log source: {owner_label} — {source_label}]"
    else:
        # No usable source text: drop the separator instead of emitting a dangling "— ]".
        header = f"[log source: {owner_label}]"

    logger.info(
        "label_log_source: tagging %d chars of log with ownership=%s source=%s",
        len(log),
        ownership,
        source_label,
    )
    return f"{header}\n{log}"
@pytest.mark.parametrize(
    "ownership, source, expected_prefix",
    [
        # Each case keeps its id next to its data so the two can't drift out of order.
        pytest.param(
            "ours",
            "GitAuto validation (AWS Lambda, Amazon Linux 2023)",
            "[log source: OUR infrastructure (GitAuto-controlled) — GitAuto validation (AWS Lambda, Amazon Linux 2023)]\n",
            id="lambda_ours",
        ),
        pytest.param(
            "theirs",
            "CircleCI for Foxquilt/foxden-version-controller",
            "[log source: CUSTOMER infrastructure (their runtime/CI) — CircleCI for Foxquilt/foxden-version-controller]\n",
            id="circleci_theirs",
        ),
        pytest.param(
            "theirs",
            "GitHub Actions for Foxquilt/foxden-version-controller",
            "[log source: CUSTOMER infrastructure (their runtime/CI) — GitHub Actions for Foxquilt/foxden-version-controller]\n",
            id="github_actions_theirs",
        ),
        pytest.param(
            "theirs",
            "Codecov coverage report for Foxquilt/foxden-version-controller",
            "[log source: CUSTOMER infrastructure (their runtime/CI) — Codecov coverage report for Foxquilt/foxden-version-controller]\n",
            id="codecov_theirs",
        ),
    ],
)
def test_known_sources_produce_expected_prefix(ownership, source, expected_prefix):
    # Every source string the webhook handler is known to pass must yield the full header line, newline included, ahead of the log body.
    labelled = label_log_source("whatever", ownership, source)
    assert labelled.startswith(expected_prefix)
Error:\n" + ' StdoutInstanceError: Instance failed to start because a library is missing or cannot be opened: "libcrypto.so.10"\n' + "error Command failed with exit code 1." + ) + labelled = label_log_source( + lambda_error, "ours", "GitAuto validation (AWS Lambda, Amazon Linux 2023)" + ) + first_line, rest = labelled.split("\n", 1) + assert first_line == ( + "[log source: OUR infrastructure (GitAuto-controlled) — " + "GitAuto validation (AWS Lambda, Amazon Linux 2023)]" + ) + assert rest == lambda_error diff --git a/uv.lock b/uv.lock index 8d2e5c76b..1de273600 100644 --- a/uv.lock +++ b/uv.lock @@ -596,7 +596,7 @@ wheels = [ [[package]] name = "gitauto" -version = "1.55.5" +version = "1.57.0" source = { virtual = "." } dependencies = [ { name = "annotated-doc" },