From ca85d36761b64b85d801ba145c32a9a564a09a5d Mon Sep 17 00:00:00 2001
From: Hiroshi Nishio <hnishio0105@gmail.com>
Date: Wed, 22 Apr 2026 19:07:57 -0700
Subject: [PATCH] Label CI logs with runtime ownership

---
 pyproject.toml                               |   2 +-
 services/webhook/check_suite_handler.py      |  26 +++--
 services/webhook/test_check_suite_handler.py |  37 +++++++
 utils/logs/label_log_source.py               |  34 +++++++
 utils/logs/test_label_log_source.py          | 100 +++++++++++++++++++
 uv.lock                                      |   2 +-
 6 files changed, 192 insertions(+), 9 deletions(-)
 create mode 100644 utils/logs/label_log_source.py
 create mode 100644 utils/logs/test_label_log_source.py

diff --git a/pyproject.toml b/pyproject.toml
index 0dc88599a..916bb1e49 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "GitAuto"
-version = "1.55.5"
+version = "1.57.0"
 requires-python = ">=3.14"
 dependencies = [
     "annotated-doc==0.0.4",
diff --git a/services/webhook/check_suite_handler.py b/services/webhook/check_suite_handler.py
index fa426f4e1..c1a54d602 100644
--- a/services/webhook/check_suite_handler.py
+++ b/services/webhook/check_suite_handler.py
@@ -78,6 +78,7 @@
 from utils.logging.logging_config import logger, set_pr_number
 from utils.logs.clean_logs import clean_logs
 from utils.logs.detect_infra_failure import detect_infra_failure
+from utils.logs.label_log_source import label_log_source
 from utils.logs.normalize_log_for_hashing import normalize_log_for_hashing
 from utils.logs.save_ci_log_to_file import (
     CI_LOG_PATH,
@@ -116,8 +117,8 @@ async def handle_check_suite(
         logger.info("Duplicate check_suite_id=%s ignored", check_suite_id)
         return
 
-    # Check if this is a GitAuto PR by branch name (early return)
-    # head_branch can be None when:
+    # Check if this is a GitAuto PR by branch name. Return early when it is not.
+    # `head_branch` can be None when:
     # - Check suite runs on a tag push (tags don't have branches)
     # - Check suite runs on a deleted branch
     # - Check suite runs on a direct commit without PR
@@ -478,14 +479,21 @@ async def handle_check_suite(
             len(validation_result.errors),
         )
         pre_existing_errors = "\n".join(validation_result.errors)
+        pre_existing_errors = label_log_source(
+            pre_existing_errors,
+            "ours",
+            "GitAuto validation (AWS Lambda, Amazon Linux 2023)",
+        )
         logger.warning("Remaining errors:\n%s", pre_existing_errors)
     fixes_applied = validation_result.fixes_applied
 
-    # Get the error log from the workflow run
+    # Get the error log from the workflow run.
+    ci_log_source = ""
     if is_circleci:
         logger.info(
             "check_suite using CircleCI path for workflow %s", circleci_workflow_id
         )
+        ci_log_source = f"CircleCI for {owner_name}/{repo_name}"
         circleci_token = get_circleci_token(owner_id)
 
         if not circleci_token:
@@ -547,6 +555,7 @@ async def handle_check_suite(
         )
     elif is_codecov:
         logger.info("check_suite using Codecov path for check_run=%s", check_run_name)
+        ci_log_source = f"Codecov coverage report for {owner_name}/{repo_name}"  # still "theirs" — coverage for the customer's repo
         # See payloads/github/check_run/codecov_check_run_output.json
         output = check_run.get("output", {})
         title = output.get("title", "")
@@ -609,6 +618,7 @@ async def handle_check_suite(
         logger.info(
             "check_suite using GitHub Actions path for run_id=%s", github_run_id
         )
+        ci_log_source = f"GitHub Actions for {owner_name}/{repo_name}"
         # GitHub Actions log retrieval (existing logic)
         error_log = get_workflow_run_logs(
             owner=owner_name, repo=repo_name, run_id=github_run_id, token=token
@@ -860,6 +870,9 @@ async def handle_check_suite(
         )
         ci_log_value = minimized_log
 
+    # Tag the customer CI log with its source so the agent doesn't confuse it with errors produced inside our own Lambda.
+    ci_log_value = label_log_source(ci_log_value, "theirs", ci_log_source)
+
     # Truncate patch — it sits in the first message and repeats in every LLM call
     max_patch_chars = 1000
     for f in changed_files:
@@ -1054,10 +1067,9 @@ async def handle_check_suite(
         update_comment(body=comment_body, base_args=base_args)
         pushed = create_empty_commit(base_args=base_args)
 
-        # Update final comment. Do NOT include CHECK_RUN_FAILED_MESSAGE here — that marker
-        # is a concurrency lock set at line 317 when processing starts. Clearing it here
-        # releases the lock so the next check_suite webhook can proceed. The error hash
-        # dedup at line 557 prevents re-attempting the same error.
+        # Update final comment. Do NOT include CHECK_RUN_FAILED_MESSAGE here — that marker is a concurrency lock set earlier in handle_check_suite when processing starts.
+        # Clearing it here releases the lock so the next check_suite webhook can proceed.
+        # The error hash dedup earlier in handle_check_suite prevents re-attempting the same error.
         if not pushed:
             logger.warning(
                 "Final empty commit skipped on %s: concurrent push detected; posting respect-the-push message",
diff --git a/services/webhook/test_check_suite_handler.py b/services/webhook/test_check_suite_handler.py
index 4a6c44ff0..1b3e55758 100644
--- a/services/webhook/test_check_suite_handler.py
+++ b/services/webhook/test_check_suite_handler.py
@@ -17,6 +17,7 @@
 from services.agents.verify_task_is_complete import VerifyTaskIsCompleteResult
 from services.chat_with_agent import AgentResult
 from services.webhook.check_suite_handler import handle_check_suite
+from utils.logs.label_log_source import label_log_source
 
 
 @pytest.fixture(autouse=True)
@@ -1767,6 +1768,42 @@ def test_patch_truncation_in_changed_files():
     assert serialized.count("5,000 chars total") == 1
 
 
+def test_check_suite_handler_imports_label_log_source():
+    # Smoke test: the handler must import label_log_source or the runtime-provenance tagging isn't actually wired.
+    # Without this, a future refactor could silently drop the import and logs would lose their `[log source: ...]` header — the same unlabelled-log failure mode behind Foxquilt PR #203.
+    from services.webhook import check_suite_handler  # pylint: disable=import-outside-toplevel
+
+    assert hasattr(check_suite_handler, "label_log_source")
+
+
+def test_ci_log_source_strings_produce_expected_agent_input():
+    # The handler builds `ci_log_source` as an inline f-string in each dispatch branch (CircleCI / Codecov / GitHub Actions), then passes the log through `label_log_source` with ownership="theirs". For Lambda validation errors the handler uses ownership="ours". This test pins the exact header text the agent will see, so a format drift in either the label or the source-string template fails here instead of silently confusing the agent in production.
+    owner = "Foxquilt"
+    repo = "foxden-version-controller"
+    raw_log = "FAIL test/spec/create-express-server.spec.ts"
+
+    circleci_source = f"CircleCI for {owner}/{repo}"
+    codecov_source = f"Codecov coverage report for {owner}/{repo}"
+    gha_source = f"GitHub Actions for {owner}/{repo}"
+    lambda_source = "GitAuto validation (AWS Lambda, Amazon Linux 2023)"
+
+    them = "CUSTOMER infrastructure (their runtime/CI)"
+    us = "OUR infrastructure (GitAuto-controlled)"
+
+    assert label_log_source(raw_log, "theirs", circleci_source) == (
+        f"[log source: {them} — {circleci_source}]\n{raw_log}"
+    )
+    assert label_log_source(raw_log, "theirs", codecov_source) == (
+        f"[log source: {them} — {codecov_source}]\n{raw_log}"
+    )
+    assert label_log_source(raw_log, "theirs", gha_source) == (
+        f"[log source: {them} — {gha_source}]\n{raw_log}"
+    )
+    assert label_log_source(raw_log, "ours", lambda_source) == (
+        f"[log source: {us} — {lambda_source}]\n{raw_log}"
+    )
+
+
 def test_agent_result_concurrent_push_field_defaults_false():
     """check_suite_handler breaks its agent loop and skips the final empty commit
     when AgentResult.concurrent_push_detected is True. Verify the new field
diff --git a/utils/logs/label_log_source.py b/utils/logs/label_log_source.py
new file mode 100644
index 000000000..4bf45ffaa
--- /dev/null
+++ b/utils/logs/label_log_source.py
@@ -0,0 +1,34 @@
+from typing import Literal
+
+# Local imports
+from utils.error.handle_exceptions import handle_exceptions
+from utils.logging.logging_config import logger
+
+# "ours"   = produced inside GitAuto-controlled infrastructure (our Lambda, our CodeBuild, etc.) — we can edit GitAuto source to fix it, never touch customer CI.
+# "theirs" = produced inside customer infrastructure (their CircleCI / GitHub Actions / Codecov run) — we fix by editing customer code or their CI config, never by editing GitAuto infrastructure.
+LogOwnership = Literal["ours", "theirs"]
+
+_OWNERSHIP_HEADERS: dict[LogOwnership, str] = {
+    "ours": "OUR infrastructure (GitAuto-controlled)",
+    "theirs": "CUSTOMER infrastructure (their runtime/CI)",
+}
+
+
+@handle_exceptions(default_return_value="", raise_on_error=False)
+def label_log_source(log: str, ownership: LogOwnership, source: str):
+    """Prepend a single-line runtime-provenance header to a log so the agent can't confuse our environment with a customer's.
+    Real incident: Foxquilt/foxden-version-controller PR #203 — an unlabelled Lambda error got misread as a CircleCI problem, and the agent pushed `MONGOMS_DISTRO=ubuntu-22.04` into the customer's `.circleci/config.yml`.
+    Saying just "CircleCI" or "Lambda" isn't enough — the agent can still flip the mapping. Every log must say outright whether it came from OUR or CUSTOMER infrastructure.
+    """
+    if not log:
+        logger.info("label_log_source: empty log, returning empty string")
+        return ""
+
+    owner_label = _OWNERSHIP_HEADERS[ownership]
+    logger.info(
+        "label_log_source: tagging %d chars of log with ownership=%s source=%s",
+        len(log),
+        ownership,
+        source,
+    )
+    return f"[log source: {owner_label} — {source}]\n{log}"
diff --git a/utils/logs/test_label_log_source.py b/utils/logs/test_label_log_source.py
new file mode 100644
index 000000000..9c6a32396
--- /dev/null
+++ b/utils/logs/test_label_log_source.py
@@ -0,0 +1,100 @@
+import pytest
+
+from utils.logs.label_log_source import label_log_source
+
+
+def test_ours_header_says_our_infrastructure_in_all_caps():
+    # The explicit "OUR" / "CUSTOMER" framing is the whole point of this function; a tag that just said "GitAuto Lambda" could still be misread as "Lambda must be the customer's Lambda". All-caps ownership prefix prevents that flip.
+    result = label_log_source(
+        "some lambda error",
+        "ours",
+        "GitAuto validation (AWS Lambda, Amazon Linux 2023)",
+    )
+    assert result.startswith("[log source: OUR infrastructure (GitAuto-controlled) — ")
+
+
+def test_theirs_header_says_customer_infrastructure_in_all_caps():
+    result = label_log_source(
+        "some ci error", "theirs", "CircleCI for Foxquilt/foxden-version-controller"
+    )
+    assert result.startswith(
+        "[log source: CUSTOMER infrastructure (their runtime/CI) — "
+    )
+
+
+def test_full_tagged_format_for_ours():
+    assert label_log_source("E1", "ours", "X") == (
+        "[log source: OUR infrastructure (GitAuto-controlled) — X]\nE1"
+    )
+
+
+def test_full_tagged_format_for_theirs():
+    assert label_log_source("E2", "theirs", "Y") == (
+        "[log source: CUSTOMER infrastructure (their runtime/CI) — Y]\nE2"
+    )
+
+
+def test_multiline_log_preserved():
+    log = "line one\nline two\nline three"
+    result = label_log_source(log, "theirs", "GitHub Actions for owner/repo")
+    assert result == (
+        "[log source: CUSTOMER infrastructure (their runtime/CI) — "
+        "GitHub Actions for owner/repo]\n"
+        "line one\nline two\nline three"
+    )
+
+
+def test_empty_log_returns_empty():
+    # The webhook handler only attaches non-empty logs; no reason to tag an empty string.
+    assert label_log_source("", "ours", "anything") == ""
+    assert label_log_source("", "theirs", "anything") == ""
+
+
+@pytest.mark.parametrize(
+    "ownership, source, expected_prefix",
+    [
+        (
+            "ours",
+            "GitAuto validation (AWS Lambda, Amazon Linux 2023)",
+            "[log source: OUR infrastructure (GitAuto-controlled) — GitAuto validation (AWS Lambda, Amazon Linux 2023)]\n",
+        ),
+        (
+            "theirs",
+            "CircleCI for Foxquilt/foxden-version-controller",
+            "[log source: CUSTOMER infrastructure (their runtime/CI) — CircleCI for Foxquilt/foxden-version-controller]\n",
+        ),
+        (
+            "theirs",
+            "GitHub Actions for Foxquilt/foxden-version-controller",
+            "[log source: CUSTOMER infrastructure (their runtime/CI) — GitHub Actions for Foxquilt/foxden-version-controller]\n",
+        ),
+        (
+            "theirs",
+            "Codecov coverage report for Foxquilt/foxden-version-controller",
+            "[log source: CUSTOMER infrastructure (their runtime/CI) — Codecov coverage report for Foxquilt/foxden-version-controller]\n",
+        ),
+    ],
+    ids=["lambda_ours", "circleci_theirs", "github_actions_theirs", "codecov_theirs"],
+)
+def test_known_sources_produce_expected_prefix(ownership, source, expected_prefix):
+    result = label_log_source("whatever", ownership, source)
+    assert result.startswith(expected_prefix)
+
+
+def test_pr203_scenario_lambda_error_tagged_as_ours():
+    # Real Lambda error from Foxquilt PR #203 (CloudWatch 2026-04-21 14:01:55).
+    # Without the ownership prefix, an agent could read this error — which mentions libcrypto/MongoDB but never CircleCI — and still decide the fix belongs in the customer's repo. With "OUR infrastructure" at the front, the source is unambiguous from the first token.
+    lambda_error = (
+        "Starting the MongoMemoryServer Instance failed, enable debug log for more information. Error:\n"
+        ' StdoutInstanceError: Instance failed to start because a library is missing or cannot be opened: "libcrypto.so.10"\n'
+        "error Command failed with exit code 1."
+    )
+    labelled = label_log_source(
+        lambda_error, "ours", "GitAuto validation (AWS Lambda, Amazon Linux 2023)"
+    )
+    first_line, rest = labelled.split("\n", 1)
+    assert first_line == (
+        "[log source: OUR infrastructure (GitAuto-controlled) — "
+        "GitAuto validation (AWS Lambda, Amazon Linux 2023)]"
+    )
+    assert rest == lambda_error
diff --git a/uv.lock b/uv.lock
index 8d2e5c76b..1de273600 100644
--- a/uv.lock
+++ b/uv.lock
@@ -596,7 +596,7 @@ wheels = [
 
 [[package]]
 name = "gitauto"
-version = "1.55.5"
+version = "1.57.0"
 source = { virtual = "." }
 dependencies = [
     { name = "annotated-doc" },