Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "GitAuto"
version = "1.55.5"
version = "1.57.0"
requires-python = ">=3.14"
dependencies = [
"annotated-doc==0.0.4",
Expand Down
26 changes: 19 additions & 7 deletions services/webhook/check_suite_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from utils.logging.logging_config import logger, set_pr_number
from utils.logs.clean_logs import clean_logs
from utils.logs.detect_infra_failure import detect_infra_failure
from utils.logs.label_log_source import label_log_source
from utils.logs.normalize_log_for_hashing import normalize_log_for_hashing
from utils.logs.save_ci_log_to_file import (
CI_LOG_PATH,
Expand Down Expand Up @@ -116,8 +117,8 @@ async def handle_check_suite(
logger.info("Duplicate check_suite_id=%s ignored", check_suite_id)
return

# Check if this is a GitAuto PR by branch name (early return)
# head_branch can be None when:
# Check if this is a GitAuto PR by branch name. Return early when it is not.
# `head_branch` can be None when:
# - Check suite runs on a tag push (tags don't have branches)
# - Check suite runs on a deleted branch
# - Check suite runs on a direct commit without PR
Expand Down Expand Up @@ -478,14 +479,21 @@ async def handle_check_suite(
len(validation_result.errors),
)
pre_existing_errors = "\n".join(validation_result.errors)
pre_existing_errors = label_log_source(
pre_existing_errors,
"ours",
"GitAuto validation (AWS Lambda, Amazon Linux 2023)",
)
logger.warning("Remaining errors:\n%s", pre_existing_errors)
fixes_applied = validation_result.fixes_applied

# Get the error log from the workflow run
# Get the error log from the workflow run.
ci_log_source = ""
if is_circleci:
logger.info(
"check_suite using CircleCI path for workflow %s", circleci_workflow_id
)
ci_log_source = f"CircleCI for {owner_name}/{repo_name}"
circleci_token = get_circleci_token(owner_id)

if not circleci_token:
Expand Down Expand Up @@ -547,6 +555,7 @@ async def handle_check_suite(
)
elif is_codecov:
logger.info("check_suite using Codecov path for check_run=%s", check_run_name)
ci_log_source = f"Codecov coverage report for {owner_name}/{repo_name}" # still "theirs" — coverage for the customer's repo
# See payloads/github/check_run/codecov_check_run_output.json
output = check_run.get("output", {})
title = output.get("title", "")
Expand Down Expand Up @@ -609,6 +618,7 @@ async def handle_check_suite(
logger.info(
"check_suite using GitHub Actions path for run_id=%s", github_run_id
)
ci_log_source = f"GitHub Actions for {owner_name}/{repo_name}"
# GitHub Actions log retrieval (existing logic)
error_log = get_workflow_run_logs(
owner=owner_name, repo=repo_name, run_id=github_run_id, token=token
Expand Down Expand Up @@ -860,6 +870,9 @@ async def handle_check_suite(
)
ci_log_value = minimized_log

# Tag the customer CI log with its source so the agent doesn't confuse it with errors produced inside our own Lambda.
ci_log_value = label_log_source(ci_log_value, "theirs", ci_log_source)

# Truncate patch — it sits in the first message and repeats in every LLM call
max_patch_chars = 1000
for f in changed_files:
Expand Down Expand Up @@ -1054,10 +1067,9 @@ async def handle_check_suite(
update_comment(body=comment_body, base_args=base_args)
pushed = create_empty_commit(base_args=base_args)

# Update final comment. Do NOT include CHECK_RUN_FAILED_MESSAGE here — that marker
# is a concurrency lock set at line 317 when processing starts. Clearing it here
# releases the lock so the next check_suite webhook can proceed. The error hash
# dedup at line 557 prevents re-attempting the same error.
# Update final comment. Do NOT include CHECK_RUN_FAILED_MESSAGE here — that marker is a concurrency lock set at line 317 when processing starts.
# Clearing it here releases the lock so the next check_suite webhook can proceed.
# The error hash dedup at line 557 prevents re-attempting the same error.
if not pushed:
logger.warning(
"Final empty commit skipped on %s: concurrent push detected; posting respect-the-push message",
Expand Down
37 changes: 37 additions & 0 deletions services/webhook/test_check_suite_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from services.agents.verify_task_is_complete import VerifyTaskIsCompleteResult
from services.chat_with_agent import AgentResult
from services.webhook.check_suite_handler import handle_check_suite
from utils.logs.label_log_source import label_log_source


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -1767,6 +1768,42 @@ def test_patch_truncation_in_changed_files():
assert serialized.count("5,000 chars total") == 1


def test_check_suite_handler_imports_label_log_source():
    # Wiring smoke test: provenance tagging only works if the handler module actually exposes `label_log_source`.
    # A refactor that silently drops the import would strip the `[log source: ...]` header from logs again — the regression behind Foxquilt PR #203.
    from services.webhook import check_suite_handler as handler_module  # pylint: disable=import-outside-toplevel

    is_wired = hasattr(handler_module, "label_log_source")
    assert is_wired


def test_ci_log_source_strings_produce_expected_agent_input():
    # Each dispatch branch of the handler (CircleCI / Codecov / GitHub Actions) builds `ci_log_source` as an inline f-string and tags the log with ownership="theirs"; Lambda validation errors are tagged with ownership="ours".
    # This test pins the exact header the agent sees for each of those source strings, so drift in either the label format or a source template fails here rather than confusing the agent in production.
    owner, repo = "Foxquilt", "foxden-version-controller"
    raw_log = "FAIL test/spec/create-express-server.spec.ts"

    customer_header = "CUSTOMER infrastructure (their runtime/CI)"
    gitauto_header = "OUR infrastructure (GitAuto-controlled)"

    # (ownership, source string as the handler builds it, ownership phrase expected in the header)
    cases = [
        ("theirs", f"CircleCI for {owner}/{repo}", customer_header),
        ("theirs", f"Codecov coverage report for {owner}/{repo}", customer_header),
        ("theirs", f"GitHub Actions for {owner}/{repo}", customer_header),
        ("ours", "GitAuto validation (AWS Lambda, Amazon Linux 2023)", gitauto_header),
    ]

    for ownership, source, header in cases:
        expected = f"[log source: {header} — {source}]\n{raw_log}"
        assert label_log_source(raw_log, ownership, source) == expected


def test_agent_result_concurrent_push_field_defaults_false():
"""check_suite_handler breaks its agent loop and skips the final empty commit
when AgentResult.concurrent_push_detected is True. Verify the new field
Expand Down
34 changes: 34 additions & 0 deletions utils/logs/label_log_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import Literal

# Local imports
from utils.error.handle_exceptions import handle_exceptions
from utils.logging.logging_config import logger

# Who owns the infrastructure that produced a log:
# "ours" = produced inside GitAuto-controlled infrastructure (our Lambda, our CodeBuild, etc.) — we can edit GitAuto source to fix it, never touch customer CI.
# "theirs" = produced inside customer infrastructure (their CircleCI / GitHub Actions / Codecov run) — we fix by editing customer code or their CI config, never by editing GitAuto infrastructure.
LogOwnership = Literal["ours", "theirs"]

# Human-readable ownership phrase for each LogOwnership value; label_log_source embeds it in the `[log source: ...]` header line.
_OWNERSHIP_HEADERS: dict[LogOwnership, str] = {
    "ours": "OUR infrastructure (GitAuto-controlled)",
    "theirs": "CUSTOMER infrastructure (their runtime/CI)",
}


@handle_exceptions(default_return_value="", raise_on_error=False)
def label_log_source(log: str, ownership: LogOwnership, source: str):
    """Return `log` with a one-line `[log source: ...]` provenance header prepended.
    The header states outright whether the log came from OUR (GitAuto-controlled) or CUSTOMER infrastructure, followed by the free-form `source` description. An empty `log` is returned as an empty string with no header.
    Why: in Foxquilt/foxden-version-controller PR #203 an unlabelled Lambda error got misread as a CircleCI problem, and the agent pushed `MONGOMS_DISTRO=ubuntu-22.04` into the customer's `.circleci/config.yml`.
    Naming only "CircleCI" or "Lambda" isn't enough — the agent can still flip the mapping — so ownership is always spelled out.
    """
    if log:
        # Resolve the ownership phrase first so an unknown ownership fails before anything is logged.
        header = f"[log source: {_OWNERSHIP_HEADERS[ownership]} — {source}]"
        logger.info(
            "label_log_source: tagging %d chars of log with ownership=%s source=%s",
            len(log),
            ownership,
            source,
        )
        return f"{header}\n{log}"

    # Nothing to tag: callers get an empty string rather than a bare header.
    logger.info("label_log_source: empty log, returning empty string")
    return ""
100 changes: 100 additions & 0 deletions utils/logs/test_label_log_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import pytest

from utils.logs.label_log_source import label_log_source


def test_ours_header_says_our_infrastructure_in_all_caps():
    # The explicit "OUR" / "CUSTOMER" framing is the reason this function exists: a tag like "GitAuto Lambda" could still be misread as the customer's Lambda.
    # The all-caps ownership prefix is what prevents that flip.
    expected_prefix = "[log source: OUR infrastructure (GitAuto-controlled) — "
    source = "GitAuto validation (AWS Lambda, Amazon Linux 2023)"

    tagged = label_log_source("some lambda error", "ours", source)

    assert tagged.startswith(expected_prefix)


def test_theirs_header_says_customer_infrastructure_in_all_caps():
    # Customer-side logs must open with the all-caps CUSTOMER ownership phrase.
    expected_prefix = "[log source: CUSTOMER infrastructure (their runtime/CI) — "
    source = "CircleCI for Foxquilt/foxden-version-controller"

    tagged = label_log_source("some ci error", "theirs", source)

    assert tagged.startswith(expected_prefix)


def test_full_tagged_format_for_ours():
    # Pins the complete output (header line + newline + log) for ownership="ours".
    expected = "[log source: OUR infrastructure (GitAuto-controlled) — X]\nE1"
    actual = label_log_source("E1", "ours", "X")
    assert actual == expected


def test_full_tagged_format_for_theirs():
    # Pins the complete output (header line + newline + log) for ownership="theirs".
    expected = "[log source: CUSTOMER infrastructure (their runtime/CI) — Y]\nE2"
    actual = label_log_source("E2", "theirs", "Y")
    assert actual == expected


def test_multiline_log_preserved():
    # Only one header line is prepended; the newlines inside the log body must come through untouched.
    source = "GitHub Actions for owner/repo"
    body = "line one\nline two\nline three"
    expected_header = (
        "[log source: CUSTOMER infrastructure (their runtime/CI) — "
        "GitHub Actions for owner/repo]\n"
    )

    tagged = label_log_source(body, "theirs", source)

    assert tagged == expected_header + "line one\nline two\nline three"


def test_empty_log_returns_empty():
    # The webhook handler only attaches non-empty logs, so an empty string is passed back untagged for either ownership.
    for ownership in ("ours", "theirs"):
        assert label_log_source("", ownership, "anything") == ""


@pytest.mark.parametrize(
    "ownership, source, expected_prefix",
    [
        pytest.param(
            "ours",
            "GitAuto validation (AWS Lambda, Amazon Linux 2023)",
            "[log source: OUR infrastructure (GitAuto-controlled) — GitAuto validation (AWS Lambda, Amazon Linux 2023)]\n",
            id="lambda_ours",
        ),
        pytest.param(
            "theirs",
            "CircleCI for Foxquilt/foxden-version-controller",
            "[log source: CUSTOMER infrastructure (their runtime/CI) — CircleCI for Foxquilt/foxden-version-controller]\n",
            id="circleci_theirs",
        ),
        pytest.param(
            "theirs",
            "GitHub Actions for Foxquilt/foxden-version-controller",
            "[log source: CUSTOMER infrastructure (their runtime/CI) — GitHub Actions for Foxquilt/foxden-version-controller]\n",
            id="github_actions_theirs",
        ),
        pytest.param(
            "theirs",
            "Codecov coverage report for Foxquilt/foxden-version-controller",
            "[log source: CUSTOMER infrastructure (their runtime/CI) — Codecov coverage report for Foxquilt/foxden-version-controller]\n",
            id="codecov_theirs",
        ),
    ],
)
def test_known_sources_produce_expected_prefix(ownership, source, expected_prefix):
    # One case per log source used in this change: Lambda validation (ours) plus the three customer CI paths (theirs).
    tagged = label_log_source("whatever", ownership, source)
    assert tagged.startswith(expected_prefix)


def test_pr203_scenario_lambda_error_tagged_as_ours():
    # Real Lambda error from Foxquilt PR #203 (CloudWatch 2026-04-21 14:01:55).
    # The error text never mentions CircleCI — only libcrypto/MongoDB — yet without an ownership prefix an agent could still decide the fix belongs in the customer's repo.
    # With "OUR infrastructure" at the front, the source is unambiguous from the first token.
    lambda_error = "\n".join(
        [
            "Starting the MongoMemoryServer Instance failed, enable debug log for more information. Error:",
            ' StdoutInstanceError: Instance failed to start because a library is missing or cannot be opened: "libcrypto.so.10"',
            "error Command failed with exit code 1.",
        ]
    )
    expected_header = (
        "[log source: OUR infrastructure (GitAuto-controlled) — "
        "GitAuto validation (AWS Lambda, Amazon Linux 2023)]"
    )

    labelled = label_log_source(
        lambda_error, "ours", "GitAuto validation (AWS Lambda, Amazon Linux 2023)"
    )

    # Split at the first newline only: everything after the header line must be the untouched original error.
    header, newline, body = labelled.partition("\n")
    assert newline == "\n"
    assert header == expected_header
    assert body == lambda_error
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading