diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 84c8254ca..c1d2c483c 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -3,6 +3,7 @@ ## Release v0.105.0 ### New Features and Improvements +* Added automatic detection of AI coding agents (Amp, Antigravity, Augment, Claude Code, Cline, Codex, Copilot CLI, Copilot VS Code, Cursor, Gemini CLI, Goose, Kiro, OpenClaw, OpenCode, Windsurf) in the user-agent string. The SDK now appends `agent/<name>` to HTTP request headers when running inside a known AI agent environment. Also honors the `AGENT=<name>` standard: when `AGENT` is set to a known product name the SDK reports that product, and when set to an unrecognized non-empty value the SDK reports `agent/unknown`. Environment variables set to the empty string (e.g. `CLAUDECODE=""`) now count as "set" for presence-only matchers, matching `databricks-sdk-go` semantics; previously they were treated as unset. Explicit agent env vars (e.g. `CLAUDECODE`, `GOOSE_TERMINAL`) always take precedence over the generic `AGENT=<name>` signal. When multiple agent env vars are present (e.g. a Cursor CLI subagent invoked from Claude Code), the user-agent reports `agent/multiple`. 
### Bug Fixes diff --git a/databricks-sdk-java/lockfile.json b/databricks-sdk-java/lockfile.json index 68d464015..877c72f01 100644 --- a/databricks-sdk-java/lockfile.json +++ b/databricks-sdk-java/lockfile.json @@ -1,7 +1,7 @@ { "artifactId": "databricks-sdk-java", "groupId": "com.databricks", - "version": "0.103.0", + "version": "0.104.0", "lockFileVersion": 1, "dependencies": [ { diff --git a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java index adf153869..8f385009a 100644 --- a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java +++ b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java @@ -237,52 +237,109 @@ private static String cicdProvider() { return cicdProvider; } - // Maps an environment variable to an agent product name. - private static class AgentDef { + // Describes a single AI coding agent: the env var that identifies it and the + // product name reported in the user agent. + private static class KnownAgent { private final String envVar; private final String product; - AgentDef(String envVar, String product) { + KnownAgent(String envVar, String product) { this.envVar = envVar; this.product = product; } } + // The agents.md standard env var. When set to a value we don't specifically + // recognize, detection falls back to "unknown". + private static final String AGENT_ENV_VAR = "AGENT"; + // Canonical list of known AI coding agents. // Keep this list in sync with databricks-sdk-go and databricks-sdk-py. - private static List listKnownAgents() { + // Agents are listed alphabetically by product name. 
+ private static List listKnownAgents() { return Arrays.asList( - new AgentDef("ANTIGRAVITY_AGENT", "antigravity"), // Closed source (Google) - new AgentDef("CLAUDECODE", "claude-code"), // https://github.com/anthropics/claude-code - new AgentDef("CLINE_ACTIVE", "cline"), // https://github.com/cline/cline (v3.24.0+) - new AgentDef("CODEX_CI", "codex"), // https://github.com/openai/codex - new AgentDef("COPILOT_CLI", "copilot-cli"), // https://github.com/features/copilot - new AgentDef("CURSOR_AGENT", "cursor"), // Closed source - new AgentDef("GEMINI_CLI", "gemini-cli"), // https://google-gemini.github.io/gemini-cli - new AgentDef("OPENCODE", "opencode"), // https://github.com/opencode-ai/opencode - new AgentDef("OPENCLAW_SHELL", "openclaw")); // https://github.com/anthropics/openclaw + new KnownAgent( + "AMP_CURRENT_THREAD_ID", + "amp"), // https://ampcode.com/ (also sets AGENT=amp, handled centrally) + new KnownAgent("ANTIGRAVITY_AGENT", "antigravity"), // Closed source (Google) + new KnownAgent("AUGMENT_AGENT", "augment"), // https://www.augmentcode.com/ + new KnownAgent("CLAUDECODE", "claude-code"), // https://github.com/anthropics/claude-code + new KnownAgent("CLINE_ACTIVE", "cline"), // https://github.com/cline/cline (v3.24.0+) + new KnownAgent("CODEX_CI", "codex"), // https://github.com/openai/codex + new KnownAgent("COPILOT_CLI", "copilot-cli"), // https://github.com/features/copilot + // VS Code Copilot terminal; best-effort heuristic, not officially identified. 
+ new KnownAgent("COPILOT_MODEL", "copilot-vscode"), + new KnownAgent("CURSOR_AGENT", "cursor"), // Closed source + new KnownAgent("GEMINI_CLI", "gemini-cli"), // https://google-gemini.github.io/gemini-cli + new KnownAgent( + "GOOSE_TERMINAL", + "goose"), // https://block.github.io/goose/ (also sets AGENT=goose, handled centrally) + new KnownAgent("KIRO", "kiro"), // https://kiro.dev/ (Amazon) + new KnownAgent("OPENCLAW_SHELL", "openclaw"), // https://github.com/anthropics/openclaw + new KnownAgent("OPENCODE", "opencode"), // https://github.com/opencode-ai/opencode + new KnownAgent("WINDSURF_AGENT", "windsurf")); // https://codeium.com/windsurf (Codeium) } // Looks up the active agent provider based on environment variables. - // Returns the agent name if exactly one is set (non-empty). - // Returns empty string if zero or multiple agents detected. + // + // Explicit env var matchers (e.g. CLAUDECODE, GOOSE_TERMINAL) always take + // precedence over the generic AGENT= signal. The AGENT env var is + // treated purely as a fallback for agents that have no explicit matcher, or + // for agents we do not yet specifically recognize. + // + // The function counts how many distinct agents matched via explicit env vars: + // - Exactly one agent matched: return its product name. + // - More than one agent matched: return "multiple". Agent env vars can be + // stacked when one agent invokes another as a subagent (e.g. Claude Code + // spawning a Cursor CLI subprocess), so the child process inherits env + // vars from multiple layers. + // - Zero agents matched: if the agents.md standard AGENT env var is set to + // a known product name, return that product name. If it is set to any + // other non-empty value, return "unknown". Otherwise return "". + // + // Because explicit matchers win over AGENT, e.g. AGENT=cursor + CLAUDECODE=1 + // yields "claude-code", and AGENT=goose + CLAUDECODE=1 also yields + // "claude-code". 
private static String lookupAgentProvider(Environment env) { - String detected = ""; - int count = 0; - for (AgentDef agent : listKnownAgents()) { - String value = env.get(agent.envVar); - if (value != null && !value.isEmpty()) { - detected = agent.product; - count++; - if (count > 1) { - return ""; - } + List agents = listKnownAgents(); + + List matches = new ArrayList<>(); + for (KnownAgent a : agents) { + if (env.get(a.envVar) != null) { + matches.add(a.product); } } - if (count == 1) { - return detected; + + // Known BYOK false positive: Copilot CLI users often set COPILOT_MODEL + // alongside COPILOT_CLI. Treat that pair as a single copilot-cli signal + // rather than a stacked multi-agent setup. + if (matches.contains("copilot-cli") && matches.contains("copilot-vscode")) { + matches.removeIf(m -> m.equals("copilot-vscode")); } - return ""; + + if (matches.size() == 1) { + return matches.get(0); + } + if (matches.size() > 1) { + return "multiple"; + } + return agentEnvFallback(env, agents); + } + + // agentEnvFallback honors the agents.md AGENT= standard. + // Returns the value if it matches a known product name, "unknown" if AGENT + // is set to any other non-empty value, and "" if AGENT is unset or empty. 
+ private static String agentEnvFallback(Environment env, List agents) { + String v = env.get(AGENT_ENV_VAR); + if (v == null || v.isEmpty()) { + return ""; + } + for (KnownAgent a : agents) { + if (a.product.equals(v)) { + return v; + } + } + return "unknown"; } // Thread-safe lazy initialization of agent provider detection diff --git a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java index 54405982c..232409c84 100644 --- a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java +++ b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java @@ -218,6 +218,232 @@ public void testAgentProviderOpenclaw() { Assertions.assertTrue(UserAgent.asString().contains("agent/openclaw")); } + @Test + public void testAgentProviderAmp() { + setupAgentEnv( + new HashMap() { + { + put("AMP_CURRENT_THREAD_ID", "thread-123"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/amp")); + } + + @Test + public void testAgentProviderAugment() { + setupAgentEnv( + new HashMap() { + { + put("AUGMENT_AGENT", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/augment")); + } + + @Test + public void testAgentProviderCopilotVscode() { + setupAgentEnv( + new HashMap() { + { + put("COPILOT_MODEL", "gpt-4"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/copilot-vscode")); + } + + @Test + public void testAgentProviderGoose() { + setupAgentEnv( + new HashMap() { + { + put("GOOSE_TERMINAL", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + } + + @Test + public void testAgentProviderKiro() { + setupAgentEnv( + new HashMap() { + { + put("KIRO", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/kiro")); + } + + @Test + public void testAgentProviderWindsurf() { + setupAgentEnv( + new HashMap() { + { + put("WINDSURF_AGENT", 
"1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/windsurf")); + } + + @Test + public void testAgentProviderAgentEnvGoose() { + setupAgentEnv( + new HashMap() { + { + put("AGENT", "goose"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + } + + @Test + public void testAgentProviderAgentEnvAmp() { + setupAgentEnv( + new HashMap() { + { + put("AGENT", "amp"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/amp")); + } + + @Test + public void testAgentProviderAgentEnvCursor() { + // AGENT=cursor with no cursor-specific env var. Falls through to the + // AGENT fallback and matches "cursor" as a known product name. + setupAgentEnv( + new HashMap() { + { + put("AGENT", "cursor"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/cursor")); + } + + @Test + public void testAgentProviderKnownMatcherWinsOverAgentFallback() { + // Known matchers always win over the AGENT fallback. AGENT=somethingweird + // alone would yield "unknown", but CLAUDECODE=1 takes precedence. + setupAgentEnv( + new HashMap() { + { + put("AGENT", "somethingweird"); + put("CLAUDECODE", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/claude-code")); + Assertions.assertFalse(UserAgent.asString().contains("agent/unknown")); + } + + @Test + public void testAgentProviderGooseBothMatchers() { + // GOOSE_TERMINAL and AGENT=goose both fire the goose matcher. Since they + // both identify the same agent, this is NOT ambiguous. + setupAgentEnv( + new HashMap() { + { + put("GOOSE_TERMINAL", "1"); + put("AGENT", "goose"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + } + + @Test + public void testAgentProviderAmpBothMatchers() { + // AMP_CURRENT_THREAD_ID and AGENT=amp both identify amp, not ambiguous. 
+ setupAgentEnv( + new HashMap() { + { + put("AMP_CURRENT_THREAD_ID", "thread-123"); + put("AGENT", "amp"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/amp")); + } + + @Test + public void testAgentProviderAgentEnvUnknown() { + setupAgentEnv( + new HashMap() { + { + put("AGENT", "someweirdthing"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/unknown")); + } + + @Test + public void testAgentProviderAgentEnvEmpty() { + // AGENT="" should not trigger the unknown fallback. + setupAgentEnv( + new HashMap() { + { + put("AGENT", ""); + } + }); + Assertions.assertFalse(UserAgent.asString().contains("agent/")); + } + + @Test + public void testAgentProviderExplicitEnvWinsOverAgentEnv() { + // CLAUDECODE=1 is an explicit matcher and wins over AGENT=goose (which + // is only consulted as a fallback when no explicit matcher fires). + setupAgentEnv( + new HashMap() { + { + put("AGENT", "goose"); + put("CLAUDECODE", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/claude-code")); + } + + @Test + public void testAgentProviderExplicitEnvWinsOverKnownAgentEnv() { + // GOOSE_TERMINAL=1 is an explicit matcher; AGENT=cursor (even though + // "cursor" is a known product name) is ignored because an explicit + // matcher already fired. + setupAgentEnv( + new HashMap() { + { + put("GOOSE_TERMINAL", "1"); + put("AGENT", "cursor"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + Assertions.assertFalse(UserAgent.asString().contains("agent/cursor")); + } + + @Test + public void testAgentProviderCopilotCliAndCopilotVscodeCollapseToCopilotCli() { + // Copilot CLI users (BYOK mode) often set COPILOT_MODEL alongside + // COPILOT_CLI. Treat the pair as a single copilot-cli signal rather + // than a stacked multi-agent setup. 
+ setupAgentEnv( + new HashMap() { + { + put("COPILOT_CLI", "1"); + put("COPILOT_MODEL", "gpt-4"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/copilot-cli")); + } + + @Test + public void testAgentProviderCopilotByokCollapseStillMultiple() { + // The Copilot BYOK collapse only drops the copilot-vscode match. If + // another agent is also present, the result is still "multiple". + setupAgentEnv( + new HashMap() { + { + put("COPILOT_CLI", "1"); + put("COPILOT_MODEL", "gpt-4"); + put("CLAUDECODE", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/multiple")); + } + @Test public void testAgentProviderNoAgent() { setupAgentEnv(new HashMap<>()); @@ -226,6 +452,8 @@ public void testAgentProviderNoAgent() { @Test public void testAgentProviderMultipleAgents() { + // Nested agents (e.g. Claude Code spawning a Cursor CLI subagent) set + // multiple explicit matchers on the same process. setupAgentEnv( new HashMap() { { @@ -233,18 +461,33 @@ public void testAgentProviderMultipleAgents() { put("CURSOR_AGENT", "1"); } }); - Assertions.assertFalse(UserAgent.asString().contains("agent/")); + Assertions.assertTrue(UserAgent.asString().contains("agent/multiple")); + } + + @Test + public void testAgentProviderThreeStackedAgents() { + setupAgentEnv( + new HashMap() { + { + put("CLAUDECODE", "1"); + put("CURSOR_AGENT", "1"); + put("AUGMENT_AGENT", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/multiple")); } @Test - public void testAgentProviderEmptyValue() { + public void testAgentProviderEmptyValueStillSet() { + // Empty string still counts as "set" for presence-only matchers, + // matching databricks-sdk-go semantics. 
setupAgentEnv( new HashMap() { { put("CLAUDECODE", ""); } }); - Assertions.assertFalse(UserAgent.asString().contains("agent/")); + Assertions.assertTrue(UserAgent.asString().contains("agent/claude-code")); } @Test diff --git a/lockfile.json b/lockfile.json index 31056496f..a795f157e 100644 --- a/lockfile.json +++ b/lockfile.json @@ -1,7 +1,7 @@ { "artifactId": "databricks-sdk-parent", "groupId": "com.databricks", - "version": "0.103.0", + "version": "0.104.0", "lockFileVersion": 1, "dependencies": [], "mavenPlugins": [], @@ -9,7 +9,7 @@ "environment": { "osName": "Mac OS X", "mavenVersion": "3.9.14", - "javaVersion": "25.0.2" + "javaVersion": "11.0.12" }, "config": { "includeMavenPlugins": false,