Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion src/app/endpoints/rlsapi_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,22 @@ def _queue_splunk_event( # pylint: disable=too-many-arguments,too-many-position
response_text: str,
inference_time: float,
sourcetype: str,
input_tokens: int = 0,
output_tokens: int = 0,
) -> None:
"""Build and queue a Splunk telemetry event for background sending."""
"""Build and queue a Splunk telemetry event for background sending.

Args:
background_tasks: FastAPI background task manager.
infer_request: Original rlsapi v1 inference request.
request: FastAPI request object used to resolve identity context.
request_id: Unique identifier for the request.
response_text: Response text to include in the telemetry event.
inference_time: Request processing duration in seconds.
sourcetype: Splunk sourcetype to use when sending the event.
input_tokens: Number of prompt tokens consumed by the LLM call.
output_tokens: Number of completion tokens produced by the LLM call.
"""
org_id, system_id = _get_rh_identity_context(request)
systeminfo = infer_request.context.systeminfo

Expand All @@ -370,6 +384,8 @@ def _queue_splunk_event( # pylint: disable=too-many-arguments,too-many-position
system_os=systeminfo.os,
system_version=systeminfo.version,
system_arch=systeminfo.arch,
input_tokens=input_tokens,
output_tokens=output_tokens,
)

event = build_inference_event(event_data)
Expand Down Expand Up @@ -754,6 +770,8 @@ async def infer_endpoint( # pylint: disable=R0914
response_text,
inference_time,
"infer_with_llm",
input_tokens=token_usage.input_tokens,
output_tokens=token_usage.output_tokens,
)

logger.info("Completed rlsapi v1 /infer request %s", request_id)
Expand Down
5 changes: 3 additions & 2 deletions src/observability/formats/rlsapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class InferenceEventData: # pylint: disable=too-many-instance-attributes
system_os: str
system_version: str
system_arch: str
input_tokens: int = 0
output_tokens: int = 0


def build_inference_event(data: InferenceEventData) -> dict[str, Any]:
Expand All @@ -47,8 +49,7 @@ def build_inference_event(data: InferenceEventData) -> dict[str, Any]:
"deployment": configuration.deployment_environment,
"org_id": data.org_id,
"system_id": data.system_id,
# Token counting not yet implemented in lightspeed-stack; rlsapi uses 0 as default
"total_llm_tokens": 0,
"total_llm_tokens": data.input_tokens + data.output_tokens,
"request_id": data.request_id,
"cla_version": data.cla_version,
"system_os": data.system_os,
Expand Down
20 changes: 20 additions & 0 deletions tests/integration/endpoints/test_rlsapi_v1_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ def _setup_responses_mock(
"""Set up responses.create mock with the given response text."""
mock_response = mocker.Mock()
mock_response.output = [_create_mock_response_output(mocker, response_text)]
mock_usage = mocker.Mock()
mock_usage.input_tokens = 10
mock_usage.output_tokens = 5
mock_response.usage = mock_usage

mock_responses = mocker.Mock()
mock_responses.create = mocker.AsyncMock(return_value=mock_response)
Expand Down Expand Up @@ -303,6 +307,10 @@ async def test_rlsapi_v1_infer_fallback_response_empty_output(

mock_response = mocker.Mock()
mock_response.output = []
mock_usage = mocker.Mock()
mock_usage.input_tokens = 10
mock_usage.output_tokens = 5
mock_response.usage = mock_usage

mock_responses = mocker.Mock()
mock_responses.create = mocker.AsyncMock(return_value=mock_response)
Expand Down Expand Up @@ -342,6 +350,10 @@ async def test_rlsapi_v1_infer_input_source_combination(

mock_response = mocker.Mock()
mock_response.output = [_create_mock_response_output(mocker, "response text")]
mock_usage = mocker.Mock()
mock_usage.input_tokens = 10
mock_usage.output_tokens = 5
mock_response.usage = mock_usage

mock_responses = mocker.Mock()
mock_responses.create = mocker.AsyncMock(return_value=mock_response)
Expand Down Expand Up @@ -401,6 +413,10 @@ async def test_rlsapi_v1_infer_no_mcp_servers_passes_empty_tools(

mock_response = mocker.Mock()
mock_response.output = [_create_mock_response_output(mocker, "response text")]
mock_usage = mocker.Mock()
mock_usage.input_tokens = 10
mock_usage.output_tokens = 5
mock_response.usage = mock_usage

mock_responses = mocker.Mock()
mock_responses.create = mocker.AsyncMock(return_value=mock_response)
Expand Down Expand Up @@ -442,6 +458,10 @@ async def test_rlsapi_v1_infer_mcp_tools_passed_to_llm(

mock_response = mocker.Mock()
mock_response.output = [_create_mock_response_output(mocker, "enriched response")]
mock_usage = mocker.Mock()
mock_usage.input_tokens = 10
mock_usage.output_tokens = 5
mock_response.usage = mock_usage

mock_responses = mocker.Mock()
mock_responses.create = mocker.AsyncMock(return_value=mock_response)
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/app/endpoints/test_rlsapi_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ def mock_llm_response_fixture(mocker: MockerFixture) -> None:
mock_response.output = [
_create_mock_response_output(mocker, "This is a test LLM response.")
]
mock_usage = mocker.Mock()
mock_usage.input_tokens = 10
mock_usage.output_tokens = 5
mock_response.usage = mock_usage
_setup_responses_mock(mocker, mocker.AsyncMock(return_value=mock_response))


Expand All @@ -124,6 +128,10 @@ def mock_empty_llm_response_fixture(mocker: MockerFixture) -> None:
"""Mock responses.create to return empty output list."""
mock_response = mocker.Mock()
mock_response.output = []
mock_usage = mocker.Mock()
mock_usage.input_tokens = 10
mock_usage.output_tokens = 5
mock_response.usage = mock_usage
_setup_responses_mock(mocker, mocker.AsyncMock(return_value=mock_response))


Expand Down
27 changes: 27 additions & 0 deletions tests/unit/observability/formats/test_rlsapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,33 @@ def test_builds_event_with_all_fields(
assert event["total_llm_tokens"] == 0


def test_builds_event_with_token_counts(mocker: MockerFixture) -> None:
    """Test total_llm_tokens is computed from input and output token counts."""
    # Pin the deployment environment so the event's static fields are stable.
    config_mock = mocker.patch("observability.formats.rlsapi.configuration")
    config_mock.deployment_environment = "production"

    event_fields = {
        "question": "test",
        "response": "test",
        "inference_time": 1.0,
        "model": "test-model",
        "org_id": "org1",
        "system_id": "sys1",
        "request_id": "req_1",
        "cla_version": "CLA/1.0",
        "system_os": "RHEL",
        "system_version": "9.4",
        "system_arch": "x86_64",
        "input_tokens": 150,
        "output_tokens": 75,
    }
    event = build_inference_event(InferenceEventData(**event_fields))

    # 150 prompt tokens + 75 completion tokens.
    assert event["total_llm_tokens"] == 225


def test_handles_auth_disabled_values(mocker: MockerFixture) -> None:
"""Test event handles auth_disabled placeholder values."""
data = InferenceEventData(
Expand Down
Loading