From f25de350ac517961bad2e1f2baf5174203884129 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Mon, 20 Apr 2026 14:19:52 +0200
Subject: [PATCH] feat: add anonymous feature-usage telemetry

New `bitsandbytes._telemetry.report_feature()` sends one event per distinct
feature per process via `huggingface_hub.utils.send_telemetry()`, mirroring
the pattern Transformers uses for its `quant` user-agent field. Data lands
in the Hub telemetry index under `path_prefix=/api/telemetry/bitsandbytes/`
and informs which features are worth maintaining or retiring.

Wired at: Linear4bit/Linear8bitLt forward, Params4bit/Int8Params __new__,
all Embedding variants, Optimizer8bit.step, GlobalOptimManager overrides,
OutlierAwareLinear and int8_double_quant (deprecation candidates).

All metadata keys namespaced under `bitsandbytes.*`. Fingerprint carries
bnb version, OS, arch, libc, Python/torch versions, and accelerator vendor
/ name / arch / count. No model names, file paths, or user-derived values
are ever sent.

Opt-out via BNB_DISABLE_TELEMETRY, HF_HUB_DISABLE_TELEMETRY, or
HF_HUB_OFFLINE. Auto-disabled under pytest so CI and local test runs don't
pollute the real-usage stream. Silent no-op when huggingface_hub is not
installed.

End-to-end verification: `scripts/verify_telemetry.py` emits every feature
once tagged with a unique run_id via BNB_TELEMETRY_TAG, for correlation in
Elasticsearch queries on `ds-hub-telemetry`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md                       |  37 +++++
 bitsandbytes/_telemetry.py      | 231 ++++++++++++++++++++++++++++++++
 bitsandbytes/functional.py      |   3 +
 bitsandbytes/nn/modules.py      |  34 +++++
 bitsandbytes/optim/optimizer.py |  16 +++
 scripts/verify_telemetry.py     | 117 ++++++++++++++++
 tests/test_telemetry.py         | 179 +++++++++++++++++++++++++
 7 files changed, 617 insertions(+)
 create mode 100644 bitsandbytes/_telemetry.py
 create mode 100644 scripts/verify_telemetry.py
 create mode 100644 tests/test_telemetry.py

diff --git a/README.md b/README.md
index b4fd29b3a..f641d76e0 100644
--- a/README.md
+++ b/README.md
@@ -183,6 +183,43 @@ bitsandbytes has the following minimum requirements for all platforms:
 * 🤗 [Diffusers](https://huggingface.co/docs/diffusers/quantization/bitsandbytes)
 * 🤗 [PEFT](https://huggingface.co/docs/peft/developer_guides/quantization#quantize-a-model)
 
+## Telemetry
+
+`bitsandbytes` sends anonymous, aggregate feature-usage telemetry to the
+Hugging Face Hub. This data is used to prioritize maintenance (which quantization
+methods and optimizers are actually in use?) and to safely retire features that
+are no longer called by anyone.
+
+### What is collected
+
+* A session fingerprint, computed once per process and attached to every event:
+  `bitsandbytes` version, OS name/version, CPU architecture, libc version (Linux only),
+  Python/PyTorch versions, accelerator vendor/name/arch/count (e.g. `nvidia`, `NVIDIA H100`, `sm_90`, `1`).
+* One event per distinct feature used, carrying the flags of its first use. For
+  example: using `Linear4bit` sends `quant_type=nf4`, `blocksize=64`; using
+  `AdamW8bit.step()` sends `name=AdamW8bit`, `is_paged=False`.
+
+### What is never collected
+
+Model names, file paths, tensor shapes, parameter values, user identifiers, or
+anything derived from user input.
+
+### How to opt out
+
+Set any one of these environment variables:
+
+| Variable                     | Scope                        |
+| ---------------------------- | ---------------------------- |
+| `BNB_DISABLE_TELEMETRY=1`    | `bitsandbytes` only          |
+| `HF_HUB_DISABLE_TELEMETRY=1` | all Hugging Face libraries   |
+| `HF_HUB_OFFLINE=1`           | all Hugging Face libraries   |
+
+Telemetry is also automatically suppressed while running under `pytest` (so
+CI and local test runs don't pollute the stream) and a silent no-op when
+`huggingface_hub` is not installed. The implementation lives in
+[`bitsandbytes/_telemetry.py`](bitsandbytes/_telemetry.py) and each event
+fires at most once per process.
+
 ## :heart: Sponsors
 The continued maintenance and development of `bitsandbytes` is made possible thanks to the generous support of our sponsors. Their contributions help ensure that we can keep improving the project and delivering valuable updates to the community.
 
diff --git a/bitsandbytes/_telemetry.py b/bitsandbytes/_telemetry.py
new file mode 100644
index 000000000..0063e3413
--- /dev/null
+++ b/bitsandbytes/_telemetry.py
@@ -0,0 +1,231 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""Anonymous feature-usage telemetry for bitsandbytes.
+
+Sends one HEAD request per distinct feature per process via
+`huggingface_hub.utils.send_telemetry()`. Data lands in the Hugging Face
+Hub telemetry index under `path_prefix == "/api/telemetry/bitsandbytes/"`
+and informs maintenance and deprecation decisions.
+
+What is collected
+    - Session fingerprint (computed on first feature use, attached to every
+      event): bnb version, OS name/version, CPU arch, libc version (Linux),
+      Python/torch versions, accelerator vendor/name/arch/count.
+    - Per-feature events: feature name plus feature-specific metadata
+      (e.g. `quant_type="nf4"`, `blocksize="64"`, `is_paged="False"`).
+
+What is NOT collected
+    Model names, file paths, parameter shapes, user identifiers, training
+    data, gradient values, or any value derived from user input.
+
+Automatically disabled when running under pytest (detected via
+`pytest` in `sys.modules` or `PYTEST_CURRENT_TEST` env var) so that test
+runs in CI and locally do not pollute the real-usage stream.
+
+Opt-out (any of the following env vars disables all telemetry):
+    - BNB_DISABLE_TELEMETRY=1           (bitsandbytes only)
+    - HF_HUB_DISABLE_TELEMETRY=1        (all HF libraries)
+    - HF_HUB_OFFLINE=1                  (all HF libraries)
+
+End-to-end verification:
+    Set `BNB_TELEMETRY_TAG=<some-id>` before importing bitsandbytes and the
+    value is attached as `bitsandbytes.tag` on every event. Use this to
+    correlate a single run's events in ES.
+
+No-ops silently if `huggingface_hub` is not installed, and never raises.
+
+Keys are namespaced under `bitsandbytes.*` in the resulting
+`metadata.bitsandbytes.*` fields so they do not collide with fields logged
+by other libraries in the shared telemetry index.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import platform
+import sys
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+_REPORTED: set[str] = set()
+_FINGERPRINT: Optional[dict[str, str]] = None
+
+_TRUTHY = frozenset({"1", "true", "yes", "on"})
+
+
+def _is_pytest() -> bool:
+    """Return True when the current process looks like a pytest run.
+
+    Checks for an imported `pytest` module or the `PYTEST_CURRENT_TEST`
+    environment variable. `_is_disabled` uses this to suppress telemetry
+    during tests; tests of the telemetry path patch it to return False.
+    """
+    return "PYTEST_CURRENT_TEST" in os.environ or "pytest" in sys.modules
+
+
+def _is_disabled() -> bool:
+    """Return True if an opt-out env var is truthy or we are under pytest."""
+    opt_out_vars = ("BNB_DISABLE_TELEMETRY", "HF_HUB_DISABLE_TELEMETRY", "HF_HUB_OFFLINE")
+    for name in opt_out_vars:
+        if os.environ.get(name, "").strip().lower() in _TRUTHY:
+            return True
+    return _is_pytest()
+
+
+def _os_info() -> tuple[str, str]:
+    """Return `(os_name, os_version)`, with Darwin reported as "macOS"."""
+    name = platform.system()
+    name = "macOS" if name == "Darwin" else name
+    if name == "macOS":
+        return name, platform.mac_ver()[0] or platform.release()
+    if name != "Windows":
+        return name, platform.release()
+    try:
+        # Builds >= 22000 are labelled "11", earlier builds "10".
+        build = sys.getwindowsversion().build
+        return name, f"{'11' if build >= 22000 else '10'} (build {build})"
+    except Exception:
+        return name, platform.release()
+
+
+def _accel_info() -> dict[str, str]:
+    """Describe the available accelerator as `bitsandbytes.accel*` metadata.
+
+    Probes CUDA/ROCm, XPU, MPS and HPU in that order and reports the first
+    available backend; `accel` is "cpu" if none is and "unknown" without torch.
+    """
+    info: dict[str, str] = {}
+    try:
+        import torch
+    except ImportError:
+        return {"bitsandbytes.accel": "unknown"}
+
+    try:
+        if torch.cuda.is_available():
+            vendor = "amd" if getattr(torch.version, "hip", None) else "nvidia"
+            info["bitsandbytes.accel"] = vendor
+            info["bitsandbytes.accel_count"] = str(torch.cuda.device_count())
+            # NOTE(review): this may initialize the CUDA runtime in-process -- confirm acceptable.
+            props = torch.cuda.get_device_properties(0)
+            info["bitsandbytes.accel_name"] = props.name
+            if vendor == "nvidia":
+                info["bitsandbytes.accel_arch"] = f"sm_{props.major}{props.minor}"
+            else:
+                info["bitsandbytes.accel_arch"] = getattr(props, "gcnArchName", "unknown")
+        elif hasattr(torch, "xpu") and torch.xpu.is_available():
+            info["bitsandbytes.accel"] = "xpu"
+            info["bitsandbytes.accel_count"] = str(torch.xpu.device_count())
+            try:
+                info["bitsandbytes.accel_name"] = torch.xpu.get_device_properties(0).name
+            except Exception:
+                pass  # the device name is optional
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            info["bitsandbytes.accel"] = "mps"
+        elif hasattr(torch, "hpu") and torch.hpu.is_available():
+            info["bitsandbytes.accel"] = "hpu"
+    except Exception as e:
+        logger.debug("bitsandbytes accelerator probe failed: %s", e)
+
+    # Keep a vendor detected before a probe failure rather than relabelling it
+    # "cpu" next to its leftover accel_count.
+    info.setdefault("bitsandbytes.accel", "cpu")
+    return info
+
+
+def _fingerprint() -> dict[str, str]:
+    """Return the environment fingerprint, computed once and cached in `_FINGERPRINT`."""
+    global _FINGERPRINT
+    if _FINGERPRINT is not None:
+        return _FINGERPRINT
+
+    try:
+        import bitsandbytes
+
+        bnb_version = bitsandbytes.__version__
+    except Exception:
+        bnb_version = "unknown"
+
+    os_name, os_version = _os_info()
+    fields = {
+        "bitsandbytes.version": bnb_version,
+        "bitsandbytes.os": os_name,
+        "bitsandbytes.os_version": os_version,
+        "bitsandbytes.arch": platform.machine(),
+        "bitsandbytes.python": platform.python_version(),
+    }
+    if os_name == "Linux":
+        try:
+            libc_name, libc_ver = platform.libc_ver()
+        except Exception:
+            libc_name = ""
+        if libc_name:
+            fields["bitsandbytes.libc"] = f"{libc_name}-{libc_ver}"
+    try:
+        import torch
+
+        fields["bitsandbytes.torch"] = torch.__version__
+    except ImportError:
+        pass
+
+    fields.update(_accel_info())
+    _FINGERPRINT = fields
+    return fields
+
+
+def report_feature(feature: str, details: Optional[dict[str, object]] = None) -> None:
+    """Report that a bitsandbytes feature was used.
+
+    Fires at most once per `feature` per process. Subsequent calls with the
+    same `feature` are O(1) no-ops. Never raises: any failure while building
+    or sending the event is logged at DEBUG level and swallowed.
+
+    Args:
+        feature: Short feature name. Becomes the final URL path segment:
+            `/api/telemetry/bitsandbytes/{feature}` (so it appears as
+            `path_filename` in ES queries).
+        details: Optional feature-specific key/value metadata. Keys without a
+            `bitsandbytes.` prefix are prefixed automatically.
+    """
+    if feature in _REPORTED:
+        return
+    # Burn the dedup slot before the opt-out check so a disabled call is final.
+    _REPORTED.add(feature)
+
+    try:
+        if _is_disabled():
+            return
+        try:
+            from huggingface_hub.utils import send_telemetry
+        except ImportError:
+            return
+
+        fingerprint = _fingerprint()
+        user_agent = dict(fingerprint)
+        user_agent["bitsandbytes.feature"] = feature
+        for k, v in (details or {}).items():
+            key = k if isinstance(k, str) and k.startswith("bitsandbytes.") else f"bitsandbytes.{k}"
+            user_agent[key] = str(v)
+
+        tag = os.environ.get("BNB_TELEMETRY_TAG", "").strip()
+        if tag:
+            user_agent["bitsandbytes.tag"] = tag
+
+        send_telemetry(
+            topic=f"bitsandbytes/{feature}",
+            library_name="bitsandbytes",
+            library_version=fingerprint.get("bitsandbytes.version", "unknown"),
+            user_agent=user_agent,
+        )
+    except Exception as e:
+        logger.debug("bitsandbytes telemetry send failed: %s", e)
+
+
+def _reset_for_testing() -> None:
+    """Forget reported features and the cached fingerprint (test fixtures only)."""
+    global _FINGERPRINT
+    _FINGERPRINT = None
+    _REPORTED.clear()
diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index 0165a1288..9b32b4b70 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -12,6 +12,7 @@
 import torch
 from torch import Tensor
 
+from bitsandbytes._telemetry import report_feature
 from bitsandbytes.utils import pack_dict_to_tensor, unpack_tensor_to_dict
 
 from .cextension import lib
@@ -1593,6 +1594,8 @@ def int8_double_quant(
         - `torch.Tensor` with dtype `torch.int32`, *optional*: A list of column indices which contain outlier features.
     """
 
+    report_feature("int8_double_quant")
+
     if row_stats is not None:
         raise ValueError("row_stats must be None. int8_double_quant() does not support pre-allocated row_stats.")
     if col_stats is not None:
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index bfd41d5dd..ca867d059 100644
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -11,6 +11,7 @@
 import torch.nn.functional as F
 
 import bitsandbytes as bnb
+from bitsandbytes._telemetry import report_feature
 from bitsandbytes.functional import (
     QuantState,
     _convert_weight_packed_for_cpu,
@@ -97,6 +98,7 @@ def __init__(
         )
         self.norm = torch.nn.LayerNorm(embedding_dim, device=device)
         GlobalOptimManager.get_instance().register_module_override(self, "weight", {"optim_bits": 32})
+        report_feature("embedding", {"variant": "stable"})
 
     def reset_parameters(self) -> None:
         torch.nn.init.xavier_uniform_(self.weight)
@@ -179,6 +181,7 @@ def __init__(
             device=device,
         )
         GlobalOptimManager.get_instance().register_module_override(self, "weight", {"optim_bits": 32})
+        report_feature("embedding", {"variant": "standard"})
 
     def reset_parameters(self) -> None:
         torch.nn.init.xavier_uniform_(self.weight)
@@ -239,6 +242,15 @@ def __new__(
         self.bnb_quantized = bnb_quantized
         self.data = data
         self.module = module
+        report_feature(
+            "params_4bit",
+            {
+                "quant_type": quant_type,
+                "blocksize": blocksize,
+                "compress_statistics": compress_statistics,
+                "quant_storage": str(quant_storage).replace("torch.", ""),
+            },
+        )
         return self
 
     def __getstate__(self):
@@ -607,6 +619,16 @@ def _save_to_state_dict(self, destination, prefix, keep_vars):
                 destination[prefix + "weight." + k] = v if keep_vars else v.detach()
 
     def forward(self, x: torch.Tensor):
+        report_feature(
+            "linear_4bit",
+            {
+                "quant_type": getattr(self.weight, "quant_type", "unknown"),
+                "blocksize": getattr(self.weight, "blocksize", 0),
+                "compress_statistics": getattr(self.weight, "compress_statistics", False),
+                "input_dtype": str(x.dtype).replace("torch.", ""),
+                "compute_dtype": (str(self.compute_dtype).replace("torch.", "") if self.compute_dtype else "auto"),
+            },
+        )
         fix_4bit_weight_quant_state_from_module(self)
         quant_state = self.weight.quant_state
 
@@ -732,6 +754,7 @@ def __new__(
         obj.CB = CB
         obj.SCB = SCB
         obj.has_fp16_weights = has_fp16_weights
+        report_feature("int8_params", {"has_fp16_weights": has_fp16_weights})
         return obj
 
     def _quantize(self, device):
@@ -855,6 +878,7 @@ def __init__(self, num_embeddings, embedding_dim, device=None, dtype=None):
         self.dtype = self.weight.data.dtype
 
         self.weight = Int8Params(self.weight.data, has_fp16_weights=False, requires_grad=False)
+        report_feature("embedding", {"variant": "8bit"})
 
     def _save_to_state_dict(self, destination, prefix, keep_vars):
         raise NotImplementedError("Saving Embedding8bit module is not implemented")
@@ -926,6 +950,7 @@ def __init__(
                 f"Embedding size {embedding_dim} is not divisible by block size {blocksize}. "
                 "This will lead to slow inference.",
             )
+        report_feature("embedding", {"variant": "4bit", "quant_type": quant_type})
 
     def _forward_with_partial_dequantize(self, input: Tensor):
         assert self.embedding_dim % self.weight.quant_state.blocksize == 0
@@ -1178,6 +1203,14 @@ def to(self, *args, **kwargs):
         return result
 
     def forward(self, x: torch.Tensor):
+        report_feature(
+            "linear_8bit",
+            {
+                "has_fp16_weights": self.state.has_fp16_weights,
+                "threshold": self.state.threshold,
+                "input_dtype": str(x.dtype).replace("torch.", ""),
+            },
+        )
         self.state.is_training = self.training
         if self.weight.CB is not None:
             self.init_8bit_state()
@@ -1199,6 +1232,7 @@ def __init__(self, input_features, output_features, bias=True, device=None):
         super().__init__(input_features, output_features, bias, device)
         self.outlier_dim = None
         self.is_quantized = False
+        report_feature("outlier_aware_linear")
 
     def forward_with_outliers(self, x, outlier_idx):
         raise NotImplementedError("Please override the `forward_with_outliers(self, x, outlier_idx)` function")
diff --git a/bitsandbytes/optim/optimizer.py b/bitsandbytes/optim/optimizer.py
index dfc6e5d65..33798a4d3 100644
--- a/bitsandbytes/optim/optimizer.py
+++ b/bitsandbytes/optim/optimizer.py
@@ -11,6 +11,7 @@
 
 import torch
 
+from bitsandbytes._telemetry import report_feature
 import bitsandbytes.functional as F
 from bitsandbytes.utils import sync_gpu
 
@@ -104,6 +105,10 @@ def override_config(self, parameters, key=None, value=None, key_value_dict=None)
             key_value_dict = {key: value}
 
         if key_value_dict is not None:
+            report_feature(
+                "optim_override_config",
+                {"keys": ",".join(sorted(key_value_dict.keys()))},
+            )
             for p in parameters:
                 if id(p) in self.pid2config:
                     self.pid2config[id(p)].update(key_value_dict)
@@ -111,6 +116,10 @@ def override_config(self, parameters, key=None, value=None, key_value_dict=None)
                     self.pid2config[id(p)] = key_value_dict
 
     def register_module_override(self, module, param_name, config):
+        report_feature(
+            "optim_register_module_override",
+            {"keys": ",".join(sorted(config.keys())) if isinstance(config, dict) else "unknown"},
+        )
         self.module_weight_config_triple.append((module, param_name, config))
 
 
@@ -310,6 +319,13 @@ def step(self, closure=None):
             closure (`Callable`, *optional*, defaults to `None`):
                 A closure that reevaluates the model and returns the loss.
         """
+        report_feature(
+            "optimizer",
+            {
+                "name": type(self).__name__,
+                "is_paged": self.is_paged,
+            },
+        )
         loss = None
         if closure is not None:
             with torch.enable_grad():
diff --git a/scripts/verify_telemetry.py b/scripts/verify_telemetry.py
new file mode 100644
index 000000000..c4d305bd4
--- /dev/null
+++ b/scripts/verify_telemetry.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""End-to-end verification of bitsandbytes telemetry.
+
+Exercises every wired-up feature once, tagging every event with a unique
+run_id so the run's events can be correlated in Elasticsearch afterwards.
+
+Usage:
+    python scripts/verify_telemetry.py
+
+Then wait ~30 seconds for ES indexing and query:
+
+    es-cli -H esql 'FROM ds-hub-telemetry
+      | WHERE @timestamp >= NOW() - 1 hour
+        AND metadata.bitsandbytes.tag == "<PRINTED_RUN_ID>"
+      | KEEP @timestamp, path_filename,
+             metadata.bitsandbytes.feature,
+             metadata.bitsandbytes.quant_type,
+             metadata.bitsandbytes.variant,
+             metadata.bitsandbytes.accel
+      | SORT @timestamp ASC'
+
+Expected features per run (on a CUDA host):
+    params_4bit, linear_4bit, int8_params, linear_8bit, embedding (one event;
+    dedup is per feature name, so only the first variant is sent), optimizer,
+    optim_override_config, optim_register_module_override, outlier_aware_linear, int8_double_quant.
+"""
+
+from __future__ import annotations
+
+import os
+import time
+import uuid
+
+
+def main() -> int:
+    """Fire telemetry events under a fresh run tag (most need CUDA) and return 0."""
+    run_id = f"verify-{uuid.uuid4().hex[:8]}"
+    # Set before importing bitsandbytes so the tag is live when the first event fires.
+    os.environ["BNB_TELEMETRY_TAG"] = run_id
+    print(f"run_id = {run_id}")
+
+    import torch
+
+    if not torch.cuda.is_available():
+        print("CUDA not available — this script exercises GPU-only code paths.")
+        print("Feature events requiring CUDA will not fire.")
+
+    import bitsandbytes as bnb
+    from bitsandbytes.functional import int8_double_quant
+    from bitsandbytes.nn import (
+        Embedding,
+        Embedding4bit,
+        Embedding8bit,
+        Linear8bitLt,
+        LinearNF4,
+        OutlierAwareLinear,
+        StableEmbedding,
+    )
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    dtype = torch.float16 if device == "cuda" else torch.float32
+
+    # Embeddings report at __init__; they share one feature name, so only the first sends an event.
+    StableEmbedding(16, 8)
+    Embedding(16, 8)
+    if device == "cuda":
+        Embedding8bit(16, 8).to(device)
+        Embedding4bit(16, 8, quant_type="nf4").to(device)
+
+    # Linear4bit + Params4bit (NF4)
+    if device == "cuda":
+        layer4 = LinearNF4(64, 64, compute_dtype=torch.bfloat16).to(device)
+        layer4(torch.randn(1, 64, device=device, dtype=dtype))
+
+        # Linear8bitLt + Int8Params
+        layer8 = Linear8bitLt(64, 64, has_fp16_weights=False, threshold=6.0).to(device)
+        layer8(torch.randn(1, 64, device=device, dtype=dtype))
+
+        # Optimizer: step() triggers the event
+        param = torch.nn.Parameter(torch.randn(64, device=device))
+        param.grad = torch.zeros_like(param)
+        opt = bnb.optim.AdamW8bit([param], lr=1e-3)
+        opt.step()
+
+        # GlobalOptimManager: override_config + register_module_override
+        mng = bnb.optim.GlobalOptimManager.get_instance()
+        mng.override_config([param], "optim_bits", 32)
+        mng.register_module_override(layer4, "weight", {"optim_bits": 32})
+
+        # OutlierAwareLinear (deprecation candidate)
+        OutlierAwareLinear(64, 64).to(device)
+
+        # int8_double_quant (deprecation candidate)
+        A = torch.randn(4, 64, device=device, dtype=torch.float16)
+        int8_double_quant(A, threshold=6.0)
+
+    # Drain the hf_hub telemetry daemon thread — events are queued
+    # asynchronously, exiting too fast would kill them before they flush.
+    print("exercising done, draining telemetry queue ...")
+    time.sleep(8)
+
+    print("\nnext steps:")
+    print(f"  run_id: {run_id}")
+    print(
+        "  query: es-cli -H esql 'FROM ds-hub-telemetry | WHERE metadata.bitsandbytes.tag == \""
+        + run_id
+        + "\" | STATS count = COUNT(*) BY metadata.bitsandbytes.feature | SORT count DESC'"
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
new file mode 100644
index 000000000..cdfb6344b
--- /dev/null
+++ b/tests/test_telemetry.py
@@ -0,0 +1,179 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import sys
+from unittest import mock
+
+import pytest
+
+from bitsandbytes import _telemetry
+
+
+@pytest.fixture(autouse=True)
+def _reset_telemetry_state(monkeypatch):
+    """Give every test a clean slate for telemetry.
+
+    Removes the opt-out and tag env vars, stubs `_is_pytest` to return False
+    so the send path is reachable, and resets module state before and after
+    the test. Tests needing the pytest-disabled path re-patch `_is_pytest`.
+    """
+    for name in ("BNB_DISABLE_TELEMETRY", "HF_HUB_DISABLE_TELEMETRY", "HF_HUB_OFFLINE", "BNB_TELEMETRY_TAG"):
+        monkeypatch.delenv(name, raising=False)
+    monkeypatch.setattr(_telemetry, "_is_pytest", lambda: False)
+    _telemetry._reset_for_testing()
+    yield
+    _telemetry._reset_for_testing()
+
+
+@pytest.fixture
+def fake_send():
+    """Yield a mock installed in place of `huggingface_hub.utils.send_telemetry`."""
+    with mock.patch("huggingface_hub.utils.send_telemetry") as send_mock:
+        yield send_mock
+
+
+def test_report_feature_fires_once_per_feature(fake_send):
+    for _attempt in range(10):
+        _telemetry.report_feature("linear_4bit", {"quant_type": "nf4"})
+    fake_send.assert_called_once()
+
+
+def test_distinct_features_each_fire(fake_send):
+    linear_events = (("linear_4bit", {"quant_type": "nf4"}), ("linear_8bit", {"threshold": 6.0}))
+    for name, details in (*linear_events, ("optimizer", {"name": "AdamW8bit"})):
+        _telemetry.report_feature(name, details)
+    assert fake_send.call_count == 3
+
+
+def test_payload_is_namespaced_under_bitsandbytes(fake_send):
+    _telemetry.report_feature("linear_4bit", {"quant_type": "nf4", "blocksize": 64})
+    sent = fake_send.call_args.kwargs
+    assert sent["topic"] == "bitsandbytes/linear_4bit"
+    assert sent["library_name"] == "bitsandbytes"
+    payload = sent["user_agent"]
+    assert payload["bitsandbytes.feature"] == "linear_4bit"
+    assert payload["bitsandbytes.quant_type"] == "nf4"
+    assert payload["bitsandbytes.blocksize"] == "64"
+    # The un-prefixed detail keys must not appear in the payload
+    assert "quant_type" not in payload and "blocksize" not in payload
+
+
+def test_fingerprint_fields_present(fake_send):
+    """Every event carries the core fingerprint keys."""
+    _telemetry.report_feature("linear_4bit")
+    payload = fake_send.call_args.kwargs["user_agent"]
+
+    required = {f"bitsandbytes.{name}" for name in ("version", "os", "arch", "python", "accel")}
+    missing = required.difference(payload)
+    assert not missing
+
+
+def test_values_are_stringified(fake_send):
+    _telemetry.report_feature("x", {"bits": 8, "paged": True, "blocksize": 64})
+    payload = fake_send.call_args.kwargs["user_agent"]
+    expected = {"bitsandbytes.bits": "8", "bitsandbytes.paged": "True", "bitsandbytes.blocksize": "64"}
+    for key, text in expected.items():
+        assert payload[key] == text
+
+
+def test_already_prefixed_keys_not_double_prefixed(fake_send):
+    _telemetry.report_feature("x", {"bitsandbytes.custom": "v"})
+    payload = fake_send.call_args.kwargs["user_agent"]
+    assert "bitsandbytes.bitsandbytes.custom" not in payload
+    assert payload["bitsandbytes.custom"] == "v"
+
+
+@pytest.mark.parametrize(
+    "env_var",
+    ("BNB_DISABLE_TELEMETRY", "HF_HUB_DISABLE_TELEMETRY", "HF_HUB_OFFLINE"),
+)
+def test_opt_out_env_vars(fake_send, monkeypatch, env_var):
+    monkeypatch.setenv(env_var, "1")
+    _telemetry.report_feature("linear_4bit")
+    assert not fake_send.called
+
+
+@pytest.mark.parametrize("value", ("1", "true", "TRUE", "yes", "on"))
+def test_opt_out_accepts_truthy_values(fake_send, monkeypatch, value):
+    monkeypatch.setenv("BNB_DISABLE_TELEMETRY", value)
+    _telemetry.report_feature("linear_4bit")
+    assert not fake_send.called
+
+
+def test_opt_out_blank_does_not_disable(fake_send, monkeypatch):
+    monkeypatch.setenv("BNB_DISABLE_TELEMETRY", "")
+    _telemetry.report_feature("linear_4bit")
+    fake_send.assert_called_once()
+
+
+def test_telemetry_tag_attached_when_set(fake_send, monkeypatch):
+    run_tag = "verify-abc123"
+    monkeypatch.setenv("BNB_TELEMETRY_TAG", run_tag)
+    _telemetry.report_feature("linear_4bit")
+    assert fake_send.call_args.kwargs["user_agent"]["bitsandbytes.tag"] == run_tag
+
+
+def test_no_tag_attached_by_default(fake_send):
+    _telemetry.report_feature("linear_4bit")
+    payload_keys = set(fake_send.call_args.kwargs["user_agent"])
+    assert "bitsandbytes.tag" not in payload_keys
+
+
+def test_graceful_when_huggingface_hub_missing(monkeypatch):
+    """report_feature must silently no-op when huggingface_hub cannot be imported."""
+    import builtins
+
+    original_import = builtins.__import__
+
+    def blocking_import(name, *args, **kwargs):
+        # The prefix check also covers "huggingface_hub.utils".
+        if name.startswith("huggingface_hub"):
+            raise ImportError("simulated missing package")
+        return original_import(name, *args, **kwargs)
+
+    monkeypatch.setattr(builtins, "__import__", blocking_import)
+    # Drop cached huggingface_hub modules for the duration of the test
+    cached = [mod for mod in sys.modules if mod.startswith("huggingface_hub")]
+    for mod in cached:
+        monkeypatch.delitem(sys.modules, mod, raising=False)
+
+    _telemetry.report_feature("linear_4bit")  # must not raise
+
+
+def test_send_exception_is_swallowed(monkeypatch):
+    """An exception raised by send_telemetry must stay inside report_feature."""
+    with mock.patch("huggingface_hub.utils.send_telemetry", side_effect=RuntimeError("boom")):
+        _telemetry.report_feature("linear_4bit")  # must not raise
+
+
+def test_dedup_persists_even_when_disabled(fake_send, monkeypatch):
+    """A suppressed call still consumes the feature's dedup slot, so clearing
+    the opt-out later in the same process cannot trigger a late send."""
+    monkeypatch.setenv("BNB_DISABLE_TELEMETRY", "1")
+    _telemetry.report_feature("linear_4bit")
+    assert not fake_send.called
+
+    monkeypatch.delenv("BNB_DISABLE_TELEMETRY")
+    _telemetry.report_feature("linear_4bit")
+    assert not fake_send.called
+
+
+def test_pytest_auto_detection_disables_telemetry(fake_send, monkeypatch):
+    """When `_is_pytest()` reports True, report_feature sends nothing, keeping
+    test runs out of the real-usage stream."""
+    monkeypatch.setattr(_telemetry, "_is_pytest", lambda: True)
+    _telemetry.report_feature("linear_4bit")
+    assert not fake_send.called
+
+
+def test_is_pytest_detects_current_process(monkeypatch):
+    """The real `_is_pytest` returns True inside this pytest process.
+
+    `monkeypatch.undo()` reverts the autouse fixture's stub so the genuine
+    implementation is the one being called.
+    """
+    monkeypatch.undo()
+    assert _telemetry._is_pytest() is True