From cb8ecdd28da16b52cadf7eb22f5d9c22cb5f4e77 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 9 Apr 2026 09:44:33 +0200 Subject: [PATCH 1/2] Make typing flexible --- vicinity/datatypes.py | 3 ++- vicinity/vicinity.py | 34 +++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/vicinity/datatypes.py b/vicinity/datatypes.py index 8871609..e1bee3d 100644 --- a/vicinity/datatypes.py +++ b/vicinity/datatypes.py @@ -1,12 +1,13 @@ from collections.abc import Iterable from enum import Enum from pathlib import Path +from typing import Any from numpy import typing as npt PathLike = str | Path Matrix = npt.NDArray | list[npt.NDArray] -SimilarityItem = list[tuple[str, float]] +SimilarityItem = list[tuple[Any, float]] SimilarityResult = list[SimilarityItem] # Tuple of (indices, distances) SingleQueryResult = tuple[npt.NDArray, npt.NDArray] diff --git a/vicinity/vicinity.py b/vicinity/vicinity.py index 6330ff1..25b9581 100644 --- a/vicinity/vicinity.py +++ b/vicinity/vicinity.py @@ -6,7 +6,9 @@ from collections.abc import Iterable, Sequence from pathlib import Path from time import perf_counter -from typing import Any +from typing import Any, Generic, TypeVar + +T = TypeVar("T") import numpy as np import orjson @@ -15,12 +17,12 @@ from vicinity import Metric from vicinity.backends import AbstractBackend, BasicBackend, BasicVectorStore, get_backend_class -from vicinity.datatypes import Backend, PathLike, SimilarityResult +from vicinity.datatypes import Backend, PathLike logger = logging.getLogger(__name__) -class Vicinity: +class Vicinity(Generic[T]): """ Work with vector representations of items. @@ -30,7 +32,7 @@ class Vicinity: def __init__( self, - items: Sequence[Any], + items: Sequence[T], backend: AbstractBackend, metadata: dict[str, Any] | None = None, vector_store: BasicVectorStore | None = None, @@ -50,7 +52,7 @@ def __init__( raise ValueError( "Your vector space and list of items are not the same length: " f"{len(backend)} != {len(items)}" ) - self.items: list[Any] = list(items) + self.items: list[T] = list(items) self.backend: AbstractBackend = backend self.metadata = metadata or {} self.vector_store = vector_store @@ -73,13 +75,13 @@ def __len__(self) -> int: @classmethod def from_vectors_and_items( - cls: type[Vicinity], + cls: type[Vicinity[T]], vectors: npt.NDArray, - items: Sequence[Any], + items: Sequence[T], backend_type: Backend | str = Backend.BASIC, store_vectors: bool = False, **kwargs: Any, - ) -> Vicinity: + ) -> Vicinity[T]: """ Create a Vicinity instance from vectors and items. @@ -115,7 +117,7 @@ def query( self, vectors: npt.NDArray, k: int = 10, - ) -> SimilarityResult: + ) -> list[list[tuple[T, float]]]: """ Find the nearest neighbors to some arbitrary vector. @@ -142,7 +144,7 @@ def query_threshold( vectors: npt.NDArray, threshold: float = 0.5, max_k: int = 100, - ) -> SimilarityResult: + ) -> list[list[tuple[T, float]]]: """ Find the nearest neighbors to some arbitrary vector with some threshold. Note: the output is not sorted. @@ -178,7 +180,9 @@ def save( :param folder: The path to which to save the JSON file. The vectors are saved separately. The JSON contains a path to the numpy file. :param overwrite: Whether to overwrite the JSON and numpy files if they already exist. :raises ValueError: If the path is not a directory. - :raises JSONEncodeError: If the items are not serializable. + :raises JSONEncodeError: If the items are not JSON-serializable. ``save()`` and ``load()`` + only support item types that orjson can encode (e.g. strings, numbers, dicts). + Use ``Vicinity[str]`` or another serializable type if you need persistence. """ path = Path(folder) path.mkdir(parents=True, exist_ok=overwrite) @@ -200,7 +204,7 @@ def save( self.vector_store.save(store_path) @classmethod - def load(cls, filename: PathLike) -> Vicinity: + def load(cls, filename: PathLike) -> Vicinity[Any]: """ Load a Vicinity instance in fast format. @@ -231,7 +235,7 @@ def load(cls, filename: PathLike) -> Vicinity: return instance - def insert(self, tokens: Sequence[Any], vectors: npt.NDArray) -> None: + def insert(self, tokens: Sequence[T], vectors: npt.NDArray) -> None: """ Insert new items into the vector space. @@ -250,7 +254,7 @@ def insert(self, tokens: Sequence[Any], vectors: npt.NDArray) -> None: if self.vector_store is not None: self.vector_store.insert(vectors) - def delete(self, tokens: Sequence[Any]) -> None: + def delete(self, tokens: Sequence[T]) -> None: """ Delete tokens from the vector space. @@ -309,7 +313,7 @@ def push_to_hub( ) @classmethod - def load_from_hub(cls: type[Vicinity], repo_id: str, token: str | None = None, **kwargs: Any) -> Vicinity: + def load_from_hub(cls: type[Vicinity[Any]], repo_id: str, token: str | None = None, **kwargs: Any) -> Vicinity[Any]: """ Load a Vicinity instance from the Hugging Face Hub. From 07e86cb2a79f1402983e89cbdda239e5cef100f7 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 9 Apr 2026 09:48:26 +0200 Subject: [PATCH 2/2] Make typing flexible --- vicinity/datatypes.py | 8 +++++--- vicinity/vicinity.py | 10 ++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/vicinity/datatypes.py b/vicinity/datatypes.py index e1bee3d..f09db5e 100644 --- a/vicinity/datatypes.py +++ b/vicinity/datatypes.py @@ -1,14 +1,16 @@ from collections.abc import Iterable from enum import Enum from pathlib import Path -from typing import Any +from typing import TypeVar from numpy import typing as npt +T = TypeVar("T") + PathLike = str | Path Matrix = npt.NDArray | list[npt.NDArray] -SimilarityItem = list[tuple[Any, float]] -SimilarityResult = list[SimilarityItem] +SimilarityItem = list[tuple[T, float]] +SimilarityResult = list[list[tuple[T, float]]] # Tuple of (indices, distances) SingleQueryResult = tuple[npt.NDArray, npt.NDArray] QueryResult = list[SingleQueryResult] diff --git a/vicinity/vicinity.py b/vicinity/vicinity.py index 25b9581..b1334e0 100644 --- a/vicinity/vicinity.py +++ b/vicinity/vicinity.py @@ -6,9 +6,7 @@ from collections.abc import Iterable, Sequence from pathlib import Path from time import perf_counter -from typing import Any, Generic, TypeVar - -T = TypeVar("T") +from typing import Any, Generic import numpy as np import orjson @@ -17,7 +15,7 @@ from vicinity import Metric from vicinity.backends import AbstractBackend, BasicBackend, BasicVectorStore, get_backend_class -from vicinity.datatypes import Backend, PathLike +from vicinity.datatypes import Backend, PathLike, SimilarityResult, T logger = logging.getLogger(__name__) @@ -117,7 +115,7 @@ def query( self, vectors: npt.NDArray, k: int = 10, - ) -> list[list[tuple[T, float]]]: + ) -> SimilarityResult[T]: """ Find the nearest neighbors to some arbitrary vector. @@ -144,7 +142,7 @@ def query_threshold( vectors: npt.NDArray, threshold: float = 0.5, max_k: int = 100, - ) -> list[list[tuple[T, float]]]: + ) -> SimilarityResult[T]: """ Find the nearest neighbors to some arbitrary vector with some threshold. Note: the output is not sorted.