From 713590a798190b34f4c43c4b097dbd61455113c3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 21 Apr 2025 03:09:36 -0700 Subject: [PATCH 001/121] Fixed SparseVector constructor for SciPy sparse matrices - fixes #127 --- CHANGELOG.md | 4 ++++ pgvector/sparsevec.py | 2 +- tests/test_sparse_vector.py | 14 +++++++++++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0e2730..1bbd73c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.4.1 (unreleased) + +- Fixed `SparseVector` constructor for SciPy sparse matrices + ## 0.4.0 (2025-03-15) - Added top-level `pgvector` package diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 8df2dfd..895fbd0 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -85,7 +85,7 @@ def _from_sparse(self, value): if hasattr(value, 'coords'): # scipy 1.13+ - self._indices = value.coords[0].tolist() + self._indices = value.coords[-1].tolist() else: self._indices = value.col.tolist() self._values = value.data.tolist() diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index dff03dd..933cfff 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -1,7 +1,7 @@ import numpy as np from pgvector import SparseVector import pytest -from scipy.sparse import coo_array +from scipy.sparse import coo_array, csr_array, csr_matrix from struct import pack @@ -49,6 +49,18 @@ def test_dok_array(self): assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] + def test_csr_array(self): + arr = csr_array(np.array([1, 0, 2, 0, 3, 0])) + vec = SparseVector(arr) + assert vec.to_list() == [1, 0, 2, 0, 3, 0] + assert vec.indices() == [0, 2, 4] + + def test_csr_matrix(self): + mat = csr_matrix(np.array([1, 0, 2, 0, 3, 0])) + vec = SparseVector(mat) + assert vec.to_list() == [1, 0, 2, 0, 3, 0] + assert vec.indices() == [0, 2, 4] + def test_repr(self): assert repr(SparseVector([1, 0, 2, 0, 3, 0])) == 'SparseVector({0: 1.0, 2: 2.0, 4: 3.0}, 6)' assert str(SparseVector([1, 0, 2, 0, 3, 0])) == 'SparseVector({0: 1.0, 2: 2.0, 4: 3.0}, 6)' From 76afd8ec3013ac58bb6cc60a1b5b705f157ea18b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 21 Apr 2025 03:15:41 -0700 Subject: [PATCH 002/121] Added test for coo_matrix --- tests/test_sparse_vector.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 933cfff..0cf0a72 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -1,7 +1,7 @@ import numpy as np from pgvector import SparseVector import pytest -from scipy.sparse import coo_array, csr_array, csr_matrix +from scipy.sparse import coo_array, coo_matrix, csr_array, csr_matrix from struct import pack @@ -43,6 +43,12 @@ def test_coo_array_dimensions(self): SparseVector(coo_array(np.array([1, 0, 2, 0, 3, 0])), 6) assert str(error.value) == 'extra argument' + def test_coo_matrix(self): + mat = coo_matrix(np.array([1, 0, 2, 0, 3, 0])) + vec = SparseVector(mat) + assert vec.to_list() == [1, 0, 2, 0, 3, 0] + assert vec.indices() == [0, 2, 4] + def test_dok_array(self): arr = coo_array(np.array([1, 0, 2, 0, 3, 0])).todok() vec = SparseVector(arr) From 809287f92847e1c609a9c395891da76f674379ea Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 21 Apr 2025 03:20:20 -0700 Subject: [PATCH 003/121] Fixed CI --- tests/test_sparse_vector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 0cf0a72..d580f32 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -56,7 +56,7 @@ def test_dok_array(self): assert vec.indices() == [0, 2, 4] def test_csr_array(self): - arr = csr_array(np.array([1, 0, 2, 0, 3, 0])) + arr = csr_array(np.array([[1, 0, 2, 0, 3, 0]])) vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] From f9d2073df5cce39f0691ead6f9e030516baac7f8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 26 Apr 2025 11:56:00 -0700 Subject: [PATCH 004/121] Version bump to 0.4.1 [skip ci] --- CHANGELOG.md | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bbd73c..0ed80e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.4.1 (unreleased) +## 0.4.1 (2025-04-26) - Fixed `SparseVector` constructor for SciPy sparse matrices diff --git a/pyproject.toml b/pyproject.toml index b889f4b..9395f9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pgvector" -version = "0.4.0" +version = "0.4.1" description = "pgvector support for Python" readme = "README.md" authors = [ From 7793bb069942fbcc2e77cf7349c59ffc28d8b6e0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 21 May 2025 18:16:18 -0700 Subject: [PATCH 005/121] Improved example [skip ci] --- examples/loading/example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/loading/example.py b/examples/loading/example.py index 0702129..7f3dce8 100644 --- a/examples/loading/example.py +++ b/examples/loading/example.py @@ -25,12 +25,12 @@ copy.set_types(['vector']) for i, embedding in enumerate(embeddings): + copy.write_row([embedding]) + # show progress if i % 10000 == 0: print('.', end='', flush=True) - copy.write_row([embedding]) - print('\nSuccess!') # create any indexes *after* loading initial data (skipping for this example) From 91088aacfadad37c9b8ea533b1e2b16b08d12ac4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 8 Jun 2025 16:28:24 -0700 Subject: [PATCH 006/121] Updated readme [skip ci] --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 24d9bb9..7c302b1 100644 --- a/README.md +++ b/README.md @@ -409,7 +409,7 @@ Enable the extension conn.execute('CREATE EXTENSION IF NOT EXISTS vector') ``` -Register the vector type with your connection +Register the types with your connection ```python from pgvector.psycopg import register_vector @@ -472,7 +472,7 @@ cur = conn.cursor() cur.execute('CREATE EXTENSION IF NOT EXISTS vector') ``` -Register the vector type with your connection or cursor +Register the types with your connection or cursor ```python from pgvector.psycopg2 import register_vector @@ -518,7 +518,7 @@ Enable the extension await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') ``` -Register the vector type with your connection +Register the types with your connection ```python from pgvector.asyncpg import register_vector @@ -572,7 +572,7 @@ Enable the extension conn.run('CREATE EXTENSION IF NOT EXISTS vector') ``` -Register the vector type with your connection +Register the types with your connection ```python from pgvector.pg8000 import register_vector From ee3e71ca2c07a12a8332a3877c0ce14adc9a5da8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 16 Jun 2025 15:36:53 -0700 Subject: [PATCH 007/121] Updated format for license identifier --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9395f9e..0cfa183 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" authors = [ {name = "Andrew Kane", email = "andrew@ankane.org"} ] -license = {text = "MIT"} +license = "MIT" requires-python = ">= 3.9" dependencies = [ "numpy" From 33dee606229489c9ffb0cb5a1cd72bd4705ac618 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Sep 2025 16:35:08 -0700 Subject: [PATCH 008/121] Added support for str objects for bit type with SQLAlchemy - #137 Co-authored-by: Giacomo rua --- CHANGELOG.md | 4 ++++ pgvector/sqlalchemy/bit.py | 12 ++++++++++++ tests/test_sqlalchemy.py | 5 +++++ 3 files changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ed80e3..f219b22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.4.2 (unreleased) + +- Added support for `str` objects for `bit` type with SQLAlchemy + ## 0.4.1 (2025-04-26) - Fixed `SparseVector` constructor for SciPy sparse matrices diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index 0f83f3c..1ea85c3 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -14,6 +14,18 @@ def get_col_spec(self, **kw): return 'BIT' return 'BIT(%d)' % self.length + def bind_processor(self, dialect): + if dialect.__class__.__name__ == 'PGDialect_asyncpg': + import asyncpg + + def process(value): + if isinstance(value, str): + return asyncpg.BitString(value) + return value + return process + else: + return super().bind_processor(dialect) + class comparator_factory(UserDefinedType.Comparator): def hamming_distance(self, other): return self.op('<~>', return_type=Float)(other) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 5aec977..cd7bad8 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -596,6 +596,11 @@ async def test_bit(self, engine): item = await session.get(Item, 1) assert item.binary_embedding == embedding + if engine == asyncpg_engine: + session.add(Item(id=2, binary_embedding='101')) + item = await session.get(Item, 2) + assert item.binary_embedding == embedding + await engine.dispose() @pytest.mark.asyncio From dc9a8f959995f009649fd230139ca41193e0a801 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 3 Sep 2025 12:04:21 -0700 Subject: [PATCH 009/121] Added test for binary quantization with re-ranking --- tests/test_sqlalchemy.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index cd7bad8..702eee1 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -528,6 +528,22 @@ def test_binary_quantize(self, engine): items = session.query(Item).order_by(distance).all() assert [v.id for v in items] == [2, 3, 1] + def test_binary_quantize_reranking(self, engine): + # recreate index (could also vacuum table) + binary_quantize_index.drop(setup_engine) + binary_quantize_index.create(setup_engine) + + with Session(engine) as session: + session.add(Item(id=1, embedding=[-1, -2, -3])) + session.add(Item(id=2, embedding=[1, -2, 3])) + session.add(Item(id=3, embedding=[1, 2, 3])) + session.commit() + + distance = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) + subquery = session.query(Item).order_by(distance).limit(20).subquery() + items = session.query(subquery).order_by(subquery.c.embedding.cosine_distance([3, -1, 2])).limit(5).all() + assert [v.id for v in items] == [2, 3, 1] + @pytest.mark.parametrize('engine', array_engines) class TestSqlalchemyArray: From caf1a2e0dd7a1ba2ad0ca9f09b50516dcfffcdeb Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 3 Sep 2025 12:09:18 -0700 Subject: [PATCH 010/121] Added docs for binary quantization with SQLAlchemy [skip ci] --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index 7c302b1..bfec8bb 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,38 @@ order = func.cast(Item.embedding, HALFVEC(3)).l2_distance([3, 1, 2]) session.scalars(select(Item).order_by(order).limit(5)) ``` +#### Binary Quantization + +Use expression indexing for binary quantization + +```python +from pgvector.sqlalchemy import BIT +from sqlalchemy.sql import func + +index = Index( + 'my_index', + func.cast(func.binary_quantize(Item.embedding), BIT(3)).label('embedding'), + postgresql_using='hnsw', + postgresql_with={'m': 16, 'ef_construction': 64}, + postgresql_ops={'embedding': 'bit_hamming_ops'} +) +``` + +Get the nearest neighbors by Hamming distance + +```python +order = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) +session.scalars(select(Item).order_by(order).limit(5)) +``` + +Re-rank by the original vectors for better recall + +```python +order = func.cast(func.binary_quantize(Item.embedding), BIT(3)).hamming_distance(func.binary_quantize(func.cast([3, -1, 2], VECTOR(3)))) +subquery = session.query(Item).order_by(order).limit(20).subquery() +session.scalars(select(subquery).order_by(subquery.c.embedding.cosine_distance([3, -1, 2])).limit(5)) +``` + #### Arrays Add an array column From c820a53bfb46196551de3c3f59f81b192d890574 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 3 Sep 2025 12:11:38 -0700 Subject: [PATCH 011/121] Simplified examples [skip ci] --- README.md | 2 -- tests/test_sqlalchemy.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/README.md b/README.md index bfec8bb..7cff86c 100644 --- a/README.md +++ b/README.md @@ -259,7 +259,6 @@ index = Index( 'my_index', func.cast(Item.embedding, HALFVEC(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'halfvec_l2_ops'} ) ``` @@ -283,7 +282,6 @@ index = Index( 'my_index', func.cast(func.binary_quantize(Item.embedding), BIT(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'bit_hamming_ops'} ) ``` diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 702eee1..c59c12e 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -103,7 +103,6 @@ class Item(Base): 'sqlalchemy_orm_half_precision_index', func.cast(Item.embedding, HALFVEC(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'halfvec_l2_ops'} ) half_precision_index.create(setup_engine) @@ -112,7 +111,6 @@ class Item(Base): 'sqlalchemy_orm_binary_quantize_index', func.cast(func.binary_quantize(Item.embedding), BIT(3)).label('embedding'), postgresql_using='hnsw', - postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'bit_hamming_ops'} ) binary_quantize_index.create(setup_engine) From 1a72b7571adf3325174b383aca85bfb3a5b925fa Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Sep 2025 00:12:48 -0700 Subject: [PATCH 012/121] Updated pgvector on CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4d4e8ed..d943ea0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: dev-files: true - run: | cd /tmp - git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git + git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git cd pgvector make sudo make install From e211ba4029f204734f0c001fbb90f6a594d561ae Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 9 Oct 2025 23:19:50 -0700 Subject: [PATCH 013/121] Test with Python 3.14 on CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d943ea0..52ab712 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.13, 3.9] + python: [3.14, 3.9] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From e2986daf2b1533cc2c849f7e39350e31d57ac325 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 4 Dec 2025 16:47:23 -0800 Subject: [PATCH 014/121] Added support for Django 6 --- CHANGELOG.md | 1 + pgvector/django/extensions.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f219b22..62da0bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 0.4.2 (unreleased) +- Added support for Django 6 - Added support for `str` objects for `bit` type with SQLAlchemy ## 0.4.1 (2025-04-26) diff --git a/pgvector/django/extensions.py b/pgvector/django/extensions.py index 0573f72..1d04739 100644 --- a/pgvector/django/extensions.py +++ b/pgvector/django/extensions.py @@ -1,6 +1,11 @@ +from django import VERSION from django.contrib.postgres.operations import CreateExtension class VectorExtension(CreateExtension): - def __init__(self): - self.name = 'vector' + if VERSION[0] >= 6: + def __init__(self, hints=None): + super().__init__('vector', hints=hints) + else: + def __init__(self): + self.name = 'vector' From 674f5ba3410c873d49f50fa9725b95d9db50c674 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 4 Dec 2025 16:50:18 -0800 Subject: [PATCH 015/121] Updated checkout action [skip ci] --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 52ab712..34f15d5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,7 +8,7 @@ jobs: matrix: python: [3.14, 3.9] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} From 2968f258f9486531bd1340cbda4ff8fcaf06cdc1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 4 Dec 2025 17:06:01 -0800 Subject: [PATCH 016/121] Version bump to 0.4.2 [skip ci] --- CHANGELOG.md | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62da0bb..745335f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.4.2 (unreleased) +## 0.4.2 (2025-12-04) - Added support for Django 6 - Added support for `str` objects for `bit` type with SQLAlchemy diff --git a/pyproject.toml b/pyproject.toml index 0cfa183..6f91e04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pgvector" -version = "0.4.1" +version = "0.4.2" description = "pgvector support for Python" readme = "README.md" authors = [ From 05387da3c5ce0dc9f1d6ef238dcae118aa8176ea Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Dec 2025 16:31:46 -0800 Subject: [PATCH 017/121] Updated examples [skip ci] --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7cff86c..a208ae0 100644 --- a/README.md +++ b/README.md @@ -177,10 +177,10 @@ session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR class Item(Base): - embedding = mapped_column(Vector(3)) + embedding = mapped_column(VECTOR(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` @@ -306,11 +306,11 @@ session.scalars(select(subquery).order_by(subquery.c.embedding.cosine_distance([ Add an array column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import ARRAY class Item(Base): - embeddings = mapped_column(ARRAY(Vector(3))) + embeddings = mapped_column(ARRAY(VECTOR(3))) ``` And register the types with the underlying driver @@ -359,10 +359,10 @@ session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) Add a vector column ```python -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR class Item(SQLModel, table=True): - embedding: Any = Field(sa_type=Vector(3)) + embedding: Any = Field(sa_type=VECTOR(3)) ``` Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` From 6d8db07f74fd95b3673fd8149f3f805a15788f48 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Dec 2025 16:34:08 -0800 Subject: [PATCH 018/121] Updated readme [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a208ae0..36d6c06 100644 --- a/README.md +++ b/README.md @@ -345,7 +345,7 @@ from sqlalchemy import event @event.listens_for(engine, "connect") def connect(dbapi_connection, connection_record): - register_vector(dbapi_connection, arrays=True) + register_vector(dbapi_connection) ``` ## SQLModel From b34f1c994e843dd7468d600b0f0ff5dbb949ec61 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 6 Dec 2025 16:38:13 -0800 Subject: [PATCH 019/121] Updated examples [skip ci] --- examples/implicit/example.py | 6 +++--- examples/lightfm/example.py | 6 +++--- examples/surprise/example.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/implicit/example.py b/examples/implicit/example.py index f70eb8c..2cbf7c6 100644 --- a/examples/implicit/example.py +++ b/examples/implicit/example.py @@ -1,6 +1,6 @@ import implicit from implicit.datasets.movielens import get_movielens -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Integer, String from sqlalchemy.orm import declarative_base, mapped_column, Session @@ -16,7 +16,7 @@ class User(Base): __tablename__ = 'user' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) class Item(Base): @@ -24,7 +24,7 @@ class Item(Base): id = mapped_column(Integer, primary_key=True) title = mapped_column(String) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) Base.metadata.drop_all(engine) diff --git a/examples/lightfm/example.py b/examples/lightfm/example.py index fcb9027..65031c4 100644 --- a/examples/lightfm/example.py +++ b/examples/lightfm/example.py @@ -1,6 +1,6 @@ from lightfm import LightFM from lightfm.datasets import fetch_movielens -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Float, Integer, String from sqlalchemy.orm import declarative_base, mapped_column, Session @@ -16,7 +16,7 @@ class User(Base): __tablename__ = 'user' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) class Item(Base): @@ -24,7 +24,7 @@ class Item(Base): id = mapped_column(Integer, primary_key=True) title = mapped_column(String) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) bias = mapped_column(Float) diff --git a/examples/surprise/example.py b/examples/surprise/example.py index bd7d18d..e413bcf 100644 --- a/examples/surprise/example.py +++ b/examples/surprise/example.py @@ -1,4 +1,4 @@ -from pgvector.sqlalchemy import Vector +from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Integer from sqlalchemy.orm import declarative_base, mapped_column, Session from surprise import Dataset, SVD @@ -15,14 +15,14 @@ class User(Base): __tablename__ = 'user' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) class Item(Base): __tablename__ = 'item' id = mapped_column(Integer, primary_key=True) - factors = mapped_column(Vector(20)) + factors = mapped_column(VECTOR(20)) Base.metadata.drop_all(engine) From e1dda975bdb2635f273cc3e6cc1b9c01780cec00 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 17 Dec 2025 12:20:29 -0800 Subject: [PATCH 020/121] Switched to getuser for tests --- tests/test_pg8000.py | 4 ++-- tests/test_sqlalchemy.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 4d3e474..61fbc4c 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -1,10 +1,10 @@ +from getpass import getuser import numpy as np -import os from pgvector import HalfVector, SparseVector, Vector from pgvector.pg8000 import register_vector from pg8000.native import Connection -conn = Connection(os.environ["USER"], database='pgvector_python_test') +conn = Connection(getuser(), database='pgvector_python_test') conn.run('CREATE EXTENSION IF NOT EXISTS vector') conn.run('DROP TABLE IF EXISTS pg8000_items') diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index c59c12e..4e870cc 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,6 +1,6 @@ import asyncpg +from getpass import getuser import numpy as np -import os from pgvector import HalfVector, SparseVector, Vector from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest @@ -28,7 +28,7 @@ def psycopg2_connect(dbapi_connection, connection_record): register_vector(dbapi_connection) -pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') +pg8000_engine = create_engine(f'postgresql+pg8000://{getuser()}@localhost/pgvector_python_test') if sqlalchemy_version > 1: psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') From 5392e2cd3200574829610efedef1678dafbaa4d7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 17 Dec 2025 13:13:06 -0800 Subject: [PATCH 021/121] Switched to dependency groups --- .github/workflows/build.yml | 2 +- README.md | 2 +- pyproject.toml | 15 +++++++++++++++ requirements.txt | 12 ------------ 4 files changed, 17 insertions(+), 14 deletions(-) delete mode 100644 requirements.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 34f15d5..7d0225b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - - run: pip install -r requirements.txt + - run: pip install --group dev - uses: ankane/setup-postgres@v1 with: database: pgvector_python_test diff --git a/README.md b/README.md index 36d6c06..7671c00 100644 --- a/README.md +++ b/README.md @@ -807,7 +807,7 @@ To get started with development: ```sh git clone https://github.com/pgvector/pgvector-python.git cd pgvector-python -pip install -r requirements.txt +pip install --group dev createdb pgvector_python_test pytest ``` diff --git a/pyproject.toml b/pyproject.toml index 6f91e04..5716d05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,21 @@ dependencies = [ [project.urls] Homepage = "https://github.com/pgvector/pgvector-python" +[dependency-groups] +dev = [ + "asyncpg", + "Django", + "peewee", + "pg8000", + "psycopg[binary,pool]", + "psycopg2-binary", + "pytest", + "pytest-asyncio", + "scipy", + "SQLAlchemy[asyncio]>=2", + "sqlmodel>=0.0.12" +] + [tool.pytest.ini_options] asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "function" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index a13be06..0000000 --- a/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -asyncpg -Django -numpy -peewee -pg8000 -psycopg[binary,pool] -psycopg2-binary -pytest -pytest-asyncio -scipy -SQLAlchemy[asyncio]>=2 -sqlmodel>=0.0.12 From 6c1fa981ad624f7ded69e3a60df8fa303466e19c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 18 Dec 2025 21:52:46 -0800 Subject: [PATCH 022/121] Updated example to pyproject.toml [skip ci] --- README.md | 2 +- examples/citus/pyproject.toml | 11 +++++++++++ examples/citus/requirements.txt | 3 --- examples/cohere/pyproject.toml | 11 +++++++++++ examples/cohere/requirements.txt | 3 --- examples/colbert/pyproject.toml | 12 ++++++++++++ examples/colbert/requirements.txt | 4 ---- examples/colpali/pyproject.toml | 12 ++++++++++++ examples/colpali/requirements.txt | 4 ---- examples/gensim/pyproject.toml | 13 +++++++++++++ examples/gensim/requirements.txt | 5 ----- examples/hybrid_search/pyproject.toml | 11 +++++++++++ examples/hybrid_search/requirements.txt | 3 --- examples/image_search/pyproject.toml | 14 ++++++++++++++ examples/image_search/requirements.txt | 6 ------ examples/imagehash/pyproject.toml | 13 +++++++++++++ examples/imagehash/requirements.txt | 5 ----- examples/implicit/pyproject.toml | 13 +++++++++++++ examples/implicit/requirements.txt | 5 ----- examples/lightfm/pyproject.toml | 12 ++++++++++++ examples/lightfm/requirements.txt | 4 ---- examples/loading/pyproject.toml | 11 +++++++++++ examples/loading/requirements.txt | 3 --- examples/openai/pyproject.toml | 11 +++++++++++ examples/openai/requirements.txt | 3 --- examples/rag/pyproject.toml | 11 +++++++++++ examples/rag/requirements.txt | 3 --- examples/rdkit/pyproject.toml | 11 +++++++++++ examples/rdkit/requirements.txt | 3 --- examples/sentence_transformers/pyproject.toml | 11 +++++++++++ examples/sentence_transformers/requirements.txt | 3 --- examples/sparse_search/pyproject.toml | 13 +++++++++++++ examples/sparse_search/requirements.txt | 5 ----- examples/surprise/pyproject.toml | 12 ++++++++++++ examples/surprise/requirements.txt | 4 ---- 35 files changed, 203 insertions(+), 67 deletions(-) create mode 100644 examples/citus/pyproject.toml delete mode 100644 examples/citus/requirements.txt create mode 100644 examples/cohere/pyproject.toml delete mode 100644 examples/cohere/requirements.txt create mode 100644 examples/colbert/pyproject.toml delete mode 100644 examples/colbert/requirements.txt create mode 100644 examples/colpali/pyproject.toml delete mode 100644 examples/colpali/requirements.txt create mode 100644 examples/gensim/pyproject.toml delete mode 100644 examples/gensim/requirements.txt create mode 100644 examples/hybrid_search/pyproject.toml delete mode 100644 examples/hybrid_search/requirements.txt create mode 100644 examples/image_search/pyproject.toml delete mode 100644 examples/image_search/requirements.txt create mode 100644 examples/imagehash/pyproject.toml delete mode 100644 examples/imagehash/requirements.txt create mode 100644 examples/implicit/pyproject.toml delete mode 100644 examples/implicit/requirements.txt create mode 100644 examples/lightfm/pyproject.toml delete mode 100644 examples/lightfm/requirements.txt create mode 100644 examples/loading/pyproject.toml delete mode 100644 examples/loading/requirements.txt create mode 100644 examples/openai/pyproject.toml delete mode 100644 examples/openai/requirements.txt create mode 100644 examples/rag/pyproject.toml delete mode 100644 examples/rag/requirements.txt create mode 100644 examples/rdkit/pyproject.toml delete mode 100644 examples/rdkit/requirements.txt create mode 100644 examples/sentence_transformers/pyproject.toml delete mode 100644 examples/sentence_transformers/requirements.txt create mode 100644 examples/sparse_search/pyproject.toml delete mode 100644 examples/sparse_search/requirements.txt create mode 100644 examples/surprise/pyproject.toml delete mode 100644 examples/surprise/requirements.txt diff --git a/README.md b/README.md index 7671c00..95d5fbe 100644 --- a/README.md +++ b/README.md @@ -816,7 +816,7 @@ To run an example: ```sh cd examples/loading -pip install -r requirements.txt +pip install --group dev createdb pgvector_example python3 example.py ``` diff --git a/examples/citus/pyproject.toml b/examples/citus/pyproject.toml new file mode 100644 index 0000000..ee40a36 --- /dev/null +++ b/examples/citus/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "numpy", + "pgvector", + "psycopg[binary]" +] diff --git a/examples/citus/requirements.txt b/examples/citus/requirements.txt deleted file mode 100644 index 1cf8ee9..0000000 --- a/examples/citus/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy -pgvector -psycopg[binary] diff --git a/examples/cohere/pyproject.toml b/examples/cohere/pyproject.toml new file mode 100644 index 0000000..f0c88b7 --- /dev/null +++ b/examples/cohere/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "cohere", + "pgvector", + "psycopg[binary]" +] diff --git a/examples/cohere/requirements.txt b/examples/cohere/requirements.txt deleted file mode 100644 index 22fd056..0000000 --- a/examples/cohere/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -cohere -pgvector -psycopg[binary] diff --git a/examples/colbert/pyproject.toml b/examples/colbert/pyproject.toml new file mode 100644 index 0000000..face4d2 --- /dev/null +++ b/examples/colbert/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "colbert-ai", + "pgvector", + "psycopg[binary]", + "transformers==4.49.0" +] diff --git a/examples/colbert/requirements.txt b/examples/colbert/requirements.txt deleted file mode 100644 index 54b2cb9..0000000 --- a/examples/colbert/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -colbert-ai -pgvector -psycopg[binary] -transformers==4.49.0 diff --git a/examples/colpali/pyproject.toml b/examples/colpali/pyproject.toml new file mode 100644 index 0000000..23fb23f --- /dev/null +++ b/examples/colpali/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "colpali-engine", + "datasets", + "pgvector", + "psycopg[binary]" +] diff --git a/examples/colpali/requirements.txt b/examples/colpali/requirements.txt deleted file mode 100644 index 4cf770d..0000000 --- a/examples/colpali/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -colpali-engine -datasets -pgvector -psycopg[binary] diff --git a/examples/gensim/pyproject.toml b/examples/gensim/pyproject.toml new file mode 100644 index 0000000..7a33423 --- /dev/null +++ b/examples/gensim/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "gensim", + "numpy", + "pgvector", + "psycopg[binary]", + "scipy<1.13" +] diff --git a/examples/gensim/requirements.txt b/examples/gensim/requirements.txt deleted file mode 100644 index 15411cd..0000000 --- a/examples/gensim/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -gensim -numpy -pgvector -psycopg[binary] -scipy<1.13 diff --git a/examples/hybrid_search/pyproject.toml b/examples/hybrid_search/pyproject.toml new file mode 100644 index 0000000..b5a904a --- /dev/null +++ b/examples/hybrid_search/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "pgvector", + "psycopg[binary]", + "sentence-transformers" +] diff --git a/examples/hybrid_search/requirements.txt b/examples/hybrid_search/requirements.txt deleted file mode 100644 index 237dcd1..0000000 --- a/examples/hybrid_search/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pgvector -psycopg[binary] -sentence-transformers diff --git a/examples/image_search/pyproject.toml b/examples/image_search/pyproject.toml new file mode 100644 index 0000000..7644382 --- /dev/null +++ b/examples/image_search/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "matplotlib", + "pgvector", + "psycopg[binary]", + "torch", + "torchvision", + "tqdm" +] diff --git a/examples/image_search/requirements.txt b/examples/image_search/requirements.txt deleted file mode 100644 index 3d82365..0000000 --- a/examples/image_search/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -matplotlib -pgvector -psycopg[binary] -torch -torchvision -tqdm diff --git a/examples/imagehash/pyproject.toml b/examples/imagehash/pyproject.toml new file mode 100644 index 0000000..cf06c2b --- /dev/null +++ b/examples/imagehash/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "datasets", + "imagehash", + "matplotlib", + "pgvector", + "psycopg[binary]" +] diff --git a/examples/imagehash/requirements.txt b/examples/imagehash/requirements.txt deleted file mode 100644 index e3971e6..0000000 --- a/examples/imagehash/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -datasets -imagehash -matplotlib -pgvector -psycopg[binary] diff --git a/examples/implicit/pyproject.toml b/examples/implicit/pyproject.toml new file mode 100644 index 0000000..c03b187 --- /dev/null +++ b/examples/implicit/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "h5py", + "implicit", + "pgvector", + "psycopg[binary]", + "SQLAlchemy" +] diff --git a/examples/implicit/requirements.txt b/examples/implicit/requirements.txt deleted file mode 100644 index 424abbd..0000000 --- a/examples/implicit/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -h5py -implicit -pgvector -psycopg[binary] -SQLAlchemy diff --git a/examples/lightfm/pyproject.toml b/examples/lightfm/pyproject.toml new file mode 100644 index 0000000..c202058 --- /dev/null +++ b/examples/lightfm/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "lightfm", + "pgvector", + "psycopg[binary]", + "SQLAlchemy" +] diff --git a/examples/lightfm/requirements.txt b/examples/lightfm/requirements.txt deleted file mode 100644 index cfa5f51..0000000 --- a/examples/lightfm/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -lightfm -pgvector -psycopg[binary] -SQLAlchemy diff --git a/examples/loading/pyproject.toml b/examples/loading/pyproject.toml new file mode 100644 index 0000000..ee40a36 --- /dev/null +++ b/examples/loading/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "numpy", + "pgvector", + "psycopg[binary]" +] diff --git a/examples/loading/requirements.txt b/examples/loading/requirements.txt deleted file mode 100644 index 1cf8ee9..0000000 --- a/examples/loading/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy -pgvector -psycopg[binary] diff --git a/examples/openai/pyproject.toml b/examples/openai/pyproject.toml new file mode 100644 index 0000000..3e6661a --- /dev/null +++ b/examples/openai/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "openai", + "pgvector", + "psycopg[binary]" +] diff --git a/examples/openai/requirements.txt b/examples/openai/requirements.txt deleted file mode 100644 index 18587e2..0000000 --- a/examples/openai/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -openai -pgvector -psycopg[binary] diff --git a/examples/rag/pyproject.toml b/examples/rag/pyproject.toml new file mode 100644 index 0000000..fa0dcfd --- /dev/null +++ b/examples/rag/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "ollama", + "pgvector", + "psycopg[binary]" +] diff --git a/examples/rag/requirements.txt b/examples/rag/requirements.txt deleted file mode 100644 index 4eb5864..0000000 --- a/examples/rag/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -ollama -pgvector -psycopg[binary] diff --git a/examples/rdkit/pyproject.toml b/examples/rdkit/pyproject.toml new file mode 100644 index 0000000..f8c035a --- /dev/null +++ b/examples/rdkit/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "pgvector", + "psycopg[binary]", + "rdkit" +] diff --git a/examples/rdkit/requirements.txt b/examples/rdkit/requirements.txt deleted file mode 100644 index 85a3e4f..0000000 --- a/examples/rdkit/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pgvector -psycopg[binary] -rdkit diff --git a/examples/sentence_transformers/pyproject.toml b/examples/sentence_transformers/pyproject.toml new file mode 100644 index 0000000..b5a904a --- /dev/null +++ b/examples/sentence_transformers/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "pgvector", + "psycopg[binary]", + "sentence-transformers" +] diff --git a/examples/sentence_transformers/requirements.txt b/examples/sentence_transformers/requirements.txt deleted file mode 100644 index 237dcd1..0000000 --- a/examples/sentence_transformers/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pgvector -psycopg[binary] -sentence-transformers diff --git a/examples/sparse_search/pyproject.toml b/examples/sparse_search/pyproject.toml new file mode 100644 index 0000000..7927c34 --- /dev/null +++ b/examples/sparse_search/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "numpy", + "pgvector", + "psycopg[binary]", + "torch", + "transformers" +] diff --git a/examples/sparse_search/requirements.txt b/examples/sparse_search/requirements.txt deleted file mode 100644 index 3de81c7..0000000 --- a/examples/sparse_search/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy -pgvector -psycopg[binary] -torch -transformers diff --git a/examples/surprise/pyproject.toml b/examples/surprise/pyproject.toml new file mode 100644 index 0000000..94c6f13 --- /dev/null +++ b/examples/surprise/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "example" +version = "0.1.0" +requires-python = ">= 3.9" + +[dependency-groups] +dev = [ + "pgvector", + "psycopg[binary]", + "scikit-surprise", + "SQLAlchemy" +] diff --git a/examples/surprise/requirements.txt b/examples/surprise/requirements.txt deleted file mode 100644 index cb2dca4..0000000 --- a/examples/surprise/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pgvector -psycopg[binary] -scikit-surprise -SQLAlchemy From 016478953c094d908096e775c4366309601a2a38 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 19 Dec 2025 16:35:14 -0800 Subject: [PATCH 023/121] Updated gitignore [skip ci] --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c55ff44..5556c9f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ venv/ *.pyc __pycache__ .pytest_cache/ +*.lock examples/rag/README.md From a3520fef143f01a93bc2e70b00070080d2996c29 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 7 Jan 2026 15:26:48 -0800 Subject: [PATCH 024/121] Updated CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7d0225b..7f46dd6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,7 +9,7 @@ jobs: python: [3.14, 3.9] steps: - uses: actions/checkout@v5 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} - run: pip install --group dev From 8bd3c61fc0954fc7d0e1060ce81a91315ca15010 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 22 Jan 2026 10:20:14 -0800 Subject: [PATCH 025/121] Added tests for subqueries with SQLAlchemy - closes #147 --- tests/test_sqlalchemy.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 4e870cc..7558942 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -396,6 +396,20 @@ def test_sparsevec_l1_distance_orm(self, engine): items = session.scalars(select(Item).order_by(Item.sparse_embedding.l1_distance([1, 1, 1]))) assert [v.id for v in items] == [1, 3, 2] + def test_subquery(self, engine): + create_items() + with Session(engine) as session: + subquery = select(Item.embedding).filter_by(id=1).scalar_subquery() + items = session.query(Item).order_by(Item.embedding.l2_distance(subquery)).all() + assert [v.id for v in items] == [1, 3, 2] + + def test_subquery_orm(self, engine): + create_items() + with Session(engine) as session: + subquery = select(Item.embedding).filter_by(id=1).scalar_subquery() + items = session.scalars(select(Item).order_by(Item.embedding.l2_distance(subquery))) + assert [v.id for v in items] == [1, 3, 2] + def test_filter(self, engine): create_items() with Session(engine) as session: From 5b06584aaf1588b04441e622e28bd9b0aa09924a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 28 Feb 2026 18:44:10 -0800 Subject: [PATCH 026/121] Updated pgvector on CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7f46dd6..85db5b6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: dev-files: true - run: | cd /tmp - git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git + git clone --branch v0.8.2 https://github.com/pgvector/pgvector.git cd pgvector make sudo make install From f606ee2ae8d188600f85a1d20c01524f6c3e8828 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 18 May 2026 20:15:56 -0700 Subject: [PATCH 027/121] Updated checkout action --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 85db5b6..fb4badc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,7 +8,7 @@ jobs: matrix: python: [3.14, 3.9] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} From e835b44219a3faf0de160e76a6d866358f53e182 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 11:52:20 -0700 Subject: [PATCH 028/121] Dropped support for Python < 3.10 [skip ci] --- .github/workflows/build.yml | 2 +- CHANGELOG.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fb4badc..6c7cf3b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.14, 3.9] + python: [3.14, 3.10] steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 diff --git a/CHANGELOG.md b/CHANGELOG.md index 745335f..85f9390 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.5.0 (unreleased) + +- Dropped support for Python < 3.10 + ## 0.4.2 (2025-12-04) - Added support for Django 6 From 7b5663fa3fec88cd7c7a1775a63b297451d39ae5 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 12:07:28 -0700 Subject: [PATCH 029/121] Updated lint [skip ci] --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f0831c4..7462142 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: lint build publish clean lint: - pycodestyle . --ignore=E501 + pycodestyle . --ignore=E501 --exclude=.venv build: python3 -m build From fa5fd61f34bde8b8de99852549d0094b44bf3f40 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 12:08:12 -0700 Subject: [PATCH 030/121] Dropped support for SQLAlchemy < 2 --- .github/workflows/build.yml | 3 -- CHANGELOG.md | 1 + tests/test_sqlalchemy.py | 82 +++++++++++++++++-------------------- 3 files changed, 38 insertions(+), 48 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6c7cf3b..5bac05c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,6 +24,3 @@ jobs: make sudo make install - run: pytest - - - run: pip install "SQLAlchemy<2" -U - - run: pytest tests/test_sqlalchemy.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 85f9390..b5923fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.5.0 (unreleased) - Dropped support for Python < 3.10 +- Dropped support for SQLAlchemy < 2 ## 0.4.2 (2025-12-04) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 7558942..8a260d8 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -6,18 +6,11 @@ import pytest from sqlalchemy import create_engine, event, insert, inspect, select, text, MetaData, Table, Column, Index, Integer, ARRAY from sqlalchemy.exc import StatementError +from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.ext.automap import automap_base -from sqlalchemy.orm import declarative_base, Session +from sqlalchemy.orm import declarative_base, mapped_column, Session from sqlalchemy.sql import func -try: - from sqlalchemy.orm import mapped_column - from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine - sqlalchemy_version = 2 -except ImportError: - mapped_column = Column - sqlalchemy_version = 1 - psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') @@ -30,42 +23,41 @@ def psycopg2_connect(dbapi_connection, connection_record): pg8000_engine = create_engine(f'postgresql+pg8000://{getuser()}@localhost/pgvector_python_test') -if sqlalchemy_version > 1: - psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') - psycopg_type_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') - - @event.listens_for(psycopg_type_engine, "connect") - def psycopg_connect(dbapi_connection, connection_record): - from pgvector.psycopg import register_vector - register_vector(dbapi_connection) - - psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') - psycopg_async_type_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') - - @event.listens_for(psycopg_async_type_engine.sync_engine, "connect") - def psycopg_async_connect(dbapi_connection, connection_record): - from pgvector.psycopg import register_vector_async - dbapi_connection.run_async(register_vector_async) - - asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') - asyncpg_type_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') - - @event.listens_for(asyncpg_type_engine.sync_engine, "connect") - def asyncpg_connect(dbapi_connection, connection_record): - from pgvector.asyncpg import register_vector - dbapi_connection.run_async(register_vector) - -engines = [psycopg2_engine, psycopg2_type_engine, pg8000_engine] -array_engines = [psycopg2_type_engine] -async_engines = [] -async_array_engines = [] - -if sqlalchemy_version > 1: - engines += [psycopg_engine, psycopg_type_engine] - array_engines += [psycopg_type_engine] - # TODO support asyncpg_type_engine - async_engines += [psycopg_async_engine, psycopg_async_type_engine, asyncpg_engine] - async_array_engines += [psycopg_async_type_engine, asyncpg_engine] +psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') +psycopg_type_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') + + +@event.listens_for(psycopg_type_engine, "connect") +def psycopg_connect(dbapi_connection, connection_record): + from pgvector.psycopg import register_vector + register_vector(dbapi_connection) + + +psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') +psycopg_async_type_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + + +@event.listens_for(psycopg_async_type_engine.sync_engine, "connect") +def psycopg_async_connect(dbapi_connection, connection_record): + from pgvector.psycopg import register_vector_async + dbapi_connection.run_async(register_vector_async) + + +asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') +asyncpg_type_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + + +@event.listens_for(asyncpg_type_engine.sync_engine, "connect") +def asyncpg_connect(dbapi_connection, connection_record): + from pgvector.asyncpg import register_vector + dbapi_connection.run_async(register_vector) + + +engines = [psycopg_engine, psycopg_type_engine, psycopg2_engine, psycopg2_type_engine, pg8000_engine] +array_engines = [psycopg_type_engine, psycopg2_type_engine] +# TODO support asyncpg_type_engine +async_engines = [psycopg_async_engine, psycopg_async_type_engine, asyncpg_engine] +async_array_engines = [psycopg_async_type_engine, asyncpg_engine] setup_engine = engines[0] with Session(setup_engine) as session: From 751e10572f22d8cf8b85464eee65837ee94f6feb Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 12:11:36 -0700 Subject: [PATCH 031/121] Fixed CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5bac05c..1ed395f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.14, 3.10] + python: [3.14, "3.10"] steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 From 68e1a80f08cf0e88604e656da9a5aefa8aa4d90b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 12:30:11 -0700 Subject: [PATCH 032/121] Improved typechecking for psycopg tests [skip ci] --- tests/test_psycopg.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 698b34f..fb6aa4c 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -29,73 +29,73 @@ def test_vector(self): def test_vector_binary_format(self): embedding = np.array([1.5, 2, 3]) - res = conn.execute('SELECT %b::vector', (embedding,), binary=True).fetchone()[0] + res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] assert np.array_equal(res, embedding) def test_vector_text_format(self): embedding = np.array([1.5, 2, 3]) - res = conn.execute('SELECT %t::vector', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] assert np.array_equal(res, embedding) def test_vector_binary_format_correct(self): embedding = np.array([1.5, 2, 3]) - res = conn.execute('SELECT %b::vector::text', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %b::vector::text', (embedding,)))[0] assert res == '[1.5,2,3]' def test_vector_text_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous - res = conn.execute('SELECT %t::vector', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] assert np.array_equal(res, [3, 2, 1.5]) def test_vector_binary_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous - res = conn.execute('SELECT %b::vector', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %b::vector', (embedding,)))[0] assert np.array_equal(res, [3, 2, 1.5]) def test_vector_class_binary_format(self): embedding = Vector([1.5, 2, 3]) - res = conn.execute('SELECT %b::vector', (embedding,), binary=True).fetchone()[0] + res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] assert np.array_equal(res, [1.5, 2, 3]) def test_vector_class_text_format(self): embedding = Vector([1.5, 2, 3]) - res = conn.execute('SELECT %t::vector', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] assert np.array_equal(res, [1.5, 2, 3]) def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (half_embedding) VALUES (%s)', (embedding,)) - res = conn.execute('SELECT half_embedding FROM psycopg_items ORDER BY id').fetchone()[0] + res = next(conn.execute('SELECT half_embedding FROM psycopg_items ORDER BY id'))[0] assert res == HalfVector([1.5, 2, 3]) def test_halfvec_binary_format(self): embedding = HalfVector([1.5, 2, 3]) - res = conn.execute('SELECT %b::halfvec', (embedding,), binary=True).fetchone()[0] + res = next(conn.execute('SELECT %b::halfvec', (embedding,), binary=True))[0] assert res == HalfVector([1.5, 2, 3]) def test_halfvec_text_format(self): embedding = HalfVector([1.5, 2, 3]) - res = conn.execute('SELECT %t::halfvec', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %t::halfvec', (embedding,)))[0] assert res == HalfVector([1.5, 2, 3]) def test_bit(self): embedding = Bit([True, False, True]) conn.execute('INSERT INTO psycopg_items (binary_embedding) VALUES (%s)', (embedding,)) - res = conn.execute('SELECT binary_embedding FROM psycopg_items ORDER BY id').fetchone()[0] + res = next(conn.execute('SELECT binary_embedding FROM psycopg_items ORDER BY id'))[0] assert res == '101' def test_bit_binary_format(self): embedding = Bit([False, True, False, True, False, False, False, False, True]) - res = conn.execute('SELECT %b::bit(9)', (embedding,), binary=True).fetchone()[0] + res = next(conn.execute('SELECT %b::bit(9)', (embedding,), binary=True))[0] assert repr(Bit.from_binary(res)) == 'Bit(010100001)' def test_bit_text_format(self): embedding = Bit([False, True, False, True, False, False, False, False, True]) - res = conn.execute('SELECT %t::bit(9)', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %t::bit(9)', (embedding,)))[0] assert res == '010100001' assert repr(Bit(res)) == 'Bit(010100001)' @@ -103,17 +103,17 @@ def test_sparsevec(self): embedding = SparseVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (sparse_embedding) VALUES (%s)', (embedding,)) - res = conn.execute('SELECT sparse_embedding FROM psycopg_items ORDER BY id').fetchone()[0] + res = next(conn.execute('SELECT sparse_embedding FROM psycopg_items ORDER BY id'))[0] assert res == SparseVector([1.5, 2, 3]) def test_sparsevec_binary_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) - res = conn.execute('SELECT %b::sparsevec', (embedding,), binary=True).fetchone()[0] + res = next(conn.execute('SELECT %b::sparsevec', (embedding,), binary=True))[0] assert res == embedding def test_sparsevec_text_format(self): embedding = SparseVector([1.5, 0, 2, 0, 3, 0]) - res = conn.execute('SELECT %t::sparsevec', (embedding,)).fetchone()[0] + res = next(conn.execute('SELECT %t::sparsevec', (embedding,)))[0] assert res == embedding def test_text_copy_from(self): @@ -170,7 +170,7 @@ def test_vector_array(self): embeddings = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] conn.execute('INSERT INTO psycopg_items (embeddings) VALUES (%s)', (embeddings,)) - res = conn.execute('SELECT embeddings FROM psycopg_items ORDER BY id').fetchone() + res = next(conn.execute('SELECT embeddings FROM psycopg_items ORDER BY id')) assert np.array_equal(res[0][0], embeddings[0]) assert np.array_equal(res[0][1], embeddings[1]) From 87d09131c95c1af4a9c81fd0cf687b13b535ee04 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 12:52:23 -0700 Subject: [PATCH 033/121] Improved typechecking for tests [skip ci] --- tests/test_asyncpg.py | 2 +- tests/test_sqlalchemy.py | 26 +++++++++++++------------- tests/test_sqlmodel.py | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 34d66a1..cabfc5a 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -64,7 +64,7 @@ async def test_bit(self): await register_vector(conn) - embedding = asyncpg.BitString('101') + embedding = asyncpg.BitString('101') # type: ignore await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 8a260d8..95b3da7 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -190,7 +190,7 @@ def test_vector(self, engine): with Session(engine) as session: session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert np.array_equal(item.embedding, [1, 2, 3]) def test_vector_l2_distance(self, engine): @@ -245,7 +245,7 @@ def test_halfvec(self, engine): with Session(engine) as session: session.add(Item(id=1, half_embedding=[1, 2, 3])) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert item.half_embedding == HalfVector([1, 2, 3]) def test_halfvec_l2_distance(self, engine): @@ -300,7 +300,7 @@ def test_bit(self, engine): with Session(engine) as session: session.add(Item(id=1, binary_embedding='101')) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert item.binary_embedding == '101' def test_bit_hamming_distance(self, engine): @@ -337,7 +337,7 @@ def test_sparsevec(self, engine): with Session(engine) as session: session.add(Item(id=1, sparse_embedding=[1, 2, 3])) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert item.sparse_embedding == SparseVector([1, 2, 3]) def test_sparsevec_l2_distance(self, engine): @@ -560,7 +560,7 @@ def test_vector_array(self, engine): session.commit() # this fails if the driver does not cast arrays - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert np.array_equal(item.embeddings[0], [1, 2, 3]) assert np.array_equal(item.embeddings[1], [4, 5, 6]) @@ -570,7 +570,7 @@ def test_halfvec_array(self, engine): session.commit() # this fails if the driver does not cast arrays - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert item.half_embeddings == [HalfVector([1, 2, 3]), HalfVector([4, 5, 6])] @@ -587,7 +587,7 @@ async def test_vector(self, engine): async with session.begin(): embedding = np.array([1, 2, 3]) session.add(Item(id=1, embedding=embedding)) - item = await session.get(Item, 1) + item = await session.get_one(Item, 1) assert np.array_equal(item.embedding, embedding) await engine.dispose() @@ -600,7 +600,7 @@ async def test_halfvec(self, engine): async with session.begin(): embedding = [1, 2, 3] session.add(Item(id=1, half_embedding=embedding)) - item = await session.get(Item, 1) + item = await session.get_one(Item, 1) assert item.half_embedding == HalfVector(embedding) await engine.dispose() @@ -613,12 +613,12 @@ async def test_bit(self, engine): async with session.begin(): embedding = asyncpg.BitString('101') if engine == asyncpg_engine else '101' session.add(Item(id=1, binary_embedding=embedding)) - item = await session.get(Item, 1) + item = await session.get_one(Item, 1) assert item.binary_embedding == embedding if engine == asyncpg_engine: session.add(Item(id=2, binary_embedding='101')) - item = await session.get(Item, 2) + item = await session.get_one(Item, 2) assert item.binary_embedding == embedding await engine.dispose() @@ -631,7 +631,7 @@ async def test_sparsevec(self, engine): async with session.begin(): embedding = [1, 2, 3] session.add(Item(id=1, sparse_embedding=embedding)) - item = await session.get(Item, 1) + item = await session.get_one(Item, 1) assert item.sparse_embedding == SparseVector(embedding) await engine.dispose() @@ -662,12 +662,12 @@ async def test_vector_array(self, engine): async with async_session() as session: async with session.begin(): session.add(Item(id=1, embeddings=[Vector([1, 2, 3]), Vector([4, 5, 6])])) - item = await session.get(Item, 1) + item = await session.get_one(Item, 1) assert np.array_equal(item.embeddings[0], [1, 2, 3]) assert np.array_equal(item.embeddings[1], [4, 5, 6]) session.add(Item(id=2, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) - item = await session.get(Item, 2) + item = await session.get_one(Item, 2) assert np.array_equal(item.embeddings[0], [1, 2, 3]) assert np.array_equal(item.embeddings[1], [4, 5, 6]) diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index f4994f4..616beaa 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -75,7 +75,7 @@ def test_vector(self): with Session(engine) as session: session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert np.array_equal(item.embedding, np.array([1, 2, 3])) def test_vector_l2_distance(self): @@ -106,7 +106,7 @@ def test_halfvec(self): with Session(engine) as session: session.add(Item(id=1, half_embedding=[1, 2, 3])) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert item.half_embedding == HalfVector([1, 2, 3]) def test_halfvec_l2_distance(self): @@ -137,7 +137,7 @@ def test_bit(self): with Session(engine) as session: session.add(Item(id=1, binary_embedding='101')) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert item.binary_embedding == '101' def test_bit_hamming_distance(self): @@ -156,7 +156,7 @@ def test_sparsevec(self): with Session(engine) as session: session.add(Item(id=1, sparse_embedding=[1, 2, 3])) session.commit() - item = session.get(Item, 1) + item = session.get_one(Item, 1) assert item.sparse_embedding == SparseVector([1, 2, 3]) def test_sparsevec_l2_distance(self): From f083137e40524f7f370b3fe88088335081d316e7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 12:59:06 -0700 Subject: [PATCH 034/121] Improved typechecking for tests [skip ci] --- tests/test_sqlalchemy.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 95b3da7..f57edc4 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -428,11 +428,11 @@ def test_select_orm(self, engine): def test_avg(self, engine): with Session(engine) as session: - res = session.query(avg(Item.embedding)).first()[0] + res = session.query(avg(Item.embedding)).one()[0] assert res is None session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) - res = session.query(avg(Item.embedding)).first()[0] + res = session.query(avg(Item.embedding)).one()[0] assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) def test_avg_orm(self, engine): @@ -441,16 +441,16 @@ def test_avg_orm(self, engine): assert res is None session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) - res = session.scalars(select(avg(Item.embedding))).first() + res = session.scalars(select(avg(Item.embedding))).one() assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) def test_sum(self, engine): with Session(engine) as session: - res = session.query(sum(Item.embedding)).first()[0] + res = session.query(sum(Item.embedding)).one()[0] assert res is None session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) - res = session.query(sum(Item.embedding)).first()[0] + res = session.query(sum(Item.embedding)).one()[0] assert np.array_equal(res, np.array([5, 7, 9])) def test_sum_orm(self, engine): @@ -459,7 +459,7 @@ def test_sum_orm(self, engine): assert res is None session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) - res = session.scalars(select(sum(Item.embedding))).first() + res = session.scalars(select(sum(Item.embedding))).one() assert np.array_equal(res, np.array([5, 7, 9])) def test_bad_dimensions(self, engine): @@ -611,7 +611,7 @@ async def test_bit(self, engine): async with async_session() as session: async with session.begin(): - embedding = asyncpg.BitString('101') if engine == asyncpg_engine else '101' + embedding = asyncpg.BitString('101') if engine == asyncpg_engine else '101' # type: ignore session.add(Item(id=1, binary_embedding=embedding)) item = await session.get_one(Item, 1) assert item.binary_embedding == embedding @@ -645,7 +645,7 @@ async def test_avg(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = await session.scalars(select(avg(Item.embedding))) - assert np.array_equal(res.first(), [2.5, 3.5, 4.5]) + assert np.array_equal(res.one(), [2.5, 3.5, 4.5]) await engine.dispose() From 6fe7c083a78749b3e79323d8bbedc2d0ec5d84d4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 13:02:24 -0700 Subject: [PATCH 035/121] Improved typechecking for tests [skip ci] --- tests/test_sqlmodel.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 616beaa..5f74148 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -8,17 +8,17 @@ engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') with Session(engine) as session: - session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) + session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) # type: ignore class Item(SQLModel, table=True): __tablename__ = 'sqlmodel_item' id: Optional[int] = Field(default=None, primary_key=True) - embedding: Optional[Any] = Field(default=None, sa_type=VECTOR(3)) - half_embedding: Optional[Any] = Field(default=None, sa_type=HALFVEC(3)) - binary_embedding: Optional[Any] = Field(default=None, sa_type=BIT(3)) - sparse_embedding: Optional[Any] = Field(default=None, sa_type=SPARSEVEC(3)) + embedding: Optional[Any] = Field(default=None, sa_type=VECTOR(3)) # type: ignore + half_embedding: Optional[Any] = Field(default=None, sa_type=HALFVEC(3)) # type: ignore + binary_embedding: Optional[Any] = Field(default=None, sa_type=BIT(3)) # type: ignore + sparse_embedding: Optional[Any] = Field(default=None, sa_type=SPARSEVEC(3)) # type: ignore SQLModel.metadata.drop_all(engine) @@ -202,7 +202,7 @@ def test_vector_avg(self): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.exec(select(avg(Item.embedding))).first() - assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) + assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) # type: ignore def test_vector_sum(self): with Session(engine) as session: @@ -211,7 +211,7 @@ def test_vector_sum(self): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.exec(select(sum(Item.embedding))).first() - assert np.array_equal(res, np.array([5, 7, 9])) + assert np.array_equal(res, np.array([5, 7, 9])) # type: ignore def test_halfvec_avg(self): with Session(engine) as session: From 93fabd2e5167e8076ed77f153f6c9b69e0caf6c0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 13:09:14 -0700 Subject: [PATCH 036/121] Fixed typechecking errors [skip ci] --- pgvector/django/bit.py | 2 +- pgvector/django/halfvec.py | 2 +- pgvector/django/sparsevec.py | 2 +- pgvector/django/vector.py | 2 +- pgvector/sqlalchemy/bit.py | 2 +- pgvector/sqlalchemy/halfvec.py | 4 ++-- pgvector/sqlalchemy/sparsevec.py | 4 ++-- pgvector/sqlalchemy/vector.py | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pgvector/django/bit.py b/pgvector/django/bit.py index 2cc847a..29a31e1 100644 --- a/pgvector/django/bit.py +++ b/pgvector/django/bit.py @@ -21,7 +21,7 @@ def db_type(self, connection): return 'bit' return 'bit(%d)' % self.length - def formfield(self, **kwargs): + def formfield(self, **kwargs): # type: ignore return super().formfield(form_class=BitFormField, **kwargs) diff --git a/pgvector/django/halfvec.py b/pgvector/django/halfvec.py index 3aeb90f..36101ca 100644 --- a/pgvector/django/halfvec.py +++ b/pgvector/django/halfvec.py @@ -40,7 +40,7 @@ def get_prep_value(self, value): def value_to_string(self, obj): return self.get_prep_value(self.value_from_object(obj)) - def formfield(self, **kwargs): + def formfield(self, **kwargs): # type: ignore return super().formfield(form_class=HalfVectorFormField, **kwargs) diff --git a/pgvector/django/sparsevec.py b/pgvector/django/sparsevec.py index 580f27c..7f4db77 100644 --- a/pgvector/django/sparsevec.py +++ b/pgvector/django/sparsevec.py @@ -35,7 +35,7 @@ def get_prep_value(self, value): def value_to_string(self, obj): return self.get_prep_value(self.value_from_object(obj)) - def formfield(self, **kwargs): + def formfield(self, **kwargs): # type: ignore return super().formfield(form_class=SparseVectorFormField, **kwargs) diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index 861cfde..dde96f8 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -48,7 +48,7 @@ def run_validators(self, value): value = value.tolist() super().run_validators(value) - def formfield(self, **kwargs): + def formfield(self, **kwargs): # type: ignore return super().formfield(form_class=VectorFormField, **kwargs) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index 1ea85c3..bbf822e 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -35,4 +35,4 @@ def jaccard_distance(self, other): # for reflection -ischema_names['bit'] = BIT +ischema_names['bit'] = BIT # type: ignore diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index 10688b5..f51d1a6 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -25,7 +25,7 @@ def literal_processor(self, dialect): string_literal_processor = self._string._cached_literal_processor(dialect) def process(value): - return string_literal_processor(HalfVector._to_db(value, self.dim)) + return string_literal_processor(HalfVector._to_db(value, self.dim)) # type: ignore return process def result_processor(self, dialect, coltype): @@ -48,4 +48,4 @@ def l1_distance(self, other): # for reflection -ischema_names['halfvec'] = HALFVEC +ischema_names['halfvec'] = HALFVEC # type: ignore diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index 0058679..6828bfd 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -25,7 +25,7 @@ def literal_processor(self, dialect): string_literal_processor = self._string._cached_literal_processor(dialect) def process(value): - return string_literal_processor(SparseVector._to_db(value, self.dim)) + return string_literal_processor(SparseVector._to_db(value, self.dim)) # type: ignore return process def result_processor(self, dialect, coltype): @@ -48,4 +48,4 @@ def l1_distance(self, other): # for reflection -ischema_names['sparsevec'] = SPARSEVEC +ischema_names['sparsevec'] = SPARSEVEC # type: ignore diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index 5a1e11f..a9e6f22 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -25,7 +25,7 @@ def literal_processor(self, dialect): string_literal_processor = self._string._cached_literal_processor(dialect) def process(value): - return string_literal_processor(Vector._to_db(value, self.dim)) + return string_literal_processor(Vector._to_db(value, self.dim)) # type: ignore return process def result_processor(self, dialect, coltype): @@ -48,4 +48,4 @@ def l1_distance(self, other): # for reflection -ischema_names['vector'] = VECTOR +ischema_names['vector'] = VECTOR # type: ignore From 563ebdfa836e9b9e07b23b8046a298862447e12a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 13:15:54 -0700 Subject: [PATCH 037/121] Improved typechecking for tests [skip ci] --- Makefile | 3 +++ tests/test_django.py | 2 ++ tests/test_sqlmodel.py | 44 +++++++++++++++++++++--------------------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 7462142..ccf995a 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,9 @@ lint: pycodestyle . --ignore=E501 --exclude=.venv +check: + ty check pgvector tests + build: python3 -m build diff --git a/tests/test_django.py b/tests/test_django.py index 7a8a6eb..9e44401 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -1,3 +1,5 @@ +# type: ignore + import django from django.conf import settings from django.contrib.postgres.fields import ArrayField diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 5f74148..d67c735 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -26,7 +26,7 @@ class Item(SQLModel, table=True): index = Index( 'sqlmodel_index', - Item.embedding, + Item.embedding, # type: ignore postgresql_using='hnsw', postgresql_with={'m': 16, 'ef_construction': 64}, postgresql_ops={'embedding': 'vector_l2_ops'} @@ -65,10 +65,10 @@ def test_orm(self): assert items[0].id == 1 assert items[1].id == 2 assert items[2].id == 3 - assert np.array_equal(items[0].embedding, np.array([1.5, 2, 3])) - assert items[0].embedding.dtype == np.float32 - assert np.array_equal(items[1].embedding, np.array([4, 5, 6])) - assert items[1].embedding.dtype == np.float32 + assert np.array_equal(items[0].embedding, np.array([1.5, 2, 3])) # type: ignore + assert items[0].embedding.dtype == np.float32 # type: ignore + assert np.array_equal(items[1].embedding, np.array([4, 5, 6])) # type: ignore + assert items[1].embedding.dtype == np.float32 # type: ignore assert items[2].embedding is None def test_vector(self): @@ -76,30 +76,30 @@ def test_vector(self): session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() item = session.get_one(Item, 1) - assert np.array_equal(item.embedding, np.array([1, 2, 3])) + assert np.array_equal(item.embedding, np.array([1, 2, 3])) # type: ignore def test_vector_l2_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.embedding.l2_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.embedding.l2_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 3, 2] def test_vector_max_inner_product(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.embedding.max_inner_product([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.embedding.max_inner_product([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [2, 3, 1] def test_vector_cosine_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.embedding.cosine_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.embedding.cosine_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 2, 3] def test_vector_l1_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.embedding.l1_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.embedding.l1_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 3, 2] def test_halfvec(self): @@ -112,25 +112,25 @@ def test_halfvec(self): def test_halfvec_l2_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.half_embedding.l2_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.half_embedding.l2_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 3, 2] def test_halfvec_max_inner_product(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.half_embedding.max_inner_product([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.half_embedding.max_inner_product([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [2, 3, 1] def test_halfvec_cosine_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.half_embedding.cosine_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.half_embedding.cosine_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 2, 3] def test_halfvec_l1_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.half_embedding.l1_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.half_embedding.l1_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 3, 2] def test_bit(self): @@ -143,13 +143,13 @@ def test_bit(self): def test_bit_hamming_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.binary_embedding.hamming_distance('101'))) + items = session.exec(select(Item).order_by(Item.binary_embedding.hamming_distance('101'))) # type: ignore assert [v.id for v in items] == [2, 3, 1] def test_bit_jaccard_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.binary_embedding.jaccard_distance('101'))) + items = session.exec(select(Item).order_by(Item.binary_embedding.jaccard_distance('101'))) # type: ignore assert [v.id for v in items] == [2, 3, 1] def test_sparsevec(self): @@ -162,37 +162,37 @@ def test_sparsevec(self): def test_sparsevec_l2_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.sparse_embedding.l2_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.sparse_embedding.l2_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 3, 2] def test_sparsevec_max_inner_product(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.sparse_embedding.max_inner_product([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.sparse_embedding.max_inner_product([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [2, 3, 1] def test_sparsevec_cosine_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.sparse_embedding.cosine_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.sparse_embedding.cosine_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 2, 3] def test_sparsevec_l1_distance(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).order_by(Item.sparse_embedding.l1_distance([1, 1, 1]))) + items = session.exec(select(Item).order_by(Item.sparse_embedding.l1_distance([1, 1, 1]))) # type: ignore assert [v.id for v in items] == [1, 3, 2] def test_filter(self): create_items() with Session(engine) as session: - items = session.exec(select(Item).filter(Item.embedding.l2_distance([1, 1, 1]) < 1)) + items = session.exec(select(Item).filter(Item.embedding.l2_distance([1, 1, 1]) < 1)) # type: ignore assert [v.id for v in items] == [1] def test_select(self): with Session(engine) as session: session.add(Item(embedding=[2, 3, 3])) - items = session.exec(select(Item.embedding.l2_distance([1, 1, 1]))).all() + items = session.exec(select(Item.embedding.l2_distance([1, 1, 1]))).all() # type: ignore assert items[0] == 3 def test_vector_avg(self): From 6a2df2a08f8e6a76fae05b0a4fb8466d723ddc48 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 31 May 2026 16:11:51 -0700 Subject: [PATCH 038/121] Dropped support for Python < 3.10, part 2 [skip ci] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5716d05..3fb3ab5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ {name = "Andrew Kane", email = "andrew@ankane.org"} ] license = "MIT" -requires-python = ">= 3.9" +requires-python = ">= 3.10" dependencies = [ "numpy" ] From c1233e2c05bc2f2e0de9042e5d02f81e0c8514b2 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 18:50:55 -0700 Subject: [PATCH 039/121] Fixed link [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 95d5fbe..24f72bb 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [pgvector](https://github.com/pgvector/pgvector) support for Python -Supports [Django](https://github.com/django/django), [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy), [SQLModel](https://github.com/tiangolo/sqlmodel), [Psycopg 3](https://github.com/psycopg/psycopg), [Psycopg 2](https://github.com/psycopg/psycopg2), [asyncpg](https://github.com/MagicStack/asyncpg), [pg8000](https://github.com/tlocke/pg8000), and [Peewee](https://github.com/coleifer/peewee) +Supports [Django](https://github.com/django/django), [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy), [SQLModel](https://github.com/tiangolo/sqlmodel), [Psycopg 3](https://github.com/psycopg/psycopg), [Psycopg 2](https://github.com/psycopg/psycopg2), [asyncpg](https://github.com/MagicStack/asyncpg), [pg8000](https://codeberg.org/tlocke/pg8000), and [Peewee](https://github.com/coleifer/peewee) [![Build Status](https://github.com/pgvector/pgvector-python/actions/workflows/build.yml/badge.svg)](https://github.com/pgvector/pgvector-python/actions) From afa127fcd84639d7a0f5f02bf46b47a533e47d6c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 19:42:51 -0700 Subject: [PATCH 040/121] Added type hints [skip ci] --- CHANGELOG.md | 1 + pgvector/asyncpg/register.py | 3 ++- pgvector/bit.py | 28 +++++++++++---------- pgvector/halfvec.py | 34 +++++++++++++------------ pgvector/pg8000/register.py | 3 ++- pgvector/psycopg/bit.py | 11 +++++--- pgvector/psycopg/halfvec.py | 20 +++++++++------ pgvector/psycopg/register.py | 8 ++++-- pgvector/psycopg/sparsevec.py | 20 +++++++++------ pgvector/psycopg/vector.py | 21 ++++++++++------ pgvector/psycopg2/halfvec.py | 11 ++++---- pgvector/psycopg2/register.py | 4 +-- pgvector/psycopg2/sparsevec.py | 11 ++++---- pgvector/psycopg2/vector.py | 11 ++++---- pgvector/py.typed | 0 pgvector/sparsevec.py | 46 ++++++++++++++++++---------------- pgvector/vector.py | 30 +++++++++++----------- 17 files changed, 152 insertions(+), 110 deletions(-) create mode 100644 pgvector/py.typed diff --git a/CHANGELOG.md b/CHANGELOG.md index b5923fa..9ff616b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 0.5.0 (unreleased) +- Added type hints - Dropped support for Python < 3.10 - Dropped support for SQLAlchemy < 2 diff --git a/pgvector/asyncpg/register.py b/pgvector/asyncpg/register.py index 63726f3..6718850 100644 --- a/pgvector/asyncpg/register.py +++ b/pgvector/asyncpg/register.py @@ -1,7 +1,8 @@ +from asyncpg import Connection from .. import Vector, HalfVector, SparseVector -async def register_vector(conn, schema='public'): +async def register_vector(conn: Connection, schema: str = 'public') -> None: await conn.set_type_codec( 'vector', schema=schema, diff --git a/pgvector/bit.py b/pgvector/bit.py index 26a9d8d..cb1bec4 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -1,10 +1,12 @@ +from __future__ import annotations import numpy as np from struct import pack, unpack_from +from typing import Any from warnings import warn class Bit: - def __init__(self, value): + def __init__(self, value: Any) -> None: if isinstance(value, bytes): self._len = 8 * len(value) self._data = value @@ -26,32 +28,32 @@ def __init__(self, value): self._len = len(value) self._data = np.packbits(value).tobytes() - def __repr__(self): + def __repr__(self) -> str: return f'Bit({self.to_text()})' - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, self.__class__): return self._len == other._len and self._data == other._data return False - def to_list(self): + def to_list(self) -> list[bool]: return self.to_numpy().tolist() - def to_numpy(self): + def to_numpy(self) -> np.ndarray: return np.unpackbits(np.frombuffer(self._data, dtype=np.uint8), count=self._len).astype(bool) - def to_text(self): + def to_text(self) -> str: return ''.join(format(v, '08b') for v in self._data)[:self._len] - def to_binary(self): + def to_binary(self) -> bytes: return pack('>i', self._len) + self._data @classmethod - def from_text(cls, value): + def from_text(cls, value: str) -> Bit: return cls(str(value)) @classmethod - def from_binary(cls, value): + def from_binary(cls, value: bytes) -> Bit: if not isinstance(value, bytes): raise ValueError('expected bytes') @@ -61,15 +63,15 @@ def from_binary(cls, value): return bit @classmethod - def _to_db(cls, value): - if not isinstance(value, cls): + def _to_db(cls, value: Bit) -> str: + if not isinstance(value, Bit): raise ValueError('expected bit') return value.to_text() @classmethod - def _to_db_binary(cls, value): - if not isinstance(value, cls): + def _to_db_binary(cls, value: Bit) -> bytes: + if not isinstance(value, Bit): raise ValueError('expected bit') return value.to_binary() diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index f335f2f..755b9d6 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -1,9 +1,11 @@ +from __future__ import annotations import numpy as np from struct import pack, unpack_from +from typing import Any class HalfVector: - def __init__(self, value): + def __init__(self, value: Any) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f2': value = np.asarray(value, dtype='>f2') @@ -13,40 +15,40 @@ def __init__(self, value): self._value = value - def __repr__(self): + def __repr__(self) -> str: return f'HalfVector({self.to_list()})' - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, self.__class__): return np.array_equal(self.to_numpy(), other.to_numpy()) return False - def dimensions(self): + def dimensions(self) -> int: return len(self._value) - def to_list(self): + def to_list(self) -> list[float]: return self._value.tolist() - def to_numpy(self): + def to_numpy(self) -> np.ndarray: return self._value - def to_text(self): + def to_text(self) -> str: return '[' + ','.join([str(float(v)) for v in self._value]) + ']' - def to_binary(self): + def to_binary(self) -> bytes: return pack('>HH', self.dimensions(), 0) + self._value.tobytes() @classmethod - def from_text(cls, value): + def from_text(cls, value: str) -> HalfVector: return cls([float(v) for v in value[1:-1].split(',')]) @classmethod - def from_binary(cls, value): + def from_binary(cls, value: bytes) -> HalfVector: dim, unused = unpack_from('>HH', value) return cls(np.frombuffer(value, dtype='>f2', count=dim, offset=4)) @classmethod - def _to_db(cls, value, dim=None): + def _to_db(cls, value: Any, dim: int | None = None) -> str | None: if value is None: return value @@ -59,7 +61,7 @@ def _to_db(cls, value, dim=None): return value.to_text() @classmethod - def _to_db_binary(cls, value): + def _to_db_binary(cls, value: Any) -> bytes | None: if value is None: return value @@ -69,15 +71,15 @@ def _to_db_binary(cls, value): return value.to_binary() @classmethod - def _from_db(cls, value): - if value is None or isinstance(value, cls): + def _from_db(cls, value: str | HalfVector | None) -> HalfVector | None: + if value is None or isinstance(value, HalfVector): return value return cls.from_text(value) @classmethod - def _from_db_binary(cls, value): - if value is None or isinstance(value, cls): + def _from_db_binary(cls, value: bytes | HalfVector | None) -> HalfVector | None: + if value is None or isinstance(value, HalfVector): return value return cls.from_binary(value) diff --git a/pgvector/pg8000/register.py b/pgvector/pg8000/register.py index 15ee219..476495b 100644 --- a/pgvector/pg8000/register.py +++ b/pgvector/pg8000/register.py @@ -1,8 +1,9 @@ import numpy as np +from pg8000.native import Connection from .. import Vector, HalfVector, SparseVector -def register_vector(conn): +def register_vector(conn: Connection) -> None: # use to_regtype to get first matching type in search path res = conn.run("SELECT typname, oid FROM pg_type WHERE oid IN (to_regtype('vector'), to_regtype('halfvec'), to_regtype('sparsevec'))") type_info = dict(res) diff --git a/pgvector/psycopg/bit.py b/pgvector/psycopg/bit.py index cffe8fb..8cfacbf 100644 --- a/pgvector/psycopg/bit.py +++ b/pgvector/psycopg/bit.py @@ -1,13 +1,18 @@ +from psycopg import BaseConnection +from psycopg.types import TypeInfo from psycopg.adapt import Dumper from psycopg.pq import Format +from typing import Any, TypeAlias from .. import Bit +Buffer: TypeAlias = bytes | bytearray | memoryview + class BitDumper(Dumper): format = Format.TEXT - def dump(self, obj): + def dump(self, obj: Bit) -> Buffer | None: return Bit._to_db(obj).encode('utf8') @@ -15,11 +20,11 @@ class BitBinaryDumper(BitDumper): format = Format.BINARY - def dump(self, obj): + def dump(self, obj: Bit) -> Buffer | None: return Bit._to_db_binary(obj) -def register_bit_info(context, info): +def register_bit_info(context: BaseConnection[Any], info: TypeInfo) -> None: info.register(context) # add oid to anonymous class for set_types diff --git a/pgvector/psycopg/halfvec.py b/pgvector/psycopg/halfvec.py index b3a0060..5ded56d 100644 --- a/pgvector/psycopg/halfvec.py +++ b/pgvector/psycopg/halfvec.py @@ -1,21 +1,27 @@ +from psycopg import BaseConnection from psycopg.adapt import Loader, Dumper from psycopg.pq import Format +from psycopg.types import TypeInfo +from typing import Any, TypeAlias from .. import HalfVector +Buffer: TypeAlias = bytes | bytearray | memoryview + class HalfVectorDumper(Dumper): format = Format.TEXT - def dump(self, obj): - return HalfVector._to_db(obj).encode('utf8') + def dump(self, obj: HalfVector) -> Buffer | None: + value = HalfVector._to_db(obj) + return value if value is None else value.encode('utf8') class HalfVectorBinaryDumper(HalfVectorDumper): format = Format.BINARY - def dump(self, obj): + def dump(self, obj: HalfVector) -> Buffer | None: return HalfVector._to_db_binary(obj) @@ -23,7 +29,7 @@ class HalfVectorLoader(Loader): format = Format.TEXT - def load(self, data): + def load(self, data: Buffer) -> HalfVector | None: if isinstance(data, memoryview): data = bytes(data) return HalfVector._from_db(data.decode('utf8')) @@ -33,13 +39,13 @@ class HalfVectorBinaryLoader(HalfVectorLoader): format = Format.BINARY - def load(self, data): - if isinstance(data, memoryview): + def load(self, data: Buffer) -> HalfVector | None: + if isinstance(data, (bytearray, memoryview)): data = bytes(data) return HalfVector._from_db_binary(data) -def register_halfvec_info(context, info): +def register_halfvec_info(context: BaseConnection[Any], info: TypeInfo) -> None: info.register(context) # add oid to anonymous class for set_types diff --git a/pgvector/psycopg/register.py b/pgvector/psycopg/register.py index b93fd3e..0256b94 100644 --- a/pgvector/psycopg/register.py +++ b/pgvector/psycopg/register.py @@ -1,15 +1,18 @@ +from psycopg import AsyncConnection, Connection from psycopg.types import TypeInfo +from typing import Any from .bit import register_bit_info from .halfvec import register_halfvec_info from .sparsevec import register_sparsevec_info from .vector import register_vector_info -def register_vector(context): +def register_vector(context: Connection[Any]) -> None: info = TypeInfo.fetch(context, 'vector') register_vector_info(context, info) info = TypeInfo.fetch(context, 'bit') + assert info is not None register_bit_info(context, info) info = TypeInfo.fetch(context, 'halfvec') @@ -21,11 +24,12 @@ def register_vector(context): register_sparsevec_info(context, info) -async def register_vector_async(context): +async def register_vector_async(context: AsyncConnection[Any]) -> None: info = await TypeInfo.fetch(context, 'vector') register_vector_info(context, info) info = await TypeInfo.fetch(context, 'bit') + assert info is not None register_bit_info(context, info) info = await TypeInfo.fetch(context, 'halfvec') diff --git a/pgvector/psycopg/sparsevec.py b/pgvector/psycopg/sparsevec.py index 384a0e1..b209b8f 100644 --- a/pgvector/psycopg/sparsevec.py +++ b/pgvector/psycopg/sparsevec.py @@ -1,21 +1,27 @@ +from psycopg import BaseConnection from psycopg.adapt import Loader, Dumper from psycopg.pq import Format +from psycopg.types import TypeInfo +from typing import Any, TypeAlias from .. import SparseVector +Buffer: TypeAlias = bytes | bytearray | memoryview + class SparseVectorDumper(Dumper): format = Format.TEXT - def dump(self, obj): - return SparseVector._to_db(obj).encode('utf8') + def dump(self, obj: SparseVector) -> Buffer | None: + value = SparseVector._to_db(obj) + return value if value is None else value.encode('utf8') class SparseVectorBinaryDumper(SparseVectorDumper): format = Format.BINARY - def dump(self, obj): + def dump(self, obj: SparseVector) -> Buffer | None: return SparseVector._to_db_binary(obj) @@ -23,7 +29,7 @@ class SparseVectorLoader(Loader): format = Format.TEXT - def load(self, data): + def load(self, data: Buffer) -> SparseVector | None: if isinstance(data, memoryview): data = bytes(data) return SparseVector._from_db(data.decode('utf8')) @@ -33,13 +39,13 @@ class SparseVectorBinaryLoader(SparseVectorLoader): format = Format.BINARY - def load(self, data): - if isinstance(data, memoryview): + def load(self, data: Buffer) -> SparseVector | None: + if isinstance(data, (bytearray, memoryview)): data = bytes(data) return SparseVector._from_db_binary(data) -def register_sparsevec_info(context, info): +def register_sparsevec_info(context: BaseConnection[Any], info: TypeInfo) -> None: info.register(context) # add oid to anonymous class for set_types diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index db9e826..1492017 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -1,22 +1,29 @@ +import numpy as np import psycopg +from psycopg import BaseConnection from psycopg.adapt import Loader, Dumper from psycopg.pq import Format +from psycopg.types import TypeInfo +from typing import Any, TypeAlias from .. import Vector +Buffer: TypeAlias = bytes | bytearray | memoryview + class VectorDumper(Dumper): format = Format.TEXT - def dump(self, obj): - return Vector._to_db(obj).encode('utf8') + def dump(self, obj: Vector) -> Buffer | None: + value = Vector._to_db(obj) + return value if value is None else value.encode('utf8') class VectorBinaryDumper(VectorDumper): format = Format.BINARY - def dump(self, obj): + def dump(self, obj: Vector) -> Buffer | None: return Vector._to_db_binary(obj) @@ -24,7 +31,7 @@ class VectorLoader(Loader): format = Format.TEXT - def load(self, data): + def load(self, data: Buffer) -> np.ndarray | None: if isinstance(data, memoryview): data = bytes(data) return Vector._from_db(data.decode('utf8')) @@ -34,13 +41,13 @@ class VectorBinaryLoader(VectorLoader): format = Format.BINARY - def load(self, data): - if isinstance(data, memoryview): + def load(self, data: Buffer) -> np.ndarray | None: + if isinstance(data, (bytearray, memoryview)): data = bytes(data) return Vector._from_db_binary(data) -def register_vector_info(context, info): +def register_vector_info(context: BaseConnection[Any], info: TypeInfo | None) -> None: if info is None: raise psycopg.ProgrammingError('vector type not found in the database') info.register(context) diff --git a/pgvector/psycopg2/halfvec.py b/pgvector/psycopg2/halfvec.py index 0a4c736..5b72f5c 100644 --- a/pgvector/psycopg2/halfvec.py +++ b/pgvector/psycopg2/halfvec.py @@ -1,20 +1,21 @@ -from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type +from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type +from typing import Any from .. import HalfVector class HalfvecAdapter: - def __init__(self, value): + def __init__(self, value: Any) -> None: self._value = value - def getquoted(self): + def getquoted(self) -> Any: return adapt(HalfVector._to_db(self._value)).getquoted() -def cast_halfvec(value, cur): +def cast_halfvec(value: str | None, cur: cursor) -> HalfVector | None: return HalfVector._from_db(value) -def register_halfvec_info(oid, array_oid, scope): +def register_halfvec_info(oid: int, array_oid: int | None, scope: connection | cursor | None) -> None: halfvec = new_type((oid,), 'HALFVEC', cast_halfvec) register_type(halfvec, scope) diff --git a/pgvector/psycopg2/register.py b/pgvector/psycopg2/register.py index 1bc9d44..22beb8c 100644 --- a/pgvector/psycopg2/register.py +++ b/pgvector/psycopg2/register.py @@ -1,12 +1,12 @@ import psycopg2 -from psycopg2.extensions import cursor +from psycopg2.extensions import connection, cursor from .halfvec import register_halfvec_info from .sparsevec import register_sparsevec_info from .vector import register_vector_info # note: register_adapter is always global -def register_vector(conn_or_curs, globally=False, arrays=True): +def register_vector(conn_or_curs: connection | cursor, globally: bool = False, arrays: bool = True) -> None: conn = conn_or_curs if hasattr(conn_or_curs, 'cursor') else conn_or_curs.connection cur = conn.cursor(cursor_factory=cursor) scope = None if globally else conn_or_curs diff --git a/pgvector/psycopg2/sparsevec.py b/pgvector/psycopg2/sparsevec.py index 148eff2..073e45b 100644 --- a/pgvector/psycopg2/sparsevec.py +++ b/pgvector/psycopg2/sparsevec.py @@ -1,20 +1,21 @@ -from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type +from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type +from typing import Any from .. import SparseVector class SparsevecAdapter: - def __init__(self, value): + def __init__(self, value: Any) -> None: self._value = value - def getquoted(self): + def getquoted(self) -> Any: return adapt(SparseVector._to_db(self._value)).getquoted() -def cast_sparsevec(value, cur): +def cast_sparsevec(value: str | None, cur: cursor) -> SparseVector | None: return SparseVector._from_db(value) -def register_sparsevec_info(oid, array_oid, scope): +def register_sparsevec_info(oid: int, array_oid: int | None, scope: connection | cursor | None) -> None: sparsevec = new_type((oid,), 'SPARSEVEC', cast_sparsevec) register_type(sparsevec, scope) diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 562de18..0e2d1e3 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -1,21 +1,22 @@ import numpy as np -from psycopg2.extensions import adapt, new_array_type, new_type, register_adapter, register_type +from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type +from typing import Any from .. import Vector class VectorAdapter: - def __init__(self, value): + def __init__(self, value: Any) -> None: self._value = value - def getquoted(self): + def getquoted(self) -> Any: return adapt(Vector._to_db(self._value)).getquoted() -def cast_vector(value, cur): +def cast_vector(value: str | None, cur: cursor) -> np.ndarray | None: return Vector._from_db(value) -def register_vector_info(oid, array_oid, scope): +def register_vector_info(oid: int, array_oid: int | None, scope: connection | cursor | None) -> None: vector = new_type((oid,), 'VECTOR', cast_vector) register_type(vector, scope) diff --git a/pgvector/py.typed b/pgvector/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 895fbd0..388116d 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -1,11 +1,13 @@ +from __future__ import annotations import numpy as np from struct import pack, unpack_from +from typing import Any NO_DEFAULT = object() class SparseVector: - def __init__(self, value, dimensions=NO_DEFAULT, /): + def __init__(self, value: Any, dimensions: int | Any = NO_DEFAULT, /) -> None: if value.__class__.__module__.startswith('scipy.sparse.'): if dimensions is not NO_DEFAULT: raise ValueError('extra argument') @@ -22,22 +24,22 @@ def __init__(self, value, dimensions=NO_DEFAULT, /): self._from_dense(value) - def __repr__(self): + def __repr__(self) -> str: elements = dict(zip(self._indices, self._values)) return f'SparseVector({elements}, {self._dim})' - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, self.__class__): return self.dimensions() == other.dimensions() and self.indices() == other.indices() and self.values() == other.values() return False - def dimensions(self): + def dimensions(self) -> int: return self._dim - def indices(self): + def indices(self) -> list[int]: return self._indices - def values(self): + def values(self) -> list[float]: return self._values def to_coo(self): @@ -46,26 +48,26 @@ def to_coo(self): coords = ([0] * len(self._indices), self._indices) return coo_array((self._values, coords), shape=(1, self._dim)) - def to_list(self): + def to_list(self) -> list[float]: vec = [0.0] * self._dim for i, v in zip(self._indices, self._values): vec[i] = v return vec - def to_numpy(self): + def to_numpy(self) -> np.ndarray: vec = np.repeat(0.0, self._dim).astype(np.float32) for i, v in zip(self._indices, self._values): vec[i] = v return vec - def to_text(self): + def to_text(self) -> str: return '{' + ','.join([f'{int(i) + 1}:{float(v)}' for i, v in zip(self._indices, self._values)]) + '}/' + str(int(self._dim)) - def to_binary(self): + def to_binary(self) -> bytes: nnz = len(self._indices) return pack(f'>iii{nnz}i{nnz}f', self._dim, nnz, 0, *self._indices, *self._values) - def _from_dict(self, d, dim): + def _from_dict(self, d: dict[int, float], dim: int) -> None: elements = [(i, v) for i, v in d.items() if v != 0] elements.sort() @@ -73,7 +75,7 @@ def _from_dict(self, d, dim): self._indices = [int(v[0]) for v in elements] self._values = [float(v[1]) for v in elements] - def _from_sparse(self, value): + def _from_sparse(self, value: Any) -> None: value = value.tocoo() if value.ndim == 1: @@ -90,13 +92,13 @@ def _from_sparse(self, value): self._indices = value.col.tolist() self._values = value.data.tolist() - def _from_dense(self, value): + def _from_dense(self, value: list[float]) -> None: self._dim = len(value) self._indices = [i for i, v in enumerate(value) if v != 0] self._values = [float(value[i]) for i in self._indices] @classmethod - def from_text(cls, value): + def from_text(cls, value: str) -> SparseVector: elements, dim = value.split('/', 2) indices = [] values = [] @@ -109,14 +111,14 @@ def from_text(cls, value): return cls._from_parts(int(dim), indices, values) @classmethod - def from_binary(cls, value): + def from_binary(cls, value: bytes) -> SparseVector: dim, nnz, unused = unpack_from('>iii', value) indices = unpack_from(f'>{nnz}i', value, 12) values = unpack_from(f'>{nnz}f', value, 12 + nnz * 4) return cls._from_parts(int(dim), list(indices), list(values)) @classmethod - def _from_parts(cls, dim, indices, values): + def _from_parts(cls, dim: int, indices: list[int], values: list[float]) -> SparseVector: vec = cls.__new__(cls) vec._dim = dim vec._indices = indices @@ -124,7 +126,7 @@ def _from_parts(cls, dim, indices, values): return vec @classmethod - def _to_db(cls, value, dim=None): + def _to_db(cls, value: Any, dim: int | None = None) -> str | None: if value is None: return value @@ -137,7 +139,7 @@ def _to_db(cls, value, dim=None): return value.to_text() @classmethod - def _to_db_binary(cls, value): + def _to_db_binary(cls, value: Any) -> bytes | None: if value is None: return value @@ -147,15 +149,15 @@ def _to_db_binary(cls, value): return value.to_binary() @classmethod - def _from_db(cls, value): - if value is None or isinstance(value, cls): + def _from_db(cls, value: str | SparseVector | None) -> SparseVector | None: + if value is None or isinstance(value, SparseVector): return value return cls.from_text(value) @classmethod - def _from_db_binary(cls, value): - if value is None or isinstance(value, cls): + def _from_db_binary(cls, value: bytes | SparseVector | None) -> SparseVector | None: + if value is None or isinstance(value, SparseVector): return value return cls.from_binary(value) diff --git a/pgvector/vector.py b/pgvector/vector.py index ebbcafd..f5ba742 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -1,9 +1,11 @@ +from __future__ import annotations import numpy as np from struct import pack, unpack_from +from typing import Any class Vector: - def __init__(self, value): + def __init__(self, value: Any) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f4': value = np.asarray(value, dtype='>f4') @@ -13,40 +15,40 @@ def __init__(self, value): self._value = value - def __repr__(self): + def __repr__(self) -> str: return f'Vector({self.to_list()})' - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, self.__class__): return np.array_equal(self.to_numpy(), other.to_numpy()) return False - def dimensions(self): + def dimensions(self) -> int: return len(self._value) - def to_list(self): + def to_list(self) -> list[float]: return self._value.tolist() - def to_numpy(self): + def to_numpy(self) -> np.ndarray: return self._value - def to_text(self): + def to_text(self) -> str: return '[' + ','.join([str(float(v)) for v in self._value]) + ']' - def to_binary(self): + def to_binary(self) -> bytes: return pack('>HH', self.dimensions(), 0) + self._value.tobytes() @classmethod - def from_text(cls, value): + def from_text(cls, value: str) -> Vector: return cls([float(v) for v in value[1:-1].split(',')]) @classmethod - def from_binary(cls, value): + def from_binary(cls, value: bytes) -> Vector: dim, unused = unpack_from('>HH', value) return cls(np.frombuffer(value, dtype='>f4', count=dim, offset=4)) @classmethod - def _to_db(cls, value, dim=None): + def _to_db(cls, value: Any, dim: int | None = None) -> str | None: if value is None: return value @@ -59,7 +61,7 @@ def _to_db(cls, value, dim=None): return value.to_text() @classmethod - def _to_db_binary(cls, value): + def _to_db_binary(cls, value: Any) -> bytes | None: if value is None: return value @@ -69,14 +71,14 @@ def _to_db_binary(cls, value): return value.to_binary() @classmethod - def _from_db(cls, value): + def _from_db(cls, value: str | np.ndarray | None) -> np.ndarray | None: if value is None or isinstance(value, np.ndarray): return value return cls.from_text(value).to_numpy().astype(np.float32) @classmethod - def _from_db_binary(cls, value): + def _from_db_binary(cls, value: bytes | np.ndarray | None) -> np.ndarray | None: if value is None or isinstance(value, np.ndarray): return value From d95fa2ef366039ac4b5e3badd679b074b9f1085a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 20:05:25 -0700 Subject: [PATCH 041/121] Added more type hints [skip ci] --- pgvector/sqlalchemy/bit.py | 12 +++++++----- pgvector/sqlalchemy/halfvec.py | 20 +++++++++++--------- pgvector/sqlalchemy/sparsevec.py | 20 +++++++++++--------- pgvector/sqlalchemy/vector.py | 20 +++++++++++--------- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index bbf822e..c0bb6ba 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -1,20 +1,22 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float +from sqlalchemy import Dialect +from typing import Any class BIT(UserDefinedType): cache_ok = True - def __init__(self, length=None): + def __init__(self, length: int | None = None) -> None: super(UserDefinedType, self).__init__() self.length = length - def get_col_spec(self, **kw): + def get_col_spec(self, **kw) -> str: if self.length is None: return 'BIT' return 'BIT(%d)' % self.length - def bind_processor(self, dialect): + def bind_processor(self, dialect: Dialect) -> Any: if dialect.__class__.__name__ == 'PGDialect_asyncpg': import asyncpg @@ -27,10 +29,10 @@ def process(value): return super().bind_processor(dialect) class comparator_factory(UserDefinedType.Comparator): - def hamming_distance(self, other): + def hamming_distance(self, other: Any) -> Any: return self.op('<~>', return_type=Float)(other) - def jaccard_distance(self, other): + def jaccard_distance(self, other: Any) -> Any: return self.op('<%>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index f51d1a6..3612cfc 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -1,5 +1,7 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String +from sqlalchemy import Dialect +from typing import Any from .. import HalfVector @@ -7,43 +9,43 @@ class HALFVEC(UserDefinedType): cache_ok = True _string = String() - def __init__(self, dim=None): + def __init__(self, dim: int | None = None) -> None: super(UserDefinedType, self).__init__() self.dim = dim - def get_col_spec(self, **kw): + def get_col_spec(self, **kw) -> str: if self.dim is None: return 'HALFVEC' return 'HALFVEC(%d)' % self.dim - def bind_processor(self, dialect): + def bind_processor(self, dialect: Dialect): def process(value): return HalfVector._to_db(value, self.dim) return process - def literal_processor(self, dialect): + def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) def process(value): return string_literal_processor(HalfVector._to_db(value, self.dim)) # type: ignore return process - def result_processor(self, dialect, coltype): + def result_processor(self, dialect: Dialect, coltype: Any) -> Any: def process(value): return HalfVector._from_db(value) return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other): + def l2_distance(self, other: Any) -> Any: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other): + def max_inner_product(self, other: Any) -> Any: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other): + def cosine_distance(self, other: Any) -> Any: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other): + def l1_distance(self, other: Any) -> Any: return self.op('<+>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index 6828bfd..e409bf1 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -1,5 +1,7 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String +from sqlalchemy import Dialect +from typing import Any from .. import SparseVector @@ -7,43 +9,43 @@ class SPARSEVEC(UserDefinedType): cache_ok = True _string = String() - def __init__(self, dim=None): + def __init__(self, dim: int | None = None) -> None: super(UserDefinedType, self).__init__() self.dim = dim - def get_col_spec(self, **kw): + def get_col_spec(self, **kw) -> str: if self.dim is None: return 'SPARSEVEC' return 'SPARSEVEC(%d)' % self.dim - def bind_processor(self, dialect): + def bind_processor(self, dialect: Dialect) -> Any: def process(value): return SparseVector._to_db(value, self.dim) return process - def literal_processor(self, dialect): + def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) def process(value): return string_literal_processor(SparseVector._to_db(value, self.dim)) # type: ignore return process - def result_processor(self, dialect, coltype): + def result_processor(self, dialect: Dialect, coltype: Any) -> Any: def process(value): return SparseVector._from_db(value) return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other): + def l2_distance(self, other: Any) -> Any: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other): + def max_inner_product(self, other: Any) -> Any: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other): + def cosine_distance(self, other: Any) -> Any: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other): + def l1_distance(self, other: Any) -> Any: return self.op('<+>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index a9e6f22..187e039 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -1,5 +1,7 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String +from sqlalchemy import Dialect +from typing import Any from .. import Vector @@ -7,43 +9,43 @@ class VECTOR(UserDefinedType): cache_ok = True _string = String() - def __init__(self, dim=None): + def __init__(self, dim: int | None = None) -> None: super(UserDefinedType, self).__init__() self.dim = dim - def get_col_spec(self, **kw): + def get_col_spec(self, **kw) -> str: if self.dim is None: return 'VECTOR' return 'VECTOR(%d)' % self.dim - def bind_processor(self, dialect): + def bind_processor(self, dialect: Dialect) -> Any: def process(value): return Vector._to_db(value, self.dim) return process - def literal_processor(self, dialect): + def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) def process(value): return string_literal_processor(Vector._to_db(value, self.dim)) # type: ignore return process - def result_processor(self, dialect, coltype): + def result_processor(self, dialect: Dialect, coltype: Any) -> Any: def process(value): return Vector._from_db(value) return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other): + def l2_distance(self, other: Any) -> Any: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other): + def max_inner_product(self, other: Any) -> Any: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other): + def cosine_distance(self, other: Any) -> Any: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other): + def l1_distance(self, other: Any) -> Any: return self.op('<+>', return_type=Float)(other) From 0673ca77c13482844807b7c03670898c32f23f2c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 20:08:14 -0700 Subject: [PATCH 042/121] Improved type hints [skip ci] --- pgvector/sqlalchemy/bit.py | 6 +++--- pgvector/sqlalchemy/halfvec.py | 10 +++++----- pgvector/sqlalchemy/sparsevec.py | 10 +++++----- pgvector/sqlalchemy/vector.py | 10 +++++----- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index c0bb6ba..f38c994 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -1,6 +1,6 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float -from sqlalchemy import Dialect +from sqlalchemy import Dialect, Operators from typing import Any @@ -29,10 +29,10 @@ def process(value): return super().bind_processor(dialect) class comparator_factory(UserDefinedType.Comparator): - def hamming_distance(self, other: Any) -> Any: + def hamming_distance(self, other: Any) -> Operators: return self.op('<~>', return_type=Float)(other) - def jaccard_distance(self, other: Any) -> Any: + def jaccard_distance(self, other: Any) -> Operators: return self.op('<%>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index 3612cfc..f964c3a 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -1,6 +1,6 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String -from sqlalchemy import Dialect +from sqlalchemy import Dialect, Operators from typing import Any from .. import HalfVector @@ -36,16 +36,16 @@ def process(value): return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other: Any) -> Any: + def l2_distance(self, other: Any) -> Operators: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other: Any) -> Any: + def max_inner_product(self, other: Any) -> Operators: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other: Any) -> Any: + def cosine_distance(self, other: Any) -> Operators: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other: Any) -> Any: + def l1_distance(self, other: Any) -> Operators: return self.op('<+>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index e409bf1..17964c7 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -1,6 +1,6 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String -from sqlalchemy import Dialect +from sqlalchemy import Dialect, Operators from typing import Any from .. import SparseVector @@ -36,16 +36,16 @@ def process(value): return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other: Any) -> Any: + def l2_distance(self, other: Any) -> Operators: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other: Any) -> Any: + def max_inner_product(self, other: Any) -> Operators: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other: Any) -> Any: + def cosine_distance(self, other: Any) -> Operators: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other: Any) -> Any: + def l1_distance(self, other: Any) -> Operators: return self.op('<+>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index 187e039..e1e301e 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -1,6 +1,6 @@ from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String -from sqlalchemy import Dialect +from sqlalchemy import Dialect, Operators from typing import Any from .. import Vector @@ -36,16 +36,16 @@ def process(value): return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other: Any) -> Any: + def l2_distance(self, other: Any) -> Operators: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other: Any) -> Any: + def max_inner_product(self, other: Any) -> Operators: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other: Any) -> Any: + def cosine_distance(self, other: Any) -> Operators: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other: Any) -> Any: + def l1_distance(self, other: Any) -> Operators: return self.op('<+>', return_type=Float)(other) From f4498f287984dbcb81c6a55909dee51df6cadd2b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 20:23:30 -0700 Subject: [PATCH 043/121] Improved type hints [skip ci] --- pgvector/peewee/bit.py | 13 +++++++------ pgvector/peewee/halfvec.py | 21 +++++++++++---------- pgvector/peewee/sparsevec.py | 21 +++++++++++---------- pgvector/peewee/vector.py | 22 ++++++++++++---------- pgvector/sqlalchemy/halfvec.py | 2 +- 5 files changed, 42 insertions(+), 37 deletions(-) diff --git a/pgvector/peewee/bit.py b/pgvector/peewee/bit.py index ee5f12f..a2bb797 100644 --- a/pgvector/peewee/bit.py +++ b/pgvector/peewee/bit.py @@ -1,21 +1,22 @@ from peewee import Expression, Field +from typing import Any class FixedBitField(Field): field_type = 'bit' - def __init__(self, max_length=None, *args, **kwargs): + def __init__(self, max_length: int | None = None, *args, **kwargs) -> None: self.max_length = max_length super(FixedBitField, self).__init__(*args, **kwargs) - def get_modifiers(self): - return self.max_length and [self.max_length] or None + def get_modifiers(self) -> list[int] | None: + return [self.max_length] if self.max_length else None - def _distance(self, op, vector): + def _distance(self, op: str, vector: Any) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def hamming_distance(self, vector): + def hamming_distance(self, vector: Any) -> Expression: return self._distance('<~>', vector) - def jaccard_distance(self, vector): + def jaccard_distance(self, vector: Any) -> Expression: return self._distance('<%%>', vector) diff --git a/pgvector/peewee/halfvec.py b/pgvector/peewee/halfvec.py index 0901fd2..234da40 100644 --- a/pgvector/peewee/halfvec.py +++ b/pgvector/peewee/halfvec.py @@ -1,34 +1,35 @@ from peewee import Expression, Field +from typing import Any from .. import HalfVector class HalfVectorField(Field): field_type = 'halfvec' - def __init__(self, dimensions=None, *args, **kwargs): + def __init__(self, dimensions: int | None = None, *args, **kwargs) -> None: self.dimensions = dimensions super(HalfVectorField, self).__init__(*args, **kwargs) - def get_modifiers(self): - return self.dimensions and [self.dimensions] or None + def get_modifiers(self) -> list[int] | None: + return [self.dimensions] if self.dimensions else None - def db_value(self, value): + def db_value(self, value: Any) -> str | None: return HalfVector._to_db(value) - def python_value(self, value): + def python_value(self, value: Any) -> HalfVector | None: return HalfVector._from_db(value) - def _distance(self, op, vector): + def _distance(self, op: str, vector: Any) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def l2_distance(self, vector): + def l2_distance(self, vector: Any) -> Expression: return self._distance('<->', vector) - def max_inner_product(self, vector): + def max_inner_product(self, vector: Any) -> Expression: return self._distance('<#>', vector) - def cosine_distance(self, vector): + def cosine_distance(self, vector: Any) -> Expression: return self._distance('<=>', vector) - def l1_distance(self, vector): + def l1_distance(self, vector: Any) -> Expression: return self._distance('<+>', vector) diff --git a/pgvector/peewee/sparsevec.py b/pgvector/peewee/sparsevec.py index 86dea73..816ff68 100644 --- a/pgvector/peewee/sparsevec.py +++ b/pgvector/peewee/sparsevec.py @@ -1,34 +1,35 @@ from peewee import Expression, Field +from typing import Any from .. import SparseVector class SparseVectorField(Field): field_type = 'sparsevec' - def __init__(self, dimensions=None, *args, **kwargs): + def __init__(self, dimensions: int | None = None, *args, **kwargs) -> None: self.dimensions = dimensions super(SparseVectorField, self).__init__(*args, **kwargs) - def get_modifiers(self): - return self.dimensions and [self.dimensions] or None + def get_modifiers(self) -> list[int] | None: + return [self.dimensions] if self.dimensions else None - def db_value(self, value): + def db_value(self, value: Any) -> str | None: return SparseVector._to_db(value) - def python_value(self, value): + def python_value(self, value: Any) -> SparseVector | None: return SparseVector._from_db(value) - def _distance(self, op, vector): + def _distance(self, op: str, vector: Any) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def l2_distance(self, vector): + def l2_distance(self, vector: Any) -> Expression: return self._distance('<->', vector) - def max_inner_product(self, vector): + def max_inner_product(self, vector: Any) -> Expression: return self._distance('<#>', vector) - def cosine_distance(self, vector): + def cosine_distance(self, vector: Any) -> Expression: return self._distance('<=>', vector) - def l1_distance(self, vector): + def l1_distance(self, vector: Any) -> Expression: return self._distance('<+>', vector) diff --git a/pgvector/peewee/vector.py b/pgvector/peewee/vector.py index 83f9997..6cdbef6 100644 --- a/pgvector/peewee/vector.py +++ b/pgvector/peewee/vector.py @@ -1,34 +1,36 @@ +import numpy as np from peewee import Expression, Field +from typing import Any from .. import Vector class VectorField(Field): field_type = 'vector' - def __init__(self, dimensions=None, *args, **kwargs): + def __init__(self, dimensions: int | None = None, *args, **kwargs) -> None: self.dimensions = dimensions super(VectorField, self).__init__(*args, **kwargs) - def get_modifiers(self): - return self.dimensions and [self.dimensions] or None + def get_modifiers(self) -> list[int] | None: + return [self.dimensions] if self.dimensions else None - def db_value(self, value): + def db_value(self, value: Any) -> str | None: return Vector._to_db(value) - def python_value(self, value): + def python_value(self, value: Any) -> np.ndarray | None: return Vector._from_db(value) - def _distance(self, op, vector): + def _distance(self, op: str, vector: Any) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def l2_distance(self, vector): + def l2_distance(self, vector: Any) -> Expression: return self._distance('<->', vector) - def max_inner_product(self, vector): + def max_inner_product(self, vector: Any) -> Expression: return self._distance('<#>', vector) - def cosine_distance(self, vector): + def cosine_distance(self, vector: Any) -> Expression: return self._distance('<=>', vector) - def l1_distance(self, vector): + def l1_distance(self, vector: Any) -> Expression: return self._distance('<+>', vector) diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index f964c3a..703861a 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -18,7 +18,7 @@ def get_col_spec(self, **kw) -> str: return 'HALFVEC' return 'HALFVEC(%d)' % self.dim - def bind_processor(self, dialect: Dialect): + def bind_processor(self, dialect: Dialect) -> Any: def process(value): return HalfVector._to_db(value, self.dim) return process From 90fd6da9c54e22575ba94e5a9e9b96b7ca7a1c04 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 20:25:13 -0700 Subject: [PATCH 044/121] Fixed error with mypy [skip ci] --- pgvector/django/extensions.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pgvector/django/extensions.py b/pgvector/django/extensions.py index 1d04739..9974e99 100644 --- a/pgvector/django/extensions.py +++ b/pgvector/django/extensions.py @@ -3,9 +3,8 @@ class VectorExtension(CreateExtension): - if VERSION[0] >= 6: - def __init__(self, hints=None): + def __init__(self, hints=None): + if VERSION[0] >= 6: super().__init__('vector', hints=hints) - else: - def __init__(self): + else: self.name = 'vector' From 9c2d44add965e726d6ee4fc6b3993b2d1fe2f3d7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 21:00:00 -0700 Subject: [PATCH 045/121] Added more type hints [skip ci] --- pgvector/django/bit.py | 11 ++++++----- pgvector/django/extensions.py | 3 ++- pgvector/django/functions.py | 5 +++-- pgvector/django/halfvec.py | 19 ++++++++++--------- pgvector/django/indexes.py | 13 +++++++------ pgvector/django/sparsevec.py | 21 +++++++++++---------- pgvector/django/vector.py | 27 ++++++++++++++------------- 7 files changed, 53 insertions(+), 46 deletions(-) diff --git a/pgvector/django/bit.py b/pgvector/django/bit.py index 29a31e1..cfc98ec 100644 --- a/pgvector/django/bit.py +++ b/pgvector/django/bit.py @@ -1,32 +1,33 @@ from django import forms from django.db.models import Field +from typing import Any # https://docs.djangoproject.com/en/5.0/howto/custom-model-fields/ class BitField(Field): description = 'Bit string' - def __init__(self, *args, length=None, **kwargs): + def __init__(self, *args: Any, length: int | None = None, **kwargs: Any) -> None: self.length = length super().__init__(*args, **kwargs) - def deconstruct(self): + def deconstruct(self) -> tuple: name, path, args, kwargs = super().deconstruct() if self.length is not None: kwargs['length'] = self.length return name, path, args, kwargs - def db_type(self, connection): + def db_type(self, connection: Any) -> str: if self.length is None: return 'bit' return 'bit(%d)' % self.length - def formfield(self, **kwargs): # type: ignore + def formfield(self, **kwargs: Any): # type: ignore return super().formfield(form_class=BitFormField, **kwargs) class BitFormField(forms.CharField): - def to_python(self, value): + def to_python(self, value: Any) -> Any: if isinstance(value, str) and value == '': return None return super().to_python(value) diff --git a/pgvector/django/extensions.py b/pgvector/django/extensions.py index 9974e99..debac5d 100644 --- a/pgvector/django/extensions.py +++ b/pgvector/django/extensions.py @@ -1,9 +1,10 @@ from django import VERSION from django.contrib.postgres.operations import CreateExtension +from typing import Any class VectorExtension(CreateExtension): - def __init__(self, hints=None): + def __init__(self, hints: Any = None) -> None: if VERSION[0] >= 6: super().__init__('vector', hints=hints) else: diff --git a/pgvector/django/functions.py b/pgvector/django/functions.py index 9df4fdb..af76a67 100644 --- a/pgvector/django/functions.py +++ b/pgvector/django/functions.py @@ -1,11 +1,12 @@ from django.db.models import FloatField, Func, Value from .. import Vector, HalfVector, SparseVector +from typing import Any class DistanceBase(Func): output_field = FloatField() - def __init__(self, expression, vector, **extra): + def __init__(self, expression: Any, vector: Any, **extra: Any) -> None: if not hasattr(vector, 'resolve_expression'): if isinstance(vector, HalfVector): vector = Value(HalfVector._to_db(vector)) @@ -23,7 +24,7 @@ def __init__(self, expression, vector, **extra): class BitDistanceBase(Func): output_field = FloatField() - def __init__(self, expression, vector, **extra): + def __init__(self, expression: Any, vector: Any, **extra: Any) -> None: if not hasattr(vector, 'resolve_expression'): vector = Value(vector) super().__init__(expression, vector, **extra) diff --git a/pgvector/django/halfvec.py b/pgvector/django/halfvec.py index 36101ca..973cc0d 100644 --- a/pgvector/django/halfvec.py +++ b/pgvector/django/halfvec.py @@ -1,5 +1,6 @@ from django import forms from django.db.models import Field +from typing import Any from .. import HalfVector @@ -8,25 +9,25 @@ class HalfVectorField(Field): description = 'Half vector' empty_strings_allowed = False - def __init__(self, *args, dimensions=None, **kwargs): + def __init__(self, *args: Any, dimensions: int | None = None, **kwargs: Any) -> None: self.dimensions = dimensions super().__init__(*args, **kwargs) - def deconstruct(self): + def deconstruct(self) -> tuple: name, path, args, kwargs = super().deconstruct() if self.dimensions is not None: kwargs['dimensions'] = self.dimensions return name, path, args, kwargs - def db_type(self, connection): + def db_type(self, connection: Any) -> str: if self.dimensions is None: return 'halfvec' return 'halfvec(%d)' % self.dimensions - def from_db_value(self, value, expression, connection): + def from_db_value(self, value: Any, expression: Any, connection: Any) -> HalfVector | None: return HalfVector._from_db(value) - def to_python(self, value): + def to_python(self, value: Any) -> HalfVector | None: if value is None or isinstance(value, HalfVector): return value elif isinstance(value, str): @@ -34,10 +35,10 @@ def to_python(self, value): else: return HalfVector(value) - def get_prep_value(self, value): + def get_prep_value(self, value: Any) -> str | None: return HalfVector._to_db(value) - def value_to_string(self, obj): + def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) def formfield(self, **kwargs): # type: ignore @@ -45,7 +46,7 @@ def formfield(self, **kwargs): # type: ignore class HalfVectorWidget(forms.TextInput): - def format_value(self, value): + def format_value(self, value: Any) -> str | None: if isinstance(value, HalfVector): value = value.to_list() return super().format_value(value) @@ -54,7 +55,7 @@ def format_value(self, value): class HalfVectorFormField(forms.CharField): widget = HalfVectorWidget - def to_python(self, value): + def to_python(self, value: Any) -> Any: if isinstance(value, str) and value == '': return None return super().to_python(value) diff --git a/pgvector/django/indexes.py b/pgvector/django/indexes.py index 5bec0eb..d18330f 100644 --- a/pgvector/django/indexes.py +++ b/pgvector/django/indexes.py @@ -1,20 +1,21 @@ from django.contrib.postgres.indexes import PostgresIndex +from typing import Any class IvfflatIndex(PostgresIndex): suffix = 'ivfflat' - def __init__(self, *expressions, lists=None, **kwargs): + def __init__(self, *expressions: Any, lists: int | None = None, **kwargs: Any) -> None: self.lists = lists super().__init__(*expressions, **kwargs) - def deconstruct(self): + def deconstruct(self) -> tuple: path, args, kwargs = super().deconstruct() if self.lists is not None: kwargs['lists'] = self.lists return path, args, kwargs - def get_with_params(self): + def get_with_params(self) -> list[str]: with_params = [] if self.lists is not None: with_params.append('lists = %d' % self.lists) @@ -24,12 +25,12 @@ def get_with_params(self): class HnswIndex(PostgresIndex): suffix = 'hnsw' - def __init__(self, *expressions, m=None, ef_construction=None, **kwargs): + def __init__(self, *expressions: Any, m: int | None = None, ef_construction: int | None = None, **kwargs: Any) -> None: self.m = m self.ef_construction = ef_construction super().__init__(*expressions, **kwargs) - def deconstruct(self): + def deconstruct(self) -> tuple: path, args, kwargs = super().deconstruct() if self.m is not None: kwargs['m'] = self.m @@ -37,7 +38,7 @@ def deconstruct(self): kwargs['ef_construction'] = self.ef_construction return path, args, kwargs - def get_with_params(self): + def get_with_params(self) -> list[str]: with_params = [] if self.m is not None: with_params.append('m = %d' % self.m) diff --git a/pgvector/django/sparsevec.py b/pgvector/django/sparsevec.py index 7f4db77..c264d7e 100644 --- a/pgvector/django/sparsevec.py +++ b/pgvector/django/sparsevec.py @@ -1,5 +1,6 @@ from django import forms from django.db.models import Field +from typing import Any from .. import SparseVector @@ -8,39 +9,39 @@ class SparseVectorField(Field): description = 'Sparse vector' empty_strings_allowed = False - def __init__(self, *args, dimensions=None, **kwargs): + def __init__(self, *args: Any, dimensions: int | None = None, **kwargs: Any): self.dimensions = dimensions super().__init__(*args, **kwargs) - def deconstruct(self): + def deconstruct(self) -> tuple: name, path, args, kwargs = super().deconstruct() if self.dimensions is not None: kwargs['dimensions'] = self.dimensions return name, path, args, kwargs - def db_type(self, connection): + def db_type(self, connection: Any) -> str: if self.dimensions is None: return 'sparsevec' return 'sparsevec(%d)' % self.dimensions - def from_db_value(self, value, expression, connection): + def from_db_value(self, value: Any, expression: Any, connection: Any) -> SparseVector | None: return SparseVector._from_db(value) - def to_python(self, value): + def to_python(self, value: Any) -> SparseVector | None: return SparseVector._from_db(value) - def get_prep_value(self, value): + def get_prep_value(self, value: Any) -> str | None: return SparseVector._to_db(value) - def value_to_string(self, obj): + def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) - def formfield(self, **kwargs): # type: ignore + def formfield(self, **kwargs: Any): # type: ignore return super().formfield(form_class=SparseVectorFormField, **kwargs) class SparseVectorWidget(forms.TextInput): - def format_value(self, value): + def format_value(self, value: Any) -> str | None: if isinstance(value, SparseVector): value = value.to_text() return super().format_value(value) @@ -49,7 +50,7 @@ def format_value(self, value): class SparseVectorFormField(forms.CharField): widget = SparseVectorWidget - def to_python(self, value): + def to_python(self, value: Any) -> Any: if isinstance(value, str) and value == '': return None return super().to_python(value) diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index dde96f8..08c04c0 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -1,6 +1,7 @@ from django import forms from django.db.models import Field import numpy as np +from typing import Any from .. import Vector @@ -9,51 +10,51 @@ class VectorField(Field): description = 'Vector' empty_strings_allowed = False - def __init__(self, *args, dimensions=None, **kwargs): + def __init__(self, *args: Any, dimensions: int | None = None, **kwargs: Any) -> None: self.dimensions = dimensions super().__init__(*args, **kwargs) - def deconstruct(self): + def deconstruct(self) -> tuple: name, path, args, kwargs = super().deconstruct() if self.dimensions is not None: kwargs['dimensions'] = self.dimensions return name, path, args, kwargs - def db_type(self, connection): + def db_type(self, connection: Any) -> str: if self.dimensions is None: return 'vector' return 'vector(%d)' % self.dimensions - def from_db_value(self, value, expression, connection): + def from_db_value(self, value: Any, expression: Any, connection: Any) -> np.ndarray | None: return Vector._from_db(value) - def to_python(self, value): + def to_python(self, value: Any) -> np.ndarray | None: if isinstance(value, list): return np.array(value, dtype=np.float32) return Vector._from_db(value) - def get_prep_value(self, value): + def get_prep_value(self, value: Any) -> str | None: return Vector._to_db(value) - def value_to_string(self, obj): + def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) - def validate(self, value, model_instance): + def validate(self, value: Any, model_instance: Any) -> None: if isinstance(value, np.ndarray): value = value.tolist() super().validate(value, model_instance) - def run_validators(self, value): + def run_validators(self, value: Any) -> None: if isinstance(value, np.ndarray): value = value.tolist() super().run_validators(value) - def formfield(self, **kwargs): # type: ignore + def formfield(self, **kwargs: Any): # type: ignore return super().formfield(form_class=VectorFormField, **kwargs) class VectorWidget(forms.TextInput): - def format_value(self, value): + def format_value(self, value: Any) -> str | None: if isinstance(value, np.ndarray): value = value.tolist() return super().format_value(value) @@ -62,12 +63,12 @@ def format_value(self, value): class VectorFormField(forms.CharField): widget = VectorWidget - def has_changed(self, initial, data): + def has_changed(self, initial: Any, data: Any) -> bool: if isinstance(initial, np.ndarray): initial = initial.tolist() return super().has_changed(initial, data) - def to_python(self, value): + def to_python(self, value: Any) -> Any: if isinstance(value, str) and value == '': return None return super().to_python(value) From c9998e6a019a5dda32d81793c8c05dd1bc2ba668 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 21:07:26 -0700 Subject: [PATCH 046/121] Improved type hints [skip ci] --- pgvector/django/bit.py | 2 +- pgvector/django/halfvec.py | 2 +- pgvector/django/sparsevec.py | 2 +- pgvector/django/vector.py | 2 +- pgvector/peewee/bit.py | 2 +- pgvector/peewee/halfvec.py | 2 +- pgvector/peewee/sparsevec.py | 2 +- pgvector/peewee/vector.py | 2 +- pgvector/sqlalchemy/bit.py | 2 +- pgvector/sqlalchemy/halfvec.py | 2 +- pgvector/sqlalchemy/sparsevec.py | 2 +- pgvector/sqlalchemy/vector.py | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pgvector/django/bit.py b/pgvector/django/bit.py index cfc98ec..167f550 100644 --- a/pgvector/django/bit.py +++ b/pgvector/django/bit.py @@ -22,7 +22,7 @@ def db_type(self, connection: Any) -> str: return 'bit' return 'bit(%d)' % self.length - def formfield(self, **kwargs: Any): # type: ignore + def formfield(self, **kwargs: Any) -> forms.Field: # type: ignore return super().formfield(form_class=BitFormField, **kwargs) diff --git a/pgvector/django/halfvec.py b/pgvector/django/halfvec.py index 973cc0d..d445f75 100644 --- a/pgvector/django/halfvec.py +++ b/pgvector/django/halfvec.py @@ -41,7 +41,7 @@ def get_prep_value(self, value: Any) -> str | None: def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) - def formfield(self, **kwargs): # type: ignore + def formfield(self, **kwargs) -> forms.Field: # type: ignore return super().formfield(form_class=HalfVectorFormField, **kwargs) diff --git a/pgvector/django/sparsevec.py b/pgvector/django/sparsevec.py index c264d7e..3b2bb52 100644 --- a/pgvector/django/sparsevec.py +++ b/pgvector/django/sparsevec.py @@ -36,7 +36,7 @@ def get_prep_value(self, value: Any) -> str | None: def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) - def formfield(self, **kwargs: Any): # type: ignore + def formfield(self, **kwargs: Any) -> forms.Field: # type: ignore return super().formfield(form_class=SparseVectorFormField, **kwargs) diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index 08c04c0..64d1f5e 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -49,7 +49,7 @@ def run_validators(self, value: Any) -> None: value = value.tolist() super().run_validators(value) - def formfield(self, **kwargs: Any): # type: ignore + def formfield(self, **kwargs: Any) -> forms.Field: # type: ignore return super().formfield(form_class=VectorFormField, **kwargs) diff --git a/pgvector/peewee/bit.py b/pgvector/peewee/bit.py index a2bb797..3128bcc 100644 --- a/pgvector/peewee/bit.py +++ b/pgvector/peewee/bit.py @@ -5,7 +5,7 @@ class FixedBitField(Field): field_type = 'bit' - def __init__(self, max_length: int | None = None, *args, **kwargs) -> None: + def __init__(self, max_length: int | None = None, *args: Any, **kwargs: Any) -> None: self.max_length = max_length super(FixedBitField, self).__init__(*args, **kwargs) diff --git a/pgvector/peewee/halfvec.py b/pgvector/peewee/halfvec.py index 234da40..992d8e7 100644 --- a/pgvector/peewee/halfvec.py +++ b/pgvector/peewee/halfvec.py @@ -6,7 +6,7 @@ class HalfVectorField(Field): field_type = 'halfvec' - def __init__(self, dimensions: int | None = None, *args, **kwargs) -> None: + def __init__(self, dimensions: int | None = None, *args: Any, **kwargs: Any) -> None: self.dimensions = dimensions super(HalfVectorField, self).__init__(*args, **kwargs) diff --git a/pgvector/peewee/sparsevec.py b/pgvector/peewee/sparsevec.py index 816ff68..dfb403e 100644 --- a/pgvector/peewee/sparsevec.py +++ b/pgvector/peewee/sparsevec.py @@ -6,7 +6,7 @@ class SparseVectorField(Field): field_type = 'sparsevec' - def __init__(self, dimensions: int | None = None, *args, **kwargs) -> None: + def __init__(self, dimensions: int | None = None, *args: Any, **kwargs: Any) -> None: self.dimensions = dimensions super(SparseVectorField, self).__init__(*args, **kwargs) diff --git a/pgvector/peewee/vector.py b/pgvector/peewee/vector.py index 6cdbef6..43b9f4b 100644 --- a/pgvector/peewee/vector.py +++ b/pgvector/peewee/vector.py @@ -7,7 +7,7 @@ class VectorField(Field): field_type = 'vector' - def __init__(self, dimensions: int | None = None, *args, **kwargs) -> None: + def __init__(self, dimensions: int | None = None, *args: Any, **kwargs: Any) -> None: self.dimensions = dimensions super(VectorField, self).__init__(*args, **kwargs) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index f38c994..21caa31 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -11,7 +11,7 @@ def __init__(self, length: int | None = None) -> None: super(UserDefinedType, self).__init__() self.length = length - def get_col_spec(self, **kw) -> str: + def get_col_spec(self, **kw: Any) -> str: if self.length is None: return 'BIT' return 'BIT(%d)' % self.length diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index 703861a..4e422a2 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -13,7 +13,7 @@ def __init__(self, dim: int | None = None) -> None: super(UserDefinedType, self).__init__() self.dim = dim - def get_col_spec(self, **kw) -> str: + def get_col_spec(self, **kw: Any) -> str: if self.dim is None: return 'HALFVEC' return 'HALFVEC(%d)' % self.dim diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index 17964c7..d4e5cf0 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -13,7 +13,7 @@ def __init__(self, dim: int | None = None) -> None: super(UserDefinedType, self).__init__() self.dim = dim - def get_col_spec(self, **kw) -> str: + def get_col_spec(self, **kw: Any) -> str: if self.dim is None: return 'SPARSEVEC' return 'SPARSEVEC(%d)' % self.dim diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index e1e301e..251e1d5 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -13,7 +13,7 @@ def __init__(self, dim: int | None = None) -> None: super(UserDefinedType, self).__init__() self.dim = dim - def get_col_spec(self, **kw) -> str: + def get_col_spec(self, **kw: Any) -> str: if self.dim is None: return 'VECTOR' return 'VECTOR(%d)' % self.dim From 12f3dc514d7a8caee684b495a49cac989c444cfe Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 21:13:22 -0700 Subject: [PATCH 047/121] Improved type hints [skip ci] --- pgvector/django/bit.py | 8 ++++++-- pgvector/django/halfvec.py | 8 ++++++-- pgvector/django/sparsevec.py | 8 ++++++-- pgvector/django/vector.py | 8 ++++++-- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/pgvector/django/bit.py b/pgvector/django/bit.py index 167f550..8086591 100644 --- a/pgvector/django/bit.py +++ b/pgvector/django/bit.py @@ -22,8 +22,12 @@ def db_type(self, connection: Any) -> str: return 'bit' return 'bit(%d)' % self.length - def formfield(self, **kwargs: Any) -> forms.Field: # type: ignore - return super().formfield(form_class=BitFormField, **kwargs) + def formfield(self, form_class: Any = None, choices_form_class: Any = None, **kwargs: Any) -> forms.Field: + return super().formfield( + form_class=BitFormField if form_class is None else form_class, + choices_form_class=choices_form_class, + **kwargs + ) class BitFormField(forms.CharField): diff --git a/pgvector/django/halfvec.py b/pgvector/django/halfvec.py index d445f75..dcca99f 100644 --- a/pgvector/django/halfvec.py +++ b/pgvector/django/halfvec.py @@ -41,8 +41,12 @@ def get_prep_value(self, value: Any) -> str | None: def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) - def formfield(self, **kwargs) -> forms.Field: # type: ignore - return super().formfield(form_class=HalfVectorFormField, **kwargs) + def formfield(self, form_class: Any = None, choices_form_class: Any = None, **kwargs: Any) -> forms.Field: + return super().formfield( + form_class=HalfVectorFormField if form_class is None else form_class, + choices_form_class=choices_form_class, + **kwargs + ) class HalfVectorWidget(forms.TextInput): diff --git a/pgvector/django/sparsevec.py b/pgvector/django/sparsevec.py index 3b2bb52..531d53c 100644 --- a/pgvector/django/sparsevec.py +++ b/pgvector/django/sparsevec.py @@ -36,8 +36,12 @@ def get_prep_value(self, value: Any) -> str | None: def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) - def formfield(self, **kwargs: Any) -> forms.Field: # type: ignore - return super().formfield(form_class=SparseVectorFormField, **kwargs) + def formfield(self, form_class: Any = None, choices_form_class: Any = None, **kwargs: Any) -> forms.Field: + return super().formfield( + form_class=SparseVectorFormField if form_class is None else form_class, + choices_form_class=choices_form_class, + **kwargs + ) class SparseVectorWidget(forms.TextInput): diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index 64d1f5e..7e34963 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -49,8 +49,12 @@ def run_validators(self, value: Any) -> None: value = value.tolist() super().run_validators(value) - def formfield(self, **kwargs: Any) -> forms.Field: # type: ignore - return super().formfield(form_class=VectorFormField, **kwargs) + def formfield(self, form_class: Any = None, choices_form_class: Any = None, **kwargs: Any) -> forms.Field: + return super().formfield( + form_class=VectorFormField if form_class is None else form_class, + choices_form_class=choices_form_class, + **kwargs + ) class VectorWidget(forms.TextInput): From 4c2b0723c9c4f0b3ffdb33d35248b45a07e73009 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 21:23:52 -0700 Subject: [PATCH 048/121] Improved type hints [skip ci] --- pgvector/sparsevec.py | 2 +- pgvector/sqlalchemy/bit.py | 2 +- pgvector/sqlalchemy/halfvec.py | 6 +++--- pgvector/sqlalchemy/sparsevec.py | 6 +++--- pgvector/sqlalchemy/vector.py | 7 ++++--- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 388116d..6a00981 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -42,7 +42,7 @@ def indices(self) -> list[int]: def values(self) -> list[float]: return self._values - def to_coo(self): + def to_coo(self) -> Any: from scipy.sparse import coo_array coords = ([0] * len(self._indices), self._indices) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index 21caa31..6be2ab9 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -20,7 +20,7 @@ def bind_processor(self, dialect: Dialect) -> Any: if dialect.__class__.__name__ == 'PGDialect_asyncpg': import asyncpg - def process(value): + def process(value: Any) -> Any: if isinstance(value, str): return asyncpg.BitString(value) return value diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index 4e422a2..8c050ec 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -19,19 +19,19 @@ def get_col_spec(self, **kw: Any) -> str: return 'HALFVEC(%d)' % self.dim def bind_processor(self, dialect: Dialect) -> Any: - def process(value): + def process(value: Any) -> str | None: return HalfVector._to_db(value, self.dim) return process def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) - def process(value): + def process(value: Any) -> Any: return string_literal_processor(HalfVector._to_db(value, self.dim)) # type: ignore return process def result_processor(self, dialect: Dialect, coltype: Any) -> Any: - def process(value): + def process(value: Any) -> HalfVector | None: return HalfVector._from_db(value) return process diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index d4e5cf0..a809c2a 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -19,19 +19,19 @@ def get_col_spec(self, **kw: Any) -> str: return 'SPARSEVEC(%d)' % self.dim def bind_processor(self, dialect: Dialect) -> Any: - def process(value): + def process(value: Any) -> str | None: return SparseVector._to_db(value, self.dim) return process def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) - def process(value): + def process(value: Any) -> Any: return string_literal_processor(SparseVector._to_db(value, self.dim)) # type: ignore return process def result_processor(self, dialect: Dialect, coltype: Any) -> Any: - def process(value): + def process(value: Any) -> SparseVector | None: return SparseVector._from_db(value) return process diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index 251e1d5..76f9497 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -1,3 +1,4 @@ +import numpy as np from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String from sqlalchemy import Dialect, Operators @@ -19,19 +20,19 @@ def get_col_spec(self, **kw: Any) -> str: return 'VECTOR(%d)' % self.dim def bind_processor(self, dialect: Dialect) -> Any: - def process(value): + def process(value: Any) -> str | None: return Vector._to_db(value, self.dim) return process def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) - def process(value): + def process(value: Any) -> Any: return string_literal_processor(Vector._to_db(value, self.dim)) # type: ignore return process def result_processor(self, dialect: Dialect, coltype: Any) -> Any: - def process(value): + def process(value: Any) -> np.ndarray | None: return Vector._from_db(value) return process From 03bbac707d87e6d53d4759190a2d5acdca1f42f9 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 21:44:58 -0700 Subject: [PATCH 049/121] Improved type hints [skip ci] --- pgvector/django/bit.py | 2 +- pgvector/django/halfvec.py | 2 +- pgvector/django/indexes.py | 4 ++-- pgvector/django/sparsevec.py | 2 +- pgvector/django/vector.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pgvector/django/bit.py b/pgvector/django/bit.py index 8086591..642770b 100644 --- a/pgvector/django/bit.py +++ b/pgvector/django/bit.py @@ -11,7 +11,7 @@ def __init__(self, *args: Any, length: int | None = None, **kwargs: Any) -> None self.length = length super().__init__(*args, **kwargs) - def deconstruct(self) -> tuple: + def deconstruct(self) -> tuple[Any, Any, Any, Any]: name, path, args, kwargs = super().deconstruct() if self.length is not None: kwargs['length'] = self.length diff --git a/pgvector/django/halfvec.py b/pgvector/django/halfvec.py index dcca99f..a2ec481 100644 --- a/pgvector/django/halfvec.py +++ b/pgvector/django/halfvec.py @@ -13,7 +13,7 @@ def __init__(self, *args: Any, dimensions: int | None = None, **kwargs: Any) -> self.dimensions = dimensions super().__init__(*args, **kwargs) - def deconstruct(self) -> tuple: + def deconstruct(self) -> tuple[Any, Any, Any, Any]: name, path, args, kwargs = super().deconstruct() if self.dimensions is not None: kwargs['dimensions'] = self.dimensions diff --git a/pgvector/django/indexes.py b/pgvector/django/indexes.py index d18330f..e095351 100644 --- a/pgvector/django/indexes.py +++ b/pgvector/django/indexes.py @@ -9,7 +9,7 @@ def __init__(self, *expressions: Any, lists: int | None = None, **kwargs: Any) - self.lists = lists super().__init__(*expressions, **kwargs) - def deconstruct(self) -> tuple: + def deconstruct(self) -> tuple[Any, Any, Any]: path, args, kwargs = super().deconstruct() if self.lists is not None: kwargs['lists'] = self.lists @@ -30,7 +30,7 @@ def __init__(self, *expressions: Any, m: int | None = None, ef_construction: int self.ef_construction = ef_construction super().__init__(*expressions, **kwargs) - def deconstruct(self) -> tuple: + def deconstruct(self) -> tuple[Any, Any, Any]: path, args, kwargs = super().deconstruct() if self.m is not None: kwargs['m'] = self.m diff --git a/pgvector/django/sparsevec.py b/pgvector/django/sparsevec.py index 531d53c..53a2c37 100644 --- a/pgvector/django/sparsevec.py +++ b/pgvector/django/sparsevec.py @@ -13,7 +13,7 @@ def __init__(self, *args: Any, dimensions: int | None = None, **kwargs: Any): self.dimensions = dimensions super().__init__(*args, **kwargs) - def deconstruct(self) -> tuple: + def deconstruct(self) -> tuple[Any, Any, Any, Any]: name, path, args, kwargs = super().deconstruct() if self.dimensions is not None: kwargs['dimensions'] = self.dimensions diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index 7e34963..7a12f78 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -14,7 +14,7 @@ def __init__(self, *args: Any, dimensions: int | None = None, **kwargs: Any) -> self.dimensions = dimensions super().__init__(*args, **kwargs) - def deconstruct(self) -> tuple: + def deconstruct(self) -> tuple[Any, Any, Any, Any]: name, path, args, kwargs = super().deconstruct() if self.dimensions is not None: kwargs['dimensions'] = self.dimensions From b4b684d89c4714350491832cf3533d76476bcf33 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 22:09:50 -0700 Subject: [PATCH 050/121] Improved type hints [skip ci] --- pgvector/bit.py | 2 +- pgvector/halfvec.py | 6 +++--- pgvector/sparsevec.py | 6 +++--- pgvector/vector.py | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index cb1bec4..ac99a14 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -31,7 +31,7 @@ def __init__(self, value: Any) -> None: def __repr__(self) -> str: return f'Bit({self.to_text()})' - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): return self._len == other._len and self._data == other._data return False diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 755b9d6..62be2bb 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -18,7 +18,7 @@ def __init__(self, value: Any) -> None: def __repr__(self) -> str: return f'HalfVector({self.to_list()})' - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): return np.array_equal(self.to_numpy(), other.to_numpy()) return False @@ -48,7 +48,7 @@ def from_binary(cls, value: bytes) -> HalfVector: return cls(np.frombuffer(value, dtype='>f2', count=dim, offset=4)) @classmethod - def _to_db(cls, value: Any, dim: int | None = None) -> str | None: + def _to_db(cls, value: object, dim: int | None = None) -> str | None: if value is None: return value @@ -61,7 +61,7 @@ def _to_db(cls, value: Any, dim: int | None = None) -> str | None: return value.to_text() @classmethod - def _to_db_binary(cls, value: Any) -> bytes | None: + def _to_db_binary(cls, value: object) -> bytes | None: if value is None: return value diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 6a00981..d0704ae 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -28,7 +28,7 @@ def __repr__(self) -> str: elements = dict(zip(self._indices, self._values)) return f'SparseVector({elements}, {self._dim})' - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): return self.dimensions() == other.dimensions() and self.indices() == other.indices() and self.values() == other.values() return False @@ -126,7 +126,7 @@ def _from_parts(cls, dim: int, indices: list[int], values: list[float]) -> Spars return vec @classmethod - def _to_db(cls, value: Any, dim: int | None = None) -> str | None: + def _to_db(cls, value: object, dim: int | None = None) -> str | None: if value is None: return value @@ -139,7 +139,7 @@ def _to_db(cls, value: Any, dim: int | None = None) -> str | None: return value.to_text() @classmethod - def _to_db_binary(cls, value: Any) -> bytes | None: + def _to_db_binary(cls, value: object) -> bytes | None: if value is None: return value diff --git a/pgvector/vector.py b/pgvector/vector.py index f5ba742..b90222e 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -18,7 +18,7 @@ def __init__(self, value: Any) -> None: def __repr__(self) -> str: return f'Vector({self.to_list()})' - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): return np.array_equal(self.to_numpy(), other.to_numpy()) return False @@ -48,7 +48,7 @@ def from_binary(cls, value: bytes) -> Vector: return cls(np.frombuffer(value, dtype='>f4', count=dim, offset=4)) @classmethod - def _to_db(cls, value: Any, dim: int | None = None) -> str | None: + def _to_db(cls, value: object, dim: int | None = None) -> str | None: if value is None: return value @@ -61,7 +61,7 @@ def _to_db(cls, value: Any, dim: int | None = None) -> str | None: return value.to_text() @classmethod - def _to_db_binary(cls, value: Any) -> bytes | None: + def _to_db_binary(cls, value: object) -> bytes | None: if value is None: return value From 8598dc08db2269de49adb15cf440270ddb24cae2 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 22:36:51 -0700 Subject: [PATCH 051/121] Improved type hints [skip ci] --- pgvector/peewee/bit.py | 6 +++--- pgvector/peewee/halfvec.py | 10 +++++----- pgvector/peewee/sparsevec.py | 10 +++++----- pgvector/peewee/vector.py | 10 +++++----- pgvector/sqlalchemy/bit.py | 4 ++-- pgvector/sqlalchemy/halfvec.py | 8 ++++---- pgvector/sqlalchemy/sparsevec.py | 8 ++++---- pgvector/sqlalchemy/vector.py | 8 ++++---- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/pgvector/peewee/bit.py b/pgvector/peewee/bit.py index 3128bcc..14c4555 100644 --- a/pgvector/peewee/bit.py +++ b/pgvector/peewee/bit.py @@ -12,11 +12,11 @@ def __init__(self, max_length: int | None = None, *args: Any, **kwargs: Any) -> def get_modifiers(self) -> list[int] | None: return [self.max_length] if self.max_length else None - def _distance(self, op: str, vector: Any) -> Expression: + def _distance(self, op: str, vector: object) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def hamming_distance(self, vector: Any) -> Expression: + def hamming_distance(self, vector: object) -> Expression: return self._distance('<~>', vector) - def jaccard_distance(self, vector: Any) -> Expression: + def jaccard_distance(self, vector: object) -> Expression: return self._distance('<%%>', vector) diff --git a/pgvector/peewee/halfvec.py b/pgvector/peewee/halfvec.py index 992d8e7..6d55d8f 100644 --- a/pgvector/peewee/halfvec.py +++ b/pgvector/peewee/halfvec.py @@ -19,17 +19,17 @@ def db_value(self, value: Any) -> str | None: def python_value(self, value: Any) -> HalfVector | None: return HalfVector._from_db(value) - def _distance(self, op: str, vector: Any) -> Expression: + def _distance(self, op: str, vector: object) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def l2_distance(self, vector: Any) -> Expression: + def l2_distance(self, vector: object) -> Expression: return self._distance('<->', vector) - def max_inner_product(self, vector: Any) -> Expression: + def max_inner_product(self, vector: object) -> Expression: return self._distance('<#>', vector) - def cosine_distance(self, vector: Any) -> Expression: + def cosine_distance(self, vector: object) -> Expression: return self._distance('<=>', vector) - def l1_distance(self, vector: Any) -> Expression: + def l1_distance(self, vector: object) -> Expression: return self._distance('<+>', vector) diff --git a/pgvector/peewee/sparsevec.py b/pgvector/peewee/sparsevec.py index dfb403e..7e66dd3 100644 --- a/pgvector/peewee/sparsevec.py +++ b/pgvector/peewee/sparsevec.py @@ -19,17 +19,17 @@ def db_value(self, value: Any) -> str | None: def python_value(self, value: Any) -> SparseVector | None: return SparseVector._from_db(value) - def _distance(self, op: str, vector: Any) -> Expression: + def _distance(self, op: str, vector: object) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def l2_distance(self, vector: Any) -> Expression: + def l2_distance(self, vector: object) -> Expression: return self._distance('<->', vector) - def max_inner_product(self, vector: Any) -> Expression: + def max_inner_product(self, vector: object) -> Expression: return self._distance('<#>', vector) - def cosine_distance(self, vector: Any) -> Expression: + def cosine_distance(self, vector: object) -> Expression: return self._distance('<=>', vector) - def l1_distance(self, vector: Any) -> Expression: + def l1_distance(self, vector: object) -> Expression: return self._distance('<+>', vector) diff --git a/pgvector/peewee/vector.py b/pgvector/peewee/vector.py index 43b9f4b..f61aa3d 100644 --- a/pgvector/peewee/vector.py +++ b/pgvector/peewee/vector.py @@ -20,17 +20,17 @@ def db_value(self, value: Any) -> str | None: def python_value(self, value: Any) -> np.ndarray | None: return Vector._from_db(value) - def _distance(self, op: str, vector: Any) -> Expression: + def _distance(self, op: str, vector: object) -> Expression: return Expression(lhs=self, op=op, rhs=self.to_value(vector)) - def l2_distance(self, vector: Any) -> Expression: + def l2_distance(self, vector: object) -> Expression: return self._distance('<->', vector) - def max_inner_product(self, vector: Any) -> Expression: + def max_inner_product(self, vector: object) -> Expression: return self._distance('<#>', vector) - def cosine_distance(self, vector: Any) -> Expression: + def cosine_distance(self, vector: object) -> Expression: return self._distance('<=>', vector) - def l1_distance(self, vector: Any) -> Expression: + def l1_distance(self, vector: object) -> Expression: return self._distance('<+>', vector) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index 6be2ab9..35b6753 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -29,10 +29,10 @@ def process(value: Any) -> Any: return super().bind_processor(dialect) class comparator_factory(UserDefinedType.Comparator): - def hamming_distance(self, other: Any) -> Operators: + def hamming_distance(self, other: object) -> Operators: return self.op('<~>', return_type=Float)(other) - def jaccard_distance(self, other: Any) -> Operators: + def jaccard_distance(self, other: object) -> Operators: return self.op('<%>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index 8c050ec..af13317 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -36,16 +36,16 @@ def process(value: Any) -> HalfVector | None: return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other: Any) -> Operators: + def l2_distance(self, other: object) -> Operators: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other: Any) -> Operators: + def max_inner_product(self, other: object) -> Operators: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other: Any) -> Operators: + def cosine_distance(self, other: object) -> Operators: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other: Any) -> Operators: + def l1_distance(self, other: object) -> Operators: return self.op('<+>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index a809c2a..235a779 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -36,16 +36,16 @@ def process(value: Any) -> SparseVector | None: return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other: Any) -> Operators: + def l2_distance(self, other: object) -> Operators: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other: Any) -> Operators: + def max_inner_product(self, other: object) -> Operators: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other: Any) -> Operators: + def cosine_distance(self, other: object) -> Operators: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other: Any) -> Operators: + def l1_distance(self, other: object) -> Operators: return self.op('<+>', return_type=Float)(other) diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index 76f9497..cf01c2f 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -37,16 +37,16 @@ def process(value: Any) -> np.ndarray | None: return process class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other: Any) -> Operators: + def l2_distance(self, other: object) -> Operators: return self.op('<->', return_type=Float)(other) - def max_inner_product(self, other: Any) -> Operators: + def max_inner_product(self, other: object) -> Operators: return self.op('<#>', return_type=Float)(other) - def cosine_distance(self, other: Any) -> Operators: + def cosine_distance(self, other: object) -> Operators: return self.op('<=>', return_type=Float)(other) - def l1_distance(self, other: Any) -> Operators: + def l1_distance(self, other: object) -> Operators: return self.op('<+>', return_type=Float)(other) From 3a067c64ec428a989fa294972a50507d0d75b270 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 22:38:09 -0700 Subject: [PATCH 052/121] Improved type hints [skip ci] --- pgvector/peewee/halfvec.py | 2 +- pgvector/peewee/sparsevec.py | 2 +- pgvector/peewee/vector.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pgvector/peewee/halfvec.py b/pgvector/peewee/halfvec.py index 6d55d8f..161fb92 100644 --- a/pgvector/peewee/halfvec.py +++ b/pgvector/peewee/halfvec.py @@ -13,7 +13,7 @@ def __init__(self, dimensions: int | None = None, *args: Any, **kwargs: Any) -> def get_modifiers(self) -> list[int] | None: return [self.dimensions] if self.dimensions else None - def db_value(self, value: Any) -> str | None: + def db_value(self, value: object) -> str | None: return HalfVector._to_db(value) def python_value(self, value: Any) -> HalfVector | None: diff --git a/pgvector/peewee/sparsevec.py b/pgvector/peewee/sparsevec.py index 7e66dd3..24b37b9 100644 --- a/pgvector/peewee/sparsevec.py +++ b/pgvector/peewee/sparsevec.py @@ -13,7 +13,7 @@ def __init__(self, dimensions: int | None = None, *args: Any, **kwargs: Any) -> def get_modifiers(self) -> list[int] | None: return [self.dimensions] if self.dimensions else None - def db_value(self, value: Any) -> str | None: + def db_value(self, value: object) -> str | None: return SparseVector._to_db(value) def python_value(self, value: Any) -> SparseVector | None: diff --git a/pgvector/peewee/vector.py b/pgvector/peewee/vector.py index f61aa3d..aa9509b 100644 --- a/pgvector/peewee/vector.py +++ b/pgvector/peewee/vector.py @@ -14,7 +14,7 @@ def __init__(self, dimensions: int | None = None, *args: Any, **kwargs: Any) -> def get_modifiers(self) -> list[int] | None: return [self.dimensions] if self.dimensions else None - def db_value(self, value: Any) -> str | None: + def db_value(self, value: object) -> str | None: return Vector._to_db(value) def python_value(self, value: Any) -> np.ndarray | None: From 03d113511f2fefbfe178cc977325b12398244de6 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 1 Jun 2026 22:42:45 -0700 Subject: [PATCH 053/121] Improved type hints [skip ci] --- pgvector/psycopg2/halfvec.py | 5 ++--- pgvector/psycopg2/sparsevec.py | 5 ++--- pgvector/psycopg2/vector.py | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pgvector/psycopg2/halfvec.py b/pgvector/psycopg2/halfvec.py index 5b72f5c..1313938 100644 --- a/pgvector/psycopg2/halfvec.py +++ b/pgvector/psycopg2/halfvec.py @@ -1,13 +1,12 @@ from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type -from typing import Any from .. import HalfVector class HalfvecAdapter: - def __init__(self, value: Any) -> None: + def __init__(self, value: object) -> None: self._value = value - def getquoted(self) -> Any: + def getquoted(self) -> bytes: return adapt(HalfVector._to_db(self._value)).getquoted() diff --git a/pgvector/psycopg2/sparsevec.py b/pgvector/psycopg2/sparsevec.py index 073e45b..34a4810 100644 --- a/pgvector/psycopg2/sparsevec.py +++ b/pgvector/psycopg2/sparsevec.py @@ -1,13 +1,12 @@ from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type -from typing import Any from .. import SparseVector class SparsevecAdapter: - def __init__(self, value: Any) -> None: + def __init__(self, value: object) -> None: self._value = value - def getquoted(self) -> Any: + def getquoted(self) -> bytes: return adapt(SparseVector._to_db(self._value)).getquoted() diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 0e2d1e3..2a2c617 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -1,14 +1,13 @@ import numpy as np from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type -from typing import Any from .. import Vector class VectorAdapter: - def __init__(self, value: Any) -> None: + def __init__(self, value: object) -> None: self._value = value - def getquoted(self) -> Any: + def getquoted(self) -> bytes: return adapt(Vector._to_db(self._value)).getquoted() From bc02e895bd99913a2b007c77679bcc35a6195fdc Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 09:42:57 -0700 Subject: [PATCH 054/121] Moved code [skip ci] --- pgvector/psycopg/bit.py | 3 ++- pgvector/psycopg/register.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pgvector/psycopg/bit.py b/pgvector/psycopg/bit.py index 8cfacbf..a3f3512 100644 --- a/pgvector/psycopg/bit.py +++ b/pgvector/psycopg/bit.py @@ -24,7 +24,8 @@ def dump(self, obj: Bit) -> Buffer | None: return Bit._to_db_binary(obj) -def register_bit_info(context: BaseConnection[Any], info: TypeInfo) -> None: +def register_bit_info(context: BaseConnection[Any], info: TypeInfo | None) -> None: + assert info is not None info.register(context) # add oid to anonymous class for set_types diff --git a/pgvector/psycopg/register.py b/pgvector/psycopg/register.py index 0256b94..15c2fc2 100644 --- a/pgvector/psycopg/register.py +++ b/pgvector/psycopg/register.py @@ -12,7 +12,6 @@ def register_vector(context: Connection[Any]) -> None: register_vector_info(context, info) info = TypeInfo.fetch(context, 'bit') - assert info is not None register_bit_info(context, info) info = TypeInfo.fetch(context, 'halfvec') @@ -29,7 +28,6 @@ async def register_vector_async(context: AsyncConnection[Any]) -> None: register_vector_info(context, info) info = await TypeInfo.fetch(context, 'bit') - assert info is not None register_bit_info(context, info) info = await TypeInfo.fetch(context, 'halfvec') From b1f9c8d8e0fe90509b35d4d4c2207e82e174a300 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 10:05:02 -0700 Subject: [PATCH 055/121] Improved type hints [skip ci] --- pgvector/halfvec.py | 8 +++++--- pgvector/sparsevec.py | 2 +- pgvector/vector.py | 8 +++++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 62be2bb..7b24037 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -1,19 +1,21 @@ from __future__ import annotations import numpy as np from struct import pack, unpack_from -from typing import Any class HalfVector: - def __init__(self, value: Any) -> None: + def __init__(self, value: object) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f2': value = np.asarray(value, dtype='>f2') + # for mypy + assert isinstance(value, np.ndarray) + if value.ndim != 1: raise ValueError('expected ndim to be 1') - self._value = value + self._value = np.atleast_1d(value) def __repr__(self) -> str: return f'HalfVector({self.to_list()})' diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index d0704ae..c8da4d2 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -7,7 +7,7 @@ class SparseVector: - def __init__(self, value: Any, dimensions: int | Any = NO_DEFAULT, /) -> None: + def __init__(self, value: dict[int, float] | list[float] | Any, dimensions: int | Any = NO_DEFAULT, /) -> None: if value.__class__.__module__.startswith('scipy.sparse.'): if dimensions is not NO_DEFAULT: raise ValueError('extra argument') diff --git a/pgvector/vector.py b/pgvector/vector.py index b90222e..272da3a 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -1,19 +1,21 @@ from __future__ import annotations import numpy as np from struct import pack, unpack_from -from typing import Any class Vector: - def __init__(self, value: Any) -> None: + def __init__(self, value: object) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f4': value = np.asarray(value, dtype='>f4') + # for mypy + assert isinstance(value, np.ndarray) + if value.ndim != 1: raise ValueError('expected ndim to be 1') - self._value = value + self._value = np.atleast_1d(value) def __repr__(self) -> str: return f'Vector({self.to_list()})' From 09a98372b02f718f1f9f910b13bfb4a5362e02bd Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 10:14:14 -0700 Subject: [PATCH 056/121] Added overloads [skip ci] --- pgvector/sparsevec.py | 14 +++++++++++++- tests/test_sparse_vector.py | 4 ++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index c8da4d2..3685b31 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -1,12 +1,24 @@ from __future__ import annotations import numpy as np from struct import pack, unpack_from -from typing import Any +from typing import Any, overload NO_DEFAULT = object() class SparseVector: + @overload + def __init__(self, value: dict[int, float], dimensions: int, /) -> None: + ... + + @overload + def __init__(self, value: list[float], /) -> None: + ... + + @overload + def __init__(self, value: Any, /) -> None: + ... + def __init__(self, value: dict[int, float] | list[float] | Any, dimensions: int | Any = NO_DEFAULT, /) -> None: if value.__class__.__module__.startswith('scipy.sparse.'): if dimensions is not NO_DEFAULT: diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index d580f32..8753e43 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -14,7 +14,7 @@ def test_list(self): def test_list_dimensions(self): with pytest.raises(ValueError) as error: - SparseVector([1, 0, 2, 0, 3, 0], 6) + SparseVector([1, 0, 2, 0, 3, 0], 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' def test_ndarray(self): @@ -40,7 +40,7 @@ def test_coo_array(self): def test_coo_array_dimensions(self): with pytest.raises(ValueError) as error: - SparseVector(coo_array(np.array([1, 0, 2, 0, 3, 0])), 6) + SparseVector(coo_array(np.array([1, 0, 2, 0, 3, 0])), 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' def test_coo_matrix(self): From 66c4515858aee43e3893cc78a91df609e55362da Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 10:19:23 -0700 Subject: [PATCH 057/121] Improved type hints [skip ci] --- pgvector/bit.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index ac99a14..f95a18f 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -1,12 +1,11 @@ from __future__ import annotations import numpy as np from struct import pack, unpack_from -from typing import Any from warnings import warn class Bit: - def __init__(self, value: Any) -> None: + def __init__(self, value: object) -> None: if isinstance(value, bytes): self._len = 8 * len(value) self._data = value @@ -16,12 +15,18 @@ def __init__(self, value: Any) -> None: else: value = np.asarray(value) + # for mypy + assert isinstance(value, np.ndarray) + if value.dtype != np.bool: # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): warn('expected elements to be boolean', stacklevel=2) value = value.astype(bool) + # for mypy + assert isinstance(value, np.ndarray) + if value.ndim != 1: raise ValueError('expected ndim to be 1') From 110bd425df368a6c8a412bb70c6d32967408aae4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 10:43:52 -0700 Subject: [PATCH 058/121] Improved type hints [skip ci] --- pgvector/halfvec.py | 6 +++--- pgvector/vector.py | 6 +++--- tests/test_half_vector.py | 6 +++--- tests/test_sqlalchemy.py | 2 +- tests/test_vector.py | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 7b24037..2d9507c 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -4,7 +4,7 @@ class HalfVector: - def __init__(self, value: object) -> None: + def __init__(self, value: list[float] | tuple[float, ...] | np.ndarray) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f2': value = np.asarray(value, dtype='>f2') @@ -55,7 +55,7 @@ def _to_db(cls, value: object, dim: int | None = None) -> str | None: return value if not isinstance(value, cls): - value = cls(value) + value = cls(value) # ty: ignore[invalid-argument-type] if dim is not None and value.dimensions() != dim: raise ValueError('expected %d dimensions, not %d' % (dim, value.dimensions())) @@ -68,7 +68,7 @@ def _to_db_binary(cls, value: object) -> bytes | None: return value if not isinstance(value, cls): - value = cls(value) + value = cls(value) # ty: ignore[invalid-argument-type] return value.to_binary() diff --git a/pgvector/vector.py b/pgvector/vector.py index 272da3a..2af8416 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -4,7 +4,7 @@ class Vector: - def __init__(self, value: object) -> None: + def __init__(self, value: list[float] | tuple[float, ...] | np.ndarray) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f4': value = np.asarray(value, dtype='>f4') @@ -55,7 +55,7 @@ def _to_db(cls, value: object, dim: int | None = None) -> str | None: return value if not isinstance(value, cls): - value = cls(value) + value = cls(value) # ty: ignore[invalid-argument-type] if dim is not None and value.dimensions() != dim: raise ValueError('expected %d dimensions, not %d' % (dim, value.dimensions())) @@ -68,7 +68,7 @@ def _to_db_binary(cls, value: object) -> bytes | None: return value if not isinstance(value, cls): - value = cls(value) + value = cls(value) # ty: ignore[invalid-argument-type] return value.to_binary() diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 78b4977..f7fe347 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -10,7 +10,7 @@ def test_list(self): def test_list_str(self): with pytest.raises(ValueError, match='could not convert string to float'): - HalfVector([1, 'two', 3]) + HalfVector([1, 'two', 3]) # ty: ignore[invalid-argument-type] def test_tuple(self): assert HalfVector((1, 2, 3)).to_list() == [1, 2, 3] @@ -27,12 +27,12 @@ def test_ndarray_same_object(self): def test_ndim_two(self): with pytest.raises(ValueError) as error: - HalfVector([[1, 2], [3, 4]]) + HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected ndim to be 1' def test_ndim_zero(self): with pytest.raises(ValueError) as error: - HalfVector(1) + HalfVector(1) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected ndim to be 1' def test_repr(self): diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index f57edc4..e1acdc6 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -601,7 +601,7 @@ async def test_halfvec(self, engine): embedding = [1, 2, 3] session.add(Item(id=1, half_embedding=embedding)) item = await session.get_one(Item, 1) - assert item.half_embedding == HalfVector(embedding) + assert item.half_embedding == HalfVector(embedding) # ty: ignore[invalid-argument-type] await engine.dispose() diff --git a/tests/test_vector.py b/tests/test_vector.py index e5a16fe..789efea 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -10,7 +10,7 @@ def test_list(self): def test_list_str(self): with pytest.raises(ValueError, match='could not convert string to float'): - Vector([1, 'two', 3]) + Vector([1, 'two', 3]) # ty: ignore[invalid-argument-type] def test_tuple(self): assert Vector((1, 2, 3)).to_list() == [1, 2, 3] @@ -27,12 +27,12 @@ def test_ndarray_same_object(self): def test_ndim_two(self): with pytest.raises(ValueError) as error: - Vector([[1, 2], [3, 4]]) + Vector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected ndim to be 1' def test_ndim_zero(self): with pytest.raises(ValueError) as error: - Vector(1) + Vector(1) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected ndim to be 1' def test_repr(self): From 03dd0e0e76cc4aff49c8dc8c7fa95e448db21382 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 10:48:25 -0700 Subject: [PATCH 059/121] Fixed error with mypy [skip ci] --- tests/test_sqlalchemy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index e1acdc6..dbb6f12 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -8,7 +8,7 @@ from sqlalchemy.exc import StatementError from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.ext.automap import automap_base -from sqlalchemy.orm import declarative_base, mapped_column, Session +from sqlalchemy.orm import mapped_column, DeclarativeBase, Session from sqlalchemy.sql import func psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') @@ -64,7 +64,9 @@ def asyncpg_connect(dbapi_connection, connection_record): session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) session.commit() -Base = declarative_base() + +class Base(DeclarativeBase): + pass class Item(Base): From fbea9d7ed6eb007b404e85628047740473b11ecb Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 10:52:15 -0700 Subject: [PATCH 060/121] Improved type hints [skip ci] --- pgvector/bit.py | 2 +- tests/test_bit.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index f95a18f..08ab910 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -5,7 +5,7 @@ class Bit: - def __init__(self, value: object) -> None: + def __init__(self, value: bytes | str | list[bool] | tuple[bool, ...] | np.ndarray) -> None: if isinstance(value, bytes): self._len = 8 * len(value) self._data = value diff --git a/tests/test_bit.py b/tests/test_bit.py index 5a71642..0e29eb7 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -9,11 +9,11 @@ def test_list(self): def test_list_none(self): with pytest.warns(UserWarning, match='expected elements to be boolean'): - assert Bit([True, None, True]).to_text() == '101' + assert Bit([True, None, True]).to_text() == '101' # ty: ignore[invalid-argument-type] def test_list_int(self): with pytest.warns(UserWarning, match='expected elements to be boolean'): - assert Bit([254, 7, 0]).to_text() == '110' + assert Bit([254, 7, 0]).to_text() == '110' # ty: ignore[invalid-argument-type] def test_tuple(self): assert Bit((True, False, True)).to_list() == [True, False, True] @@ -46,12 +46,12 @@ def test_ndarray_uint16(self): def test_ndim_two(self): with pytest.raises(ValueError) as error: - Bit([[True, False], [True, False]]) + Bit([[True, False], [True, False]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected ndim to be 1' def test_ndim_zero(self): with pytest.raises(ValueError) as error: - Bit(True) + Bit(True) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected ndim to be 1' def test_repr(self): From 0178236b34cad23346c36810fc6c9d0133aa311f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 10:53:46 -0700 Subject: [PATCH 061/121] Added comments [skip ci] --- pgvector/halfvec.py | 1 + pgvector/vector.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 2d9507c..638de94 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -15,6 +15,7 @@ def __init__(self, value: list[float] | tuple[float, ...] | np.ndarray) -> None: if value.ndim != 1: raise ValueError('expected ndim to be 1') + # atleast_1d for ty self._value = np.atleast_1d(value) def __repr__(self) -> str: diff --git a/pgvector/vector.py b/pgvector/vector.py index 2af8416..a101a6f 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -15,6 +15,7 @@ def __init__(self, value: list[float] | tuple[float, ...] | np.ndarray) -> None: if value.ndim != 1: raise ValueError('expected ndim to be 1') + # atleast_1d for ty self._value = np.atleast_1d(value) def __repr__(self) -> str: From e86d7c40938fdb4ef85f54ea28ba9bbd8baed03b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 11:20:23 -0700 Subject: [PATCH 062/121] Improved type hints [skip ci] --- pgvector/bit.py | 2 +- pgvector/halfvec.py | 3 ++- pgvector/vector.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 08ab910..70a5df3 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -44,7 +44,7 @@ def __eq__(self, other: object) -> bool: def to_list(self) -> list[bool]: return self.to_numpy().tolist() - def to_numpy(self) -> np.ndarray: + def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.bool]]: return np.unpackbits(np.frombuffer(self._data, dtype=np.uint8), count=self._len).astype(bool) def to_text(self) -> str: diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 638de94..3e1745d 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -32,7 +32,8 @@ def dimensions(self) -> int: def to_list(self) -> list[float]: return self._value.tolist() - def to_numpy(self) -> np.ndarray: + def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float16]]: + # TODO return native endian return self._value def to_text(self) -> str: diff --git a/pgvector/vector.py b/pgvector/vector.py index a101a6f..67d1232 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -32,7 +32,8 @@ def dimensions(self) -> int: def to_list(self) -> list[float]: return self._value.tolist() - def to_numpy(self) -> np.ndarray: + def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float32]]: + # TODO return native endian return self._value def to_text(self) -> str: From 1913c8449c000f1023fde3fbd8b86c8d95dc9ef0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 11:25:53 -0700 Subject: [PATCH 063/121] Improved type hints [skip ci] --- pgvector/sparsevec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 3685b31..205b0b1 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -66,7 +66,7 @@ def to_list(self) -> list[float]: vec[i] = v return vec - def to_numpy(self) -> np.ndarray: + def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float32]]: vec = np.repeat(0.0, self._dim).astype(np.float32) for i, v in zip(self._indices, self._values): vec[i] = v From 6effd62635b8c2928e3bd2ee0b17b960059a7ba7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 11:40:30 -0700 Subject: [PATCH 064/121] Dropped support for Python < 3.10 for examples [skip ci] --- examples/citus/pyproject.toml | 2 +- examples/cohere/pyproject.toml | 2 +- examples/colbert/pyproject.toml | 2 +- examples/colpali/pyproject.toml | 2 +- examples/gensim/pyproject.toml | 2 +- examples/hybrid_search/pyproject.toml | 2 +- examples/image_search/pyproject.toml | 2 +- examples/imagehash/pyproject.toml | 2 +- examples/implicit/pyproject.toml | 2 +- examples/lightfm/pyproject.toml | 2 +- examples/loading/pyproject.toml | 2 +- examples/openai/pyproject.toml | 2 +- examples/rag/pyproject.toml | 2 +- examples/rdkit/pyproject.toml | 2 +- examples/sentence_transformers/pyproject.toml | 2 +- examples/sparse_search/pyproject.toml | 2 +- examples/surprise/pyproject.toml | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/citus/pyproject.toml b/examples/citus/pyproject.toml index ee40a36..3fb37d4 100644 --- a/examples/citus/pyproject.toml +++ b/examples/citus/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/cohere/pyproject.toml b/examples/cohere/pyproject.toml index f0c88b7..1228b3d 100644 --- a/examples/cohere/pyproject.toml +++ b/examples/cohere/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/colbert/pyproject.toml b/examples/colbert/pyproject.toml index face4d2..7cf1e3f 100644 --- a/examples/colbert/pyproject.toml +++ b/examples/colbert/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/colpali/pyproject.toml b/examples/colpali/pyproject.toml index 23fb23f..20c26a7 100644 --- a/examples/colpali/pyproject.toml +++ b/examples/colpali/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/gensim/pyproject.toml b/examples/gensim/pyproject.toml index 7a33423..4d2865a 100644 --- a/examples/gensim/pyproject.toml +++ b/examples/gensim/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/hybrid_search/pyproject.toml b/examples/hybrid_search/pyproject.toml index b5a904a..130f287 100644 --- a/examples/hybrid_search/pyproject.toml +++ b/examples/hybrid_search/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/image_search/pyproject.toml b/examples/image_search/pyproject.toml index 7644382..8ada0a7 100644 --- a/examples/image_search/pyproject.toml +++ b/examples/image_search/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/imagehash/pyproject.toml b/examples/imagehash/pyproject.toml index cf06c2b..4f0e2b4 100644 --- a/examples/imagehash/pyproject.toml +++ b/examples/imagehash/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/implicit/pyproject.toml b/examples/implicit/pyproject.toml index c03b187..ec305ee 100644 --- a/examples/implicit/pyproject.toml +++ b/examples/implicit/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/lightfm/pyproject.toml b/examples/lightfm/pyproject.toml index c202058..047e3f1 100644 --- a/examples/lightfm/pyproject.toml +++ b/examples/lightfm/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/loading/pyproject.toml b/examples/loading/pyproject.toml index ee40a36..3fb37d4 100644 --- a/examples/loading/pyproject.toml +++ b/examples/loading/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/openai/pyproject.toml b/examples/openai/pyproject.toml index 3e6661a..5665e02 100644 --- a/examples/openai/pyproject.toml +++ b/examples/openai/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/rag/pyproject.toml b/examples/rag/pyproject.toml index fa0dcfd..2e74790 100644 --- a/examples/rag/pyproject.toml +++ b/examples/rag/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/rdkit/pyproject.toml b/examples/rdkit/pyproject.toml index f8c035a..b717809 100644 --- a/examples/rdkit/pyproject.toml +++ b/examples/rdkit/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/sentence_transformers/pyproject.toml b/examples/sentence_transformers/pyproject.toml index b5a904a..130f287 100644 --- a/examples/sentence_transformers/pyproject.toml +++ b/examples/sentence_transformers/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/sparse_search/pyproject.toml b/examples/sparse_search/pyproject.toml index 7927c34..d9adf93 100644 --- a/examples/sparse_search/pyproject.toml +++ b/examples/sparse_search/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ diff --git a/examples/surprise/pyproject.toml b/examples/surprise/pyproject.toml index 94c6f13..bd0cff2 100644 --- a/examples/surprise/pyproject.toml +++ b/examples/surprise/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "example" version = "0.1.0" -requires-python = ">= 3.9" +requires-python = ">= 3.10" [dependency-groups] dev = [ From 35c1abf3e4cb06b29b996336b99d5787edef3ca1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 12:06:09 -0700 Subject: [PATCH 065/121] Removed re-exported classes --- CHANGELOG.md | 1 + pgvector/asyncpg/__init__.py | 8 +------- pgvector/django/__init__.py | 7 +------ pgvector/peewee/__init__.py | 7 +------ pgvector/psycopg/__init__.py | 7 ------- pgvector/psycopg2/__init__.py | 7 +------ pgvector/sqlalchemy/__init__.py | 5 ----- 7 files changed, 5 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ff616b..4ba2b90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.5.0 (unreleased) - Added type hints +- Removed re-exported classes (use top-level `pgvector` package instead) - Dropped support for Python < 3.10 - Dropped support for SQLAlchemy < 2 diff --git a/pgvector/asyncpg/__init__.py b/pgvector/asyncpg/__init__.py index c6a3b4e..b3b4440 100644 --- a/pgvector/asyncpg/__init__.py +++ b/pgvector/asyncpg/__init__.py @@ -1,11 +1,5 @@ from .register import register_vector -# TODO remove -from .. import Vector, HalfVector, SparseVector - __all__ = [ - 'register_vector', - 'Vector', - 'HalfVector', - 'SparseVector' + 'register_vector' ] diff --git a/pgvector/django/__init__.py b/pgvector/django/__init__.py index 43c64a3..3ed0f83 100644 --- a/pgvector/django/__init__.py +++ b/pgvector/django/__init__.py @@ -6,9 +6,6 @@ from .sparsevec import SparseVectorField from .vector import VectorField -# TODO remove -from .. import HalfVector, SparseVector - __all__ = [ 'VectorExtension', 'VectorField', @@ -22,7 +19,5 @@ 'CosineDistance', 'L1Distance', 'HammingDistance', - 'JaccardDistance', - 'HalfVector', - 'SparseVector' + 'JaccardDistance' ] diff --git a/pgvector/peewee/__init__.py b/pgvector/peewee/__init__.py index df21200..f557517 100644 --- a/pgvector/peewee/__init__.py +++ b/pgvector/peewee/__init__.py @@ -3,14 +3,9 @@ from .sparsevec import SparseVectorField from .vector import VectorField -# TODO remove -from .. import HalfVector, SparseVector - __all__ = [ 'VectorField', 'HalfVectorField', 'FixedBitField', - 'SparseVectorField', - 'HalfVector', - 'SparseVector' + 'SparseVectorField' ] diff --git a/pgvector/psycopg/__init__.py b/pgvector/psycopg/__init__.py index 980af84..8922eda 100644 --- a/pgvector/psycopg/__init__.py +++ b/pgvector/psycopg/__init__.py @@ -1,13 +1,6 @@ from .register import register_vector, register_vector_async -# TODO remove -from .. import Bit, HalfVector, SparseVector, Vector - __all__ = [ 'register_vector', 'register_vector_async', - 'Vector', - 'HalfVector', - 'Bit', - 'SparseVector' ] diff --git a/pgvector/psycopg2/__init__.py b/pgvector/psycopg2/__init__.py index 33e5124..b3b4440 100644 --- a/pgvector/psycopg2/__init__.py +++ b/pgvector/psycopg2/__init__.py @@ -1,10 +1,5 @@ from .register import register_vector -# TODO remove -from .. import HalfVector, SparseVector - __all__ = [ - 'register_vector', - 'HalfVector', - 'SparseVector' + 'register_vector' ] diff --git a/pgvector/sqlalchemy/__init__.py b/pgvector/sqlalchemy/__init__.py index 52adf88..593876f 100644 --- a/pgvector/sqlalchemy/__init__.py +++ b/pgvector/sqlalchemy/__init__.py @@ -5,17 +5,12 @@ from .vector import VECTOR from .vector import VECTOR as Vector -# TODO remove -from .. import HalfVector, SparseVector - __all__ = [ 'Vector', 'VECTOR', 'HALFVEC', 'BIT', 'SPARSEVEC', - 'HalfVector', - 'SparseVector', 'avg', 'sum' ] From b028150fe62dc2065206dab1d1bfea2ed331a134 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 12:07:05 -0700 Subject: [PATCH 066/121] Removed utils package [skip ci] --- CHANGELOG.md | 1 + pgvector/utils/__init__.py | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) delete mode 100644 pgvector/utils/__init__.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ba2b90..c535b96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.5.0 (unreleased) - Added type hints +- Removed `utils` package (use top-level `pgvector` package instead) - Removed re-exported classes (use top-level `pgvector` package instead) - Dropped support for Python < 3.10 - Dropped support for SQLAlchemy < 2 diff --git a/pgvector/utils/__init__.py b/pgvector/utils/__init__.py deleted file mode 100644 index 8cdb5d6..0000000 --- a/pgvector/utils/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# TODO remove -from .. import Bit, HalfVector, SparseVector, Vector - -__all__ = [ - 'Vector', - 'HalfVector', - 'Bit', - 'SparseVector' -] From c4c46ab203fd6077ede85704d7c6e5802a2aee2f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 12:10:50 -0700 Subject: [PATCH 067/121] Removed support for tuples from Vector, HalfVector, and Bit constructors [skip ci] --- pgvector/bit.py | 2 +- pgvector/halfvec.py | 2 +- pgvector/vector.py | 2 +- tests/test_bit.py | 3 --- tests/test_half_vector.py | 3 --- tests/test_vector.py | 3 --- 6 files changed, 3 insertions(+), 12 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 70a5df3..9a65593 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -5,7 +5,7 @@ class Bit: - def __init__(self, value: bytes | str | list[bool] | tuple[bool, ...] | np.ndarray) -> None: + def __init__(self, value: bytes | str | list[bool] | np.ndarray) -> None: if isinstance(value, bytes): self._len = 8 * len(value) self._data = value diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 3e1745d..c52a33c 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -4,7 +4,7 @@ class HalfVector: - def __init__(self, value: list[float] | tuple[float, ...] | np.ndarray) -> None: + def __init__(self, value: list[float] | np.ndarray) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f2': value = np.asarray(value, dtype='>f2') diff --git a/pgvector/vector.py b/pgvector/vector.py index 67d1232..51b4939 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -4,7 +4,7 @@ class Vector: - def __init__(self, value: list[float] | tuple[float, ...] | np.ndarray) -> None: + def __init__(self, value: list[float] | np.ndarray) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f4': value = np.asarray(value, dtype='>f4') diff --git a/tests/test_bit.py b/tests/test_bit.py index 0e29eb7..8663f36 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -15,9 +15,6 @@ def test_list_int(self): with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit([254, 7, 0]).to_text() == '110' # ty: ignore[invalid-argument-type] - def test_tuple(self): - assert Bit((True, False, True)).to_list() == [True, False, True] - def test_str(self): assert Bit('101').to_list() == [True, False, True] diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index f7fe347..b619cdf 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -12,9 +12,6 @@ def test_list_str(self): with pytest.raises(ValueError, match='could not convert string to float'): HalfVector([1, 'two', 3]) # ty: ignore[invalid-argument-type] - def test_tuple(self): - assert HalfVector((1, 2, 3)).to_list() == [1, 2, 3] - def test_ndarray(self): arr = np.array([1, 2, 3]) assert HalfVector(arr).to_list() == [1, 2, 3] diff --git a/tests/test_vector.py b/tests/test_vector.py index 789efea..90550b0 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -12,9 +12,6 @@ def test_list_str(self): with pytest.raises(ValueError, match='could not convert string to float'): Vector([1, 'two', 3]) # ty: ignore[invalid-argument-type] - def test_tuple(self): - assert Vector((1, 2, 3)).to_list() == [1, 2, 3] - def test_ndarray(self): arr = np.array([1, 2, 3]) assert Vector(arr).to_list() == [1, 2, 3] From ae0f549aa7f58af6a96a50aa98e4345789ac9401 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 14:20:26 -0700 Subject: [PATCH 068/121] Improved type hints [skip ci] --- pgvector/bit.py | 2 +- pgvector/halfvec.py | 2 +- pgvector/vector.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 9a65593..cf4fdb0 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -5,7 +5,7 @@ class Bit: - def __init__(self, value: bytes | str | list[bool] | np.ndarray) -> None: + def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.dtype[np.bool | np.uint8]]) -> None: if isinstance(value, bytes): self._len = 8 * len(value) self._data = value diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index c52a33c..b7a1e77 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -4,7 +4,7 @@ class HalfVector: - def __init__(self, value: list[float] | np.ndarray) -> None: + def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f2': value = np.asarray(value, dtype='>f2') diff --git a/pgvector/vector.py b/pgvector/vector.py index 51b4939..96bcd17 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -4,7 +4,7 @@ class Vector: - def __init__(self, value: list[float] | np.ndarray) -> None: + def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None: # asarray still copies if same dtype if not isinstance(value, np.ndarray) or value.dtype != '>f4': value = np.asarray(value, dtype='>f4') From 033923bf9c7dd8b697a778c14f204c1da5142b85 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 14:47:10 -0700 Subject: [PATCH 069/121] Improved internal storage format --- pgvector/halfvec.py | 50 +++++++++++++++++++++++---------------- pgvector/vector.py | 50 +++++++++++++++++++++++---------------- tests/test_half_vector.py | 12 ++++------ tests/test_vector.py | 12 ++++------ 4 files changed, 66 insertions(+), 58 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index b7a1e77..2b3554e 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -1,46 +1,52 @@ from __future__ import annotations import numpy as np -from struct import pack, unpack_from +import struct class HalfVector: def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None: - # asarray still copies if same dtype - if not isinstance(value, np.ndarray) or value.dtype != '>f2': - value = np.asarray(value, dtype='>f2') - - # for mypy - assert isinstance(value, np.ndarray) - - if value.ndim != 1: - raise ValueError('expected ndim to be 1') - - # atleast_1d for ty - self._value = np.atleast_1d(value) + if isinstance(value, list): + dim = len(value) + try: + self._value = struct.pack(f'>HH{dim}e', dim, 0, *value) + except struct.error as e: + raise ValueError('expected list[float]') + elif isinstance(value, np.ndarray): + if value.ndim != 1: + raise ValueError('expected ndim to be 1') + + # asarray still copies if same dtype + if value.dtype != '>f2': + value = np.asarray(value, dtype='>f2') + + self._value = struct.pack('>HH', value.shape[0], 0) + value.tobytes() + else: + raise ValueError('expected list or ndarray') def __repr__(self) -> str: return f'HalfVector({self.to_list()})' def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): - return np.array_equal(self.to_numpy(), other.to_numpy()) + return self.to_binary() == other.to_binary() return False def dimensions(self) -> int: - return len(self._value) + dim, = struct.unpack_from('>H', self._value) + return dim def to_list(self) -> list[float]: - return self._value.tolist() + return list(struct.unpack_from(f'>{self.dimensions()}e', self._value[4:])) def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float16]]: # TODO return native endian - return self._value + return np.frombuffer(self._value, dtype='>f2', count=self.dimensions(), offset=4) def to_text(self) -> str: - return '[' + ','.join([str(float(v)) for v in self._value]) + ']' + return '[' + ','.join([str(v) for v in self.to_list()]) + ']' def to_binary(self) -> bytes: - return pack('>HH', self.dimensions(), 0) + self._value.tobytes() + return self._value @classmethod def from_text(cls, value: str) -> HalfVector: @@ -48,8 +54,10 @@ def from_text(cls, value: str) -> HalfVector: @classmethod def from_binary(cls, value: bytes) -> HalfVector: - dim, unused = unpack_from('>HH', value) - return cls(np.frombuffer(value, dtype='>f2', count=dim, offset=4)) + # TODO check dimensions/length and unused + vec = cls.__new__(cls) + vec._value = value + return vec @classmethod def _to_db(cls, value: object, dim: int | None = None) -> str | None: diff --git a/pgvector/vector.py b/pgvector/vector.py index 96bcd17..6a0ed89 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -1,46 +1,52 @@ from __future__ import annotations import numpy as np -from struct import pack, unpack_from +import struct class Vector: def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None: - # asarray still copies if same dtype - if not isinstance(value, np.ndarray) or value.dtype != '>f4': - value = np.asarray(value, dtype='>f4') - - # for mypy - assert isinstance(value, np.ndarray) - - if value.ndim != 1: - raise ValueError('expected ndim to be 1') - - # atleast_1d for ty - self._value = np.atleast_1d(value) + if isinstance(value, list): + dim = len(value) + try: + self._value = struct.pack(f'>HH{dim}f', dim, 0, *value) + except struct.error as e: + raise ValueError('expected list[float]') + elif isinstance(value, np.ndarray): + if value.ndim != 1: + raise ValueError('expected ndim to be 1') + + # asarray still copies if same dtype + if value.dtype != '>f4': + value = np.asarray(value, dtype='>f4') + + self._value = struct.pack('>HH', value.shape[0], 0) + value.tobytes() + else: + raise ValueError('expected list or ndarray') def __repr__(self) -> str: return f'Vector({self.to_list()})' def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): - return np.array_equal(self.to_numpy(), other.to_numpy()) + return self.to_binary() == other.to_binary() return False def dimensions(self) -> int: - return len(self._value) + dim, = struct.unpack_from('>H', self._value) + return dim def to_list(self) -> list[float]: - return self._value.tolist() + return list(struct.unpack_from(f'>{self.dimensions()}f', self._value[4:])) def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float32]]: # TODO return native endian - return self._value + return np.frombuffer(self._value, dtype='>f4', count=self.dimensions(), offset=4) def to_text(self) -> str: - return '[' + ','.join([str(float(v)) for v in self._value]) + ']' + return '[' + ','.join([str(v) for v in self.to_list()]) + ']' def to_binary(self) -> bytes: - return pack('>HH', self.dimensions(), 0) + self._value.tobytes() + return self._value @classmethod def from_text(cls, value: str) -> Vector: @@ -48,8 +54,10 @@ def from_text(cls, value: str) -> Vector: @classmethod def from_binary(cls, value: bytes) -> Vector: - dim, unused = unpack_from('>HH', value) - return cls(np.frombuffer(value, dtype='>f4', count=dim, offset=4)) + # TODO check dimensions/length and unused + vec = cls.__new__(cls) + vec._value = value + return vec @classmethod def _to_db(cls, value: object, dim: int | None = None) -> str | None: diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index b619cdf..fbc1142 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -9,28 +9,24 @@ def test_list(self): assert HalfVector([1, 2, 3]).to_list() == [1, 2, 3] def test_list_str(self): - with pytest.raises(ValueError, match='could not convert string to float'): + with pytest.raises(ValueError) as error: HalfVector([1, 'two', 3]) # ty: ignore[invalid-argument-type] + assert str(error.value) == 'expected list[float]' def test_ndarray(self): arr = np.array([1, 2, 3]) assert HalfVector(arr).to_list() == [1, 2, 3] assert HalfVector(arr).to_numpy() is not arr - def test_ndarray_same_object(self): - arr = np.array([1, 2, 3], dtype='>f2') - assert HalfVector(arr).to_list() == [1, 2, 3] - assert HalfVector(arr).to_numpy() is arr - def test_ndim_two(self): with pytest.raises(ValueError) as error: HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected ndim to be 1' + assert str(error.value) == 'expected list[float]' def test_ndim_zero(self): with pytest.raises(ValueError) as error: HalfVector(1) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected ndim to be 1' + assert str(error.value) == 'expected list or ndarray' def test_repr(self): assert repr(HalfVector([1, 2, 3])) == 'HalfVector([1.0, 2.0, 3.0])' diff --git a/tests/test_vector.py b/tests/test_vector.py index 90550b0..cfb7878 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -9,28 +9,24 @@ def test_list(self): assert Vector([1, 2, 3]).to_list() == [1, 2, 3] def test_list_str(self): - with pytest.raises(ValueError, match='could not convert string to float'): + with pytest.raises(ValueError) as error: Vector([1, 'two', 3]) # ty: ignore[invalid-argument-type] + assert str(error.value) == 'expected list[float]' def test_ndarray(self): arr = np.array([1, 2, 3]) assert Vector(arr).to_list() == [1, 2, 3] assert Vector(arr).to_numpy() is not arr - def test_ndarray_same_object(self): - arr = np.array([1, 2, 3], dtype='>f4') - assert Vector(arr).to_list() == [1, 2, 3] - assert Vector(arr).to_numpy() is arr - def test_ndim_two(self): with pytest.raises(ValueError) as error: Vector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected ndim to be 1' + assert str(error.value) == 'expected list[float]' def test_ndim_zero(self): with pytest.raises(ValueError) as error: Vector(1) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected ndim to be 1' + assert str(error.value) == 'expected list or ndarray' def test_repr(self): assert repr(Vector([1, 2, 3])) == 'Vector([1.0, 2.0, 3.0])' From d3e376e18e25dd6d730fb1db1adbb0b463784f69 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 14:59:08 -0700 Subject: [PATCH 070/121] Improved internal storage format for Bit [skip ci] --- pgvector/bit.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index cf4fdb0..8e92653 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -7,8 +7,8 @@ class Bit: def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.dtype[np.bool | np.uint8]]) -> None: if isinstance(value, bytes): - self._len = 8 * len(value) - self._data = value + _len = 8 * len(value) + _data = value else: if isinstance(value, str): value = [v != '0' for v in value] @@ -30,28 +30,34 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d if value.ndim != 1: raise ValueError('expected ndim to be 1') - self._len = len(value) - self._data = np.packbits(value).tobytes() + _len = len(value) + _data = np.packbits(value).tobytes() + + self._value = pack('>i', _len) + _data def __repr__(self) -> str: return f'Bit({self.to_text()})' def __eq__(self, other: object) -> bool: if isinstance(other, self.__class__): - return self._len == other._len and self._data == other._data + return self.to_binary() == other.to_binary() return False + def _len(self): + _len, = unpack_from('>i', self._value) + return _len + def to_list(self) -> list[bool]: return self.to_numpy().tolist() def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.bool]]: - return np.unpackbits(np.frombuffer(self._data, dtype=np.uint8), count=self._len).astype(bool) + return np.unpackbits(np.frombuffer(self._value[4:], dtype=np.uint8), count=self._len()).astype(bool) def to_text(self) -> str: - return ''.join(format(v, '08b') for v in self._data)[:self._len] + return ''.join(format(v, '08b') for v in self._value[4:])[:self._len()] def to_binary(self) -> bytes: - return pack('>i', self._len) + self._data + return self._value @classmethod def from_text(cls, value: str) -> Bit: @@ -63,8 +69,7 @@ def from_binary(cls, value: bytes) -> Bit: raise ValueError('expected bytes') bit = cls.__new__(cls) - bit._len = unpack_from('>i', value)[0] - bit._data = value[4:] + bit._value = value return bit @classmethod From f74d85ccca54683189ef13bc47dacd0ea9ade75b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 15:04:04 -0700 Subject: [PATCH 071/121] Added tests for to_numpy [skip ci] --- tests/test_half_vector.py | 6 ++++++ tests/test_vector.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index fbc1142..5f218ea 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -39,6 +39,12 @@ def test_equality(self): def test_dimensions(self): assert HalfVector([1, 2, 3]).dimensions() == 3 + def test_to_numpy_readonly(self): + arr = HalfVector([1, 2, 3]).to_numpy() + with pytest.raises(ValueError) as error: + arr[0] = 4 + assert str(error.value) == 'assignment destination is read-only' + def test_from_text(self): vec = HalfVector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] diff --git a/tests/test_vector.py b/tests/test_vector.py index cfb7878..f67d48e 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -39,6 +39,12 @@ def test_equality(self): def test_dimensions(self): assert Vector([1, 2, 3]).dimensions() == 3 + def test_to_numpy_readonly(self): + arr = Vector([1, 2, 3]).to_numpy() + with pytest.raises(ValueError) as error: + arr[0] = 4 + assert str(error.value) == 'assignment destination is read-only' + def test_from_text(self): vec = Vector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] From 42767e56850f23e1e4bd57a28d74e021e6044ea5 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 15:06:39 -0700 Subject: [PATCH 072/121] Removed todos [skip ci] --- pgvector/halfvec.py | 1 - pgvector/vector.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 2b3554e..0a302e5 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -39,7 +39,6 @@ def to_list(self) -> list[float]: return list(struct.unpack_from(f'>{self.dimensions()}e', self._value[4:])) def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float16]]: - # TODO return native endian return np.frombuffer(self._value, dtype='>f2', count=self.dimensions(), offset=4) def to_text(self) -> str: diff --git a/pgvector/vector.py b/pgvector/vector.py index 6a0ed89..58d7fe2 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -39,7 +39,6 @@ def to_list(self) -> list[float]: return list(struct.unpack_from(f'>{self.dimensions()}f', self._value[4:])) def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float32]]: - # TODO return native endian return np.frombuffer(self._value, dtype='>f4', count=self.dimensions(), offset=4) def to_text(self) -> str: From d5dfd138bf841baf6474e5d69e672f582957740f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 15:07:32 -0700 Subject: [PATCH 073/121] Removed unneeded code [skip ci] --- pgvector/halfvec.py | 2 +- pgvector/vector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 0a302e5..85118ef 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -9,7 +9,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float dim = len(value) try: self._value = struct.pack(f'>HH{dim}e', dim, 0, *value) - except struct.error as e: + except struct.error: raise ValueError('expected list[float]') elif isinstance(value, np.ndarray): if value.ndim != 1: diff --git a/pgvector/vector.py b/pgvector/vector.py index 58d7fe2..1245442 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -9,7 +9,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float dim = len(value) try: self._value = struct.pack(f'>HH{dim}f', dim, 0, *value) - except struct.error as e: + except struct.error: raise ValueError('expected list[float]') elif isinstance(value, np.ndarray): if value.ndim != 1: From f58842c91c5e3332d507587f14ebc566b7619d5c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 15:44:45 -0700 Subject: [PATCH 074/121] Improved tests [skip ci] --- tests/test_half_vector.py | 15 +++++++++------ tests/test_vector.py | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 5f218ea..a5c303c 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -8,22 +8,25 @@ class TestHalfVector: def test_list(self): assert HalfVector([1, 2, 3]).to_list() == [1, 2, 3] + def test_list_empty(self): + assert HalfVector([]).to_list() == [] + def test_list_str(self): with pytest.raises(ValueError) as error: HalfVector([1, 'two', 3]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' + def test_list_list(self): + with pytest.raises(ValueError) as error: + HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] + assert str(error.value) == 'expected list[float]' + def test_ndarray(self): arr = np.array([1, 2, 3]) assert HalfVector(arr).to_list() == [1, 2, 3] assert HalfVector(arr).to_numpy() is not arr - def test_ndim_two(self): - with pytest.raises(ValueError) as error: - HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected list[float]' - - def test_ndim_zero(self): + def test_int(self): with pytest.raises(ValueError) as error: HalfVector(1) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list or ndarray' diff --git a/tests/test_vector.py b/tests/test_vector.py index f67d48e..337c7a7 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -8,22 +8,25 @@ class TestVector: def test_list(self): assert Vector([1, 2, 3]).to_list() == [1, 2, 3] + def test_list_empty(self): + assert Vector([]).to_list() == [] + def test_list_str(self): with pytest.raises(ValueError) as error: Vector([1, 'two', 3]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' + def test_list_list(self): + with pytest.raises(ValueError) as error: + Vector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] + assert str(error.value) == 'expected list[float]' + def test_ndarray(self): arr = np.array([1, 2, 3]) assert Vector(arr).to_list() == [1, 2, 3] assert Vector(arr).to_numpy() is not arr - def test_ndim_two(self): - with pytest.raises(ValueError) as error: - Vector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected list[float]' - - def test_ndim_zero(self): + def test_int(self): with pytest.raises(ValueError) as error: Vector(1) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list or ndarray' From 60990c27136f6f017d5ed140e21259fe3665acc8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 16:09:02 -0700 Subject: [PATCH 075/121] Changed vector type to return Vector object instead of NumPy array - closes #99 --- CHANGELOG.md | 1 + pgvector/django/vector.py | 33 +++++++++------------------- pgvector/peewee/vector.py | 3 +-- pgvector/psycopg/vector.py | 4 ++-- pgvector/psycopg2/vector.py | 2 +- pgvector/sqlalchemy/vector.py | 3 +-- pgvector/vector.py | 12 +++++----- tests/test_asyncpg.py | 16 +++++--------- tests/test_django.py | 16 ++++++-------- tests/test_peewee.py | 9 ++++---- tests/test_pg8000.py | 6 ++--- tests/test_psycopg.py | 38 +++++++++++++++----------------- tests/test_psycopg2.py | 22 +++++++++---------- tests/test_sqlalchemy.py | 41 +++++++++++++++-------------------- tests/test_sqlmodel.py | 14 +++++------- 15 files changed, 93 insertions(+), 127 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c535b96..760f1e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.5.0 (unreleased) - Added type hints +- Changed `vector` type to return `Vector` object instead of NumPy array - Removed `utils` package (use top-level `pgvector` package instead) - Removed re-exported classes (use top-level `pgvector` package instead) - Dropped support for Python < 3.10 diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index 7a12f78..87d8989 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -1,6 +1,5 @@ from django import forms from django.db.models import Field -import numpy as np from typing import Any from .. import Vector @@ -25,13 +24,16 @@ def db_type(self, connection: Any) -> str: return 'vector' return 'vector(%d)' % self.dimensions - def from_db_value(self, value: Any, expression: Any, connection: Any) -> np.ndarray | None: + def from_db_value(self, value: Any, expression: Any, connection: Any) -> Vector | None: return Vector._from_db(value) - def to_python(self, value: Any) -> np.ndarray | None: - if isinstance(value, list): - return np.array(value, dtype=np.float32) - return Vector._from_db(value) + def to_python(self, value: Any) -> Vector | None: + if value is None or isinstance(value, Vector): + return value + elif isinstance(value, str): + return Vector._from_db(value) + else: + return Vector(value) def get_prep_value(self, value: Any) -> str | None: return Vector._to_db(value) @@ -39,16 +41,6 @@ def get_prep_value(self, value: Any) -> str | None: def value_to_string(self, obj: Any) -> str | None: return self.get_prep_value(self.value_from_object(obj)) - def validate(self, value: Any, model_instance: Any) -> None: - if isinstance(value, np.ndarray): - value = value.tolist() - super().validate(value, model_instance) - - def run_validators(self, value: Any) -> None: - if isinstance(value, np.ndarray): - value = value.tolist() - super().run_validators(value) - def formfield(self, form_class: Any = None, choices_form_class: Any = None, **kwargs: Any) -> forms.Field: return super().formfield( form_class=VectorFormField if form_class is None else form_class, @@ -59,19 +51,14 @@ def formfield(self, form_class: Any = None, choices_form_class: Any = None, **kw class VectorWidget(forms.TextInput): def format_value(self, value: Any) -> str | None: - if isinstance(value, np.ndarray): - value = value.tolist() + if isinstance(value, Vector): + value = value.to_list() return super().format_value(value) class VectorFormField(forms.CharField): widget = VectorWidget - def has_changed(self, initial: Any, data: Any) -> bool: - if isinstance(initial, np.ndarray): - initial = initial.tolist() - return super().has_changed(initial, data) - def to_python(self, value: Any) -> Any: if isinstance(value, str) and value == '': return None diff --git a/pgvector/peewee/vector.py b/pgvector/peewee/vector.py index aa9509b..9652071 100644 --- a/pgvector/peewee/vector.py +++ b/pgvector/peewee/vector.py @@ -1,4 +1,3 @@ -import numpy as np from peewee import Expression, Field from typing import Any from .. import Vector @@ -17,7 +16,7 @@ def get_modifiers(self) -> list[int] | None: def db_value(self, value: object) -> str | None: return Vector._to_db(value) - def python_value(self, value: Any) -> np.ndarray | None: + def python_value(self, value: Any) -> Vector | None: return Vector._from_db(value) def _distance(self, op: str, vector: object) -> Expression: diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index 1492017..dcdc770 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -31,7 +31,7 @@ class VectorLoader(Loader): format = Format.TEXT - def load(self, data: Buffer) -> np.ndarray | None: + def load(self, data: Buffer) -> Vector | None: if isinstance(data, memoryview): data = bytes(data) return Vector._from_db(data.decode('utf8')) @@ -41,7 +41,7 @@ class VectorBinaryLoader(VectorLoader): format = Format.BINARY - def load(self, data: Buffer) -> np.ndarray | None: + def load(self, data: Buffer) -> Vector | None: if isinstance(data, (bytearray, memoryview)): data = bytes(data) return Vector._from_db_binary(data) diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 2a2c617..ff5ad99 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -11,7 +11,7 @@ def getquoted(self) -> bytes: return adapt(Vector._to_db(self._value)).getquoted() -def cast_vector(value: str | None, cur: cursor) -> np.ndarray | None: +def cast_vector(value: str | None, cur: cursor) -> Vector | None: return Vector._from_db(value) diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index cf01c2f..14d66f3 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -1,4 +1,3 @@ -import numpy as np from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float, String from sqlalchemy import Dialect, Operators @@ -32,7 +31,7 @@ def process(value: Any) -> Any: return process def result_processor(self, dialect: Dialect, coltype: Any) -> Any: - def process(value: Any) -> np.ndarray | None: + def process(value: Any) -> Vector | None: return Vector._from_db(value) return process diff --git a/pgvector/vector.py b/pgvector/vector.py index 1245442..525eace 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -82,15 +82,15 @@ def _to_db_binary(cls, value: object) -> bytes | None: return value.to_binary() @classmethod - def _from_db(cls, value: str | np.ndarray | None) -> np.ndarray | None: - if value is None or isinstance(value, np.ndarray): + def _from_db(cls, value: str | Vector | None) -> Vector | None: + if value is None or isinstance(value, Vector): return value - return cls.from_text(value).to_numpy().astype(np.float32) + return cls.from_text(value) @classmethod - def _from_db_binary(cls, value: bytes | np.ndarray | None) -> np.ndarray | None: - if value is None or isinstance(value, np.ndarray): + def _from_db_binary(cls, value: bytes | Vector | None) -> Vector | None: + if value is None or isinstance(value, Vector): return value - return cls.from_binary(value).to_numpy().astype(np.float32) + return cls.from_binary(value) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index cabfc5a..45c8c5d 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -20,9 +20,8 @@ async def test_vector(self): await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert np.array_equal(res[0]['embedding'], embedding.to_numpy()) - assert res[0]['embedding'].dtype == np.float32 - assert np.array_equal(res[1]['embedding'], embedding2) + assert res[0]['embedding'] == embedding + assert res[1]['embedding'] == Vector(embedding2) assert res[2]['embedding'] is None # ensures binary format is correct @@ -116,10 +115,8 @@ async def test_vector_array(self): await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings2[0], embeddings2[1]) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert np.array_equal(res[0]['embeddings'][0], embeddings[0].to_numpy()) - assert np.array_equal(res[0]['embeddings'][1], embeddings[1].to_numpy()) - assert np.array_equal(res[1]['embeddings'][0], embeddings2[0]) - assert np.array_equal(res[1]['embeddings'][1], embeddings2[1]) + assert res[0]['embeddings'] == embeddings + assert res[1]['embeddings'] == [Vector(e) for e in embeddings2] await conn.close() @@ -140,7 +137,6 @@ async def init(conn): await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") - assert np.array_equal(res[0]['embedding'], embedding.to_numpy()) - assert res[0]['embedding'].dtype == np.float32 - assert np.array_equal(res[1]['embedding'], embedding2) + assert res[0]['embedding'] == embedding + assert res[1]['embedding'] == Vector(embedding2) assert res[2]['embedding'] is None diff --git a/tests/test_django.py b/tests/test_django.py index 9e44401..fb37629 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -14,7 +14,7 @@ import numpy as np import os import pgvector.django -from pgvector import HalfVector, SparseVector +from pgvector import HalfVector, SparseVector, Vector from pgvector.django import VectorExtension, VectorField, HalfVectorField, BitField, SparseVectorField, IvfflatIndex, HnswIndex, L2Distance, MaxInnerProduct, CosineDistance, L1Distance, HammingDistance, JaccardDistance from unittest import mock @@ -167,12 +167,11 @@ def setup_method(self): def test_vector(self): Item(id=1, embedding=[1, 2, 3]).save() item = Item.objects.get(pk=1) - assert np.array_equal(item.embedding, [1, 2, 3]) - assert item.embedding.dtype == np.float32 + assert item.embedding == Vector([1, 2, 3]) def test_vector_l2_distance(self): create_items() - distance = L2Distance('embedding', [1, 1, 1]) + distance = L2Distance('embedding', Vector([1, 1, 1])) items = Item.objects.annotate(distance=distance).order_by(distance) assert [v.id for v in items] == [1, 3, 2] assert [v.distance for v in items] == [0, 1, sqrt(3)] @@ -295,7 +294,7 @@ def test_vector_avg(self): Item(embedding=[1, 2, 3]).save() Item(embedding=[4, 5, 6]).save() avg = Item.objects.aggregate(Avg('embedding'))['embedding__avg'] - assert np.array_equal(avg, [2.5, 3.5, 4.5]) + assert avg == Vector([2.5, 3.5, 4.5]) def test_vector_sum(self): sum = Item.objects.aggregate(Sum('embedding'))['embedding__sum'] @@ -303,7 +302,7 @@ def test_vector_sum(self): Item(embedding=[1, 2, 3]).save() Item(embedding=[4, 5, 6]).save() sum = Item.objects.aggregate(Sum('embedding'))['embedding__sum'] - assert np.array_equal(sum, [5, 7, 9]) + assert sum == Vector([5, 7, 9]) def test_halfvec_avg(self): avg = Item.objects.aggregate(Avg('half_embedding'))['half_embedding__avg'] @@ -349,7 +348,7 @@ def test_vector_form_save(self): assert form.has_changed() assert form.is_valid() assert form.save() - assert np.array_equal(Item.objects.get(pk=1).embedding, [4, 5, 6]) + assert Item.objects.get(pk=1).embedding == Vector([4, 5, 6]) def test_vector_form_save_missing(self): Item(id=1).save() @@ -467,8 +466,7 @@ def test_vector_array(self): # this fails if the driver does not cast arrays item = Item.objects.get(pk=1) - assert np.array_equal(item.embeddings[0], [1, 2, 3]) - assert np.array_equal(item.embeddings[1], [4, 5, 6]) + assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] def test_double_array(self): Item(id=1, double_embedding=[1, 1, 1]).save() diff --git a/tests/test_peewee.py b/tests/test_peewee.py index 64fc009..826608a 100644 --- a/tests/test_peewee.py +++ b/tests/test_peewee.py @@ -1,7 +1,7 @@ from math import sqrt import numpy as np from peewee import Model, PostgresqlDatabase, fn -from pgvector import HalfVector, SparseVector +from pgvector import HalfVector, SparseVector, Vector from pgvector.peewee import VectorField, HalfVectorField, FixedBitField, SparseVectorField db = PostgresqlDatabase('pgvector_python_test') @@ -43,8 +43,7 @@ def setup_method(self): def test_vector(self): Item.create(id=1, embedding=[1, 2, 3]) item = Item.get_by_id(1) - assert np.array_equal(item.embedding, [1, 2, 3]) - assert item.embedding.dtype == np.float32 + assert item.embedding == Vector([1, 2, 3]) def test_vector_l2_distance(self): create_items() @@ -170,7 +169,7 @@ def test_vector_avg(self): Item.create(embedding=[1, 2, 3]) Item.create(embedding=[4, 5, 6]) avg = Item.select(fn.avg(Item.embedding).coerce(True)).scalar() - assert np.array_equal(avg, [2.5, 3.5, 4.5]) + assert avg == Vector([2.5, 3.5, 4.5]) def test_vector_sum(self): sum = Item.select(fn.sum(Item.embedding).coerce(True)).scalar() @@ -178,7 +177,7 @@ def test_vector_sum(self): Item.create(embedding=[1, 2, 3]) Item.create(embedding=[4, 5, 6]) sum = Item.select(fn.sum(Item.embedding).coerce(True)).scalar() - assert np.array_equal(sum, [5, 7, 9]) + assert sum == Vector([5, 7, 9]) def test_halfvec_avg(self): avg = Item.select(fn.avg(Item.half_embedding).coerce(True)).scalar() diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 61fbc4c..2e4e884 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -22,8 +22,7 @@ def test_vector(self): conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') - assert np.array_equal(res[0][0], embedding) - assert res[0][0].dtype == np.float32 + assert res[0][0] == Vector([1.5, 2, 3]) assert res[1][0] is None def test_vector_class(self): @@ -31,8 +30,7 @@ def test_vector_class(self): conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') - assert np.array_equal(res[0][0], embedding.to_numpy()) - assert res[0][0].dtype == np.float32 + assert res[0][0] == embedding assert res[1][0] is None def test_halfvec(self): diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index fb6aa4c..71eff2b 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -23,19 +23,18 @@ def test_vector(self): conn.execute('INSERT INTO psycopg_items (embedding) VALUES (%s), (NULL)', (embedding,)) res = conn.execute('SELECT embedding FROM psycopg_items ORDER BY id').fetchall() - assert np.array_equal(res[0][0], embedding) - assert res[0][0].dtype == np.float32 + assert res[0][0] == Vector(embedding) assert res[1][0] is None def test_vector_binary_format(self): embedding = np.array([1.5, 2, 3]) res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] - assert np.array_equal(res, embedding) + assert res == Vector(embedding) def test_vector_text_format(self): embedding = np.array([1.5, 2, 3]) res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] - assert np.array_equal(res, embedding) + assert res == Vector(embedding) def test_vector_binary_format_correct(self): embedding = np.array([1.5, 2, 3]) @@ -46,23 +45,23 @@ def test_vector_text_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] - assert np.array_equal(res, [3, 2, 1.5]) + assert res == Vector([3, 2, 1.5]) def test_vector_binary_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous res = next(conn.execute('SELECT %b::vector', (embedding,)))[0] - assert np.array_equal(res, [3, 2, 1.5]) + assert res == Vector([3, 2, 1.5]) def test_vector_class_binary_format(self): embedding = Vector([1.5, 2, 3]) res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] - assert np.array_equal(res, [1.5, 2, 3]) + assert res == embedding def test_vector_class_text_format(self): embedding = Vector([1.5, 2, 3]) res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] - assert np.array_equal(res, [1.5, 2, 3]) + assert res == embedding def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) @@ -146,33 +145,33 @@ def test_text_copy_to(self): assert row[1] == "[1.5,2,3]" def test_binary_copy_to(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) half_embedding = HalfVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (embedding, half_embedding) VALUES (%s, %s)', (embedding, half_embedding)) cur = conn.cursor() with cur.copy("COPY psycopg_items (embedding, half_embedding) TO STDOUT WITH (FORMAT BINARY)") as copy: for row in copy.rows(): - assert np.array_equal(Vector.from_binary(row[0]).to_numpy(), embedding) + assert Vector.from_binary(row[0]) == embedding assert HalfVector.from_binary(row[1]) == half_embedding def test_binary_copy_to_set_types(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) half_embedding = HalfVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (embedding, half_embedding) VALUES (%s, %s)', (embedding, half_embedding)) cur = conn.cursor() with cur.copy("COPY psycopg_items (embedding, half_embedding) TO STDOUT WITH (FORMAT BINARY)") as copy: copy.set_types(['vector', 'halfvec']) for row in copy.rows(): - assert np.array_equal(row[0], embedding) + assert row[0] == embedding assert row[1] == half_embedding def test_vector_array(self): - embeddings = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] + embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] conn.execute('INSERT INTO psycopg_items (embeddings) VALUES (%s)', (embeddings,)) res = next(conn.execute('SELECT embeddings FROM psycopg_items ORDER BY id')) - assert np.array_equal(res[0][0], embeddings[0]) - assert np.array_equal(res[0][1], embeddings[1]) + assert res[0][0] == embeddings[0] + assert res[0][1] == embeddings[1] def test_pool(self): def configure(conn): @@ -182,7 +181,7 @@ def configure(conn): with pool.connection() as conn: res = conn.execute("SELECT '[1,2,3]'::vector").fetchone() - assert np.array_equal(res[0], [1, 2, 3]) + assert res[0] == Vector([1, 2, 3]) pool.close() @@ -196,14 +195,13 @@ async def test_async(self): await register_vector_async(conn) - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) await conn.execute('INSERT INTO psycopg_items (embedding) VALUES (%s), (NULL)', (embedding,)) async with conn.cursor() as cur: await cur.execute('SELECT * FROM psycopg_items ORDER BY id') res = await cur.fetchall() - assert np.array_equal(res[0][1], embedding) - assert res[0][1].dtype == np.float32 + assert res[0][1] == embedding assert res[1][1] is None @pytest.mark.asyncio @@ -218,6 +216,6 @@ async def configure(conn): async with conn.cursor() as cur: await cur.execute("SELECT '[1,2,3]'::vector") res = await cur.fetchone() - assert np.array_equal(res[0], [1, 2, 3]) + assert res[0] == Vector([1, 2, 3]) await pool.close() diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 7f4932d..7029962 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -26,8 +26,7 @@ def test_vector(self): cur.execute('SELECT embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert np.array_equal(res[0][0], embedding) - assert res[0][0].dtype == np.float32 + assert res[0][0] == Vector(embedding) assert res[1][0] is None def test_vector_class(self): @@ -36,17 +35,16 @@ def test_vector_class(self): cur.execute('SELECT embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert np.array_equal(res[0][0], embedding.to_numpy()) - assert res[0][0].dtype == np.float32 + assert res[0][0] == embedding assert res[1][0] is None def test_halfvec(self): - embedding = [1.5, 2, 3] + embedding = HalfVector([1.5, 2, 3]) cur.execute('INSERT INTO psycopg2_items (half_embedding) VALUES (%s), (NULL)', (embedding,)) cur.execute('SELECT half_embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert res[0][0] == HalfVector([1.5, 2, 3]) + assert res[0][0] == embedding assert res[1][0] is None def test_halfvec_class(self): @@ -64,7 +62,7 @@ def test_bit(self): cur.execute('SELECT binary_embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert res[0][0] == '101' + assert res[0][0] == embedding assert res[1][0] is None def test_sparsevec(self): @@ -73,17 +71,17 @@ def test_sparsevec(self): cur.execute('SELECT sparse_embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert res[0][0] == SparseVector([1.5, 2, 3]) + assert res[0][0] == embedding assert res[1][0] is None def test_vector_array(self): - embeddings = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] + embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] cur.execute('INSERT INTO psycopg2_items (embeddings) VALUES (%s::vector[])', (embeddings,)) cur.execute('SELECT embeddings FROM psycopg2_items ORDER BY id') res = cur.fetchone() - assert np.array_equal(res[0][0], embeddings[0]) - assert np.array_equal(res[0][1], embeddings[1]) + assert res[0][0] == embeddings[0] + assert res[0][1] == embeddings[1] def test_halfvec_array(self): embeddings = [HalfVector([1.5, 2, 3]), HalfVector([4.5, 5, 6])] @@ -129,7 +127,7 @@ def test_pool(self): cur = conn.cursor() cur.execute("SELECT '[1,2,3]'::vector") res = cur.fetchone() - assert np.array_equal(res[0], [1, 2, 3]) + assert res[0] == Vector([1, 2, 3]) finally: pool.putconn(conn) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index dbb6f12..f4ec787 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -182,10 +182,8 @@ def test_orm(self, engine): assert items[0].id % 3 == 1 assert items[1].id % 3 == 2 assert items[2].id % 3 == 0 - assert np.array_equal(items[0].embedding, np.array([1.5, 2, 3])) - assert items[0].embedding.dtype == np.float32 - assert np.array_equal(items[1].embedding, np.array([4, 5, 6])) - assert items[1].embedding.dtype == np.float32 + assert items[0].embedding == Vector([1.5, 2, 3]) + assert items[1].embedding == Vector([4, 5, 6]) assert items[2].embedding is None def test_vector(self, engine): @@ -193,7 +191,7 @@ def test_vector(self, engine): session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() item = session.get_one(Item, 1) - assert np.array_equal(item.embedding, [1, 2, 3]) + assert item.embedding == Vector([1, 2, 3]) def test_vector_l2_distance(self, engine): create_items() @@ -435,7 +433,7 @@ def test_avg(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.query(avg(Item.embedding)).one()[0] - assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) + assert res == Vector([2.5, 3.5, 4.5]) def test_avg_orm(self, engine): with Session(engine) as session: @@ -444,7 +442,7 @@ def test_avg_orm(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.scalars(select(avg(Item.embedding))).one() - assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) + assert res == Vector([2.5, 3.5, 4.5]) def test_sum(self, engine): with Session(engine) as session: @@ -453,7 +451,7 @@ def test_sum(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.query(sum(Item.embedding)).one()[0] - assert np.array_equal(res, np.array([5, 7, 9])) + assert res == Vector([5, 7, 9]) def test_sum_orm(self, engine): with Session(engine) as session: @@ -462,7 +460,7 @@ def test_sum_orm(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.scalars(select(sum(Item.embedding))).one() - assert np.array_equal(res, np.array([5, 7, 9])) + assert res == Vector([5, 7, 9]) def test_bad_dimensions(self, engine): item = Item(embedding=[1, 2]) @@ -515,7 +513,7 @@ def test_automap(self, engine): with Session(engine) as session: session.execute(insert(AutoItem), [{'embedding': np.array([1, 2, 3])}]) item = session.query(AutoItem).first() - assert np.array_equal(item.embedding, [1, 2, 3]) + assert item.embedding == Vector([1, 2, 3]) def test_half_precision(self, engine): create_items() @@ -563,8 +561,7 @@ def test_vector_array(self, engine): # this fails if the driver does not cast arrays item = session.get_one(Item, 1) - assert np.array_equal(item.embeddings[0], [1, 2, 3]) - assert np.array_equal(item.embeddings[1], [4, 5, 6]) + assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] def test_halfvec_array(self, engine): with Session(engine) as session: @@ -587,10 +584,10 @@ async def test_vector(self, engine): async with async_session() as session: async with session.begin(): - embedding = np.array([1, 2, 3]) + embedding = Vector([1, 2, 3]) session.add(Item(id=1, embedding=embedding)) item = await session.get_one(Item, 1) - assert np.array_equal(item.embedding, embedding) + assert item.embedding == embedding await engine.dispose() @@ -600,10 +597,10 @@ async def test_halfvec(self, engine): async with async_session() as session: async with session.begin(): - embedding = [1, 2, 3] + embedding = HalfVector([1, 2, 3]) session.add(Item(id=1, half_embedding=embedding)) item = await session.get_one(Item, 1) - assert item.half_embedding == HalfVector(embedding) # ty: ignore[invalid-argument-type] + assert item.half_embedding == embedding await engine.dispose() @@ -631,10 +628,10 @@ async def test_sparsevec(self, engine): async with async_session() as session: async with session.begin(): - embedding = [1, 2, 3] + embedding = SparseVector([1, 2, 3]) session.add(Item(id=1, sparse_embedding=embedding)) item = await session.get_one(Item, 1) - assert item.sparse_embedding == SparseVector(embedding) + assert item.sparse_embedding == embedding await engine.dispose() @@ -647,7 +644,7 @@ async def test_avg(self, engine): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = await session.scalars(select(avg(Item.embedding))) - assert np.array_equal(res.one(), [2.5, 3.5, 4.5]) + assert res.first() == Vector([2.5, 3.5, 4.5]) await engine.dispose() @@ -665,12 +662,10 @@ async def test_vector_array(self, engine): async with session.begin(): session.add(Item(id=1, embeddings=[Vector([1, 2, 3]), Vector([4, 5, 6])])) item = await session.get_one(Item, 1) - assert np.array_equal(item.embeddings[0], [1, 2, 3]) - assert np.array_equal(item.embeddings[1], [4, 5, 6]) + assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] session.add(Item(id=2, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) item = await session.get_one(Item, 2) - assert np.array_equal(item.embeddings[0], [1, 2, 3]) - assert np.array_equal(item.embeddings[1], [4, 5, 6]) + assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] await engine.dispose() diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index d67c735..4bb9d97 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -1,5 +1,5 @@ import numpy as np -from pgvector import HalfVector, SparseVector +from pgvector import HalfVector, SparseVector, Vector from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest from sqlalchemy.exc import StatementError @@ -65,10 +65,8 @@ def test_orm(self): assert items[0].id == 1 assert items[1].id == 2 assert items[2].id == 3 - assert np.array_equal(items[0].embedding, np.array([1.5, 2, 3])) # type: ignore - assert items[0].embedding.dtype == np.float32 # type: ignore - assert np.array_equal(items[1].embedding, np.array([4, 5, 6])) # type: ignore - assert items[1].embedding.dtype == np.float32 # type: ignore + assert items[0].embedding == Vector([1.5, 2, 3]) + assert items[1].embedding == Vector([4, 5, 6]) assert items[2].embedding is None def test_vector(self): @@ -76,7 +74,7 @@ def test_vector(self): session.add(Item(id=1, embedding=[1, 2, 3])) session.commit() item = session.get_one(Item, 1) - assert np.array_equal(item.embedding, np.array([1, 2, 3])) # type: ignore + assert item.embedding == Vector([1, 2, 3]) def test_vector_l2_distance(self): create_items() @@ -202,7 +200,7 @@ def test_vector_avg(self): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.exec(select(avg(Item.embedding))).first() - assert np.array_equal(res, np.array([2.5, 3.5, 4.5])) # type: ignore + assert res == Vector([2.5, 3.5, 4.5]) def test_vector_sum(self): with Session(engine) as session: @@ -211,7 +209,7 @@ def test_vector_sum(self): session.add(Item(embedding=[1, 2, 3])) session.add(Item(embedding=[4, 5, 6])) res = session.exec(select(sum(Item.embedding))).first() - assert np.array_equal(res, np.array([5, 7, 9])) # type: ignore + assert res == Vector([5, 7, 9]) def test_halfvec_avg(self): with Session(engine) as session: From 2155f99e7c1b747763461902e257e4fdf710f0e3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 16:26:40 -0700 Subject: [PATCH 076/121] Improved naming [skip ci] --- pgvector/bit.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 8e92653..75a51e1 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -7,8 +7,8 @@ class Bit: def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.dtype[np.bool | np.uint8]]) -> None: if isinstance(value, bytes): - _len = 8 * len(value) - _data = value + length = 8 * len(value) + data = value else: if isinstance(value, str): value = [v != '0' for v in value] @@ -30,10 +30,10 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d if value.ndim != 1: raise ValueError('expected ndim to be 1') - _len = len(value) - _data = np.packbits(value).tobytes() + length = len(value) + data = np.packbits(value).tobytes() - self._value = pack('>i', _len) + _data + self._value = pack('>i', length) + data def __repr__(self) -> str: return f'Bit({self.to_text()})' @@ -43,18 +43,18 @@ def __eq__(self, other: object) -> bool: return self.to_binary() == other.to_binary() return False - def _len(self): - _len, = unpack_from('>i', self._value) - return _len + def _length(self): + length, = unpack_from('>i', self._value) + return length def to_list(self) -> list[bool]: return self.to_numpy().tolist() def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.bool]]: - return np.unpackbits(np.frombuffer(self._value[4:], dtype=np.uint8), count=self._len()).astype(bool) + return np.unpackbits(np.frombuffer(self._value[4:], dtype=np.uint8), count=self._length()).astype(bool) def to_text(self) -> str: - return ''.join(format(v, '08b') for v in self._value[4:])[:self._len()] + return ''.join(format(v, '08b') for v in self._value[4:])[:self._length()] def to_binary(self) -> bytes: return self._value From 057740bc1b24d9835116841485d2abc4ec0e377a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 17:24:02 -0700 Subject: [PATCH 077/121] Reduced dependency on NumPy [skip ci] --- pgvector/pg8000/register.py | 7 +++++-- pgvector/psycopg/vector.py | 1 - pgvector/psycopg2/vector.py | 7 +++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pgvector/pg8000/register.py b/pgvector/pg8000/register.py index 476495b..e26b083 100644 --- a/pgvector/pg8000/register.py +++ b/pgvector/pg8000/register.py @@ -1,4 +1,3 @@ -import numpy as np from pg8000.native import Connection from .. import Vector, HalfVector, SparseVector @@ -12,7 +11,11 @@ def register_vector(conn: Connection) -> None: raise RuntimeError('vector type not found in the database') conn.register_out_adapter(Vector, Vector._to_db) - conn.register_out_adapter(np.ndarray, Vector._to_db) + try: + import numpy as np + conn.register_out_adapter(np.ndarray, Vector._to_db) + except ImportError: + pass conn.register_in_adapter(type_info['vector'], Vector._from_db) if 'halfvec' in type_info: diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index dcdc770..cf1694c 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -1,4 +1,3 @@ -import numpy as np import psycopg from psycopg import BaseConnection from psycopg.adapt import Loader, Dumper diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index ff5ad99..9ad5902 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -1,4 +1,3 @@ -import numpy as np from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type from .. import Vector @@ -23,5 +22,9 @@ def register_vector_info(oid: int, array_oid: int | None, scope: connection | cu vectorarray = new_array_type((array_oid,), 'VECTORARRAY', vector) register_type(vectorarray, scope) - register_adapter(np.ndarray, VectorAdapter) register_adapter(Vector, VectorAdapter) + try: + import numpy as np + register_adapter(np.ndarray, VectorAdapter) + except ImportError: + pass From d13c5dd2d9e638cefc8c0154cb7d83582ac180d3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 18:08:59 -0700 Subject: [PATCH 078/121] Added test for Bit [skip ci] --- tests/test_bit.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_bit.py b/tests/test_bit.py index 8663f36..d523619 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -18,6 +18,10 @@ def test_list_int(self): def test_str(self): assert Bit('101').to_list() == [True, False, True] + def test_str_two(self): + # TODO raise + assert Bit('201').to_list() == [True, False, True] + def test_bytes(self): assert Bit(b'\xff\x00\xf0').to_text() == '111111110000000011110000' assert Bit(b'\xfe\x07\x00').to_text() == '111111100000011100000000' From f770ccffe6f2c4e3fe47fb50a51f1d613c03db75 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 18:19:42 -0700 Subject: [PATCH 079/121] Reduced dependency on NumPy [skip ci] --- pgvector/bit.py | 14 +++++++++++--- tests/test_bit.py | 5 +++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 75a51e1..62418cf 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -11,7 +11,15 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d data = value else: if isinstance(value, str): - value = [v != '0' for v in value] + length = len(value) + + if length % 8 != 0: + value += '0'*(8 - (length % 8)) + + try: + data = int(value, 2).to_bytes(len(value) // 8, byteorder='big') + except ValueError: + raise ValueError('expected bit string') else: value = np.asarray(value) @@ -30,8 +38,8 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d if value.ndim != 1: raise ValueError('expected ndim to be 1') - length = len(value) - data = np.packbits(value).tobytes() + length = len(value) + data = np.packbits(value).tobytes() self._value = pack('>i', length) + data diff --git a/tests/test_bit.py b/tests/test_bit.py index d523619..ed7d532 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -19,8 +19,9 @@ def test_str(self): assert Bit('101').to_list() == [True, False, True] def test_str_two(self): - # TODO raise - assert Bit('201').to_list() == [True, False, True] + with pytest.raises(ValueError) as error: + Bit('201') + assert str(error.value) == 'expected bit string' def test_bytes(self): assert Bit(b'\xff\x00\xf0').to_text() == '111111110000000011110000' From e065b87f8c6de7dc15e57409d434f4d93af074c8 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 18:20:51 -0700 Subject: [PATCH 080/121] Fixed lint [skip ci] --- pgvector/bit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 62418cf..6a980d5 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -14,7 +14,7 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d length = len(value) if length % 8 != 0: - value += '0'*(8 - (length % 8)) + value += '0' * (8 - (length % 8)) try: data = int(value, 2).to_bytes(len(value) // 8, byteorder='big') From 5bc91283168a15126b29e0944993992765978901 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 18:33:02 -0700 Subject: [PATCH 081/121] Improved errors for Bit and reduced dependency on NumPy [skip ci] --- pgvector/bit.py | 19 +++++++++++++------ tests/test_bit.py | 14 ++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 6a980d5..0b0243a 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -10,6 +10,16 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d length = 8 * len(value) data = value else: + if isinstance(value, list): + def bit_value(v: bool) -> str: + if v is True: + return '1' + if v is False: + return '0' + raise ValueError('expected list[bool]') + + value = ''.join([bit_value(v) for v in value]) + if isinstance(value, str): length = len(value) @@ -20,12 +30,7 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d data = int(value, 2).to_bytes(len(value) // 8, byteorder='big') except ValueError: raise ValueError('expected bit string') - else: - value = np.asarray(value) - - # for mypy - assert isinstance(value, np.ndarray) - + elif isinstance(value, np.ndarray): if value.dtype != np.bool: # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): @@ -40,6 +45,8 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d length = len(value) data = np.packbits(value).tobytes() + else: + raise ValueError('expected bytes, str, list, or ndarray') self._value = pack('>i', length) + data diff --git a/tests/test_bit.py b/tests/test_bit.py index ed7d532..4eaf103 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -8,12 +8,14 @@ def test_list(self): assert Bit([True, False, True]).to_list() == [True, False, True] def test_list_none(self): - with pytest.warns(UserWarning, match='expected elements to be boolean'): - assert Bit([True, None, True]).to_text() == '101' # ty: ignore[invalid-argument-type] + with pytest.raises(ValueError) as error: + Bit([True, None, True]) # ty: ignore[invalid-argument-type] + assert str(error.value) == 'expected list[bool]' def test_list_int(self): - with pytest.warns(UserWarning, match='expected elements to be boolean'): - assert Bit([254, 7, 0]).to_text() == '110' # ty: ignore[invalid-argument-type] + with pytest.raises(ValueError) as error: + Bit([254, 7, 0]) # ty: ignore[invalid-argument-type] + assert str(error.value) == 'expected list[bool]' def test_str(self): assert Bit('101').to_list() == [True, False, True] @@ -49,12 +51,12 @@ def test_ndarray_uint16(self): def test_ndim_two(self): with pytest.raises(ValueError) as error: Bit([[True, False], [True, False]]) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected ndim to be 1' + assert str(error.value) == 'expected list[bool]' def test_ndim_zero(self): with pytest.raises(ValueError) as error: Bit(True) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected ndim to be 1' + assert str(error.value) == 'expected bytes, str, list, or ndarray' def test_repr(self): assert repr(Bit([True, False, True])) == 'Bit(101)' From 44d3e2eb454d0b6ec1aaa1da4edd3137a4229d03 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 19:11:18 -0700 Subject: [PATCH 082/121] Removed dependency on NumPy - closes #146 --- CHANGELOG.md | 1 + pgvector/bit.py | 11 +++++-- pgvector/halfvec.py | 8 +++-- pgvector/sparsevec.py | 6 +++- pgvector/vector.py | 8 +++-- pyproject.toml | 6 ++-- tests/test_asyncpg.py | 7 ++-- tests/test_bit.py | 10 +++++- tests/test_django.py | 8 +++-- tests/test_half_vector.py | 14 ++++++-- tests/test_peewee.py | 3 +- tests/test_pg8000.py | 18 +++++++---- tests/test_psycopg.py | 64 +++++++++++++++++++++---------------- tests/test_psycopg2.py | 27 +++++++--------- tests/test_sparse_vector.py | 29 ++++++++++++++--- tests/test_sqlalchemy.py | 27 ++++++++++------ tests/test_sqlmodel.py | 6 +++- tests/test_vector.py | 14 ++++++-- 18 files changed, 177 insertions(+), 90 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 760f1e4..44b66b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Changed `vector` type to return `Vector` object instead of NumPy array - Removed `utils` package (use top-level `pgvector` package instead) - Removed re-exported classes (use top-level `pgvector` package instead) +- Removed dependency on NumPy - Dropped support for Python < 3.10 - Dropped support for SQLAlchemy < 2 diff --git a/pgvector/bit.py b/pgvector/bit.py index 0b0243a..c8925f0 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -1,8 +1,12 @@ from __future__ import annotations -import numpy as np from struct import pack, unpack_from from warnings import warn +try: + import numpy as np +except ImportError: + np = None + class Bit: def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.dtype[np.bool | np.uint8]]) -> None: @@ -30,7 +34,7 @@ def bit_value(v: bool) -> str: data = int(value, 2).to_bytes(len(value) // 8, byteorder='big') except ValueError: raise ValueError('expected bit string') - elif isinstance(value, np.ndarray): + elif np is not None and isinstance(value, np.ndarray): if value.dtype != np.bool: # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): @@ -63,7 +67,8 @@ def _length(self): return length def to_list(self) -> list[bool]: - return self.to_numpy().tolist() + # TODO improve + return [v != '0' for v in self.to_text()] def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.bool]]: return np.unpackbits(np.frombuffer(self._value[4:], dtype=np.uint8), count=self._length()).astype(bool) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 85118ef..f317709 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -1,7 +1,11 @@ from __future__ import annotations -import numpy as np import struct +try: + import numpy as np +except ImportError: + np = None + class HalfVector: def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None: @@ -11,7 +15,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float self._value = struct.pack(f'>HH{dim}e', dim, 0, *value) except struct.error: raise ValueError('expected list[float]') - elif isinstance(value, np.ndarray): + elif np is not None and isinstance(value, np.ndarray): if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 205b0b1..3a17b0f 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -1,8 +1,12 @@ from __future__ import annotations -import numpy as np from struct import pack, unpack_from from typing import Any, overload +try: + import numpy as np +except ImportError: + np = None + NO_DEFAULT = object() diff --git a/pgvector/vector.py b/pgvector/vector.py index 525eace..e6c6e60 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -1,7 +1,11 @@ from __future__ import annotations -import numpy as np import struct +try: + import numpy as np +except ImportError: + np = None + class Vector: def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.floating]]) -> None: @@ -11,7 +15,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float self._value = struct.pack(f'>HH{dim}f', dim, 0, *value) except struct.error: raise ValueError('expected list[float]') - elif isinstance(value, np.ndarray): + elif np is not None and isinstance(value, np.ndarray): if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/pyproject.toml b/pyproject.toml index 3fb3ab5..5a7d99e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ authors = [ license = "MIT" requires-python = ">= 3.10" dependencies = [ - "numpy" ] [project.urls] @@ -29,10 +28,13 @@ dev = [ "psycopg2-binary", "pytest", "pytest-asyncio", - "scipy", "SQLAlchemy[asyncio]>=2", "sqlmodel>=0.0.12" ] +dev-optional = [ + "numpy", + "scipy" +] [tool.pytest.ini_options] asyncio_mode = "auto" diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 45c8c5d..b5a1484 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -1,5 +1,4 @@ import asyncpg -import numpy as np from pgvector import HalfVector, SparseVector, Vector from pgvector.asyncpg import register_vector import pytest @@ -16,7 +15,7 @@ async def test_vector(self): await register_vector(conn) embedding = Vector([1.5, 2, 3]) - embedding2 = np.array([4.5, 5, 6]) + embedding2 = [4.5, 5, 6] await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") @@ -111,7 +110,7 @@ async def test_vector_array(self): embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES ($1)", embeddings) - embeddings2 = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] + embeddings2 = [[1.5, 2, 3], [4.5, 5, 6]] await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings2[0], embeddings2[1]) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") @@ -133,7 +132,7 @@ async def init(conn): await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding vector(3))') embedding = Vector([1.5, 2, 3]) - embedding2 = np.array([1.5, 2, 3]) + embedding2 = [1.5, 2, 3] await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") diff --git a/tests/test_bit.py b/tests/test_bit.py index 4eaf103..2f5293d 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -1,7 +1,11 @@ -import numpy as np from pgvector import Bit import pytest +try: + import numpy as np +except ImportError: + np = None + class TestBit: def test_list(self): @@ -29,20 +33,24 @@ def test_bytes(self): assert Bit(b'\xff\x00\xf0').to_text() == '111111110000000011110000' assert Bit(b'\xfe\x07\x00').to_text() == '111111100000011100000000' + @pytest.mark.skipif(np is None, reason='NumPy required') def test_ndarray(self): arr = np.array([True, False, True]) assert Bit(arr).to_list() == [True, False, True] assert np.array_equal(Bit(arr).to_numpy(), arr) + @pytest.mark.skipif(np is None, reason='NumPy required') def test_ndarray_unpackbits(self): arr = np.unpackbits(np.array([254, 7, 0], dtype=np.uint8)) assert Bit(arr).to_text() == '111111100000011100000000' + @pytest.mark.skipif(np is None, reason='NumPy required') def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit(arr).to_text() == '110' + @pytest.mark.skipif(np is None, reason='NumPy required') def test_ndarray_uint16(self): arr = np.array([254, 7, 0], dtype=np.uint16) with pytest.warns(UserWarning, match='expected elements to be boolean'): diff --git a/tests/test_django.py b/tests/test_django.py index fb37629..37265ba 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -11,13 +11,17 @@ from django.db.migrations.loader import MigrationLoader from django.forms import ModelForm from math import sqrt -import numpy as np import os import pgvector.django from pgvector import HalfVector, SparseVector, Vector from pgvector.django import VectorExtension, VectorField, HalfVectorField, BitField, SparseVectorField, IvfflatIndex, HnswIndex, L2Distance, MaxInnerProduct, CosineDistance, L1Distance, HammingDistance, JaccardDistance from unittest import mock +try: + import numpy as np +except ImportError: + np = None + settings.configure( DATABASES={ 'default': { @@ -458,7 +462,7 @@ def test_missing(self): assert Item.objects.first().sparse_embedding is None def test_vector_array(self): - Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])]).save() + Item(id=1, embeddings=[Vector([1, 2, 3]), Vector([4, 5, 6])]).save() with connection.cursor() as cursor: from pgvector.psycopg import register_vector diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index a5c303c..0080971 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -1,8 +1,12 @@ -import numpy as np from pgvector import HalfVector import pytest from struct import pack +try: + import numpy as np +except ImportError: + np = None + class TestHalfVector: def test_list(self): @@ -21,6 +25,7 @@ def test_list_list(self): HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' + @pytest.mark.skipif(np is None, reason='NumPy required') def test_ndarray(self): arr = np.array([1, 2, 3]) assert HalfVector(arr).to_list() == [1, 2, 3] @@ -42,6 +47,7 @@ def test_equality(self): def test_dimensions(self): assert HalfVector([1, 2, 3]).dimensions() == 3 + @pytest.mark.skipif(np is None, reason='NumPy required') def test_to_numpy_readonly(self): arr = HalfVector([1, 2, 3]).to_numpy() with pytest.raises(ValueError) as error: @@ -51,11 +57,13 @@ def test_to_numpy_readonly(self): def test_from_text(self): vec = HalfVector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) + if np is not None: + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) def test_from_binary(self): data = pack('>HH3e', 3, 0, 1.5, 2, 3) vec = HalfVector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) + if np is not None: + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) assert vec.to_binary() == data diff --git a/tests/test_peewee.py b/tests/test_peewee.py index 826608a..095a5db 100644 --- a/tests/test_peewee.py +++ b/tests/test_peewee.py @@ -1,5 +1,4 @@ from math import sqrt -import numpy as np from peewee import Model, PostgresqlDatabase, fn from pgvector import HalfVector, SparseVector, Vector from pgvector.peewee import VectorField, HalfVectorField, FixedBitField, SparseVectorField @@ -197,7 +196,7 @@ def test_halfvec_sum(self): def test_get_or_create(self): Item.get_or_create(id=1, defaults={'embedding': [1, 2, 3]}) - Item.get_or_create(embedding=np.array([4, 5, 6])) + Item.get_or_create(embedding=Vector([4, 5, 6])) Item.get_or_create(embedding=Item.embedding.to_value([7, 8, 9])) def test_vector_array(self): diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 2e4e884..8281ba5 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -1,8 +1,13 @@ from getpass import getuser -import numpy as np from pgvector import HalfVector, SparseVector, Vector from pgvector.pg8000 import register_vector from pg8000.native import Connection +import pytest + +try: + import numpy as np +except ImportError: + np = None conn = Connection(getuser(), database='pgvector_python_test') @@ -18,19 +23,20 @@ def setup_method(self): conn.run('DELETE FROM pg8000_items') def test_vector(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') - assert res[0][0] == Vector([1.5, 2, 3]) + assert res[0][0] == embedding assert res[1][0] is None - def test_vector_class(self): - embedding = Vector([1.5, 2, 3]) + @pytest.mark.skipif(np is None, reason='NumPy required') + def test_vector_numpy(self): + embedding = np.array([1.5, 2, 3]) conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') - assert res[0][0] == embedding + assert res[0][0] == Vector([1.5, 2, 3]) assert res[1][0] is None def test_halfvec(self): diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 71eff2b..7ca6257 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -1,10 +1,14 @@ -import numpy as np from pgvector import Bit, HalfVector, SparseVector, Vector from pgvector.psycopg import register_vector, register_vector_async import psycopg from psycopg_pool import ConnectionPool, AsyncConnectionPool import pytest +try: + import numpy as np +except ImportError: + np = None + conn = psycopg.connect(dbname='pgvector_python_test', autocommit=True) conn.execute('CREATE EXTENSION IF NOT EXISTS vector') @@ -19,49 +23,53 @@ def setup_method(self): conn.execute('DELETE FROM psycopg_items') def test_vector(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (embedding) VALUES (%s), (NULL)', (embedding,)) res = conn.execute('SELECT embedding FROM psycopg_items ORDER BY id').fetchall() - assert res[0][0] == Vector(embedding) + assert res[0][0] == embedding assert res[1][0] is None def test_vector_binary_format(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] - assert res == Vector(embedding) + assert res == embedding def test_vector_text_format(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] - assert res == Vector(embedding) + assert res == embedding def test_vector_binary_format_correct(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) res = next(conn.execute('SELECT %b::vector::text', (embedding,)))[0] assert res == '[1.5,2,3]' - def test_vector_text_format_non_contiguous(self): - embedding = np.flipud(np.array([1.5, 2, 3])) - assert not embedding.data.contiguous + @pytest.mark.skipif(np is None, reason='NumPy required') + def test_vector_numpy_binary_format(self): + embedding = np.array([1.5, 2, 3]) + res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] + assert res == Vector(embedding) + + @pytest.mark.skipif(np is None, reason='NumPy required') + def test_vector_numpy_text_format(self): + embedding = np.array([1.5, 2, 3]) res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] - assert res == Vector([3, 2, 1.5]) + assert res == Vector(embedding) - def test_vector_binary_format_non_contiguous(self): + @pytest.mark.skipif(np is None, reason='NumPy required') + def test_vector_numpy_binary_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous res = next(conn.execute('SELECT %b::vector', (embedding,)))[0] assert res == Vector([3, 2, 1.5]) - def test_vector_class_binary_format(self): - embedding = Vector([1.5, 2, 3]) - res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] - assert res == embedding - - def test_vector_class_text_format(self): - embedding = Vector([1.5, 2, 3]) + @pytest.mark.skipif(np is None, reason='NumPy required') + def test_vector_numpy_text_format_non_contiguous(self): + embedding = np.flipud(np.array([1.5, 2, 3])) + assert not embedding.data.contiguous res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] - assert res == embedding + assert res == Vector([3, 2, 1.5]) def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) @@ -116,26 +124,26 @@ def test_sparsevec_text_format(self): assert res == embedding def test_text_copy_from(self): - embedding = np.array([1.5, 2, 3]) + embedding = [1.5, 2, 3] cur = conn.cursor() with cur.copy("COPY psycopg_items (embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN") as copy: - copy.write_row([embedding, HalfVector(embedding), '101', SparseVector(embedding)]) + copy.write_row([Vector(embedding), HalfVector(embedding), '101', SparseVector(embedding)]) def test_binary_copy_from(self): - embedding = np.array([1.5, 2, 3]) + embedding = [1.5, 2, 3] cur = conn.cursor() with cur.copy("COPY psycopg_items (embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN WITH (FORMAT BINARY)") as copy: - copy.write_row([embedding, HalfVector(embedding), Bit('101'), SparseVector(embedding)]) + copy.write_row([Vector(embedding), HalfVector(embedding), Bit('101'), SparseVector(embedding)]) def test_binary_copy_from_set_types(self): - embedding = np.array([1.5, 2, 3]) + embedding = [1.5, 2, 3] cur = conn.cursor() with cur.copy("COPY psycopg_items (id, embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN WITH (FORMAT BINARY)") as copy: copy.set_types(['int8', 'vector', 'halfvec', 'bit', 'sparsevec']) - copy.write_row([1, embedding, HalfVector(embedding), Bit('101'), SparseVector(embedding)]) + copy.write_row([1, Vector(embedding), HalfVector(embedding), Bit('101'), SparseVector(embedding)]) def test_text_copy_to(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) half_embedding = HalfVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (embedding, half_embedding) VALUES (%s, %s)', (embedding, half_embedding)) cur = conn.cursor() diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index 7029962..df5a308 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -1,9 +1,14 @@ -import numpy as np from pgvector import HalfVector, SparseVector, Vector from pgvector.psycopg2 import register_vector import psycopg2 from psycopg2.extras import DictCursor, RealDictCursor, NamedTupleCursor from psycopg2.pool import ThreadedConnectionPool +import pytest + +try: + import numpy as np +except ImportError: + np = None conn = psycopg2.connect(dbname='pgvector_python_test') conn.autocommit = True @@ -21,21 +26,22 @@ def setup_method(self): cur.execute('DELETE FROM psycopg2_items') def test_vector(self): - embedding = np.array([1.5, 2, 3]) + embedding = Vector([1.5, 2, 3]) cur.execute('INSERT INTO psycopg2_items (embedding) VALUES (%s), (NULL)', (embedding,)) cur.execute('SELECT embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert res[0][0] == Vector(embedding) + assert res[0][0] == embedding assert res[1][0] is None - def test_vector_class(self): - embedding = Vector([1.5, 2, 3]) + @pytest.mark.skipif(np is None, reason='NumPy required') + def test_vector_numpy(self): + embedding = np.array([1.5, 2, 3]) cur.execute('INSERT INTO psycopg2_items (embedding) VALUES (%s), (NULL)', (embedding,)) cur.execute('SELECT embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() - assert res[0][0] == embedding + assert res[0][0] == Vector(embedding) assert res[1][0] is None def test_halfvec(self): @@ -47,15 +53,6 @@ def test_halfvec(self): assert res[0][0] == embedding assert res[1][0] is None - def test_halfvec_class(self): - embedding = HalfVector([1.5, 2, 3]) - cur.execute('INSERT INTO psycopg2_items (half_embedding) VALUES (%s), (NULL)', (embedding,)) - - cur.execute('SELECT half_embedding FROM psycopg2_items ORDER BY id') - res = cur.fetchall() - assert res[0][0] == embedding - assert res[1][0] is None - def test_bit(self): embedding = '101' cur.execute('INSERT INTO psycopg2_items (binary_embedding) VALUES (%s), (NULL)', (embedding,)) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 8753e43..e785643 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -1,15 +1,24 @@ -import numpy as np from pgvector import SparseVector import pytest -from scipy.sparse import coo_array, coo_matrix, csr_array, csr_matrix from struct import pack +try: + import numpy as np +except ImportError: + np = None + +try: + from scipy.sparse import coo_array, coo_matrix, csr_array, csr_matrix +except ImportError: + coo_array = None + class TestSparseVector: def test_list(self): vec = SparseVector([1, 0, 2, 0, 3, 0]) assert vec.to_list() == [1, 0, 2, 0, 3, 0] - assert np.array_equal(vec.to_numpy(), [1, 0, 2, 0, 3, 0]) + if np is not None: + assert np.array_equal(vec.to_numpy(), [1, 0, 2, 0, 3, 0]) assert vec.indices() == [0, 2, 4] def test_list_dimensions(self): @@ -17,6 +26,7 @@ def test_list_dimensions(self): SparseVector([1, 0, 2, 0, 3, 0], 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' + @pytest.mark.skipif(np is None, reason='NumPy required') def test_ndarray(self): vec = SparseVector(np.array([1, 0, 2, 0, 3, 0])) assert vec.to_list() == [1, 0, 2, 0, 3, 0] @@ -32,35 +42,41 @@ def test_dict_no_dimensions(self): SparseVector({0: 1, 2: 2, 4: 3}) assert str(error.value) == 'missing dimensions' + @pytest.mark.skipif(coo_array is None, reason='SciPy required') def test_coo_array(self): arr = coo_array(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] + @pytest.mark.skipif(coo_array is None, reason='SciPy required') def test_coo_array_dimensions(self): with pytest.raises(ValueError) as error: SparseVector(coo_array(np.array([1, 0, 2, 0, 3, 0])), 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' + @pytest.mark.skipif(coo_array is None, reason='SciPy required') def test_coo_matrix(self): mat = coo_matrix(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(mat) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] + @pytest.mark.skipif(coo_array is None, reason='SciPy required') def test_dok_array(self): arr = coo_array(np.array([1, 0, 2, 0, 3, 0])).todok() vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] + @pytest.mark.skipif(coo_array is None, reason='SciPy required') def test_csr_array(self): arr = csr_array(np.array([[1, 0, 2, 0, 3, 0]])) vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] + @pytest.mark.skipif(coo_array is None, reason='SciPy required') def test_csr_matrix(self): mat = csr_matrix(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(mat) @@ -86,6 +102,7 @@ def test_indices(self): def test_values(self): assert SparseVector([1, 0, 2, 0, 3, 0]).values() == [1, 2, 3] + @pytest.mark.skipif(np is None or coo_array is None, reason='NumPy and SciPy required') def test_to_coo(self): assert np.array_equal(SparseVector([1, 0, 2, 0, 3, 0]).to_coo().toarray(), [[1, 0, 2, 0, 3, 0]]) @@ -99,7 +116,8 @@ def test_from_text(self): assert vec.indices() == [0, 2, 4] assert vec.values() == [1.5, 2, 3] assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] - assert np.array_equal(vec.to_numpy(), [1.5, 0, 2, 0, 3, 0]) + if np is not None: + assert np.array_equal(vec.to_numpy(), [1.5, 0, 2, 0, 3, 0]) def test_from_binary(self): data = pack('>iii3i3f', 6, 3, 0, 0, 2, 4, 1.5, 2, 3) @@ -108,5 +126,6 @@ def test_from_binary(self): assert vec.indices() == [0, 2, 4] assert vec.values() == [1.5, 2, 3] assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] - assert np.array_equal(vec.to_numpy(), [1.5, 0, 2, 0, 3, 0]) + if np is not None: + assert np.array_equal(vec.to_numpy(), [1.5, 0, 2, 0, 3, 0]) assert vec.to_binary() == data diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index f4ec787..5a9bec3 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -1,6 +1,5 @@ import asyncpg from getpass import getuser -import numpy as np from pgvector import HalfVector, SparseVector, Vector from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest @@ -11,6 +10,11 @@ from sqlalchemy.orm import mapped_column, DeclarativeBase, Session from sqlalchemy.sql import func +try: + import numpy as np +except ImportError: + np = None + psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') @@ -165,7 +169,7 @@ def test_core(self, engine): hnsw_index.create(engine) def test_orm(self, engine): - item = Item(embedding=np.array([1.5, 2, 3])) + item = Item(embedding=Vector([1.5, 2, 3])) item2 = Item(embedding=[4, 5, 6]) item3 = Item() @@ -469,6 +473,7 @@ def test_bad_dimensions(self, engine): with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): session.commit() + @pytest.mark.skipif(np is None, reason='NumPy required') def test_bad_ndim(self, engine): item = Item(embedding=np.array([[1, 2, 3]])) with Session(engine) as session: @@ -476,6 +481,7 @@ def test_bad_ndim(self, engine): with pytest.raises(StatementError, match='expected ndim to be 1'): session.commit() + @pytest.mark.skipif(np is None, reason='NumPy required') def test_bad_dtype(self, engine): item = Item(embedding=np.array(['one', 'two', 'three'])) with Session(engine) as session: @@ -493,11 +499,11 @@ def test_literal_binds(self, engine): def test_insert(self, engine): with Session(engine) as session: - session.execute(insert(Item).values(embedding=np.array([1, 2, 3]))) + session.execute(insert(Item).values(embedding=Vector([1, 2, 3]))) def test_insert_bulk(self, engine): with Session(engine) as session: - session.execute(insert(Item), [{'embedding': np.array([1, 2, 3])}]) + session.execute(insert(Item), [{'embedding': Vector([1, 2, 3])}]) # register_vector in psycopg2 tests change this behavior # def test_insert_text(self): @@ -511,7 +517,7 @@ def test_automap(self, engine): AutoBase.prepare() AutoItem = AutoBase.classes.sqlalchemy_orm_item with Session(engine) as session: - session.execute(insert(AutoItem), [{'embedding': np.array([1, 2, 3])}]) + session.execute(insert(AutoItem), [{'embedding': Vector([1, 2, 3])}]) item = session.query(AutoItem).first() assert item.embedding == Vector([1, 2, 3]) @@ -556,7 +562,7 @@ def setup_method(self): def test_vector_array(self, engine): with Session(engine) as session: - session.add(Item(id=1, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + session.add(Item(id=1, embeddings=[Vector([1, 2, 3]), Vector([4, 5, 6])])) session.commit() # this fails if the driver does not cast arrays @@ -565,7 +571,7 @@ def test_vector_array(self, engine): def test_halfvec_array(self, engine): with Session(engine) as session: - session.add(Item(id=1, half_embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + session.add(Item(id=1, half_embeddings=[HalfVector([1, 2, 3]), HalfVector([4, 5, 6])])) session.commit() # this fails if the driver does not cast arrays @@ -664,8 +670,9 @@ async def test_vector_array(self, engine): item = await session.get_one(Item, 1) assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] - session.add(Item(id=2, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) - item = await session.get_one(Item, 2) - assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] + if np is not None: + session.add(Item(id=2, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) + item = await session.get_one(Item, 2) + assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] await engine.dispose() diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index 4bb9d97..cc916bf 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -1,4 +1,3 @@ -import numpy as np from pgvector import HalfVector, SparseVector, Vector from pgvector.sqlalchemy import VECTOR, HALFVEC, BIT, SPARSEVEC, avg, sum import pytest @@ -6,6 +5,11 @@ from sqlmodel import Field, Index, Session, SQLModel, create_engine, delete, select, text from typing import Any, Optional +try: + import numpy as np +except ImportError: + np = None + engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') with Session(engine) as session: session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) # type: ignore diff --git a/tests/test_vector.py b/tests/test_vector.py index 337c7a7..ab6fb1a 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -1,8 +1,12 @@ -import numpy as np from pgvector import Vector import pytest from struct import pack +try: + import numpy as np +except ImportError: + np = None + class TestVector: def test_list(self): @@ -21,6 +25,7 @@ def test_list_list(self): Vector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' + @pytest.mark.skipif(np is None, reason='NumPy required') def test_ndarray(self): arr = np.array([1, 2, 3]) assert Vector(arr).to_list() == [1, 2, 3] @@ -42,6 +47,7 @@ def test_equality(self): def test_dimensions(self): assert Vector([1, 2, 3]).dimensions() == 3 + @pytest.mark.skipif(np is None, reason='NumPy required') def test_to_numpy_readonly(self): arr = Vector([1, 2, 3]).to_numpy() with pytest.raises(ValueError) as error: @@ -51,11 +57,13 @@ def test_to_numpy_readonly(self): def test_from_text(self): vec = Vector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) + if np is not None: + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) def test_from_binary(self): data = pack('>HH3f', 3, 0, 1.5, 2, 3) vec = Vector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] - assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) + if np is not None: + assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) assert vec.to_binary() == data From 77d878bbb2eae107f644adb62b2ca376903b5dea Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 19:11:45 -0700 Subject: [PATCH 083/121] Updated changelog [skip ci] --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44b66b3..6d89fdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.5.0 (unreleased) +## 1.0.0 (unreleased) - Added type hints - Changed `vector` type to return `Vector` object instead of NumPy array From 2bf00f8c326507335a9faf3a7a058b74e7b48255 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 19:20:47 -0700 Subject: [PATCH 084/121] Updated style [skip ci] --- pgvector/pg8000/register.py | 3 ++- pgvector/psycopg2/vector.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pgvector/pg8000/register.py b/pgvector/pg8000/register.py index e26b083..5d44b7c 100644 --- a/pgvector/pg8000/register.py +++ b/pgvector/pg8000/register.py @@ -11,12 +11,13 @@ def register_vector(conn: Connection) -> None: raise RuntimeError('vector type not found in the database') conn.register_out_adapter(Vector, Vector._to_db) + conn.register_in_adapter(type_info['vector'], Vector._from_db) + try: import numpy as np conn.register_out_adapter(np.ndarray, Vector._to_db) except ImportError: pass - conn.register_in_adapter(type_info['vector'], Vector._from_db) if 'halfvec' in type_info: conn.register_out_adapter(HalfVector, HalfVector._to_db) diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 9ad5902..f8832a6 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -23,6 +23,7 @@ def register_vector_info(oid: int, array_oid: int | None, scope: connection | cu register_type(vectorarray, scope) register_adapter(Vector, VectorAdapter) + try: import numpy as np register_adapter(np.ndarray, VectorAdapter) From f610d5ec1debba32ad662c28665d3bf841e2441e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 20:20:31 -0700 Subject: [PATCH 085/121] Fixed typechecking [skip ci] --- pgvector/bit.py | 5 +++-- pgvector/halfvec.py | 5 +++-- pgvector/sparsevec.py | 2 +- pgvector/vector.py | 5 +++-- tests/test_bit.py | 11 ++++++----- tests/test_django.py | 3 ++- tests/test_half_vector.py | 7 ++++--- tests/test_pg8000.py | 5 +++-- tests/test_psycopg.py | 11 ++++++----- tests/test_psycopg2.py | 5 +++-- tests/test_sparse_vector.py | 22 ++++++++++++---------- tests/test_sqlalchemy.py | 7 ++++--- tests/test_sqlmodel.py | 3 ++- tests/test_vector.py | 7 ++++--- 14 files changed, 56 insertions(+), 42 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index c8925f0..6961a40 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -4,8 +4,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False class Bit: @@ -34,7 +35,7 @@ def bit_value(v: bool) -> str: data = int(value, 2).to_bytes(len(value) // 8, byteorder='big') except ValueError: raise ValueError('expected bit string') - elif np is not None and isinstance(value, np.ndarray): + elif NUMPY_AVAILABLE and isinstance(value, np.ndarray): if value.dtype != np.bool: # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index f317709..deb6f76 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -3,8 +3,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False class HalfVector: @@ -15,7 +16,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float self._value = struct.pack(f'>HH{dim}e', dim, 0, *value) except struct.error: raise ValueError('expected list[float]') - elif np is not None and isinstance(value, np.ndarray): + elif NUMPY_AVAILABLE and isinstance(value, np.ndarray): if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 3a17b0f..22ee39d 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -5,7 +5,7 @@ try: import numpy as np except ImportError: - np = None + pass NO_DEFAULT = object() diff --git a/pgvector/vector.py b/pgvector/vector.py index e6c6e60..1c1d678 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -3,8 +3,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False class Vector: @@ -15,7 +16,7 @@ def __init__(self, value: list[float] | np.ndarray[tuple[int], np.dtype[np.float self._value = struct.pack(f'>HH{dim}f', dim, 0, *value) except struct.error: raise ValueError('expected list[float]') - elif np is not None and isinstance(value, np.ndarray): + elif NUMPY_AVAILABLE and isinstance(value, np.ndarray): if value.ndim != 1: raise ValueError('expected ndim to be 1') diff --git a/tests/test_bit.py b/tests/test_bit.py index 2f5293d..5b33d92 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -3,8 +3,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False class TestBit: @@ -33,24 +34,24 @@ def test_bytes(self): assert Bit(b'\xff\x00\xf0').to_text() == '111111110000000011110000' assert Bit(b'\xfe\x07\x00').to_text() == '111111100000011100000000' - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): arr = np.array([True, False, True]) assert Bit(arr).to_list() == [True, False, True] assert np.array_equal(Bit(arr).to_numpy(), arr) - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_unpackbits(self): arr = np.unpackbits(np.array([254, 7, 0], dtype=np.uint8)) assert Bit(arr).to_text() == '111111100000011100000000' - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit(arr).to_text() == '110' - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_uint16(self): arr = np.array([254, 7, 0], dtype=np.uint16) with pytest.warns(UserWarning, match='expected elements to be boolean'): diff --git a/tests/test_django.py b/tests/test_django.py index 37265ba..c64d030 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -19,8 +19,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False settings.configure( DATABASES={ diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 0080971..1a9176f 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -4,8 +4,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False class TestHalfVector: @@ -25,7 +26,7 @@ def test_list_list(self): HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): arr = np.array([1, 2, 3]) assert HalfVector(arr).to_list() == [1, 2, 3] @@ -47,7 +48,7 @@ def test_equality(self): def test_dimensions(self): assert HalfVector([1, 2, 3]).dimensions() == 3 - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_to_numpy_readonly(self): arr = HalfVector([1, 2, 3]).to_numpy() with pytest.raises(ValueError) as error: diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 8281ba5..c36cccd 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -6,8 +6,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False conn = Connection(getuser(), database='pgvector_python_test') @@ -30,7 +31,7 @@ def test_vector(self): assert res[0][0] == embedding assert res[1][0] is None - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy(self): embedding = np.array([1.5, 2, 3]) conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 7ca6257..19c3421 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -6,8 +6,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False conn = psycopg.connect(dbname='pgvector_python_test', autocommit=True) @@ -45,26 +46,26 @@ def test_vector_binary_format_correct(self): res = next(conn.execute('SELECT %b::vector::text', (embedding,)))[0] assert res == '[1.5,2,3]' - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_binary_format(self): embedding = np.array([1.5, 2, 3]) res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] assert res == Vector(embedding) - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_text_format(self): embedding = np.array([1.5, 2, 3]) res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] assert res == Vector(embedding) - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_binary_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous res = next(conn.execute('SELECT %b::vector', (embedding,)))[0] assert res == Vector([3, 2, 1.5]) - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_text_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index df5a308..d0d7c86 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -7,8 +7,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False conn = psycopg2.connect(dbname='pgvector_python_test') conn.autocommit = True @@ -34,7 +35,7 @@ def test_vector(self): assert res[0][0] == embedding assert res[1][0] is None - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy(self): embedding = np.array([1.5, 2, 3]) cur.execute('INSERT INTO psycopg2_items (embedding) VALUES (%s), (NULL)', (embedding,)) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index e785643..55cded6 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -4,13 +4,15 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False try: from scipy.sparse import coo_array, coo_matrix, csr_array, csr_matrix + SCIPY_AVAILABLE = True except ImportError: - coo_array = None + SCIPY_AVAILABLE = False class TestSparseVector: @@ -26,7 +28,7 @@ def test_list_dimensions(self): SparseVector([1, 0, 2, 0, 3, 0], 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): vec = SparseVector(np.array([1, 0, 2, 0, 3, 0])) assert vec.to_list() == [1, 0, 2, 0, 3, 0] @@ -42,41 +44,41 @@ def test_dict_no_dimensions(self): SparseVector({0: 1, 2: 2, 4: 3}) assert str(error.value) == 'missing dimensions' - @pytest.mark.skipif(coo_array is None, reason='SciPy required') + @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') def test_coo_array(self): arr = coo_array(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(coo_array is None, reason='SciPy required') + @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') def test_coo_array_dimensions(self): with pytest.raises(ValueError) as error: SparseVector(coo_array(np.array([1, 0, 2, 0, 3, 0])), 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' - @pytest.mark.skipif(coo_array is None, reason='SciPy required') + @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') def test_coo_matrix(self): mat = coo_matrix(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(mat) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(coo_array is None, reason='SciPy required') + @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') def test_dok_array(self): arr = coo_array(np.array([1, 0, 2, 0, 3, 0])).todok() vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(coo_array is None, reason='SciPy required') + @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') def test_csr_array(self): arr = csr_array(np.array([[1, 0, 2, 0, 3, 0]])) vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(coo_array is None, reason='SciPy required') + @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') def test_csr_matrix(self): mat = csr_matrix(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(mat) @@ -102,7 +104,7 @@ def test_indices(self): def test_values(self): assert SparseVector([1, 0, 2, 0, 3, 0]).values() == [1, 2, 3] - @pytest.mark.skipif(np is None or coo_array is None, reason='NumPy and SciPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE or SCIPY_AVAILABLE, reason='NumPy and SciPy required') def test_to_coo(self): assert np.array_equal(SparseVector([1, 0, 2, 0, 3, 0]).to_coo().toarray(), [[1, 0, 2, 0, 3, 0]]) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 5a9bec3..a6dc382 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -12,8 +12,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') @@ -473,7 +474,7 @@ def test_bad_dimensions(self, engine): with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): session.commit() - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_bad_ndim(self, engine): item = Item(embedding=np.array([[1, 2, 3]])) with Session(engine) as session: @@ -481,7 +482,7 @@ def test_bad_ndim(self, engine): with pytest.raises(StatementError, match='expected ndim to be 1'): session.commit() - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_bad_dtype(self, engine): item = Item(embedding=np.array(['one', 'two', 'three'])) with Session(engine) as session: diff --git a/tests/test_sqlmodel.py b/tests/test_sqlmodel.py index cc916bf..4dcaedd 100644 --- a/tests/test_sqlmodel.py +++ b/tests/test_sqlmodel.py @@ -7,8 +7,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') with Session(engine) as session: diff --git a/tests/test_vector.py b/tests/test_vector.py index ab6fb1a..9446ca8 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -4,8 +4,9 @@ try: import numpy as np + NUMPY_AVAILABLE = True except ImportError: - np = None + NUMPY_AVAILABLE = False class TestVector: @@ -25,7 +26,7 @@ def test_list_list(self): Vector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): arr = np.array([1, 2, 3]) assert Vector(arr).to_list() == [1, 2, 3] @@ -47,7 +48,7 @@ def test_equality(self): def test_dimensions(self): assert Vector([1, 2, 3]).dimensions() == 3 - @pytest.mark.skipif(np is None, reason='NumPy required') + @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') def test_to_numpy_readonly(self): arr = Vector([1, 2, 3]).to_numpy() with pytest.raises(ValueError) as error: From b1e3669eff86b1e1fd8ecb2c4bb756db21fd6619 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 20:29:34 -0700 Subject: [PATCH 086/121] Added checks to from_binary [skip ci] --- pgvector/bit.py | 5 +++++ pgvector/halfvec.py | 9 ++++++++- pgvector/vector.py | 9 ++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 6961a40..e2069af 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -89,6 +89,11 @@ def from_binary(cls, value: bytes) -> Bit: if not isinstance(value, bytes): raise ValueError('expected bytes') + length, = unpack_from('>i', value) + + if len(value) != 4 + (length + 7) // 8: + raise ValueError('invalid length') + bit = cls.__new__(cls) bit._value = value return bit diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index deb6f76..d89cdb6 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -58,7 +58,14 @@ def from_text(cls, value: str) -> HalfVector: @classmethod def from_binary(cls, value: bytes) -> HalfVector: - # TODO check dimensions/length and unused + dim, unused = struct.unpack_from('>HH', value) + + if len(value) != 4 + 2 * dim: + raise ValueError('invalid length') + + if unused != 0: + raise ValueError('expected unused to be 0') + vec = cls.__new__(cls) vec._value = value return vec diff --git a/pgvector/vector.py b/pgvector/vector.py index 1c1d678..bd5a2f5 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -58,7 +58,14 @@ def from_text(cls, value: str) -> Vector: @classmethod def from_binary(cls, value: bytes) -> Vector: - # TODO check dimensions/length and unused + dim, unused = struct.unpack_from('>HH', value) + + if len(value) != 4 + 4 * dim: + raise ValueError('invalid length') + + if unused != 0: + raise ValueError('expected unused to be 0') + vec = cls.__new__(cls) vec._value = value return vec From 90ed89c66f1e6ee298f928f2219f971725835e09 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 20:33:38 -0700 Subject: [PATCH 087/121] Added checks to from_binary for SparseVector [skip ci] --- pgvector/sparsevec.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 22ee39d..09b796c 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -129,6 +129,13 @@ def from_text(cls, value: str) -> SparseVector: @classmethod def from_binary(cls, value: bytes) -> SparseVector: dim, nnz, unused = unpack_from('>iii', value) + + if len(value) != 12 + 8 * nnz: + raise ValueError('invalid length') + + if unused != 0: + raise ValueError('expected unused to be 0') + indices = unpack_from(f'>{nnz}i', value, 12) values = unpack_from(f'>{nnz}f', value, 12 + nnz * 4) return cls._from_parts(int(dim), list(indices), list(values)) From 05fe2cb8866e7eb32a58559eee5aeca8680e6c53 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 20:43:46 -0700 Subject: [PATCH 088/121] Removed unneeded check [skip ci] --- pgvector/bit.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index e2069af..eafd31a 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -42,9 +42,6 @@ def bit_value(v: bool) -> str: warn('expected elements to be boolean', stacklevel=2) value = value.astype(bool) - # for mypy - assert isinstance(value, np.ndarray) - if value.ndim != 1: raise ValueError('expected ndim to be 1') From a9a3ea15af65de6349b06dae40239138f4136aa4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 21:05:26 -0700 Subject: [PATCH 089/121] Fixed tests --- tests/test_bit.py | 8 ++++---- tests/test_half_vector.py | 8 ++++---- tests/test_pg8000.py | 2 +- tests/test_psycopg.py | 8 ++++---- tests/test_psycopg2.py | 2 +- tests/test_sparse_vector.py | 22 +++++++++++----------- tests/test_sqlalchemy.py | 6 +++--- tests/test_vector.py | 8 ++++---- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/tests/test_bit.py b/tests/test_bit.py index 5b33d92..f87d339 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -34,24 +34,24 @@ def test_bytes(self): assert Bit(b'\xff\x00\xf0').to_text() == '111111110000000011110000' assert Bit(b'\xfe\x07\x00').to_text() == '111111100000011100000000' - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): arr = np.array([True, False, True]) assert Bit(arr).to_list() == [True, False, True] assert np.array_equal(Bit(arr).to_numpy(), arr) - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_unpackbits(self): arr = np.unpackbits(np.array([254, 7, 0], dtype=np.uint8)) assert Bit(arr).to_text() == '111111100000011100000000' - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit(arr).to_text() == '110' - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_uint16(self): arr = np.array([254, 7, 0], dtype=np.uint16) with pytest.warns(UserWarning, match='expected elements to be boolean'): diff --git a/tests/test_half_vector.py b/tests/test_half_vector.py index 1a9176f..d859bd8 100644 --- a/tests/test_half_vector.py +++ b/tests/test_half_vector.py @@ -26,7 +26,7 @@ def test_list_list(self): HalfVector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): arr = np.array([1, 2, 3]) assert HalfVector(arr).to_list() == [1, 2, 3] @@ -48,7 +48,7 @@ def test_equality(self): def test_dimensions(self): assert HalfVector([1, 2, 3]).dimensions() == 3 - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_to_numpy_readonly(self): arr = HalfVector([1, 2, 3]).to_numpy() with pytest.raises(ValueError) as error: @@ -58,13 +58,13 @@ def test_to_numpy_readonly(self): def test_from_text(self): vec = HalfVector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] - if np is not None: + if NUMPY_AVAILABLE: assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) def test_from_binary(self): data = pack('>HH3e', 3, 0, 1.5, 2, 3) vec = HalfVector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] - if np is not None: + if NUMPY_AVAILABLE: assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) assert vec.to_binary() == data diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index c36cccd..057dbc9 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -31,7 +31,7 @@ def test_vector(self): assert res[0][0] == embedding assert res[1][0] is None - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy(self): embedding = np.array([1.5, 2, 3]) conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 19c3421..ca04c97 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -46,26 +46,26 @@ def test_vector_binary_format_correct(self): res = next(conn.execute('SELECT %b::vector::text', (embedding,)))[0] assert res == '[1.5,2,3]' - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_binary_format(self): embedding = np.array([1.5, 2, 3]) res = next(conn.execute('SELECT %b::vector', (embedding,), binary=True))[0] assert res == Vector(embedding) - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_text_format(self): embedding = np.array([1.5, 2, 3]) res = next(conn.execute('SELECT %t::vector', (embedding,)))[0] assert res == Vector(embedding) - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_binary_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous res = next(conn.execute('SELECT %b::vector', (embedding,)))[0] assert res == Vector([3, 2, 1.5]) - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy_text_format_non_contiguous(self): embedding = np.flipud(np.array([1.5, 2, 3])) assert not embedding.data.contiguous diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index d0d7c86..d12eae0 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -35,7 +35,7 @@ def test_vector(self): assert res[0][0] == embedding assert res[1][0] is None - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_vector_numpy(self): embedding = np.array([1.5, 2, 3]) cur.execute('INSERT INTO psycopg2_items (embedding) VALUES (%s), (NULL)', (embedding,)) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 55cded6..4da2f3f 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -19,7 +19,7 @@ class TestSparseVector: def test_list(self): vec = SparseVector([1, 0, 2, 0, 3, 0]) assert vec.to_list() == [1, 0, 2, 0, 3, 0] - if np is not None: + if NUMPY_AVAILABLE: assert np.array_equal(vec.to_numpy(), [1, 0, 2, 0, 3, 0]) assert vec.indices() == [0, 2, 4] @@ -28,7 +28,7 @@ def test_list_dimensions(self): SparseVector([1, 0, 2, 0, 3, 0], 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): vec = SparseVector(np.array([1, 0, 2, 0, 3, 0])) assert vec.to_list() == [1, 0, 2, 0, 3, 0] @@ -44,41 +44,41 @@ def test_dict_no_dimensions(self): SparseVector({0: 1, 2: 2, 4: 3}) assert str(error.value) == 'missing dimensions' - @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') + @pytest.mark.skipif(not SCIPY_AVAILABLE, reason='SciPy required') def test_coo_array(self): arr = coo_array(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') + @pytest.mark.skipif(not SCIPY_AVAILABLE, reason='SciPy required') def test_coo_array_dimensions(self): with pytest.raises(ValueError) as error: SparseVector(coo_array(np.array([1, 0, 2, 0, 3, 0])), 6) # ty: ignore[invalid-argument-type] assert str(error.value) == 'extra argument' - @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') + @pytest.mark.skipif(not SCIPY_AVAILABLE, reason='SciPy required') def test_coo_matrix(self): mat = coo_matrix(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(mat) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') + @pytest.mark.skipif(not SCIPY_AVAILABLE, reason='SciPy required') def test_dok_array(self): arr = coo_array(np.array([1, 0, 2, 0, 3, 0])).todok() vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') + @pytest.mark.skipif(not SCIPY_AVAILABLE, reason='SciPy required') def test_csr_array(self): arr = csr_array(np.array([[1, 0, 2, 0, 3, 0]])) vec = SparseVector(arr) assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] - @pytest.mark.skipif(SCIPY_AVAILABLE, reason='SciPy required') + @pytest.mark.skipif(not SCIPY_AVAILABLE, reason='SciPy required') def test_csr_matrix(self): mat = csr_matrix(np.array([1, 0, 2, 0, 3, 0])) vec = SparseVector(mat) @@ -104,7 +104,7 @@ def test_indices(self): def test_values(self): assert SparseVector([1, 0, 2, 0, 3, 0]).values() == [1, 2, 3] - @pytest.mark.skipif(NUMPY_AVAILABLE or SCIPY_AVAILABLE, reason='NumPy and SciPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE or not SCIPY_AVAILABLE, reason='NumPy and SciPy required') def test_to_coo(self): assert np.array_equal(SparseVector([1, 0, 2, 0, 3, 0]).to_coo().toarray(), [[1, 0, 2, 0, 3, 0]]) @@ -118,7 +118,7 @@ def test_from_text(self): assert vec.indices() == [0, 2, 4] assert vec.values() == [1.5, 2, 3] assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] - if np is not None: + if NUMPY_AVAILABLE: assert np.array_equal(vec.to_numpy(), [1.5, 0, 2, 0, 3, 0]) def test_from_binary(self): @@ -128,6 +128,6 @@ def test_from_binary(self): assert vec.indices() == [0, 2, 4] assert vec.values() == [1.5, 2, 3] assert vec.to_list() == [1.5, 0, 2, 0, 3, 0] - if np is not None: + if NUMPY_AVAILABLE: assert np.array_equal(vec.to_numpy(), [1.5, 0, 2, 0, 3, 0]) assert vec.to_binary() == data diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index a6dc382..35b8be2 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -474,7 +474,7 @@ def test_bad_dimensions(self, engine): with pytest.raises(StatementError, match='expected 3 dimensions, not 2'): session.commit() - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_bad_ndim(self, engine): item = Item(embedding=np.array([[1, 2, 3]])) with Session(engine) as session: @@ -482,7 +482,7 @@ def test_bad_ndim(self, engine): with pytest.raises(StatementError, match='expected ndim to be 1'): session.commit() - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_bad_dtype(self, engine): item = Item(embedding=np.array(['one', 'two', 'three'])) with Session(engine) as session: @@ -671,7 +671,7 @@ async def test_vector_array(self, engine): item = await session.get_one(Item, 1) assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] - if np is not None: + if NUMPY_AVAILABLE: session.add(Item(id=2, embeddings=[np.array([1, 2, 3]), np.array([4, 5, 6])])) item = await session.get_one(Item, 2) assert item.embeddings == [Vector([1, 2, 3]), Vector([4, 5, 6])] diff --git a/tests/test_vector.py b/tests/test_vector.py index 9446ca8..a8b8e28 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -26,7 +26,7 @@ def test_list_list(self): Vector([[1, 2], [3, 4]]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[float]' - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray(self): arr = np.array([1, 2, 3]) assert Vector(arr).to_list() == [1, 2, 3] @@ -48,7 +48,7 @@ def test_equality(self): def test_dimensions(self): assert Vector([1, 2, 3]).dimensions() == 3 - @pytest.mark.skipif(NUMPY_AVAILABLE, reason='NumPy required') + @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_to_numpy_readonly(self): arr = Vector([1, 2, 3]).to_numpy() with pytest.raises(ValueError) as error: @@ -58,13 +58,13 @@ def test_to_numpy_readonly(self): def test_from_text(self): vec = Vector.from_text('[1.5,2,3]') assert vec.to_list() == [1.5, 2, 3] - if np is not None: + if NUMPY_AVAILABLE: assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) def test_from_binary(self): data = pack('>HH3f', 3, 0, 1.5, 2, 3) vec = Vector.from_binary(data) assert vec.to_list() == [1.5, 2, 3] - if np is not None: + if NUMPY_AVAILABLE: assert np.array_equal(vec.to_numpy(), [1.5, 2, 3]) assert vec.to_binary() == data From 0e2acf939ea646c078b6236449a18529d8d63ae7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 21:07:58 -0700 Subject: [PATCH 090/121] Improved tests [skip ci] --- tests/test_bit.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_bit.py b/tests/test_bit.py index f87d339..3b31c62 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -22,6 +22,11 @@ def test_list_int(self): Bit([254, 7, 0]) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected list[bool]' + def test_list_list(self): + with pytest.raises(ValueError) as error: + Bit([[True, False], [True, False]]) # ty: ignore[invalid-argument-type] + assert str(error.value) == 'expected list[bool]' + def test_str(self): assert Bit('101').to_list() == [True, False, True] @@ -57,12 +62,7 @@ def test_ndarray_uint16(self): with pytest.warns(UserWarning, match='expected elements to be boolean'): assert Bit(arr).to_text() == '110' - def test_ndim_two(self): - with pytest.raises(ValueError) as error: - Bit([[True, False], [True, False]]) # ty: ignore[invalid-argument-type] - assert str(error.value) == 'expected list[bool]' - - def test_ndim_zero(self): + def test_bool(self): with pytest.raises(ValueError) as error: Bit(True) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected bytes, str, list, or ndarray' From a885b1c4aca5606d770619b3af0ee44d7d555017 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 21:09:57 -0700 Subject: [PATCH 091/121] Changed warning to error [skip ci] --- pgvector/bit.py | 2 +- tests/test_bit.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index eafd31a..ff991c8 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -39,7 +39,7 @@ def bit_value(v: bool) -> str: if value.dtype != np.bool: # skip warning for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): - warn('expected elements to be boolean', stacklevel=2) + raise ValueError('expected elements to be boolean') value = value.astype(bool) if value.ndim != 1: diff --git a/tests/test_bit.py b/tests/test_bit.py index 3b31c62..e96af23 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -53,14 +53,16 @@ def test_ndarray_unpackbits(self): @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_uint8(self): arr = np.array([254, 7, 0], dtype=np.uint8) - with pytest.warns(UserWarning, match='expected elements to be boolean'): - assert Bit(arr).to_text() == '110' + with pytest.raises(ValueError) as error: + Bit(arr) + assert str(error.value) == 'expected elements to be boolean' @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') def test_ndarray_uint16(self): arr = np.array([254, 7, 0], dtype=np.uint16) - with pytest.warns(UserWarning, match='expected elements to be boolean'): - assert Bit(arr).to_text() == '110' + with pytest.raises(ValueError) as error: + Bit(arr) + assert str(error.value) == 'expected elements to be boolean' def test_bool(self): with pytest.raises(ValueError) as error: From a27a8b0b962792a498edd15185b64942c05530e4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 21:10:48 -0700 Subject: [PATCH 092/121] Improved code [skip ci] --- pgvector/bit.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index ff991c8..ba116cb 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -1,6 +1,5 @@ from __future__ import annotations from struct import pack, unpack_from -from warnings import warn try: import numpy as np @@ -37,7 +36,7 @@ def bit_value(v: bool) -> str: raise ValueError('expected bit string') elif NUMPY_AVAILABLE and isinstance(value, np.ndarray): if value.dtype != np.bool: - # skip warning for result of np.unpackbits + # skip error for result of np.unpackbits if value.dtype != np.uint8 or np.any(value > 1): raise ValueError('expected elements to be boolean') value = value.astype(bool) From f89f83bd9ddfdb60537dc707fef97faebd412093 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 21:49:55 -0700 Subject: [PATCH 093/121] Updated license year [skip ci] --- LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index b612d6d..17e5210 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2021-2025 Andrew Kane +Copyright (c) 2021-2026 Andrew Kane Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From af6f9fc26b47767e26f88e8b18437a5c3e2ada5b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 21:51:28 -0700 Subject: [PATCH 094/121] Added optional dependency tests to CI --- .github/workflows/build.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1ed395f..c1d335e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,3 +24,6 @@ jobs: make sudo make install - run: pytest + + - run: pip install --group dev-optional + - run: pytest From 536c3fbe968699c387b30c6904ad86f3f9073382 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 21:59:31 -0700 Subject: [PATCH 095/121] DRYed tests [skip ci] --- tests/test_asyncpg.py | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index b5a1484..01e6a7e 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -5,14 +5,17 @@ class TestAsyncpg: - @pytest.mark.asyncio - async def test_vector(self): + async def setup_connection(self): conn = await asyncpg.connect(database='pgvector_python_test') await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') + await register_vector(conn) await conn.execute('DROP TABLE IF EXISTS asyncpg_items') - await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding vector(3))') + return conn - await register_vector(conn) + @pytest.mark.asyncio + async def test_vector(self): + conn = await self.setup_connection(); + await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding vector(3))') embedding = Vector([1.5, 2, 3]) embedding2 = [4.5, 5, 6] @@ -31,13 +34,9 @@ async def test_vector(self): @pytest.mark.asyncio async def test_halfvec(self): - conn = await asyncpg.connect(database='pgvector_python_test') - await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') - await conn.execute('DROP TABLE IF EXISTS asyncpg_items') + conn = await self.setup_connection(); await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding halfvec(3))') - await register_vector(conn) - embedding = HalfVector([1.5, 2, 3]) embedding2 = [4.5, 5, 6] await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) @@ -55,13 +54,9 @@ async def test_halfvec(self): @pytest.mark.asyncio async def test_bit(self): - conn = await asyncpg.connect(database='pgvector_python_test') - await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') - await conn.execute('DROP TABLE IF EXISTS asyncpg_items') + conn = await self.setup_connection(); await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding bit(3))') - await register_vector(conn) - embedding = asyncpg.BitString('101') # type: ignore await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) @@ -78,13 +73,9 @@ async def test_bit(self): @pytest.mark.asyncio async def test_sparsevec(self): - conn = await asyncpg.connect(database='pgvector_python_test') - await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') - await conn.execute('DROP TABLE IF EXISTS asyncpg_items') + conn = await self.setup_connection(); await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding sparsevec(3))') - await register_vector(conn) - embedding = SparseVector([1.5, 2, 3]) await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) @@ -100,13 +91,9 @@ async def test_sparsevec(self): @pytest.mark.asyncio async def test_vector_array(self): - conn = await asyncpg.connect(database='pgvector_python_test') - await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') - await conn.execute('DROP TABLE IF EXISTS asyncpg_items') + conn = await self.setup_connection(); await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embeddings vector[])') - await register_vector(conn) - embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES ($1)", embeddings) From d31d279784a16026c8787730a9d375f191a6161b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:06:20 -0700 Subject: [PATCH 096/121] Improved test [skip ci] --- tests/test_asyncpg.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 01e6a7e..5beb8a9 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -3,6 +3,12 @@ from pgvector.asyncpg import register_vector import pytest +try: + import numpy as np + NUMPY_AVAILABLE = True +except ImportError: + NUMPY_AVAILABLE = False + class TestAsyncpg: async def setup_connection(self): @@ -19,12 +25,14 @@ async def test_vector(self): embedding = Vector([1.5, 2, 3]) embedding2 = [4.5, 5, 6] - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) + embedding3 = np.array([7.5, 8, 9]) if NUMPY_AVAILABLE else [7.5, 8, 9] + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3), (NULL)", embedding, embedding2, embedding3) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert res[0]['embedding'] == embedding assert res[1]['embedding'] == Vector(embedding2) - assert res[2]['embedding'] is None + assert res[2]['embedding'] == Vector(embedding3) + assert res[3]['embedding'] is None # ensures binary format is correct text_res = await conn.fetch("SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1") From 061de102b9a2538544c9d9ab2cdc1100d20cd937 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:09:50 -0700 Subject: [PATCH 097/121] Improved tests [skip ci] --- tests/test_asyncpg.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 5beb8a9..4901616 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -26,7 +26,8 @@ async def test_vector(self): embedding = Vector([1.5, 2, 3]) embedding2 = [4.5, 5, 6] embedding3 = np.array([7.5, 8, 9]) if NUMPY_AVAILABLE else [7.5, 8, 9] - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3), (NULL)", embedding, embedding2, embedding3) + embedding4 = None + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3), ($4)", embedding, embedding2, embedding3, embedding4) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert res[0]['embedding'] == embedding @@ -47,7 +48,8 @@ async def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) embedding2 = [4.5, 5, 6] - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) + embedding3 = None + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3)", embedding, embedding2, embedding3) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert res[0]['embedding'] == embedding @@ -66,7 +68,8 @@ async def test_bit(self): await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding bit(3))') embedding = asyncpg.BitString('101') # type: ignore - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) + embedding2 = None + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert res[0]['embedding'].as_string() == '101' @@ -85,7 +88,8 @@ async def test_sparsevec(self): await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding sparsevec(3))') embedding = SparseVector([1.5, 2, 3]) - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), (NULL)", embedding) + embedding2 = None + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2)", embedding, embedding2) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert res[0]['embedding'] == embedding @@ -108,9 +112,15 @@ async def test_vector_array(self): embeddings2 = [[1.5, 2, 3], [4.5, 5, 6]] await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings2[0], embeddings2[1]) + if NUMPY_AVAILABLE: + embeddings3 = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] + await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings3[0], embeddings3[1]) + res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert res[0]['embeddings'] == embeddings assert res[1]['embeddings'] == [Vector(e) for e in embeddings2] + if NUMPY_AVAILABLE: + assert res[2]['embeddings'] == [Vector(e) for e in embeddings3] await conn.close() @@ -128,7 +138,8 @@ async def init(conn): embedding = Vector([1.5, 2, 3]) embedding2 = [1.5, 2, 3] - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), (NULL)", embedding, embedding2) + embedding3 = None + await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3)", embedding, embedding2, embedding3) res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") assert res[0]['embedding'] == embedding From 57f6d942b71eda3e87ebcb523eecc72c89d55c91 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:13:35 -0700 Subject: [PATCH 098/121] Improved tests [skip ci] --- tests/test_pg8000.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 057dbc9..0c29cf6 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -25,7 +25,8 @@ def setup_method(self): def test_vector(self): embedding = Vector([1.5, 2, 3]) - conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) + embedding2 = None + conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (:embedding2)', embedding=embedding, embedding2=embedding2) res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') assert res[0][0] == embedding @@ -42,7 +43,8 @@ def test_vector_numpy(self): def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) - conn.run('INSERT INTO pg8000_items (half_embedding) VALUES (:embedding), (NULL)', embedding=embedding) + embedding2 = None + conn.run('INSERT INTO pg8000_items (half_embedding) VALUES (:embedding), (:embedding2)', embedding=embedding, embedding2=embedding2) res = conn.run('SELECT half_embedding FROM pg8000_items ORDER BY id') assert res[0][0] == embedding @@ -50,7 +52,8 @@ def test_halfvec(self): def test_bit(self): embedding = '101' - conn.run('INSERT INTO pg8000_items (binary_embedding) VALUES (:embedding), (NULL)', embedding=embedding) + embedding2 = None + conn.run('INSERT INTO pg8000_items (binary_embedding) VALUES (:embedding), (:embedding2)', embedding=embedding, embedding2=embedding2) res = conn.run('SELECT binary_embedding FROM pg8000_items ORDER BY id') assert res[0][0] == '101' @@ -58,7 +61,8 @@ def test_bit(self): def test_sparsevec(self): embedding = SparseVector([1.5, 2, 3]) - conn.run('INSERT INTO pg8000_items (sparse_embedding) VALUES (:embedding), (NULL)', embedding=embedding) + embedding2 = None + conn.run('INSERT INTO pg8000_items (sparse_embedding) VALUES (:embedding), (:embedding2)', embedding=embedding, embedding2=embedding2) res = conn.run('SELECT sparse_embedding FROM pg8000_items ORDER BY id') assert res[0][0] == embedding From 061ae69816ca77a25647a4400b7340b6f4834933 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:18:08 -0700 Subject: [PATCH 099/121] Improved tests [skip ci] --- tests/test_asyncpg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 4901616..153b98c 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -15,12 +15,12 @@ async def setup_connection(self): conn = await asyncpg.connect(database='pgvector_python_test') await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') await register_vector(conn) - await conn.execute('DROP TABLE IF EXISTS asyncpg_items') return conn @pytest.mark.asyncio async def test_vector(self): conn = await self.setup_connection(); + await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding vector(3))') embedding = Vector([1.5, 2, 3]) @@ -44,6 +44,7 @@ async def test_vector(self): @pytest.mark.asyncio async def test_halfvec(self): conn = await self.setup_connection(); + await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding halfvec(3))') embedding = HalfVector([1.5, 2, 3]) @@ -65,6 +66,7 @@ async def test_halfvec(self): @pytest.mark.asyncio async def test_bit(self): conn = await self.setup_connection(); + await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding bit(3))') embedding = asyncpg.BitString('101') # type: ignore @@ -85,6 +87,7 @@ async def test_bit(self): @pytest.mark.asyncio async def test_sparsevec(self): conn = await self.setup_connection(); + await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding sparsevec(3))') embedding = SparseVector([1.5, 2, 3]) @@ -104,6 +107,7 @@ async def test_sparsevec(self): @pytest.mark.asyncio async def test_vector_array(self): conn = await self.setup_connection(); + await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embeddings vector[])') embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] From 605482b5adfa84ff91ff7e141310fbcc0f8fd5b1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:27:18 -0700 Subject: [PATCH 100/121] Added Bit test [skip ci] --- tests/test_bit.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_bit.py b/tests/test_bit.py index e96af23..ff708d0 100644 --- a/tests/test_bit.py +++ b/tests/test_bit.py @@ -1,5 +1,6 @@ from pgvector import Bit import pytest +import random try: import numpy as np @@ -69,6 +70,10 @@ def test_bool(self): Bit(True) # ty: ignore[invalid-argument-type] assert str(error.value) == 'expected bytes, str, list, or ndarray' + def test_random(self): + value = ''.join(random.choices(['0', '1'], k=random.randint(1024, 2048))) + assert Bit(value).to_text() == value + def test_repr(self): assert repr(Bit([True, False, True])) == 'Bit(101)' assert str(Bit([True, False, True])) == 'Bit(101)' From a920f0414be5ab52d5fb6e9fb0706e5878c9c9b3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:30:16 -0700 Subject: [PATCH 101/121] Improved tests [skip ci] --- tests/test_sparse_vector.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_sparse_vector.py b/tests/test_sparse_vector.py index 4da2f3f..9a18f98 100644 --- a/tests/test_sparse_vector.py +++ b/tests/test_sparse_vector.py @@ -23,6 +23,9 @@ def test_list(self): assert np.array_equal(vec.to_numpy(), [1, 0, 2, 0, 3, 0]) assert vec.indices() == [0, 2, 4] + def test_list_empty(self): + assert SparseVector([]).to_list() == [] + def test_list_dimensions(self): with pytest.raises(ValueError) as error: SparseVector([1, 0, 2, 0, 3, 0], 6) # ty: ignore[invalid-argument-type] @@ -39,6 +42,9 @@ def test_dict(self): assert vec.to_list() == [1, 0, 2, 0, 3, 0] assert vec.indices() == [0, 2, 4] + def test_dict_empty(self): + assert SparseVector({}, 0).to_list() == [] + def test_dict_no_dimensions(self): with pytest.raises(ValueError) as error: SparseVector({0: 1, 2: 2, 4: 3}) From 8a3dadb1b3f2ae260a0211e1cd3245bfa10a5cf9 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:33:12 -0700 Subject: [PATCH 102/121] Simplified tests [skip ci] --- tests/test_pg8000.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 0c29cf6..75f9bee 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -25,21 +25,14 @@ def setup_method(self): def test_vector(self): embedding = Vector([1.5, 2, 3]) - embedding2 = None - conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (:embedding2)', embedding=embedding, embedding2=embedding2) + embedding2 = np.array([4.5, 5, 6]) if NUMPY_AVAILABLE else [4.5, 5, 6] + embedding3 = None + conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (:embedding2), (:embedding3)', embedding=embedding, embedding2=embedding2, embedding3=embedding3) res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') assert res[0][0] == embedding - assert res[1][0] is None - - @pytest.mark.skipif(not NUMPY_AVAILABLE, reason='NumPy required') - def test_vector_numpy(self): - embedding = np.array([1.5, 2, 3]) - conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (NULL)', embedding=embedding) - - res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') - assert res[0][0] == Vector([1.5, 2, 3]) - assert res[1][0] is None + assert res[1][0] == Vector(embedding2) + assert res[2][0] is None def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) From 41305ded9abd202dc80fcbc5b57f54e386e50b2b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:34:49 -0700 Subject: [PATCH 103/121] Improved test [skip ci] --- tests/test_psycopg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index ca04c97..80ab538 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -25,7 +25,8 @@ def setup_method(self): def test_vector(self): embedding = Vector([1.5, 2, 3]) - conn.execute('INSERT INTO psycopg_items (embedding) VALUES (%s), (NULL)', (embedding,)) + embedding2 = None + conn.execute('INSERT INTO psycopg_items (embedding) VALUES (%s), (%s)', (embedding, embedding2)) res = conn.execute('SELECT embedding FROM psycopg_items ORDER BY id').fetchall() assert res[0][0] == embedding From 706ec697775252619465bfb523ae8d3d10661837 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:37:05 -0700 Subject: [PATCH 104/121] Improved test [skip ci] --- tests/test_psycopg2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_psycopg2.py b/tests/test_psycopg2.py index d12eae0..466fef1 100644 --- a/tests/test_psycopg2.py +++ b/tests/test_psycopg2.py @@ -28,7 +28,8 @@ def setup_method(self): def test_vector(self): embedding = Vector([1.5, 2, 3]) - cur.execute('INSERT INTO psycopg2_items (embedding) VALUES (%s), (NULL)', (embedding,)) + embedding2 = None + cur.execute('INSERT INTO psycopg2_items (embedding) VALUES (%s), (%s)', (embedding, embedding2)) cur.execute('SELECT embedding FROM psycopg2_items ORDER BY id') res = cur.fetchall() From 058ba28d4408f09d2f7184fa76d0cfe9f0be950f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:40:43 -0700 Subject: [PATCH 105/121] Fixed test [skip ci] --- tests/test_pg8000.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_pg8000.py b/tests/test_pg8000.py index 75f9bee..718d0f7 100644 --- a/tests/test_pg8000.py +++ b/tests/test_pg8000.py @@ -25,13 +25,13 @@ def setup_method(self): def test_vector(self): embedding = Vector([1.5, 2, 3]) - embedding2 = np.array([4.5, 5, 6]) if NUMPY_AVAILABLE else [4.5, 5, 6] + embedding2 = np.array([4.5, 5, 6]) if NUMPY_AVAILABLE else Vector([4.5, 5, 6]) embedding3 = None conn.run('INSERT INTO pg8000_items (embedding) VALUES (:embedding), (:embedding2), (:embedding3)', embedding=embedding, embedding2=embedding2, embedding3=embedding3) res = conn.run('SELECT embedding FROM pg8000_items ORDER BY id') assert res[0][0] == embedding - assert res[1][0] == Vector(embedding2) + assert res[1][0] == Vector([4.5, 5, 6]) assert res[2][0] is None def test_halfvec(self): From e67e8462323dafb5e2b016c02ccd3c3b9a0d6a25 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:52:09 -0700 Subject: [PATCH 106/121] Improved code [skip ci] --- pgvector/halfvec.py | 2 +- pgvector/vector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index d89cdb6..87fafdc 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -47,7 +47,7 @@ def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float16]]: return np.frombuffer(self._value, dtype='>f2', count=self.dimensions(), offset=4) def to_text(self) -> str: - return '[' + ','.join([str(v) for v in self.to_list()]) + ']' + return f'[{','.join([str(v) for v in self.to_list()])}]' def to_binary(self) -> bytes: return self._value diff --git a/pgvector/vector.py b/pgvector/vector.py index bd5a2f5..3049e3c 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -47,7 +47,7 @@ def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float32]]: return np.frombuffer(self._value, dtype='>f4', count=self.dimensions(), offset=4) def to_text(self) -> str: - return '[' + ','.join([str(v) for v in self.to_list()]) + ']' + return f'[{','.join([str(v) for v in self.to_list()])}]' def to_binary(self) -> bytes: return self._value From 6deaa5f5ccbd563bf86c4fa38ab40b636631ddc7 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 22:59:07 -0700 Subject: [PATCH 107/121] Improved Bit constructor code [skip ci] --- pgvector/bit.py | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index ba116cb..1052cee 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -13,7 +13,7 @@ def __init__(self, value: bytes | str | list[bool] | np.ndarray[tuple[int], np.d if isinstance(value, bytes): length = 8 * len(value) data = value - else: + elif isinstance(value, (list, str)): if isinstance(value, list): def bit_value(v: bool) -> str: if v is True: @@ -24,30 +24,29 @@ def bit_value(v: bool) -> str: value = ''.join([bit_value(v) for v in value]) - if isinstance(value, str): - length = len(value) - - if length % 8 != 0: - value += '0' * (8 - (length % 8)) - - try: - data = int(value, 2).to_bytes(len(value) // 8, byteorder='big') - except ValueError: - raise ValueError('expected bit string') - elif NUMPY_AVAILABLE and isinstance(value, np.ndarray): - if value.dtype != np.bool: - # skip error for result of np.unpackbits - if value.dtype != np.uint8 or np.any(value > 1): - raise ValueError('expected elements to be boolean') - value = value.astype(bool) - - if value.ndim != 1: - raise ValueError('expected ndim to be 1') - - length = len(value) - data = np.packbits(value).tobytes() - else: - raise ValueError('expected bytes, str, list, or ndarray') + length = len(value) + + if length % 8 != 0: + value += '0' * (8 - (length % 8)) + + try: + data = int(value, 2).to_bytes(len(value) // 8, byteorder='big') + except ValueError: + raise ValueError('expected bit string') + elif NUMPY_AVAILABLE and isinstance(value, np.ndarray): + if value.dtype != np.bool: + # skip error for result of np.unpackbits + if value.dtype != np.uint8 or np.any(value > 1): + raise ValueError('expected elements to be boolean') + value = value.astype(bool) + + if value.ndim != 1: + raise ValueError('expected ndim to be 1') + + length = len(value) + data = np.packbits(value).tobytes() + else: + raise ValueError('expected bytes, str, list, or ndarray') self._value = pack('>i', length) + data From 634215cfc921e0f778d57c18f073f1fa258a3daf Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 23:00:43 -0700 Subject: [PATCH 108/121] Fixed quoting for Python < 3.12 [skip ci] --- pgvector/halfvec.py | 2 +- pgvector/vector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 87fafdc..4728065 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -47,7 +47,7 @@ def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float16]]: return np.frombuffer(self._value, dtype='>f2', count=self.dimensions(), offset=4) def to_text(self) -> str: - return f'[{','.join([str(v) for v in self.to_list()])}]' + return f'[{",".join([str(v) for v in self.to_list()])}]' def to_binary(self) -> bytes: return self._value diff --git a/pgvector/vector.py b/pgvector/vector.py index 3049e3c..257ad8f 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -47,7 +47,7 @@ def to_numpy(self) -> np.ndarray[tuple[int], np.dtype[np.float32]]: return np.frombuffer(self._value, dtype='>f4', count=self.dimensions(), offset=4) def to_text(self) -> str: - return f'[{','.join([str(v) for v in self.to_list()])}]' + return f'[{",".join([str(v) for v in self.to_list()])}]' def to_binary(self) -> bytes: return self._value From d96381375dd84e0f9ff907b1745ff8b0627ce4c2 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 23:21:13 -0700 Subject: [PATCH 109/121] Updated style [skip ci] --- tests/test_asyncpg.py | 36 ++++++++++++++++++------------------ tests/test_django.py | 2 +- tests/test_psycopg.py | 16 ++++++++-------- tests/test_sqlalchemy.py | 8 ++++---- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 153b98c..2b96de0 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -27,16 +27,16 @@ async def test_vector(self): embedding2 = [4.5, 5, 6] embedding3 = np.array([7.5, 8, 9]) if NUMPY_AVAILABLE else [7.5, 8, 9] embedding4 = None - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3), ($4)", embedding, embedding2, embedding3, embedding4) + await conn.execute('INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3), ($4)', embedding, embedding2, embedding3, embedding4) - res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") + res = await conn.fetch('SELECT * FROM asyncpg_items ORDER BY id') assert res[0]['embedding'] == embedding assert res[1]['embedding'] == Vector(embedding2) assert res[2]['embedding'] == Vector(embedding3) assert res[3]['embedding'] is None # ensures binary format is correct - text_res = await conn.fetch("SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1") + text_res = await conn.fetch('SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1') assert text_res[0]['embedding'] == '[1.5,2,3]' await conn.close() @@ -50,15 +50,15 @@ async def test_halfvec(self): embedding = HalfVector([1.5, 2, 3]) embedding2 = [4.5, 5, 6] embedding3 = None - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3)", embedding, embedding2, embedding3) + await conn.execute('INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3)', embedding, embedding2, embedding3) - res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") + res = await conn.fetch('SELECT * FROM asyncpg_items ORDER BY id') assert res[0]['embedding'] == embedding assert res[1]['embedding'] == HalfVector(embedding2) assert res[2]['embedding'] is None # ensures binary format is correct - text_res = await conn.fetch("SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1") + text_res = await conn.fetch('SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1') assert text_res[0]['embedding'] == '[1.5,2,3]' await conn.close() @@ -71,15 +71,15 @@ async def test_bit(self): embedding = asyncpg.BitString('101') # type: ignore embedding2 = None - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2)", embedding, embedding2) + await conn.execute('INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2)', embedding, embedding2) - res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") + res = await conn.fetch('SELECT * FROM asyncpg_items ORDER BY id') assert res[0]['embedding'].as_string() == '101' assert res[0]['embedding'].to_int() == 5 assert res[1]['embedding'] is None # ensures binary format is correct - text_res = await conn.fetch("SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1") + text_res = await conn.fetch('SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1') assert text_res[0]['embedding'] == '101' await conn.close() @@ -92,14 +92,14 @@ async def test_sparsevec(self): embedding = SparseVector([1.5, 2, 3]) embedding2 = None - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2)", embedding, embedding2) + await conn.execute('INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2)', embedding, embedding2) - res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") + res = await conn.fetch('SELECT * FROM asyncpg_items ORDER BY id') assert res[0]['embedding'] == embedding assert res[1]['embedding'] is None # ensures binary format is correct - text_res = await conn.fetch("SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1") + text_res = await conn.fetch('SELECT embedding::text FROM asyncpg_items ORDER BY id LIMIT 1') assert text_res[0]['embedding'] == '{1:1.5,2:2,3:3}/3' await conn.close() @@ -111,16 +111,16 @@ async def test_vector_array(self): await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embeddings vector[])') embeddings = [Vector([1.5, 2, 3]), Vector([4.5, 5, 6])] - await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES ($1)", embeddings) + await conn.execute('INSERT INTO asyncpg_items (embeddings) VALUES ($1)', embeddings) embeddings2 = [[1.5, 2, 3], [4.5, 5, 6]] - await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings2[0], embeddings2[1]) + await conn.execute('INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])', embeddings2[0], embeddings2[1]) if NUMPY_AVAILABLE: embeddings3 = [np.array([1.5, 2, 3]), np.array([4.5, 5, 6])] - await conn.execute("INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])", embeddings3[0], embeddings3[1]) + await conn.execute('INSERT INTO asyncpg_items (embeddings) VALUES (ARRAY[$1, $2]::vector[])', embeddings3[0], embeddings3[1]) - res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") + res = await conn.fetch('SELECT * FROM asyncpg_items ORDER BY id') assert res[0]['embeddings'] == embeddings assert res[1]['embeddings'] == [Vector(e) for e in embeddings2] if NUMPY_AVAILABLE: @@ -143,9 +143,9 @@ async def init(conn): embedding = Vector([1.5, 2, 3]) embedding2 = [1.5, 2, 3] embedding3 = None - await conn.execute("INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3)", embedding, embedding2, embedding3) + await conn.execute('INSERT INTO asyncpg_items (embedding) VALUES ($1), ($2), ($3)', embedding, embedding2, embedding3) - res = await conn.fetch("SELECT * FROM asyncpg_items ORDER BY id") + res = await conn.fetch('SELECT * FROM asyncpg_items ORDER BY id') assert res[0]['embedding'] == embedding assert res[1]['embedding'] == Vector(embedding2) assert res[2]['embedding'] is None diff --git a/tests/test_django.py b/tests/test_django.py index c64d030..6bcf67a 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -131,7 +131,7 @@ class Migration(migrations.Migration): sql_statements = loader.collect_sql([(migration, False)]) with connection.cursor() as cursor: - cursor.execute("DROP TABLE IF EXISTS django_app_item") + cursor.execute('DROP TABLE IF EXISTS django_app_item') cursor.execute('\n'.join(sql_statements)) diff --git a/tests/test_psycopg.py b/tests/test_psycopg.py index 80ab538..13dc7c8 100644 --- a/tests/test_psycopg.py +++ b/tests/test_psycopg.py @@ -128,19 +128,19 @@ def test_sparsevec_text_format(self): def test_text_copy_from(self): embedding = [1.5, 2, 3] cur = conn.cursor() - with cur.copy("COPY psycopg_items (embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN") as copy: + with cur.copy('COPY psycopg_items (embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN') as copy: copy.write_row([Vector(embedding), HalfVector(embedding), '101', SparseVector(embedding)]) def test_binary_copy_from(self): embedding = [1.5, 2, 3] cur = conn.cursor() - with cur.copy("COPY psycopg_items (embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN WITH (FORMAT BINARY)") as copy: + with cur.copy('COPY psycopg_items (embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN WITH (FORMAT BINARY)') as copy: copy.write_row([Vector(embedding), HalfVector(embedding), Bit('101'), SparseVector(embedding)]) def test_binary_copy_from_set_types(self): embedding = [1.5, 2, 3] cur = conn.cursor() - with cur.copy("COPY psycopg_items (id, embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN WITH (FORMAT BINARY)") as copy: + with cur.copy('COPY psycopg_items (id, embedding, half_embedding, binary_embedding, sparse_embedding) FROM STDIN WITH (FORMAT BINARY)') as copy: copy.set_types(['int8', 'vector', 'halfvec', 'bit', 'sparsevec']) copy.write_row([1, Vector(embedding), HalfVector(embedding), Bit('101'), SparseVector(embedding)]) @@ -149,17 +149,17 @@ def test_text_copy_to(self): half_embedding = HalfVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (embedding, half_embedding) VALUES (%s, %s)', (embedding, half_embedding)) cur = conn.cursor() - with cur.copy("COPY psycopg_items (embedding, half_embedding) TO STDOUT") as copy: + with cur.copy('COPY psycopg_items (embedding, half_embedding) TO STDOUT') as copy: for row in copy.rows(): - assert row[0] == "[1.5,2,3]" - assert row[1] == "[1.5,2,3]" + assert row[0] == '[1.5,2,3]' + assert row[1] == '[1.5,2,3]' def test_binary_copy_to(self): embedding = Vector([1.5, 2, 3]) half_embedding = HalfVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (embedding, half_embedding) VALUES (%s, %s)', (embedding, half_embedding)) cur = conn.cursor() - with cur.copy("COPY psycopg_items (embedding, half_embedding) TO STDOUT WITH (FORMAT BINARY)") as copy: + with cur.copy('COPY psycopg_items (embedding, half_embedding) TO STDOUT WITH (FORMAT BINARY)') as copy: for row in copy.rows(): assert Vector.from_binary(row[0]) == embedding assert HalfVector.from_binary(row[1]) == half_embedding @@ -169,7 +169,7 @@ def test_binary_copy_to_set_types(self): half_embedding = HalfVector([1.5, 2, 3]) conn.execute('INSERT INTO psycopg_items (embedding, half_embedding) VALUES (%s, %s)', (embedding, half_embedding)) cur = conn.cursor() - with cur.copy("COPY psycopg_items (embedding, half_embedding) TO STDOUT WITH (FORMAT BINARY)") as copy: + with cur.copy('COPY psycopg_items (embedding, half_embedding) TO STDOUT WITH (FORMAT BINARY)') as copy: copy.set_types(['vector', 'halfvec']) for row in copy.rows(): assert row[0] == embedding diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 35b8be2..3d2c1d3 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -20,7 +20,7 @@ psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -@event.listens_for(psycopg2_type_engine, "connect") +@event.listens_for(psycopg2_type_engine, 'connect') def psycopg2_connect(dbapi_connection, connection_record): from pgvector.psycopg2 import register_vector register_vector(dbapi_connection) @@ -32,7 +32,7 @@ def psycopg2_connect(dbapi_connection, connection_record): psycopg_type_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') -@event.listens_for(psycopg_type_engine, "connect") +@event.listens_for(psycopg_type_engine, 'connect') def psycopg_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector register_vector(dbapi_connection) @@ -42,7 +42,7 @@ def psycopg_connect(dbapi_connection, connection_record): psycopg_async_type_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') -@event.listens_for(psycopg_async_type_engine.sync_engine, "connect") +@event.listens_for(psycopg_async_type_engine.sync_engine, 'connect') def psycopg_async_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector_async dbapi_connection.run_async(register_vector_async) @@ -52,7 +52,7 @@ def psycopg_async_connect(dbapi_connection, connection_record): asyncpg_type_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') -@event.listens_for(asyncpg_type_engine.sync_engine, "connect") +@event.listens_for(asyncpg_type_engine.sync_engine, 'connect') def asyncpg_connect(dbapi_connection, connection_record): from pgvector.asyncpg import register_vector dbapi_connection.run_async(register_vector) From 6cfc910fd60fa90dfcee995fc86a535e7cc2aca1 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 23:45:23 -0700 Subject: [PATCH 110/121] Updated style [skip ci] --- tests/test_asyncpg.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_asyncpg.py b/tests/test_asyncpg.py index 2b96de0..3df3d39 100644 --- a/tests/test_asyncpg.py +++ b/tests/test_asyncpg.py @@ -19,7 +19,7 @@ async def setup_connection(self): @pytest.mark.asyncio async def test_vector(self): - conn = await self.setup_connection(); + conn = await self.setup_connection() await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding vector(3))') @@ -43,7 +43,7 @@ async def test_vector(self): @pytest.mark.asyncio async def test_halfvec(self): - conn = await self.setup_connection(); + conn = await self.setup_connection() await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding halfvec(3))') @@ -65,7 +65,7 @@ async def test_halfvec(self): @pytest.mark.asyncio async def test_bit(self): - conn = await self.setup_connection(); + conn = await self.setup_connection() await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding bit(3))') @@ -86,7 +86,7 @@ async def test_bit(self): @pytest.mark.asyncio async def test_sparsevec(self): - conn = await self.setup_connection(); + conn = await self.setup_connection() await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embedding sparsevec(3))') @@ -106,7 +106,7 @@ async def test_sparsevec(self): @pytest.mark.asyncio async def test_vector_array(self): - conn = await self.setup_connection(); + conn = await self.setup_connection() await conn.execute('DROP TABLE IF EXISTS asyncpg_items') await conn.execute('CREATE TABLE asyncpg_items (id bigserial PRIMARY KEY, embeddings vector[])') From 965ca1774271263ccdc586417196b6d49d9cb4ae Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 23:45:37 -0700 Subject: [PATCH 111/121] Updated style [skip ci] --- pgvector/psycopg/bit.py | 2 -- pgvector/psycopg/halfvec.py | 4 ---- pgvector/psycopg/sparsevec.py | 4 ---- pgvector/psycopg/vector.py | 4 ---- 4 files changed, 14 deletions(-) diff --git a/pgvector/psycopg/bit.py b/pgvector/psycopg/bit.py index a3f3512..f0f4021 100644 --- a/pgvector/psycopg/bit.py +++ b/pgvector/psycopg/bit.py @@ -9,7 +9,6 @@ class BitDumper(Dumper): - format = Format.TEXT def dump(self, obj: Bit) -> Buffer | None: @@ -17,7 +16,6 @@ def dump(self, obj: Bit) -> Buffer | None: class BitBinaryDumper(BitDumper): - format = Format.BINARY def dump(self, obj: Bit) -> Buffer | None: diff --git a/pgvector/psycopg/halfvec.py b/pgvector/psycopg/halfvec.py index 5ded56d..6eaf501 100644 --- a/pgvector/psycopg/halfvec.py +++ b/pgvector/psycopg/halfvec.py @@ -9,7 +9,6 @@ class HalfVectorDumper(Dumper): - format = Format.TEXT def dump(self, obj: HalfVector) -> Buffer | None: @@ -18,7 +17,6 @@ def dump(self, obj: HalfVector) -> Buffer | None: class HalfVectorBinaryDumper(HalfVectorDumper): - format = Format.BINARY def dump(self, obj: HalfVector) -> Buffer | None: @@ -26,7 +24,6 @@ def dump(self, obj: HalfVector) -> Buffer | None: class HalfVectorLoader(Loader): - format = Format.TEXT def load(self, data: Buffer) -> HalfVector | None: @@ -36,7 +33,6 @@ def load(self, data: Buffer) -> HalfVector | None: class HalfVectorBinaryLoader(HalfVectorLoader): - format = Format.BINARY def load(self, data: Buffer) -> HalfVector | None: diff --git a/pgvector/psycopg/sparsevec.py b/pgvector/psycopg/sparsevec.py index b209b8f..16eaf5a 100644 --- a/pgvector/psycopg/sparsevec.py +++ b/pgvector/psycopg/sparsevec.py @@ -9,7 +9,6 @@ class SparseVectorDumper(Dumper): - format = Format.TEXT def dump(self, obj: SparseVector) -> Buffer | None: @@ -18,7 +17,6 @@ def dump(self, obj: SparseVector) -> Buffer | None: class SparseVectorBinaryDumper(SparseVectorDumper): - format = Format.BINARY def dump(self, obj: SparseVector) -> Buffer | None: @@ -26,7 +24,6 @@ def dump(self, obj: SparseVector) -> Buffer | None: class SparseVectorLoader(Loader): - format = Format.TEXT def load(self, data: Buffer) -> SparseVector | None: @@ -36,7 +33,6 @@ def load(self, data: Buffer) -> SparseVector | None: class SparseVectorBinaryLoader(SparseVectorLoader): - format = Format.BINARY def load(self, data: Buffer) -> SparseVector | None: diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index cf1694c..f04cabd 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -10,7 +10,6 @@ class VectorDumper(Dumper): - format = Format.TEXT def dump(self, obj: Vector) -> Buffer | None: @@ -19,7 +18,6 @@ def dump(self, obj: Vector) -> Buffer | None: class VectorBinaryDumper(VectorDumper): - format = Format.BINARY def dump(self, obj: Vector) -> Buffer | None: @@ -27,7 +25,6 @@ def dump(self, obj: Vector) -> Buffer | None: class VectorLoader(Loader): - format = Format.TEXT def load(self, data: Buffer) -> Vector | None: @@ -37,7 +34,6 @@ def load(self, data: Buffer) -> Vector | None: class VectorBinaryLoader(VectorLoader): - format = Format.BINARY def load(self, data: Buffer) -> Vector | None: From 203dad334abde1f770ad411455a994da00ed84ac Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 2 Jun 2026 23:53:51 -0700 Subject: [PATCH 112/121] Improved type hints [skip ci] --- pgvector/psycopg/vector.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index f04cabd..7daf273 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -3,16 +3,19 @@ from psycopg.adapt import Loader, Dumper from psycopg.pq import Format from psycopg.types import TypeInfo -from typing import Any, TypeAlias +from typing import Any, TypeAlias, TYPE_CHECKING from .. import Vector Buffer: TypeAlias = bytes | bytearray | memoryview +if TYPE_CHECKING: + import numpy as np + class VectorDumper(Dumper): format = Format.TEXT - def dump(self, obj: Vector) -> Buffer | None: + def dump(self, obj: Vector | np.ndarray) -> Buffer | None: value = Vector._to_db(obj) return value if value is None else value.encode('utf8') @@ -20,7 +23,7 @@ def dump(self, obj: Vector) -> Buffer | None: class VectorBinaryDumper(VectorDumper): format = Format.BINARY - def dump(self, obj: Vector) -> Buffer | None: + def dump(self, obj: Vector | np.ndarray) -> Buffer | None: return Vector._to_db_binary(obj) From 07ccccefbb747e1280b052f2dc18bce6b922e206 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 16:52:39 -0700 Subject: [PATCH 113/121] Removed dimensions check from SQLAlchemy for consistency with built-in types --- pgvector/halfvec.py | 5 +---- pgvector/sparsevec.py | 5 +---- pgvector/sqlalchemy/halfvec.py | 4 ++-- pgvector/sqlalchemy/sparsevec.py | 4 ++-- pgvector/sqlalchemy/vector.py | 4 ++-- pgvector/vector.py | 5 +---- 6 files changed, 9 insertions(+), 18 deletions(-) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index 4728065..a9610d5 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -71,16 +71,13 @@ def from_binary(cls, value: bytes) -> HalfVector: return vec @classmethod - def _to_db(cls, value: object, dim: int | None = None) -> str | None: + def _to_db(cls, value: object) -> str | None: if value is None: return value if not isinstance(value, cls): value = cls(value) # ty: ignore[invalid-argument-type] - if dim is not None and value.dimensions() != dim: - raise ValueError('expected %d dimensions, not %d' % (dim, value.dimensions())) - return value.to_text() @classmethod diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 09b796c..0d76563 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -149,16 +149,13 @@ def _from_parts(cls, dim: int, indices: list[int], values: list[float]) -> Spars return vec @classmethod - def _to_db(cls, value: object, dim: int | None = None) -> str | None: + def _to_db(cls, value: object) -> str | None: if value is None: return value if not isinstance(value, cls): value = cls(value) - if dim is not None and value.dimensions() != dim: - raise ValueError('expected %d dimensions, not %d' % (dim, value.dimensions())) - return value.to_text() @classmethod diff --git a/pgvector/sqlalchemy/halfvec.py b/pgvector/sqlalchemy/halfvec.py index af13317..49d5df0 100644 --- a/pgvector/sqlalchemy/halfvec.py +++ b/pgvector/sqlalchemy/halfvec.py @@ -20,14 +20,14 @@ def get_col_spec(self, **kw: Any) -> str: def bind_processor(self, dialect: Dialect) -> Any: def process(value: Any) -> str | None: - return HalfVector._to_db(value, self.dim) + return HalfVector._to_db(value) return process def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) def process(value: Any) -> Any: - return string_literal_processor(HalfVector._to_db(value, self.dim)) # type: ignore + return string_literal_processor(HalfVector._to_db(value)) # type: ignore return process def result_processor(self, dialect: Dialect, coltype: Any) -> Any: diff --git a/pgvector/sqlalchemy/sparsevec.py b/pgvector/sqlalchemy/sparsevec.py index 235a779..2dfea01 100644 --- a/pgvector/sqlalchemy/sparsevec.py +++ b/pgvector/sqlalchemy/sparsevec.py @@ -20,14 +20,14 @@ def get_col_spec(self, **kw: Any) -> str: def bind_processor(self, dialect: Dialect) -> Any: def process(value: Any) -> str | None: - return SparseVector._to_db(value, self.dim) + return SparseVector._to_db(value) return process def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) def process(value: Any) -> Any: - return string_literal_processor(SparseVector._to_db(value, self.dim)) # type: ignore + return string_literal_processor(SparseVector._to_db(value)) # type: ignore return process def result_processor(self, dialect: Dialect, coltype: Any) -> Any: diff --git a/pgvector/sqlalchemy/vector.py b/pgvector/sqlalchemy/vector.py index 14d66f3..20ad30f 100644 --- a/pgvector/sqlalchemy/vector.py +++ b/pgvector/sqlalchemy/vector.py @@ -20,14 +20,14 @@ def get_col_spec(self, **kw: Any) -> str: def bind_processor(self, dialect: Dialect) -> Any: def process(value: Any) -> str | None: - return Vector._to_db(value, self.dim) + return Vector._to_db(value) return process def literal_processor(self, dialect: Dialect) -> Any: string_literal_processor = self._string._cached_literal_processor(dialect) def process(value: Any) -> Any: - return string_literal_processor(Vector._to_db(value, self.dim)) # type: ignore + return string_literal_processor(Vector._to_db(value)) # type: ignore return process def result_processor(self, dialect: Dialect, coltype: Any) -> Any: diff --git a/pgvector/vector.py b/pgvector/vector.py index 257ad8f..b48f223 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -71,16 +71,13 @@ def from_binary(cls, value: bytes) -> Vector: return vec @classmethod - def _to_db(cls, value: object, dim: int | None = None) -> str | None: + def _to_db(cls, value: object) -> str | None: if value is None: return value if not isinstance(value, cls): value = cls(value) # ty: ignore[invalid-argument-type] - if dim is not None and value.dimensions() != dim: - raise ValueError('expected %d dimensions, not %d' % (dim, value.dimensions())) - return value.to_text() @classmethod From 135cd53d057b9b5081476d170dded801f1fbda7c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 17:16:07 -0700 Subject: [PATCH 114/121] Simplified code --- pgvector/asyncpg/register.py | 6 +++--- pgvector/django/halfvec.py | 2 +- pgvector/django/vector.py | 2 +- pgvector/halfvec.py | 7 ------- pgvector/pg8000/register.py | 6 +++--- pgvector/psycopg/bit.py | 4 ++-- pgvector/psycopg/halfvec.py | 9 ++++----- pgvector/psycopg/sparsevec.py | 9 ++++----- pgvector/psycopg/vector.py | 13 ++++++++----- pgvector/sparsevec.py | 7 ------- pgvector/vector.py | 7 ------- 11 files changed, 26 insertions(+), 46 deletions(-) diff --git a/pgvector/asyncpg/register.py b/pgvector/asyncpg/register.py index 6718850..67983da 100644 --- a/pgvector/asyncpg/register.py +++ b/pgvector/asyncpg/register.py @@ -7,7 +7,7 @@ async def register_vector(conn: Connection, schema: str = 'public') -> None: 'vector', schema=schema, encoder=Vector._to_db_binary, - decoder=Vector._from_db_binary, + decoder=Vector.from_binary, format='binary' ) @@ -16,7 +16,7 @@ async def register_vector(conn: Connection, schema: str = 'public') -> None: 'halfvec', schema=schema, encoder=HalfVector._to_db_binary, - decoder=HalfVector._from_db_binary, + decoder=HalfVector.from_binary, format='binary' ) @@ -24,7 +24,7 @@ async def register_vector(conn: Connection, schema: str = 'public') -> None: 'sparsevec', schema=schema, encoder=SparseVector._to_db_binary, - decoder=SparseVector._from_db_binary, + decoder=SparseVector.from_binary, format='binary' ) except ValueError as e: diff --git a/pgvector/django/halfvec.py b/pgvector/django/halfvec.py index a2ec481..a3901b4 100644 --- a/pgvector/django/halfvec.py +++ b/pgvector/django/halfvec.py @@ -31,7 +31,7 @@ def to_python(self, value: Any) -> HalfVector | None: if value is None or isinstance(value, HalfVector): return value elif isinstance(value, str): - return HalfVector._from_db(value) + return HalfVector.from_text(value) else: return HalfVector(value) diff --git a/pgvector/django/vector.py b/pgvector/django/vector.py index 87d8989..3aafc11 100644 --- a/pgvector/django/vector.py +++ b/pgvector/django/vector.py @@ -31,7 +31,7 @@ def to_python(self, value: Any) -> Vector | None: if value is None or isinstance(value, Vector): return value elif isinstance(value, str): - return Vector._from_db(value) + return Vector.from_text(value) else: return Vector(value) diff --git a/pgvector/halfvec.py b/pgvector/halfvec.py index a9610d5..860803c 100644 --- a/pgvector/halfvec.py +++ b/pgvector/halfvec.py @@ -96,10 +96,3 @@ def _from_db(cls, value: str | HalfVector | None) -> HalfVector | None: return value return cls.from_text(value) - - @classmethod - def _from_db_binary(cls, value: bytes | HalfVector | None) -> HalfVector | None: - if value is None or isinstance(value, HalfVector): - return value - - return cls.from_binary(value) diff --git a/pgvector/pg8000/register.py b/pgvector/pg8000/register.py index 5d44b7c..6e84b7f 100644 --- a/pgvector/pg8000/register.py +++ b/pgvector/pg8000/register.py @@ -11,7 +11,7 @@ def register_vector(conn: Connection) -> None: raise RuntimeError('vector type not found in the database') conn.register_out_adapter(Vector, Vector._to_db) - conn.register_in_adapter(type_info['vector'], Vector._from_db) + conn.register_in_adapter(type_info['vector'], Vector.from_text) try: import numpy as np @@ -21,8 +21,8 @@ def register_vector(conn: Connection) -> None: if 'halfvec' in type_info: conn.register_out_adapter(HalfVector, HalfVector._to_db) - conn.register_in_adapter(type_info['halfvec'], HalfVector._from_db) + conn.register_in_adapter(type_info['halfvec'], HalfVector.from_text) if 'sparsevec' in type_info: conn.register_out_adapter(SparseVector, SparseVector._to_db) - conn.register_in_adapter(type_info['sparsevec'], SparseVector._from_db) + conn.register_in_adapter(type_info['sparsevec'], SparseVector.from_text) diff --git a/pgvector/psycopg/bit.py b/pgvector/psycopg/bit.py index f0f4021..a6ac6b2 100644 --- a/pgvector/psycopg/bit.py +++ b/pgvector/psycopg/bit.py @@ -12,14 +12,14 @@ class BitDumper(Dumper): format = Format.TEXT def dump(self, obj: Bit) -> Buffer | None: - return Bit._to_db(obj).encode('utf8') + return obj.to_text().encode('utf8') class BitBinaryDumper(BitDumper): format = Format.BINARY def dump(self, obj: Bit) -> Buffer | None: - return Bit._to_db_binary(obj) + return obj.to_binary() def register_bit_info(context: BaseConnection[Any], info: TypeInfo | None) -> None: diff --git a/pgvector/psycopg/halfvec.py b/pgvector/psycopg/halfvec.py index 6eaf501..8c9a8c8 100644 --- a/pgvector/psycopg/halfvec.py +++ b/pgvector/psycopg/halfvec.py @@ -12,15 +12,14 @@ class HalfVectorDumper(Dumper): format = Format.TEXT def dump(self, obj: HalfVector) -> Buffer | None: - value = HalfVector._to_db(obj) - return value if value is None else value.encode('utf8') + return obj.to_text().encode('utf8') class HalfVectorBinaryDumper(HalfVectorDumper): format = Format.BINARY def dump(self, obj: HalfVector) -> Buffer | None: - return HalfVector._to_db_binary(obj) + return obj.to_binary() class HalfVectorLoader(Loader): @@ -29,7 +28,7 @@ class HalfVectorLoader(Loader): def load(self, data: Buffer) -> HalfVector | None: if isinstance(data, memoryview): data = bytes(data) - return HalfVector._from_db(data.decode('utf8')) + return HalfVector.from_text(data.decode('utf8')) class HalfVectorBinaryLoader(HalfVectorLoader): @@ -38,7 +37,7 @@ class HalfVectorBinaryLoader(HalfVectorLoader): def load(self, data: Buffer) -> HalfVector | None: if isinstance(data, (bytearray, memoryview)): data = bytes(data) - return HalfVector._from_db_binary(data) + return HalfVector.from_binary(data) def register_halfvec_info(context: BaseConnection[Any], info: TypeInfo) -> None: diff --git a/pgvector/psycopg/sparsevec.py b/pgvector/psycopg/sparsevec.py index 16eaf5a..2a98e6b 100644 --- a/pgvector/psycopg/sparsevec.py +++ b/pgvector/psycopg/sparsevec.py @@ -12,15 +12,14 @@ class SparseVectorDumper(Dumper): format = Format.TEXT def dump(self, obj: SparseVector) -> Buffer | None: - value = SparseVector._to_db(obj) - return value if value is None else value.encode('utf8') + return obj.to_text().encode('utf8') class SparseVectorBinaryDumper(SparseVectorDumper): format = Format.BINARY def dump(self, obj: SparseVector) -> Buffer | None: - return SparseVector._to_db_binary(obj) + return obj.to_binary() class SparseVectorLoader(Loader): @@ -29,7 +28,7 @@ class SparseVectorLoader(Loader): def load(self, data: Buffer) -> SparseVector | None: if isinstance(data, memoryview): data = bytes(data) - return SparseVector._from_db(data.decode('utf8')) + return SparseVector.from_text(data.decode('utf8')) class SparseVectorBinaryLoader(SparseVectorLoader): @@ -38,7 +37,7 @@ class SparseVectorBinaryLoader(SparseVectorLoader): def load(self, data: Buffer) -> SparseVector | None: if isinstance(data, (bytearray, memoryview)): data = bytes(data) - return SparseVector._from_db_binary(data) + return SparseVector.from_binary(data) def register_sparsevec_info(context: BaseConnection[Any], info: TypeInfo) -> None: diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index 7daf273..a858ab6 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -16,15 +16,18 @@ class VectorDumper(Dumper): format = Format.TEXT def dump(self, obj: Vector | np.ndarray) -> Buffer | None: - value = Vector._to_db(obj) - return value if value is None else value.encode('utf8') + if not isinstance(obj, Vector): + obj = Vector(obj) + return obj.to_text().encode('utf8') class VectorBinaryDumper(VectorDumper): format = Format.BINARY def dump(self, obj: Vector | np.ndarray) -> Buffer | None: - return Vector._to_db_binary(obj) + if not isinstance(obj, Vector): + obj = Vector(obj) + return obj.to_binary() class VectorLoader(Loader): @@ -33,7 +36,7 @@ class VectorLoader(Loader): def load(self, data: Buffer) -> Vector | None: if isinstance(data, memoryview): data = bytes(data) - return Vector._from_db(data.decode('utf8')) + return Vector.from_text(data.decode('utf8')) class VectorBinaryLoader(VectorLoader): @@ -42,7 +45,7 @@ class VectorBinaryLoader(VectorLoader): def load(self, data: Buffer) -> Vector | None: if isinstance(data, (bytearray, memoryview)): data = bytes(data) - return Vector._from_db_binary(data) + return Vector.from_binary(data) def register_vector_info(context: BaseConnection[Any], info: TypeInfo | None) -> None: diff --git a/pgvector/sparsevec.py b/pgvector/sparsevec.py index 0d76563..3fd9b53 100644 --- a/pgvector/sparsevec.py +++ b/pgvector/sparsevec.py @@ -174,10 +174,3 @@ def _from_db(cls, value: str | SparseVector | None) -> SparseVector | None: return value return cls.from_text(value) - - @classmethod - def _from_db_binary(cls, value: bytes | SparseVector | None) -> SparseVector | None: - if value is None or isinstance(value, SparseVector): - return value - - return cls.from_binary(value) diff --git a/pgvector/vector.py b/pgvector/vector.py index b48f223..c682fe7 100644 --- a/pgvector/vector.py +++ b/pgvector/vector.py @@ -96,10 +96,3 @@ def _from_db(cls, value: str | Vector | None) -> Vector | None: return value return cls.from_text(value) - - @classmethod - def _from_db_binary(cls, value: bytes | Vector | None) -> Vector | None: - if value is None or isinstance(value, Vector): - return value - - return cls.from_binary(value) From 15f1ac7c688a0ed1ab157284b08f04368b692c9e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 17:22:15 -0700 Subject: [PATCH 115/121] Simplified Psycopg 2 code --- pgvector/psycopg2/halfvec.py | 4 +++- pgvector/psycopg2/sparsevec.py | 4 +++- pgvector/psycopg2/vector.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pgvector/psycopg2/halfvec.py b/pgvector/psycopg2/halfvec.py index 1313938..cf26576 100644 --- a/pgvector/psycopg2/halfvec.py +++ b/pgvector/psycopg2/halfvec.py @@ -11,7 +11,9 @@ def getquoted(self) -> bytes: def cast_halfvec(value: str | None, cur: cursor) -> HalfVector | None: - return HalfVector._from_db(value) + if value is None: + return None + return HalfVector.from_text(value) def register_halfvec_info(oid: int, array_oid: int | None, scope: connection | cursor | None) -> None: diff --git a/pgvector/psycopg2/sparsevec.py b/pgvector/psycopg2/sparsevec.py index 34a4810..9e8c819 100644 --- a/pgvector/psycopg2/sparsevec.py +++ b/pgvector/psycopg2/sparsevec.py @@ -11,7 +11,9 @@ def getquoted(self) -> bytes: def cast_sparsevec(value: str | None, cur: cursor) -> SparseVector | None: - return SparseVector._from_db(value) + if value is None: + return None + return SparseVector.from_text(value) def register_sparsevec_info(oid: int, array_oid: int | None, scope: connection | cursor | None) -> None: diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index f8832a6..7a3e3c4 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -11,7 +11,9 @@ def getquoted(self) -> bytes: def cast_vector(value: str | None, cur: cursor) -> Vector | None: - return Vector._from_db(value) + if value is None: + return None + return Vector.from_text(value) def register_vector_info(oid: int, array_oid: int | None, scope: connection | cursor | None) -> None: From 1868e6d86c649a84c392eaf400a741b1c63a8375 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 17:27:44 -0700 Subject: [PATCH 116/121] Simplified code --- pgvector/psycopg2/halfvec.py | 4 ++-- pgvector/psycopg2/sparsevec.py | 4 ++-- pgvector/psycopg2/vector.py | 10 ++++++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pgvector/psycopg2/halfvec.py b/pgvector/psycopg2/halfvec.py index cf26576..8c95f7e 100644 --- a/pgvector/psycopg2/halfvec.py +++ b/pgvector/psycopg2/halfvec.py @@ -3,11 +3,11 @@ class HalfvecAdapter: - def __init__(self, value: object) -> None: + def __init__(self, value: HalfVector) -> None: self._value = value def getquoted(self) -> bytes: - return adapt(HalfVector._to_db(self._value)).getquoted() + return adapt(self._value.to_text()).getquoted() def cast_halfvec(value: str | None, cur: cursor) -> HalfVector | None: diff --git a/pgvector/psycopg2/sparsevec.py b/pgvector/psycopg2/sparsevec.py index 9e8c819..6fb7f2f 100644 --- a/pgvector/psycopg2/sparsevec.py +++ b/pgvector/psycopg2/sparsevec.py @@ -3,11 +3,11 @@ class SparsevecAdapter: - def __init__(self, value: object) -> None: + def __init__(self, value: SparseVector) -> None: self._value = value def getquoted(self) -> bytes: - return adapt(SparseVector._to_db(self._value)).getquoted() + return adapt(self._value.to_text()).getquoted() def cast_sparsevec(value: str | None, cur: cursor) -> SparseVector | None: diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 7a3e3c4..b193eb6 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -1,13 +1,19 @@ from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type +from typing import TYPE_CHECKING from .. import Vector +if TYPE_CHECKING: + import numpy as np + class VectorAdapter: - def __init__(self, value: object) -> None: + def __init__(self, value: Vector | np.ndarray) -> None: + if not isinstance(value, Vector): + value = Vector(value) self._value = value def getquoted(self) -> bytes: - return adapt(Vector._to_db(self._value)).getquoted() + return adapt(self._value.to_text()).getquoted() def cast_vector(value: str | None, cur: cursor) -> Vector | None: From 12ffb43dbf87253a9341eef1eff640615dc091cd Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 18:05:50 -0700 Subject: [PATCH 117/121] Fixed CI --- pgvector/psycopg/vector.py | 13 +++++++++---- pgvector/psycopg2/vector.py | 11 +++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index a858ab6..b9608b3 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -3,13 +3,16 @@ from psycopg.adapt import Loader, Dumper from psycopg.pq import Format from psycopg.types import TypeInfo -from typing import Any, TypeAlias, TYPE_CHECKING +from typing import Any, TypeAlias from .. import Vector Buffer: TypeAlias = bytes | bytearray | memoryview -if TYPE_CHECKING: +try: import numpy as np + NUMPY_AVAILABLE = True +except ImportError: + NUMPY_AVAILABLE = False class VectorDumper(Dumper): @@ -58,9 +61,11 @@ def register_vector_info(context: BaseConnection[Any], info: TypeInfo | None) -> binary_dumper = type('', (VectorBinaryDumper,), {'oid': info.oid}) adapters = context.adapters - adapters.register_dumper('numpy.ndarray', text_dumper) - adapters.register_dumper('numpy.ndarray', binary_dumper) adapters.register_dumper(Vector, text_dumper) adapters.register_dumper(Vector, binary_dumper) adapters.register_loader(info.oid, VectorLoader) adapters.register_loader(info.oid, VectorBinaryLoader) + + if NUMPY_AVAILABLE: + adapters.register_dumper(np.ndarray, text_dumper) + adapters.register_dumper(np.ndarray, binary_dumper) diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index b193eb6..254ffcb 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -1,9 +1,11 @@ from psycopg2.extensions import adapt, connection, cursor, new_array_type, new_type, register_adapter, register_type -from typing import TYPE_CHECKING from .. import Vector -if TYPE_CHECKING: +try: import numpy as np + NUMPY_AVAILABLE = True +except ImportError: + NUMPY_AVAILABLE = False class VectorAdapter: @@ -32,8 +34,5 @@ def register_vector_info(oid: int, array_oid: int | None, scope: connection | cu register_adapter(Vector, VectorAdapter) - try: - import numpy as np + if NUMPY_AVAILABLE: register_adapter(np.ndarray, VectorAdapter) - except ImportError: - pass From 075a40703c26e66ccd0b338785e2a36cc38d023f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 18:13:28 -0700 Subject: [PATCH 118/121] Fixed CI --- pgvector/psycopg/vector.py | 4 ++-- pgvector/psycopg2/vector.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pgvector/psycopg/vector.py b/pgvector/psycopg/vector.py index b9608b3..b8b3611 100644 --- a/pgvector/psycopg/vector.py +++ b/pgvector/psycopg/vector.py @@ -18,7 +18,7 @@ class VectorDumper(Dumper): format = Format.TEXT - def dump(self, obj: Vector | np.ndarray) -> Buffer | None: + def dump(self, obj: 'Vector | np.ndarray') -> Buffer | None: if not isinstance(obj, Vector): obj = Vector(obj) return obj.to_text().encode('utf8') @@ -27,7 +27,7 @@ def dump(self, obj: Vector | np.ndarray) -> Buffer | None: class VectorBinaryDumper(VectorDumper): format = Format.BINARY - def dump(self, obj: Vector | np.ndarray) -> Buffer | None: + def dump(self, obj: 'Vector | np.ndarray') -> Buffer | None: if not isinstance(obj, Vector): obj = Vector(obj) return obj.to_binary() diff --git a/pgvector/psycopg2/vector.py b/pgvector/psycopg2/vector.py index 254ffcb..4571fc5 100644 --- a/pgvector/psycopg2/vector.py +++ b/pgvector/psycopg2/vector.py @@ -9,7 +9,7 @@ class VectorAdapter: - def __init__(self, value: Vector | np.ndarray) -> None: + def __init__(self, value: 'Vector | np.ndarray') -> None: if not isinstance(value, Vector): value = Vector(value) self._value = value From f6358361ff9b7889b1efa9cfcaa0b8560807603f Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 21:07:25 -0700 Subject: [PATCH 119/121] Improved example [skip ci] --- examples/surprise/example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/surprise/example.py b/examples/surprise/example.py index e413bcf..40760e2 100644 --- a/examples/surprise/example.py +++ b/examples/surprise/example.py @@ -40,10 +40,10 @@ class Item(Base): session.execute(insert(User), users) session.execute(insert(Item), items) -user = session.get(User, 1) +user = session.get_one(User, 1) items = session.scalars(select(Item).order_by(Item.factors.max_inner_product(user.factors)).limit(5)) print('user-based recs:', [item.id for item in items]) -item = session.get(Item, 50) +item = session.get_one(Item, 50) items = session.scalars(select(Item).filter(Item.id != item.id).order_by(Item.factors.cosine_distance(item.factors)).limit(5)) print('item-based recs:', [item.id for item in items]) From cfd141ac419ff0826bad32809e8d5eda2d5b692b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 21:08:40 -0700 Subject: [PATCH 120/121] Improved example [skip ci] --- examples/surprise/example.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/surprise/example.py b/examples/surprise/example.py index 40760e2..6a40784 100644 --- a/examples/surprise/example.py +++ b/examples/surprise/example.py @@ -1,6 +1,6 @@ from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Integer -from sqlalchemy.orm import declarative_base, mapped_column, Session +from sqlalchemy.orm import mapped_column, DeclarativeBase, Session from surprise import Dataset, SVD engine = create_engine('postgresql+psycopg://localhost/pgvector_example') @@ -8,7 +8,9 @@ conn.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) conn.commit() -Base = declarative_base() + +class Base(DeclarativeBase): + pass class User(Base): From 711e021f04932607787aa021c08fe96a6154a5f3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 9 Jun 2026 23:30:58 -0700 Subject: [PATCH 121/121] Updated examples [skip ci] --- examples/implicit/example.py | 10 ++++++---- examples/lightfm/example.py | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/examples/implicit/example.py b/examples/implicit/example.py index 2cbf7c6..ebb1ff1 100644 --- a/examples/implicit/example.py +++ b/examples/implicit/example.py @@ -2,14 +2,16 @@ from implicit.datasets.movielens import get_movielens from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Integer, String -from sqlalchemy.orm import declarative_base, mapped_column, Session +from sqlalchemy.orm import mapped_column, DeclarativeBase, Session engine = create_engine('postgresql+psycopg://localhost/pgvector_example') with engine.connect() as conn: conn.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) conn.commit() -Base = declarative_base() + +class Base(DeclarativeBase): + pass class User(Base): @@ -41,10 +43,10 @@ class Item(Base): session.execute(insert(User), users) session.execute(insert(Item), items) -user = session.get(User, 1) +user = session.get_one(User, 1) items = session.scalars(select(Item).order_by(Item.factors.max_inner_product(user.factors)).limit(5)) print('user-based recs:', [item.title for item in items]) -item = session.scalars(select(Item).filter(Item.title == 'Star Wars (1977)')).first() +item = session.scalars(select(Item).filter(Item.title == 'Star Wars (1977)')).one() items = session.scalars(select(Item).filter(Item.id != item.id).order_by(Item.factors.cosine_distance(item.factors)).limit(5)) print('item-based recs:', [item.title for item in items]) diff --git a/examples/lightfm/example.py b/examples/lightfm/example.py index 65031c4..c869e89 100644 --- a/examples/lightfm/example.py +++ b/examples/lightfm/example.py @@ -2,14 +2,16 @@ from lightfm.datasets import fetch_movielens from pgvector.sqlalchemy import VECTOR from sqlalchemy import create_engine, insert, select, text, Float, Integer, String -from sqlalchemy.orm import declarative_base, mapped_column, Session +from sqlalchemy.orm import mapped_column, DeclarativeBase, Session engine = create_engine('postgresql+psycopg://localhost/pgvector_example') with engine.connect() as conn: conn.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) conn.commit() -Base = declarative_base() + +class Base(DeclarativeBase): + pass class User(Base): @@ -45,12 +47,12 @@ class Item(Base): session.execute(insert(User), users) session.execute(insert(Item), items) -user = session.get(User, 1) +user = session.get_one(User, 1) # subtract item bias for negative inner product items = session.scalars(select(Item).order_by(Item.factors.max_inner_product(user.factors) - Item.bias).limit(5)) print('user-based recs:', [item.title for item in items]) # broken due to https://github.com/lyst/lightfm/issues/682 -item = session.scalars(select(Item).filter(Item.title == 'Star Wars (1977)')).first() +item = session.scalars(select(Item).filter(Item.title == 'Star Wars (1977)')).one() items = session.scalars(select(Item).filter(Item.id != item.id).order_by(Item.factors.cosine_distance(item.factors)).limit(5)) print('item-based recs:', [item.title for item in items])