pgvector support for Python
Supports Django, SQLAlchemy, SQLModel, Psycopg 3, Psycopg 2, asyncpg, and Peewee
Run:
pip install pgvector
And follow the instructions for your database library:
Or check out some examples:
- Embeddings with OpenAI
- Sentence embeddings with SentenceTransformers
- Hybrid search with SentenceTransformers
- Image search with PyTorch
- Implicit feedback recommendations with Implicit
- Explicit feedback recommendations with Surprise
- Recommendations with LightFM
Create the extension
from pgvector.django import VectorExtension
class Migration(migrations.Migration):
operations = [
VectorExtension()
]
Add a vector field
from pgvector.django import VectorField
class Item(models.Model):
embedding = VectorField(dimensions=3)
Insert a vector
item = Item(embedding=[1, 2, 3])
item.save()
Get the nearest neighbors to a vector
from pgvector.django import L2Distance
Item.objects.order_by(L2Distance('embedding', [3, 1, 2]))[:5]
Also supports MaxInnerProduct and CosineDistance
Get the distance
Item.objects.annotate(distance=L2Distance('embedding', [3, 1, 2]))
Get items within a certain distance
Item.objects.alias(distance=L2Distance('embedding', [3, 1, 2])).filter(distance__lt=5)
Average vectors
from django.db.models import Avg
Item.objects.aggregate(Avg('embedding'))
Also supports Sum
Add an approximate index
from pgvector.django import IvfflatIndex, HnswIndex
class Item(models.Model):
class Meta:
indexes = [
IvfflatIndex(
name='my_index',
fields=['embedding'],
lists=100,
opclasses=['vector_l2_ops']
),
# or
HnswIndex(
name='my_index',
fields=['embedding'],
m=16,
ef_construction=64,
opclasses=['vector_l2_ops']
)
]
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Add a vector column
from pgvector.sqlalchemy import Vector
class Item(Base):
embedding = mapped_column(Vector(3))
Insert a vector
item = Item(embedding=[1, 2, 3])
session.add(item)
session.commit()
Get the nearest neighbors to a vector
session.scalars(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))
Also supports max_inner_product and cosine_distance
Get the distance
session.scalars(select(Item.embedding.l2_distance([3, 1, 2])))
Get items within a certain distance
session.scalars(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5))
Add an approximate index
index = Index('my_index', Item.embedding,
postgresql_using='ivfflat',
postgresql_with={'lists': 100},
postgresql_ops={'embedding': 'vector_l2_ops'}
)
# or
index = Index('my_index', Item.embedding,
postgresql_using='hnsw',
postgresql_with={'m': 16, 'ef_construction': 64},
postgresql_ops={'embedding': 'vector_l2_ops'}
)
index.create(engine)
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Add a vector column
from pgvector.sqlalchemy import Vector
from sqlalchemy import Column
class Item(SQLModel, table=True):
embedding: List[float] = Field(sa_column=Column(Vector(3)))
Insert a vector
item = Item(embedding=[1, 2, 3])
session.add(item)
session.commit()
Get the nearest neighbors to a vector
session.exec(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))
Also supports max_inner_product and cosine_distance
Register the vector type with your connection
from pgvector.psycopg import register_vector
register_vector(conn)
For async connections, use
from pgvector.psycopg import register_vector_async
await register_vector_async(conn)
Insert a vector
embedding = np.array([1, 2, 3])
conn.execute('INSERT INTO item (embedding) VALUES (%s)', (embedding,))
Get the nearest neighbors to a vector
conn.execute('SELECT * FROM item ORDER BY embedding <-> %s LIMIT 5', (embedding,)).fetchall()
Register the vector type with your connection or cursor
from pgvector.psycopg2 import register_vector
register_vector(conn)
Insert a vector
embedding = np.array([1, 2, 3])
cur.execute('INSERT INTO item (embedding) VALUES (%s)', (embedding,))
Get the nearest neighbors to a vector
cur.execute('SELECT * FROM item ORDER BY embedding <-> %s LIMIT 5', (embedding,))
cur.fetchall()
Register the vector type with your connection
from pgvector.asyncpg import register_vector
await register_vector(conn)
or your pool
async def init(conn):
await register_vector(conn)
pool = await asyncpg.create_pool(..., init=init)
Insert a vector
embedding = np.array([1, 2, 3])
await conn.execute('INSERT INTO item (embedding) VALUES ($1)', embedding)
Get the nearest neighbors to a vector
await conn.fetch('SELECT * FROM item ORDER BY embedding <-> $1 LIMIT 5', embedding)
Add a vector column
from pgvector.peewee import VectorField
class Item(BaseModel):
embedding = VectorField(dimensions=3)
Insert a vector
item = Item.create(embedding=[1, 2, 3])
Get the nearest neighbors to a vector
Item.select().order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5)
Also supports max_inner_product and cosine_distance
Get the distance
Item.select(Item.embedding.l2_distance([3, 1, 2]).alias('distance'))
Get items within a certain distance
Item.select().where(Item.embedding.l2_distance([3, 1, 2]) < 5)
Add an approximate index
Item.add_index('embedding vector_l2_ops', using='hnsw')
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
View the changelog
Everyone is encouraged to help improve this project. Here are a few ways you can help:
- Report bugs
- Fix bugs and submit pull requests
- Write, clarify, or fix documentation
- Suggest or add new features
To get started with development:
git clone https://github.com/pgvector/pgvector-python.git
cd pgvector-python
pip install -r requirements.txt
createdb pgvector_python_test
pytest