From 037ca32d1d1be2c95c4d24bd5e86ce4ce1e65cab Mon Sep 17 00:00:00 2001
From: Averi Kitsch
Date: Tue, 21 Jan 2025 09:20:49 -0800
Subject: [PATCH 01/21] chore(tests): fix multi-modal image testing (#322)

* chore: update package deps

* update image tests

* lint

* chore(tests): fix multi-modal image testing
---
 tests/test_async_vectorstore.py |  7 +++++--
 tests/test_vectorstore.py       | 17 ++++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/tests/test_async_vectorstore.py b/tests/test_async_vectorstore.py
index 8c95c18c..9b450287 100644
--- a/tests/test_async_vectorstore.py
+++ b/tests/test_async_vectorstore.py
@@ -149,7 +149,10 @@ async def image_vs(self, engine):
         await engine._ainit_vectorstore_table(
             IMAGE_TABLE,
             VECTOR_SIZE,
-            metadata_columns=[Column("image_id", "TEXT"), Column("source", "TEXT")],
+            metadata_columns=[
+                Column("image_id", "TEXT"),
+                Column("source", "TEXT"),
+            ],
         )
         vs = await AsyncAlloyDBVectorStore.create(
             engine,
@@ -246,7 +249,7 @@ async def test_aadd_images(self, engine, image_vs, image_uris):
         ]
         await image_vs.aadd_images(image_uris, metadatas, ids)
         results = await afetch(engine, (f'SELECT * FROM "{IMAGE_TABLE}"'))
-        assert len(results) == 4
+        assert len(results) == len(image_uris)
         assert results[0]["image_id"] == "0"
         assert results[0]["source"] == "google.com"
         await aexecute(engine, (f'TRUNCATE TABLE "{IMAGE_TABLE}"'))
diff --git a/tests/test_vectorstore.py b/tests/test_vectorstore.py
index ee35ebb6..3392a14b 100644
--- a/tests/test_vectorstore.py
+++ b/tests/test_vectorstore.py
@@ -131,6 +131,7 @@ async def engine(self, db_project, db_region, db_cluster, db_instance, db_name):
         yield engine
         await aexecute(engine, f'DROP TABLE IF EXISTS "{DEFAULT_TABLE}"')
         await engine.close()
+        await engine._connector.close()
 
     @pytest_asyncio.fixture(scope="class")
     async def vs(self, engine):
@@ -194,7 +195,7 @@ async def vs_custom(self, engine):
         await aexecute(engine, f'DROP TABLE IF EXISTS "{CUSTOM_TABLE}"')
 
     @pytest_asyncio.fixture(scope="class")
-    async def image_uris(self):
+    def image_uris(self):
         red_uri = str(uuid.uuid4()).replace("-", "_") + "test_image_red.jpg"
         green_uri = str(uuid.uuid4()).replace("-", "_") + "test_image_green.jpg"
         blue_uri = str(uuid.uuid4()).replace("-", "_") + "test_image_blue.jpg"
@@ -336,7 +337,10 @@ async def test_aadd_images(self, engine_sync, image_uris):
         engine_sync.init_vectorstore_table(
             IMAGE_TABLE,
             VECTOR_SIZE,
-            metadata_columns=[Column("image_id", "TEXT"), Column("source", "TEXT")],
+            metadata_columns=[
+                Column("image_id", "TEXT"),
+                Column("source", "TEXT"),
+            ],
         )
         vs = AlloyDBVectorStore.create_sync(
             engine_sync,
@@ -351,7 +355,7 @@ async def test_aadd_images(self, engine_sync, image_uris):
         ]
         await vs.aadd_images(image_uris, metadatas, ids)
         results = await afetch(engine_sync, f'SELECT * FROM "{IMAGE_TABLE}"')
-        assert len(results) == 4
+        assert len(results) == len(image_uris)
         assert results[0]["image_id"] == "0"
         assert results[0]["source"] == "google.com"
         await aexecute(engine_sync, f'TRUNCATE TABLE "{IMAGE_TABLE}"')
@@ -394,11 +398,11 @@ async def test_add_images(self, engine_sync, image_uris):
             embedding_service=image_embedding_service,
             table_name=IMAGE_TABLE_SYNC,
         )
-        yield vs
+
         ids = [str(uuid.uuid4()) for i in range(len(image_uris))]
         vs.add_images(image_uris, ids=ids)
         results = await afetch(engine_sync, (f'SELECT * FROM "{IMAGE_TABLE_SYNC}"'))
-        assert len(results) == 3
+        assert len(results) == len(image_uris)
         await vs.adelete(ids)
         await aexecute(engine_sync, f'DROP TABLE IF EXISTS "{IMAGE_TABLE_SYNC}"')
@@ -511,6 +515,7 @@ async def getconn():
         await aexecute(engine, f"DROP TABLE {table_name}")
         await engine.close()
+        await engine._connector.close()
 
     async def test_from_engine_loop_connector(
         self,
@@ -614,6 +619,7 @@ async def test_from_engine_args_url(
         assert len(results) == 2
         await aexecute(engine, f"DROP TABLE {table_name}")
         await engine.close()
+        await engine._connector.close()
 
     async def test_from_engine_loop(
         self,
@@ -654,6 +660,7 @@ async def test_from_engine_loop(
         assert len(results) == 2
         await aexecute(engine, f"DROP TABLE {table_name}")
         await engine.close()
+        await engine._connector.close()
 
     def test_get_table_name(self, vs):
         assert vs.get_table_name() == DEFAULT_TABLE

From 41db231397a9d8746befd9c30b7fd4be05f460f1 Mon Sep 17 00:00:00 2001
From: Averi Kitsch
Date: Tue, 21 Jan 2025 09:34:23 -0800
Subject: [PATCH 02/21] chore: update package deps (#319)

* chore: update package deps

* restore tests
---
 pyproject.toml                               | 10 +++++-----
 requirements.txt                             |  8 ++++----
 samples/index_tuning_sample/requirements.txt |  6 +++---
 samples/requirements.txt                     | 10 +++++-----
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3e61df84..d5fd159a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,14 +39,14 @@ Changelog = "https://github.com/googleapis/langchain-google-alloydb-pg-python/bl
 
 [project.optional-dependencies]
 test = [
-    "black[jupyter]==24.8.0",
+    "black[jupyter]==24.10.0",
     "isort==5.13.2",
-    "mypy==1.11.2",
-    "pytest-asyncio==0.24.0",
-    "pytest==8.3.3",
+    "mypy==1.14.1",
+    "pytest-asyncio==0.25.1",
+    "pytest==8.3.4",
     "pytest-cov==6.0.0",
     "pytest-depends==1.0.1",
-    "Pillow==11.0.0"
+    "Pillow==11.1.0"
 ]
 
 [build-system]
diff --git a/requirements.txt b/requirements.txt
index 1543665c..796407dc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
-google-cloud-alloydb-connector[asyncpg]==1.4.0
-google-cloud-storage>=2.18.2, <3.0.0
+google-cloud-alloydb-connector[asyncpg]==1.7.0
+google-cloud-storage>=2.18.2
 langchain-core==0.3.0
 numpy==1.26.4
-pgvector==0.3.3
-SQLAlchemy[asyncio]==2.0.34
+pgvector==0.3.6
+SQLAlchemy[asyncio]==2.0.37
diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt
index b6f466ef..b79793eb 100644
--- a/samples/index_tuning_sample/requirements.txt
+++ b/samples/index_tuning_sample/requirements.txt
@@ -1,3 +1,3 @@
-langchain-google-alloydb-pg==0.7.0
-langchain==0.3.0
-langchain-google-vertexai==2.0.7
+langchain-google-alloydb-pg==0.9.0
+langchain==0.3.14
+langchain-google-vertexai==2.0.11
\ No newline at end of file
diff --git a/samples/requirements.txt b/samples/requirements.txt
index 439acca6..6a0753a4 100644
--- a/samples/requirements.txt
+++ b/samples/requirements.txt
@@ -1,5 +1,5 @@
-google-cloud-aiplatform[reasoningengine,langchain]==1.72
-google-cloud-resource-manager==1.13.0
-langchain-community==0.3.7
-langchain-google-alloydb-pg==0.7.0
-langchain-google-vertexai==2.0.7
+google-cloud-aiplatform[reasoningengine,langchain]==1.77.0
+google-cloud-resource-manager==1.14.0
+langchain-community==0.3.14
+langchain-google-alloydb-pg==0.9.0
+langchain-google-vertexai==2.0.11
\ No newline at end of file

From e51ba1bc9c5838b1d43140d8648b5e938b73514b Mon Sep 17 00:00:00 2001
From: Vishwaraj Anand
Date: Thu, 23 Jan 2025 00:04:27 +0530
Subject: [PATCH 03/21] chore(samples): add samples to migrate pinecone to
 alloy db (#292)

* chore: add samples to migrate pinecone to alloy db

* fix: add Google file header

* fix: address PR comments

* fix: address pr comments

Changes:
1. Made snippets as standalone files
2. Compressed snippet functions into a single file.

* chore: address some pr comments

* fix: lint

* fix: lint

* fix: lint

add type hints to params of main method

* chore: remove custom id column requirement

* chore: address pr comments

* fix: working pinecone snippets

* chore: update header year

* chore: lint fixes

* chore: tests skip adding test data if index exists

* chore: address pr comments

* docs: Add ChromaDB migration code snippets (#297)

* docs: Add Milvus migration code snippets (#299)

* docs: Add Qdrant migration code snippets (#298)

* docs: Add Weaviate migration code snippets (#300)

* chore: pr comments on sorted imports

* chore: add index_type in milvus index creation

---------

Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com>
---
 .github/workflows/lint.yml                    |   3 +
 ...migrate_chromadb_vectorstore_to_alloydb.py | 169 ++++++++++++++
 .../migrate_milvus_vectorstore_to_alloydb.py  | 175 +++++++++++++++
 ...migrate_pinecone_vectorstore_to_alloydb.py | 196 +++++++++++++++++
 .../migrate_qdrant_vectorstore_to_alloydb.py  | 179 +++++++++++++++
 ...migrate_weaviate_vectorstore_to_alloydb.py | 176 +++++++++++++++
 samples/migrations/requirements.txt           |  13 ++
 samples/migrations/test_chromadb_migration.py | 186 ++++++++++++++++
 samples/migrations/test_milvus_migration.py   | 187 ++++++++++++++++
 samples/migrations/test_pinecone_migration.py | 207 ++++++++++++++++++
 samples/migrations/test_qdrant_migration.py   | 196 +++++++++++++++++
 samples/migrations/test_weaviate_migration.py | 174 +++++++++++++++
 12 files changed, 1861 insertions(+)
 create mode 100644 samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py
 create mode 100644 samples/migrations/migrate_milvus_vectorstore_to_alloydb.py
 create mode 100644 samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py
 create mode 100644 samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py
 create mode 100644 samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py
 create mode 100644 samples/migrations/requirements.txt
 create mode 100644 samples/migrations/test_chromadb_migration.py
 create mode 100644 samples/migrations/test_milvus_migration.py
 create mode 100644 samples/migrations/test_pinecone_migration.py
 create mode 100644 samples/migrations/test_qdrant_migration.py
 create mode 100644 samples/migrations/test_weaviate_migration.py

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 7221db90..1d9e2b4b 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -44,6 +44,9 @@ jobs:
       - name: Install Sample requirements
         run: pip install -r samples/requirements.txt
 
+      - name: Install Migration snippets requirements
+        run: pip install -r samples/migrations/requirements.txt
+
       - name: Install module (and test requirements)
         run: pip install -e .[test]
 
diff --git a/samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py b/samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py
new file mode 100644
index 00000000..e7fff684
--- /dev/null
+++ b/samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+from typing import Any, Iterator
+
+"""Migrate Chroma to LangChain AlloyDBVectorStore.
+Given a Chroma collection, the following code fetches the data from Chroma
+in batches and uploads to an AlloyDBVectorStore.
+"""
+
+# TODO(dev): Replace the values below
+CHROMADB_COLLECTION_NAME = "example_collection"
+PROJECT_ID = "my-project-id"
+REGION = "us-central1"
+CLUSTER = "my-cluster"
+INSTANCE = "my-instance"
+DB_NAME = "my-db"
+DB_USER = "postgres"
+DB_PWD = "secret-password"
+
+# TODO(developer): Optional, change the values below.
+CHROMADB_PATH = "./chromadb_data"
+VECTOR_SIZE = 768
+CHROMADB_BATCH_SIZE = 10
+ALLOYDB_TABLE_NAME = "alloydb_table"
+MAX_CONCURRENCY = 100
+
+from langchain_chroma import Chroma  # type: ignore
+
+
+def get_data_batch(
+    chromadb_client: Chroma, chromadb_batch_size: int = CHROMADB_BATCH_SIZE
+) -> Iterator[tuple[list[str], list[Any], list[list[float]], list[Any]]]:
+    # [START chromadb_get_data_batch]
+    # Iterate through the IDs and download their contents
+    offset = 0
+    while True:
+        docs = chromadb_client.get(
+            include=["metadatas", "documents", "embeddings"],
+            limit=chromadb_batch_size,
+            offset=offset,
+        )
+
+        if len(docs["documents"]) == 0:
+            break
+
+        yield docs["ids"], docs["documents"], docs["embeddings"].tolist(), docs[
+            "metadatas"
+        ]
+
+        offset += chromadb_batch_size
+
+    # [END chromadb_get_data_batch]
+    print("ChromaDB client fetched all data from collection.")
+
+
+async def main(
+    chromadb_collection_name: str = CHROMADB_COLLECTION_NAME,
+    vector_size: int = VECTOR_SIZE,
+    chromadb_batch_size: int = CHROMADB_BATCH_SIZE,
+    chromadb_path: str = CHROMADB_PATH,
+    project_id: str = PROJECT_ID,
+    region: str = REGION,
+    cluster: str = CLUSTER,
+    instance: str = INSTANCE,
+    alloydb_table: str = ALLOYDB_TABLE_NAME,
+    db_name: str = DB_NAME,
+    db_user: str = DB_USER,
+    db_pwd: str = DB_PWD,
+    max_concurrency: int = MAX_CONCURRENCY,
+) -> None:
+    # [START chromadb_vectorstore_alloydb_migration_embedding_service]
+    # The VectorStore interface requires an embedding service. This workflow does not
+    # generate new embeddings, therefore the FakeEmbeddings class is used to avoid any costs.
+    from langchain_core.embeddings import FakeEmbeddings
+
+    embeddings_service = FakeEmbeddings(size=vector_size)
+    # [END chromadb_vectorstore_alloydb_migration_embedding_service]
+    print("Langchain Fake Embeddings service initiated.")
+
+    # [START chromadb_get_client]
+    from langchain_chroma import Chroma
+
+    chromadb_client = Chroma(
+        collection_name=chromadb_collection_name,
+        embedding_function=embeddings_service,
+        persist_directory=chromadb_path,
+    )
+    # [END chromadb_get_client]
+    print("ChromaDB vectorstore reference initiated.")
+
+    # [START chromadb_vectorstore_alloydb_migration_get_client]
+    from langchain_google_alloydb_pg import AlloyDBEngine
+
+    alloydb_engine = await AlloyDBEngine.afrom_instance(
+        project_id=project_id,
+        region=region,
+        cluster=cluster,
+        instance=instance,
+        database=db_name,
+        user=db_user,
+        password=db_pwd,
+    )
+    # [END chromadb_vectorstore_alloydb_migration_get_client]
+    print("Langchain AlloyDB client initiated.")
+
+    # [START chromadb_vectorstore_alloydb_migration_create_table]
+    await alloydb_engine.ainit_vectorstore_table(
+        table_name=alloydb_table,
+        vector_size=vector_size,
+    )
+    # [END chromadb_vectorstore_alloydb_migration_create_table]
+    print("Langchain AlloyDB vectorstore table created.")
+
+    # [START chromadb_vectorstore_alloydb_migration_vector_store]
+    from langchain_google_alloydb_pg import AlloyDBVectorStore
+
+    vs = await AlloyDBVectorStore.create(
+        engine=alloydb_engine,
+        embedding_service=embeddings_service,
+        table_name=alloydb_table,
+    )
+    # [END chromadb_vectorstore_alloydb_migration_vector_store]
+    print("Langchain AlloyDBVectorStore initialized.")
+
+    data_iterator = get_data_batch(
+        chromadb_client=chromadb_client,
+        chromadb_batch_size=chromadb_batch_size,
+    )
+
+    # [START chromadb_vectorstore_alloydb_migration_insert_data_batch]
+    pending: set[Any] = set()
+    for ids, contents, embeddings, metadatas in data_iterator:
+        pending.add(
+            asyncio.ensure_future(
+                vs.aadd_embeddings(
+                    texts=contents,
+                    embeddings=embeddings,
+                    metadatas=metadatas,
+                    ids=ids,
+                )
+            )
+        )
+        if len(pending) >= max_concurrency:
+            _, pending = await asyncio.wait(
+                pending, return_when=asyncio.FIRST_COMPLETED
+            )
+    if pending:
+        await asyncio.wait(pending)
+    # [END chromadb_vectorstore_alloydb_migration_insert_data_batch]
+    print("Migration completed, inserted all the batches of data to AlloyDB.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/samples/migrations/migrate_milvus_vectorstore_to_alloydb.py b/samples/migrations/migrate_milvus_vectorstore_to_alloydb.py
new file mode 100644
index 00000000..deb58741
--- /dev/null
+++ b/samples/migrations/migrate_milvus_vectorstore_to_alloydb.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+from typing import Any, Iterator
+
+"""Migrate Milvus to Langchain AlloyDBVectorStore.
+Given a Milvus collection, the following code fetches the data from Milvus
+in batches and uploads to an AlloyDBVectorStore.
+"""
+
+# TODO(dev): Replace the values below
+PROJECT_ID = "my-project-id"
+REGION = "us-central1"
+CLUSTER = "my-cluster"
+INSTANCE = "my-instance"
+DB_NAME = "my-db"
+DB_USER = "postgres"
+DB_PWD = "secret-password"
+
+# TODO(developer): Optional, change the values below.
+MILVUS_URI = "./milvus_data"
+MILVUS_COLLECTION_NAME = "test_milvus"
+VECTOR_SIZE = 768
+MILVUS_BATCH_SIZE = 10
+ALLOYDB_TABLE_NAME = "alloydb_table"
+MAX_CONCURRENCY = 100
+
+from pymilvus import MilvusClient  # type: ignore
+
+
+def get_data_batch(
+    milvus_client: MilvusClient,
+    milvus_batch_size: int = MILVUS_BATCH_SIZE,
+    milvus_collection_name: str = MILVUS_COLLECTION_NAME,
+) -> Iterator[tuple[list[str], list[Any], list[list[float]], list[Any]]]:
+    # [START milvus_get_data_batch]
+    # Iterate through the IDs and download their contents
+    iterator = milvus_client.query_iterator(
+        collection_name=milvus_collection_name,
+        filter='pk >= "0"',
+        output_fields=["pk", "text", "vector", "idv"],
+        batch_size=milvus_batch_size,
+    )
+
+    while True:
+        ids = []
+        content = []
+        embeddings = []
+        metadatas = []
+        page = iterator.next()
+        if len(page) == 0:
+            iterator.close()
+            break
+        for i in range(len(page)):
+            doc = page[i]
+            ids.append(doc["pk"])
+            content.append(doc["text"])
+            embeddings.append(doc["vector"])
+            del doc["pk"]
+            del doc["text"]
+            del doc["vector"]
+            metadatas.append(doc)
+        yield ids, content, embeddings, metadatas
+    # [END milvus_get_data_batch]
+    print("Milvus client fetched all data from collection.")
+
+
+async def main(
+    milvus_collection_name: str = MILVUS_COLLECTION_NAME,
+    vector_size: int = VECTOR_SIZE,
+    milvus_batch_size: int = MILVUS_BATCH_SIZE,
+    milvus_uri: str = MILVUS_URI,
+    project_id: str = PROJECT_ID,
+    region: str = REGION,
+    cluster: str = CLUSTER,
+    instance: str = INSTANCE,
+    alloydb_table: str = ALLOYDB_TABLE_NAME,
+    db_name: str = DB_NAME,
+    db_user: str = DB_USER,
+    db_pwd: str = DB_PWD,
+    max_concurrency: int = MAX_CONCURRENCY,
+) -> None:
+    # [START milvus_get_client]
+    milvus_client = MilvusClient(uri=milvus_uri)
+    # [END milvus_get_client]
+    print("Milvus client initiated.")
+
+    # [START milvus_vectorstore_alloydb_migration_get_client]
+    from langchain_google_alloydb_pg import AlloyDBEngine
+
+    alloydb_engine = await AlloyDBEngine.afrom_instance(
+        project_id=project_id,
+        region=region,
+        cluster=cluster,
+        instance=instance,
+        database=db_name,
+        user=db_user,
+        password=db_pwd,
+    )
+    # [END milvus_vectorstore_alloydb_migration_get_client]
+    print("Langchain AlloyDB client initiated.")
+
+    # [START milvus_vectorstore_alloydb_migration_embedding_service]
+    # The VectorStore interface requires an embedding service. This workflow does not
+    # generate new embeddings, therefore the FakeEmbeddings class is used to avoid any costs.
+    from langchain_core.embeddings import FakeEmbeddings
+
+    embeddings_service = FakeEmbeddings(size=vector_size)
+    # [END milvus_vectorstore_alloydb_migration_embedding_service]
+    print("Langchain Fake Embeddings service initiated.")
+
+    # [START milvus_vectorstore_alloydb_migration_create_table]
+    await alloydb_engine.ainit_vectorstore_table(
+        table_name=alloydb_table,
+        vector_size=vector_size,
+    )
+    # [END milvus_vectorstore_alloydb_migration_create_table]
+    print("Langchain AlloyDB vectorstore table created.")
+
+    # [START milvus_vectorstore_alloydb_migration_vector_store]
+    from langchain_google_alloydb_pg import AlloyDBVectorStore
+
+    vs = await AlloyDBVectorStore.create(
+        engine=alloydb_engine,
+        embedding_service=embeddings_service,
+        table_name=alloydb_table,
+    )
+    # [END milvus_vectorstore_alloydb_migration_vector_store]
+    print("Langchain AlloyDBVectorStore initialized.")
+
+    data_iterator = get_data_batch(
+        milvus_client=milvus_client,
+        milvus_batch_size=milvus_batch_size,
+        milvus_collection_name=milvus_collection_name,
+    )
+
+    # [START milvus_vectorstore_alloydb_migration_insert_data_batch]
+    pending: set[Any] = set()
+    for ids, contents, embeddings, metadatas in data_iterator:
+        pending.add(
+            asyncio.ensure_future(
+                vs.aadd_embeddings(
+                    texts=contents,
+                    embeddings=embeddings,
+                    metadatas=metadatas,
+                    ids=ids,
+                )
+            )
+        )
+        if len(pending) >= max_concurrency:
+            _, pending = await asyncio.wait(
+                pending, return_when=asyncio.FIRST_COMPLETED
+            )
+    if pending:
+        await asyncio.wait(pending)
+    # [END milvus_vectorstore_alloydb_migration_insert_data_batch]
+    print("Migration completed, inserted all the batches of data to AlloyDB.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py b/samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py
new file mode 100644
index 00000000..e5dd8278
--- /dev/null
+++ b/samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import asyncio
+from typing import Any, Iterator
+
+"""Migrate PineconeVectorStore to Langchain AlloyDBVectorStore.
+
+Given a pinecone index, the following code fetches the data from pinecone
+in batches and uploads to an AlloyDBVectorStore.
+"""
+
+# TODO(dev): Replace the values below
+PINECONE_API_KEY = "my-pc-api-key"
+PINECONE_INDEX_NAME = "my-pc-index-name"
+PROJECT_ID = "my-project-id"
+REGION = "us-central1"
+CLUSTER = "my-cluster"
+INSTANCE = "my-instance"
+DB_NAME = "my-db"
+DB_USER = "postgres"
+DB_PWD = "secret-password"
+
+# TODO(developer): Optional, change the values below.
+PINECONE_NAMESPACE = ""
+VECTOR_SIZE = 768
+PINECONE_BATCH_SIZE = 10
+ALLOYDB_TABLE_NAME = "alloydb_table"
+MAX_CONCURRENCY = 100
+
+from pinecone import Index  # type: ignore
+
+
+def get_ids_batch(
+    pinecone_index: Index,
+    pinecone_namespace: str = PINECONE_NAMESPACE,
+    pinecone_batch_size: int = PINECONE_BATCH_SIZE,
+) -> Iterator[list[str]]:
+    # [START pinecone_get_ids_batch]
+    results = pinecone_index.list_paginated(
+        prefix="", namespace=pinecone_namespace, limit=pinecone_batch_size
+    )
+    ids = [v.id for v in results.vectors]
+    yield ids
+
+    while results.pagination is not None:
+        pagination_token = results.pagination.next
+        # Keep paging through the same namespace as the first request
+        results = pinecone_index.list_paginated(
+            prefix="",
+            namespace=pinecone_namespace,
+            pagination_token=pagination_token,
+            limit=pinecone_batch_size,
+        )
+
+        # Extract and yield the next batch of IDs
+        ids = [v.id for v in results.vectors]
+        yield ids
+    # [END pinecone_get_ids_batch]
+    print("Pinecone client fetched all ids from index.")
+
+
+def get_data_batch(
+    pinecone_index: Index, pinecone_namespace: str, pinecone_batch_size: int
+) -> Iterator[tuple[list[str], list[str], list[Any], list[Any]]]:
+    id_iterator = get_ids_batch(pinecone_index, pinecone_namespace, pinecone_batch_size)
+    # [START pinecone_get_data_batch]
+    # Iterate through the IDs and download their contents
+    for ids in id_iterator:
+        # Fetch vectors from the same namespace the IDs were listed from
+        all_data = pinecone_index.fetch(ids=ids, namespace=pinecone_namespace)
+        ids = []
+        embeddings = []
+        contents = []
+        metadatas = []
+
+        # Process each vector in the current batch
+        for doc in all_data["vectors"].values():
+            ids.append(doc["id"])
+            embeddings.append(doc["values"])
+            contents.append(str(doc["metadata"]["text"]))
+            del doc["metadata"]["text"]
+            metadata = doc["metadata"]
+            metadatas.append(metadata)
+
+        # Yield the current batch of results
+        yield ids, contents, embeddings, metadatas
+    # [END pinecone_get_data_batch]
+    print("Pinecone client fetched all data from index.")
+
+
+async def main(
+    pinecone_api_key: str = PINECONE_API_KEY,
+    pinecone_index_name: str = PINECONE_INDEX_NAME,
+    pinecone_namespace: str = PINECONE_NAMESPACE,
+    vector_size: int = VECTOR_SIZE,
+    pinecone_batch_size: int = PINECONE_BATCH_SIZE,
+    project_id: str = PROJECT_ID,
+    region: str = REGION,
+    cluster: str = CLUSTER,
+    instance: str = INSTANCE,
+    alloydb_table: str = ALLOYDB_TABLE_NAME,
+    db_name: str = DB_NAME,
+    db_user: str = DB_USER,
+    db_pwd: str = DB_PWD,
+    max_concurrency: int = MAX_CONCURRENCY,
+) -> None:
+    # [START pinecone_get_client]
+    from pinecone import Pinecone  # type: ignore
+
+    pinecone_client = Pinecone(api_key=pinecone_api_key)
+    pinecone_index = pinecone_client.Index(pinecone_index_name)
+    print("Pinecone index reference initiated.")
+    # [END pinecone_get_client]
+
+    # [START pinecone_vectorstore_alloydb_migration_get_client]
+    from langchain_google_alloydb_pg import AlloyDBEngine
+
+    alloydb_engine = await AlloyDBEngine.afrom_instance(
+        project_id=project_id,
+        region=region,
+        cluster=cluster,
+        instance=instance,
+        database=db_name,
+        user=db_user,
+        password=db_pwd,
+    )
+    # [END pinecone_vectorstore_alloydb_migration_get_client]
+    print("Langchain AlloyDB client initiated.")
+
+    # [START pinecone_vectorstore_alloydb_migration_create_table]
+    await alloydb_engine.ainit_vectorstore_table(
+        table_name=alloydb_table,
+        vector_size=vector_size,
+    )
+    # [END pinecone_vectorstore_alloydb_migration_create_table]
+    print("Langchain AlloyDB vectorstore table created.")
+
+    # [START pinecone_vectorstore_alloydb_migration_embedding_service]
+    # The VectorStore interface requires an embedding service. This workflow does not
+    # generate new embeddings, therefore the FakeEmbeddings class is used to avoid any costs.
+    from langchain_core.embeddings import FakeEmbeddings
+
+    embedding_service = FakeEmbeddings(size=vector_size)
+    # [END pinecone_vectorstore_alloydb_migration_embedding_service]
+    print("Langchain Fake Embeddings service initiated.")
+
+    # [START pinecone_vectorstore_alloydb_migration_vector_store]
+    from langchain_google_alloydb_pg import AlloyDBVectorStore
+
+    vector_store = await AlloyDBVectorStore.create(
+        engine=alloydb_engine,
+        embedding_service=embedding_service,
+        table_name=alloydb_table,
+    )
+    # [END pinecone_vectorstore_alloydb_migration_vector_store]
+    print("Langchain AlloyDBVectorStore initialized.")
+
+    data_iterator = get_data_batch(
+        pinecone_index, pinecone_namespace, pinecone_batch_size
+    )
+
+    # [START pinecone_vectorstore_alloydb_migration_insert_data_batch]
+    pending: set[Any] = set()
+    for ids, contents, embeddings, metadatas in data_iterator:
+        pending.add(
+            asyncio.ensure_future(
+                vector_store.aadd_embeddings(
+                    texts=contents,
+                    embeddings=embeddings,
+                    metadatas=metadatas,
+                    ids=ids,
+                )
+            )
+        )
+        if len(pending) >= max_concurrency:
+            _, pending = await asyncio.wait(
+                pending, return_when=asyncio.FIRST_COMPLETED
+            )
+    if pending:
+        await asyncio.wait(pending)
+    # [END pinecone_vectorstore_alloydb_migration_insert_data_batch]
+    print("Migration completed, inserted all the batches of data to AlloyDB.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py b/samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py
new file mode 100644
index 00000000..89692c80
--- /dev/null
+++ b/samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+from typing import Any, Iterator, List
+
+"""Migrate QdrantVectorStore to Langchain AlloyDBVectorStore.
+Given a qdrant collection, the following code fetches the data from qdrant
+in batches and uploads to an AlloyDBVectorStore.
+"""
+
+# TODO(dev): Replace the values below
+PROJECT_ID = "my-project-id"
+REGION = "us-central1"
+CLUSTER = "my-cluster"
+INSTANCE = "my-instance"
+DB_NAME = "my-db"
+DB_USER = "postgres"
+DB_PWD = "secret-password"
+
+# TODO(developer): Change the values below.
+QDRANT_COLLECTION_NAME = "test_qdrant"
+QDRANT_PATH = "./qdrant_data"
+VECTOR_SIZE = 768
+QDRANT_BATCH_SIZE = 10
+ALLOYDB_TABLE_NAME = "alloydb_table"
+MAX_CONCURRENCY = 100
+
+from qdrant_client import QdrantClient  # type: ignore
+
+
+def get_data_batch(
+    qdrant_client: QdrantClient,
+    qdrant_batch_size: int = QDRANT_BATCH_SIZE,
+    qdrant_collection_name: str = QDRANT_COLLECTION_NAME,
+) -> Iterator[tuple[list[str], list[Any], list[list[float]], list[Any]]]:
+    # [START qdrant_get_data_batch]
+    # Iterate through the IDs and download their contents
+    offset = None
+    while True:
+        docs, offset = qdrant_client.scroll(
+            collection_name=qdrant_collection_name,
+            with_vectors=True,
+            limit=qdrant_batch_size,
+            offset=offset,
+            with_payload=True,
+        )
+
+        ids: List[str] = []
+        contents: List[Any] = []
+        embeddings: List[List[float]] = []
+        metadatas: List[Any] = []
+
+        for doc in docs:
+            if doc.payload and doc.vector:
+                ids.append(str(doc.id))
+                contents.append(doc.payload["page_content"])
+                embeddings.append(doc.vector)  # type: ignore
+                metadatas.append(doc.payload["metadata"])
+
+        yield ids, contents, embeddings, metadatas
+
+        if not offset:
+            break
+
+    # [END qdrant_get_data_batch]
+    print("Qdrant client fetched all data from collection.")
+
+
+async def main(
+    qdrant_collection_name: str = QDRANT_COLLECTION_NAME,
+    vector_size: int = VECTOR_SIZE,
+    qdrant_batch_size: int = QDRANT_BATCH_SIZE,
+    qdrant_path: str = QDRANT_PATH,
+    project_id: str = PROJECT_ID,
+    region: str = REGION,
+    cluster: str = CLUSTER,
+    instance: str = INSTANCE,
+    alloydb_table: str = ALLOYDB_TABLE_NAME,
+    db_name: str = DB_NAME,
+    db_user: str = DB_USER,
+    db_pwd: str = DB_PWD,
+    max_concurrency: int = MAX_CONCURRENCY,
+) -> None:
+    # [START qdrant_get_client]
+    from qdrant_client import QdrantClient
+
+    qdrant_client = QdrantClient(path=qdrant_path)
+
+    # [END qdrant_get_client]
+    print("Qdrant client initiated.")
+
+    # [START qdrant_vectorstore_alloydb_migration_get_client]
+    from langchain_google_alloydb_pg import AlloyDBEngine
+
+    alloydb_engine = await AlloyDBEngine.afrom_instance(
+        project_id=project_id,
+        region=region,
+        cluster=cluster,
+        instance=instance,
+        database=db_name,
+        user=db_user,
+        password=db_pwd,
+    )
+    # [END qdrant_vectorstore_alloydb_migration_get_client]
+    print("Langchain AlloyDB client initiated.")
+
+    # [START qdrant_vectorstore_alloydb_migration_embedding_service]
+    # The VectorStore interface requires an embedding service. This workflow does not
+    # generate new embeddings, therefore the FakeEmbeddings class is used to avoid any costs.
+    from langchain_core.embeddings import FakeEmbeddings
+
+    embeddings_service = FakeEmbeddings(size=vector_size)
+    # [END qdrant_vectorstore_alloydb_migration_embedding_service]
+    print("Langchain Fake Embeddings service initiated.")
+
+    # [START qdrant_vectorstore_alloydb_migration_create_table]
+    await alloydb_engine.ainit_vectorstore_table(
+        table_name=alloydb_table,
+        vector_size=vector_size,
+    )
+    # [END qdrant_vectorstore_alloydb_migration_create_table]
+    print("Langchain AlloyDB vectorstore table created.")
+
+    # [START qdrant_vectorstore_alloydb_migration_vector_store]
+    from langchain_google_alloydb_pg import AlloyDBVectorStore
+
+    vs = await AlloyDBVectorStore.create(
+        engine=alloydb_engine,
+        embedding_service=embeddings_service,
+        table_name=alloydb_table,
+    )
+    # [END qdrant_vectorstore_alloydb_migration_vector_store]
+    print("Langchain AlloyDBVectorStore initialized.")
+
+    data_iterator = get_data_batch(
+        qdrant_client=qdrant_client,
+        qdrant_batch_size=qdrant_batch_size,
+        qdrant_collection_name=qdrant_collection_name,
+    )
+
+    # [START qdrant_vectorstore_alloydb_migration_insert_data_batch]
+    pending: set[Any] = set()
+    for ids, contents, embeddings, metadatas in data_iterator:
+        pending.add(
+            asyncio.ensure_future(
+                vs.aadd_embeddings(
+                    texts=contents,
+                    embeddings=embeddings,
+                    metadatas=metadatas,
+                    ids=ids,
+                )
+            )
+        )
+        if len(pending) >= max_concurrency:
+            _, pending = await asyncio.wait(
+                pending, return_when=asyncio.FIRST_COMPLETED
+            )
+    if pending:
+        await asyncio.wait(pending)
+    # [END qdrant_vectorstore_alloydb_migration_insert_data_batch]
+    print("Migration completed, inserted all the batches of data to AlloyDB.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py b/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py
new file mode 100644
index 00000000..51faf31e
--- /dev/null
+++ b/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+from typing import Any, Iterator
+
+"""Migrate WeaviateVectorStore to Langchain AlloyDBVectorStore.
+Given a weaviate collection, the following code fetches the data from weaviate
+in batches and uploads to an AlloyDBVectorStore.
+"""
+
+# TODO(dev): Replace the values below
+WEAVIATE_API_KEY = "my-wv-api-key"
+WEAVIATE_CLUSTER_URL = "my-wv-cluster-url"
+EMBEDDING_API_KEY = "my-wv-embedding-api-key"
+PROJECT_ID = "my-project-id"
+REGION = "us-central1"
+CLUSTER = "my-cluster"
+INSTANCE = "my-instance"
+DB_NAME = "my-db"
+DB_USER = "postgres"
+DB_PWD = "secret-password"
+
+# TODO(developer): Optional, change the values below.
+WEAVIATE_COLLECTION_NAME = "test_weaviate_collection"
+VECTOR_SIZE = 768
+WEAVIATE_BATCH_SIZE = 10
+ALLOYDB_TABLE_NAME = "alloydb_table"
+MAX_CONCURRENCY = 100
+
+from weaviate.collections import Collection  # type: ignore
+
+
+def get_data_batch(
+    weaviate_collection: Collection, weaviate_batch_size: int = WEAVIATE_BATCH_SIZE
+) -> Iterator[tuple[list[str], list[Any], list[list[float]], list[Any]]]:
+    # [START weaviate_get_data_batch]
+    # Iterate through the IDs and download their contents
+    ids = []
+    content = []
+    embeddings = []
+    metadatas = []
+
+    for item in weaviate_collection.iterator(include_vector=True):
+        ids.append(str(item.uuid))
+        content.append(item.properties["page_content"])
+        embeddings.append(item.vector["default"])
+        metadatas.append(item.properties["metadata"])
+
+        if len(ids) >= weaviate_batch_size:
+            # Yield the current batch of results
+            yield ids, content, embeddings, metadatas
+            # Reset lists to start a new batch
+            ids = []
+            content = []
+            embeddings = []
+            metadatas = []
+
+    # Yield any leftover documents that did not fill a final full batch
+    if ids:
+        yield ids, content, embeddings, metadatas
+    # [END weaviate_get_data_batch]
+    print("Weaviate client fetched all data from collection.")
+
+
+async def main(
+    weaviate_api_key: str = WEAVIATE_API_KEY,
+    weaviate_collection_name: str = WEAVIATE_COLLECTION_NAME,
+    weaviate_cluster_url: str = WEAVIATE_CLUSTER_URL,
+    vector_size: int = VECTOR_SIZE,
+    weaviate_batch_size: int = WEAVIATE_BATCH_SIZE,
+    embedding_api_key: str = EMBEDDING_API_KEY,
+    project_id: str = PROJECT_ID,
+    region: str = REGION,
+    cluster: str = CLUSTER,
+    instance: str = INSTANCE,
+    alloydb_table: str = ALLOYDB_TABLE_NAME,
+    db_name: str = DB_NAME,
+    db_user: str = DB_USER,
+    db_pwd: str = DB_PWD,
+    max_concurrency: int = MAX_CONCURRENCY,
+) -> None:
+    # [START weaviate_get_client]
+    import weaviate
+
+    weaviate_client = weaviate.connect_to_weaviate_cloud(
+        cluster_url=weaviate_cluster_url,
+        auth_credentials=weaviate.auth.AuthApiKey(weaviate_api_key),
+        headers={"X-Cohere-Api-Key": embedding_api_key},
+    )
+    weaviate_collection = weaviate_client.collections.get(weaviate_collection_name)
+    # [END weaviate_get_client]
+    print("Weaviate collection reference initiated.")
+
+    # [START weaviate_vectorstore_alloydb_migration_embedding_service]
+    # The VectorStore interface requires an embedding service. This workflow does not
+    # generate new embeddings, therefore the FakeEmbeddings class is used to avoid any costs.
+    from langchain_core.embeddings import FakeEmbeddings
+
+    embeddings_service = FakeEmbeddings(size=vector_size)
+    # [END weaviate_vectorstore_alloydb_migration_embedding_service]
+    print("Langchain Fake Embeddings service initiated.")
+
+    # [START weaviate_vectorstore_alloydb_migration_get_client]
+    from langchain_google_alloydb_pg import AlloyDBEngine
+
+    alloydb_engine = await AlloyDBEngine.afrom_instance(
+        project_id=project_id,
+        region=region,
+        cluster=cluster,
+        instance=instance,
+        database=db_name,
+        user=db_user,
+        password=db_pwd,
+    )
+    print("Langchain AlloyDB client initiated.")
+    # [END weaviate_vectorstore_alloydb_migration_get_client]
+
+    # [START weaviate_vectorstore_alloydb_migration_create_table]
+    await alloydb_engine.ainit_vectorstore_table(
+        table_name=alloydb_table,
+        vector_size=vector_size,
+    )
+
+    # [END weaviate_vectorstore_alloydb_migration_create_table]
+    print("Langchain AlloyDB vectorstore table created.")
+
+    # [START weaviate_vectorstore_alloydb_migration_vector_store]
+    from langchain_google_alloydb_pg import AlloyDBVectorStore
+
+    vs = await AlloyDBVectorStore.create(
+        engine=alloydb_engine,
+        embedding_service=embeddings_service,
+        table_name=alloydb_table,
+    )
+    # [END weaviate_vectorstore_alloydb_migration_vector_store]
+    print("Langchain AlloyDBVectorStore initialized.")
+
+    data_iterator = get_data_batch(
+        weaviate_collection=weaviate_collection, weaviate_batch_size=weaviate_batch_size
+    )
+    # [START weaviate_vectorstore_alloydb_migration_insert_data_batch]
+    pending: set[Any] = set()
+    for ids, contents, embeddings, metadatas in data_iterator:
+        pending.add(
+            asyncio.ensure_future(
+                vs.aadd_embeddings(
+                    texts=contents,
+                    embeddings=embeddings,
+                    metadatas=metadatas,
+                    ids=ids,
+                )
+            )
+        )
+        if len(pending) >= max_concurrency:
+            _, pending = await asyncio.wait(
+                pending, return_when=asyncio.FIRST_COMPLETED
+            )
+    if pending:
+        await asyncio.wait(pending)
+    # [END weaviate_vectorstore_alloydb_migration_insert_data_batch]
+    print("Migration completed, inserted all the batches of data to AlloyDB.")
+    weaviate_client.close()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/samples/migrations/requirements.txt b/samples/migrations/requirements.txt
new file mode 100644
index 00000000..96aed4df
--- /dev/null
+++ b/samples/migrations/requirements.txt
@@ -0,0 +1,13 @@
+grpcio-tools==1.67.1
+langchain-chroma==0.2.0
+langchain-core==0.3.26
+langchain-google-alloydb-pg==0.8.0
+langchain-google-vertexai==2.0.9
+langchain-milvus==0.1.8
+langchain-pinecone==0.2.0
+langchain-qdrant==0.2.0
+pinecone-client==5.0.1
+protobuf==5.29.1
+pymilvus==2.5.3
+qdrant-client==1.12.2
+weaviate-client==4.10.4
diff --git a/samples/migrations/test_chromadb_migration.py b/samples/migrations/test_chromadb_migration.py
new file mode 100644
index 00000000..6f3e02fd
--- /dev/null
+++ b/samples/migrations/test_chromadb_migration.py
@@ -0,0 +1,186 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import uuid
+from typing import Sequence
+
+import pytest
+import pytest_asyncio
+from langchain_chroma import Chroma
+from langchain_core.documents import Document
+from langchain_core.embeddings import FakeEmbeddings
+from migrate_chromadb_vectorstore_to_alloydb import main
+from sqlalchemy import text
+from sqlalchemy.engine.row import RowMapping
+
+from langchain_google_alloydb_pg import AlloyDBEngine
+
+DEFAULT_TABLE = "test_chromadb_migration" + str(uuid.uuid4())
+
+EMBEDDING_SERVICE = FakeEmbeddings(size=768)
+PERSISTENT_DB_PATH = "./chromadb_data"
+
+
+def get_env_var(key: str, desc: str) -> str:
+    v = os.environ.get(key)
+    if v is None:
+        raise ValueError(f"Must set env var {key} to: {desc}")
+    return v
+
+
+async def aexecute(
+    engine: AlloyDBEngine,
+    query: str,
+) -> None:
+    async def run(engine, query):
+        async with engine._pool.connect() as conn:
+            await conn.execute(text(query))
+            await conn.commit()
+
+    await engine._run_as_async(run(engine, query))
+
+
+async def afetch(engine: AlloyDBEngine, query: str) -> Sequence[RowMapping]:
+    async def run(engine, query):
+        async with engine._pool.connect() as conn:
+            result = await conn.execute(text(query))
+            result_map = result.mappings()
+            result_fetch = result_map.fetchall()
+            return result_fetch
+
+    return await engine._run_as_async(run(engine, query))
+
+
+def create_chroma_collection(collection_name):
+    vector_store = Chroma(
+        collection_name=collection_name,
+        embedding_function=EMBEDDING_SERVICE,
+        persist_directory=PERSISTENT_DB_PATH,
+        create_collection_if_not_exists=True,
+    )
+
+    uuids = [f"{str(uuid.uuid4())}" for i in range(1000)]
+    documents = [
+        Document(page_content=f"content#{i}", metadata={"idv": f"{i}"})
+        for i in range(1000)
+    ]
+
+    vector_store.add_documents(documents=documents, ids=uuids)
+
+
+@pytest.mark.asyncio(loop_scope="class")
+class TestMigrations:
+    @pytest.fixture(scope="module")
+    def db_project(self) -> str:
+        return get_env_var("PROJECT_ID", "project id for google cloud")
+
+    @pytest.fixture(scope="module")
+    def db_region(self) -> str:
+        return get_env_var("REGION", "region for AlloyDB instance")
+
+    @pytest.fixture(scope="module")
+    def db_cluster(self) -> str:
+        return get_env_var("CLUSTER_ID", "cluster for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_instance(self) -> str:
+        return get_env_var("INSTANCE_ID", "instance for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_name(self) -> str:
+        return get_env_var("DATABASE_ID", "database name on AlloyDB instance")
+
+    @pytest.fixture(scope="module")
+    def db_user(self) -> str:
+        return get_env_var("DB_USER", "database user for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_password(self) -> str:
+        return get_env_var("DB_PASSWORD", "database password for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def chromadb_collection_name(self) -> str:
+        return get_env_var(
+            "CHROMADB_COLLECTION_NAME", "collection name for chromadb instance"
+        )
+
+    @pytest_asyncio.fixture(scope="class")
+    async def engine(
+        self,
+        db_project,
+        db_region,
+        db_cluster,
+        db_instance,
+        db_name,
+        db_user,
+        db_password,
+    ):
+        engine = await AlloyDBEngine.afrom_instance(
+            project_id=db_project,
+            cluster=db_cluster,
+            instance=db_instance,
+            region=db_region,
+            database=db_name,
+            user=db_user,
+            password=db_password,
+        )
+
+        yield engine
+        await aexecute(engine, f'DROP TABLE IF EXISTS "{DEFAULT_TABLE}"')
+        await engine.close()
+
+    async def test_chromadb(
+        self,
+        engine,
+        capsys,
+        chromadb_collection_name,
+        db_project,
+        db_region,
+        db_cluster,
+        db_instance,
+        db_name,
+        db_user,
+        db_password,
+    ):
+        create_chroma_collection(collection_name=chromadb_collection_name)
+
+        await main(
+            chromadb_collection_name=chromadb_collection_name,
+            chromadb_path=PERSISTENT_DB_PATH,
+            vector_size=768,
+            chromadb_batch_size=50,
+            project_id=db_project,
+            region=db_region,
+            cluster=db_cluster,
+            instance=db_instance,
+            alloydb_table=DEFAULT_TABLE,
+            db_name=db_name,
+            db_user=db_user,
+            db_pwd=db_password,
+        )
+
+        out, err = capsys.readouterr()
+
+        # Assert on the script's output
+        assert "Error" not in err  # Check for errors
+        assert "ChromaDB vectorstore reference initiated." in out
+        assert "Langchain AlloyDB client initiated" in out
+        assert "Langchain Fake Embeddings service initiated." in out
+        assert "Langchain AlloyDB vectorstore table created" in out
+        assert "Langchain AlloyDBVectorStore initialized" in out
+        assert "ChromaDB client fetched all data from collection." in out
+        assert "Migration completed, inserted all the batches of data to AlloyDB" in out
+        results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"')
+        assert len(results) == 1000
diff --git a/samples/migrations/test_milvus_migration.py b/samples/migrations/test_milvus_migration.py
new file mode 100644
index 00000000..c06b66d2
--- /dev/null
+++ b/samples/migrations/test_milvus_migration.py
@@ -0,0 +1,187 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import uuid
+from typing import Sequence
+
+import pytest
+import pytest_asyncio
+from langchain_core.documents import Document
+from langchain_core.embeddings import FakeEmbeddings
+from langchain_milvus import Milvus
+from migrate_milvus_vectorstore_to_alloydb import main
+from sqlalchemy import text
+from sqlalchemy.engine.row import RowMapping
+
+from langchain_google_alloydb_pg import AlloyDBEngine
+
+DEFAULT_TABLE = "test_milvus_migration" + str(uuid.uuid4())
+PERSISTENT_DB_PATH = "./milvus_data.db"
+EMBEDDING_SERVICE = FakeEmbeddings(size=768)
+
+
+def get_env_var(key: str, desc: str) -> str:
+    v = os.environ.get(key)
+    if v is None:
+        raise ValueError(f"Must set env var {key} to: {desc}")
+    return v
+
+
+async def aexecute(
+    engine: AlloyDBEngine,
+    query: str,
+) -> None:
+    async def run(engine, query):
+        async with engine._pool.connect() as conn:
+            await conn.execute(text(query))
+            await conn.commit()
+
+    await engine._run_as_async(run(engine, query))
+
+
+async def afetch(engine: AlloyDBEngine, query: str) -> Sequence[RowMapping]:
+    async def run(engine, query):
+        async with engine._pool.connect() as conn:
+            result = await conn.execute(text(query))
+            result_map = result.mappings()
+            result_fetch = result_map.fetchall()
+            return result_fetch
+
+    return await engine._run_as_async(run(engine, query))
+
+
+def create_milvus_collection(collection):
+    vector_store = Milvus(
+        embedding_function=EMBEDDING_SERVICE,
+        connection_args={"uri": PERSISTENT_DB_PATH},
+        collection_name=collection,
+        index_params={
+            "index_type": "IVF_FLAT",  # defaults to HNSW but local mode supports IVF_FLAT
+        },
+    )
+
+    uuids = [str(uuid.uuid4()) for i in range(1000)]
+    documents = [
+        Document(page_content=f"content#{i}", metadata={"idv": f"{i}"})
+        for i in range(1000)
+    ]
+
+    vector_store.add_documents(documents=documents, ids=uuids)
+
+
+@pytest.mark.asyncio(loop_scope="class")
+class TestMigrations:
+    @pytest.fixture(scope="module")
+    def db_project(self) -> str:
+        return get_env_var("PROJECT_ID", "project id for google cloud")
+
+    @pytest.fixture(scope="module")
+    def db_region(self) -> str:
+        return get_env_var("REGION", "region for AlloyDB instance")
+
+    @pytest.fixture(scope="module")
+    def db_cluster(self) -> str:
+        return get_env_var("CLUSTER_ID", "cluster for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_instance(self) -> str:
+        return get_env_var("INSTANCE_ID", "instance for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_name(self) -> str:
+        return get_env_var("DATABASE_ID", "database name on AlloyDB instance")
+
+    @pytest.fixture(scope="module")
+    def db_user(self) -> str:
+        return get_env_var("DB_USER", "database user for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_password(self) -> str:
+        return get_env_var("DB_PASSWORD", "database password for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def milvus_collection_name(self) -> str:
+        return get_env_var(
+            "MILVUS_COLLECTION_NAME", "collection name for milvus instance"
+        )
+
+    @pytest_asyncio.fixture(scope="class")
+    async def engine(
+        self,
+        db_project,
+        db_region,
+        db_cluster,
+        db_instance,
+        db_name,
+        db_user,
+        db_password,
+    ):
+        engine = await AlloyDBEngine.afrom_instance(
+            project_id=db_project,
+            cluster=db_cluster,
+            instance=db_instance,
+            region=db_region,
+            database=db_name,
+            user=db_user,
+            password=db_password,
+        )
+
+        yield engine
+        await aexecute(engine, f'DROP TABLE IF EXISTS "{DEFAULT_TABLE}"')
+        await engine.close()
+
+    async def test_milvus(
+        self,
+        engine,
+        capsys,
+        milvus_collection_name,
+        db_project,
+        db_region,
+        db_cluster,
+        db_instance,
+        db_name,
+        db_user,
+        db_password,
+    ):
+        create_milvus_collection(milvus_collection_name)
+
+        await main(
+            milvus_collection_name=milvus_collection_name,
+            milvus_uri=PERSISTENT_DB_PATH,
+            vector_size=768,
+            milvus_batch_size=50,
+            project_id=db_project,
+            region=db_region,
+            cluster=db_cluster,
+            instance=db_instance,
+            alloydb_table=DEFAULT_TABLE,
+            db_name=db_name,
+            db_user=db_user,
+            db_pwd=db_password,
+        )
+
+        out, err = capsys.readouterr()
+
+        # Assert on the script's output
+        assert "Error" not in err  # Check for errors
+        assert "Milvus client initiated." in out
+        assert "Langchain AlloyDB client initiated" in out
+        assert "Langchain Fake Embeddings service initiated." in out
+        assert "Langchain AlloyDB vectorstore table created" in out
+        assert "Langchain AlloyDBVectorStore initialized" in out
+        assert "Milvus client fetched all data from collection." in out
+        assert "Migration completed, inserted all the batches of data to AlloyDB" in out
+        results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"')
+        assert len(results) == 1000
diff --git a/samples/migrations/test_pinecone_migration.py b/samples/migrations/test_pinecone_migration.py
new file mode 100644
index 00000000..c9066ce5
--- /dev/null
+++ b/samples/migrations/test_pinecone_migration.py
@@ -0,0 +1,207 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import time
+import uuid
+from typing import Sequence
+
+import pytest
+import pytest_asyncio
+from langchain_core.documents import Document
+from langchain_google_vertexai import VertexAIEmbeddings
+from langchain_pinecone import PineconeVectorStore  # type: ignore
+from migrate_pinecone_vectorstore_to_alloydb import main
+from pinecone import Pinecone, ServerlessSpec  # type: ignore
+from sqlalchemy import text
+from sqlalchemy.engine.row import RowMapping
+
+from langchain_google_alloydb_pg import AlloyDBEngine
+
+DEFAULT_TABLE = "test_pinecone_migration" + str(uuid.uuid4())
+
+
+def get_env_var(key: str, desc: str) -> str:
+    v = os.environ.get(key)
+    if v is None:
+        raise ValueError(f"Must set env var {key} to: {desc}")
+    return v
+
+
+async def aexecute(
+    engine: AlloyDBEngine,
+    query: str,
+) -> None:
+    async def run(engine, query):
+        async with engine._pool.connect() as conn:
+            await conn.execute(text(query))
+            await conn.commit()
+
+    await engine._run_as_async(run(engine, query))
+
+
+async def afetch(engine: AlloyDBEngine, query: str) -> Sequence[RowMapping]:
+    async def run(engine, query):
+        async with engine._pool.connect() as conn:
+            result = await conn.execute(text(query))
+            result_map = result.mappings()
+            result_fetch = result_map.fetchall()
+            return result_fetch
+
+    return await engine._run_as_async(run(engine, query))
+
+
+def create_pinecone_index(
+    pinecone_index_name: str, pinecone_api_key: str, project_id: str
+) -> None:
+    client = Pinecone(api_key=pinecone_api_key)
+    existing_indexes = [index_info["name"] for index_info in client.list_indexes()]
+    if pinecone_index_name in existing_indexes:
+        # Assume documents already added if index exists
+        return
+    # Create the index and add test documents
+    client.create_index(
+        name=pinecone_index_name,
+        dimension=768,
+        metric="cosine",
+        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
+    )
+    while not client.describe_index(pinecone_index_name).status["ready"]:
+        time.sleep(1)
+
+    index = client.Index(pinecone_index_name)
+    vector_store = PineconeVectorStore(
+        index=index,
+        embedding=VertexAIEmbeddings(
+            model_name="textembedding-gecko@003", project=project_id
+        ),
+    )
+
+    ids = [str(uuid.uuid4()) for i in range(1000)]
+    documents = [
+        Document(page_content=f"content#{i}", metadata={"idv": f"{i}"})
+        for i in range(1000)
+    ]
+    vector_store.add_documents(documents=documents, ids=ids)
+
+
+@pytest.mark.asyncio(loop_scope="class")
+class TestMigrations:
+    @pytest.fixture(scope="module")
+    def db_project(self) -> str:
+        return get_env_var("PROJECT_ID", "project id for google cloud")
+
+    @pytest.fixture(scope="module")
+    def db_region(self) -> str:
+        return get_env_var("REGION", "region for AlloyDB instance")
+
+    @pytest.fixture(scope="module")
+    def db_cluster(self) -> str:
+        return get_env_var("CLUSTER_ID", "cluster for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_instance(self) -> str:
+        return get_env_var("INSTANCE_ID", "instance for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_name(self) -> str:
+        return get_env_var("DATABASE_ID", "database name on AlloyDB instance")
+
+    @pytest.fixture(scope="module")
+    def db_user(self) -> str:
+        return get_env_var("DB_USER", "database user for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def db_password(self) -> str:
+        return get_env_var("DB_PASSWORD", "database password for AlloyDB")
+
+    @pytest.fixture(scope="module")
+    def pinecone_api_key(self) -> str:
+        return get_env_var("PINECONE_API_KEY", "API KEY for pinecone instance")
+
+    @pytest.fixture(scope="module")
+    def pinecone_index_name(self) -> str:
+        return get_env_var("PINECONE_INDEX_NAME", "index name for pinecone instance")
+
+    @pytest_asyncio.fixture(scope="class")
+    async def engine(
+        self,
+        db_project,
+        db_region,
+        db_cluster,
+        db_instance,
+        db_name,
+        db_user,
+        db_password,
+    ):
+        engine = await AlloyDBEngine.afrom_instance(
+            project_id=db_project,
+            cluster=db_cluster,
+            instance=db_instance,
+            region=db_region,
+            database=db_name,
+            user=db_user,
+            password=db_password,
+        )
+
+        yield engine
+        await aexecute(engine, f'DROP TABLE IF EXISTS "{DEFAULT_TABLE}"')
+        await engine.close()
+
+    async def test_pinecone(
+        self,
+        engine,
+        capsys,
+        pinecone_api_key,
+        pinecone_index_name,
+        db_project,
+        db_region,
+        db_cluster,
+        db_instance,
+        db_name,
+        db_user,
+        db_password,
+    ):
+        create_pinecone_index(pinecone_index_name, pinecone_api_key, db_project)
+
+        await main(
+            pinecone_api_key=pinecone_api_key,
+            pinecone_index_name=pinecone_index_name,
+            pinecone_namespace="",
+            vector_size=768,
+            pinecone_batch_size=50,
+            project_id=db_project,
+            region=db_region,
+            cluster=db_cluster,
+            instance=db_instance,
+            alloydb_table=DEFAULT_TABLE,
+            db_name=db_name,
+            db_user=db_user,
+            db_pwd=db_password,
+        )
+
+        out, err = capsys.readouterr()
+
+        # Assert on the script's output
+        assert "Error" not in err  # Check for errors
+        assert "Pinecone index reference initiated" in out
+        assert "Langchain AlloyDB client initiated" in out
+        assert "Langchain AlloyDB vectorstore table created" in out
+        assert "Langchain Fake Embeddings service initiated" in out
+        assert "Langchain AlloyDBVectorStore initialized" in out
+        assert "Pinecone client fetched all ids from index" in out
+        assert "Pinecone client fetched all data from index" in out
+        assert "Migration completed, inserted all the batches of data to AlloyDB" in out
+        results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"')
+        assert len(results) == 1000
diff --git a/samples/migrations/test_qdrant_migration.py b/samples/migrations/test_qdrant_migration.py
new file mode 100644
index 00000000..bb42e4cb
--- /dev/null
+++ b/samples/migrations/test_qdrant_migration.py
@@ -0,0 +1,196 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import os +import uuid +from typing import Sequence + +import pytest +import pytest_asyncio +from langchain_core.documents import Document +from langchain_core.embeddings import FakeEmbeddings +from langchain_qdrant import QdrantVectorStore +from migrate_qdrant_vectorstore_to_alloydb import main +from qdrant_client import QdrantClient +from qdrant_client.http.models import Distance, VectorParams +from sqlalchemy import text +from sqlalchemy.engine.row import RowMapping + +from langchain_google_alloydb_pg import AlloyDBEngine + +DEFAULT_TABLE = "test_qdrant_migration" + str(uuid.uuid4()) +EMBEDDING_SERVICE = FakeEmbeddings(size=768) +PERSISTENT_DB_PATH = "./qdrant_data" + + +def get_env_var(key: str, desc: str) -> str: + v = os.environ.get(key) + if v is None: + raise ValueError(f"Must set env var {key} to: {desc}") + return v + + +async def aexecute( + engine: AlloyDBEngine, + query: str, +) -> None: + async def run(engine, query): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await engine._run_as_async(run(engine, query)) + + +def create_qdrant_collection(collection_name): + client = QdrantClient(path=PERSISTENT_DB_PATH) + # delete pre-existing collection names to avoid conflict + client.delete_collection( + collection_name=collection_name, + ) + client.create_collection( + collection_name=collection_name, + vectors_config=VectorParams(size=768, distance=Distance.COSINE), + ) + + vector_store = QdrantVectorStore( + client=client, + collection_name=collection_name, + embedding=EMBEDDING_SERVICE, + ) + + uuids = [f"{str(uuid.uuid4())}" for i in range(1000)] + documents = [ + Document(page_content=f"content#{i}", metadata={"idv": f"{i}"}) + for i in range(1000) + ] + + vector_store.add_documents(documents=documents, ids=uuids) + + +async def afetch(engine: AlloyDBEngine, query: str) -> Sequence[RowMapping]: + async def run(engine, query): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + result_fetch = result_map.fetchall() + return result_fetch + + return await engine._run_as_async(run(engine, query)) + + +@pytest.mark.asyncio(loop_scope="class") +class TestMigrations: + @pytest.fixture(scope="module") + def db_project(self) -> str: + return get_env_var("PROJECT_ID", "project id for google cloud") + + @pytest.fixture(scope="module") + def db_region(self) -> str: + return get_env_var("REGION", "region for AlloyDB instance") + + @pytest.fixture(scope="module") + def db_cluster(self) -> str: + return get_env_var("CLUSTER_ID", "cluster for AlloyDB") + + @pytest.fixture(scope="module") + def db_instance(self) -> str: + return get_env_var("INSTANCE_ID", "instance for AlloyDB") + + @pytest.fixture(scope="module") + def db_name(self) -> str: + return get_env_var("DATABASE_ID", "database name on AlloyDB instance") + + @pytest.fixture(scope="module") + def db_user(self) -> str: + return get_env_var("DB_USER", "database user for AlloyDB") + + @pytest.fixture(scope="module") + def db_password(self) -> str: + return get_env_var("DB_PASSWORD", "database password for AlloyDB") + + @pytest.fixture(scope="module") + def qdrant_collection_name(self) -> str: + return get_env_var( + "QDRANT_COLLECTION_NAME", "collection name for qdrant instance" + ) + + @pytest_asyncio.fixture(scope="class") + async def engine( + self, + db_project, + db_region, + db_cluster, + db_instance, + db_name, + db_user, + db_password, + ): + engine = await AlloyDBEngine.afrom_instance( + 
project_id=db_project, + cluster=db_cluster, + instance=db_instance, + region=db_region, + database=db_name, + user=db_user, + password=db_password, + ) + + yield engine + await aexecute(engine, f'DROP TABLE IF EXISTS "{DEFAULT_TABLE}"') + await engine.close() + + async def test_qdrant( + self, + engine, + capsys, + qdrant_collection_name, + db_project, + db_region, + db_cluster, + db_instance, + db_name, + db_user, + db_password, + ): + create_qdrant_collection(qdrant_collection_name) + + await main( + qdrant_collection_name=qdrant_collection_name, + qdrant_path=PERSISTENT_DB_PATH, + vector_size=768, + qdrant_batch_size=50, + project_id=db_project, + region=db_region, + cluster=db_cluster, + instance=db_instance, + alloydb_table=DEFAULT_TABLE, + db_name=db_name, + db_user=db_user, + db_pwd=db_password, + ) + + out, err = capsys.readouterr() + + # Assert on the script's output + assert "Error" not in err # Check for errors + assert "Qdrant client initiated." in out + assert "Langchain AlloyDB client initiated" in out + assert "Langchain Fake Embeddings service initiated." in out + assert "Langchain AlloyDB vectorstore table created" in out + assert "Langchain AlloyDBVectorStore initialized" in out + assert "Qdrant client fetched all data from collection." in out + assert "Migration completed, inserted all the batches of data to AlloyDB" in out + results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') + assert len(results) == 1000 diff --git a/samples/migrations/test_weaviate_migration.py b/samples/migrations/test_weaviate_migration.py new file mode 100644 index 00000000..80760f55 --- /dev/null +++ b/samples/migrations/test_weaviate_migration.py @@ -0,0 +1,174 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import uuid +from typing import Sequence + +import pytest +import pytest_asyncio +from migrate_weaviate_vectorstore_to_alloydb import main +from sqlalchemy import text +from sqlalchemy.engine.row import RowMapping + +from langchain_google_alloydb_pg import AlloyDBEngine + +DEFAULT_TABLE = "test_weaviate_migration" + str(uuid.uuid4()) + + +def get_env_var(key: str, desc: str) -> str: + v = os.environ.get(key) + if v is None: + raise ValueError(f"Must set env var {key} to: {desc}") + return v + + +async def aexecute( + engine: AlloyDBEngine, + query: str, +) -> None: + async def run(engine, query): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await engine._run_as_async(run(engine, query)) + + +async def afetch(engine: AlloyDBEngine, query: str) -> Sequence[RowMapping]: + async def run(engine, query): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + result_fetch = result_map.fetchall() + return result_fetch + + return await engine._run_as_async(run(engine, query)) + + +@pytest.mark.asyncio(loop_scope="class") +class TestMigrations: + @pytest.fixture(scope="module") + def db_project(self) -> str: + return get_env_var("PROJECT_ID", "project id for google cloud") + + @pytest.fixture(scope="module") + def db_region(self) -> str: + return get_env_var("REGION", "region for AlloyDB instance") + + @pytest.fixture(scope="module") + def db_cluster(self) -> str: + return get_env_var("CLUSTER_ID", "cluster for AlloyDB") + + @pytest.fixture(scope="module") + def db_instance(self) -> str: + return get_env_var("INSTANCE_ID", "instance for AlloyDB") + + @pytest.fixture(scope="module") + def db_name(self) -> str: + return get_env_var("DATABASE_ID", "database name on AlloyDB instance") + + @pytest.fixture(scope="module") + def db_user(self) -> str: + return get_env_var("DB_USER", "database user for AlloyDB") + + @pytest.fixture(scope="module") + def db_password(self) -> str: + return get_env_var("DB_PASSWORD", "database password for AlloyDB") + + @pytest.fixture(scope="module") + def weaviate_api_key(self) -> str: + return get_env_var("WEAVIATE_API_KEY", "API KEY for weaviate instance") + + @pytest.fixture(scope="module") + def weaviate_cluster_url(self) -> str: + return get_env_var("WEAVIATE_URL", "Cluster URL for weaviate instance") + + @pytest.fixture(scope="module") + def embedding_api_key(self) -> str: + return get_env_var("EMBEDDING_API_KEY", "API key for embedding service") + + @pytest.fixture(scope="module") + def weaviate_collection_name(self) -> str: + return get_env_var( + "WEAVIATE_COLLECTION_NAME", "collection name for weaviate instance" + ) + + @pytest_asyncio.fixture(scope="class") + async def engine( + self, + db_project, + db_region, + db_cluster, + db_instance, + db_name, + db_user, + db_password, + ): + engine = await AlloyDBEngine.afrom_instance( + project_id=db_project, + cluster=db_cluster, + instance=db_instance, + region=db_region, + database=db_name, + user=db_user, + password=db_password, + ) + + yield engine + await aexecute(engine, f'DROP TABLE IF EXISTS "{DEFAULT_TABLE}"') + await engine.close() + + async def test_weaviate( + self, + engine, + capsys, + weaviate_api_key, + weaviate_collection_name, + embedding_api_key, + db_project, + db_region, + db_cluster, + db_instance, + db_name, + db_user, + db_password, + ): + await main( + weaviate_api_key=weaviate_api_key, + weaviate_collection_name=weaviate_collection_name, + 
embedding_api_key=embedding_api_key, + vector_size=768, + weaviate_batch_size=50, + project_id=db_project, + region=db_region, + cluster=db_cluster, + instance=db_instance, + alloydb_table=DEFAULT_TABLE, + db_name=db_name, + db_user=db_user, + db_pwd=db_password, + ) + + out, err = capsys.readouterr() + + # Assert on the script's output + assert "Error" not in err # Check for errors + assert "Weaviate collection reference initiated" in out + assert "Langchain AlloyDB client initiated" in out + assert "Langchain Fake Embeddings service initiated." in out + assert "Langchain AlloyDB vectorstore table created" in out + assert "Langchain AlloyDBVectorStore initialized" in out + assert "Weaviate client fetched all data from collection." in out + assert "Migration completed, inserted all the batches of data to AlloyDB" in out From 8d5c2f93f62df2ee9acb85dbfe9058aa9378457b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 23 Jan 2025 17:29:04 +0100 Subject: [PATCH 04/21] chore(deps): update python-nonmajor (#231) * fix(deps): update python-nonmajor * Update requirements.txt * chore: fix migrations dependency conflict * chore: fixing tests --------- Co-authored-by: Averi Kitsch Co-authored-by: Vishwaraj Anand --- pyproject.toml | 2 +- requirements.txt | 2 +- samples/index_tuning_sample/requirements.txt | 4 ++-- samples/langchain_on_vertexai/requirements.txt | 10 +++++----- samples/migrations/requirements.txt | 6 +++--- samples/requirements.txt | 4 ++-- tests/test_async_vectorstore_search.py | 4 +++- tests/test_vectorstore_search.py | 4 +++- 8 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d5fd159a..0b0e2a7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ test = [ "black[jupyter]==24.10.0", "isort==5.13.2", "mypy==1.14.1", - "pytest-asyncio==0.25.1", + "pytest-asyncio==0.25.2", "pytest==8.3.4", "pytest-cov==6.0.0", "pytest-depends==1.0.1", diff --git a/requirements.txt b/requirements.txt index 796407dc..883335e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ google-cloud-alloydb-connector[asyncpg]==1.7.0 google-cloud-storage>=2.18.2 -langchain-core==0.3.0 +langchain-core==0.3.31 numpy==1.26.4 pgvector==0.3.6 SQLAlchemy[asyncio]==2.0.37 diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index b79793eb..67a1f07b 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ -langchain-google-alloydb-pg==0.9.0 -langchain==0.3.14 +langchain-google-alloydb-pg==0.9.1 +langchain==0.3.15 langchain-google-vertexai==2.0.11 \ No newline at end of file diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index 2d7fa7d8..c474eecd 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,8 +1,8 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.72 -langchain-google-alloydb-pg==0.7.0 -langchain-google-vertexai==2.0.7 -google-cloud-resource-manager==1.13.0 -langchain-community==0.3.7 +google-cloud-aiplatform[reasoningengine,langchain]==1.77.0 +langchain-google-alloydb-pg==0.9.1 +langchain-google-vertexai==2.0.11 +google-cloud-resource-manager==1.14.0 +langchain-community==0.3.15 # Required to fix: "PydanticUndefinedAnnotation: name 'SafetySetting' is not defined" # Todo: remove after upstream issue is fixed: https://github.com/langchain-ai/langchain/issues/28271 pydantic==2.9.0 
diff --git a/samples/migrations/requirements.txt b/samples/migrations/requirements.txt index 96aed4df..f10d8a32 100644 --- a/samples/migrations/requirements.txt +++ b/samples/migrations/requirements.txt @@ -1,8 +1,8 @@ grpcio-tools==1.67.1 langchain-chroma==0.2.0 -langchain-core==0.3.26 -langchain-google-alloydb-pg==0.8.0 -langchain-google-vertexai==2.0.9 +langchain-core==0.3.31 +langchain-google-alloydb-pg==0.9.1 +langchain-google-vertexai==2.0.11 langchain-milvus==0.1.8 langchain-pinecone==0.2.0 langchain-qdrant==0.2.0 diff --git a/samples/requirements.txt b/samples/requirements.txt index 6a0753a4..5311b479 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,5 +1,5 @@ google-cloud-aiplatform[reasoningengine,langchain]==1.77.0 google-cloud-resource-manager==1.14.0 -langchain-community==0.3.14 -langchain-google-alloydb-pg==0.9.0 +langchain-community==0.3.15 +langchain-google-alloydb-pg==0.9.1 langchain-google-vertexai==2.0.11 \ No newline at end of file diff --git a/tests/test_async_vectorstore_search.py b/tests/test_async_vectorstore_search.py index 199491ea..ab58706b 100644 --- a/tests/test_async_vectorstore_search.py +++ b/tests/test_async_vectorstore_search.py @@ -245,7 +245,9 @@ async def test_similarity_search_with_relevance_scores_threshold_cosine(self, vs results = await vs.asimilarity_search_with_relevance_scores( "foo", **score_threshold ) - assert len(results) == 4 + # Note: Since tests use FakeEmbeddings which are non-normalized vectors, results might have scores beyond the range [0,1]. + # For a normalized embedding service, a threshold of zero will yield all matched documents. + assert len(results) == 2 score_threshold = {"score_threshold": 0.02} results = await vs.asimilarity_search_with_relevance_scores( diff --git a/tests/test_vectorstore_search.py b/tests/test_vectorstore_search.py index 9cc0e056..09aad724 100644 --- a/tests/test_vectorstore_search.py +++ b/tests/test_vectorstore_search.py @@ -235,7 +235,9 @@ async def test_similarity_search_with_relevance_scores_threshold_cosine(self, vs results = await vs.asimilarity_search_with_relevance_scores( "foo", **score_threshold ) - assert len(results) == 4 + # Note: Since tests use FakeEmbeddings which are non-normalized vectors, results might have scores beyond the range [0,1]. + # For a normalized embedding service, a threshold of zero will yield all matched documents. + assert len(results) == 2 score_threshold = {"score_threshold": 0.02} results = await vs.asimilarity_search_with_relevance_scores( From 27f27e36a7fc8644f451ac9473090169cf7def9f Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Mon, 27 Jan 2025 17:26:06 +0000 Subject: [PATCH 05/21] chore(docs): Reorder markdown in Document Loader Notebook (#328) --- docs/document_loader.ipynb | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/document_loader.ipynb b/docs/document_loader.ipynb index 95759687..f9b1a9b8 100644 --- a/docs/document_loader.ipynb +++ b/docs/document_loader.ipynb @@ -232,25 +232,6 @@ ")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Optional Tip: 💡\n", - "You can also specify a schema name by passing `schema_name` wherever you pass `table_name`. 
Eg:\n", - "\n", - "```python\n", - "SCHEMA_NAME=\"my_schema\"\n", - "\n", - "await engine.ainit_document_table(\n", - " table_name=TABLE_NAME,\n", - " schema_name=SCHEMA_NAME, # Default: \"public\"\n", - "\n", - " ...\n", - ")\n", - "```" - ] - }, { "cell_type": "markdown", "metadata": { @@ -444,6 +425,25 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Optional Tip: 💡\n", + "You can also specify a schema name by passing `schema_name` wherever you pass `table_name`. Eg:\n", + "\n", + "```python\n", + "SCHEMA_NAME=\"my_schema\"\n", + "\n", + "await engine.ainit_document_table(\n", + " table_name=TABLE_NAME,\n", + " schema_name=SCHEMA_NAME, # Default: \"public\"\n", + "\n", + " ...\n", + ")\n", + "```" + ] + }, { "cell_type": "markdown", "metadata": {}, From d0e130f34a13a58883abb384fb9bbcaa10382220 Mon Sep 17 00:00:00 2001 From: Vishwaraj Anand Date: Tue, 28 Jan 2025 21:53:14 +0530 Subject: [PATCH 06/21] chore: fix weaviate snippets (#329) * chore: mypy fix * chore: remove VertexAIEmbeddings from test * chore: fix weaviate snippets * chore: lint fix * chore: pr comments * chore: add comment to clarity how to connect to weaviate local * chore: moving WEAVIATE_COLLECTION_NAME var to required * chore: reordered required and optional variables --------- Co-authored-by: Averi Kitsch --- ...migrate_chromadb_vectorstore_to_alloydb.py | 2 +- .../migrate_milvus_vectorstore_to_alloydb.py | 4 +- .../migrate_qdrant_vectorstore_to_alloydb.py | 6 +-- ...migrate_weaviate_vectorstore_to_alloydb.py | 29 +++++++---- samples/migrations/requirements.txt | 2 +- samples/migrations/test_milvus_migration.py | 2 +- samples/migrations/test_pinecone_migration.py | 6 +-- samples/migrations/test_qdrant_migration.py | 2 +- samples/migrations/test_weaviate_migration.py | 50 ++++++++++++++++--- 9 files changed, 72 insertions(+), 31 deletions(-) diff --git a/samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py b/samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py index e7fff684..ca4983f4 100644 --- a/samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py +++ b/samples/migrations/migrate_chromadb_vectorstore_to_alloydb.py @@ -23,6 +23,7 @@ """ # TODO(dev): Replace the values below +CHROMADB_PATH = "./chroma_langchain_db" CHROMADB_COLLECTION_NAME = "example_collection" PROJECT_ID = "my-project-id" REGION = "us-central1" @@ -33,7 +34,6 @@ DB_PWD = "secret-password" # TODO(developer): Optional, change the values below. -CHROMADB_PATH = "./chromadb_data" VECTOR_SIZE = 768 CHROMADB_BATCH_SIZE = 10 ALLOYDB_TABLE_NAME = "alloydb_table" diff --git a/samples/migrations/migrate_milvus_vectorstore_to_alloydb.py b/samples/migrations/migrate_milvus_vectorstore_to_alloydb.py index deb58741..557aee15 100644 --- a/samples/migrations/migrate_milvus_vectorstore_to_alloydb.py +++ b/samples/migrations/migrate_milvus_vectorstore_to_alloydb.py @@ -23,6 +23,8 @@ """ # TODO(dev): Replace the values below +MILVUS_URI = "./milvus_example.db" +MILVUS_COLLECTION_NAME = "langchain_example" PROJECT_ID = "my-project-id" REGION = "us-central1" CLUSTER = "my-cluster" @@ -32,8 +34,6 @@ DB_PWD = "secret-password" # TODO(developer): Optional, change the values below. 
-MILVUS_URI = "./milvus_data" -MILVUS_COLLECTION_NAME = "test_milvus" VECTOR_SIZE = 768 MILVUS_BATCH_SIZE = 10 ALLOYDB_TABLE_NAME = "alloydb_table" diff --git a/samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py b/samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py index 89692c80..0bded892 100644 --- a/samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py +++ b/samples/migrations/migrate_qdrant_vectorstore_to_alloydb.py @@ -23,6 +23,8 @@ """ # TODO(dev): Replace the values below +QDRANT_PATH = "/tmp/langchain_qdrant" +QDRANT_COLLECTION_NAME = "demo_collection" PROJECT_ID = "my-project-id" REGION = "us-central1" CLUSTER = "my-cluster" @@ -31,9 +33,7 @@ DB_USER = "postgres" DB_PWD = "secret-password" -# TODO(developer): Change the values below. -QDRANT_COLLECTION_NAME = "test_qdrant" -QDRANT_PATH = "./qdrant_data" +# TODO(developer): Optional, change the values below. VECTOR_SIZE = 768 QDRANT_BATCH_SIZE = 10 ALLOYDB_TABLE_NAME = "alloydb_table" diff --git a/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py b/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py index 51faf31e..f48013c6 100644 --- a/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py +++ b/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py @@ -25,7 +25,7 @@ # TODO(dev): Replace the values below WEAVIATE_API_KEY = "my-wv-api-key" WEAVIATE_CLUSTER_URL = "my-wv-cluster-url" -EMBEDDING_API_KEY = "my-wv-embedding-api-key" +WEAVIATE_COLLECTION_NAME = "example_collection" PROJECT_ID = "my-project-id" REGION = "us-central1" CLUSTER = "my-cluster" @@ -35,20 +35,24 @@ DB_PWD = "secret-password" # TODO(developer): Optional, change the values below. -WEAVIATE_COLLECTION_NAME = "test_weaviate_collection" +WEAVIATE_TEXT_KEY = "text" VECTOR_SIZE = 768 WEAVIATE_BATCH_SIZE = 10 ALLOYDB_TABLE_NAME = "alloydb_table" MAX_CONCURRENCY = 100 -from weaviate.collections import Collection # type: ignore +from weaviate import WeaviateClient def get_data_batch( - weaviate_collection: Collection, weaviate_batch_size: int = WEAVIATE_BATCH_SIZE + weaviate_client: WeaviateClient, + weaviate_collection_name: str = WEAVIATE_COLLECTION_NAME, + weaviate_text_key: str = WEAVIATE_TEXT_KEY, + weaviate_batch_size: int = WEAVIATE_BATCH_SIZE, ) -> Iterator[tuple[list[str], list[Any], list[list[float]], list[Any]]]: # [START weaviate_get_data_batch] # Iterate through the IDs and download their contents + weaviate_collection = weaviate_client.collections.get(weaviate_collection_name) ids = [] content = [] embeddings = [] @@ -56,9 +60,10 @@ def get_data_batch( for item in weaviate_collection.iterator(include_vector=True): ids.append(str(item.uuid)) - content.append(item.properties["page_content"]) + content.append(item.properties[weaviate_text_key]) embeddings.append(item.vector["default"]) - metadatas.append(item.properties["metadata"]) + del item.properties[weaviate_text_key] # type: ignore + metadatas.append(item.properties) if len(ids) >= weaviate_batch_size: # Yield the current batch of results @@ -75,10 +80,10 @@ def get_data_batch( async def main( weaviate_api_key: str = WEAVIATE_API_KEY, weaviate_collection_name: str = WEAVIATE_COLLECTION_NAME, + weaviate_text_key: str = WEAVIATE_TEXT_KEY, weaviate_cluster_url: str = WEAVIATE_CLUSTER_URL, vector_size: int = VECTOR_SIZE, weaviate_batch_size: int = WEAVIATE_BATCH_SIZE, - embedding_api_key: str = EMBEDDING_API_KEY, project_id: str = PROJECT_ID, region: str = REGION, cluster: str = CLUSTER, @@ -92,14 +97,13 @@ async def main( # [START 
weaviate_get_client]
     import weaviate
 
+    # For a locally running weaviate instance, use `weaviate.connect_to_local()`
     weaviate_client = weaviate.connect_to_weaviate_cloud(
         cluster_url=weaviate_cluster_url,
         auth_credentials=weaviate.auth.AuthApiKey(weaviate_api_key),
-        headers={"X-Cohere-Api-Key": embedding_api_key},
     )
-    weaviate_collection = weaviate_client.collections.get(weaviate_collection_name)
     # [END weaviate_get_client]
-    print("Weaviate collection reference initiated.")
+    print("Weaviate client initiated.")
 
     # [START weaviate_vectorstore_alloydb_migration_embedding_service]
     # The VectorStore interface requires an embedding service. This workflow does not
@@ -146,7 +150,10 @@ async def main(
     print("Langchain AlloyDBVectorStore initialized.")
 
     data_iterator = get_data_batch(
-        weaviate_collection=weaviate_collection, weaviate_batch_size=weaviate_batch_size
+        weaviate_client=weaviate_client,
+        weaviate_collection_name=weaviate_collection_name,
+        weaviate_text_key=weaviate_text_key,
+        weaviate_batch_size=weaviate_batch_size,
     )
     # [START weaviate_vectorstore_alloydb_migration_insert_data_batch]
     pending: set[Any] = set()
diff --git a/samples/migrations/requirements.txt b/samples/migrations/requirements.txt
index f10d8a32..76cefc1d 100644
--- a/samples/migrations/requirements.txt
+++ b/samples/migrations/requirements.txt
@@ -2,10 +2,10 @@ grpcio-tools==1.67.1
 langchain-chroma==0.2.0
 langchain-core==0.3.31
 langchain-google-alloydb-pg==0.9.1
-langchain-google-vertexai==2.0.11
 langchain-milvus==0.1.8
 langchain-pinecone==0.2.0
 langchain-qdrant==0.2.0
+langchain-weaviate==0.0.3
 pinecone-client==5.0.1
 protobuf==5.29.1
 pymilvus==2.5.3
diff --git a/samples/migrations/test_milvus_migration.py b/samples/migrations/test_milvus_migration.py
index c06b66d2..44ea3f0c 100644
--- a/samples/migrations/test_milvus_migration.py
+++ b/samples/migrations/test_milvus_migration.py
@@ -20,7 +20,7 @@
 import pytest_asyncio
 from langchain_core.documents import Document
 from langchain_core.embeddings import FakeEmbeddings
-from langchain_milvus import Milvus
+from langchain_milvus import Milvus  # type: ignore
 from migrate_milvus_vectorstore_to_alloydb import main
 from sqlalchemy import text
 from sqlalchemy.engine.row import RowMapping
diff --git a/samples/migrations/test_pinecone_migration.py b/samples/migrations/test_pinecone_migration.py
index c9066ce5..d3ecca1b 100644
--- a/samples/migrations/test_pinecone_migration.py
+++ b/samples/migrations/test_pinecone_migration.py
@@ -20,7 +20,7 @@
 import pytest
 import pytest_asyncio
 from langchain_core.documents import Document
-from langchain_google_vertexai import VertexAIEmbeddings
+from langchain_core.embeddings import FakeEmbeddings
 from langchain_pinecone import PineconeVectorStore  # type: ignore
 from migrate_pinecone_vectorstore_to_alloydb import main
 from pinecone import Pinecone, ServerlessSpec  # type: ignore
@@ -83,9 +83,7 @@ def create_pinecone_index(
     index = client.Index(pinecone_index_name)
     vector_store = PineconeVectorStore(
         index=index,
-        embedding=VertexAIEmbeddings(
-            model_name="textembedding-gecko@003", project=project_id
-        ),
+        embedding=FakeEmbeddings(size=768),
     )
 
     ids = [f"{str(uuid.uuid4())}" for i in range(1000)]
diff --git a/samples/migrations/test_qdrant_migration.py b/samples/migrations/test_qdrant_migration.py
index bb42e4cb..05ab8125 100644
--- a/samples/migrations/test_qdrant_migration.py
+++ b/samples/migrations/test_qdrant_migration.py
@@ -20,7 +20,7 @@
 import pytest_asyncio
 from langchain_core.documents import Document
 from
langchain_core.embeddings import FakeEmbeddings -from langchain_qdrant import QdrantVectorStore +from langchain_qdrant import QdrantVectorStore # type: ignore from migrate_qdrant_vectorstore_to_alloydb import main from qdrant_client import QdrantClient from qdrant_client.http.models import Distance, VectorParams diff --git a/samples/migrations/test_weaviate_migration.py b/samples/migrations/test_weaviate_migration.py index 80760f55..c873ffa0 100644 --- a/samples/migrations/test_weaviate_migration.py +++ b/samples/migrations/test_weaviate_migration.py @@ -18,9 +18,14 @@ import pytest import pytest_asyncio +import weaviate +from langchain_core.documents import Document +from langchain_core.embeddings import FakeEmbeddings +from langchain_weaviate.vectorstores import WeaviateVectorStore # type: ignore from migrate_weaviate_vectorstore_to_alloydb import main from sqlalchemy import text from sqlalchemy.engine.row import RowMapping +from weaviate.auth import Auth from langchain_google_alloydb_pg import AlloyDBEngine @@ -57,6 +62,34 @@ async def run(engine, query): return await engine._run_as_async(run(engine, query)) +async def create_weaviate_index( + weaviate_api_key: str, weaviate_cluster_url: str, weaviate_collection_name: str +) -> None: + uuids = [f"{str(uuid.uuid4())}" for i in range(1000)] + documents = [ + Document(page_content=f"content#{i}", metadata={"idv": f"{i}"}) + for i in range(1000) + ] + # For a locally running weaviate instance, use `weaviate.connect_to_local()` + with weaviate.connect_to_weaviate_cloud( + cluster_url=weaviate_cluster_url, + auth_credentials=Auth.api_key(weaviate_api_key), + ) as weaviate_client: + # delete collection if exists + try: + weaviate_client.collections.delete(weaviate_collection_name) + except Exception: + pass + + db = WeaviateVectorStore.from_documents( + documents=documents, + ids=uuids, + embedding=FakeEmbeddings(size=768), + client=weaviate_client, + index_name=weaviate_collection_name, + ) + + @pytest.mark.asyncio(loop_scope="class") class TestMigrations: @pytest.fixture(scope="module") @@ -95,10 +128,6 @@ def weaviate_api_key(self) -> str: def weaviate_cluster_url(self) -> str: return get_env_var("WEAVIATE_URL", "Cluster URL for weaviate instance") - @pytest.fixture(scope="module") - def embedding_api_key(self) -> str: - return get_env_var("EMBEDDING_API_KEY", "API key for embedding service") - @pytest.fixture(scope="module") def weaviate_collection_name(self) -> str: return get_env_var( @@ -136,7 +165,7 @@ async def test_weaviate( capsys, weaviate_api_key, weaviate_collection_name, - embedding_api_key, + weaviate_cluster_url, db_project, db_region, db_cluster, @@ -145,10 +174,15 @@ async def test_weaviate( db_user, db_password, ): + await create_weaviate_index( + weaviate_api_key, weaviate_cluster_url, weaviate_collection_name + ) + await main( weaviate_api_key=weaviate_api_key, weaviate_collection_name=weaviate_collection_name, - embedding_api_key=embedding_api_key, + weaviate_text_key="text", + weaviate_cluster_url=weaviate_cluster_url, vector_size=768, weaviate_batch_size=50, project_id=db_project, @@ -165,10 +199,12 @@ async def test_weaviate( # Assert on the script's output assert "Error" not in err # Check for errors - assert "Weaviate collection reference initiated" in out + assert "Weaviate client initiated" in out assert "Langchain AlloyDB client initiated" in out assert "Langchain Fake Embeddings service initiated." 
in out assert "Langchain AlloyDB vectorstore table created" in out assert "Langchain AlloyDBVectorStore initialized" in out assert "Weaviate client fetched all data from collection." in out assert "Migration completed, inserted all the batches of data to AlloyDB" in out + results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') + assert len(results) == 1000 From 4b35258eb4aba1623581ff42045ca1f9d0bbcae4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 30 Jan 2025 14:49:25 +0100 Subject: [PATCH 07/21] chore(deps): update python-nonmajor (#326) --- pyproject.toml | 2 +- requirements.txt | 2 +- samples/index_tuning_sample/requirements.txt | 4 ++-- samples/langchain_on_vertexai/requirements.txt | 8 ++++---- samples/migrations/requirements.txt | 14 ++++++-------- samples/requirements.txt | 6 +++--- 6 files changed, 17 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0b0e2a7c..a2b39e08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ test = [ "black[jupyter]==24.10.0", "isort==5.13.2", "mypy==1.14.1", - "pytest-asyncio==0.25.2", + "pytest-asyncio==0.25.3", "pytest==8.3.4", "pytest-cov==6.0.0", "pytest-depends==1.0.1", diff --git a/requirements.txt b/requirements.txt index 883335e6..80a035e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ google-cloud-alloydb-connector[asyncpg]==1.7.0 google-cloud-storage>=2.18.2 -langchain-core==0.3.31 +langchain-core==0.3.33 numpy==1.26.4 pgvector==0.3.6 SQLAlchemy[asyncio]==2.0.37 diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 67a1f07b..90aea5c2 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-google-alloydb-pg==0.9.1 -langchain==0.3.15 -langchain-google-vertexai==2.0.11 \ No newline at end of file +langchain==0.3.17 +langchain-google-vertexai==2.0.12 \ No newline at end of file diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index c474eecd..c9818ecf 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,8 +1,8 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.77.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.79.0 langchain-google-alloydb-pg==0.9.1 -langchain-google-vertexai==2.0.11 +langchain-google-vertexai==2.0.12 google-cloud-resource-manager==1.14.0 -langchain-community==0.3.15 +langchain-community==0.3.16 # Required to fix: "PydanticUndefinedAnnotation: name 'SafetySetting' is not defined" # Todo: remove after upstream issue is fixed: https://github.com/langchain-ai/langchain/issues/28271 -pydantic==2.9.0 +pydantic==2.10.6 diff --git a/samples/migrations/requirements.txt b/samples/migrations/requirements.txt index 76cefc1d..3bd0c53e 100644 --- a/samples/migrations/requirements.txt +++ b/samples/migrations/requirements.txt @@ -1,13 +1,11 @@ -grpcio-tools==1.67.1 -langchain-chroma==0.2.0 -langchain-core==0.3.31 +langchain-chroma==0.2.1 +langchain-core==0.3.33 langchain-google-alloydb-pg==0.9.1 langchain-milvus==0.1.8 -langchain-pinecone==0.2.0 +langchain-pinecone==0.2.2 langchain-qdrant==0.2.0 langchain-weaviate==0.0.3 -pinecone-client==5.0.1 -protobuf==5.29.1 -pymilvus==2.5.3 -qdrant-client==1.12.2 +pinecone-client==4.1.2 +pymilvus==2.5.4 +qdrant-client==1.13.2 weaviate-client==4.10.4 diff --git a/samples/requirements.txt b/samples/requirements.txt index 5311b479..08a8f9cd 100644 
--- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.77.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.79.0 google-cloud-resource-manager==1.14.0 -langchain-community==0.3.15 +langchain-community==0.3.16 langchain-google-alloydb-pg==0.9.1 -langchain-google-vertexai==2.0.11 \ No newline at end of file +langchain-google-vertexai==2.0.12 \ No newline at end of file From 222f89cb25f53a6a61a178bbc3b4015ceb73d11d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 30 Jan 2025 20:21:04 +0100 Subject: [PATCH 08/21] chore(deps): update github actions (#331) --- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d886e3c6..4cedb708 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,7 +10,7 @@ jobs: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5 with: python-version: "3.10" - name: Install nox @@ -26,7 +26,7 @@ jobs: - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5 with: python-version: "3.10" - name: Install nox diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1d9e2b4b..7db69e67 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: "3.11" From e65360a2f033d14f7941ede4077c56547c61ba77 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 30 Jan 2025 20:28:44 +0100 Subject: [PATCH 09/21] chore(deps): update dependency isort to v6 (#330) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a2b39e08..15422920 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ Changelog = "https://github.com/googleapis/langchain-google-alloydb-pg-python/bl [project.optional-dependencies] test = [ "black[jupyter]==24.10.0", - "isort==5.13.2", + "isort==6.0.0", "mypy==1.14.1", "pytest-asyncio==0.25.3", "pytest==8.3.4", From d115bae4154945d2c8808645aedd6073e3775033 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 30 Jan 2025 20:40:20 +0100 Subject: [PATCH 10/21] chore(deps): update dependency black to v25 (#333) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 15422920..744b1eea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ Changelog = "https://github.com/googleapis/langchain-google-alloydb-pg-python/bl [project.optional-dependencies] test = [ - "black[jupyter]==24.10.0", + "black[jupyter]==25.1.0", "isort==6.0.0", "mypy==1.14.1", "pytest-asyncio==0.25.3", From 42508bb2a31fd7013e10ff2ffee310432fb625bc Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 30 Jan 2025 20:57:41 +0100 Subject: [PATCH 11/21] chore(deps): update dependency 
google-cloud-storage to v3 (#334) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 744b1eea..e2c89fae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "google-cloud-alloydb-connector[asyncpg]>=1.2.0, <2.0.0", - "google-cloud-storage>=2.18.2, <3.0.0", + "google-cloud-storage>=3.0.0, <3.1.0", "langchain-core>=0.2.36, <1.0.0", "numpy>=1.24.4, <2.0.0", "pgvector>=0.2.5, <1.0.0", From 6a9a441244817449ee11126f2d58a131a0d8ec50 Mon Sep 17 00:00:00 2001 From: Vishwaraj Anand Date: Fri, 31 Jan 2025 01:51:50 +0530 Subject: [PATCH 12/21] chore: fix snippets print statements (#335) * chore: fix snippets * chore: fix snippets print statements --------- Co-authored-by: Averi Kitsch --- .../migrate_pinecone_vectorstore_to_alloydb.py | 10 +++++----- .../migrate_weaviate_vectorstore_to_alloydb.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py b/samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py index e5dd8278..d6882b9b 100644 --- a/samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py +++ b/samples/migrations/migrate_pinecone_vectorstore_to_alloydb.py @@ -119,8 +119,8 @@ async def main( pinecone_client = Pinecone(api_key=pinecone_api_key) pinecone_index = pinecone_client.Index(pinecone_index_name) - print("Pinecone index reference initiated.") # [END pinecone_get_client] + print("Pinecone index reference initiated.") # [START pinecone_vectorstore_alloydb_migration_get_client] from langchain_google_alloydb_pg import AlloyDBEngine @@ -150,16 +150,16 @@ async def main( # generate new embeddings, therefore FakeEmbeddings class is used to avoid any costs. 
from langchain_core.embeddings import FakeEmbeddings - embedding_service = FakeEmbeddings(size=vector_size) + embeddings_service = FakeEmbeddings(size=vector_size) # [END pinecone_vectorstore_alloydb_migration_embedding_service] print("Langchain Fake Embeddings service initiated.") # [START pinecone_vectorstore_alloydb_migration_vector_store] from langchain_google_alloydb_pg import AlloyDBVectorStore - vector_store = await AlloyDBVectorStore.create( + vs = await AlloyDBVectorStore.create( engine=alloydb_engine, - embedding_service=embedding_service, + embedding_service=embeddings_service, table_name=alloydb_table, ) # [END pinecone_vectorstore_alloydb_migration_vector_store] @@ -174,7 +174,7 @@ async def main( for ids, contents, embeddings, metadatas in data_iterator: pending.add( asyncio.ensure_future( - vector_store.aadd_embeddings( + vs.aadd_embeddings( texts=contents, embeddings=embeddings, metadatas=metadatas, diff --git a/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py b/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py index f48013c6..2cc818c7 100644 --- a/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py +++ b/samples/migrations/migrate_weaviate_vectorstore_to_alloydb.py @@ -126,8 +126,8 @@ async def main( user=db_user, password=db_pwd, ) - print("Langchain AlloyDB client initiated.") # [END weaviate_vectorstore_alloydb_migration_get_client] + print("Langchain AlloyDB client initiated.") # [START weaviate_vectorstore_alloydb_migration_create_table] await alloydb_engine.ainit_vectorstore_table( From 1d3b6ecdea78d3e0a59f8a21ab248636f15a88da Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 4 Feb 2025 21:29:36 +0100 Subject: [PATCH 13/21] chore(deps): update python-nonmajor (#338) --- samples/index_tuning_sample/requirements.txt | 2 +- samples/langchain_on_vertexai/requirements.txt | 2 +- samples/migrations/requirements.txt | 2 +- samples/requirements.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 90aea5c2..a3ae4faf 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-google-alloydb-pg==0.9.1 langchain==0.3.17 -langchain-google-vertexai==2.0.12 \ No newline at end of file +langchain-google-vertexai==2.0.13 \ No newline at end of file diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index c9818ecf..24514c56 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,6 +1,6 @@ google-cloud-aiplatform[reasoningengine,langchain]==1.79.0 langchain-google-alloydb-pg==0.9.1 -langchain-google-vertexai==2.0.12 +langchain-google-vertexai==2.0.13 google-cloud-resource-manager==1.14.0 langchain-community==0.3.16 # Required to fix: "PydanticUndefinedAnnotation: name 'SafetySetting' is not defined" diff --git a/samples/migrations/requirements.txt b/samples/migrations/requirements.txt index 3bd0c53e..0a5e4dcf 100644 --- a/samples/migrations/requirements.txt +++ b/samples/migrations/requirements.txt @@ -4,7 +4,7 @@ langchain-google-alloydb-pg==0.9.1 langchain-milvus==0.1.8 langchain-pinecone==0.2.2 langchain-qdrant==0.2.0 -langchain-weaviate==0.0.3 +langchain-weaviate==0.0.4 pinecone-client==4.1.2 pymilvus==2.5.4 qdrant-client==1.13.2 diff --git a/samples/requirements.txt b/samples/requirements.txt index 08a8f9cd..7b10c346 100644 --- 
a/samples/requirements.txt
+++ b/samples/requirements.txt
@@ -2,4 +2,4 @@
 google-cloud-aiplatform[reasoningengine,langchain]==1.79.0
 google-cloud-resource-manager==1.14.0
 langchain-community==0.3.16
 langchain-google-alloydb-pg==0.9.1
-langchain-google-vertexai==2.0.12
\ No newline at end of file
+langchain-google-vertexai==2.0.13
\ No newline at end of file

From 792337f94cd07a0e2f75f6dcd919942921209231 Mon Sep 17 00:00:00 2001
From: Vishwaraj Anand
Date: Fri, 7 Feb 2025 04:24:46 +0530
Subject: [PATCH 14/21] fix: add write messages to Chat History (#341)

* fix: add write definition for messages property in Chat History
* chore: add test for setting messages property in Chat History
---
 .../chat_message_history.py | 10 ++++++++--
 tests/test_chatmessagehistory.py | 14 ++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/langchain_google_alloydb_pg/chat_message_history.py b/src/langchain_google_alloydb_pg/chat_message_history.py
index b3c8d07c..31b9d66b 100644
--- a/src/langchain_google_alloydb_pg/chat_message_history.py
+++ b/src/langchain_google_alloydb_pg/chat_message_history.py
@@ -107,11 +107,17 @@ def create_sync(
         history = engine._run_as_sync(coro)
         return cls(cls.__create_key, engine, history)
 
-    @property  # type: ignore[override]
+    @property
     def messages(self) -> list[BaseMessage]:
-        """The abstraction required a property."""
+        """Fetches all messages stored in AlloyDB."""
         return self._engine._run_as_sync(self.__history._aget_messages())
 
+    @messages.setter
+    def messages(self, value: list[BaseMessage]) -> None:
+        """Clears the stored messages, then appends the given list of messages to the record in AlloyDB."""
+        self.clear()
+        self.add_messages(value)
+
     async def aadd_message(self, message: BaseMessage) -> None:
         """Append the message to the record in AlloyDB"""
         await self._engine._run_as_async(self.__history.aadd_message(message))
diff --git a/tests/test_chatmessagehistory.py b/tests/test_chatmessagehistory.py
index 7693567b..4e408081 100644
--- a/tests/test_chatmessagehistory.py
+++ b/tests/test_chatmessagehistory.py
@@ -174,6 +174,20 @@ async def test_chat_message_history_sync_messages(
     assert len(history2.messages) == 0
 
 
+@pytest.mark.asyncio
+async def test_chat_message_history_set_messages(
+    async_engine: AlloyDBEngine,
+) -> None:
+    history = await AlloyDBChatMessageHistory.create(
+        engine=async_engine, session_id="test", table_name=table_name_async
+    )
+    msg1 = HumanMessage(content="hi!")
+    msg2 = AIMessage(content="bye -_-")
+    # verify setting messages property adds to message history
+    history.messages = [msg1, msg2]
+    assert len(history.messages) == 2
+
+
 @pytest.mark.asyncio
 async def test_chat_table_async(async_engine):
     with pytest.raises(ValueError):

From 4a42bd5f1906c430875a4a3136bfc5650a51d8cc Mon Sep 17 00:00:00 2001
From: Mend Renovate
Date: Fri, 7 Feb 2025 07:42:36 +0100
Subject: [PATCH 15/21] chore(deps): update python-nonmajor (#339)

---
 pyproject.toml | 2 +-
 requirements.txt | 4 ++--
 samples/migrations/requirements.txt | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e2c89fae..7efde2fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,7 +41,7 @@ Changelog = "https://github.com/googleapis/langchain-google-alloydb-pg-python/bl
 test = [
     "black[jupyter]==25.1.0",
     "isort==6.0.0",
-    "mypy==1.14.1",
+    "mypy==1.15.0",
     "pytest-asyncio==0.25.3",
     "pytest==8.3.4",
     "pytest-cov==6.0.0",
diff --git a/requirements.txt b/requirements.txt
index 80a035e2..dbaca744 100644
--- a/requirements.txt
+++ 
b/requirements.txt @@ -1,6 +1,6 @@ google-cloud-alloydb-connector[asyncpg]==1.7.0 google-cloud-storage>=2.18.2 -langchain-core==0.3.33 +langchain-core==0.3.34 numpy==1.26.4 pgvector==0.3.6 -SQLAlchemy[asyncio]==2.0.37 +SQLAlchemy[asyncio]==2.0.38 diff --git a/samples/migrations/requirements.txt b/samples/migrations/requirements.txt index 0a5e4dcf..83dda918 100644 --- a/samples/migrations/requirements.txt +++ b/samples/migrations/requirements.txt @@ -1,5 +1,5 @@ langchain-chroma==0.2.1 -langchain-core==0.3.33 +langchain-core==0.3.34 langchain-google-alloydb-pg==0.9.1 langchain-milvus==0.1.8 langchain-pinecone==0.2.2 From 9ab5f0cc16a33b3d44dcdba344aa4008c3df3b12 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 7 Feb 2025 21:00:48 +0100 Subject: [PATCH 16/21] chore(deps): update dependency langchain to v0.3.18 (#344) --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index a3ae4faf..f1212cb8 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-google-alloydb-pg==0.9.1 -langchain==0.3.17 +langchain==0.3.18 langchain-google-vertexai==2.0.13 \ No newline at end of file From f8d1385b3d20d67d790449ad02abca0a87be4472 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Tue, 11 Feb 2025 08:05:31 +0000 Subject: [PATCH 17/21] fix: Enquote column names to not match reserved keywords. (#346) * chore: Enquote column names to not match reserved keywords. * Linter fix --- src/langchain_google_alloydb_pg/async_vectorstore.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/langchain_google_alloydb_pg/async_vectorstore.py b/src/langchain_google_alloydb_pg/async_vectorstore.py index 2fccb25e..d828a233 100644 --- a/src/langchain_google_alloydb_pg/async_vectorstore.py +++ b/src/langchain_google_alloydb_pg/async_vectorstore.py @@ -242,11 +242,11 @@ async def aadd_embeddings( # Insert embeddings for id, content, embedding, metadata in zip(ids, texts, embeddings, metadatas): metadata_col_names = ( - ", " + ", ".join(self.metadata_columns) + ", " + ", ".join(f'"{col}"' for col in self.metadata_columns) if len(self.metadata_columns) > 0 else "" ) - insert_stmt = f'INSERT INTO "{self.schema_name}"."{self.table_name}"({self.id_column}, {self.content_column}, {self.embedding_column}{metadata_col_names}' + insert_stmt = f'INSERT INTO "{self.schema_name}"."{self.table_name}"("{self.id_column}", "{self.content_column}", "{self.embedding_column}"{metadata_col_names}' values = {"id": id, "content": content, "embedding": str(embedding)} values_stmt = "VALUES (:id, :content, :embedding" if not embedding and isinstance(self.embedding_service, AlloyDBEmbeddings): @@ -264,7 +264,9 @@ async def aadd_embeddings( # Add JSON column and/or close statement insert_stmt += ( - f", {self.metadata_json_column})" if self.metadata_json_column else ")" + f""", "{self.metadata_json_column}")""" + if self.metadata_json_column + else ")" ) if self.metadata_json_column: values_stmt += ", :extra)" From ae443ed5943b3945a4286b8dc258fffabb20ceba Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 11 Feb 2025 13:34:34 +0100 Subject: [PATCH 18/21] chore(deps): update dependency langchain-community to v0.3.17 (#345) --- samples/langchain_on_vertexai/requirements.txt | 2 +- samples/requirements.txt | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index 24514c56..befbb9d1 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -2,7 +2,7 @@ google-cloud-aiplatform[reasoningengine,langchain]==1.79.0 langchain-google-alloydb-pg==0.9.1 langchain-google-vertexai==2.0.13 google-cloud-resource-manager==1.14.0 -langchain-community==0.3.16 +langchain-community==0.3.17 # Required to fix: "PydanticUndefinedAnnotation: name 'SafetySetting' is not defined" # Todo: remove after upstream issue is fixed: https://github.com/langchain-ai/langchain/issues/28271 pydantic==2.10.6 diff --git a/samples/requirements.txt b/samples/requirements.txt index 7b10c346..075fac5b 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,5 +1,5 @@ google-cloud-aiplatform[reasoningengine,langchain]==1.79.0 google-cloud-resource-manager==1.14.0 -langchain-community==0.3.16 +langchain-community==0.3.17 langchain-google-alloydb-pg==0.9.1 langchain-google-vertexai==2.0.13 \ No newline at end of file From 074f9932a8099256ff210771473badbd2156713b Mon Sep 17 00:00:00 2001 From: Vishwaraj Anand Date: Tue, 11 Feb 2025 23:50:08 +0530 Subject: [PATCH 19/21] chore: update gh workflow permissions (#347) * chore: update gh workflow permissions * chore: update gh workflow permissions for docs.yml --------- Co-authored-by: Averi Kitsch --- .github/workflows/cloud_build_failure_reporter.yml | 3 ++- .github/workflows/docs.yml | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cloud_build_failure_reporter.yml b/.github/workflows/cloud_build_failure_reporter.yml index da387522..d329ef1d 100644 --- a/.github/workflows/cloud_build_failure_reporter.yml +++ b/.github/workflows/cloud_build_failure_reporter.yml @@ -33,6 +33,7 @@ jobs: permissions: issues: 'write' checks: 'read' + contents: 'read' runs-on: 'ubuntu-latest' @@ -177,4 +178,4 @@ jobs: } to ${commits[commits.length - 1].html_url}.` ); } - \ No newline at end of file + diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4cedb708..f4ec740f 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,6 +3,8 @@ on: branches: - main name: docs +permissions: + contents: read # This applies to all jobs jobs: docs: runs-on: ubuntu-latest From cff8402bb6dc87cd5f6b43f383039cb4b8acd79d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 12 Feb 2025 19:55:35 +0100 Subject: [PATCH 20/21] chore(deps): update python-nonmajor (#348) --- samples/langchain_on_vertexai/requirements.txt | 2 +- samples/migrations/requirements.txt | 2 +- samples/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index befbb9d1..859ea0ef 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.79.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.80.0 langchain-google-alloydb-pg==0.9.1 langchain-google-vertexai==2.0.13 google-cloud-resource-manager==1.14.0 diff --git a/samples/migrations/requirements.txt b/samples/migrations/requirements.txt index 83dda918..35cc3825 100644 --- a/samples/migrations/requirements.txt +++ b/samples/migrations/requirements.txt @@ -2,7 +2,7 @@ langchain-chroma==0.2.1 langchain-core==0.3.34 
langchain-google-alloydb-pg==0.9.1 langchain-milvus==0.1.8 -langchain-pinecone==0.2.2 +langchain-pinecone==0.2.3 langchain-qdrant==0.2.0 langchain-weaviate==0.0.4 pinecone-client==4.1.2 diff --git a/samples/requirements.txt b/samples/requirements.txt index 075fac5b..adb3029d 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.79.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.80.0 google-cloud-resource-manager==1.14.0 langchain-community==0.3.17 langchain-google-alloydb-pg==0.9.1 From 23407b42baed4b182e6e46b006082a0bc1c3fd02 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 12 Feb 2025 11:05:15 -0800 Subject: [PATCH 21/21] chore(main): release 0.9.2 (#343) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 8 ++++++++ src/langchain_google_alloydb_pg/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6476c816..2a56a8e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## [0.9.2](https://github.com/googleapis/langchain-google-alloydb-pg-python/compare/v0.9.1...v0.9.2) (2025-02-12) + + +### Bug Fixes + +* Add write messages to Chat History ([#341](https://github.com/googleapis/langchain-google-alloydb-pg-python/issues/341)) ([792337f](https://github.com/googleapis/langchain-google-alloydb-pg-python/commit/792337f94cd07a0e2f75f6dcd919942921209231)) +* Enquote column names to not match reserved keywords. ([#346](https://github.com/googleapis/langchain-google-alloydb-pg-python/issues/346)) ([f8d1385](https://github.com/googleapis/langchain-google-alloydb-pg-python/commit/f8d1385b3d20d67d790449ad02abca0a87be4472)) + ## [0.9.1](https://github.com/googleapis/langchain-google-alloydb-pg-python/compare/v0.9.0...v0.9.1) (2025-01-16) diff --git a/src/langchain_google_alloydb_pg/version.py b/src/langchain_google_alloydb_pg/version.py index 2beb1a31..6cb89e67 100644 --- a/src/langchain_google_alloydb_pg/version.py +++ b/src/langchain_google_alloydb_pg/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.9.1" +__version__ = "0.9.2"
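
To illustrate the `messages` setter introduced in PATCH 14 above (released in 0.9.2), a minimal usage sketch follows. It is not part of the patch series; the engine, session id, and table name are hypothetical placeholders, and it assumes `AlloyDBChatMessageHistory` is importable from the package root, as in the repo's own tests.

    # Minimal sketch (hypothetical names). Assigning to `messages` clears the
    # stored history for the session, then appends the new list in order.
    from langchain_core.messages import AIMessage, HumanMessage

    from langchain_google_alloydb_pg import AlloyDBChatMessageHistory


    async def replace_session_history(engine):  # engine: an initialized AlloyDBEngine
        history = await AlloyDBChatMessageHistory.create(
            engine=engine, session_id="demo-session", table_name="message_store"
        )
        history.messages = [HumanMessage(content="hi!"), AIMessage(content="hello!")]
        assert len(history.messages) == 2

Note that the setter itself is synchronous (it delegates to `clear()` and `add_messages()`), so it can also be used from plain sync code.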
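A short, self-contained sketch of why the identifier-quoting fix in PATCH 17 matters: an unquoted metadata column named `user` collides with a PostgreSQL reserved keyword, while double-quoting every identifier, as the fixed `aadd_embeddings` now does, keeps the generated INSERT valid. The table and column names below are hypothetical examples.

    # Build the column list the way the fixed code does: every identifier quoted.
    metadata_columns = ["user", "source"]  # "user" is a reserved word in PostgreSQL
    metadata_col_names = ", " + ", ".join(f'"{col}"' for col in metadata_columns)
    insert_stmt = (
        f'INSERT INTO "public"."my_table"'
        f'("langchain_id", "content", "embedding"{metadata_col_names})'
    )
    print(insert_stmt)
    # -> INSERT INTO "public"."my_table"("langchain_id", "content", "embedding", "user", "source")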