From 9edea9b4c6df53e715f6059bbacb061de4c82ac2 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:35:47 -0700 Subject: [PATCH 01/13] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6cff973..ccdf6c2 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗. +⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) + Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) ## Prerequisite From dc6ab4b42a2dc5128b331bc74d83e3d353bd6d1e Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:36:25 -0700 Subject: [PATCH 02/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ccdf6c2..af5b083 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗. -⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) +[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) Please give cocoindex a star ⭐ to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) From 6955f2f32febd3b46c5f2623d0955c2d661656d1 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:37:12 -0700 Subject: [PATCH 03/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index af5b083..ccdf6c2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗. -[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) Please give cocoindex a star ⭐ to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. +⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) From 3517ec854797601c83ce02d339c5b6399fde4992 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:38:43 -0700 Subject: [PATCH 04/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ccdf6c2..e398945 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗. -⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) +⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) From d6ca520e6cdaeef2921fdeccf284babb38e9406c Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:35:47 -0700 Subject: [PATCH 05/13] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6cff973..e398945 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗. +⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) + Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) ## Prerequisite From 5b8cb924219c535ab35720e13f812cfed1e96afb Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:40:11 -0700 Subject: [PATCH 06/13] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e398945..03f3d0d 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@

Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. -Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗. +Super easy to start, get your RAG data pipeline running in ~50 lines of python 🚀. -⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) +⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥🤗. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) From 3bb0ac05e142be1f96ce84439f6ecbb47d386614 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:40:41 -0700 Subject: [PATCH 07/13] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 5ba17d6..d3e2780 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,11 @@ CocoIndex

-Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. +Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. Super easy to start, get your RAG data pipeline running in ~50 lines of python 🚀. ⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥🤗. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) -⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) From 5217f78986b8be789ff25e256c7c395d40828799 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 14:42:51 -0700 Subject: [PATCH 08/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d3e2780..9306f66 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@

Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide. -Super easy to start, get your RAG data pipeline running in ~50 lines of python 🚀. +Super easy to get your RAG data pipeline running in ~50 lines of python 🚀. ⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥🤗. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) From 91d1704f595cbd317db6dfa323fd2befa00ea6bf Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 15:33:37 -0700 Subject: [PATCH 09/13] Create CONTRIBUTING.md --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..c2c1812 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +We love contributions from our community ❤️. Please check out our [contributing guide](https://cocoindex.io/docs/about/contributing). \ No newline at end of file From 3f665e439b6a7b194a872b59c724c28231615dc4 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 16:38:10 -0700 Subject: [PATCH 10/13] steps --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9306f66..a0cf10f 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,14 @@ Super easy to get your RAG data pipeline running in ~50 lines of python 🚀. Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) ## Prerequisite -[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. +-[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. -Make sure you have specify the database URL by environment variable: +- Install CocoIndex +```bash +pip install cocoindex +``` + +- Make sure you have specify the database URL by environment variable: ``` export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/cocoindex" ``` From 88c529c233cb5d44be5386cf86eb543cd35adf81 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 16:39:08 -0700 Subject: [PATCH 11/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a0cf10f..0d390e2 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Super easy to get your RAG data pipeline running in ~50 lines of python 🚀. Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ) ## Prerequisite --[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. +- [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. - Install CocoIndex ```bash From 46db27d579f5a2c13973a6bc31a450acbfa0c8e0 Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Sat, 15 Mar 2025 16:43:30 -0700 Subject: [PATCH 12/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d390e2..36cf6ae 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](htt - Install CocoIndex ```bash -pip install cocoindex +pip install -U cocoindex ``` - Make sure you have specify the database URL by environment variable: From d0e3d1ee7adf75cb3f5c84b605666ba79ec7767e Mon Sep 17 00:00:00 2001 From: Linghua Jin Date: Wed, 28 May 2025 17:43:35 -0700 Subject: [PATCH 13/13] update query handler for quickstart --- .env | 2 ++ README.md | 4 +-- quickstart.py | 89 +++++++++++++++++++++++++++++++++++---------------- 3 files changed, 66 insertions(+), 29 deletions(-) create mode 100644 .env diff --git a/.env b/.env new file mode 100644 index 0000000..335f306 --- /dev/null +++ b/.env @@ -0,0 +1,2 @@ +# Postgres database address for cocoindex +COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex diff --git a/README.md b/README.md index 36cf6ae..02fa1b0 100644 --- a/README.md +++ b/README.md @@ -28,13 +28,13 @@ export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/c Setup index: ```bash -python quickstart.py cocoindex setup +cocoindex setup quickstart.py ``` Update index: ```bash -python quickstart.py cocoindex update +cocoindex update quickstart.py ``` Run query: diff --git a/quickstart.py b/quickstart.py index 911f101..579082c 100644 --- a/quickstart.py +++ b/quickstart.py @@ -1,4 +1,22 @@ import cocoindex +from dotenv import load_dotenv +from psycopg_pool import ConnectionPool +import os + +@cocoindex.transform_flow() +def text_to_embedding( + text: cocoindex.DataSlice[str], +) -> cocoindex.DataSlice[list[float]]: + """ + Embed the text using a SentenceTransformer model. + This is a shared logic between indexing and querying, so extract it as a function. + """ + return text.transform( + cocoindex.functions.SentenceTransformerEmbed( + model="sentence-transformers/all-MiniLM-L6-v2" + ) + ) + @cocoindex.flow_def(name="TextEmbedding") def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope): # Add a data source to read files from a directory @@ -18,9 +36,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind # Transform data of each chunk with doc["chunks"].row() as chunk: # Embed the chunk, put into `embedding` field - chunk["embedding"] = chunk["text"].transform( - cocoindex.functions.SentenceTransformerEmbed( - model="sentence-transformers/all-MiniLM-L6-v2")) + chunk["embedding"] = text_to_embedding(chunk["text"]) # Collect the chunk into the collector. doc_embeddings.collect(filename=doc["filename"], location=chunk["location"], @@ -31,35 +47,54 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind "doc_embeddings", cocoindex.storages.Postgres(), primary_key_fields=["filename", "location"], - vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)]) + vector_indexes=[ + cocoindex.VectorIndexDef( + field_name="embedding", + metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY, + ) + ], + ) -query_handler = cocoindex.query.SimpleSemanticsQueryHandler( - name="SemanticsSearch", - flow=text_embedding_flow, - target_name="doc_embeddings", - query_transform_flow=lambda text: text.transform( - cocoindex.functions.SentenceTransformerEmbed( - model="sentence-transformers/all-MiniLM-L6-v2")), - default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY) +def search(pool: ConnectionPool, query: str, top_k: int = 5): + # Get the table name, for the export target in the text_embedding_flow above. + table_name = cocoindex.utils.get_target_storage_default_name( + text_embedding_flow, "doc_embeddings" + ) + # Evaluate the transform flow defined above with the input query, to get the embedding. + query_vector = text_to_embedding.eval(query) + # Run the query and get the results. + with pool.connection() as conn: + with conn.cursor() as cur: + cur.execute( + f""" + SELECT filename, text, embedding <=> %s::vector AS distance + FROM {table_name} ORDER BY distance LIMIT %s + """, + (query_vector, top_k), + ) + return [ + {"filename": row[0], "text": row[1], "score": 1.0 - row[2]} + for row in cur.fetchall() + ] -@cocoindex.main_fn() def _main(): - # Run queries to demonstrate the query capabilities. + # Initialize the database connection pool. + pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL")) + # Run queries in a loop to demonstrate the query capabilities. while True: - try: - query = input("Enter search query (or Enter to quit): ") - if query == '': - break - results, _ = query_handler.search(query, 10) - print("\nSearch results:") - for result in results: - print(f"[{result.score:.3f}] {result.data['filename']}") - print(f" {result.data['text']}") - print("---") - print() - except KeyboardInterrupt: + query = input("Enter search query (or Enter to quit): ") + if query == "": break - + # Run the query function with the database connection pool and the query. + results = search(pool, query) + print("\nSearch results:") + for result in results: + print(f"[{result['score']:.3f}] {result['filename']}") + print(f" {result['text']}") + print("---") + print() if __name__ == "__main__": + load_dotenv() + cocoindex.init() _main() \ No newline at end of file