From 9edea9b4c6df53e715f6059bbacb061de4c82ac2 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:35:47 -0700
Subject: [PATCH 01/13] Update README.md
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index 6cff973..ccdf6c2 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@
Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗.
+⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
+
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
## Prerequisite
From dc6ab4b42a2dc5128b331bc74d83e3d353bd6d1e Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:36:25 -0700
Subject: [PATCH 02/13] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index ccdf6c2..af5b083 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗.
-⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
+[](https://github.com/cocoindex-io/cocoindex) Please give cocoindex a star ⭐ to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥.
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
From 6955f2f32febd3b46c5f2623d0955c2d661656d1 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:37:12 -0700
Subject: [PATCH 03/13] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index af5b083..ccdf6c2 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗.
-[](https://github.com/cocoindex-io/cocoindex) Please give cocoindex a star ⭐ to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥.
+⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
From 3517ec854797601c83ce02d339c5b6399fde4992 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:38:43 -0700
Subject: [PATCH 04/13] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index ccdf6c2..e398945 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗.
-⭐ Please give cocoindex a star to support us: [Cocoindex on Github](https://github.com/cocoindex/cocoindex) if you like it, thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
+⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
From d6ca520e6cdaeef2921fdeccf284babb38e9406c Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:35:47 -0700
Subject: [PATCH 05/13] Update README.md
---
README.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.md b/README.md
index 6cff973..e398945 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@
Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗.
+⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
+
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
## Prerequisite
From 5b8cb924219c535ab35720e13f812cfed1e96afb Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:40:11 -0700
Subject: [PATCH 06/13] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index e398945..03f3d0d 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@
Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
-Super easy to start, get your RAG data pipeline running in ~50 lines of python 🤗.
+Super easy to start, get your RAG data pipeline running in ~50 lines of python 🚀.
-⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
+⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥🤗. [](https://github.com/cocoindex-io/cocoindex)
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
From 3bb0ac05e142be1f96ce84439f6ecbb47d386614 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:40:41 -0700
Subject: [PATCH 07/13] Update README.md
---
README.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 5ba17d6..d3e2780 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,11 @@
-Cocoindex Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
+Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
Super easy to start, get your RAG data pipeline running in ~50 lines of python 🚀.
⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥🤗. [](https://github.com/cocoindex-io/cocoindex)
-⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥. [](https://github.com/cocoindex-io/cocoindex)
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
From 5217f78986b8be789ff25e256c7c395d40828799 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 14:42:51 -0700
Subject: [PATCH 08/13] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index d3e2780..9306f66 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
-Super easy to start, get your RAG data pipeline running in ~50 lines of python 🚀.
+Super easy to get your RAG data pipeline running in ~50 lines of python 🚀.
⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥🤗. [](https://github.com/cocoindex-io/cocoindex)
From 91d1704f595cbd317db6dfa323fd2befa00ea6bf Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 15:33:37 -0700
Subject: [PATCH 09/13] Create CONTRIBUTING.md
---
CONTRIBUTING.md | 1 +
1 file changed, 1 insertion(+)
create mode 100644 CONTRIBUTING.md
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..c2c1812
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1 @@
+We love contributions from our community ❤️. Please check out our [contributing guide](https://cocoindex.io/docs/about/contributing).
\ No newline at end of file
From 3f665e439b6a7b194a872b59c724c28231615dc4 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 16:38:10 -0700
Subject: [PATCH 10/13] steps
---
README.md | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 9306f66..a0cf10f 100644
--- a/README.md
+++ b/README.md
@@ -11,9 +11,14 @@ Super easy to get your RAG data pipeline running in ~50 lines of python 🚀.
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
## Prerequisite
-[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
+-[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
-Make sure you have specify the database URL by environment variable:
+- Install CocoIndex
+```bash
+pip install cocoindex
+```
+
+- Make sure you have specify the database URL by environment variable:
```
export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/cocoindex"
```
From 88c529c233cb5d44be5386cf86eb543cd35adf81 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 16:39:08 -0700
Subject: [PATCH 11/13] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index a0cf10f..0d390e2 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Super easy to get your RAG data pipeline running in ~50 lines of python 🚀.
Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
## Prerequisite
--[Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
+- [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
- Install CocoIndex
```bash
From 46db27d579f5a2c13973a6bc31a450acbfa0c8e0 Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Sat, 15 Mar 2025 16:43:30 -0700
Subject: [PATCH 12/13] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 0d390e2..36cf6ae 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](htt
- Install CocoIndex
```bash
-pip install cocoindex
+pip install -U cocoindex
```
- Make sure you have specify the database URL by environment variable:
From d0e3d1ee7adf75cb3f5c84b605666ba79ec7767e Mon Sep 17 00:00:00 2001
From: Linghua Jin
Date: Wed, 28 May 2025 17:43:35 -0700
Subject: [PATCH 13/13] update query handler for quickstart
---
.env | 2 ++
README.md | 4 +--
quickstart.py | 89 +++++++++++++++++++++++++++++++++++----------------
3 files changed, 66 insertions(+), 29 deletions(-)
create mode 100644 .env
diff --git a/.env b/.env
new file mode 100644
index 0000000..335f306
--- /dev/null
+++ b/.env
@@ -0,0 +1,2 @@
+# Postgres database address for cocoindex
+COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
diff --git a/README.md b/README.md
index 36cf6ae..02fa1b0 100644
--- a/README.md
+++ b/README.md
@@ -28,13 +28,13 @@ export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/c
Setup index:
```bash
-python quickstart.py cocoindex setup
+cocoindex setup quickstart.py
```
Update index:
```bash
-python quickstart.py cocoindex update
+cocoindex update quickstart.py
```
Run query:
diff --git a/quickstart.py b/quickstart.py
index 911f101..579082c 100644
--- a/quickstart.py
+++ b/quickstart.py
@@ -1,4 +1,22 @@
import cocoindex
+from dotenv import load_dotenv
+from psycopg_pool import ConnectionPool
+import os
+
+@cocoindex.transform_flow()
+def text_to_embedding(
+ text: cocoindex.DataSlice[str],
+) -> cocoindex.DataSlice[list[float]]:
+ """
+ Embed `text` with the SentenceTransformer model `sentence-transformers/all-MiniLM-L6-v2`.
+ Defined once as a transform flow so that indexing and querying share the same embedding logic.
+ """
+ return text.transform(
+ cocoindex.functions.SentenceTransformerEmbed(
+ model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+ )
+
@cocoindex.flow_def(name="TextEmbedding")
def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
# Add a data source to read files from a directory
@@ -18,9 +36,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
# Transform data of each chunk
with doc["chunks"].row() as chunk:
# Embed the chunk, put into `embedding` field
- chunk["embedding"] = chunk["text"].transform(
- cocoindex.functions.SentenceTransformerEmbed(
- model="sentence-transformers/all-MiniLM-L6-v2"))
+ chunk["embedding"] = text_to_embedding(chunk["text"])
# Collect the chunk into the collector.
doc_embeddings.collect(filename=doc["filename"], location=chunk["location"],
@@ -31,35 +47,54 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
"doc_embeddings",
cocoindex.storages.Postgres(),
primary_key_fields=["filename", "location"],
- vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
+ vector_indexes=[
+ cocoindex.VectorIndexDef(
+ field_name="embedding",
+ metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+ )
+ ],
+ )
-query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
- name="SemanticsSearch",
- flow=text_embedding_flow,
- target_name="doc_embeddings",
- query_transform_flow=lambda text: text.transform(
- cocoindex.functions.SentenceTransformerEmbed(
- model="sentence-transformers/all-MiniLM-L6-v2")),
- default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)
+def search(pool: ConnectionPool, query: str, top_k: int = 5):
+ # Resolve the default table name of the "doc_embeddings" export target of text_embedding_flow.
+ table_name = cocoindex.utils.get_target_storage_default_name(
+ text_embedding_flow, "doc_embeddings"
+ )
+ # Evaluate the text_to_embedding transform flow on the query string to get its embedding.
+ query_vector = text_to_embedding.eval(query)
+ # Order rows by `<=>` distance to the query vector, keep `top_k`; score is 1.0 - distance.
+ with pool.connection() as conn:
+ with conn.cursor() as cur:
+ cur.execute(
+ f"""
+ SELECT filename, text, embedding <=> %s::vector AS distance
+ FROM {table_name} ORDER BY distance LIMIT %s
+ """,
+ (query_vector, top_k),
+ )
+ return [
+ {"filename": row[0], "text": row[1], "score": 1.0 - row[2]}
+ for row in cur.fetchall()
+ ]
-@cocoindex.main_fn()
def _main():
- # Run queries to demonstrate the query capabilities.
+ # Initialize the database connection pool.
+ pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
+ # Run queries in a loop to demonstrate the query capabilities.
while True:
- try:
- query = input("Enter search query (or Enter to quit): ")
- if query == '':
- break
- results, _ = query_handler.search(query, 10)
- print("\nSearch results:")
- for result in results:
- print(f"[{result.score:.3f}] {result.data['filename']}")
- print(f" {result.data['text']}")
- print("---")
- print()
- except KeyboardInterrupt:
+ query = input("Enter search query (or Enter to quit): ")
+ if query == "":
break
-
+ # Run the query function with the database connection pool and the query.
+ results = search(pool, query)
+ print("\nSearch results:")
+ for result in results:
+ print(f"[{result['score']:.3f}] {result['filename']}")
+ print(f" {result['text']}")
+ print("---")
+ print()
if __name__ == "__main__":
+ load_dotenv()
+ cocoindex.init()
_main()
\ No newline at end of file