From 0fcef54c480b5f15106dcde59c90fd68275b56e4 Mon Sep 17 00:00:00 2001
From: Linghua Jin <linghua@cocoindex.io>
Date: Fri, 29 Aug 2025 15:12:45 -0700
Subject: [PATCH 1/2] simplify get started

---
 .env          |   6 +++
 README.md     |  18 ++++----
 main.py       |  53 +++++++++++++++++++++++
 quickstart.py | 113 --------------------------------------------------
 4 files changed, 68 insertions(+), 122 deletions(-)
 create mode 100644 .env
 create mode 100644 main.py
 delete mode 100644 quickstart.py

diff --git a/.env b/.env
new file mode 100644
index 0000000..335feb6
--- /dev/null
+++ b/.env
@@ -0,0 +1,6 @@
+# Postgres database address for cocoindex
+COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
+
+# Fallback to CPU for operations not supported by MPS on Mac.
+# It's no-op for other platforms.
+PYTORCH_ENABLE_MPS_FALLBACK=1
diff --git a/README.md b/README.md
index 7ceaf22..05862a3 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 Quickstart demo following the [Cocoindex Quickstart](https://cocoindex.io/docs/quickstart) guide.
 Super easy to get your RAG data pipeline running in ~50 lines of python 🚀.
 
-⭐ Please give [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a star to support us if you like our work. Thank you so much with a warm coconut hug 🥥🤗. [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
+⭐ Star [CocoIndex on GitHub](https://github.com/cocoindex-io/cocoindex) if you like it! [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
 
 
 Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](https://www.youtube.com/watch?v=dQw4w9WgXcQ)
@@ -15,12 +15,7 @@ Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](htt
 
 - Install CocoIndex and other dependencies:
 ```bash
-pip install -U "cocoindex[embeddings]" "psycopg[binary,pool]" pgvector
-```
-
--  Make sure you have specify the database URL by environment variable:
-```
-export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/cocoindex"
+pip install -U "cocoindex[embeddings]"
 ```
 
 ## Run
@@ -28,11 +23,16 @@ export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/c
 Update index:
 
 ```bash
-cocoindex update --setup quickstart.py
+cocoindex update --setup main.py
 ```
 
 Run query:
 
 ```bash
-python quickstart.py
+python main.py
+```
+
+## Run with CocoInsight
+```bash
+cocoindex server -ci main.py
 ```
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..90f3651
--- /dev/null
+++ b/main.py
@@ -0,0 +1,62 @@
+import cocoindex
+
+
+@cocoindex.flow_def(name="TextEmbeddingQuickStart")
+def text_embedding_flow(
+    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
+):
+    """Define the quickstart text-embedding indexing flow.
+
+    Reads files from the local `markdown_files` directory, splits each
+    document's content into overlapping chunks, embeds every chunk with a
+    SentenceTransformer model, and exports one row per chunk (keyed by
+    filename and location) to the Postgres target `doc_embeddings`, with a
+    cosine-similarity vector index on the embedding.
+    """
+    # Add a data source to read files from a directory
+    data_scope["documents"] = flow_builder.add_source(
+        cocoindex.sources.LocalFile(path="markdown_files")
+    )
+
+    # Add a collector for data to be exported to the vector index
+    doc_embeddings = data_scope.add_collector()
+
+    # Transform data of each document
+    with data_scope["documents"].row() as doc:
+        # Split the document into chunks, put into `chunks` field.
+        # The source directory is `markdown_files`, so split with the
+        # markdown rules rather than a programming-language grammar.
+        doc["chunks"] = doc["content"].transform(
+            cocoindex.functions.SplitRecursively(),
+            language="markdown",
+            chunk_size=300,
+            chunk_overlap=100,
+        )
+
+        # Transform data of each chunk
+        with doc["chunks"].row() as chunk:
+            # Embed the chunk, put into `embedding` field (inlined transform)
+            chunk["embedding"] = chunk["text"].transform(
+                cocoindex.functions.SentenceTransformerEmbed(
+                    model="sentence-transformers/all-MiniLM-L6-v2"
+                )
+            )
+
+            # Collect the chunk into the collector.
+            doc_embeddings.collect(
+                filename=doc["filename"],
+                location=chunk["location"],
+                text=chunk["text"],
+                embedding=chunk["embedding"],
+            )
+
+    # Export collected data to a vector index.
+    doc_embeddings.export(
+        "doc_embeddings",
+        cocoindex.storages.Postgres(),
+        primary_key_fields=["filename", "location"],
+        vector_indexes=[
+            cocoindex.VectorIndexDef(
+                field_name="embedding",
+                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+            )
+        ],
+    )
diff --git a/quickstart.py b/quickstart.py
deleted file mode 100644
index 302b84f..0000000
--- a/quickstart.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import cocoindex
-from psycopg_pool import ConnectionPool
-import os
-
-
-@cocoindex.transform_flow()
-def text_to_embedding(
-    text: cocoindex.DataSlice[str],
-) -> cocoindex.DataSlice[list[float]]:
-    """
-    Embed the text using a SentenceTransformer model.
-    This is a shared logic between indexing and querying, so extract it as a function.
-    """
-    return text.transform(
-        cocoindex.functions.SentenceTransformerEmbed(
-            model="sentence-transformers/all-MiniLM-L6-v2"
-        )
-    )
-
-
-@cocoindex.flow_def(name="TextEmbeddingQuickStart")
-def text_embedding_flow(
-    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
-):
-    # Add a data source to read files from a directory
-    data_scope["documents"] = flow_builder.add_source(
-        cocoindex.sources.LocalFile(path="markdown_files")
-    )
-
-    # Add a collector for data to be exported to the vector index
-    doc_embeddings = data_scope.add_collector()
-
-    # Transform data of each document
-    with data_scope["documents"].row() as doc:
-        # Split the document into chunks, put into `chunks` field
-        doc["chunks"] = doc["content"].transform(
-            cocoindex.functions.SplitRecursively(),
-            language="javascript",
-            chunk_size=300,
-            chunk_overlap=100,
-        )
-
-        # Transform data of each chunk
-        with doc["chunks"].row() as chunk:
-            # Embed the chunk, put into `embedding` field
-            chunk["embedding"] = text_to_embedding(chunk["text"])
-
-            # Collect the chunk into the collector.
-            doc_embeddings.collect(
-                filename=doc["filename"],
-                location=chunk["location"],
-                text=chunk["text"],
-                embedding=chunk["embedding"],
-            )
-
-    # Export collected data to a vector index.
-    doc_embeddings.export(
-        "doc_embeddings",
-        cocoindex.storages.Postgres(),
-        primary_key_fields=["filename", "location"],
-        vector_indexes=[
-            cocoindex.VectorIndexDef(
-                field_name="embedding",
-                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
-            )
-        ],
-    )
-
-
-def search(pool: ConnectionPool, query: str, top_k: int = 5):
-    # Get the table name, for the export target in the text_embedding_flow above.
-    table_name = cocoindex.utils.get_target_storage_default_name(
-        text_embedding_flow, "doc_embeddings"
-    )
-    # Evaluate the transform flow defined above with the input query, to get the embedding.
-    query_vector = text_to_embedding.eval(query)
-    # Run the query and get the results.
-    with pool.connection() as conn:
-        with conn.cursor() as cur:
-            cur.execute(
-                f"""
-                SELECT filename, text, embedding <=> %s::vector AS distance
-                FROM {table_name} ORDER BY distance LIMIT %s
-            """,
-                (query_vector, top_k),
-            )
-            return [
-                {"filename": row[0], "text": row[1], "score": 1.0 - row[2]}
-                for row in cur.fetchall()
-            ]
-
-
-def _main():
-    # Initialize the database connection pool.
-    pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
-    # Run queries in a loop to demonstrate the query capabilities.
-    while True:
-        query = input("Enter search query (or Enter to quit): ")
-        if query == "":
-            break
-        # Run the query function with the database connection pool and the query.
-        results = search(pool, query)
-        print("\nSearch results:")
-        for result in results:
-            print(f"[{result['score']:.3f}] {result['filename']}")
-            print(f"    {result['text']}")
-            print("---")
-        print()
-
-
-if __name__ == "__main__":
-    cocoindex.init()
-    _main()

From 4e9d530444142da16ec5d3c5760350aa45aeb664 Mon Sep 17 00:00:00 2001
From: Linghua Jin <linghua@cocoindex.io>
Date: Fri, 29 Aug 2025 15:29:49 -0700
Subject: [PATCH 2/2] Delete .env

---
 .env | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 .env

diff --git a/.env b/.env
deleted file mode 100644
index 335feb6..0000000
--- a/.env
+++ /dev/null
@@ -1,6 +0,0 @@
-# Postgres database address for cocoindex
-COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
-
-# Fallback to CPU for operations not supported by MPS on Mac.
-# It's no-op for other platforms.
-PYTORCH_ENABLE_MPS_FALLBACK=1