From b0e200d71b5bf765aa00d9ccff7fe87dbfee0d37 Mon Sep 17 00:00:00 2001
From: Jiangzhou He <hejiangzhou@gmail.com>
Date: Wed, 2 Jul 2025 09:01:46 -0700
Subject: [PATCH] chore: update instructions and remove unnecessary code

---
 .env          |  2 --
 README.md     | 12 +++---------
 quickstart.py | 31 ++++++++++++++++++++++---------
 3 files changed, 25 insertions(+), 20 deletions(-)
 delete mode 100644 .env

diff --git a/.env b/.env
deleted file mode 100644
index 335f306..0000000
--- a/.env
+++ /dev/null
@@ -1,2 +0,0 @@
-# Postgres database address for cocoindex
-COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
diff --git a/README.md b/README.md
index 02fa1b0..7ceaf22 100644
--- a/README.md
+++ b/README.md
@@ -13,9 +13,9 @@ Video tutorial with detailed explanation: [Cocoindex Quickstart Video Guide](htt
 ## Prerequisite
 - [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
 
-- Install CocoIndex
+- Install CocoIndex and other dependencies:
 ```bash
-pip install -U cocoindex
+pip install -U "cocoindex[embeddings]" "psycopg[binary,pool]" pgvector
 ```
 
 -  Make sure you have specify the database URL by environment variable:
@@ -25,16 +25,10 @@ export COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@localhost:5432/c
 
 ## Run
 
-Setup index:
-
-```bash
-cocoindex setup quickstart.py
-```
-
 Update index:
 
 ```bash
-cocoindex update quickstart.py
+cocoindex update --setup quickstart.py
 ```
 
 Run query:
diff --git a/quickstart.py b/quickstart.py
index 579082c..302b84f 100644
--- a/quickstart.py
+++ b/quickstart.py
@@ -1,8 +1,8 @@
 import cocoindex
-from dotenv import load_dotenv
 from psycopg_pool import ConnectionPool
 import os
 
+
 @cocoindex.transform_flow()
 def text_to_embedding(
     text: cocoindex.DataSlice[str],
@@ -17,11 +17,15 @@ def text_to_embedding(
         )
     )
 
-@cocoindex.flow_def(name="TextEmbedding")
-def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
+
+@cocoindex.flow_def(name="TextEmbeddingQuickStart")
+def text_embedding_flow(
+    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
+):
     # Add a data source to read files from a directory
     data_scope["documents"] = flow_builder.add_source(
-        cocoindex.sources.LocalFile(path="markdown_files"))
+        cocoindex.sources.LocalFile(path="markdown_files")
+    )
 
     # Add a collector for data to be exported to the vector index
     doc_embeddings = data_scope.add_collector()
@@ -31,7 +35,10 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
         # Split the document into chunks, put into `chunks` field
         doc["chunks"] = doc["content"].transform(
             cocoindex.functions.SplitRecursively(),
-            language="javascript", chunk_size=300, chunk_overlap=100)
+            language="javascript",
+            chunk_size=300,
+            chunk_overlap=100,
+        )
 
         # Transform data of each chunk
         with doc["chunks"].row() as chunk:
@@ -39,8 +46,12 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
             chunk["embedding"] = text_to_embedding(chunk["text"])
 
             # Collect the chunk into the collector.
-            doc_embeddings.collect(filename=doc["filename"], location=chunk["location"],
-                                   text=chunk["text"], embedding=chunk["embedding"])
+            doc_embeddings.collect(
+                filename=doc["filename"],
+                location=chunk["location"],
+                text=chunk["text"],
+                embedding=chunk["embedding"],
+            )
 
     # Export collected data to a vector index.
     doc_embeddings.export(
@@ -55,6 +66,7 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
         ],
     )
 
+
 def search(pool: ConnectionPool, query: str, top_k: int = 5):
     # Get the table name, for the export target in the text_embedding_flow above.
     table_name = cocoindex.utils.get_target_storage_default_name(
@@ -77,6 +89,7 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
                 for row in cur.fetchall()
             ]
 
+
 def _main():
     # Initialize the database connection pool.
     pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
@@ -94,7 +107,7 @@ def _main():
             print("---")
         print()
 
+
 if __name__ == "__main__":
-    load_dotenv()
     cocoindex.init()
-    _main()
\ No newline at end of file
+    _main()