From c6745da99567d3f68fa4f9db3bf4d66890eb2b11 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Thu, 29 Aug 2024 19:11:35 +0000
Subject: [PATCH 1/4] test: add read_csv system test for a remote (HTTPS) CSV file

---
 tests/system/small/test_session.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 5b5db74ea6..6f3d464e53 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -1036,6 +1036,26 @@ def test_read_csv_local_w_usecols(session, scalars_pandas_df_index, engine):
         assert len(df.columns) == 1
 
 
+@pytest.mark.parametrize(
+    "engine",
+    [
+        pytest.param(
+            "bigquery",
+            id="bq_engine",
+            marks=pytest.mark.xfail(
+                raises=NotImplementedError,
+            ),
+        ),
+        pytest.param(None, id="default_engine"),
+    ],
+)
+def test_read_csv_others(session, scalars_pandas_df_index, engine):
+    # TODO: Update to `main` branch after merging https://github.com/googleapis/python-bigquery-dataframes/pull/938
+    uri = "https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/main_chelsealin_addcsvfile/tests/data/people.csv"
+    df = session.read_csv(uri, engine=engine)
+    assert len(df.columns) == 3
+
+
 @pytest.mark.parametrize(
     "engine",
     [

From 073c615692de979f2b939008d130ca41659f2b20 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Thu, 29 Aug 2024 19:32:08 +0000
Subject: [PATCH 2/4] feat: enable read_csv() to process other files

---
 bigframes/session/__init__.py | 6 ++++--
 bigframes/session/loader.py   | 8 +++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index c91266b875..5c85552bba 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1008,10 +1008,12 @@ def _check_file_size(self, filepath: str):
             blob = bucket.blob(blob_name)
             blob.reload()
             file_size = blob.size
-        else:  # local file path
+        elif os.path.exists(filepath):  # local file path
             file_size = os.path.getsize(filepath)
+        else:
+            file_size = None
 
-        if file_size > max_size:
+        if file_size is not None and file_size > max_size:
             # Convert to GB
             file_size = round(file_size / (1024**3), 1)
             max_size = int(max_size / 1024**3)
diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py
index edfd57b965..924fddce12 100644
--- a/bigframes/session/loader.py
+++ b/bigframes/session/loader.py
@@ -18,6 +18,7 @@
 import dataclasses
 import datetime
 import itertools
+import os
 import typing
 from typing import Dict, Hashable, IO, Iterable, List, Optional, Sequence, Tuple, Union
 
@@ -421,11 +422,16 @@ def _read_bigquery_load_job(
                 load_job = self._bqclient.load_table_from_uri(
                     filepath_or_buffer, table, job_config=job_config
                 )
-            else:
+            elif os.path.exists(filepath_or_buffer):  # local file path
                 with open(filepath_or_buffer, "rb") as source_file:
                     load_job = self._bqclient.load_table_from_file(
                         source_file, table, job_config=job_config
                     )
+            else:
+                raise NotImplementedError(
+                    f"BigQuery engine only supports a local file path or GCS path. "
+                    f"{constants.FEEDBACK_LINK}"
+                )
         else:
             load_job = self._bqclient.load_table_from_file(
                 filepath_or_buffer, table, job_config=job_config

From fa68d2d9e480d213c38b0984714abaeadf35545e Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Sun, 1 Sep 2024 01:17:36 +0000
Subject: [PATCH 3/4] test: point read_csv remote-file test URL at the main branch

---
 tests/system/small/test_session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 6f3d464e53..3dce0963cb 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -1051,7 +1051,7 @@ def test_read_csv_local_w_usecols(session, scalars_pandas_df_index, engine):
 )
 def test_read_csv_others(session, scalars_pandas_df_index, engine):
     # TODO: Update to `main` branch after merging https://github.com/googleapis/python-bigquery-dataframes/pull/938
-    uri = "https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/main_chelsealin_addcsvfile/tests/data/people.csv"
+    uri = "https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/main/tests/data/people.csv"
     df = session.read_csv(uri, engine=engine)
     assert len(df.columns) == 3
 

From 1726161bfdb5d68fc79e74fddcee1c94c636c075 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Tue, 3 Sep 2024 18:26:47 +0000
Subject: [PATCH 4/4] docs: document file locations supported by read_csv engines

---
 tests/system/small/test_session.py                          | 3 +--
 third_party/bigframes_vendored/pandas/io/parsers/readers.py | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 3dce0963cb..ed3e38e6f8 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -1049,8 +1049,7 @@ def test_read_csv_local_w_usecols(session, scalars_pandas_df_index, engine):
         pytest.param(None, id="default_engine"),
     ],
 )
-def test_read_csv_others(session, scalars_pandas_df_index, engine):
-    # TODO: Update to `main` branch after merging https://github.com/googleapis/python-bigquery-dataframes/pull/938
+def test_read_csv_others(session, engine):
     uri = "https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/main/tests/data/people.csv"
     df = session.read_csv(uri, engine=engine)
     assert len(df.columns) == 3
diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py
index 248cf8e0fe..35b2a1982a 100644
--- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py
+++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py
@@ -51,8 +51,7 @@ def read_csv(
         encoding: Optional[str] = None,
         **kwargs,
     ):
-        """Loads DataFrame from comma-separated values (csv) file locally or from
-        Cloud Storage.
+        """Loads data from a comma-separated values (csv) file into a DataFrame.
 
         The CSV file data will be persisted as a temporary BigQuery table, which can be
         automatically recycled after the Session is closed.
@@ -60,7 +59,8 @@ def read_csv(
         .. note::
             using `engine="bigquery"` will not guarantee the same ordering as the
             file. Instead, set a serialized index column as the index and sort by
-            that in the resulting DataFrame.
+            that in the resulting DataFrame. This engine only supports files
+            stored on your local machine or in Google Cloud Storage.
 
         .. note::
             For non-bigquery engine, data is inlined in the query SQL if it is