From fc223f87bac09019914aef4131d99bfc9a507cfc Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Mon, 19 Aug 2024 20:37:52 +0000 Subject: [PATCH 1/3] feat: add llm.TextEmbeddingGenerator to support new embedding models --- bigframes/ml/llm.py | 166 +++++++++++++++++++++++++++++- bigframes/ml/loader.py | 3 + tests/system/small/ml/test_llm.py | 41 ++++++++ 3 files changed, 207 insertions(+), 3 deletions(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 2517178d89..ff4710258a 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -40,11 +40,18 @@ _EMBEDDING_GENERATOR_GECKO_ENDPOINT = "textembedding-gecko" _EMBEDDING_GENERATOR_GECKO_MULTILINGUAL_ENDPOINT = "textembedding-gecko-multilingual" -_EMBEDDING_GENERATOR_ENDPOINTS = ( +_PALM2_EMBEDDING_GENERATOR_ENDPOINTS = ( _EMBEDDING_GENERATOR_GECKO_ENDPOINT, _EMBEDDING_GENERATOR_GECKO_MULTILINGUAL_ENDPOINT, ) +_TEXT_EMBEDDING_004_ENDPOINT = "text-embedding-004" +_TEXT_MULTILINGUAL_EMBEDDING_002_ENDPOINT = "text-multilingual-embedding-002" +_TEXT_EMBEDDING_ENDPOINTS = ( + _TEXT_EMBEDDING_004_ENDPOINT, + _TEXT_MULTILINGUAL_EMBEDDING_002_ENDPOINT, +) + _GEMINI_PRO_ENDPOINT = "gemini-pro" _GEMINI_1P5_PRO_PREVIEW_ENDPOINT = "gemini-1.5-pro-preview-0514" _GEMINI_1P5_PRO_FLASH_PREVIEW_ENDPOINT = "gemini-1.5-flash-preview-0514" @@ -57,6 +64,7 @@ _ML_GENERATE_TEXT_STATUS = "ml_generate_text_status" _ML_EMBED_TEXT_STATUS = "ml_embed_text_status" +_ML_GENERATE_EMBEDDING_STATUS = "ml_generate_embedding_status" @log_adapter.class_logger @@ -387,6 +395,10 @@ def to_gbq(self, model_name: str, replace: bool = False) -> PaLM2TextGenerator: class PaLM2TextEmbeddingGenerator(base.BaseEstimator): """PaLM2 text embedding generator LLM model. + .. note:: + Models in this class are outdated and going to be deprecated. To use the most updated text embedding models, go to the TextEmbeddingGenerator class. + + Args: model_name (str, Default to "textembedding-gecko"): The model for text embedding. 
“textembedding-gecko” returns model embeddings for text inputs. @@ -447,9 +459,9 @@ def _create_bqml_model(self): iam_role="aiplatform.user", ) - if self.model_name not in _EMBEDDING_GENERATOR_ENDPOINTS: + if self.model_name not in _PALM2_EMBEDDING_GENERATOR_ENDPOINTS: raise ValueError( - f"Model name {self.model_name} is not supported. We only support {', '.join(_EMBEDDING_GENERATOR_ENDPOINTS)}." + f"Model name {self.model_name} is not supported. We only support {', '.join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS)}." ) endpoint = ( @@ -551,6 +563,154 @@ def to_gbq( return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger +class TextEmbeddingGenerator(base.BaseEstimator): + """Text embedding generator LLM model. + + Args: + model_name (str, Default to "text-embedding-004"): + The model for text embedding. Possible values are "text-embedding-004" or "text-multilingual-embedding-002". + text-embedding models return model embeddings for text inputs. + text-multilingual-embedding models return model embeddings for text inputs and support over 100 languages. + Default to "text-embedding-004". + session (bigframes.Session or None): + BQ session to create the model. If None, use the global default session. + connection_name (str or None): + Connection to connect with remote service. str of the format PROJECT_ID.LOCATION.CONNECTION_ID. + If None, use default connection in session context. 
+ """ + + def __init__( + self, + *, + model_name: Literal[ + "text-embedding-004", "text-multilingual-embedding-002" + ] = "text-embedding-004", + session: Optional[bigframes.Session] = None, + connection_name: Optional[str] = None, + ): + self.model_name = model_name + self.session = session or bpd.get_global_session() + self._bq_connection_manager = self.session.bqconnectionmanager + + connection_name = connection_name or self.session._bq_connection + self.connection_name = clients.resolve_full_bq_connection_name( + connection_name, + default_project=self.session._project, + default_location=self.session._location, + ) + + self._bqml_model_factory = globals.bqml_model_factory() + self._bqml_model: core.BqmlModel = self._create_bqml_model() + + def _create_bqml_model(self): + # Parse and create connection if needed. + if not self.connection_name: + raise ValueError( + "Must provide connection_name, either in constructor or through session options." + ) + + if self._bq_connection_manager: + connection_name_parts = self.connection_name.split(".") + if len(connection_name_parts) != 3: + raise ValueError( + f"connection_name must be of the format .., got {self.connection_name}." + ) + self._bq_connection_manager.create_bq_connection( + project_id=connection_name_parts[0], + location=connection_name_parts[1], + connection_id=connection_name_parts[2], + iam_role="aiplatform.user", + ) + + if self.model_name not in _TEXT_EMBEDDING_ENDPOINTS: + raise ValueError( + f"Model name {self.model_name} is not supported. We only support {', '.join(_TEXT_EMBEDDING_ENDPOINTS)}." 
+ ) + + options = { + "endpoint": self.model_name, + } + return self._bqml_model_factory.create_remote_model( + session=self.session, connection_name=self.connection_name, options=options + ) + + @classmethod + def _from_bq( + cls, session: bigframes.Session, bq_model: bigquery.Model + ) -> TextEmbeddingGenerator: + assert bq_model.model_type == "MODEL_TYPE_UNSPECIFIED" + assert "remoteModelInfo" in bq_model._properties + assert "endpoint" in bq_model._properties["remoteModelInfo"] + assert "connection" in bq_model._properties["remoteModelInfo"] + + # Parse the remote model endpoint + bqml_endpoint = bq_model._properties["remoteModelInfo"]["endpoint"] + model_connection = bq_model._properties["remoteModelInfo"]["connection"] + model_endpoint = bqml_endpoint.split("/")[-1] + + model = cls( + session=session, + model_name=model_endpoint, # type: ignore + connection_name=model_connection, + ) + + model._bqml_model = core.BqmlModel(session, bq_model) + return model + + def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: + """Predict the result from input DataFrame. + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): + Input DataFrame or Series, which must contain exactly one column. That column is used as the input text to embed, whatever its name. Content can include preamble, questions, suggestions, instructions, or examples. + + Returns: + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. + """ + + # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models + (X,) = utils.convert_to_dataframe(X) + + if len(X.columns) != 1: + raise ValueError( + f"Only support one column as input. 
{constants.FEEDBACK_LINK}" + ) + + # BQML identified the column by name + col_label = cast(blocks.Label, X.columns[0]) + X = X.rename(columns={col_label: "content"}) + + options = { + "flatten_json_output": True, + } + + df = self._bqml_model.generate_embedding(X, options) + + if (df[_ML_GENERATE_EMBEDDING_STATUS] != "").any(): + warnings.warn( + f"Some predictions failed. Check column {_ML_GENERATE_EMBEDDING_STATUS} for detailed status. You may want to filter the failed rows and retry.", + RuntimeWarning, + ) + + return df + + def to_gbq(self, model_name: str, replace: bool = False) -> TextEmbeddingGenerator: + """Save the model to BigQuery. + + Args: + model_name (str): + The name of the model. + replace (bool, default False): + Determine whether to replace if the model already exists. Default to False. + + Returns: + PaLM2TextEmbeddingGenerator: Saved model.""" + + new_model = self._bqml_model.copy(model_name, replace) + return new_model.session.read_gbq_model(model_name) + + @log_adapter.class_logger class GeminiTextGenerator(base.BaseEstimator): """Gemini text generator LLM model. 
diff --git a/bigframes/ml/loader.py b/bigframes/ml/loader.py index 515fb50c6f..bd01342152 100644 --- a/bigframes/ml/loader.py +++ b/bigframes/ml/loader.py @@ -63,6 +63,8 @@ llm._GEMINI_PRO_ENDPOINT: llm.GeminiTextGenerator, llm._GEMINI_1P5_PRO_PREVIEW_ENDPOINT: llm.GeminiTextGenerator, llm._GEMINI_1P5_PRO_FLASH_PREVIEW_ENDPOINT: llm.GeminiTextGenerator, + llm._TEXT_EMBEDDING_004_ENDPOINT: llm.TextEmbeddingGenerator, + llm._TEXT_MULTILINGUAL_EMBEDDING_002_ENDPOINT: llm.TextEmbeddingGenerator, } ) @@ -84,6 +86,7 @@ def from_bq( imported.XGBoostModel, llm.PaLM2TextGenerator, llm.PaLM2TextEmbeddingGenerator, + llm.TextEmbeddingGenerator, pipeline.Pipeline, compose.ColumnTransformer, preprocessing.PreprocessingType, diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index b926004fd8..c2f62096d0 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -304,6 +304,47 @@ def test_embedding_generator_predict_series_success( assert len(value) == 768 +@pytest.mark.parametrize( + "model_name", + ("text-embedding-004", "text-multilingual-embedding-002"), +) +def test_create_load_text_embedding_generator_model( + dataset_id, model_name, session, bq_connection +): + text_embedding_model = llm.TextEmbeddingGenerator( + model_name=model_name, connection_name=bq_connection, session=session + ) + assert text_embedding_model is not None + assert text_embedding_model._bqml_model is not None + + # save, load to ensure configuration was kept + reloaded_model = text_embedding_model.to_gbq( + f"{dataset_id}.temp_text_model", replace=True + ) + assert f"{dataset_id}.temp_text_model" == reloaded_model._bqml_model.model_name + assert reloaded_model.connection_name == bq_connection + assert reloaded_model.model_name == model_name + + +@pytest.mark.parametrize( + "model_name", + ("text-embedding-004", "text-multilingual-embedding-002"), +) +@pytest.mark.flaky(retries=2) +def 
test_gemini_text_embedding_generator_predict_default_params_success( + llm_text_df, model_name, session, bq_connection +): + text_embedding_model = llm.TextEmbeddingGenerator( + model_name=model_name, connection_name=bq_connection, session=session + ) + df = text_embedding_model.predict(llm_text_df).to_pandas() + assert df.shape == (3, 4) + assert "ml_generate_embedding_result" in df.columns + series = df["ml_generate_embedding_result"] + value = series[0] + assert len(value) == 768 + + @pytest.mark.parametrize( "model_name", ("gemini-pro", "gemini-1.5-pro-preview-0514", "gemini-1.5-flash-preview-0514"), From c69b947920938a8792433899b3ad530d8ac51208 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Mon, 19 Aug 2024 22:18:31 +0000 Subject: [PATCH 2/3] fix docs --- bigframes/ml/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index ff4710258a..45634423c6 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -705,7 +705,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> TextEmbeddingGenerat Determine whether to replace if the model already exists. Default to False. 
Returns: - PaLM2TextEmbeddingGenerator: Saved model.""" + TextEmbeddingGenerator: Saved model.""" new_model = self._bqml_model.copy(model_name, replace) return new_model.session.read_gbq_model(model_name) From 1c41a13e0c64d83086bf752d70feb8c012686333 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Mon, 19 Aug 2024 23:34:06 +0000 Subject: [PATCH 3/3] docs: update embedding notebooks --- .../bq_dataframes_llm_code_generation.ipynb | 4 +- .../bq_dataframes_llm_kmeans.ipynb | 1091 +++++++++-------- 2 files changed, 553 insertions(+), 542 deletions(-) diff --git a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb index 4ea766604d..c0c3c58a3c 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb @@ -89,7 +89,7 @@ "\n", "The steps include:\n", "\n", - "- Defining an LLM model in BigQuery DataFrames, specifically the [`text-bison` model of the PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text), using `bigframes.ml.llm`.\n", + "- Defining an LLM model in BigQuery DataFrames, specifically the [Gemini Model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models), using `bigframes.ml.llm`.\n", "- Creating a DataFrame by reading in data from Cloud Storage.\n", "- Manipulating data in the DataFrame to build LLM prompts.\n", "- Sending DataFrame prompts to the LLM model using the `predict` method.\n", @@ -385,7 +385,7 @@ "source": [ "# Define the LLM model\n", "\n", - "BigQuery DataFrames provides integration with [`text-bison` model of the PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text) via Vertex AI.\n", + "BigQuery DataFrames provides integration with [Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models) via Vertex AI.\n", "\n", "This section walks through a few 
steps required in order to use the model in your notebook." ] diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb index ab6fd93f9a..d49a44a780 100644 --- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb +++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb @@ -57,9 +57,9 @@ "source": [ "## Overview\n", "\n", - "The goal of this notebook is to demonstrate a comment characterization algorithm for an online business. We will accomplish this using [Google's PaLM 2](https://ai.google/discover/palm2/) and [KMeans clustering](https://en.wikipedia.org/wiki/K-means_clustering) in three steps:\n", + "The goal of this notebook is to demonstrate a comment characterization algorithm for an online business. We will accomplish this using [Google's Embedding Models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#models) and [KMeans clustering](https://en.wikipedia.org/wiki/K-means_clustering) in three steps:\n", "\n", - "1. Use PaLM2TextEmbeddingGenerator to [generate text embeddings](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings) for each of 10000 complaints sent to an online bank. If you're not familiar with what a text embedding is, it's a list of numbers that are like coordinates in an imaginary \"meaning space\" for sentences. (It's like [word embeddings](https://en.wikipedia.org/wiki/Word_embedding), but for more general text.) The important point for our purposes is that similar sentences are close to each other in this imaginary space.\n", + "1. Use TextEmbeddingGenerator to [generate text embeddings](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings) for each of 10000 complaints sent to an online bank. If you're not familiar with what a text embedding is, it's a list of numbers that are like coordinates in an imaginary \"meaning space\" for sentences. 
(It's like [word embeddings](https://en.wikipedia.org/wiki/Word_embedding), but for more general text.) The important point for our purposes is that similar sentences are close to each other in this imaginary space.\n", "2. Use KMeans clustering to group together complaints whose text embeddings are near to eachother. This will give us sets of similar complaints, but we don't yet know _why_ these complaints are similar.\n", "3. Prompt GeminiTextGenerator in English asking what the difference is between the groups of complaints that we got. Thanks to the power of modern LLMs, the response might give us a very good idea of what these complaints are all about, but remember to [\"understand the limits of your dataset and model.\"](https://ai.google/responsibility/responsible-ai-practices/#:~:text=Understand%20the%20limitations%20of%20your%20dataset%20and%20model)\n", "\n", @@ -289,7 +289,7 @@ { "data": { "text/html": [ - "Query job 952b852e-7cf0-493d-8258-fe60daf45ebf is DONE. 2.3 GB processed. Open Job" + "Query job 960f637d-89eb-4bbf-a34c-36ed624e8e9a is DONE. 2.3 GB processed. Open Job" ], "text/plain": [ "" @@ -301,7 +301,7 @@ { "data": { "text/html": [ - "Query job f9939880-6c66-4da5-9e90-daf8d9a9d83c is DONE. 50.3 MB processed. Open Job" + "Query job 59bb207c-98e1-4dab-8686-320f276b09df is DONE. 63.7 MB processed. 
Open Job" ], "text/plain": [ "" @@ -336,24 +336,24 @@ " \n", " \n", " \n", - " 1799560\n", - " Thursday, XX/XX/XXXX, unauthorized charges wer...\n", + " 2557016\n", + " I've been disputing fraud accounts on my credi...\n", " \n", " \n", - " 1800272\n", - " The credit reporting company is reporting inac...\n", + " 2557686\n", + " American Express Platinum totally messed up my...\n", " \n", " \n", - " 1800409\n", - " In accordance with the Fair Credit Reporting a...\n", + " 2558170\n", + " I recently looked at my credit report and noti...\n", " \n", " \n", - " 1800550\n", - " I told the credit bureaus to \" investigate eve...\n", + " 2558545\n", + " Select Portfolio Servicing contacted my insura...\n", " \n", " \n", - " 1800818\n", - " Im writing in reference regarding XXXXXXXX XXX...\n", + " 2558652\n", + " I checked my credit report and I am upset on w...\n", " \n", " \n", "\n", @@ -361,11 +361,11 @@ ], "text/plain": [ " consumer_complaint_narrative\n", - "1799560 Thursday, XX/XX/XXXX, unauthorized charges wer...\n", - "1800272 The credit reporting company is reporting inac...\n", - "1800409 In accordance with the Fair Credit Reporting a...\n", - "1800550 I told the credit bureaus to \" investigate eve...\n", - "1800818 Im writing in reference regarding XXXXXXXX XXX..." + "2557016 I've been disputing fraud accounts on my credi...\n", + "2557686 American Express Platinum totally messed up my...\n", + "2558170 I recently looked at my credit report and noti...\n", + "2558545 Select Portfolio Servicing contacted my insura...\n", + "2558652 I checked my credit report and I am upset on w..." ] }, "execution_count": 7, @@ -418,7 +418,7 @@ { "data": { "text/html": [ - "Query job e3ff0549-f0ee-4508-bb4f-beea14bf54f5 is DONE. 0 Bytes processed. Open Job" + "Query job e4616b5e-b4c0-490c-a249-484f373f89d9 is DONE. 0 Bytes processed. 
Open Job" ], "text/plain": [ "" @@ -429,9 +429,9 @@ } ], "source": [ - "from bigframes.ml.llm import PaLM2TextEmbeddingGenerator\n", + "from bigframes.ml.llm import TextEmbeddingGenerator\n", "\n", - "model = PaLM2TextEmbeddingGenerator() # No connection id needed" + "model = TextEmbeddingGenerator() # No connection id needed" ] }, { @@ -444,7 +444,7 @@ { "data": { "text/html": [ - "Query job 5b3d8f8c-9e8d-4378-b4df-e3328300f17a is DONE. 1.3 GB processed. Open Job" + "Query job 89f96e88-2dd5-4326-8912-925b237e2877 is DONE. 1.3 GB processed. Open Job" ], "text/plain": [ "" @@ -454,21 +454,17 @@ "output_type": "display_data" }, { - "data": { - "text/html": [ - "Query job f35c2982-4953-45fa-84bd-d0ce04e13c5e is DONE. 80.0 kB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/__init__.py:108: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] }, { "data": { "text/html": [ - "Query job b70c55a3-b18b-4313-86b0-31f5b3b570fb is DONE. 20.0 kB processed. Open Job" + "Query job bcdbfe96-2cce-4269-81f4-0334033b458b is DONE. 20.0 kB processed. Open Job" ], "text/plain": [ "" @@ -480,7 +476,7 @@ { "data": { "text/html": [ - "Query job 2b2cfd9f-c713-4411-a3ca-1916cec84ff0 is DONE. 0 Bytes processed. Open Job" + "Query job 3b89850f-4491-4343-912a-7a2fd3137790 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -492,7 +488,7 @@ { "data": { "text/html": [ - "Query job 09cadae1-1c66-43cf-a76f-7495b0123006 is DONE. 71.9 MB processed. Open Job" + "Query job a2999e90-8d14-4f4a-99dc-4e769df01837 is DONE. 72.0 MB processed. 
Open Job" ], "text/plain": [ "" @@ -522,187 +518,188 @@ " \n", " \n", " \n", - " text_embedding\n", - " statistics\n", - " ml_embed_text_status\n", + " ml_generate_embedding_result\n", + " ml_generate_embedding_statistics\n", + " ml_generate_embedding_status\n", " content\n", " \n", " \n", " \n", " \n", - " 782\n", - " [ 2.78223325e-02 -1.71949025e-02 -5.01214415e-...\n", - " {\"token_count\":121,\"truncated\":false}\n", + " 415\n", + " [ 2.56774724e-02 -1.06168222e-02 3.06945704e-...\n", + " {\"token_count\":171,\"truncated\":false}\n", " \n", - " I 've sent multiple letters to this agency abo...\n", + " DEPT OF EDUCATION/XXXX is stating I was late ...\n", " \n", " \n", - " 795\n", - " [ 1.39164589e-02 -5.08313216e-02 -4.53360938e-...\n", - " {\"token_count\":141,\"truncated\":false}\n", + " 596\n", + " [ 5.90653270e-02 -9.31344274e-03 -7.12460047e-...\n", + " {\"token_count\":668,\"truncated\":false}\n", " \n", - " I receive social security XXXX funds in my XXX...\n", + " I alerted my credit card company XX/XX/2017 th...\n", " \n", " \n", - " 861\n", - " [ 7.80681521e-03 -3.23560014e-02 -6.76454604e-...\n", - " {\"token_count\":160,\"truncated\":false}\n", + " 706\n", + " [ 0.01298233 0.00130001 0.01800315 0.037078...\n", + " {\"token_count\":252,\"truncated\":false}\n", " \n", - " Hello, My name is XXXX XXXX XXXX. I have a pro...\n", + " Sallie mae is corrupt. 
\n", + "I have tried to talk t...\n", " \n", " \n", - " 1103\n", - " [ 4.18044440e-02 -4.28444222e-02 -6.26875088e-...\n", - " {\"token_count\":31,\"truncated\":false}\n", + " 804\n", + " [-1.39777679e-02 1.68943349e-02 5.53999236e-...\n", + " {\"token_count\":412,\"truncated\":false}\n", " \n", - " The debt occurred more than 7 years in the pas...\n", + " In accordance with the Fair Credit Reporting a...\n", " \n", " \n", - " 1241\n", - " [ 7.74183637e-03 -6.50701150e-02 -2.13856809e-...\n", - " {\"token_count\":23,\"truncated\":false}\n", + " 861\n", + " [ 2.33309343e-02 -2.36528926e-03 3.37129943e-...\n", + " {\"token_count\":160,\"truncated\":false}\n", " \n", - " UNAUTHORIZED CREDIT REPORTING NO ACCOUNT TO VA...\n", + " Hello, My name is XXXX XXXX XXXX. I have a pro...\n", " \n", " \n", - " 1729\n", - " [ 3.15782428e-02 -1.23979878e-02 -3.70296165e-...\n", - " {\"token_count\":382,\"truncated\":false}\n", + " 1030\n", + " [ 0.06060313 -0.06495965 -0.03605044 -0.028016...\n", + " {\"token_count\":298,\"truncated\":false}\n", " \n", - " XXXX on XXXX XX/XX/2021 I have Mr. 
Cooper mort...\n", + " Hello, I would like to complain about PayPal H...\n", " \n", " \n", - " 2167\n", - " [ 9.87241510e-03 -1.81103535e-02 -4.17162031e-...\n", - " {\"token_count\":556,\"truncated\":false}\n", + " 1582\n", + " [ 0.01255985 -0.01652482 -0.02638046 0.036858...\n", + " {\"token_count\":814,\"truncated\":false}\n", " \n", - " This is the third such complaint I have submit...\n", + " Transunion is listing personal information ( n...\n", " \n", " \n", - " 2219\n", - " [ 2.56749280e-02 -4.92163002e-02 -5.67202382e-...\n", - " {\"token_count\":196,\"truncated\":false}\n", + " 1600\n", + " [ 5.13355099e-02 4.01246967e-03 5.72342947e-...\n", + " {\"token_count\":653,\"truncated\":false}\n", " \n", - " Found and add online for a Prepaid Credit card...\n", + " On XX/XX/XXXX, I called Citizen Bank at XXXX t...\n", " \n", " \n", - " 2392\n", - " [ 2.34611966e-02 -4.74611111e-02 -3.59710641e-...\n", - " {\"token_count\":641,\"truncated\":false}\n", + " 2060\n", + " [ 6.44792162e-04 4.95899878e-02 4.67925966e-...\n", + " {\"token_count\":136,\"truncated\":false}\n", " \n", - " I am furnishing this complaint against Fed Loa...\n", + " Theses names are the known liars that I have s...\n", " \n", " \n", - " 2528\n", - " [ 1.90760177e-02 -4.90266569e-02 -5.60806654e-...\n", - " {\"token_count\":176,\"truncated\":false}\n", + " 2283\n", + " [ 4.71848622e-02 -8.68239347e-03 5.80501892e-...\n", + " {\"token_count\":478,\"truncated\":false}\n", " \n", - " Despite multiple written requests, the unverif...\n", + " My house was hit by a tree XX/XX/2018. 
My insu...\n", " \n", " \n", - " 2737\n", - " [ 1.81887485e-02 -8.74284655e-03 -2.73009986e-...\n", - " {\"token_count\":230,\"truncated\":false}\n", + " 2421\n", + " [-2.90394691e-03 -1.81679502e-02 -7.99657404e-...\n", + " {\"token_count\":389,\"truncated\":false}\n", " \n", - " After unsatisfying communication in the messag...\n", + " I became aware of a credit inquiry on my XXXX...\n", " \n", " \n", - " 2859\n", - " [ 3.52482982e-02 -3.30757573e-02 -4.48422395e-...\n", - " {\"token_count\":238,\"truncated\":false}\n", + " 2422\n", + " [-6.70500053e-03 1.51133696e-02 4.94448021e-...\n", + " {\"token_count\":124,\"truncated\":false}\n", " \n", - " Good Morning. My name is XXXX XXXX. My account...\n", + " I have sent numerous letters, police reports a...\n", " \n", " \n", - " 3439\n", - " [ 3.40348878e-03 -2.72301212e-02 -2.03482248e-...\n", - " {\"token_count\":197,\"truncated\":false}\n", + " 2658\n", + " [ 6.70989677e-02 -3.53626162e-02 1.08648362e-...\n", + " {\"token_count\":762,\"truncated\":false}\n", " \n", - " I have ongoing disputes that are preventing me...\n", + " This letter concerns two disputes ( chargeback...\n", " \n", " \n", - " 3738\n", - " [ 0.01422119 -0.01114973 -0.04438976 -0.024421...\n", - " {\"token_count\":160,\"truncated\":false}\n", + " 2883\n", + " [-1.28255319e-02 -1.89735275e-02 5.68657108e-...\n", + " {\"token_count\":71,\"truncated\":false}\n", " \n", - " I had a loan with national Collegiate Trust. 
i...\n", + " It is very frustrating that this has been goin...\n", " \n", " \n", - " 3805\n", - " [ 1.08179580e-02 -3.44337188e-02 -5.08812033e-...\n", - " {\"token_count\":477,\"truncated\":false}\n", + " 2951\n", + " [ 3.23301251e-03 -2.61142217e-02 1.31891826e-...\n", + " {\"token_count\":95,\"truncated\":false}\n", " \n", - " Hi I am submitting this XXXX XXXX this isn't a...\n", + " I, the consumer, in fact, have a right to priv...\n", " \n", " \n", - " 3915\n", - " [-7.23852217e-03 -4.69538383e-02 -5.60489520e-...\n", - " {\"token_count\":116,\"truncated\":false}\n", + " 2992\n", + " [-2.22910382e-03 -1.07050659e-02 4.74211425e-...\n", + " {\"token_count\":407,\"truncated\":false}\n", " \n", - " portfolio is showin on my credit report with a...\n", + " XXXX XXXX XXXX should not be reporting to Expe...\n", " \n", " \n", - " 3917\n", - " [-8.92711710e-03 -4.49132621e-02 -4.29662578e-...\n", - " {\"token_count\":71,\"truncated\":false}\n", + " 3969\n", + " [ 1.58297736e-02 3.01055871e-02 5.60088176e-...\n", + " {\"token_count\":287,\"truncated\":false}\n", " \n", - " the company shared my information with another...\n", + " DEAR CFPB ; XXXX ; XXXX ; AND TRANSUNION ; SEE...\n", " \n", " \n", - " 4281\n", - " [-1.69487391e-02 -1.89835522e-02 -3.80971469e-...\n", - " {\"token_count\":130,\"truncated\":false}\n", + " 4087\n", + " [ 1.99207035e-03 -7.62321474e-03 7.92114343e-...\n", + " {\"token_count\":88,\"truncated\":false}\n", " \n", - " I tried to submit a teacher loan forgiveness a...\n", + " This debt was from my identity being stolen I ...\n", " \n", " \n", - " 4470\n", - " [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-...\n", - " {\"token_count\":200,\"truncated\":false}\n", + " 4326\n", + " [ 3.44273262e-02 -3.36350128e-02 1.91939529e-...\n", + " {\"token_count\":52,\"truncated\":false}\n", " \n", - " in accordance with the Fair Credit Reporting a...\n", + " The items that are reflected on my credit repo...\n", " \n", " \n", - " 4915\n", - " [ 5.19403480e-02 
-7.32436478e-02 -4.60561663e-...\n", - " {\"token_count\":23,\"truncated\":false}\n", + " 4682\n", + " [ 2.47727744e-02 -1.77769139e-02 4.63737026e-...\n", + " {\"token_count\":284,\"truncated\":false}\n", " \n", - " XXXX XXXX did not give me a receipt or a copy ...\n", + " I filed for chapter XXXX bankruptcy on XXXX...\n", " \n", " \n", - " 4928\n", - " [-4.43694415e-03 -3.66776163e-04 -9.08496231e-...\n", - " {\"token_count\":83,\"truncated\":false}\n", + " 5005\n", + " [ 2.51834448e-02 -4.92606424e-02 -1.37688573e-...\n", + " {\"token_count\":17,\"truncated\":false}\n", " \n", - " This company has filed a civil suit during a g...\n", + " There are 2 Inquires on my credit report that ...\n", " \n", " \n", - " 5338\n", - " [ 2.19908613e-03 -3.93951498e-02 -6.52823672e-...\n", - " {\"token_count\":1279,\"truncated\":false}\n", + " 5144\n", + " [ 3.26358266e-02 -3.67171178e-03 3.65621522e-...\n", + " {\"token_count\":105,\"truncated\":false}\n", " \n", - " My credit report contains errors that is keepi...\n", + " My mortgage was sold from XXXX XXXX to freed...\n", " \n", " \n", - " 5582\n", - " [ 2.86326781e-02 -4.89189997e-02 -8.68150592e-...\n", - " {\"token_count\":396,\"truncated\":false}\n", + " 6090\n", + " [ 2.47520711e-02 1.09149124e-02 1.35175223e-...\n", + " {\"token_count\":545,\"truncated\":false}\n", " \n", - " Coast Professional, XXXX, LA contacted me by m...\n", + " On XX/XX/XXXX this company received certified...\n", " \n", " \n", - " 6386\n", - " [ 3.33276950e-02 1.53224478e-02 -1.89354066e-...\n", - " {\"token_count\":79,\"truncated\":false}\n", + " 6449\n", + " [ 1.86854266e-02 1.31238240e-03 -4.96791191e-...\n", + " {\"token_count\":104,\"truncated\":false}\n", " \n", - " Cares act refund requested in XXXX, called mul...\n", + " After hours on the phone with multiple agents,...\n", " \n", " \n", - " 6956\n", - " [ 1.47060463e-02 -3.36431377e-02 -6.56675845e-...\n", - " {\"token_count\":194,\"truncated\":false}\n", + " 6486\n", + " [ 
1.56347770e-02 2.23377198e-02 -1.32683543e-...\n", + " {\"token_count\":211,\"truncated\":false}\n", " \n", - " n accordance with the Fair Credit Reporting ac...\n", + " On XX/XX/2019 two charges one for XXXX and one...\n", " \n", " \n", "\n", @@ -710,86 +707,87 @@ "[10000 rows x 4 columns in total]" ], "text/plain": [ - " text_embedding \\\n", - "782 [ 2.78223325e-02 -1.71949025e-02 -5.01214415e-... \n", - "795 [ 1.39164589e-02 -5.08313216e-02 -4.53360938e-... \n", - "861 [ 7.80681521e-03 -3.23560014e-02 -6.76454604e-... \n", - "1103 [ 4.18044440e-02 -4.28444222e-02 -6.26875088e-... \n", - "1241 [ 7.74183637e-03 -6.50701150e-02 -2.13856809e-... \n", - "1729 [ 3.15782428e-02 -1.23979878e-02 -3.70296165e-... \n", - "2167 [ 9.87241510e-03 -1.81103535e-02 -4.17162031e-... \n", - "2219 [ 2.56749280e-02 -4.92163002e-02 -5.67202382e-... \n", - "2392 [ 2.34611966e-02 -4.74611111e-02 -3.59710641e-... \n", - "2528 [ 1.90760177e-02 -4.90266569e-02 -5.60806654e-... \n", - "2737 [ 1.81887485e-02 -8.74284655e-03 -2.73009986e-... \n", - "2859 [ 3.52482982e-02 -3.30757573e-02 -4.48422395e-... \n", - "3439 [ 3.40348878e-03 -2.72301212e-02 -2.03482248e-... \n", - "3738 [ 0.01422119 -0.01114973 -0.04438976 -0.024421... \n", - "3805 [ 1.08179580e-02 -3.44337188e-02 -5.08812033e-... \n", - "3915 [-7.23852217e-03 -4.69538383e-02 -5.60489520e-... \n", - "3917 [-8.92711710e-03 -4.49132621e-02 -4.29662578e-... \n", - "4281 [-1.69487391e-02 -1.89835522e-02 -3.80971469e-... \n", - "4470 [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-... \n", - "4915 [ 5.19403480e-02 -7.32436478e-02 -4.60561663e-... \n", - "4928 [-4.43694415e-03 -3.66776163e-04 -9.08496231e-... \n", - "5338 [ 2.19908613e-03 -3.93951498e-02 -6.52823672e-... \n", - "5582 [ 2.86326781e-02 -4.89189997e-02 -8.68150592e-... \n", - "6386 [ 3.33276950e-02 1.53224478e-02 -1.89354066e-... \n", - "6956 [ 1.47060463e-02 -3.36431377e-02 -6.56675845e-... 
\n", + " ml_generate_embedding_result \\\n", + "415 [ 2.56774724e-02 -1.06168222e-02 3.06945704e-... \n", + "596 [ 5.90653270e-02 -9.31344274e-03 -7.12460047e-... \n", + "706 [ 0.01298233 0.00130001 0.01800315 0.037078... \n", + "804 [-1.39777679e-02 1.68943349e-02 5.53999236e-... \n", + "861 [ 2.33309343e-02 -2.36528926e-03 3.37129943e-... \n", + "1030 [ 0.06060313 -0.06495965 -0.03605044 -0.028016... \n", + "1582 [ 0.01255985 -0.01652482 -0.02638046 0.036858... \n", + "1600 [ 5.13355099e-02 4.01246967e-03 5.72342947e-... \n", + "2060 [ 6.44792162e-04 4.95899878e-02 4.67925966e-... \n", + "2283 [ 4.71848622e-02 -8.68239347e-03 5.80501892e-... \n", + "2421 [-2.90394691e-03 -1.81679502e-02 -7.99657404e-... \n", + "2422 [-6.70500053e-03 1.51133696e-02 4.94448021e-... \n", + "2658 [ 6.70989677e-02 -3.53626162e-02 1.08648362e-... \n", + "2883 [-1.28255319e-02 -1.89735275e-02 5.68657108e-... \n", + "2951 [ 3.23301251e-03 -2.61142217e-02 1.31891826e-... \n", + "2992 [-2.22910382e-03 -1.07050659e-02 4.74211425e-... \n", + "3969 [ 1.58297736e-02 3.01055871e-02 5.60088176e-... \n", + "4087 [ 1.99207035e-03 -7.62321474e-03 7.92114343e-... \n", + "4326 [ 3.44273262e-02 -3.36350128e-02 1.91939529e-... \n", + "4682 [ 2.47727744e-02 -1.77769139e-02 4.63737026e-... \n", + "5005 [ 2.51834448e-02 -4.92606424e-02 -1.37688573e-... \n", + "5144 [ 3.26358266e-02 -3.67171178e-03 3.65621522e-... \n", + "6090 [ 2.47520711e-02 1.09149124e-02 1.35175223e-... \n", + "6449 [ 1.86854266e-02 1.31238240e-03 -4.96791191e-... \n", + "6486 [ 1.56347770e-02 2.23377198e-02 -1.32683543e-... 
\n", "\n", - " statistics ml_embed_text_status \\\n", - "782 {\"token_count\":121,\"truncated\":false} \n", - "795 {\"token_count\":141,\"truncated\":false} \n", - "861 {\"token_count\":160,\"truncated\":false} \n", - "1103 {\"token_count\":31,\"truncated\":false} \n", - "1241 {\"token_count\":23,\"truncated\":false} \n", - "1729 {\"token_count\":382,\"truncated\":false} \n", - "2167 {\"token_count\":556,\"truncated\":false} \n", - "2219 {\"token_count\":196,\"truncated\":false} \n", - "2392 {\"token_count\":641,\"truncated\":false} \n", - "2528 {\"token_count\":176,\"truncated\":false} \n", - "2737 {\"token_count\":230,\"truncated\":false} \n", - "2859 {\"token_count\":238,\"truncated\":false} \n", - "3439 {\"token_count\":197,\"truncated\":false} \n", - "3738 {\"token_count\":160,\"truncated\":false} \n", - "3805 {\"token_count\":477,\"truncated\":false} \n", - "3915 {\"token_count\":116,\"truncated\":false} \n", - "3917 {\"token_count\":71,\"truncated\":false} \n", - "4281 {\"token_count\":130,\"truncated\":false} \n", - "4470 {\"token_count\":200,\"truncated\":false} \n", - "4915 {\"token_count\":23,\"truncated\":false} \n", - "4928 {\"token_count\":83,\"truncated\":false} \n", - "5338 {\"token_count\":1279,\"truncated\":false} \n", - "5582 {\"token_count\":396,\"truncated\":false} \n", - "6386 {\"token_count\":79,\"truncated\":false} \n", - "6956 {\"token_count\":194,\"truncated\":false} \n", + " ml_generate_embedding_statistics ml_generate_embedding_status \\\n", + "415 {\"token_count\":171,\"truncated\":false} \n", + "596 {\"token_count\":668,\"truncated\":false} \n", + "706 {\"token_count\":252,\"truncated\":false} \n", + "804 {\"token_count\":412,\"truncated\":false} \n", + "861 {\"token_count\":160,\"truncated\":false} \n", + "1030 {\"token_count\":298,\"truncated\":false} \n", + "1582 {\"token_count\":814,\"truncated\":false} \n", + "1600 {\"token_count\":653,\"truncated\":false} \n", + "2060 {\"token_count\":136,\"truncated\":false} \n", + "2283 
{\"token_count\":478,\"truncated\":false} \n", + "2421 {\"token_count\":389,\"truncated\":false} \n", + "2422 {\"token_count\":124,\"truncated\":false} \n", + "2658 {\"token_count\":762,\"truncated\":false} \n", + "2883 {\"token_count\":71,\"truncated\":false} \n", + "2951 {\"token_count\":95,\"truncated\":false} \n", + "2992 {\"token_count\":407,\"truncated\":false} \n", + "3969 {\"token_count\":287,\"truncated\":false} \n", + "4087 {\"token_count\":88,\"truncated\":false} \n", + "4326 {\"token_count\":52,\"truncated\":false} \n", + "4682 {\"token_count\":284,\"truncated\":false} \n", + "5005 {\"token_count\":17,\"truncated\":false} \n", + "5144 {\"token_count\":105,\"truncated\":false} \n", + "6090 {\"token_count\":545,\"truncated\":false} \n", + "6449 {\"token_count\":104,\"truncated\":false} \n", + "6486 {\"token_count\":211,\"truncated\":false} \n", "\n", " content \n", - "782 I 've sent multiple letters to this agency abo... \n", - "795 I receive social security XXXX funds in my XXX... \n", + "415 DEPT OF EDUCATION/XXXX is stating I was late ... \n", + "596 I alerted my credit card company XX/XX/2017 th... \n", + "706 Sallie mae is corrupt. \n", + "I have tried to talk t... \n", + "804 In accordance with the Fair Credit Reporting a... \n", "861 Hello, My name is XXXX XXXX XXXX. I have a pro... \n", - "1103 The debt occurred more than 7 years in the pas... \n", - "1241 UNAUTHORIZED CREDIT REPORTING NO ACCOUNT TO VA... \n", - "1729 XXXX on XXXX XX/XX/2021 I have Mr. Cooper mort... \n", - "2167 This is the third such complaint I have submit... \n", - "2219 Found and add online for a Prepaid Credit card... \n", - "2392 I am furnishing this complaint against Fed Loa... \n", - "2528 Despite multiple written requests, the unverif... \n", - "2737 After unsatisfying communication in the messag... \n", - "2859 Good Morning. My name is XXXX XXXX. My account... \n", - "3439 I have ongoing disputes that are preventing me... 
\n", - "3738 I had a loan with national Collegiate Trust. i... \n", - "3805 Hi I am submitting this XXXX XXXX this isn't a... \n", - "3915 portfolio is showin on my credit report with a... \n", - "3917 the company shared my information with another... \n", - "4281 I tried to submit a teacher loan forgiveness a... \n", - "4470 in accordance with the Fair Credit Reporting a... \n", - "4915 XXXX XXXX did not give me a receipt or a copy ... \n", - "4928 This company has filed a civil suit during a g... \n", - "5338 My credit report contains errors that is keepi... \n", - "5582 Coast Professional, XXXX, LA contacted me by m... \n", - "6386 Cares act refund requested in XXXX, called mul... \n", - "6956 n accordance with the Fair Credit Reporting ac... \n", + "1030 Hello, I would like to complain about PayPal H... \n", + "1582 Transunion is listing personal information ( n... \n", + "1600 On XX/XX/XXXX, I called Citizen Bank at XXXX t... \n", + "2060 Theses names are the known liars that I have s... \n", + "2283 My house was hit by a tree XX/XX/2018. My insu... \n", + "2421 I became aware of a credit inquiry on my XXXX... \n", + "2422 I have sent numerous letters, police reports a... \n", + "2658 This letter concerns two disputes ( chargeback... \n", + "2883 It is very frustrating that this has been goin... \n", + "2951 I, the consumer, in fact, have a right to priv... \n", + "2992 XXXX XXXX XXXX should not be reporting to Expe... \n", + "3969 DEAR CFPB ; XXXX ; XXXX ; AND TRANSUNION ; SEE... \n", + "4087 This debt was from my identity being stolen I ... \n", + "4326 The items that are reflected on my credit repo... \n", + "4682 I filed for chapter XXXX bankruptcy on XXXX... \n", + "5005 There are 2 Inquires on my credit report that ... \n", + "5144 My mortgage was sold from XXXX XXXX to freed... \n", + "6090 On XX/XX/XXXX this company received certified... \n", + "6449 After hours on the phone with multiple agents,... 
\n", + "6486 On XX/XX/2019 two charges one for XXXX and one... \n", "...\n", "\n", "[10000 rows x 4 columns]" @@ -816,13 +814,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job 2c99b34a-1956-4de7-8330-898f1f25560b is DONE. 71.9 MB processed. Open Job" + "Query job 16915c47-ab13-4d06-94aa-9ebdb65d91fe is DONE. 72.0 MB processed. Open Job" ], "text/plain": [ "" @@ -834,7 +832,7 @@ { "data": { "text/html": [ - "Query job 3ffed5f8-935a-4a3f-a560-6416445e4868 is DONE. 0 Bytes processed. Open Job" + "Query job 4ab4fbf0-6fd3-4936-9915-cfd7ccd106d1 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -846,7 +844,7 @@ { "data": { "text/html": [ - "Query job 7b55783a-6d8f-41b9-b404-73253140029a is DONE. 72.3 MB processed. Open Job" + "Query job b11d3794-6bb8-4c47-a91b-dcc472cf4d69 is DONE. 72.4 MB processed. Open Job" ], "text/plain": [ "" @@ -876,187 +874,188 @@ " \n", " \n", " \n", - " text_embedding\n", - " statistics\n", - " ml_embed_text_status\n", + " ml_generate_embedding_result\n", + " ml_generate_embedding_statistics\n", + " ml_generate_embedding_status\n", " content\n", " \n", " \n", " \n", " \n", - " 782\n", - " [ 2.78223325e-02 -1.71949025e-02 -5.01214415e-...\n", - " {\"token_count\":121,\"truncated\":false}\n", + " 415\n", + " [ 2.56774724e-02 -1.06168222e-02 3.06945704e-...\n", + " {\"token_count\":171,\"truncated\":false}\n", " \n", - " I 've sent multiple letters to this agency abo...\n", + " DEPT OF EDUCATION/XXXX is stating I was late ...\n", " \n", " \n", - " 795\n", - " [ 1.39164589e-02 -5.08313216e-02 -4.53360938e-...\n", - " {\"token_count\":141,\"truncated\":false}\n", + " 596\n", + " [ 5.90653270e-02 -9.31344274e-03 -7.12460047e-...\n", + " {\"token_count\":668,\"truncated\":false}\n", " \n", - " I receive social security XXXX funds in my XXX...\n", + " I alerted my credit card company XX/XX/2017 th...\n", " \n", " \n", - " 861\n", - " [ 
7.80681521e-03 -3.23560014e-02 -6.76454604e-...\n", - " {\"token_count\":160,\"truncated\":false}\n", + " 706\n", + " [ 0.01298233 0.00130001 0.01800315 0.037078...\n", + " {\"token_count\":252,\"truncated\":false}\n", " \n", - " Hello, My name is XXXX XXXX XXXX. I have a pro...\n", + " Sallie mae is corrupt. \n", + "I have tried to talk t...\n", " \n", " \n", - " 1103\n", - " [ 4.18044440e-02 -4.28444222e-02 -6.26875088e-...\n", - " {\"token_count\":31,\"truncated\":false}\n", + " 804\n", + " [-1.39777679e-02 1.68943349e-02 5.53999236e-...\n", + " {\"token_count\":412,\"truncated\":false}\n", " \n", - " The debt occurred more than 7 years in the pas...\n", + " In accordance with the Fair Credit Reporting a...\n", " \n", " \n", - " 1241\n", - " [ 7.74183637e-03 -6.50701150e-02 -2.13856809e-...\n", - " {\"token_count\":23,\"truncated\":false}\n", + " 861\n", + " [ 2.33309343e-02 -2.36528926e-03 3.37129943e-...\n", + " {\"token_count\":160,\"truncated\":false}\n", " \n", - " UNAUTHORIZED CREDIT REPORTING NO ACCOUNT TO VA...\n", + " Hello, My name is XXXX XXXX XXXX. I have a pro...\n", " \n", " \n", - " 1729\n", - " [ 3.15782428e-02 -1.23979878e-02 -3.70296165e-...\n", - " {\"token_count\":382,\"truncated\":false}\n", + " 1030\n", + " [ 0.06060313 -0.06495965 -0.03605044 -0.028016...\n", + " {\"token_count\":298,\"truncated\":false}\n", " \n", - " XXXX on XXXX XX/XX/2021 I have Mr. 
Cooper mort...\n", + " Hello, I would like to complain about PayPal H...\n", " \n", " \n", - " 2167\n", - " [ 9.87241510e-03 -1.81103535e-02 -4.17162031e-...\n", - " {\"token_count\":556,\"truncated\":false}\n", + " 1582\n", + " [ 0.01255985 -0.01652482 -0.02638046 0.036858...\n", + " {\"token_count\":814,\"truncated\":false}\n", " \n", - " This is the third such complaint I have submit...\n", + " Transunion is listing personal information ( n...\n", " \n", " \n", - " 2219\n", - " [ 2.56749280e-02 -4.92163002e-02 -5.67202382e-...\n", - " {\"token_count\":196,\"truncated\":false}\n", + " 1600\n", + " [ 5.13355099e-02 4.01246967e-03 5.72342947e-...\n", + " {\"token_count\":653,\"truncated\":false}\n", " \n", - " Found and add online for a Prepaid Credit card...\n", + " On XX/XX/XXXX, I called Citizen Bank at XXXX t...\n", " \n", " \n", - " 2392\n", - " [ 2.34611966e-02 -4.74611111e-02 -3.59710641e-...\n", - " {\"token_count\":641,\"truncated\":false}\n", + " 2060\n", + " [ 6.44792162e-04 4.95899878e-02 4.67925966e-...\n", + " {\"token_count\":136,\"truncated\":false}\n", " \n", - " I am furnishing this complaint against Fed Loa...\n", + " Theses names are the known liars that I have s...\n", " \n", " \n", - " 2528\n", - " [ 1.90760177e-02 -4.90266569e-02 -5.60806654e-...\n", - " {\"token_count\":176,\"truncated\":false}\n", + " 2283\n", + " [ 4.71848622e-02 -8.68239347e-03 5.80501892e-...\n", + " {\"token_count\":478,\"truncated\":false}\n", " \n", - " Despite multiple written requests, the unverif...\n", + " My house was hit by a tree XX/XX/2018. 
My insu...\n", " \n", " \n", - " 2737\n", - " [ 1.81887485e-02 -8.74284655e-03 -2.73009986e-...\n", - " {\"token_count\":230,\"truncated\":false}\n", + " 2421\n", + " [-2.90394691e-03 -1.81679502e-02 -7.99657404e-...\n", + " {\"token_count\":389,\"truncated\":false}\n", " \n", - " After unsatisfying communication in the messag...\n", + " I became aware of a credit inquiry on my XXXX...\n", " \n", " \n", - " 2859\n", - " [ 3.52482982e-02 -3.30757573e-02 -4.48422395e-...\n", - " {\"token_count\":238,\"truncated\":false}\n", + " 2422\n", + " [-6.70500053e-03 1.51133696e-02 4.94448021e-...\n", + " {\"token_count\":124,\"truncated\":false}\n", " \n", - " Good Morning. My name is XXXX XXXX. My account...\n", + " I have sent numerous letters, police reports a...\n", " \n", " \n", - " 3439\n", - " [ 3.40348878e-03 -2.72301212e-02 -2.03482248e-...\n", - " {\"token_count\":197,\"truncated\":false}\n", + " 2658\n", + " [ 6.70989677e-02 -3.53626162e-02 1.08648362e-...\n", + " {\"token_count\":762,\"truncated\":false}\n", " \n", - " I have ongoing disputes that are preventing me...\n", + " This letter concerns two disputes ( chargeback...\n", " \n", " \n", - " 3738\n", - " [ 0.01422119 -0.01114973 -0.04438976 -0.024421...\n", - " {\"token_count\":160,\"truncated\":false}\n", + " 2883\n", + " [-1.28255319e-02 -1.89735275e-02 5.68657108e-...\n", + " {\"token_count\":71,\"truncated\":false}\n", " \n", - " I had a loan with national Collegiate Trust. 
i...\n", + " It is very frustrating that this has been goin...\n", " \n", " \n", - " 3805\n", - " [ 1.08179580e-02 -3.44337188e-02 -5.08812033e-...\n", - " {\"token_count\":477,\"truncated\":false}\n", + " 2951\n", + " [ 3.23301251e-03 -2.61142217e-02 1.31891826e-...\n", + " {\"token_count\":95,\"truncated\":false}\n", " \n", - " Hi I am submitting this XXXX XXXX this isn't a...\n", + " I, the consumer, in fact, have a right to priv...\n", " \n", " \n", - " 3915\n", - " [-7.23852217e-03 -4.69538383e-02 -5.60489520e-...\n", - " {\"token_count\":116,\"truncated\":false}\n", + " 2992\n", + " [-2.22910382e-03 -1.07050659e-02 4.74211425e-...\n", + " {\"token_count\":407,\"truncated\":false}\n", " \n", - " portfolio is showin on my credit report with a...\n", + " XXXX XXXX XXXX should not be reporting to Expe...\n", " \n", " \n", - " 3917\n", - " [-8.92711710e-03 -4.49132621e-02 -4.29662578e-...\n", - " {\"token_count\":71,\"truncated\":false}\n", + " 3969\n", + " [ 1.58297736e-02 3.01055871e-02 5.60088176e-...\n", + " {\"token_count\":287,\"truncated\":false}\n", " \n", - " the company shared my information with another...\n", + " DEAR CFPB ; XXXX ; XXXX ; AND TRANSUNION ; SEE...\n", " \n", " \n", - " 4281\n", - " [-1.69487391e-02 -1.89835522e-02 -3.80971469e-...\n", - " {\"token_count\":130,\"truncated\":false}\n", + " 4087\n", + " [ 1.99207035e-03 -7.62321474e-03 7.92114343e-...\n", + " {\"token_count\":88,\"truncated\":false}\n", " \n", - " I tried to submit a teacher loan forgiveness a...\n", + " This debt was from my identity being stolen I ...\n", " \n", " \n", - " 4470\n", - " [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-...\n", - " {\"token_count\":200,\"truncated\":false}\n", + " 4326\n", + " [ 3.44273262e-02 -3.36350128e-02 1.91939529e-...\n", + " {\"token_count\":52,\"truncated\":false}\n", " \n", - " in accordance with the Fair Credit Reporting a...\n", + " The items that are reflected on my credit repo...\n", " \n", " \n", - " 4915\n", - " [ 5.19403480e-02 
-7.32436478e-02 -4.60561663e-...\n", - " {\"token_count\":23,\"truncated\":false}\n", + " 4682\n", + " [ 2.47727744e-02 -1.77769139e-02 4.63737026e-...\n", + " {\"token_count\":284,\"truncated\":false}\n", " \n", - " XXXX XXXX did not give me a receipt or a copy ...\n", + " I filed for chapter XXXX bankruptcy on XXXX...\n", " \n", " \n", - " 4928\n", - " [-4.43694415e-03 -3.66776163e-04 -9.08496231e-...\n", - " {\"token_count\":83,\"truncated\":false}\n", + " 5005\n", + " [ 2.51834448e-02 -4.92606424e-02 -1.37688573e-...\n", + " {\"token_count\":17,\"truncated\":false}\n", " \n", - " This company has filed a civil suit during a g...\n", + " There are 2 Inquires on my credit report that ...\n", " \n", " \n", - " 5338\n", - " [ 2.19908613e-03 -3.93951498e-02 -6.52823672e-...\n", - " {\"token_count\":1279,\"truncated\":false}\n", + " 5144\n", + " [ 3.26358266e-02 -3.67171178e-03 3.65621522e-...\n", + " {\"token_count\":105,\"truncated\":false}\n", " \n", - " My credit report contains errors that is keepi...\n", + " My mortgage was sold from XXXX XXXX to freed...\n", " \n", " \n", - " 5582\n", - " [ 2.86326781e-02 -4.89189997e-02 -8.68150592e-...\n", - " {\"token_count\":396,\"truncated\":false}\n", + " 6090\n", + " [ 2.47520711e-02 1.09149124e-02 1.35175223e-...\n", + " {\"token_count\":545,\"truncated\":false}\n", " \n", - " Coast Professional, XXXX, LA contacted me by m...\n", + " On XX/XX/XXXX this company received certified...\n", " \n", " \n", - " 6386\n", - " [ 3.33276950e-02 1.53224478e-02 -1.89354066e-...\n", - " {\"token_count\":79,\"truncated\":false}\n", + " 6449\n", + " [ 1.86854266e-02 1.31238240e-03 -4.96791191e-...\n", + " {\"token_count\":104,\"truncated\":false}\n", " \n", - " Cares act refund requested in XXXX, called mul...\n", + " After hours on the phone with multiple agents,...\n", " \n", " \n", - " 6956\n", - " [ 1.47060463e-02 -3.36431377e-02 -6.56675845e-...\n", - " {\"token_count\":194,\"truncated\":false}\n", + " 6486\n", + " [ 
1.56347770e-02 2.23377198e-02 -1.32683543e-...\n", + " {\"token_count\":211,\"truncated\":false}\n", " \n", - " n accordance with the Fair Credit Reporting ac...\n", + " On XX/XX/2019 two charges one for XXXX and one...\n", " \n", " \n", "\n", @@ -1064,102 +1063,103 @@ "[10000 rows x 4 columns in total]" ], "text/plain": [ - " text_embedding \\\n", - "782 [ 2.78223325e-02 -1.71949025e-02 -5.01214415e-... \n", - "795 [ 1.39164589e-02 -5.08313216e-02 -4.53360938e-... \n", - "861 [ 7.80681521e-03 -3.23560014e-02 -6.76454604e-... \n", - "1103 [ 4.18044440e-02 -4.28444222e-02 -6.26875088e-... \n", - "1241 [ 7.74183637e-03 -6.50701150e-02 -2.13856809e-... \n", - "1729 [ 3.15782428e-02 -1.23979878e-02 -3.70296165e-... \n", - "2167 [ 9.87241510e-03 -1.81103535e-02 -4.17162031e-... \n", - "2219 [ 2.56749280e-02 -4.92163002e-02 -5.67202382e-... \n", - "2392 [ 2.34611966e-02 -4.74611111e-02 -3.59710641e-... \n", - "2528 [ 1.90760177e-02 -4.90266569e-02 -5.60806654e-... \n", - "2737 [ 1.81887485e-02 -8.74284655e-03 -2.73009986e-... \n", - "2859 [ 3.52482982e-02 -3.30757573e-02 -4.48422395e-... \n", - "3439 [ 3.40348878e-03 -2.72301212e-02 -2.03482248e-... \n", - "3738 [ 0.01422119 -0.01114973 -0.04438976 -0.024421... \n", - "3805 [ 1.08179580e-02 -3.44337188e-02 -5.08812033e-... \n", - "3915 [-7.23852217e-03 -4.69538383e-02 -5.60489520e-... \n", - "3917 [-8.92711710e-03 -4.49132621e-02 -4.29662578e-... \n", - "4281 [-1.69487391e-02 -1.89835522e-02 -3.80971469e-... \n", - "4470 [ 1.28689921e-02 -3.25881056e-02 -6.53645024e-... \n", - "4915 [ 5.19403480e-02 -7.32436478e-02 -4.60561663e-... \n", - "4928 [-4.43694415e-03 -3.66776163e-04 -9.08496231e-... \n", - "5338 [ 2.19908613e-03 -3.93951498e-02 -6.52823672e-... \n", - "5582 [ 2.86326781e-02 -4.89189997e-02 -8.68150592e-... \n", - "6386 [ 3.33276950e-02 1.53224478e-02 -1.89354066e-... \n", - "6956 [ 1.47060463e-02 -3.36431377e-02 -6.56675845e-... 
\n", + " ml_generate_embedding_result \\\n", + "415 [ 2.56774724e-02 -1.06168222e-02 3.06945704e-... \n", + "596 [ 5.90653270e-02 -9.31344274e-03 -7.12460047e-... \n", + "706 [ 0.01298233 0.00130001 0.01800315 0.037078... \n", + "804 [-1.39777679e-02 1.68943349e-02 5.53999236e-... \n", + "861 [ 2.33309343e-02 -2.36528926e-03 3.37129943e-... \n", + "1030 [ 0.06060313 -0.06495965 -0.03605044 -0.028016... \n", + "1582 [ 0.01255985 -0.01652482 -0.02638046 0.036858... \n", + "1600 [ 5.13355099e-02 4.01246967e-03 5.72342947e-... \n", + "2060 [ 6.44792162e-04 4.95899878e-02 4.67925966e-... \n", + "2283 [ 4.71848622e-02 -8.68239347e-03 5.80501892e-... \n", + "2421 [-2.90394691e-03 -1.81679502e-02 -7.99657404e-... \n", + "2422 [-6.70500053e-03 1.51133696e-02 4.94448021e-... \n", + "2658 [ 6.70989677e-02 -3.53626162e-02 1.08648362e-... \n", + "2883 [-1.28255319e-02 -1.89735275e-02 5.68657108e-... \n", + "2951 [ 3.23301251e-03 -2.61142217e-02 1.31891826e-... \n", + "2992 [-2.22910382e-03 -1.07050659e-02 4.74211425e-... \n", + "3969 [ 1.58297736e-02 3.01055871e-02 5.60088176e-... \n", + "4087 [ 1.99207035e-03 -7.62321474e-03 7.92114343e-... \n", + "4326 [ 3.44273262e-02 -3.36350128e-02 1.91939529e-... \n", + "4682 [ 2.47727744e-02 -1.77769139e-02 4.63737026e-... \n", + "5005 [ 2.51834448e-02 -4.92606424e-02 -1.37688573e-... \n", + "5144 [ 3.26358266e-02 -3.67171178e-03 3.65621522e-... \n", + "6090 [ 2.47520711e-02 1.09149124e-02 1.35175223e-... \n", + "6449 [ 1.86854266e-02 1.31238240e-03 -4.96791191e-... \n", + "6486 [ 1.56347770e-02 2.23377198e-02 -1.32683543e-... 
\n", "\n", - " statistics ml_embed_text_status \\\n", - "782 {\"token_count\":121,\"truncated\":false} \n", - "795 {\"token_count\":141,\"truncated\":false} \n", - "861 {\"token_count\":160,\"truncated\":false} \n", - "1103 {\"token_count\":31,\"truncated\":false} \n", - "1241 {\"token_count\":23,\"truncated\":false} \n", - "1729 {\"token_count\":382,\"truncated\":false} \n", - "2167 {\"token_count\":556,\"truncated\":false} \n", - "2219 {\"token_count\":196,\"truncated\":false} \n", - "2392 {\"token_count\":641,\"truncated\":false} \n", - "2528 {\"token_count\":176,\"truncated\":false} \n", - "2737 {\"token_count\":230,\"truncated\":false} \n", - "2859 {\"token_count\":238,\"truncated\":false} \n", - "3439 {\"token_count\":197,\"truncated\":false} \n", - "3738 {\"token_count\":160,\"truncated\":false} \n", - "3805 {\"token_count\":477,\"truncated\":false} \n", - "3915 {\"token_count\":116,\"truncated\":false} \n", - "3917 {\"token_count\":71,\"truncated\":false} \n", - "4281 {\"token_count\":130,\"truncated\":false} \n", - "4470 {\"token_count\":200,\"truncated\":false} \n", - "4915 {\"token_count\":23,\"truncated\":false} \n", - "4928 {\"token_count\":83,\"truncated\":false} \n", - "5338 {\"token_count\":1279,\"truncated\":false} \n", - "5582 {\"token_count\":396,\"truncated\":false} \n", - "6386 {\"token_count\":79,\"truncated\":false} \n", - "6956 {\"token_count\":194,\"truncated\":false} \n", + " ml_generate_embedding_statistics ml_generate_embedding_status \\\n", + "415 {\"token_count\":171,\"truncated\":false} \n", + "596 {\"token_count\":668,\"truncated\":false} \n", + "706 {\"token_count\":252,\"truncated\":false} \n", + "804 {\"token_count\":412,\"truncated\":false} \n", + "861 {\"token_count\":160,\"truncated\":false} \n", + "1030 {\"token_count\":298,\"truncated\":false} \n", + "1582 {\"token_count\":814,\"truncated\":false} \n", + "1600 {\"token_count\":653,\"truncated\":false} \n", + "2060 {\"token_count\":136,\"truncated\":false} \n", + "2283 
{\"token_count\":478,\"truncated\":false} \n", + "2421 {\"token_count\":389,\"truncated\":false} \n", + "2422 {\"token_count\":124,\"truncated\":false} \n", + "2658 {\"token_count\":762,\"truncated\":false} \n", + "2883 {\"token_count\":71,\"truncated\":false} \n", + "2951 {\"token_count\":95,\"truncated\":false} \n", + "2992 {\"token_count\":407,\"truncated\":false} \n", + "3969 {\"token_count\":287,\"truncated\":false} \n", + "4087 {\"token_count\":88,\"truncated\":false} \n", + "4326 {\"token_count\":52,\"truncated\":false} \n", + "4682 {\"token_count\":284,\"truncated\":false} \n", + "5005 {\"token_count\":17,\"truncated\":false} \n", + "5144 {\"token_count\":105,\"truncated\":false} \n", + "6090 {\"token_count\":545,\"truncated\":false} \n", + "6449 {\"token_count\":104,\"truncated\":false} \n", + "6486 {\"token_count\":211,\"truncated\":false} \n", "\n", " content \n", - "782 I 've sent multiple letters to this agency abo... \n", - "795 I receive social security XXXX funds in my XXX... \n", + "415 DEPT OF EDUCATION/XXXX is stating I was late ... \n", + "596 I alerted my credit card company XX/XX/2017 th... \n", + "706 Sallie mae is corrupt. \n", + "I have tried to talk t... \n", + "804 In accordance with the Fair Credit Reporting a... \n", "861 Hello, My name is XXXX XXXX XXXX. I have a pro... \n", - "1103 The debt occurred more than 7 years in the pas... \n", - "1241 UNAUTHORIZED CREDIT REPORTING NO ACCOUNT TO VA... \n", - "1729 XXXX on XXXX XX/XX/2021 I have Mr. Cooper mort... \n", - "2167 This is the third such complaint I have submit... \n", - "2219 Found and add online for a Prepaid Credit card... \n", - "2392 I am furnishing this complaint against Fed Loa... \n", - "2528 Despite multiple written requests, the unverif... \n", - "2737 After unsatisfying communication in the messag... \n", - "2859 Good Morning. My name is XXXX XXXX. My account... \n", - "3439 I have ongoing disputes that are preventing me... 
\n", - "3738 I had a loan with national Collegiate Trust. i... \n", - "3805 Hi I am submitting this XXXX XXXX this isn't a... \n", - "3915 portfolio is showin on my credit report with a... \n", - "3917 the company shared my information with another... \n", - "4281 I tried to submit a teacher loan forgiveness a... \n", - "4470 in accordance with the Fair Credit Reporting a... \n", - "4915 XXXX XXXX did not give me a receipt or a copy ... \n", - "4928 This company has filed a civil suit during a g... \n", - "5338 My credit report contains errors that is keepi... \n", - "5582 Coast Professional, XXXX, LA contacted me by m... \n", - "6386 Cares act refund requested in XXXX, called mul... \n", - "6956 n accordance with the Fair Credit Reporting ac... \n", + "1030 Hello, I would like to complain about PayPal H... \n", + "1582 Transunion is listing personal information ( n... \n", + "1600 On XX/XX/XXXX, I called Citizen Bank at XXXX t... \n", + "2060 Theses names are the known liars that I have s... \n", + "2283 My house was hit by a tree XX/XX/2018. My insu... \n", + "2421 I became aware of a credit inquiry on my XXXX... \n", + "2422 I have sent numerous letters, police reports a... \n", + "2658 This letter concerns two disputes ( chargeback... \n", + "2883 It is very frustrating that this has been goin... \n", + "2951 I, the consumer, in fact, have a right to priv... \n", + "2992 XXXX XXXX XXXX should not be reporting to Expe... \n", + "3969 DEAR CFPB ; XXXX ; XXXX ; AND TRANSUNION ; SEE... \n", + "4087 This debt was from my identity being stolen I ... \n", + "4326 The items that are reflected on my credit repo... \n", + "4682 I filed for chapter XXXX bankruptcy on XXXX... \n", + "5005 There are 2 Inquires on my credit report that ... \n", + "5144 My mortgage was sold from XXXX XXXX to freed... \n", + "6090 On XX/XX/XXXX this company received certified... \n", + "6449 After hours on the phone with multiple agents,... 
\n", + "6486 On XX/XX/2019 two charges one for XXXX and one... \n", "...\n", "\n", "[10000 rows x 4 columns]" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "successful_rows = (\n", - " (predicted_embeddings[\"ml_embed_text_status\"] == \"\")\n", + " (predicted_embeddings[\"ml_generate_embedding_status\"] == \"\")\n", " # Series.str.len() gives the length of an array.\n", " # See: https://stackoverflow.com/a/41340543/101923\n", - " & (predicted_embeddings[\"text_embedding\"].str.len() != 0)\n", + " & (predicted_embeddings[\"ml_generate_embedding_result\"].str.len() != 0)\n", ")\n", "predicted_embeddings = predicted_embeddings[successful_rows]\n", "predicted_embeddings\n" @@ -1185,7 +1185,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "id": "AhNTnEC5FRz2" }, @@ -1206,7 +1206,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": { "id": "6poSxh-fGJF7" }, @@ -1214,19 +1214,7 @@ { "data": { "text/html": [ - "Query job 46da96c8-c454-44d3-8b98-0e1bfeca69dd is DONE. 61.7 MB processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Query job dc6fe7cf-329d-4274-aff9-0b8dc2e56230 is DONE. 0 Bytes processed. Open Job" + "Query job 3e01544b-9bc2-4298-8f7d-1e9f186ac72f is DONE. 61.6 MB processed. Open Job" ], "text/plain": [ "" @@ -1238,7 +1226,7 @@ { "data": { "text/html": [ - "Query job 8c25a14a-af39-40a9-add5-de0f14bce9ce is DONE. 72.4 MB processed. Open Job" + "Query job 8aca135c-65c3-4804-9c25-0d47fad0beb5 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1250,7 +1238,7 @@ { "data": { "text/html": [ - "Query job 0a6a45b2-7c35-4be8-91a3-391a5381553e is DONE. 80.0 kB processed. Open Job" + "Query job 0b15374d-d34b-4f2e-8a48-b77d7e7757ab is DONE. 72.7 MB processed. 
Open Job" ], "text/plain": [ "" @@ -1262,7 +1250,7 @@ { "data": { "text/html": [ - "Query job b5e00edd-de21-40c1-bf61-9f1affdea318 is DONE. 73.1 MB processed. Open Job" + "Query job fed90511-76f8-4aec-a988-e1a4dab711b0 is DONE. 73.2 MB processed. Open Job" ], "text/plain": [ "" @@ -1294,57 +1282,57 @@ " \n", " CENTROID_ID\n", " NEAREST_CENTROIDS_DISTANCE\n", - " text_embedding\n", - " statistics\n", - " ml_embed_text_status\n", + " ml_generate_embedding_result\n", + " ml_generate_embedding_statistics\n", + " ml_generate_embedding_status\n", " content\n", " \n", " \n", " \n", " \n", - " 1094645\n", + " 3172121\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.530282685572...\n", - " [ 7.32792774e-03 -7.59598315e-02 -4.49591577e-...\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.756634267893...\n", + " [ 3.18095312e-02 -3.54472063e-02 -7.13569671e-...\n", " {\"token_count\":10,\"truncated\":false}\n", " \n", - " I do not have an account with this creditor\n", + " Company did not provide verification and detai...\n", " \n", " \n", - " 3372485\n", + " 2137420\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.643931578310...\n", - " [-0.00161087 -0.04956109 -0.07371692 -0.057822...\n", - " {\"token_count\":10,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.606628249825...\n", + " [ 1.91578846e-02 5.55988774e-02 8.88887007e-...\n", + " {\"token_count\":100,\"truncated\":false}\n", " \n", - " Hard inquiries in my report that I do not reco...\n", + " I have already filed a dispute with Consumer A...\n", " \n", " \n", - " 2669308\n", + " 2350775\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.599709344244...\n", - " [ 5.50241247e-02 -1.50039541e-02 -2.08624080e-...\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.606676295233...\n", + " [ 2.25369893e-02 2.29400061e-02 -6.42273854e-...\n", " {\"token_count\":100,\"truncated\":false}\n", " \n", - " I purchase {$25.00} for stock on the cash app ...\n", + " I informed Central Financial Control & provide...\n", " \n", " 
\n", - " 133816\n", + " 2904146\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.618444281124...\n", - " [ 1.77251529e-02 -3.89547497e-02 -3.82236368e-...\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.596729348974...\n", + " [ 9.35115516e-02 4.27814946e-03 4.62085977e-...\n", " {\"token_count\":100,\"truncated\":false}\n", " \n", - " BBVA fees I am in The Texas snow storm where I...\n", + " I received a letter from a collections agency ...\n", " \n", " \n", - " 2697156\n", + " 1075571\n", " 1\n", - " [{'CENTROID_ID': 1, 'DISTANCE': 0.500398902102...\n", - " [-1.28429877e-02 -1.85956229e-02 -3.93197313e-...\n", - " {\"token_count\":1011,\"truncated\":false}\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.453806107968...\n", + " [-1.93953840e-03 -5.80236455e-03 8.49655271e-...\n", + " {\"token_count\":100,\"truncated\":false}\n", " \n", - " After paying on my student loan for years, I o...\n", + " I have not done business with this company, i ...\n", " \n", " \n", "\n", @@ -1352,42 +1340,42 @@ ], "text/plain": [ " CENTROID_ID NEAREST_CENTROIDS_DISTANCE \\\n", - "1094645 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.530282685572... \n", - "3372485 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.643931578310... \n", - "2669308 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.599709344244... \n", - "133816 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.618444281124... \n", - "2697156 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.500398902102... \n", + "3172121 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.756634267893... \n", + "2137420 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.606628249825... \n", + "2350775 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.606676295233... \n", + "2904146 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.596729348974... \n", + "1075571 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.453806107968... \n", "\n", - " text_embedding \\\n", - "1094645 [ 7.32792774e-03 -7.59598315e-02 -4.49591577e-... \n", - "3372485 [-0.00161087 -0.04956109 -0.07371692 -0.057822... \n", - "2669308 [ 5.50241247e-02 -1.50039541e-02 -2.08624080e-... 
\n", - "133816 [ 1.77251529e-02 -3.89547497e-02 -3.82236368e-... \n", - "2697156 [-1.28429877e-02 -1.85956229e-02 -3.93197313e-... \n", + " ml_generate_embedding_result \\\n", + "3172121 [ 3.18095312e-02 -3.54472063e-02 -7.13569671e-... \n", + "2137420 [ 1.91578846e-02 5.55988774e-02 8.88887007e-... \n", + "2350775 [ 2.25369893e-02 2.29400061e-02 -6.42273854e-... \n", + "2904146 [ 9.35115516e-02 4.27814946e-03 4.62085977e-... \n", + "1075571 [-1.93953840e-03 -5.80236455e-03 8.49655271e-... \n", "\n", - " statistics ml_embed_text_status \\\n", - "1094645 {\"token_count\":10,\"truncated\":false} \n", - "3372485 {\"token_count\":10,\"truncated\":false} \n", - "2669308 {\"token_count\":100,\"truncated\":false} \n", - "133816 {\"token_count\":100,\"truncated\":false} \n", - "2697156 {\"token_count\":1011,\"truncated\":false} \n", + " ml_generate_embedding_statistics ml_generate_embedding_status \\\n", + "3172121 {\"token_count\":10,\"truncated\":false} \n", + "2137420 {\"token_count\":100,\"truncated\":false} \n", + "2350775 {\"token_count\":100,\"truncated\":false} \n", + "2904146 {\"token_count\":100,\"truncated\":false} \n", + "1075571 {\"token_count\":100,\"truncated\":false} \n", "\n", " content \n", - "1094645 I do not have an account with this creditor \n", - "3372485 Hard inquiries in my report that I do not reco... \n", - "2669308 I purchase {$25.00} for stock on the cash app ... \n", - "133816 BBVA fees I am in The Texas snow storm where I... \n", - "2697156 After paying on my student loan for years, I o... " + "3172121 Company did not provide verification and detai... \n", + "2137420 I have already filed a dispute with Consumer A... \n", + "2350775 I informed Central Financial Control & provide... \n", + "2904146 I received a letter from a collections agency ... \n", + "1075571 I have not done business with this company, i ... 
" ] }, - "execution_count": 13, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Use KMeans clustering to calculate our groups. Will take ~3 minutes.\n", - "cluster_model.fit(predicted_embeddings[[\"text_embedding\"]])\n", + "cluster_model.fit(predicted_embeddings[[\"ml_generate_embedding_result\"]])\n", "clustered_result = cluster_model.predict(predicted_embeddings)\n", "# Notice the CENTROID_ID column, which is the ID number of the group that\n", "# each complaint belongs to.\n", @@ -1422,7 +1410,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": { "id": "2E7wXM_jGqo6" }, @@ -1430,7 +1418,7 @@ { "data": { "text/html": [ - "Query job 8d4f24d6-dc37-47d3-8b4d-4505a55c4ccc is DONE. 10.4 MB processed. Open Job" + "Query job d6c61334-255f-43fe-9a8f-9fbf6cdcb2be is DONE. 10.5 MB processed. Open Job" ], "text/plain": [ "" @@ -1442,7 +1430,7 @@ { "data": { "text/html": [ - "Query job c1f979ee-1f5d-4f37-8595-ee2167c06e63 is DONE. 10.4 MB processed. Open Job" + "Query job 03a12383-6752-45ca-9b01-36eecc74fb8a is DONE. 10.5 MB processed. Open Job" ], "text/plain": [ "" @@ -1468,7 +1456,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": { "id": "ZNDiueI9IP5e" }, @@ -1478,38 +1466,45 @@ "output_type": "stream", "text": [ "comment list 1:\n", - "1. This is the third such complaint I have submitted regarding the same type of issue over the past 12-18 months. \n", + "1. This debt was from my identity being stolen I didnt open any account that resulted in this collection i have completed a police report which can be verified with the XXXX police @ XXXX report # XXXX and i have a notarized identity theft affidavit from ftc please remove this off of my credit and close my file ASAP\n", + "2. On XX/XX/XXXX this company received certified mail asking for validation of debt. 
On XX/XX/XXXX the company still did not validate debt owed and they did not mark the debt disputed by XX/XX/XXXX through the major credit reporting bureaus. This is a violation of the FDCPA and FCRA. I did send a second letter which the company received on XX/XX/XXXX . A lady from the company called and talked to me about the debt on XX/XX/XXXX but again did not have the credit bureaus mark the item as disputed. The company still violated the laws. Section [ 15 U.S.C. 1681s-2 ] ( 3 ) duty to provide notice of dispute. If the completeness or accuracy of any information furnished by any person to any consumer reporting agency is disputed to such person by a consumer, the person may not furnish the information to any consumer reporting agency without notice that such information is disputed. ( B ) ti me of notice! The notice required under sub paragraph ( A ) shall be provided to the customer prior to, or no later than 30 days after, furnishing the negative information to a consumer reporting agency described in section 603 ( p ). This company violated the state laws. I received no information until XX/XX/XXXX . Therefore by law the company should have the item removed from the credit agencies such as transunion and XXXX . I tried to call the company back about the laws that was broken and left my name no return call. The copy of my credit reports are below and as you can see the items was n't marked disputed. XXXX is marked disputed because on XX/XX/XXXX I myself disputed the information with the credit bureau. The lady stated they did n't receive my dispute letter until XX/XX/XXXX . Included is certified mail reciepts with date, time stamp, and signature of the person who signed for the certified mail on XX/XX/XXXX and XX/XX/XXXX . So again the company violated the laws and I have all the proof. If I have a contract with this company please send to me by mail a contract bearing my signature of the contract.\n", + "3. 
On XX/XX/2022, Pioneer Credit Recovery of XXXX, NY identified an alleged debt, which I do not owe. \n", + "\n", + "On XX/XX/2022, I wrote a dispute letter to Pioneer, requesting that they stop communication with me, record my dispute, and provide verification of the debt if they believe otherwise. \n", "\n", - "On XX/XX/XXXX, my co-signed account was flagged by Navient as past due. The XXXX payment was mailed priority on XX/XX/XXXX and received by Navient on XX/XX/XXXX and delivered to \" an individual '' per the post office. \n", - "I called Navient on XX/XX/XXXX to talk to them about why my account was flagged since they received the payment long before the due date. The payment is sent via XXXX money orders under the same cover. The XXXX money order ( {$160.00} ) was cashed on XX/XX/XXXX per XXXX XXXX, the second money order ( {$250.00} ) which was sent in the same priority envelope and received the same time has not been cashed. \n", - "When I called the customer service agent at Navient she told me that my account was past due and wanted me to send another payment. When I explained that they had received the payment she argued with me that if they received it, the payment would have been cashed. I asked to speak with a supervisor. \n", - "I was connected with supervisor, XXXX XXXX, who asked that I send copies of the payments to him so he could submit for a missing payment request. I faxed the proof on XXXX @ XXXX with a receipt acknowledgment. \n", - "On XX/XX/XXXX, the payment was still not applied to the account. When I called XXXX XXXX, the money order was still not cashed. I called Navient again. Because of an argumentative customer service rep again, I requested to speak with a supervisor. I spoke with XXXX XXXX. She states that payment was not received. I explained the situation again. She said the missing payment request had not been submitted. She had me upload the documents so she could request a missing payment search. 
I have done everything I have been asked. \n", - "This issues continues to occur. For approximately 6 months at a time, Navient gets this right then there are issues again and again. I have submitted CFPB complaints about this in the past. \n", - "I was told it would take 5-7 business days to be resolved.\n", - "2. I tried to submit a teacher loan forgiveness application and they lost my application. I submitted the application again after talking to XXXX people at Nelnet. Then when I called back to check on the status they told me that I needed to submit another form for a different school that I worked at. I had already called previously and asked if I needed to submit any other papers and they told me \" no ''. Therefore, I have been paying my loan for 5 months and it should be forgiven. I am still paying my loan because I have to wait for them to approve the new forgiveness paperwork.\n", - "3. PayPal continues to overcharge their currency rate. It it always inflated in their favor so that they collect the difference.\n", - "4. My government feeds are not coming on to my card and I need the problem fix today\n", - "5. Paypal Credit 's website is confusing and does not accurately reflect all activity. When speaking with representatives like XXXX, it 's confusing to them and they can barely follow along with it. I am not receiving statements, which proves it difficult to determine the due dates on the accounts. The Reps are n't knowledgeable and the only thing they repeat to you is the amount due on the screen.\n", + "Pioneer has not responded with verification, but has attempted to collect the debt since then by phone ( XX/XX/2022 ) and mail ( XX/XX/2022 ).\n", + "4. Disputed with the company on several occasions and they still havent provided proof in a timely manner. The FCRA gives the company 30 days to respond. I have not gotten a response.\n", + "5. I am not aware of this XXXX XXXX XXXX XXXX XXXX , XXXX balance. 
I have never seen anything dealing with this lender. Also, I have been threated that in 30 days they will seek to make a judgement on debt that does not belong to me. I understand that they are looking to offer me a settlement. However, I do not believe the validity of such debt accusation. Furthermore, I will not be limited to the action of court threats when I did not receive any notice of debt based on communication. The amount is {$880.00} from MBNA which was acquired by Bank of America in 2006. I do not claim debt.\n", "\n", "comment list 2:\n", - "1. XXXX on XXXX XX/XX/2021 I have Mr. Cooper mortgage for years now. On XXXX XXXX XXXX I made an additional payment of $ XXXX towards my principal. More than 4 days - it's not reflected in the Amortization schedule ( Amortization schedule is not even visible ). Even after so many additional principal payments, Payoff calculator is way off and it still shows XXXX maturity date while it should start showing something like XX/XX/XXXX/XX/XX/XXXX as the initial date. There are lots of discrepancies on their website to reflect the balance and total. When called customer service on Friday and also chatted - i was assured of fixing this - but no fix till this point of time. Customer service there is a long wait. Auto bot doesn't let customers talk to the real person. \n", - "Finally after a lots of follow-up I got the amortization schedule via email but it is not reflecting another additional principal payment of {$4700.00} made on XX/XX/2021. \n", - "I did numerous chats and phone calls. Why i should depend on inefficinent humans to see my revised amortization schedule? \n", - "Why the online amortization schedule is not visible now? \n", + "1. My name is XXXX XXXX XXXX. This issue with a Loan Till Payday account was previously reported to you for collection practices, etc. I had a pay day loan in 2013. 
At the time, I banked with XXXX XXXX, who advised me that pay day loans are not good, and in the end XXXX closed my bank account, it was involuntary. In the interim, I made payments to the agency. XXXX and XXXX were the primary contacts. On the last payment, due to the fact that I told him I was coming in to pay cash, and they withdrew the funds, electronically, my account was affected. XXXX advised me that the payment made was the last payment and the other ( which was primarily interest remaining ) would be charged off. XXXX later called me and advised that XXXX was not authorized to make that decision and demanded the payment. I do n't understand how one person can cancel the arrangements made by someone else. \n", + "\n", + "In the end, they sold my account. It was reported to you, and that creditor then stated no further collection activity would occur. \n", + "\n", + "Last week I began receiving calls from a collection agency, XXXX XXXX stating I would called for a civil deposition on this account. I do n't even know this agency. Later, I then received another call stating that I needed to hold, and after several clicks was connected to someone at a Mediaction service. I denied the owing the loan and stated it was paid. \n", + "\n", + "Today, I received a call from an outsource service courier about a missed appointment or hearing??? What?? I have no idea who these people are. I called Loan Till Payday and was advised the loan was sold and I needed to settle with the new company. So, does this mean they are continuing to attempt to collect {$200.00}. \n", "\n", - "Worst thing, after turning on escrow account, there is no transparency. Amount of escrow account is not in sync with all the sections of my online account. It seems that there are too many internal buggy systems in Mr. Cooper and everybody from different IT department is showing a different $ $ figure at various places. \n", - "Highly inefficient organization.\n", - "2. 
I had a loan with national Collegiate Trust. i did n't make any payments on the loan for 6 years and due to the \" statute to limitations '' the loan collector should not be contacting me any more, by they still are in both forms phone call 's and letters. I am also trying to buy a house for my family and found out that i can not get a USDA loan for the house because National Collegiate Trust has filled the loan as a government delinquent loan. The problem with that is the loan is a private loan not a federal/Government loan. due to the way they filled the lion on my credit report i am not able to buy my first home.\n", - "3. Cares act refund requested in XXXX, called multiple times given conflicting information every time. Its now XXXX and looking like its going to be even longer potentially months before the refund arrives. Blatant disregard for the cares act signed into law and terrible customer service. This company is ill suited to service the federal government contract it enjoys.\n", - "4. In an attempt to pay off my loan with Ocwen, I sent XXXX large payments to the institution on XXXX XXXX that would have decreased my loan amount to within a couple of hundred dollars ( you can not send XXXX payment that exceeds 80 % of the loan balance so I broke the payments up into XXXX XXXX. I scheduled the payments for the same day because I did not want any interest to accrue. After a few days, I noticed where the XXXX payments were withdrawn from my bank but that my Ocwen balance had not changed to reflect my XXXX payments. I called Ocwen on XXXX XXXX to ask about the payment. The Ocwen rep explained that because the XXXX payments were scheduled for the same day, that it exceeded the 80 % max limit. I asked Ocwen to return my XXXX payments. The rep said it would take XXXX business days. I called Ocwen back on XXXX XXXX when my funds had not been returned to my bank account. 
I gave them my account number and routing number to my bank, information that I am sure they already had since my monthly mortgage payments are debited from my account. They asked me to wait a couple more days. I called Ocwen back on XXXX XXXX. The rep asked me to be patient and assured me that the funds would be returned by XXXX XXXX. There were no funds returned to my account on XXXX XXXX. I called Ocwen again. I was asked to wait 40+ minutes on hold while the Ocwen rep put me in touch with an escalation specialist ( ES ). The ES told me that my funds would be reurned within XXXX hours and that he had put it in as a priority because I had called so often. There were no funds on XXXX XXXX. I called Ocwen again to see if there was a problem. There was. After speaking to a rep and another ES, I was told that my funds could not be returned to me! The ES said that he did not see my funds! He claimed to put me in touch with someone who XXXX be able to address my concerns. So that 's where I am now, on hold waiting to speak to yet another person! This is a significant amount of money and I fear that Ocwen is trying to get away with keeping my XXXX payments!\n", - "5. In XX/XX/XXXX we received our first bill from XXXX XXXX for XXXX. ( attached ) We promptly paid the bill. Again, in XX/XX/XXXX we paid our second bill for XXXX. Again, both on time. Then when XX/XX/XXXX statement came we were billed XXXX. My husband called XXXX XXXX to find out what the issue was. We were told there was a loan shortfall caused by us paying XXXX ( the amount we were billed ) and that our loan was re-amoritized. I argued I had a fixed rate loan, had never missed a payment, had never made a late payment, and paid exactly what was billed. Well, after double checking my promissory note ( attached ) and TILA ( attached ) I was to always be billed XXXX. XXXX changed the monthly payment and thus caused a shortfall. When I told them this information they refused to correct the shortfall. 
Not only did they not correct the mistake they refused to return my calls or emails to provide answers for these issues. Around 90 days later and this issue still exists and they still refuse to answer. Additionally I offered to make up the shortfall myself by offering a check for the missing amount and they just applied it to interest. Thus the shortfall still exists. The extra amount would have gone directly to principal. Additionally, in XX/XX/XXXX we made an extra payment amount on top of the monthly payment. This was made all at the same time. The letter we sent contained directions to only apply extra payments beyond the monthly requirement be applied to principal and not the next months payment. This was ignored. Then XXXX \" a higher up '' as she calls herself lied and told me it went to principal when clearly it did not. We requested this be fixed and it has not been done. No one has offered to fix anything either. A certified letter is attached that I mailed. I also made dozens of calls.\n", + "I attempted to call the numbers, and now no one picks up just a voicemail. I called the supposed service courier and advised that their number was showing up as a spam/fraud number and that if they were a legitimate company then they should leave their name, location, a number ( not a voicemail ), and the case they are calling me about. I have not been served with any collection documents - why am I being threatened with a deposition??? \n", + "\n", + "Telephone number recently calling me : ( XXXX ) XXXX. \n", + "\n", + "Please help.\n", + "2. I receive 2 or 3 phone calls every day since early XXXX, my references receive calls. I will gladly satisfy this debt however even after 1st telling them the calls haven't stopped as though they are going to intimidate me. If the calls stopped for just 3 or 4 days I would satisfy my obligation but not because they keep calling me as well as my references.\n", + "3. 
Last month I received a phone call for my husband from XXXX XXXX XXXX saying he owed money and if I did not pay today it would be sent to litigation. The debt was Wachovia/wells Fargo, and account that we have never had. I had my husband call to get more information and they became very nasty with him. I called back asking for documentation on the debt because i did not think it was our debt and they became aggressive. They did email my husband something saying how much he owed, and I called back and asked to be emailed a copy, and the dollar amounts did not match. I called Wells Fargo and went over the above and verified that we have never had an account with them and I sent them the emails the XXXX sent to us and they started a fraud investigation. Yesterday I received another collections letter in the mail from the. Still trying to collect this debt. These people have my husbands full social security number ( we did not give it to them )\n", + "4. A company call XXXX XXXX XXXX came onto my private property on XX/XX/2018 and stole my automobile. I did receive any type of notice saying they collecting on a debt. If they take or threaten to take any nonjudicial action ( i.e, without a court order ) to repossess property when there is no present right to possession of the property they is in violation. l did not receive any type of notice asking if they can enter onto my private property and steal my private automobile.\n", + "5. Navient financial continues to send me erroneous debt collection emails. I have repeatedly asked them to remove my email address and to cease all communication with me. \n", + "I have no relationship with Navient and their continued threatening email is very unsettling. \n", + "\n", + "I just want their erroneous threats to stop. 
\n", + "\n", + "Below is the latest email I have received from them : Last Day to call this office XXXX by XXXX Regards, XXXX XXXX Team Lead Specialist Charge off Unit XXXX XXXX\n", "\n" ] } ], "source": [ - "# Build plain-text prompts to send to PaLM 2. Use only 5 complaints from each group.\n", + "# Build plain-text prompts to send to Gemini. Use only 5 complaints from each group.\n", "prompt1 = 'comment list 1:\\n'\n", "for i in range(5):\n", " prompt1 += str(i + 1) + '. ' + \\\n", @@ -1526,7 +1521,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": { "id": "BfHGJLirzSvH" }, @@ -1537,37 +1532,44 @@ "text": [ "Please highlight the most obvious difference between the two lists of comments:\n", "comment list 1:\n", - "1. This is the third such complaint I have submitted regarding the same type of issue over the past 12-18 months. \n", + "1. This debt was from my identity being stolen I didnt open any account that resulted in this collection i have completed a police report which can be verified with the XXXX police @ XXXX report # XXXX and i have a notarized identity theft affidavit from ftc please remove this off of my credit and close my file ASAP\n", + "2. On XX/XX/XXXX this company received certified mail asking for validation of debt. On XX/XX/XXXX the company still did not validate debt owed and they did not mark the debt disputed by XX/XX/XXXX through the major credit reporting bureaus. This is a violation of the FDCPA and FCRA. I did send a second letter which the company received on XX/XX/XXXX . A lady from the company called and talked to me about the debt on XX/XX/XXXX but again did not have the credit bureaus mark the item as disputed. The company still violated the laws. Section [ 15 U.S.C. 1681s-2 ] ( 3 ) duty to provide notice of dispute. 
If the completeness or accuracy of any information furnished by any person to any consumer reporting agency is disputed to such person by a consumer, the person may not furnish the information to any consumer reporting agency without notice that such information is disputed. ( B ) ti me of notice! The notice required under sub paragraph ( A ) shall be provided to the customer prior to, or no later than 30 days after, furnishing the negative information to a consumer reporting agency described in section 603 ( p ). This company violated the state laws. I received no information until XX/XX/XXXX . Therefore by law the company should have the item removed from the credit agencies such as transunion and XXXX . I tried to call the company back about the laws that was broken and left my name no return call. The copy of my credit reports are below and as you can see the items was n't marked disputed. XXXX is marked disputed because on XX/XX/XXXX I myself disputed the information with the credit bureau. The lady stated they did n't receive my dispute letter until XX/XX/XXXX . Included is certified mail reciepts with date, time stamp, and signature of the person who signed for the certified mail on XX/XX/XXXX and XX/XX/XXXX . So again the company violated the laws and I have all the proof. If I have a contract with this company please send to me by mail a contract bearing my signature of the contract.\n", + "3. On XX/XX/2022, Pioneer Credit Recovery of XXXX, NY identified an alleged debt, which I do not owe. \n", "\n", - "On XX/XX/XXXX, my co-signed account was flagged by Navient as past due. The XXXX payment was mailed priority on XX/XX/XXXX and received by Navient on XX/XX/XXXX and delivered to \" an individual '' per the post office. \n", - "I called Navient on XX/XX/XXXX to talk to them about why my account was flagged since they received the payment long before the due date. The payment is sent via XXXX money orders under the same cover. 
The XXXX money order ( {$160.00} ) was cashed on XX/XX/XXXX per XXXX XXXX, the second money order ( {$250.00} ) which was sent in the same priority envelope and received the same time has not been cashed. \n", - "When I called the customer service agent at Navient she told me that my account was past due and wanted me to send another payment. When I explained that they had received the payment she argued with me that if they received it, the payment would have been cashed. I asked to speak with a supervisor. \n", - "I was connected with supervisor, XXXX XXXX, who asked that I send copies of the payments to him so he could submit for a missing payment request. I faxed the proof on XXXX @ XXXX with a receipt acknowledgment. \n", - "On XX/XX/XXXX, the payment was still not applied to the account. When I called XXXX XXXX, the money order was still not cashed. I called Navient again. Because of an argumentative customer service rep again, I requested to speak with a supervisor. I spoke with XXXX XXXX. She states that payment was not received. I explained the situation again. She said the missing payment request had not been submitted. She had me upload the documents so she could request a missing payment search. I have done everything I have been asked. \n", - "This issues continues to occur. For approximately 6 months at a time, Navient gets this right then there are issues again and again. I have submitted CFPB complaints about this in the past. \n", - "I was told it would take 5-7 business days to be resolved.\n", - "2. I tried to submit a teacher loan forgiveness application and they lost my application. I submitted the application again after talking to XXXX people at Nelnet. Then when I called back to check on the status they told me that I needed to submit another form for a different school that I worked at. I had already called previously and asked if I needed to submit any other papers and they told me \" no ''. 
Therefore, I have been paying my loan for 5 months and it should be forgiven. I am still paying my loan because I have to wait for them to approve the new forgiveness paperwork.\n", - "3. PayPal continues to overcharge their currency rate. It it always inflated in their favor so that they collect the difference.\n", - "4. My government feeds are not coming on to my card and I need the problem fix today\n", - "5. Paypal Credit 's website is confusing and does not accurately reflect all activity. When speaking with representatives like XXXX, it 's confusing to them and they can barely follow along with it. I am not receiving statements, which proves it difficult to determine the due dates on the accounts. The Reps are n't knowledgeable and the only thing they repeat to you is the amount due on the screen.\n", + "On XX/XX/2022, I wrote a dispute letter to Pioneer, requesting that they stop communication with me, record my dispute, and provide verification of the debt if they believe otherwise. \n", + "\n", + "Pioneer has not responded with verification, but has attempted to collect the debt since then by phone ( XX/XX/2022 ) and mail ( XX/XX/2022 ).\n", + "4. Disputed with the company on several occasions and they still havent provided proof in a timely manner. The FCRA gives the company 30 days to respond. I have not gotten a response.\n", + "5. I am not aware of this XXXX XXXX XXXX XXXX XXXX , XXXX balance. I have never seen anything dealing with this lender. Also, I have been threated that in 30 days they will seek to make a judgement on debt that does not belong to me. I understand that they are looking to offer me a settlement. However, I do not believe the validity of such debt accusation. Furthermore, I will not be limited to the action of court threats when I did not receive any notice of debt based on communication. The amount is {$880.00} from MBNA which was acquired by Bank of America in 2006. I do not claim debt.\n", "comment list 2:\n", - "1. 
XXXX on XXXX XX/XX/2021 I have Mr. Cooper mortgage for years now. On XXXX XXXX XXXX I made an additional payment of $ XXXX towards my principal. More than 4 days - it's not reflected in the Amortization schedule ( Amortization schedule is not even visible ). Even after so many additional principal payments, Payoff calculator is way off and it still shows XXXX maturity date while it should start showing something like XX/XX/XXXX/XX/XX/XXXX as the initial date. There are lots of discrepancies on their website to reflect the balance and total. When called customer service on Friday and also chatted - i was assured of fixing this - but no fix till this point of time. Customer service there is a long wait. Auto bot doesn't let customers talk to the real person. \n", - "Finally after a lots of follow-up I got the amortization schedule via email but it is not reflecting another additional principal payment of {$4700.00} made on XX/XX/2021. \n", - "I did numerous chats and phone calls. Why i should depend on inefficinent humans to see my revised amortization schedule? \n", - "Why the online amortization schedule is not visible now? \n", + "1. My name is XXXX XXXX XXXX. This issue with a Loan Till Payday account was previously reported to you for collection practices, etc. I had a pay day loan in 2013. At the time, I banked with XXXX XXXX, who advised me that pay day loans are not good, and in the end XXXX closed my bank account, it was involuntary. In the interim, I made payments to the agency. XXXX and XXXX were the primary contacts. On the last payment, due to the fact that I told him I was coming in to pay cash, and they withdrew the funds, electronically, my account was affected. XXXX advised me that the payment made was the last payment and the other ( which was primarily interest remaining ) would be charged off. XXXX later called me and advised that XXXX was not authorized to make that decision and demanded the payment. 
I do n't understand how one person can cancel the arrangements made by someone else. \n", + "\n", + "In the end, they sold my account. It was reported to you, and that creditor then stated no further collection activity would occur. \n", + "\n", + "Last week I began receiving calls from a collection agency, XXXX XXXX stating I would called for a civil deposition on this account. I do n't even know this agency. Later, I then received another call stating that I needed to hold, and after several clicks was connected to someone at a Mediaction service. I denied the owing the loan and stated it was paid. \n", + "\n", + "Today, I received a call from an outsource service courier about a missed appointment or hearing??? What?? I have no idea who these people are. I called Loan Till Payday and was advised the loan was sold and I needed to settle with the new company. So, does this mean they are continuing to attempt to collect {$200.00}. \n", + "\n", + "I attempted to call the numbers, and now no one picks up just a voicemail. I called the supposed service courier and advised that their number was showing up as a spam/fraud number and that if they were a legitimate company then they should leave their name, location, a number ( not a voicemail ), and the case they are calling me about. I have not been served with any collection documents - why am I being threatened with a deposition??? \n", + "\n", + "Telephone number recently calling me : ( XXXX ) XXXX. \n", + "\n", + "Please help.\n", + "2. I receive 2 or 3 phone calls every day since early XXXX, my references receive calls. I will gladly satisfy this debt however even after 1st telling them the calls haven't stopped as though they are going to intimidate me. If the calls stopped for just 3 or 4 days I would satisfy my obligation but not because they keep calling me as well as my references.\n", + "3. 
Last month I received a phone call for my husband from XXXX XXXX XXXX saying he owed money and if I did not pay today it would be sent to litigation. The debt was Wachovia/wells Fargo, and account that we have never had. I had my husband call to get more information and they became very nasty with him. I called back asking for documentation on the debt because i did not think it was our debt and they became aggressive. They did email my husband something saying how much he owed, and I called back and asked to be emailed a copy, and the dollar amounts did not match. I called Wells Fargo and went over the above and verified that we have never had an account with them and I sent them the emails the XXXX sent to us and they started a fraud investigation. Yesterday I received another collections letter in the mail from the. Still trying to collect this debt. These people have my husbands full social security number ( we did not give it to them )\n", + "4. A company call XXXX XXXX XXXX came onto my private property on XX/XX/2018 and stole my automobile. I did receive any type of notice saying they collecting on a debt. If they take or threaten to take any nonjudicial action ( i.e, without a court order ) to repossess property when there is no present right to possession of the property they is in violation. l did not receive any type of notice asking if they can enter onto my private property and steal my private automobile.\n", + "5. Navient financial continues to send me erroneous debt collection emails. I have repeatedly asked them to remove my email address and to cease all communication with me. \n", + "I have no relationship with Navient and their continued threatening email is very unsettling. \n", "\n", - "Worst thing, after turning on escrow account, there is no transparency. Amount of escrow account is not in sync with all the sections of my online account. It seems that there are too many internal buggy systems in Mr. 
Cooper and everybody from different IT department is showing a different $ $ figure at various places. \n", - "Highly inefficient organization.\n", - "2. I had a loan with national Collegiate Trust. i did n't make any payments on the loan for 6 years and due to the \" statute to limitations '' the loan collector should not be contacting me any more, by they still are in both forms phone call 's and letters. I am also trying to buy a house for my family and found out that i can not get a USDA loan for the house because National Collegiate Trust has filled the loan as a government delinquent loan. The problem with that is the loan is a private loan not a federal/Government loan. due to the way they filled the lion on my credit report i am not able to buy my first home.\n", - "3. Cares act refund requested in XXXX, called multiple times given conflicting information every time. Its now XXXX and looking like its going to be even longer potentially months before the refund arrives. Blatant disregard for the cares act signed into law and terrible customer service. This company is ill suited to service the federal government contract it enjoys.\n", - "4. In an attempt to pay off my loan with Ocwen, I sent XXXX large payments to the institution on XXXX XXXX that would have decreased my loan amount to within a couple of hundred dollars ( you can not send XXXX payment that exceeds 80 % of the loan balance so I broke the payments up into XXXX XXXX. I scheduled the payments for the same day because I did not want any interest to accrue. After a few days, I noticed where the XXXX payments were withdrawn from my bank but that my Ocwen balance had not changed to reflect my XXXX payments. I called Ocwen on XXXX XXXX to ask about the payment. The Ocwen rep explained that because the XXXX payments were scheduled for the same day, that it exceeded the 80 % max limit. I asked Ocwen to return my XXXX payments. The rep said it would take XXXX business days. 
I called Ocwen back on XXXX XXXX when my funds had not been returned to my bank account. I gave them my account number and routing number to my bank, information that I am sure they already had since my monthly mortgage payments are debited from my account. They asked me to wait a couple more days. I called Ocwen back on XXXX XXXX. The rep asked me to be patient and assured me that the funds would be returned by XXXX XXXX. There were no funds returned to my account on XXXX XXXX. I called Ocwen again. I was asked to wait 40+ minutes on hold while the Ocwen rep put me in touch with an escalation specialist ( ES ). The ES told me that my funds would be reurned within XXXX hours and that he had put it in as a priority because I had called so often. There were no funds on XXXX XXXX. I called Ocwen again to see if there was a problem. There was. After speaking to a rep and another ES, I was told that my funds could not be returned to me! The ES said that he did not see my funds! He claimed to put me in touch with someone who XXXX be able to address my concerns. So that 's where I am now, on hold waiting to speak to yet another person! This is a significant amount of money and I fear that Ocwen is trying to get away with keeping my XXXX payments!\n", - "5. In XX/XX/XXXX we received our first bill from XXXX XXXX for XXXX. ( attached ) We promptly paid the bill. Again, in XX/XX/XXXX we paid our second bill for XXXX. Again, both on time. Then when XX/XX/XXXX statement came we were billed XXXX. My husband called XXXX XXXX to find out what the issue was. We were told there was a loan shortfall caused by us paying XXXX ( the amount we were billed ) and that our loan was re-amoritized. I argued I had a fixed rate loan, had never missed a payment, had never made a late payment, and paid exactly what was billed. Well, after double checking my promissory note ( attached ) and TILA ( attached ) I was to always be billed XXXX. 
XXXX changed the monthly payment and thus caused a shortfall. When I told them this information they refused to correct the shortfall. Not only did they not correct the mistake they refused to return my calls or emails to provide answers for these issues. Around 90 days later and this issue still exists and they still refuse to answer. Additionally I offered to make up the shortfall myself by offering a check for the missing amount and they just applied it to interest. Thus the shortfall still exists. The extra amount would have gone directly to principal. Additionally, in XX/XX/XXXX we made an extra payment amount on top of the monthly payment. This was made all at the same time. The letter we sent contained directions to only apply extra payments beyond the monthly requirement be applied to principal and not the next months payment. This was ignored. Then XXXX \" a higher up '' as she calls herself lied and told me it went to principal when clearly it did not. We requested this be fixed and it has not been done. No one has offered to fix anything either. A certified letter is attached that I mailed. I also made dozens of calls.\n", + "I just want their erroneous threats to stop. \n", + "\n", + "Below is the latest email I have received from them : Last Day to call this office XXXX by XXXX Regards, XXXX XXXX Team Lead Specialist Charge off Unit XXXX XXXX\n", "\n" ] } ], "source": [ - "# The plain English request we will make of PaLM 2\n", + "# The plain English request we will make of Gemini\n", "prompt = (\n", " \"Please highlight the most obvious difference between \"\n", " \"the two lists of comments:\\n\" + prompt1 + prompt2\n", @@ -1585,7 +1587,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": { "id": "mL5P0_3X04dE" }, @@ -1593,7 +1595,7 @@ { "data": { "text/html": [ - "Query job de5da6c9-96b5-42a1-b199-42687392fe37 is DONE. 0 Bytes processed. Open Job" + "Query job 3a46cad4-14e5-4137-a042-14380733b467 is DONE. 
0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1611,11 +1613,24 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": { "id": "ICWHsqAW1FNk" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Load job 939037f0-66df-42a4-b301-0b3ba26bae7c is DONE. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Make a DataFrame containing only a single row with our prompt for Gemini\n", "df = bf.DataFrame({\"prompt\": [prompt]})" @@ -1623,7 +1638,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": { "id": "gB7e1LXU1pst" }, @@ -1631,7 +1646,7 @@ { "data": { "text/html": [ - "Query job 1363c327-00b5-4835-a902-da84882bc996 is DONE. 0 Bytes processed. Open Job" + "Query job c662b2c7-7185-4681-b7c6-60c81e9c8cd4 is DONE. 8.2 kB processed. Open Job" ], "text/plain": [ "" @@ -1641,21 +1656,17 @@ "output_type": "display_data" }, { - "data": { - "text/html": [ - "Query job c5996f1e-a140-4e7d-8775-091e1a73d882 is DONE. 8 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/__init__.py:108: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] }, { "data": { "text/html": [ - "Query job db1de3ab-2e6e-4b3f-8e6a-01bad33ac45f is DONE. 2 Bytes processed. Open Job" + "Query job 9a4d6735-c307-4a60-96f9-d81330925e6c is DONE. 2 Bytes processed. Open Job" ], "text/plain": [ "" @@ -1667,7 +1678,7 @@ { "data": { "text/html": [ - "Query job 38d9a9d0-7f03-4091-858b-f864da30987e is DONE. 375 Bytes processed. Open Job" + "Query job 17bde6e6-8b26-48a7-9c57-b7b9752c1f54 is DONE. 1.8 kB processed. 
Open Job" ], "text/plain": [ "" @@ -1679,10 +1690,10 @@ { "data": { "text/plain": [ - "'The most obvious difference between the two lists of comments is the subject matter. The first list of comments is primarily focused on issues with financial institutions, such as Navient, Nelnet, PayPal, and Mr. Cooper. The second list of comments is primarily focused on issues with government agencies, such as the National Collegiate Trust, the USDA, and Ocwen.'" + "\"## Key Differences between Comment Lists 1 and 2:\\n\\n**Comment List 1:**\\n\\n* **Focuses on Legal Violations:** The comments in List 1 primarily focus on how the debt collectors violated specific laws, such as the FDCPA and FCRA, by not validating debt, not marking accounts as disputed, and using illegal collection tactics.\\n* **Detailed Evidence:** Commenters provide detailed evidence of their claims, including dates, reference numbers, police reports, and copies of communications.\\n* **Formal Tone:** The language in List 1 is more formal and uses legal terminology, suggesting the commenters may have a deeper understanding of their rights.\\n* **Emphasis on Debt Accuracy:** Many comments explicitly deny owing the debt and question its validity, requesting proof and demanding removal from credit reports. \\n\\n**Comment List 2:**\\n\\n* **Focus on Harassment and Intimidation:** The comments in List 2 highlight the harassing and intimidating behavior of the debt collectors, such as making multiple calls, contacting references, and threatening legal action.\\n* **Emotional Language:** Commenters express frustration, fear, and anger towards the debt collectors' behavior.\\n* **Less Legal Detail:** While some commenters mention specific laws, they provide less detailed evidence than List 1.\\n* **Uncertainty About Debt:** Several commenters are unsure whether they actually owe the debt, questioning its origin and validity. 
\\n\\n**Overall:**\\n\\n* List 1 focuses on legal arguments and violations, while List 2 emphasizes emotional distress and improper collection tactics.\\n* List 1 provides more concrete evidence of wrongdoing, while List 2 relies more on personal experiences and descriptions.\\n* Both lists highlight the negative impacts of debt collection practices on individuals.\\n\"" ] }, - "execution_count": 19, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1690,7 +1701,7 @@ "source": [ "# Send the request for Gemini to generate a response to our prompt\n", "major_difference = q_a_model.predict(df)\n", - "# PaLM 2's response is the only row in the dataframe result \n", + "# Gemini's response is the only row in the dataframe result \n", "major_difference[\"ml_generate_text_llm_result\"].iloc[0]" ] },