-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Closed
Labels
Description
Expected Behavior
We point our file source to a parquet dataset:
file_source = FileSource(
name="dummy_file_source",
path="s3://data/dummy/",
timestamp_field="event_timestamp",
created_timestamp_column="created",
file_format=ParquetFormat(),
)
I expect to be able to use a parquet dataset (a directory of parquet files) the same way I'd use a single parquet file.
Current Behavior
Feast errors at the apply stage:
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/infra/offline_stores/file_source.py", line 164, in get_table_column_names_and_types
filesystem.open_input_file(path), filesystem=filesystem
File "pyarrow/_fs.pyx", line 588, in pyarrow._fs.FileSystem.open_input_file
File "pyarrow/error.pxi", line 143, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow/error.pxi", line 114, in pyarrow.lib.check_status
OSError: Path does not exist 'data/dummy'
Full Traceback
Traceback (most recent call last):
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/bin/feast", line 8, in <module>
sys.exit(cli())
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/click/core.py", line 1137, in __call__
return self.main(*args, **kwargs)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/click/core.py", line 1062, in main
rv = self.invoke(ctx)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/click/core.py", line 1668, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/click/core.py", line 763, in invoke
return __callback(*args, **kwargs)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), *args, **kwargs)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/cli.py", line 519, in apply_total_command
apply_total(repo_config, repo, skip_source_validation)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/usage.py", line 283, in wrapper
return func(*args, **kwargs)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/repo_operations.py", line 335, in apply_total
apply_total_with_repo_instance(
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/repo_operations.py", line 296, in apply_total_with_repo_instance
registry_diff, infra_diff, new_infra = store.plan(repo)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/usage.py", line 294, in wrapper
raise exc.with_traceback(traceback)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/usage.py", line 283, in wrapper
return func(*args, **kwargs)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/feature_store.py", line 723, in plan
self._make_inferences(
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/feature_store.py", line 601, in _make_inferences
update_feature_views_with_inferred_features_and_entities(
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/inference.py", line 179, in update_feature_views_with_inferred_features_and_entities
_infer_features_and_entities(
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/inference.py", line 217, in _infer_features_and_entities
table_column_names_and_types = fv.batch_source.get_table_column_names_and_types(
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/typeguard/__init__.py", line 1033, in wrapper
retval = func(*args, **kwargs)
File "/Users/mzwiessele/feast_s3_dataset_error/.venv/lib/python3.8/site-packages/feast/infra/offline_stores/file_source.py", line 164, in get_table_column_names_and_types
filesystem.open_input_file(path), filesystem=filesystem
File "pyarrow/_fs.pyx", line 588, in pyarrow._fs.FileSystem.open_input_file
File "pyarrow/error.pxi", line 143, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow/error.pxi", line 114, in pyarrow.lib.check_status
OSError: Path does not exist 'data/dummy'
Steps to reproduce
- Create the default example using `feast init`
- Upload the example `driver_stats.parquet` data to the s3 dataset path: `s3://data/dummy/driver_stats.parquet`
- Change the data source to point to the s3 dataset:
@@ -1,28 +1,28 @@
 driver_stats_source = FileSource(
     name="driver_hourly_stats_source",
-    path="data/driver_stats.parquet",
+    path="s3://data/dummy/",
     timestamp_field="event_timestamp",
     created_timestamp_column="created",
 )
- Run `feast apply`
Specifications
- Version: 0.24.0
- Platform: MacOS
- Subsystem: Python 3.8
Possible Solution
This PR fixes this issue: #3217
I have found that this line (feast/infra/offline_stores/file_source.py, line 164, in get_table_column_names_and_types) causes the error:
    filesystem.open_input_file(path), filesystem=filesystem