From 87429cafb6256548d5bd8e9ff3e4200174552aa2 Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Mon, 30 Jun 2025 13:17:10 -0700 Subject: [PATCH 01/12] Draft: multi source support Signed-off-by: HaoXuAI --- protos/feast/core/FeatureView.proto | 2 + sdk/python/feast/batch_feature_view.py | 33 ++-- sdk/python/feast/feature_store.py | 2 +- sdk/python/feast/feature_view.py | 53 ++++-- sdk/python/feast/inference.py | 2 + .../compute_engines/algorithms/__init__.py | 0 .../infra/compute_engines/algorithms/topo.py | 50 ++++++ .../feast/infra/compute_engines/base.py | 40 ----- .../infra/compute_engines/dag/context.py | 3 - .../feast/infra/compute_engines/dag/node.py | 11 +- .../infra/compute_engines/feature_builder.py | 169 +++++++++++++----- .../infra/compute_engines/feature_resolver.py | 96 ++++++++++ .../infra/compute_engines/local/compute.py | 4 +- .../compute_engines/local/feature_builder.py | 87 +++++---- .../infra/compute_engines/local/nodes.py | 131 ++++++++------ .../infra/compute_engines/spark/compute.py | 3 + .../compute_engines/spark/feature_builder.py | 62 +++---- .../infra/compute_engines/spark/nodes.py | 145 ++++++++------- .../feast/infra/compute_engines/utils.py | 5 +- .../protos/feast/core/Aggregation_pb2.py | 6 +- .../feast/protos/feast/core/DataFormat_pb2.py | 6 +- .../feast/protos/feast/core/DataSource_pb2.py | 18 +- .../protos/feast/core/DatastoreTable_pb2.py | 6 +- .../feast/protos/feast/core/Entity_pb2.py | 10 +- .../protos/feast/core/FeatureService_pb2.py | 14 +- .../protos/feast/core/FeatureTable_pb2.py | 10 +- .../feast/core/FeatureViewProjection_pb2.py | 10 +- .../protos/feast/core/FeatureView_pb2.py | 30 ++-- .../protos/feast/core/FeatureView_pb2.pyi | 8 +- .../feast/protos/feast/core/Feature_pb2.py | 10 +- .../protos/feast/core/InfraObject_pb2.py | 6 +- .../feast/core/OnDemandFeatureView_pb2.py | 22 +-- .../feast/protos/feast/core/Permission_pb2.py | 14 +- .../feast/protos/feast/core/Policy_pb2.py | 6 +- .../feast/protos/feast/core/Project_pb2.py | 10 +- .../feast/protos/feast/core/Registry_pb2.py | 10 +- .../protos/feast/core/SavedDataset_pb2.py | 10 +- .../protos/feast/core/SqliteTable_pb2.py | 6 +- .../feast/protos/feast/core/Store_pb2.py | 6 +- .../feast/core/StreamFeatureView_pb2.py | 14 +- .../protos/feast/core/Transformation_pb2.py | 6 +- .../feast/core/ValidationProfile_pb2.py | 10 +- .../feast/registry/RegistryServer_pb2.py | 50 +++--- .../protos/feast/serving/Connector_pb2.py | 6 +- .../protos/feast/serving/GrpcServer_pb2.py | 14 +- .../feast/serving/ServingService_pb2.py | 18 +- .../serving/TransformationService_pb2.py | 6 +- .../feast/protos/feast/storage/Redis_pb2.py | 6 +- .../feast/protos/feast/types/EntityKey_pb2.py | 6 +- .../feast/protos/feast/types/Field_pb2.py | 10 +- .../feast/protos/feast/types/Value_pb2.py | 6 +- .../compute_engines/spark/test_compute_dag.py | 159 ++++++++++++++++ .../compute_engines/test_feature_builder.py | 125 +++++++++++++ 53 files changed, 1045 insertions(+), 507 deletions(-) create mode 100644 sdk/python/feast/infra/compute_engines/algorithms/__init__.py create mode 100644 sdk/python/feast/infra/compute_engines/algorithms/topo.py create mode 100644 sdk/python/feast/infra/compute_engines/feature_resolver.py create mode 100644 sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py create mode 100644 sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py diff --git a/protos/feast/core/FeatureView.proto b/protos/feast/core/FeatureView.proto index 481ae00403f..58d641d40da 100644 --- 
a/protos/feast/core/FeatureView.proto +++ b/protos/feast/core/FeatureView.proto @@ -79,6 +79,8 @@ message FeatureViewSpec { // Whether these features should be written to the offline store bool offline = 13; + + FeatureViewSpec source_view = 14; } message FeatureViewMeta { diff --git a/sdk/python/feast/batch_feature_view.py b/sdk/python/feast/batch_feature_view.py index 933696ced33..5b90cc15e11 100644 --- a/sdk/python/feast/batch_feature_view.py +++ b/sdk/python/feast/batch_feature_view.py @@ -65,7 +65,7 @@ class BatchFeatureView(FeatureView): materialization_intervals: List[Tuple[datetime, datetime]] udf: Optional[Callable[[Any], Any]] udf_string: Optional[str] - feature_transformation: Transformation + feature_transformation: Optional[Transformation] batch_engine: Optional[Field] aggregations: Optional[List[Aggregation]] @@ -74,7 +74,8 @@ def __init__( *, name: str, mode: Union[TransformationMode, str] = TransformationMode.PYTHON, - source: DataSource, + source: Optional[DataSource] = None, + source_view: Optional["FeatureView"] = None, entities: Optional[List[Entity]] = None, ttl: Optional[timedelta] = None, tags: Optional[Dict[str, str]] = None, @@ -83,7 +84,7 @@ def __init__( description: str = "", owner: str = "", schema: Optional[List[Field]] = None, - udf: Optional[Callable[[Any], Any]], + udf: Optional[Callable[[Any], Any]] = None, udf_string: Optional[str] = "", feature_transformation: Optional[Transformation] = None, batch_engine: Optional[Field] = None, @@ -96,14 +97,17 @@ def __init__( RuntimeWarning, ) - if ( - type(source).__name__ not in SUPPORTED_BATCH_SOURCES - and source.to_proto().type != DataSourceProto.SourceType.CUSTOM_SOURCE - ): - raise ValueError( - f"Batch feature views need a batch source, expected one of {SUPPORTED_BATCH_SOURCES} " - f"or CUSTOM_SOURCE, got {type(source).__name__}: {source.name} instead " - ) + if source is not None: + if ( + type(source).__name__ not in SUPPORTED_BATCH_SOURCES + and source.to_proto().type != DataSourceProto.SourceType.CUSTOM_SOURCE + ): + raise ValueError( + f"Batch feature views need a batch source, expected one of {SUPPORTED_BATCH_SOURCES} " + f"or CUSTOM_SOURCE, got {type(source).__name__}: {source.name} instead " + ) + elif source_view is None: + raise ValueError("BatchFeatureView must have either 'source' or 'source_view'.") self.mode = mode self.udf = udf @@ -125,13 +129,12 @@ def __init__( owner=owner, schema=schema, source=source, + source_view=source_view ) - def get_feature_transformation(self) -> Transformation: + def get_feature_transformation(self) -> Optional[Transformation]: if not self.udf: - raise ValueError( - "Either a UDF or a feature transformation must be provided for BatchFeatureView" - ) + return if self.mode in ( TransformationMode.PANDAS, TransformationMode.PYTHON, diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 5cc232d5fca..76578d80adb 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -903,7 +903,7 @@ def apply( for fv in itertools.chain( views_to_update, sfvs_to_update, odfvs_with_writes_to_update ): - if isinstance(fv, FeatureView): + if isinstance(fv, FeatureView) and fv.batch_source: data_sources_set_to_update.add(fv.batch_source) if hasattr(fv, "stream_source"): if fv.stream_source: diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 2c2106f5a3e..c132cdc41b9 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -90,6 +90,7 @@ class 
FeatureView(BaseFeatureView): ttl: Optional[timedelta] batch_source: DataSource stream_source: Optional[DataSource] + source_view: Optional["FeatureView"] entity_columns: List[Field] features: List[Field] online: bool @@ -103,7 +104,8 @@ def __init__( self, *, name: str, - source: DataSource, + source: Optional[DataSource] = None, + source_view: Optional["FeatureView"] = None, schema: Optional[List[Field]] = None, entities: Optional[List[Entity]] = None, ttl: Optional[timedelta] = timedelta(days=0), @@ -144,6 +146,9 @@ def __init__( self.ttl = ttl schema = schema or [] + if (source is None) == (source_view is None): + raise ValueError("FeatureView must have exactly one of 'source' or 'source_view', not both/neither.") + # Initialize data sources. if ( isinstance(source, PushSource) @@ -201,17 +206,18 @@ def __init__( ) # TODO(felixwang9817): Add more robust validation of features. - cols = [field.name for field in schema] - for col in cols: - if ( - self.batch_source.field_mapping is not None - and col in self.batch_source.field_mapping.keys() - ): - raise ValueError( - f"The field {col} is mapped to {self.batch_source.field_mapping[col]} for this data source. " - f"Please either remove this field mapping or use {self.batch_source.field_mapping[col]} as the " - f"Entity or Feature name." - ) + if source is not None: + cols = [field.name for field in schema] + for col in cols: + if ( + self.batch_source.field_mapping is not None + and col in self.batch_source.field_mapping.keys() + ): + raise ValueError( + f"The field {col} is mapped to {self.batch_source.field_mapping[col]} for this data source. " + f"Please either remove this field mapping or use {self.batch_source.field_mapping[col]} as the " + f"Entity or Feature name." + ) super().__init__( name=name, @@ -224,6 +230,7 @@ def __init__( self.online = online self.offline = offline self.materialization_intervals = [] + self.source_view = source_view def __hash__(self): return super().__hash__() @@ -348,13 +355,18 @@ def to_proto(self) -> FeatureViewProto: meta = self.to_proto_meta() ttl_duration = self.get_ttl_duration() - batch_source_proto = self.batch_source.to_proto() - batch_source_proto.data_source_class_type = f"{self.batch_source.__class__.__module__}.{self.batch_source.__class__.__name__}" + batch_source_proto = None + if self.batch_source: + batch_source_proto = self.batch_source.to_proto() + batch_source_proto.data_source_class_type = f"{self.batch_source.__class__.__module__}.{self.batch_source.__class__.__name__}" stream_source_proto = None if self.stream_source: stream_source_proto = self.stream_source.to_proto() stream_source_proto.data_source_class_type = f"{self.stream_source.__class__.__module__}.{self.stream_source.__class__.__name__}" + source_view_proto = None + if self.source_view: + source_view_proto = self.source_view.to_proto().spec spec = FeatureViewSpecProto( name=self.name, entities=self.entities, @@ -368,6 +380,7 @@ def to_proto(self) -> FeatureViewProto: offline=self.offline, batch_source=batch_source_proto, stream_source=stream_source_proto, + source_view=source_view_proto, ) return FeatureViewProto(spec=spec, meta=meta) @@ -403,12 +416,21 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): Returns: A FeatureViewProto object based on the feature view protobuf. 
""" - batch_source = DataSource.from_proto(feature_view_proto.spec.batch_source) + batch_source = ( + DataSource.from_proto(feature_view_proto.spec.batch_source) + if feature_view_proto.spec.HasField("batch_source") + else None + ) stream_source = ( DataSource.from_proto(feature_view_proto.spec.stream_source) if feature_view_proto.spec.HasField("stream_source") else None ) + source_view = ( + FeatureView.from_proto(FeatureViewProto(spec=feature_view_proto.spec.source_view, meta=None)) + if feature_view_proto.spec.HasField("source_view") + else None + ) feature_view = cls( name=feature_view_proto.spec.name, description=feature_view_proto.spec.description, @@ -422,6 +444,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): else feature_view_proto.spec.ttl.ToTimedelta() ), source=batch_source, + source_view=source_view, ) if stream_source: feature_view.stream_source = stream_source diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index f2a2ee637fd..688cb7479d0 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -26,6 +26,8 @@ def update_data_sources_with_inferred_event_timestamp_col( ) -> None: ERROR_MSG_PREFIX = "Unable to infer DataSource timestamp_field" for data_source in data_sources: + if data_source is None: + continue if isinstance(data_source, RequestSource): continue if isinstance(data_source, PushSource): diff --git a/sdk/python/feast/infra/compute_engines/algorithms/__init__.py b/sdk/python/feast/infra/compute_engines/algorithms/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/compute_engines/algorithms/topo.py b/sdk/python/feast/infra/compute_engines/algorithms/topo.py new file mode 100644 index 00000000000..0de1dcbff62 --- /dev/null +++ b/sdk/python/feast/infra/compute_engines/algorithms/topo.py @@ -0,0 +1,50 @@ +# feast/infra/compute_engines/dag/utils.py + +from typing import List, Set +from feast.infra.compute_engines.dag.node import DAGNode + + +def topo_sort(root: DAGNode) -> List[DAGNode]: + """ + Topologically sort a DAGNode graph starting from root. + + Returns: + List of DAGNodes in execution-safe order (dependencies first). + """ + visited: Set[int] = set() + ordered: List[DAGNode] = [] + + def dfs(node: DAGNode): + if id(node) in visited: + return + visited.add(id(node)) + for input_node in node.inputs: + dfs(input_node) + ordered.append(node) + + dfs(root) + return ordered + + +def topo_sort_multiple(roots: List[DAGNode]) -> List[DAGNode]: + """ + Topologically sort a DAG with multiple roots (e.g., multiple write nodes). + + Returns: + List of all reachable DAGNodes in execution order. 
diff --git a/sdk/python/feast/infra/compute_engines/base.py b/sdk/python/feast/infra/compute_engines/base.py
index 6acdb8d11d6..4b9dbe16de7 100644
--- a/sdk/python/feast/infra/compute_engines/base.py
+++ b/sdk/python/feast/infra/compute_engines/base.py
@@ -124,52 +124,12 @@ def get_execution_context(
         if hasattr(task, "entity_df") and task.entity_df is not None:
             entity_df = task.entity_df
 
-        column_info = self.get_column_info(registry, task)
         return ExecutionContext(
             project=task.project,
             repo_config=self.repo_config,
             offline_store=self.offline_store,
             online_store=self.online_store,
             entity_defs=entity_defs,
-            column_info=column_info,
             entity_df=entity_df,
         )
 
-    def get_column_info(
-        self,
-        registry: BaseRegistry,
-        task: Union[MaterializationTask, HistoricalRetrievalTask],
-    ) -> ColumnInfo:
-        entities = []
-        for entity_name in task.feature_view.entities:
-            entities.append(registry.get_entity(entity_name, task.project))
-
-        join_keys, feature_cols, ts_col, created_ts_col = _get_column_names(
-            task.feature_view, entities
-        )
-        field_mapping = self.get_field_mapping(task.feature_view)
-
-        return ColumnInfo(
-            join_keys=join_keys,
-            feature_cols=feature_cols,
-            ts_col=ts_col,
-            created_ts_col=created_ts_col,
-            field_mapping=field_mapping,
-        )
-
-    def get_field_mapping(
-        self, feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView]
-    ) -> Optional[dict]:
-        """
-        Get the field mapping for a feature view.
-        Args:
-            feature_view: The feature view to get the field mapping for.
-
-        Returns:
-            A dictionary mapping field names to column names.
-        """
-        if feature_view.stream_source:
-            return feature_view.stream_source.field_mapping
-        if feature_view.batch_source:
-            return feature_view.batch_source.field_mapping
-        return None
diff --git a/sdk/python/feast/infra/compute_engines/dag/context.py b/sdk/python/feast/infra/compute_engines/dag/context.py
index 6b1970d25f8..46eda356223 100644
--- a/sdk/python/feast/infra/compute_engines/dag/context.py
+++ b/sdk/python/feast/infra/compute_engines/dag/context.py
@@ -82,15 +82,12 @@ class ExecutionContext:
         node_outputs: Internal cache of DAGValue outputs keyed by DAGNode name.
            Automatically populated during ExecutionPlan execution to avoid redundant computation.
            Used by downstream nodes to access their input data.
- - field_mapping: A mapping of field names to their corresponding column names in the """ project: str repo_config: RepoConfig offline_store: OfflineStore online_store: OnlineStore - column_info: ColumnInfo entity_defs: List[Entity] entity_df: Union[pd.DataFrame, None] = None node_outputs: Dict[str, DAGValue] = field(default_factory=dict) diff --git a/sdk/python/feast/infra/compute_engines/dag/node.py b/sdk/python/feast/infra/compute_engines/dag/node.py index 033ae8f1780..9fb520e7c13 100644 --- a/sdk/python/feast/infra/compute_engines/dag/node.py +++ b/sdk/python/feast/infra/compute_engines/dag/node.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import List +from typing import List, Optional from feast.infra.compute_engines.dag.context import ExecutionContext from feast.infra.compute_engines.dag.value import DAGValue @@ -10,10 +10,13 @@ class DAGNode(ABC): inputs: List["DAGNode"] outputs: List["DAGNode"] - def __init__(self, name: str): + def __init__(self, name: str, inputs: Optional[List["DAGNode"]] = None): self.name = name - self.inputs = [] - self.outputs = [] + self.inputs: List["DAGNode"] = [] + self.outputs: List["DAGNode"] = [] + + for node in inputs or []: + self.add_input(node) def add_input(self, node: "DAGNode"): if node in self.inputs: diff --git a/sdk/python/feast/infra/compute_engines/feature_builder.py b/sdk/python/feast/infra/compute_engines/feature_builder.py index 9d4e4466499..0966bb9ce40 100644 --- a/sdk/python/feast/infra/compute_engines/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/feature_builder.py @@ -1,10 +1,15 @@ from abc import ABC, abstractmethod -from typing import Union +from typing import Union, List, Optional +from feast import BatchFeatureView, StreamFeatureView, FeatureView from feast.infra.common.materialization_job import MaterializationTask from feast.infra.common.retrieval_task import HistoricalRetrievalTask from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.dag.plan import ExecutionPlan +from feast.infra.compute_engines.feature_resolver import FeatureResolver, FeatureViewNode +from feast.infra.compute_engines.dag.context import ColumnInfo +from feast.infra.registry.base_registry import BaseRegistry +from feast.utils import _get_column_names class FeatureBuilder(ABC): @@ -14,85 +19,157 @@ class FeatureBuilder(ABC): """ def __init__( - self, - task: Union[MaterializationTask, HistoricalRetrievalTask], + self, + registry: BaseRegistry, + feature_view, + task: Union[MaterializationTask, HistoricalRetrievalTask], ): - self.feature_view = task.feature_view + self.registry = registry self.task = task - self.nodes: list[DAGNode] = [] + self.nodes: List[DAGNode] = [] + self.resolver = FeatureResolver() + self.dag_root = self.resolver.resolve(feature_view) + self.sorted_nodes = self.resolver.topo_sort(self.dag_root) @abstractmethod - def build_source_node(self): + def build_source_node(self, view): raise NotImplementedError @abstractmethod - def build_aggregation_node(self, input_node): + def build_aggregation_node(self, + view, + input_node): raise NotImplementedError @abstractmethod - def build_join_node(self, input_node): + def build_join_node(self, + view, + input_node): raise NotImplementedError @abstractmethod - def build_filter_node(self, input_node): + def build_filter_node(self, + view, + input_node): raise NotImplementedError @abstractmethod - def build_dedup_node(self, input_node): + def build_dedup_node(self, + view, + input_node): raise NotImplementedError 
@abstractmethod
-    def build_transformation_node(self, input_node):
+    def build_transformation_node(self,
+                                  view,
+                                  input_node):
         raise NotImplementedError
 
     @abstractmethod
-    def build_output_nodes(self, input_node):
+    def build_output_nodes(self,
+                           final_node):
         raise NotImplementedError
 
     @abstractmethod
-    def build_validation_node(self, input_node):
-        raise
-
-    def _should_aggregate(self):
-        return (
-            hasattr(self.feature_view, "aggregations")
-            and self.feature_view.aggregations is not None
-            and len(self.feature_view.aggregations) > 0
-        )
+    def build_validation_node(self,
+                              view,
+                              input_node):
+        raise NotImplementedError
 
-    def _should_transform(self):
-        return (
-            hasattr(self.feature_view, "feature_transformation")
-            and self.feature_view.feature_transformation
-        )
+    def _should_aggregate(self,
+                          view):
+        return bool(getattr(view, "aggregations", []))
 
-    def _should_validate(self):
-        return getattr(self.feature_view, "enable_validation", False)
+    def _should_transform(self,
+                          view):
+        return bool(getattr(view, "feature_transformation", None))
 
-    def _should_dedupe(self, task):
-        return isinstance(task, HistoricalRetrievalTask) or task.only_latest
+    def _should_validate(self,
+                         view):
+        return getattr(view, "enable_validation", False)
 
-    def build(self) -> ExecutionPlan:
-        last_node = self.build_source_node()
+    def _should_dedupe(self,
+                       view):
+        return isinstance(self.task, HistoricalRetrievalTask) or self.task.only_latest
+
+    def _build(self,
+               current_node: FeatureViewNode) -> DAGNode:
+        current_view = current_node.view
 
-        # Join entity_df with source if needed
-        last_node = self.build_join_node(last_node)
+        # Step 1: build source or parent join
+        if current_node.parent:
+            parent_node = self._build(current_node.parent)
+            last_node = self.build_join_node(current_view, parent_node)
+        else:
+            last_node = self.build_source_node(current_view)
 
-        # PIT filter, TTL, and user-defined filter
-        last_node = self.build_filter_node(last_node)
+        # Step 2: filter
+        last_node = self.build_filter_node(current_view, last_node)
 
-        if self._should_aggregate():
-            last_node = self.build_aggregation_node(last_node)
+        # Step 3: aggregate or dedupe
+        if self._should_aggregate(current_view):
+            last_node = self.build_aggregation_node(current_view, last_node)
+        elif self._should_dedupe(current_view):
+            last_node = self.build_dedup_node(current_view, last_node)
 
-        # Dedupe only if not aggregated
-        elif self._should_dedupe(self.task):
-            last_node = self.build_dedup_node(last_node)
+        # Step 4: transform
+        if self._should_transform(current_view):
+            last_node = self.build_transformation_node(current_view, last_node)
 
-        if self._should_transform():
-            last_node = self.build_transformation_node(last_node)
+        # Step 5: validate
+        if self._should_validate(current_view):
+            last_node = self.build_validation_node(current_view, last_node)
 
-        if self._should_validate():
-            last_node = self.build_validation_node(last_node)
+        return last_node
 
-        self.build_output_nodes(last_node)
+    def build(self) -> ExecutionPlan:
+        final_node = self._build(self.dag_root)
+        self.build_output_nodes(final_node)
         return ExecutionPlan(self.nodes)
+
+    def get_column_info(
+        self,
+        view: Union[BatchFeatureView, StreamFeatureView, FeatureView],
+    ) -> ColumnInfo:
+        entities = []
+        for entity_name in view.entities:
+            entities.append(self.registry.get_entity(entity_name, self.task.project))
+
+        if view.source_view:
+            # If the view has a source_view, the column information comes from the
+            # tags dict; unpack it to get those values.
+            join_keys = view.source_view.tags.get("join_keys", [])
+            feature_cols = view.source_view.tags.get("feature_cols", [])
+            ts_col = view.source_view.tags.get("ts_col", None)
+            created_ts_col = view.source_view.tags.get("created_ts_col", None)
+        else:
+            join_keys, feature_cols, ts_col, created_ts_col = _get_column_names(
+                view, entities
+            )
+        field_mapping = self.get_field_mapping(view)
+
+        return ColumnInfo(
+            join_keys=join_keys,
+            feature_cols=feature_cols,
+            ts_col=ts_col,
+            created_ts_col=created_ts_col,
+            field_mapping=field_mapping,
+        )
+
+    def get_field_mapping(
+        self,
+        feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView]
+    ) -> Optional[dict]:
+        """
+        Get the field mapping for a feature view.
+        Args:
+            feature_view: The feature view to get the field mapping for.
+
+        Returns:
+            A dictionary mapping field names to column names.
+        """
+        if feature_view.stream_source:
+            return feature_view.stream_source.field_mapping
+        if feature_view.batch_source:
+            return feature_view.batch_source.field_mapping
+        return None
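
[Editor's note] A sketch of the API this enables and the plan shape FeatureBuilder._build
produces for it. All names below (driver, driver_stats_source, compute_* UDFs) are
hypothetical; the FeatureResolver used to walk the source_view chain is introduced in
the next file:

    daily_stats = BatchFeatureView(
        name="daily_stats",
        entities=[driver],                 # hypothetical Entity
        source=driver_stats_source,        # leaf view: reads from a DataSource
        udf=compute_daily_stats,
        udf_string="compute_daily_stats",
    )

    weekly_stats = BatchFeatureView(
        name="weekly_stats",
        entities=[driver],
        source_view=daily_stats,           # derived view: reads from another view
        udf=compute_weekly_stats,
        udf_string="compute_weekly_stats",
    )

    # _build recurses parent-first, so the resulting DAG is roughly:
    #   source(daily) -> filter(daily) -> transform(daily)
    #     -> join(weekly) -> filter(weekly) -> transform(weekly) -> output
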
diff --git a/sdk/python/feast/infra/compute_engines/feature_resolver.py b/sdk/python/feast/infra/compute_engines/feature_resolver.py
new file mode 100644
index 00000000000..22ee4790906
--- /dev/null
+++ b/sdk/python/feast/infra/compute_engines/feature_resolver.py
@@ -0,0 +1,96 @@
+from typing import List, Optional, Set
+
+from feast.feature_view import FeatureView
+
+
+class FeatureViewNode:
+    """
+    Logical representation of a node in the FeatureView dependency DAG.
+    """
+    def __init__(self,
+                 view: FeatureView):
+        self.view: FeatureView = view
+        self.parent: Optional["FeatureViewNode"] = None
+
+
+class FeatureResolver:
+    """
+    Resolves FeatureViews into a dependency graph (DAG) of FeatureViewNode objects.
+    This graph represents the logical dependencies between FeatureViews, allowing
+    for ordered execution and cycle detection.
+    """
+    def __init__(self):
+        # Used to detect and prevent cycles in the FeatureView graph.
+        self.visited: Set[str] = set()
+        self.resolution_path: List[str] = []
+
+    def resolve(self,
+                feature_view: FeatureView) -> FeatureViewNode:
+        """
+        Entry point for resolving a FeatureView into a DAG node.
+
+        Args:
+            feature_view: The root FeatureView to build the dependency graph from.
+
+        Returns:
+            A FeatureViewNode representing the root of the logical dependency DAG.
+        """
+        root = FeatureViewNode(feature_view)
+        self._walk(root)
+        return root
+
+    def _walk(self,
+              node: FeatureViewNode):
+        """
+        Recursive traversal of the FeatureView graph.
+
+        If `source_view` is set on the FeatureView, a parent node is created and added.
+        Cycles are detected using the visited set.
+
+        Args:
+            node: The current FeatureViewNode being processed.
+        """
+        view = node.view
+        if view.name in self.visited:
+            cycle_path = " → ".join(self.resolution_path + [view.name])
+            raise ValueError(f"Cycle detected in FeatureView graph: {cycle_path}")
+        self.visited.add(view.name)
+        self.resolution_path.append(view.name)
+
+        # TODO: Only one parent is allowed via source_view, support more than one
+        if view.source_view:
+            parent_node = FeatureViewNode(view.source_view)
+            node.parent = parent_node
+            self._walk(parent_node)
+
+        self.resolution_path.pop()
+
+    def topo_sort(self, root: FeatureViewNode) -> List[FeatureViewNode]:
+        visited = set()
+        ordered: List[FeatureViewNode] = []
+
+        def dfs(node: FeatureViewNode):
+            if id(node) in visited:
+                return
+            visited.add(id(node))
+            if node.parent:
+                dfs(node.parent)
+            ordered.append(node)
+
+        dfs(root)
+        return ordered
+
+    def debug_dag(self,
+                  node: FeatureViewNode,
+                  depth=0):
+        """
+        Prints the FeatureView dependency DAG for debugging.
+
+        Args:
+            node: The root node to print from.
+            depth: Internal argument used for recursive indentation.
+        """
+        indent = "  " * depth
+        print(f"{indent}- {node.view.name}")
+        if node.parent:
+            self.debug_dag(node.parent, depth + 1)
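
[Editor's note] A short sketch of how the resolver is expected to be used, reusing the
hypothetical views from the note above; the debug_dag output shown is illustrative:

    resolver = FeatureResolver()
    root = resolver.resolve(weekly_stats)     # FeatureViewNode for weekly_stats
    ordered = resolver.topo_sort(root)        # parents first
    print([n.view.name for n in ordered])     # ['daily_stats', 'weekly_stats']

    resolver.debug_dag(root)
    # - weekly_stats
    #   - daily_stats
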
diff --git a/sdk/python/feast/infra/compute_engines/local/compute.py b/sdk/python/feast/infra/compute_engines/local/compute.py
index 0b99a58c304..341b20dee02 100644
--- a/sdk/python/feast/infra/compute_engines/local/compute.py
+++ b/sdk/python/feast/infra/compute_engines/local/compute.py
@@ -68,7 +68,7 @@ def _materialize_one(
         backend = self._get_backend(context)
 
         try:
-            builder = LocalFeatureBuilder(task, backend=backend)
+            builder = LocalFeatureBuilder(registry, task, backend=backend)
             plan = builder.build()
             plan.execute(context)
             return LocalMaterializationJob(
@@ -90,7 +90,7 @@ def get_historical_features(
         backend = self._get_backend(context)
 
         try:
-            builder = LocalFeatureBuilder(task=task, backend=backend)
+            builder = LocalFeatureBuilder(registry, task=task, backend=backend)
             plan = builder.build()
             return LocalRetrievalJob(
                 plan=plan,
diff --git a/sdk/python/feast/infra/compute_engines/local/feature_builder.py b/sdk/python/feast/infra/compute_engines/local/feature_builder.py
index e3e29099360..f7163262735 100644
--- a/sdk/python/feast/infra/compute_engines/local/feature_builder.py
+++ b/sdk/python/feast/infra/compute_engines/local/feature_builder.py
@@ -1,5 +1,6 @@
 from typing import Union
 
+from feast.data_source import DataSource
 from feast.infra.common.materialization_job import MaterializationTask
 from feast.infra.common.retrieval_task import HistoricalRetrievalTask
 from feast.infra.compute_engines.feature_builder import FeatureBuilder
@@ -14,86 +15,78 @@
     LocalTransformationNode,
     LocalValidationNode,
 )
+from feast.infra.registry.base_registry import BaseRegistry
 
 
 class LocalFeatureBuilder(FeatureBuilder):
     def __init__(
         self,
+        registry: BaseRegistry,
         task: Union[MaterializationTask, HistoricalRetrievalTask],
         backend: DataFrameBackend,
     ):
-        super().__init__(task)
+        super().__init__(registry, task.feature_view, task)
         self.backend = backend
 
-    def build_source_node(self):
-        source = self.feature_view.batch_source
+    def build_source_node(self, view):
         start_time = self.task.start_time
         end_time = self.task.end_time
-        node = LocalSourceReadNode("source", source, start_time, end_time)
+        column_info = self.get_column_info(view)
+        source = view.batch_source
+        node = LocalSourceReadNode("source", source, column_info, start_time, end_time)
         self.nodes.append(node)
         return node
 
-    def build_join_node(self, input_node):
-        node = LocalJoinNode("join", self.backend)
-        node.add_input(input_node)
+    def build_join_node(self, view, input_node):
+        column_info = self.get_column_info(view)
+        node = LocalJoinNode("join", column_info, self.backend, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    def build_filter_node(self, input_node):
-        filter_expr = None
-        if hasattr(self.feature_view, "filter"):
-            filter_expr = self.feature_view.filter
-        ttl = self.feature_view.ttl
-        node = LocalFilterNode("filter", self.backend, filter_expr, ttl)
-        node.add_input(input_node)
+    def build_filter_node(self, view, input_node):
+        filter_expr = getattr(view, "filter", None)
+        ttl = getattr(view, "ttl", None)
+        column_info = self.get_column_info(view)
+        node = LocalFilterNode("filter", column_info, self.backend, filter_expr, ttl, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    @staticmethod
-    def _get_aggregate_operations(agg_specs):
-        agg_ops = {}
-        for agg in agg_specs:
-            if agg.time_window is not None:
-                raise ValueError(
-                    "Time window aggregation is not supported in local compute engine. Please use a different compute "
-                    "engine."
-                )
-            alias = f"{agg.function}_{agg.column}"
-            agg_ops[alias] = (agg.function, agg.column)
-        return agg_ops
-
-    def build_aggregation_node(self, input_node):
-        agg_specs = self.feature_view.aggregations
+    def build_aggregation_node(self, view, input_node):
+        agg_specs = view.aggregations
         agg_ops = self._get_aggregate_operations(agg_specs)
-        group_by_keys = self.feature_view.entities
-        node = LocalAggregationNode("agg", self.backend, group_by_keys, agg_ops)
-        node.add_input(input_node)
+        group_by_keys = view.entities
+        node = LocalAggregationNode("agg", self.backend, group_by_keys, agg_ops, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    def build_dedup_node(self, input_node):
-        node = LocalDedupNode("dedup", self.backend)
-        node.add_input(input_node)
+    def build_dedup_node(self, view, input_node):
+        column_info = self.get_column_info(view)
+        node = LocalDedupNode("dedup", column_info, self.backend, inputs=[input_node])
        self.nodes.append(node)
         return node
 
-    def build_transformation_node(self, input_node):
-        node = LocalTransformationNode(
-            "transform", self.feature_view.feature_transformation, self.backend
-        )
-        node.add_input(input_node)
+    def build_transformation_node(self, view, input_node):
+        transform_config = view.feature_transformation
+        node = LocalTransformationNode("transform", transform_config, self.backend, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    def build_validation_node(self, input_node):
-        node = LocalValidationNode(
-            "validate", self.feature_view.validation_config, self.backend
-        )
-        node.add_input(input_node)
+    def build_validation_node(self, view, input_node):
+        validation_config = view.validation_config
+        node = LocalValidationNode("validate", validation_config, self.backend, inputs=[input_node])
         self.nodes.append(node)
         return node
 
     def build_output_nodes(self, input_node):
-        node = LocalOutputNode("output", self.feature_view)
-        node.add_input(input_node)
+        node = LocalOutputNode("output", self.dag_root.view, inputs=[input_node])
         self.nodes.append(node)
+
+    @staticmethod
+    def _get_aggregate_operations(agg_specs):
+        agg_ops = {}
+        for agg in agg_specs:
+            if agg.time_window is not None:
+                raise ValueError("Time window aggregation is not supported in the local compute engine.")
+            alias = f"{agg.function}_{agg.column}"
+            agg_ops[alias] = (agg.function, agg.column)
+        return agg_ops
diff --git a/sdk/python/feast/infra/compute_engines/local/nodes.py 
b/sdk/python/feast/infra/compute_engines/local/nodes.py index a8c4405dd06..85bf9266f22 100644 --- a/sdk/python/feast/infra/compute_engines/local/nodes.py +++ b/sdk/python/feast/infra/compute_engines/local/nodes.py @@ -5,7 +5,7 @@ from feast import BatchFeatureView, StreamFeatureView from feast.data_source import DataSource -from feast.infra.compute_engines.dag.context import ExecutionContext +from feast.infra.compute_engines.dag.context import ExecutionContext, ColumnInfo from feast.infra.compute_engines.local.arrow_table_value import ArrowTableValue from feast.infra.compute_engines.local.backends.base import DataFrameBackend from feast.infra.compute_engines.local.local_node import LocalNode @@ -22,18 +22,21 @@ class LocalSourceReadNode(LocalNode): def __init__( - self, - name: str, - source: DataSource, - start_time: Optional[datetime] = None, - end_time: Optional[datetime] = None, + self, + name: str, + source: DataSource, + column_info: ColumnInfo, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, ): super().__init__(name) self.source = source + self.column_info = column_info self.start_time = start_time self.end_time = end_time - def execute(self, context: ExecutionContext) -> ArrowTableValue: + def execute(self, + context: ExecutionContext) -> ArrowTableValue: retrieval_job = create_offline_store_retrieval_job( data_source=self.source, context=context, @@ -41,20 +44,25 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: end_time=self.end_time, ) arrow_table = retrieval_job.to_arrow() - field_mapping = context.column_info.field_mapping - if field_mapping: + if self.column_info.field_mapping: arrow_table = arrow_table.rename_columns( - [field_mapping.get(col, col) for col in arrow_table.column_names] + [self.column_info.field_mapping.get(col, col) for col in arrow_table.column_names] ) return ArrowTableValue(data=arrow_table) class LocalJoinNode(LocalNode): - def __init__(self, name: str, backend: DataFrameBackend): - super().__init__(name) + def __init__(self, + name: str, + column_info: ColumnInfo, + backend: DataFrameBackend, + inputs=None): + super().__init__(name, inputs=inputs) + self.column_info = column_info self.backend = backend - def execute(self, context: ExecutionContext) -> ArrowTableValue: + def execute(self, + context: ExecutionContext) -> ArrowTableValue: feature_table = self.get_single_table(context).data if context.entity_df is None: @@ -71,14 +79,12 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: entity_schema ) - column_info = context.column_info - entity_df = self.backend.rename_columns( entity_df, {entity_df_event_timestamp_col: ENTITY_TS_ALIAS} ) joined_df = self.backend.join( - feature_df, entity_df, on=column_info.join_keys, how="left" + feature_df, entity_df, on=self.column_info.join_keys, how="left" ) result = self.backend.to_arrow(joined_df) output = ArrowTableValue(result) @@ -87,23 +93,25 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: class LocalFilterNode(LocalNode): - def __init__( - self, - name: str, - backend: DataFrameBackend, - filter_expr: Optional[str] = None, - ttl: Optional[timedelta] = None, - ): - super().__init__(name) + def __init__(self, + name: str, + column_info: ColumnInfo, + backend: DataFrameBackend, + filter_expr: Optional[str] = None, + ttl: Optional[timedelta] = None, + inputs=None): + super().__init__(name, inputs=inputs) + self.column_info = column_info self.backend = backend self.filter_expr = filter_expr self.ttl = ttl - def execute(self, 
context: ExecutionContext) -> ArrowTableValue: + def execute(self, + context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) - timestamp_column = context.column_info.timestamp_column + timestamp_column = self.column_info.timestamp_column if ENTITY_TS_ALIAS in self.backend.columns(df): # filter where feature.ts <= entity.event_timestamp @@ -127,15 +135,19 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: class LocalAggregationNode(LocalNode): - def __init__( - self, name: str, backend: DataFrameBackend, group_keys: list[str], agg_ops: dict - ): - super().__init__(name) + def __init__(self, + name: str, + backend: DataFrameBackend, + group_keys: list[str], + agg_ops: dict, + inputs=None): + super().__init__(name, inputs=inputs) self.backend = backend self.group_keys = group_keys self.agg_ops = agg_ops - def execute(self, context: ExecutionContext) -> ArrowTableValue: + def execute(self, + context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) grouped_df = self.backend.groupby_agg(df, self.group_keys, self.agg_ops) @@ -146,26 +158,31 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: class LocalDedupNode(LocalNode): - def __init__(self, name: str, backend: DataFrameBackend): - super().__init__(name) + def __init__(self, + name: str, + column_info: ColumnInfo, + backend: DataFrameBackend, + inputs=None): + super().__init__(name, inputs=inputs) + self.column_info = column_info self.backend = backend - def execute(self, context: ExecutionContext) -> ArrowTableValue: + def execute(self, + context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) # Extract join_keys, timestamp, and created_ts from context - column_info = context.column_info # Dedup strategy: sort and drop_duplicates - dedup_keys = context.column_info.join_keys + dedup_keys = self.column_info.join_keys if dedup_keys: - sort_keys = [column_info.timestamp_column] + sort_keys = [self.column_info.timestamp_column] if ( - column_info.created_timestamp_column - and column_info.created_timestamp_column in df.columns + self.column_info.created_timestamp_column + and self.column_info.created_timestamp_column in df.columns ): - sort_keys.append(column_info.created_timestamp_column) + sort_keys.append(self.column_info.created_timestamp_column) df = self.backend.drop_duplicates( df, keys=dedup_keys, sort_by=sort_keys, ascending=False @@ -177,12 +194,17 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: class LocalTransformationNode(LocalNode): - def __init__(self, name: str, transformation_fn, backend): - super().__init__(name) + def __init__(self, + name: str, + transformation_fn, + backend: DataFrameBackend, + inputs=None): + super().__init__(name, inputs=inputs) self.transformation_fn = transformation_fn self.backend = backend - def execute(self, context: ExecutionContext) -> ArrowTableValue: + def execute(self, + context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) transformed_df = self.transformation_fn(df) @@ -193,12 +215,17 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: class LocalValidationNode(LocalNode): - def __init__(self, name: str, validation_config, backend): - super().__init__(name) + def __init__(self, + name: str, + 
validation_config,
+                 backend: DataFrameBackend,
+                 inputs=None):
+        super().__init__(name, inputs=inputs)
         self.validation_config = validation_config
         self.backend = backend
 
-    def execute(self, context: ExecutionContext) -> ArrowTableValue:
+    def execute(self,
+                context: ExecutionContext) -> ArrowTableValue:
         input_table = self.get_single_table(context).data
         df = self.backend.from_arrow(input_table)
         # Placeholder for actual validation logic
@@ -211,13 +238,15 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue:
 
 
 class LocalOutputNode(LocalNode):
-    def __init__(
-        self, name: str, feature_view: Union[BatchFeatureView, StreamFeatureView]
-    ):
-        super().__init__(name)
+    def __init__(self,
+                 name: str,
+                 feature_view: Union[BatchFeatureView, StreamFeatureView],
+                 inputs=None):
+        super().__init__(name, inputs=inputs)
         self.feature_view = feature_view
 
-    def execute(self, context: ExecutionContext) -> ArrowTableValue:
+    def execute(self,
+                context: ExecutionContext) -> ArrowTableValue:
         input_table = self.get_single_table(context).data
         context.node_outputs[self.name] = input_table
diff --git a/sdk/python/feast/infra/compute_engines/spark/compute.py b/sdk/python/feast/infra/compute_engines/spark/compute.py
index 618a3b780f6..4753ddb9516 100644
--- a/sdk/python/feast/infra/compute_engines/spark/compute.py
+++ b/sdk/python/feast/infra/compute_engines/spark/compute.py
@@ -116,6 +116,7 @@ def _materialize_one(
         try:
             # ✅ 2. Construct Feature Builder and run it
             builder = SparkFeatureBuilder(
+                registry=registry,
                 spark_session=self.spark_session,
                 task=task,
             )
@@ -211,6 +213,7 @@ def get_historical_features(
         try:
             # ✅ 2. Construct Feature Builder and run it
             builder = SparkFeatureBuilder(
+                registry=registry,
                 spark_session=self.spark_session,
                 task=task,
             )
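
[Editor's note] With the registry now threaded through, the end-to-end flow for a
derived view looks roughly like this. This is a sketch under assumptions, not an
exact API transcript (MaterializationTask field names are assumed; `context` comes
from get_execution_context as in base.py, and weekly_stats is the hypothetical
derived view from the earlier note):

    task = MaterializationTask(
        project=project,
        feature_view=weekly_stats,
        start_time=start,
        end_time=end,
    )
    builder = SparkFeatureBuilder(
        registry=registry,
        spark_session=spark_session,
        task=task,
    )
    plan = builder.build()     # resolves the source_view chain into a single DAG
    plan.execute(context)
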
diff --git a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py
index a3059105950..d33a0b15eab 100644
--- a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py
+++ b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py
@@ -14,69 +14,65 @@
     SparkTransformationNode,
     SparkWriteNode,
 )
+from feast.infra.registry.base_registry import BaseRegistry
 
 
 class SparkFeatureBuilder(FeatureBuilder):
     def __init__(
         self,
+        registry: BaseRegistry,
         spark_session: SparkSession,
         task: Union[MaterializationTask, HistoricalRetrievalTask],
     ):
-        super().__init__(task)
+        super().__init__(registry, task.feature_view, task)
         self.spark_session = spark_session
 
-    def build_source_node(self):
-        source = self.feature_view.batch_source
+    def build_source_node(self, view):
         start_time = self.task.start_time
         end_time = self.task.end_time
-        node = SparkReadNode("source", source, self.spark_session, start_time, end_time)
+        source = view.batch_source
+        column_info = self.get_column_info(view)
+        node = SparkReadNode("source", source, column_info, self.spark_session, start_time, end_time)
         self.nodes.append(node)
         return node
 
-    def build_aggregation_node(self, input_node):
-        agg_specs = self.feature_view.aggregations
-        group_by_keys = self.feature_view.entities
-        timestamp_col = self.feature_view.batch_source.timestamp_field
-        node = SparkAggregationNode("agg", agg_specs, group_by_keys, timestamp_col)
-        node.add_input(input_node)
+    def build_aggregation_node(self, view, input_node):
+        agg_specs = view.aggregations
+        group_by_keys = view.entities
+        timestamp_col = view.batch_source.timestamp_field
+        node = SparkAggregationNode("agg", agg_specs, group_by_keys, timestamp_col, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    def build_join_node(self, input_node):
-        node = SparkJoinNode("join", self.spark_session)
-        node.add_input(input_node)
+    def build_join_node(self, view, input_node):
+        column_info = self.get_column_info(view)
+        node = SparkJoinNode("join", column_info, self.spark_session, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    def build_filter_node(self, input_node):
-        filter_expr = None
-        if hasattr(self.feature_view, "filter"):
-            filter_expr = self.feature_view.filter
-        ttl = self.feature_view.ttl
-        node = SparkFilterNode("filter", self.spark_session, ttl, filter_expr)
-        node.add_input(input_node)
+    def build_filter_node(self, view, input_node):
+        filter_expr = getattr(view, "filter", None)
+        ttl = getattr(view, "ttl", None)
+        column_info = self.get_column_info(view)
+        node = SparkFilterNode("filter", column_info, self.spark_session, ttl, filter_expr, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    def build_dedup_node(self, input_node):
-        node = SparkDedupNode("dedup", self.spark_session)
-        node.add_input(input_node)
+    def build_dedup_node(self, view, input_node):
+        column_info = self.get_column_info(view)
+        node = SparkDedupNode("dedup", column_info, self.spark_session, inputs=[input_node])
         self.nodes.append(node)
         return node
 
-    def build_transformation_node(self, input_node):
-        udf_name = self.feature_view.feature_transformation.name
-        udf = self.feature_view.feature_transformation.udf
+    def build_transformation_node(self, view, input_node):
+        udf_name = 
view.feature_transformation.name + udf = view.feature_transformation.udf + node = SparkTransformationNode(udf_name, udf, inputs=[input_node]) self.nodes.append(node) return node def build_output_nodes(self, input_node): - node = SparkWriteNode("output", self.feature_view) - node.add_input(input_node) + node = SparkWriteNode("output", self.dag_root.view, inputs=[input_node]) self.nodes.append(node) - return node - def build_validation_node(self, input_node): + def build_validation_node(self, view, input_node): pass diff --git a/sdk/python/feast/infra/compute_engines/spark/nodes.py b/sdk/python/feast/infra/compute_engines/spark/nodes.py index 1ab454daa52..0ea53017f25 100644 --- a/sdk/python/feast/infra/compute_engines/spark/nodes.py +++ b/sdk/python/feast/infra/compute_engines/spark/nodes.py @@ -8,7 +8,7 @@ from feast.aggregation import Aggregation from feast.data_source import DataSource from feast.infra.common.serde import SerializedArtifacts -from feast.infra.compute_engines.dag.context import ExecutionContext +from feast.infra.compute_engines.dag.context import ExecutionContext, ColumnInfo from feast.infra.compute_engines.dag.model import DAGFormat from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.dag.value import DAGValue @@ -32,7 +32,8 @@ # Rename entity_df event_timestamp_col to match feature_df def rename_entity_ts_column( - spark_session: SparkSession, entity_df: DataFrame + spark_session: SparkSession, + entity_df: DataFrame ) -> DataFrame: # check if entity_ts_alias already exists if ENTITY_TS_ALIAS in entity_df.columns: @@ -53,23 +54,26 @@ def rename_entity_ts_column( class SparkReadNode(DAGNode): def __init__( - self, - name: str, - source: DataSource, - spark_session: SparkSession, - start_time: Optional[datetime] = None, - end_time: Optional[datetime] = None, + self, + name: str, + source: DataSource, + column_info: ColumnInfo, + spark_session: SparkSession, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, ): super().__init__(name) self.source = source + self.column_info = column_info self.spark_session = spark_session self.start_time = start_time self.end_time = end_time - def execute(self, context: ExecutionContext) -> DAGValue: - column_info = context.column_info + def execute(self, + context: ExecutionContext) -> DAGValue: retrieval_job = create_offline_store_retrieval_job( data_source=self.source, + column_info=self.column_info, context=context, start_time=self.start_time, end_time=self.end_time, @@ -84,8 +88,8 @@ def execute(self, context: ExecutionContext) -> DAGValue: format=DAGFormat.SPARK, metadata={ "source": "feature_view_batch_source", - "timestamp_field": column_info.timestamp_column, - "created_timestamp_column": column_info.created_timestamp_column, + "timestamp_field": self.column_info.timestamp_column, + "created_timestamp_column": self.column_info.created_timestamp_column, "start_date": self.start_time, "end_date": self.end_time, }, @@ -94,18 +98,20 @@ def execute(self, context: ExecutionContext) -> DAGValue: class SparkAggregationNode(DAGNode): def __init__( - self, - name: str, - aggregations: List[Aggregation], - group_by_keys: List[str], - timestamp_col: str, + self, + name: str, + aggregations: List[Aggregation], + group_by_keys: List[str], + timestamp_col: str, + inputs=None, ): - super().__init__(name) + super().__init__(name, inputs=inputs) self.aggregations = aggregations self.group_by_keys = group_by_keys self.timestamp_col = timestamp_col - def execute(self, context: 
ExecutionContext) -> DAGValue: + def execute(self, + context: ExecutionContext) -> DAGValue: input_value = self.get_single_input_value(context) input_value.assert_format(DAGFormat.SPARK) input_df: DataFrame = input_value.data @@ -139,24 +145,29 @@ def execute(self, context: ExecutionContext) -> DAGValue: *self.group_by_keys, ).agg(*agg_exprs) + print("[SparkAggregationNode] Output schema:", grouped.columns) + return DAGValue( data=grouped, format=DAGFormat.SPARK, metadata={"aggregated": True} ) class SparkJoinNode(DAGNode): - def __init__( - self, - name: str, - spark_session: SparkSession, - ): - super().__init__(name) + def __init__(self, + name: str, + column_info: ColumnInfo, + spark_session: SparkSession, + inputs=None): + super().__init__(name, inputs=inputs) + self.column_info = column_info self.spark_session = spark_session - def execute(self, context: ExecutionContext) -> DAGValue: + def execute(self, + context: ExecutionContext) -> DAGValue: feature_value = self.get_single_input_value(context) feature_value.assert_format(DAGFormat.SPARK) feature_df: DataFrame = feature_value.data + print("[SparkJoinNode] Input schema:", feature_df.columns) entity_df = context.entity_df if entity_df is None: @@ -166,9 +177,6 @@ def execute(self, context: ExecutionContext) -> DAGValue: metadata={"joined_on": None}, ) - # Get timestamp fields from feature view - column_info = context.column_info - # Rename entity_df event_timestamp_col to match feature_df entity_df = rename_entity_ts_column( spark_session=self.spark_session, @@ -177,35 +185,38 @@ def execute(self, context: ExecutionContext) -> DAGValue: # Perform left join on entity df # TODO: give a config option to use other join types - joined = feature_df.join(entity_df, on=column_info.join_keys, how="left") + joined = feature_df.join(entity_df, on=self.column_info.join_keys, how="left") return DAGValue( data=joined, format=DAGFormat.SPARK, - metadata={"joined_on": column_info.join_keys}, + metadata={"joined_on": self.column_info.join_keys}, ) class SparkFilterNode(DAGNode): - def __init__( - self, - name: str, - spark_session: SparkSession, - ttl: Optional[timedelta] = None, - filter_condition: Optional[str] = None, - ): - super().__init__(name) + def __init__(self, + name: str, + column_info: ColumnInfo, + spark_session: SparkSession, + ttl: Optional[timedelta] = None, + filter_condition: Optional[str] = None, + inputs=None): + super().__init__(name, inputs=inputs) + self.column_info = column_info self.spark_session = spark_session self.ttl = ttl self.filter_condition = filter_condition - def execute(self, context: ExecutionContext) -> DAGValue: + def execute(self, + context: ExecutionContext) -> DAGValue: input_value = self.get_single_input_value(context) input_value.assert_format(DAGFormat.SPARK) input_df: DataFrame = input_value.data + print("[SparkFilterNode] Input schema:", input_df.columns) # Get timestamp fields from feature view - timestamp_column = context.column_info.timestamp_column + timestamp_column = self.column_info.timestamp_column # Optional filter: feature.ts <= entity.event_timestamp filtered_df = input_df @@ -234,30 +245,29 @@ def execute(self, context: ExecutionContext) -> DAGValue: class SparkDedupNode(DAGNode): - def __init__( - self, - name: str, - spark_session: SparkSession, - ): - super().__init__(name) + def __init__(self, + name: str, + column_info: ColumnInfo, + spark_session: SparkSession, + inputs=None): + super().__init__(name, inputs=inputs) + self.column_info = column_info self.spark_session = spark_session - 
def execute(self, context: ExecutionContext) -> DAGValue: + def execute(self, + context: ExecutionContext) -> DAGValue: input_value = self.get_single_input_value(context) input_value.assert_format(DAGFormat.SPARK) input_df: DataFrame = input_value.data - # Get timestamp fields from feature view - colmun_info = context.column_info - # Dedup based on join keys and event timestamp column # Dedup with row_number - partition_cols = context.column_info.join_keys + partition_cols = self.column_info.join_keys deduped_df = input_df if partition_cols: - ordering = [F.col(colmun_info.timestamp_column).desc()] - if colmun_info.created_timestamp_column: - ordering.append(F.col(colmun_info.created_timestamp_column).desc()) + ordering = [F.col(self.column_info.timestamp_column).desc()] + if self.column_info.created_timestamp_column: + ordering.append(F.col(self.column_info.created_timestamp_column).desc()) window = Window.partitionBy(*partition_cols).orderBy(*ordering) deduped_df = ( @@ -274,15 +284,15 @@ def execute(self, context: ExecutionContext) -> DAGValue: class SparkWriteNode(DAGNode): - def __init__( - self, - name: str, - feature_view: Union[BatchFeatureView, StreamFeatureView], - ): - super().__init__(name) + def __init__(self, + name: str, + feature_view: Union[BatchFeatureView, StreamFeatureView], + inputs=None): + super().__init__(name, inputs=inputs) self.feature_view = feature_view - def execute(self, context: ExecutionContext) -> DAGValue: + def execute(self, + context: ExecutionContext) -> DAGValue: spark_df: DataFrame = self.get_single_input_value(context).data serialized_artifacts = SerializedArtifacts.serialize( feature_view=self.feature_view, repo_config=context.repo_config @@ -324,13 +334,18 @@ def execute(self, context: ExecutionContext) -> DAGValue: class SparkTransformationNode(DAGNode): - def __init__(self, name: str, udf): - super().__init__(name) + def __init__(self, + name: str, + udf: callable, + inputs=None): + super().__init__(name, inputs) self.udf = udf - def execute(self, context: ExecutionContext) -> DAGValue: + def execute(self, + context: ExecutionContext) -> DAGValue: input_val = self.get_single_input_value(context) input_val.assert_format(DAGFormat.SPARK) + print("[SparkTransformationNode] Input schema:", input_val.data.columns) transformed_df = self.udf(input_val.data) diff --git a/sdk/python/feast/infra/compute_engines/utils.py b/sdk/python/feast/infra/compute_engines/utils.py index 09a13a72193..6ccc78118de 100644 --- a/sdk/python/feast/infra/compute_engines/utils.py +++ b/sdk/python/feast/infra/compute_engines/utils.py @@ -2,12 +2,13 @@ from typing import Optional from feast.data_source import DataSource -from feast.infra.compute_engines.dag.context import ExecutionContext +from feast.infra.compute_engines.dag.context import ExecutionContext, ColumnInfo from feast.infra.offline_stores.offline_store import RetrievalJob def create_offline_store_retrieval_job( data_source: DataSource, + column_info: ColumnInfo, context: ExecutionContext, start_time: Optional[datetime] = None, end_time: Optional[datetime] = None, @@ -16,6 +17,7 @@ def create_offline_store_retrieval_job( Create a retrieval job for the offline store. Args: data_source: The data source to pull from. + column_info: Column information containing join keys, feature columns, and timestamps. 
context: start_time: end_time: @@ -24,7 +26,6 @@ def create_offline_store_retrieval_job( """ offline_store = context.offline_store - column_info = context.column_info # ๐Ÿ“ฅ Reuse Feast's robust query resolver retrieval_job = offline_store.pull_all_from_table_or_query( config=context.repo_config, diff --git a/sdk/python/feast/protos/feast/core/Aggregation_pb2.py b/sdk/python/feast/protos/feast/core/Aggregation_pb2.py index 922f8f40aa2..25b68bfc49d 100644 --- a/sdk/python/feast/protos/feast/core/Aggregation_pb2.py +++ b/sdk/python/feast/protos/feast/core/Aggregation_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/Aggregation.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,8 +20,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Aggregation_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020AggregationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020AggregationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_AGGREGATION']._serialized_start=77 _globals['_AGGREGATION']._serialized_end=223 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/DataFormat_pb2.py b/sdk/python/feast/protos/feast/core/DataFormat_pb2.py index a3883dcec3b..401ebf32c49 100644 --- a/sdk/python/feast/protos/feast/core/DataFormat_pb2.py +++ b/sdk/python/feast/protos/feast/core/DataFormat_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/DataFormat.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,8 +19,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.DataFormat_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\017DataFormatProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\017DataFormatProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_FILEFORMAT']._serialized_start=44 _globals['_FILEFORMAT']._serialized_end=222 _globals['_FILEFORMAT_PARQUETFORMAT']._serialized_start=182 diff --git a/sdk/python/feast/protos/feast/core/DataSource_pb2.py b/sdk/python/feast/protos/feast/core/DataSource_pb2.py index ae03c7d0c42..6d815222a41 100644 --- a/sdk/python/feast/protos/feast/core/DataSource_pb2.py +++ b/sdk/python/feast/protos/feast/core/DataSource_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/DataSource.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -25,14 +24,15 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.DataSource_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\017DataSourceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_DATASOURCE_TAGSENTRY']._options = None - _globals['_DATASOURCE_TAGSENTRY']._serialized_options = b'8\001' - _globals['_DATASOURCE_FIELDMAPPINGENTRY']._options = None - _globals['_DATASOURCE_FIELDMAPPINGENTRY']._serialized_options = b'8\001' - _globals['_DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY']._options = None - _globals['_DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\017DataSourceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _DATASOURCE_TAGSENTRY._options = None + _DATASOURCE_TAGSENTRY._serialized_options = b'8\001' + _DATASOURCE_FIELDMAPPINGENTRY._options = None + _DATASOURCE_FIELDMAPPINGENTRY._serialized_options = b'8\001' + _DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY._options = None + _DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY._serialized_options = b'8\001' _globals['_DATASOURCE']._serialized_start=189 _globals['_DATASOURCE']._serialized_end=3107 _globals['_DATASOURCE_TAGSENTRY']._serialized_start=1436 diff --git a/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py b/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py index c5dbc3ec64a..54650c1365f 100644 --- a/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py +++ b/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/DatastoreTable.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,8 +20,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.DatastoreTable_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\023DatastoreTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\023DatastoreTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_DATASTORETABLE']._serialized_start=80 _globals['_DATASTORETABLE']._serialized_end=274 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/Entity_pb2.py b/sdk/python/feast/protos/feast/core/Entity_pb2.py index 2b3e7806736..682a2b127d1 100644 --- a/sdk/python/feast/protos/feast/core/Entity_pb2.py +++ b/sdk/python/feast/protos/feast/core/Entity_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Entity.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -22,10 +21,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Entity_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\013EntityProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_ENTITYSPECV2_TAGSENTRY']._options = None - _globals['_ENTITYSPECV2_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\013EntityProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _ENTITYSPECV2_TAGSENTRY._options = None + _ENTITYSPECV2_TAGSENTRY._serialized_options = b'8\001' _globals['_ENTITY']._serialized_start=97 _globals['_ENTITY']._serialized_end=183 _globals['_ENTITYSPECV2']._serialized_start=186 diff --git a/sdk/python/feast/protos/feast/core/FeatureService_pb2.py b/sdk/python/feast/protos/feast/core/FeatureService_pb2.py index 7ef36079691..7ea598df9ad 100644 --- a/sdk/python/feast/protos/feast/core/FeatureService_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureService_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/FeatureService.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -22,12 +21,13 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureService_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\023FeatureServiceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_FEATURESERVICESPEC_TAGSENTRY']._options = None - _globals['_FEATURESERVICESPEC_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY']._options = None - _globals['_LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\023FeatureServiceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _FEATURESERVICESPEC_TAGSENTRY._options = None + _FEATURESERVICESPEC_TAGSENTRY._serialized_options = b'8\001' + _LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY._options = None + _LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY._serialized_options = b'8\001' _globals['_FEATURESERVICE']._serialized_start=120 _globals['_FEATURESERVICE']._serialized_end=228 _globals['_FEATURESERVICESPEC']._serialized_start=231 diff --git a/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py b/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py index 713e72b5d33..c1539d767b6 100644 --- a/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/FeatureTable.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -24,10 +23,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureTable_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\021FeatureTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_FEATURETABLESPEC_LABELSENTRY']._options = None - _globals['_FEATURETABLESPEC_LABELSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\021FeatureTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _FEATURETABLESPEC_LABELSENTRY._options = None + _FEATURETABLESPEC_LABELSENTRY._serialized_options = b'8\001' _globals['_FEATURETABLE']._serialized_start=165 _globals['_FEATURETABLE']._serialized_end=267 _globals['_FEATURETABLESPEC']._serialized_start=270 diff --git a/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py b/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py index b47d4fe392f..85c7a141a3a 100644 --- a/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/FeatureViewProjection.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -22,10 +21,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureViewProjection_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\025FeatureReferenceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_FEATUREVIEWPROJECTION_JOINKEYMAPENTRY']._options = None - _globals['_FEATUREVIEWPROJECTION_JOINKEYMAPENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\025FeatureReferenceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _FEATUREVIEWPROJECTION_JOINKEYMAPENTRY._options = None + _FEATUREVIEWPROJECTION_JOINKEYMAPENTRY._serialized_options = b'8\001' _globals['_FEATUREVIEWPROJECTION']._serialized_start=110 _globals['_FEATUREVIEWPROJECTION']._serialized_end=552 _globals['_FEATUREVIEWPROJECTION_JOINKEYMAPENTRY']._serialized_start=503 diff --git a/sdk/python/feast/protos/feast/core/FeatureView_pb2.py b/sdk/python/feast/protos/feast/core/FeatureView_pb2.py index d1456cf9faf..71096b00d2b 100644 --- a/sdk/python/feast/protos/feast/core/FeatureView_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureView_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/FeatureView.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -18,26 +17,27 @@ from feast.protos.feast.core import Feature_pb2 as feast_dot_core_dot_Feature__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1c\x66\x65\x61st/core/FeatureView.proto\x12\nfeast.core\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1b\x66\x65\x61st/core/DataSource.proto\x1a\x18\x66\x65\x61st/core/Feature.proto\"c\n\x0b\x46\x65\x61tureView\x12)\n\x04spec\x18\x01 \x01(\x0b\x32\x1b.feast.core.FeatureViewSpec\x12)\n\x04meta\x18\x02 \x01(\x0b\x32\x1b.feast.core.FeatureViewMeta\"\xce\x03\n\x0f\x46\x65\x61tureViewSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07project\x18\x02 \x01(\t\x12\x10\n\x08\x65ntities\x18\x03 \x03(\t\x12+\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x31\n\x0e\x65ntity_columns\x18\x0c \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x13\n\x0b\x64\x65scription\x18\n \x01(\t\x12\x33\n\x04tags\x18\x05 \x03(\x0b\x32%.feast.core.FeatureViewSpec.TagsEntry\x12\r\n\x05owner\x18\x0b \x01(\t\x12&\n\x03ttl\x18\x06 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\x0c\x62\x61tch_source\x18\x07 \x01(\x0b\x32\x16.feast.core.DataSource\x12-\n\rstream_source\x18\t \x01(\x0b\x32\x16.feast.core.DataSource\x12\x0e\n\x06online\x18\x08 \x01(\x08\x12\x0f\n\x07offline\x18\r \x01(\x08\x1a+\n\tTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xcc\x01\n\x0f\x46\x65\x61tureViewMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12:\n\x16last_updated_timestamp\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x46\n\x19materialization_intervals\x18\x03 \x03(\x0b\x32#.feast.core.MaterializationInterval\"w\n\x17MaterializationInterval\x12.\n\nstart_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x08\x65nd_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"@\n\x0f\x46\x65\x61tureViewList\x12-\n\x0c\x66\x65\x61tureviews\x18\x01 \x03(\x0b\x32\x17.feast.core.FeatureViewBU\n\x10\x66\x65\x61st.proto.coreB\x10\x46\x65\x61tureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/coreb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1c\x66\x65\x61st/core/FeatureView.proto\x12\nfeast.core\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1b\x66\x65\x61st/core/DataSource.proto\x1a\x18\x66\x65\x61st/core/Feature.proto\"c\n\x0b\x46\x65\x61tureView\x12)\n\x04spec\x18\x01 \x01(\x0b\x32\x1b.feast.core.FeatureViewSpec\x12)\n\x04meta\x18\x02 \x01(\x0b\x32\x1b.feast.core.FeatureViewMeta\"\x80\x04\n\x0f\x46\x65\x61tureViewSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07project\x18\x02 \x01(\t\x12\x10\n\x08\x65ntities\x18\x03 \x03(\t\x12+\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x31\n\x0e\x65ntity_columns\x18\x0c \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x13\n\x0b\x64\x65scription\x18\n \x01(\t\x12\x33\n\x04tags\x18\x05 \x03(\x0b\x32%.feast.core.FeatureViewSpec.TagsEntry\x12\r\n\x05owner\x18\x0b \x01(\t\x12&\n\x03ttl\x18\x06 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\x0c\x62\x61tch_source\x18\x07 \x01(\x0b\x32\x16.feast.core.DataSource\x12-\n\rstream_source\x18\t \x01(\x0b\x32\x16.feast.core.DataSource\x12\x0e\n\x06online\x18\x08 \x01(\x08\x12\x0f\n\x07offline\x18\r 
\x01(\x08\x12\x30\n\x0bsource_view\x18\x0e \x01(\x0b\x32\x1b.feast.core.FeatureViewSpec\x1a+\n\tTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xcc\x01\n\x0f\x46\x65\x61tureViewMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12:\n\x16last_updated_timestamp\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x46\n\x19materialization_intervals\x18\x03 \x03(\x0b\x32#.feast.core.MaterializationInterval\"w\n\x17MaterializationInterval\x12.\n\nstart_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x08\x65nd_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"@\n\x0f\x46\x65\x61tureViewList\x12-\n\x0c\x66\x65\x61tureviews\x18\x01 \x03(\x0b\x32\x17.feast.core.FeatureViewBU\n\x10\x66\x65\x61st.proto.coreB\x10\x46\x65\x61tureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureView_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020FeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_FEATUREVIEWSPEC_TAGSENTRY']._options = None - _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020FeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _FEATUREVIEWSPEC_TAGSENTRY._options = None + _FEATUREVIEWSPEC_TAGSENTRY._serialized_options = b'8\001' _globals['_FEATUREVIEW']._serialized_start=164 _globals['_FEATUREVIEW']._serialized_end=263 _globals['_FEATUREVIEWSPEC']._serialized_start=266 - _globals['_FEATUREVIEWSPEC']._serialized_end=728 - _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_start=685 - _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_end=728 - _globals['_FEATUREVIEWMETA']._serialized_start=731 - _globals['_FEATUREVIEWMETA']._serialized_end=935 - _globals['_MATERIALIZATIONINTERVAL']._serialized_start=937 - _globals['_MATERIALIZATIONINTERVAL']._serialized_end=1056 - _globals['_FEATUREVIEWLIST']._serialized_start=1058 - _globals['_FEATUREVIEWLIST']._serialized_end=1122 + _globals['_FEATUREVIEWSPEC']._serialized_end=778 + _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_start=735 + _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_end=778 + _globals['_FEATUREVIEWMETA']._serialized_start=781 + _globals['_FEATUREVIEWMETA']._serialized_end=985 + _globals['_MATERIALIZATIONINTERVAL']._serialized_start=987 + _globals['_MATERIALIZATIONINTERVAL']._serialized_end=1106 + _globals['_FEATUREVIEWLIST']._serialized_start=1108 + _globals['_FEATUREVIEWLIST']._serialized_end=1172 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi b/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi index 6abeb85e263..fac7abdbbfe 100644 --- a/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi +++ b/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi @@ -91,6 +91,7 @@ class FeatureViewSpec(google.protobuf.message.Message): STREAM_SOURCE_FIELD_NUMBER: builtins.int ONLINE_FIELD_NUMBER: builtins.int OFFLINE_FIELD_NUMBER: builtins.int + SOURCE_VIEW_FIELD_NUMBER: builtins.int name: builtins.str """Name of the feature view. Must be unique. 
Not updated.""" project: builtins.str @@ -130,6 +131,8 @@ class FeatureViewSpec(google.protobuf.message.Message): """ offline: builtins.bool """Whether these features should be written to the offline store""" + @property + def source_view(self) -> global___FeatureViewSpec: ... def __init__( self, *, @@ -146,9 +149,10 @@ class FeatureViewSpec(google.protobuf.message.Message): stream_source: feast.core.DataSource_pb2.DataSource | None = ..., online: builtins.bool = ..., offline: builtins.bool = ..., + source_view: global___FeatureViewSpec | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "stream_source", b"stream_source", "ttl", b"ttl"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "description", b"description", "entities", b"entities", "entity_columns", b"entity_columns", "features", b"features", "name", b"name", "offline", b"offline", "online", b"online", "owner", b"owner", "project", b"project", "stream_source", b"stream_source", "tags", b"tags", "ttl", b"ttl"]) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "source_view", b"source_view", "stream_source", b"stream_source", "ttl", b"ttl"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "description", b"description", "entities", b"entities", "entity_columns", b"entity_columns", "features", b"features", "name", b"name", "offline", b"offline", "online", b"online", "owner", b"owner", "project", b"project", "source_view", b"source_view", "stream_source", b"stream_source", "tags", b"tags", "ttl", b"ttl"]) -> None: ... global___FeatureViewSpec = FeatureViewSpec diff --git a/sdk/python/feast/protos/feast/core/Feature_pb2.py b/sdk/python/feast/protos/feast/core/Feature_pb2.py index a02bb7ff403..19634f926ec 100644 --- a/sdk/python/feast/protos/feast/core/Feature_pb2.py +++ b/sdk/python/feast/protos/feast/core/Feature_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Feature.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,10 +20,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Feature_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\014FeatureProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_FEATURESPECV2_TAGSENTRY']._options = None - _globals['_FEATURESPECV2_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\014FeatureProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _FEATURESPECV2_TAGSENTRY._options = None + _FEATURESPECV2_TAGSENTRY._serialized_options = b'8\001' _globals['_FEATURESPECV2']._serialized_start=66 _globals['_FEATURESPECV2']._serialized_end=336 _globals['_FEATURESPECV2_TAGSENTRY']._serialized_start=293 diff --git a/sdk/python/feast/protos/feast/core/InfraObject_pb2.py b/sdk/python/feast/protos/feast/core/InfraObject_pb2.py index aeea27f2e00..3ce634f304c 100644 --- a/sdk/python/feast/protos/feast/core/InfraObject_pb2.py +++ b/sdk/python/feast/protos/feast/core/InfraObject_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/InfraObject.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -22,8 +21,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.InfraObject_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020InfraObjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020InfraObjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_INFRA']._serialized_start=107 _globals['_INFRA']._serialized_end=162 _globals['_INFRAOBJECT']._serialized_start=165 diff --git a/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py b/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py index 926b54df288..5d49136be01 100644 --- a/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py +++ b/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/OnDemandFeatureView.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -26,16 +25,17 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.OnDemandFeatureView_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\030OnDemandFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY']._options = None - _globals['_ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY']._serialized_options = b'8\001' - _globals['_ONDEMANDFEATUREVIEWSPEC_TAGSENTRY']._options = None - _globals['_ONDEMANDFEATUREVIEWSPEC_TAGSENTRY']._serialized_options = b'8\001' - _globals['_ONDEMANDFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._options = None - _globals['_ONDEMANDFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._serialized_options = b'\030\001' - _globals['_USERDEFINEDFUNCTION']._options = None - _globals['_USERDEFINEDFUNCTION']._serialized_options = b'\030\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\030OnDemandFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY._options = None + _ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY._serialized_options = b'8\001' + _ONDEMANDFEATUREVIEWSPEC_TAGSENTRY._options = None + _ONDEMANDFEATUREVIEWSPEC_TAGSENTRY._serialized_options = b'8\001' + _ONDEMANDFEATUREVIEWSPEC.fields_by_name['user_defined_function']._options = None + _ONDEMANDFEATUREVIEWSPEC.fields_by_name['user_defined_function']._serialized_options = b'\030\001' + _USERDEFINEDFUNCTION._options = None + _USERDEFINEDFUNCTION._serialized_options = b'\030\001' _globals['_ONDEMANDFEATUREVIEW']._serialized_start=243 _globals['_ONDEMANDFEATUREVIEW']._serialized_end=366 _globals['_ONDEMANDFEATUREVIEWSPEC']._serialized_start=369 diff --git a/sdk/python/feast/protos/feast/core/Permission_pb2.py b/sdk/python/feast/protos/feast/core/Permission_pb2.py index 706fd2eec47..19951e6b227 100644 --- a/sdk/python/feast/protos/feast/core/Permission_pb2.py +++ b/sdk/python/feast/protos/feast/core/Permission_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Permission.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -22,12 +21,13 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Permission_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\017PermissionProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_PERMISSIONSPEC_REQUIREDTAGSENTRY']._options = None - _globals['_PERMISSIONSPEC_REQUIREDTAGSENTRY']._serialized_options = b'8\001' - _globals['_PERMISSIONSPEC_TAGSENTRY']._options = None - _globals['_PERMISSIONSPEC_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\017PermissionProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _PERMISSIONSPEC_REQUIREDTAGSENTRY._options = None + _PERMISSIONSPEC_REQUIREDTAGSENTRY._serialized_options = b'8\001' + _PERMISSIONSPEC_TAGSENTRY._options = None + _PERMISSIONSPEC_TAGSENTRY._serialized_options = b'8\001' _globals['_PERMISSION']._serialized_start=101 _globals['_PERMISSION']._serialized_end=197 _globals['_PERMISSIONSPEC']._serialized_start=200 diff --git a/sdk/python/feast/protos/feast/core/Policy_pb2.py b/sdk/python/feast/protos/feast/core/Policy_pb2.py index 2fac866115c..bea067c8be7 100644 --- a/sdk/python/feast/protos/feast/core/Policy_pb2.py +++ b/sdk/python/feast/protos/feast/core/Policy_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/Policy.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,8 +19,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Policy_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\013PolicyProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\013PolicyProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_POLICY']._serialized_start=39 _globals['_POLICY']._serialized_end=151 _globals['_ROLEBASEDPOLICY']._serialized_start=153 diff --git a/sdk/python/feast/protos/feast/core/Project_pb2.py b/sdk/python/feast/protos/feast/core/Project_pb2.py index cfbf1220143..189af6f00b1 100644 --- a/sdk/python/feast/protos/feast/core/Project_pb2.py +++ b/sdk/python/feast/protos/feast/core/Project_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Project.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,10 +20,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Project_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\014ProjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_PROJECTSPEC_TAGSENTRY']._options = None - _globals['_PROJECTSPEC_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\014ProjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _PROJECTSPEC_TAGSENTRY._options = None + _PROJECTSPEC_TAGSENTRY._serialized_options = b'8\001' _globals['_PROJECT']._serialized_start=73 _globals['_PROJECT']._serialized_end=160 _globals['_PROJECTSPEC']._serialized_start=163 diff --git a/sdk/python/feast/protos/feast/core/Registry_pb2.py b/sdk/python/feast/protos/feast/core/Registry_pb2.py index 671958d80c7..82aa75325f2 100644 --- a/sdk/python/feast/protos/feast/core/Registry_pb2.py +++ b/sdk/python/feast/protos/feast/core/Registry_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/Registry.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -33,10 +32,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Registry_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\rRegistryProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_REGISTRY'].fields_by_name['project_metadata']._options = None - _globals['_REGISTRY'].fields_by_name['project_metadata']._serialized_options = b'\030\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\rRegistryProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _REGISTRY.fields_by_name['project_metadata']._options = None + _REGISTRY.fields_by_name['project_metadata']._serialized_options = b'\030\001' _globals['_REGISTRY']._serialized_start=449 _globals['_REGISTRY']._serialized_end=1216 _globals['_PROJECTMETADATA']._serialized_start=1218 diff --git a/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py b/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py index fe1e2d49eac..535d2f5772f 100644 --- a/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py +++ b/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/SavedDataset.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -22,10 +21,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.SavedDataset_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\021SavedDatasetProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_SAVEDDATASETSPEC_TAGSENTRY']._options = None - _globals['_SAVEDDATASETSPEC_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\021SavedDatasetProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _SAVEDDATASETSPEC_TAGSENTRY._options = None + _SAVEDDATASETSPEC_TAGSENTRY._serialized_options = b'8\001' _globals['_SAVEDDATASETSPEC']._serialized_start=108 _globals['_SAVEDDATASETSPEC']._serialized_end=401 _globals['_SAVEDDATASETSPEC_TAGSENTRY']._serialized_start=358 diff --git a/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py b/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py index 8cc14781c72..a61f866b514 100644 --- a/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py +++ b/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/SqliteTable.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,8 +19,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.SqliteTable_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020SqliteTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020SqliteTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_SQLITETABLE']._serialized_start=44 _globals['_SQLITETABLE']._serialized_end=85 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/Store_pb2.py b/sdk/python/feast/protos/feast/core/Store_pb2.py index 7d24e11947f..4169921bc23 100644 --- a/sdk/python/feast/protos/feast/core/Store_pb2.py +++ b/sdk/python/feast/protos/feast/core/Store_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Store.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,8 +19,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Store_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\nStoreProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\nStoreProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_STORE']._serialized_start=39 _globals['_STORE']._serialized_end=932 _globals['_STORE_REDISCONFIG']._serialized_start=286 diff --git a/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py b/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py index ba19088edd6..f79ae56c700 100644 --- a/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py +++ b/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/StreamFeatureView.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -27,12 +26,13 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.StreamFeatureView_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\026StreamFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_STREAMFEATUREVIEWSPEC_TAGSENTRY']._options = None - _globals['_STREAMFEATUREVIEWSPEC_TAGSENTRY']._serialized_options = b'8\001' - _globals['_STREAMFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._options = None - _globals['_STREAMFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._serialized_options = b'\030\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\026StreamFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _STREAMFEATUREVIEWSPEC_TAGSENTRY._options = None + _STREAMFEATUREVIEWSPEC_TAGSENTRY._serialized_options = b'8\001' + _STREAMFEATUREVIEWSPEC.fields_by_name['user_defined_function']._options = None + _STREAMFEATUREVIEWSPEC.fields_by_name['user_defined_function']._serialized_options = b'\030\001' _globals['_STREAMFEATUREVIEW']._serialized_start=268 _globals['_STREAMFEATUREVIEW']._serialized_end=379 _globals['_STREAMFEATUREVIEWSPEC']._serialized_start=382 diff --git a/sdk/python/feast/protos/feast/core/Transformation_pb2.py b/sdk/python/feast/protos/feast/core/Transformation_pb2.py index 9fd11d3026b..d02127a3207 100644 --- a/sdk/python/feast/protos/feast/core/Transformation_pb2.py +++ b/sdk/python/feast/protos/feast/core/Transformation_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Transformation.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,8 +19,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Transformation_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\032FeatureTransformationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\032FeatureTransformationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_USERDEFINEDFUNCTIONV2']._serialized_start=47 _globals['_USERDEFINEDFUNCTIONV2']._serialized_end=117 _globals['_FEATURETRANSFORMATIONV2']._serialized_start=120 diff --git a/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py b/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py index 0fb27ceab16..a82d7a4f0b7 100644 --- a/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py +++ b/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/ValidationProfile.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,10 +19,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.ValidationProfile_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\021ValidationProfileZ/github.com/feast-dev/feast/go/protos/feast/core' - _globals['_VALIDATIONREFERENCE_TAGSENTRY']._options = None - _globals['_VALIDATIONREFERENCE_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\021ValidationProfileZ/github.com/feast-dev/feast/go/protos/feast/core' + _VALIDATIONREFERENCE_TAGSENTRY._options = None + _VALIDATIONREFERENCE_TAGSENTRY._serialized_options = b'8\001' _globals['_GEVALIDATIONPROFILER']._serialized_start=51 _globals['_GEVALIDATIONPROFILER']._serialized_end=182 _globals['_GEVALIDATIONPROFILER_USERDEFINEDPROFILER']._serialized_start=147 diff --git a/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py b/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py index 2d5f7b020ab..8ccff12f791 100644 --- a/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py +++ b/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/registry/RegistryServer.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -34,30 +33,31 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.registry.RegistryServer_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'Z3github.com/feast-dev/feast/go/protos/feast/registry' - _globals['_LISTENTITIESREQUEST_TAGSENTRY']._options = None - _globals['_LISTENTITIESREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTDATASOURCESREQUEST_TAGSENTRY']._options = None - _globals['_LISTDATASOURCESREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTFEATUREVIEWSREQUEST_TAGSENTRY']._options = None - _globals['_LISTFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTALLFEATUREVIEWSREQUEST_TAGSENTRY']._options = None - _globals['_LISTALLFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY']._options = None - _globals['_LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY']._options = None - _globals['_LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTFEATURESERVICESREQUEST_TAGSENTRY']._options = None - _globals['_LISTFEATURESERVICESREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTSAVEDDATASETSREQUEST_TAGSENTRY']._options = None - _globals['_LISTSAVEDDATASETSREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY']._options = None - _globals['_LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTPERMISSIONSREQUEST_TAGSENTRY']._options = None - _globals['_LISTPERMISSIONSREQUEST_TAGSENTRY']._serialized_options = b'8\001' - _globals['_LISTPROJECTSREQUEST_TAGSENTRY']._options = None - _globals['_LISTPROJECTSREQUEST_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'Z3github.com/feast-dev/feast/go/protos/feast/registry' + _LISTENTITIESREQUEST_TAGSENTRY._options = None + _LISTENTITIESREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTDATASOURCESREQUEST_TAGSENTRY._options = None + _LISTDATASOURCESREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTFEATUREVIEWSREQUEST_TAGSENTRY._options = None + _LISTFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTALLFEATUREVIEWSREQUEST_TAGSENTRY._options = None + _LISTALLFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY._options = None + _LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY._options = None + _LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTFEATURESERVICESREQUEST_TAGSENTRY._options = None + _LISTFEATURESERVICESREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTSAVEDDATASETSREQUEST_TAGSENTRY._options = None + _LISTSAVEDDATASETSREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY._options = None + _LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTPERMISSIONSREQUEST_TAGSENTRY._options = None + 
_LISTPERMISSIONSREQUEST_TAGSENTRY._serialized_options = b'8\001' + _LISTPROJECTSREQUEST_TAGSENTRY._options = None + _LISTPROJECTSREQUEST_TAGSENTRY._serialized_options = b'8\001' _globals['_REFRESHREQUEST']._serialized_start=487 _globals['_REFRESHREQUEST']._serialized_end=520 _globals['_UPDATEINFRAREQUEST']._serialized_start=522 diff --git a/sdk/python/feast/protos/feast/serving/Connector_pb2.py b/sdk/python/feast/protos/feast/serving/Connector_pb2.py index b38471dea8d..8b5516eabaa 100644 --- a/sdk/python/feast/protos/feast/serving/Connector_pb2.py +++ b/sdk/python/feast/protos/feast/serving/Connector_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/serving/Connector.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -24,8 +23,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.Connector_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' _globals['_CONNECTORFEATURE']._serialized_start=173 _globals['_CONNECTORFEATURE']._serialized_end=327 _globals['_CONNECTORFEATURELIST']._serialized_start=329 diff --git a/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py b/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py index ce4db37a658..c03681f2dc8 100644 --- a/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py +++ b/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/serving/GrpcServer.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,12 +20,13 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.GrpcServer_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' - _globals['_PUSHREQUEST_FEATURESENTRY']._options = None - _globals['_PUSHREQUEST_FEATURESENTRY']._serialized_options = b'8\001' - _globals['_WRITETOONLINESTOREREQUEST_FEATURESENTRY']._options = None - _globals['_WRITETOONLINESTOREREQUEST_FEATURESENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' + _PUSHREQUEST_FEATURESENTRY._options = None + _PUSHREQUEST_FEATURESENTRY._serialized_options = b'8\001' + _WRITETOONLINESTOREREQUEST_FEATURESENTRY._options = None + _WRITETOONLINESTOREREQUEST_FEATURESENTRY._serialized_options = b'8\001' _globals['_PUSHREQUEST']._serialized_start=71 _globals['_PUSHREQUEST']._serialized_end=250 _globals['_PUSHREQUEST_FEATURESENTRY']._serialized_start=203 diff --git a/sdk/python/feast/protos/feast/serving/ServingService_pb2.py b/sdk/python/feast/protos/feast/serving/ServingService_pb2.py index fa866640577..1ad4f29c68d 100644 --- a/sdk/python/feast/protos/feast/serving/ServingService_pb2.py +++ b/sdk/python/feast/protos/feast/serving/ServingService_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/serving/ServingService.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -22,14 +21,15 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.ServingService_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\023feast.proto.servingB\017ServingAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' - _globals['_GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY']._options = None - _globals['_GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY']._serialized_options = b'8\001' - _globals['_GETONLINEFEATURESREQUEST_ENTITIESENTRY']._options = None - _globals['_GETONLINEFEATURESREQUEST_ENTITIESENTRY']._serialized_options = b'8\001' - _globals['_GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY']._options = None - _globals['_GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\023feast.proto.servingB\017ServingAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' + _GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY._options = None + _GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY._serialized_options = b'8\001' + _GETONLINEFEATURESREQUEST_ENTITIESENTRY._options = None + _GETONLINEFEATURESREQUEST_ENTITIESENTRY._serialized_options = b'8\001' + _GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY._options = None + _GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY._serialized_options = b'8\001' _globals['_FIELDSTATUS']._serialized_start=1560 _globals['_FIELDSTATUS']._serialized_end=1651 _globals['_GETFEASTSERVINGINFOREQUEST']._serialized_start=111 diff --git a/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py b/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py index bc060e9a776..0416d84b6f0 100644 --- a/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py +++ b/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/serving/TransformationService.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,8 +19,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.TransformationService_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\023feast.proto.servingB\035TransformationServiceAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\023feast.proto.servingB\035TransformationServiceAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' _globals['_TRANSFORMATIONSERVICETYPE']._serialized_start=529 _globals['_TRANSFORMATIONSERVICETYPE']._serialized_end=677 _globals['_VALUETYPE']._serialized_start=60 diff --git a/sdk/python/feast/protos/feast/storage/Redis_pb2.py b/sdk/python/feast/protos/feast/storage/Redis_pb2.py index 37d59c9df5a..c7c6e967a41 100644 --- a/sdk/python/feast/protos/feast/storage/Redis_pb2.py +++ b/sdk/python/feast/protos/feast/storage/Redis_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/storage/Redis.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,8 +20,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.storage.Redis_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\023feast.proto.storageB\nRedisProtoZ2github.com/feast-dev/feast/go/protos/feast/storage' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\023feast.proto.storageB\nRedisProtoZ2github.com/feast-dev/feast/go/protos/feast/storage' _globals['_REDISKEYV2']._serialized_start=69 _globals['_REDISKEYV2']._serialized_end=163 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/EntityKey_pb2.py b/sdk/python/feast/protos/feast/types/EntityKey_pb2.py index a6e1abf7302..34480eb8a34 100644 --- a/sdk/python/feast/protos/feast/types/EntityKey_pb2.py +++ b/sdk/python/feast/protos/feast/types/EntityKey_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/types/EntityKey.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,8 +20,9 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.types.EntityKey_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\016EntityKeyProtoZ0github.com/feast-dev/feast/go/protos/feast/types' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\021feast.proto.typesB\016EntityKeyProtoZ0github.com/feast-dev/feast/go/protos/feast/types' _globals['_ENTITYKEY']._serialized_start=69 _globals['_ENTITYKEY']._serialized_end=142 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/Field_pb2.py b/sdk/python/feast/protos/feast/types/Field_pb2.py index 973fdc6cdea..f85b67245bd 100644 --- a/sdk/python/feast/protos/feast/types/Field_pb2.py +++ b/sdk/python/feast/protos/feast/types/Field_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/types/Field.proto -# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,10 +20,11 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.types.Field_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - _globals['DESCRIPTOR']._options = None - _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nFieldProtoZ0github.com/feast-dev/feast/go/protos/feast/types' - _globals['_FIELD_TAGSENTRY']._options = None - _globals['_FIELD_TAGSENTRY']._serialized_options = b'8\001' + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\021feast.proto.typesB\nFieldProtoZ0github.com/feast-dev/feast/go/protos/feast/types' + _FIELD_TAGSENTRY._options = None + _FIELD_TAGSENTRY._serialized_options = b'8\001' _globals['_FIELD']._serialized_start=66 _globals['_FIELD']._serialized_end=241 _globals['_FIELD_TAGSENTRY']._serialized_start=198 diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.py b/sdk/python/feast/protos/feast/types/Value_pb2.py index 18ee3311808..942359f7d79 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.py +++ b/sdk/python/feast/protos/feast/types/Value_pb2.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/types/Value.proto
-# Protobuf Python Version: 4.25.1
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -20,8 +19,9 @@
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.types.Value_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-  _globals['DESCRIPTOR']._options = None
-  _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types'
+
+  DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types'
   _globals['_NULL']._serialized_start=1200
   _globals['_NULL']._serialized_end=1216
   _globals['_VALUETYPE']._serialized_start=41
diff --git a/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py b/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py
new file mode 100644
index 00000000000..5711d92a3da
--- /dev/null
+++ b/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py
@@ -0,0 +1,158 @@
+import pandas as pd
+import pytest
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock
+from pyspark.sql import DataFrame
+
+from feast import BatchFeatureView, Entity, Field
+from feast.types import Float32, Int64, Int32
+from feast.aggregation import Aggregation
+from feast.infra.common.materialization_job import MaterializationJobStatus, MaterializationTask
+from feast.infra.compute_engines.spark.compute import SparkComputeEngine
+from feast.infra.offline_stores.contrib.spark_offline_store.spark import SparkOfflineStore
+from feast.infra.offline_stores.contrib.spark_offline_store.tests.data_source import SparkDataSourceCreator
+from tests.integration.feature_repos.repo_configuration import construct_test_environment
+from tests.integration.feature_repos.integration_test_repo_config import IntegrationTestRepoConfig
+from tests.integration.feature_repos.universal.online_store.redis import RedisOnlineStoreCreator
+from tqdm import tqdm
+
+
+now = datetime.now()
+today = datetime.today()
+
+driver = Entity(
+    name="driver_id",
+    description="driver id",
+)
+
+
+@pytest.fixture(scope="module")
+def spark_env():
+    config = IntegrationTestRepoConfig(
+        provider="local",
+        online_store_creator=RedisOnlineStoreCreator,
+        offline_store_creator=SparkDataSourceCreator,
+        batch_engine={"type": "spark.engine", "partitions": 10},
+    )
+    env = construct_test_environment(config, None, entity_key_serialization_version=2)
+    env.setup()
+    yield env
+    env.teardown()
+
+
+def create_sample_datasource(spark_environment):
+    df = pd.DataFrame([
+        {
+            "driver_id": 1001,
+            "event_timestamp": today - timedelta(days=1),
+            "created": now - timedelta(hours=2),
+            "conv_rate": 0.8,
+            "acc_rate": 0.5,
+            "avg_daily_trips": 15,
+        },
+        {
+            "driver_id": 1002,
+            "event_timestamp": today - timedelta(days=1),
+            "created": now - timedelta(hours=2),
+            "conv_rate": 0.7,
+            "acc_rate": 0.4,
+            "avg_daily_trips": 12,
+        },
+    ])
+    ds = spark_environment.data_source_creator.create_data_source(
+        df,
+        spark_environment.feature_store.project,
+        timestamp_field="event_timestamp",
+        created_timestamp_column="created",
+    )
+    return ds
+
+
+def create_base_feature_view(source):
+    return BatchFeatureView(
+        name="hourly_driver_stats",
+        entities=[driver],
+        aggregations=[
Aggregation(column="conv_rate", function="sum"), + Aggregation(column="acc_rate", function="avg"), + ], + ttl=timedelta(days=3), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), + Field(name="driver_id", dtype=Int32), + ], + online=True, + offline=True, + source=source, + ) + + +def create_chained_feature_view(base_fv: BatchFeatureView): + def transform(df: DataFrame) -> DataFrame: + return df.withColumn("sum_conv_rate", df["sum_conv_rate"] * 10) + + return BatchFeatureView( + name="daily_driver_stats", + entities=[driver], + udf=transform, + udf_string="transform", + schema=[ + Field(name="sum_conv_rate", dtype=Float32), + Field(name="driver_id", dtype=Int32), + ], + online=True, + offline=True, + source_view=base_fv, + tags={ + "join_keys": "driver_id", + "feature_cols": "sum_conv_rate", + "ts_col": "event_timestamp", + "created_ts_col": "created", + } + ) + + +def _tqdm_builder(length): + return tqdm(total=length, ncols=100) + + +@pytest.mark.integration +def test_spark_dag_materialize_recursive_view(spark_env): + fs = spark_env.feature_store + registry = fs.registry + source = create_sample_datasource(spark_env) + + base_fv = create_base_feature_view(source) + chained_fv = create_chained_feature_view(base_fv) + + fs.apply([driver, base_fv, chained_fv]) + + # ๐Ÿงช Materialize top-level view; DAG will include base_fv implicitly + task = MaterializationTask( + project=fs.project, + feature_view=chained_fv, + start_time=now - timedelta(days=2), + end_time=now, + tqdm_builder=_tqdm_builder, + ) + + engine = SparkComputeEngine( + repo_config=spark_env.config, + offline_store=SparkOfflineStore(), + online_store=MagicMock(), + registry=registry, + ) + + jobs = engine.materialize(registry, task) + + # โœ… Validate jobs ran + assert len(jobs) == 1 + assert jobs[0].status() == MaterializationJobStatus.SUCCEEDED + + # โœ… Verify output exists in offline store + df = jobs[0].to_df() + assert "sum_conv_rate" in df.columns + assert sorted(df["driver_id"].tolist()) == [1001, 1002] + assert abs(df["sum_conv_rate"].iloc[0] - 16.0) < 1e-6 # (0.8 + 0.8) * 10 diff --git a/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py new file mode 100644 index 00000000000..73194a9ea15 --- /dev/null +++ b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py @@ -0,0 +1,125 @@ +from unittest.mock import MagicMock + +import pytest +from feast.infra.compute_engines.feature_builder import FeatureBuilder +from feast.infra.compute_engines.dag.plan import ExecutionPlan + + +# --------------------------- +# Mock Feature View Definitions +# --------------------------- + +class MockFeatureView: + def __init__(self, + name, + source=None, + source_view=None, + aggregations=None, + feature_transformation=None): + self.name = name + self.source = source + self.source_view = source_view + self.aggregations = aggregations or [] + self.feature_transformation = feature_transformation + self.ttl = None + self.filter = None + self.enable_validation = False + self.entities = ["driver_id"] + self.batch_source = type("BatchSource", (), {"timestamp_field": "ts"}) + + +class MockTransformation: + def __init__(self, + name): + self.name = name + self.udf = lambda df: df + + +# --------------------------- +# Mock DAG +# --------------------------- + +hourly_driver_stats = MockFeatureView( + name="hourly_driver_stats", + source="hourly_source", + 
aggregations=[{"function": "sum", "column": "trips"}], + feature_transformation=MockTransformation("hourly_tf"), +) + +daily_driver_stats = MockFeatureView( + name="daily_driver_stats", + source_view=hourly_driver_stats, + aggregations=[{"function": "mean", "column": "trips"}], + feature_transformation=MockTransformation("daily_tf"), +) + + +# --------------------------- +# Mock FeatureBuilder +# --------------------------- + +class MockFeatureBuilder(FeatureBuilder): + def __init__(self, + feature_view): + super().__init__(registry=MagicMock(), feature_view=feature_view, task=MagicMock()) + + def build_source_node(self, + source): + return f"SourceNode({source})" + + def build_join_node(self, + view, + input_node): + return f"JoinNode({view.name} <- {input_node})" + + def build_filter_node(self, + view, + input_node): + return f"FilterNode({view.name} <- {input_node})" + + def build_aggregation_node(self, + view, + input_node): + return f"AggregationNode({view.name} <- {input_node})" + + def build_dedup_node(self, + view, + input_node): + return f"DedupNode({view.name} <- {input_node})" + + def build_transformation_node(self, + view, + input_node): + return f"TransformNode({view.name} <- {input_node})" + + def build_validation_node(self, + view, + input_node): + return f"ValidationNode({view.name} <- {input_node})" + + def build_output_nodes(self, + final_node): + self.nodes.append(f"OutputNode({final_node})") + + +# --------------------------- +# Test +# --------------------------- + +def test_recursive_featureview_build(): + builder = MockFeatureBuilder(daily_driver_stats) + execution_plan: ExecutionPlan = builder.build() + + expected_final_node = ( + "TransformNode(daily_driver_stats <- " + "AggregationNode(daily_driver_stats <- " + "FilterNode(daily_driver_stats <- " + "JoinNode(daily_driver_stats <- " + "TransformNode(hourly_driver_stats <- " + "AggregationNode(hourly_driver_stats <- " + "FilterNode(hourly_driver_stats <- " + "SourceNode(hourly_source))))))))" + ) + expected_output_node = f"OutputNode({expected_final_node})" + + assert execution_plan.nodes[-1] == expected_output_node From 98b61793cc0d41e3d16ab57779deca0440346b74 Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Mon, 30 Jun 2025 13:18:14 -0700 Subject: [PATCH 02/12] Draft: multi source support Signed-off-by: HaoXuAI --- sdk/python/feast/batch_feature_view.py | 6 +- sdk/python/feast/feature_view.py | 8 +- .../infra/compute_engines/algorithms/topo.py | 1 + .../feast/infra/compute_engines/base.py | 6 +- .../infra/compute_engines/feature_builder.py | 68 ++++------ .../infra/compute_engines/feature_resolver.py | 19 ++- .../compute_engines/local/feature_builder.py | 21 +++- .../infra/compute_engines/local/nodes.py | 119 +++++++++--------- .../compute_engines/spark/feature_builder.py | 25 +++- .../infra/compute_engines/spark/nodes.py | 107 ++++++++-------- .../feast/infra/compute_engines/utils.py | 2 +- .../compute_engines/spark/test_compute_dag.py | 76 ++++++----- .../compute_engines/test_feature_builder.py | 61 ++++----- 13 files changed, 261 insertions(+), 258 deletions(-) diff --git a/sdk/python/feast/batch_feature_view.py b/sdk/python/feast/batch_feature_view.py index 5b90cc15e11..03e927d40c7 100644 --- a/sdk/python/feast/batch_feature_view.py +++ b/sdk/python/feast/batch_feature_view.py @@ -107,7 +107,9 @@ def __init__( f"or CUSTOM_SOURCE, got {type(source).__name__}: {source.name} instead " ) elif source_view is None: - raise ValueError("BatchFeatureView must have either 'source' or 'source_view'.") + raise ValueError( + 
"BatchFeatureView must have either 'source' or 'source_view'." + ) self.mode = mode self.udf = udf @@ -129,7 +131,7 @@ def __init__( owner=owner, schema=schema, source=source, - source_view=source_view + source_view=source_view, ) def get_feature_transformation(self) -> Optional[Transformation]: diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index c132cdc41b9..df011c10eb5 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -147,7 +147,9 @@ def __init__( schema = schema or [] if (source is None) == (source_view is None): - raise ValueError("FeatureView must have exactly one of 'source' or 'source_view', not both/neither.") + raise ValueError( + "FeatureView must have exactly one of 'source' or 'source_view', not both/neither." + ) # Initialize data sources. if ( @@ -427,7 +429,9 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): else None ) source_view = ( - FeatureView.from_proto(FeatureViewProto(spec=feature_view_proto.spec.source_view, meta=None)) + FeatureView.from_proto( + FeatureViewProto(spec=feature_view_proto.spec.source_view, meta=None) + ) if feature_view_proto.spec.HasField("source_view") else None ) diff --git a/sdk/python/feast/infra/compute_engines/algorithms/topo.py b/sdk/python/feast/infra/compute_engines/algorithms/topo.py index 0de1dcbff62..6deab652e92 100644 --- a/sdk/python/feast/infra/compute_engines/algorithms/topo.py +++ b/sdk/python/feast/infra/compute_engines/algorithms/topo.py @@ -1,6 +1,7 @@ # feast/infra/compute_engines/dag/utils.py from typing import List, Set + from feast.infra.compute_engines.dag.node import DAGNode diff --git a/sdk/python/feast/infra/compute_engines/base.py b/sdk/python/feast/infra/compute_engines/base.py index 4b9dbe16de7..e50494abd63 100644 --- a/sdk/python/feast/infra/compute_engines/base.py +++ b/sdk/python/feast/infra/compute_engines/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import List, Optional, Sequence, Union +from typing import List, Sequence, Union import pyarrow as pa @@ -12,13 +12,12 @@ MaterializationTask, ) from feast.infra.common.retrieval_task import HistoricalRetrievalTask -from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext +from feast.infra.compute_engines.dag.context import ExecutionContext from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob from feast.infra.online_stores.online_store import OnlineStore from feast.infra.registry.base_registry import BaseRegistry from feast.on_demand_feature_view import OnDemandFeatureView from feast.stream_feature_view import StreamFeatureView -from feast.utils import _get_column_names class ComputeEngine(ABC): @@ -132,4 +131,3 @@ def get_execution_context( entity_defs=entity_defs, entity_df=entity_df, ) - diff --git a/sdk/python/feast/infra/compute_engines/feature_builder.py b/sdk/python/feast/infra/compute_engines/feature_builder.py index 0966bb9ce40..0414c970656 100644 --- a/sdk/python/feast/infra/compute_engines/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/feature_builder.py @@ -1,13 +1,16 @@ from abc import ABC, abstractmethod -from typing import Union, List, Optional +from typing import List, Optional, Union -from feast import BatchFeatureView, StreamFeatureView, FeatureView +from feast import BatchFeatureView, FeatureView, StreamFeatureView from feast.infra.common.materialization_job import MaterializationTask from feast.infra.common.retrieval_task import HistoricalRetrievalTask +from 
feast.infra.compute_engines.dag.context import ColumnInfo from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.dag.plan import ExecutionPlan -from feast.infra.compute_engines.feature_resolver import FeatureResolver, FeatureViewNode -from feast.infra.compute_engines.dag.context import ColumnInfo +from feast.infra.compute_engines.feature_resolver import ( + FeatureResolver, + FeatureViewNode, +) from feast.infra.registry.base_registry import BaseRegistry from feast.utils import _get_column_names @@ -19,10 +22,10 @@ class FeatureBuilder(ABC): """ def __init__( - self, - registry: BaseRegistry, - feature_view, - task: Union[MaterializationTask, HistoricalRetrievalTask], + self, + registry: BaseRegistry, + feature_view, + task: Union[MaterializationTask, HistoricalRetrievalTask], ): self.registry = registry self.task = task @@ -36,64 +39,46 @@ def build_source_node(self, view): raise NotImplementedError @abstractmethod - def build_aggregation_node(self, - view, - input_node): + def build_aggregation_node(self, view, input_node): raise NotImplementedError @abstractmethod - def build_join_node(self, - view, - input_node): + def build_join_node(self, view, input_node): raise NotImplementedError @abstractmethod - def build_filter_node(self, - view, - input_node): + def build_filter_node(self, view, input_node): raise NotImplementedError @abstractmethod - def build_dedup_node(self, - view, - input_node): + def build_dedup_node(self, view, input_node): raise NotImplementedError @abstractmethod - def build_transformation_node(self, - view, - input_node): + def build_transformation_node(self, view, input_node): raise NotImplementedError @abstractmethod - def build_output_nodes(self, - final_node): + def build_output_nodes(self, final_node): raise NotImplementedError @abstractmethod - def build_validation_node(self, - view, - input_node): + def build_validation_node(self, view, input_node): raise NotImplementedError - def _should_aggregate(self, - view): + def _should_aggregate(self, view): return bool(getattr(view, "aggregations", [])) - def _should_transform(self, - view): + def _should_transform(self, view): return bool(getattr(view, "feature_transformation", None)) - def _should_validate(self, - view): + def _should_validate(self, view): return getattr(view, "enable_validation", False) - def _should_dedupe(self, - view): + def _should_dedupe(self, view): return isinstance(self.task, HistoricalRetrievalTask) or self.task.only_latest - def _build(self, - current_node: FeatureViewNode) -> DAGNode: + def _build(self, current_node: FeatureViewNode) -> DAGNode: current_view = current_node.view # Step 1: build source or parent join @@ -128,8 +113,8 @@ def build(self) -> ExecutionPlan: return ExecutionPlan(self.nodes) def get_column_info( - self, - view: Union[BatchFeatureView, StreamFeatureView, FeatureView], + self, + view: Union[BatchFeatureView, StreamFeatureView, FeatureView], ) -> ColumnInfo: entities = [] for entity_name in view.entities: @@ -157,8 +142,7 @@ def get_column_info( ) def get_field_mapping( - self, - feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView] + self, feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView] ) -> Optional[dict]: """ Get the field mapping for a feature view. 
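
The resolver introduced below and the ExecutionPlan both depend on the same dependencies-first ordering. As a reference point, here is a minimal, self-contained sketch of that DFS-based topological sort; Node is a hypothetical stand-in for Feast's DAGNode (only the name/inputs shape matters), not the real class.

from typing import List, Optional


class Node:
    # Stand-in for DAGNode: a name plus the upstream nodes it depends on.
    def __init__(self, name: str, inputs: Optional[List["Node"]] = None):
        self.name = name
        self.inputs = inputs or []


def topo_sort(roots: List[Node]) -> List[Node]:
    # Post-order DFS: a node is emitted only after all of its inputs,
    # so the returned list is safe to execute front to back.
    visited = set()
    ordered: List[Node] = []

    def dfs(node: Node) -> None:
        if id(node) in visited:
            return
        visited.add(id(node))
        for parent in node.inputs:
            dfs(parent)
        ordered.append(node)

    for root in roots:
        dfs(root)
    return ordered


# source -> filter -> agg; dependencies come out first.
source = Node("source")
agg = Node("agg", inputs=[Node("filter", inputs=[source])])
assert [n.name for n in topo_sort([agg])] == ["source", "filter", "agg"]
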
diff --git a/sdk/python/feast/infra/compute_engines/feature_resolver.py b/sdk/python/feast/infra/compute_engines/feature_resolver.py index 22ee4790906..ec1cf59b4f2 100644 --- a/sdk/python/feast/infra/compute_engines/feature_resolver.py +++ b/sdk/python/feast/infra/compute_engines/feature_resolver.py @@ -1,14 +1,14 @@ -from feast.feature_view import FeatureView +from typing import List, Optional, Set -from typing import Set, List, Optional +from feast.feature_view import FeatureView class FeatureViewNode: """ Logical representation of a node in the FeatureView dependency DAG. """ - def __init__(self, - view: FeatureView): + + def __init__(self, view: FeatureView): self.view: FeatureView = view self.parent: Optional["FeatureViewNode"] = None @@ -19,13 +19,13 @@ class FeatureResolver: This graph represents the logical dependencies between FeatureViews, allowing for ordered execution and cycle detection. """ + def __init__(self): # Used to detect and prevent cycles in the FeatureView graph. self.visited: Set[str] = set() self.resolution_path: List[str] = [] - def resolve(self, - feature_view: FeatureView) -> FeatureViewNode: + def resolve(self, feature_view: FeatureView) -> FeatureViewNode: """ Entry point for resolving a FeatureView into a DAG node. @@ -39,8 +39,7 @@ def resolve(self, self._walk(root) return root - def _walk(self, - node: FeatureViewNode): + def _walk(self, node: FeatureViewNode): """ Recursive traversal of the FeatureView graph. @@ -80,9 +79,7 @@ def dfs(node: FeatureViewNode): dfs(root) return ordered - def debug_dag(self, - node: FeatureViewNode, - depth=0): + def debug_dag(self, node: FeatureViewNode, depth=0): """ Prints the FeatureView dependency DAG for debugging. diff --git a/sdk/python/feast/infra/compute_engines/local/feature_builder.py b/sdk/python/feast/infra/compute_engines/local/feature_builder.py index f7163262735..1787da75b55 100644 --- a/sdk/python/feast/infra/compute_engines/local/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/local/feature_builder.py @@ -1,6 +1,5 @@ from typing import Union -from feast.data_source import DataSource from feast.infra.common.materialization_job import MaterializationTask from feast.infra.common.retrieval_task import HistoricalRetrievalTask from feast.infra.compute_engines.feature_builder import FeatureBuilder @@ -47,7 +46,9 @@ def build_filter_node(self, view, input_node): filter_expr = getattr(view, "filter", None) ttl = getattr(view, "ttl", None) column_info = self.get_column_info(view) - node = LocalFilterNode("filter", column_info, self.backend, filter_expr, ttl, inputs=[input_node]) + node = LocalFilterNode( + "filter", column_info, self.backend, filter_expr, ttl, inputs=[input_node] + ) self.nodes.append(node) return node @@ -55,7 +56,9 @@ def build_aggregation_node(self, view, input_node): agg_specs = view.aggregations agg_ops = self._get_aggregate_operations(agg_specs) group_by_keys = view.entities - node = LocalAggregationNode("agg", self.backend, group_by_keys, agg_ops, inputs=[input_node]) + node = LocalAggregationNode( + "agg", self.backend, group_by_keys, agg_ops, inputs=[input_node] + ) self.nodes.append(node) return node @@ -67,13 +70,17 @@ def build_dedup_node(self, view, input_node): def build_transformation_node(self, view, input_node): transform_config = view.feature_transformation - node = LocalTransformationNode("transform", transform_config, self.backend, inputs=[input_node]) + node = LocalTransformationNode( + "transform", transform_config, self.backend, inputs=[input_node] + ) 
self.nodes.append(node) return node def build_validation_node(self, view, input_node): validation_config = view.validation_config - node = LocalValidationNode("validate", validation_config, self.backend, inputs=[input_node]) + node = LocalValidationNode( + "validate", validation_config, self.backend, inputs=[input_node] + ) self.nodes.append(node) return node @@ -86,7 +93,9 @@ def _get_aggregate_operations(agg_specs): agg_ops = {} for agg in agg_specs: if agg.time_window is not None: - raise ValueError("Time window aggregation is not supported in the local compute engine.") + raise ValueError( + "Time window aggregation is not supported in the local compute engine." + ) alias = f"{agg.function}_{agg.column}" agg_ops[alias] = (agg.function, agg.column) return agg_ops diff --git a/sdk/python/feast/infra/compute_engines/local/nodes.py b/sdk/python/feast/infra/compute_engines/local/nodes.py index 85bf9266f22..6211dd11da5 100644 --- a/sdk/python/feast/infra/compute_engines/local/nodes.py +++ b/sdk/python/feast/infra/compute_engines/local/nodes.py @@ -5,7 +5,7 @@ from feast import BatchFeatureView, StreamFeatureView from feast.data_source import DataSource -from feast.infra.compute_engines.dag.context import ExecutionContext, ColumnInfo +from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext from feast.infra.compute_engines.local.arrow_table_value import ArrowTableValue from feast.infra.compute_engines.local.backends.base import DataFrameBackend from feast.infra.compute_engines.local.local_node import LocalNode @@ -22,12 +22,12 @@ class LocalSourceReadNode(LocalNode): def __init__( - self, - name: str, - source: DataSource, - column_info: ColumnInfo, - start_time: Optional[datetime] = None, - end_time: Optional[datetime] = None, + self, + name: str, + source: DataSource, + column_info: ColumnInfo, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, ): super().__init__(name) self.source = source @@ -35,8 +35,7 @@ def __init__( self.start_time = start_time self.end_time = end_time - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: retrieval_job = create_offline_store_retrieval_job( data_source=self.source, context=context, @@ -46,23 +45,23 @@ def execute(self, arrow_table = retrieval_job.to_arrow() if self.column_info.field_mapping: arrow_table = arrow_table.rename_columns( - [self.column_info.field_mapping.get(col, col) for col in arrow_table.column_names] + [ + self.column_info.field_mapping.get(col, col) + for col in arrow_table.column_names + ] ) return ArrowTableValue(data=arrow_table) class LocalJoinNode(LocalNode): - def __init__(self, - name: str, - column_info: ColumnInfo, - backend: DataFrameBackend, - inputs=None): + def __init__( + self, name: str, column_info: ColumnInfo, backend: DataFrameBackend, inputs=None + ): super().__init__(name, inputs=inputs) self.column_info = column_info self.backend = backend - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: feature_table = self.get_single_table(context).data if context.entity_df is None: @@ -93,21 +92,22 @@ def execute(self, class LocalFilterNode(LocalNode): - def __init__(self, - name: str, - column_info: ColumnInfo, - backend: DataFrameBackend, - filter_expr: Optional[str] = None, - ttl: Optional[timedelta] = None, - inputs=None): + def __init__( + self, + name: str, + column_info: ColumnInfo, + backend: 
DataFrameBackend, + filter_expr: Optional[str] = None, + ttl: Optional[timedelta] = None, + inputs=None, + ): super().__init__(name, inputs=inputs) self.column_info = column_info self.backend = backend self.filter_expr = filter_expr self.ttl = ttl - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) @@ -135,19 +135,20 @@ def execute(self, class LocalAggregationNode(LocalNode): - def __init__(self, - name: str, - backend: DataFrameBackend, - group_keys: list[str], - agg_ops: dict, - inputs=None): + def __init__( + self, + name: str, + backend: DataFrameBackend, + group_keys: list[str], + agg_ops: dict, + inputs=None, + ): super().__init__(name, inputs=inputs) self.backend = backend self.group_keys = group_keys self.agg_ops = agg_ops - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) grouped_df = self.backend.groupby_agg(df, self.group_keys, self.agg_ops) @@ -158,17 +159,14 @@ def execute(self, class LocalDedupNode(LocalNode): - def __init__(self, - name: str, - column_info: ColumnInfo, - backend: DataFrameBackend, - inputs=None): + def __init__( + self, name: str, column_info: ColumnInfo, backend: DataFrameBackend, inputs=None + ): super().__init__(name, inputs=inputs) self.column_info = column_info self.backend = backend - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) @@ -179,8 +177,8 @@ def execute(self, if dedup_keys: sort_keys = [self.column_info.timestamp_column] if ( - self.column_info.created_timestamp_column - and self.column_info.created_timestamp_column in df.columns + self.column_info.created_timestamp_column + and self.column_info.created_timestamp_column in df.columns ): sort_keys.append(self.column_info.created_timestamp_column) @@ -194,17 +192,14 @@ def execute(self, class LocalTransformationNode(LocalNode): - def __init__(self, - name: str, - transformation_fn, - backend: DataFrameBackend, - inputs=None): + def __init__( + self, name: str, transformation_fn, backend: DataFrameBackend, inputs=None + ): super().__init__(name, inputs=inputs) self.transformation_fn = transformation_fn self.backend = backend - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) transformed_df = self.transformation_fn(df) @@ -215,17 +210,14 @@ def execute(self, class LocalValidationNode(LocalNode): - def __init__(self, - name: str, - validation_config, - backend: DataFrameBackend, - inputs=None): + def __init__( + self, name: str, validation_config, backend: DataFrameBackend, inputs=None + ): super().__init__(name, inputs=inputs) self.validation_config = validation_config self.backend = backend - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data df = self.backend.from_arrow(input_table) # Placeholder for actual validation logic @@ -238,15 +230,16 @@ def execute(self, class 
LocalOutputNode(LocalNode): - def __init__(self, - name: str, - feature_view: Union[BatchFeatureView, StreamFeatureView], - inputs=None): + def __init__( + self, + name: str, + feature_view: Union[BatchFeatureView, StreamFeatureView], + inputs=None, + ): super().__init__(name, inputs=inputs) self.feature_view = feature_view - def execute(self, - context: ExecutionContext) -> ArrowTableValue: + def execute(self, context: ExecutionContext) -> ArrowTableValue: input_table = self.get_single_table(context).data context.node_outputs[self.name] = input_table diff --git a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py index d33a0b15eab..ca6138b8273 100644 --- a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py @@ -32,7 +32,9 @@ def build_source_node(self, view): end_time = self.task.end_time source = view.batch_source column_info = self.get_column_info(view) - node = SparkReadNode("source", source, column_info, self.spark_session, start_time, end_time) + node = SparkReadNode( + "source", source, column_info, self.spark_session, start_time, end_time + ) self.nodes.append(node) return node @@ -40,13 +42,17 @@ def build_aggregation_node(self, view, input_node): agg_specs = view.aggregations group_by_keys = view.entities timestamp_col = view.batch_source.timestamp_field - node = SparkAggregationNode("agg", agg_specs, group_by_keys, timestamp_col, inputs=[input_node]) + node = SparkAggregationNode( + "agg", agg_specs, group_by_keys, timestamp_col, inputs=[input_node] + ) self.nodes.append(node) return node def build_join_node(self, view, input_node): column_info = self.get_column_info(view) - node = SparkJoinNode("join", column_info, self.spark_session, inputs=[input_node]) + node = SparkJoinNode( + "join", column_info, self.spark_session, inputs=[input_node] + ) self.nodes.append(node) return node @@ -54,12 +60,21 @@ def build_filter_node(self, view, input_node): filter_expr = getattr(view, "filter", None) ttl = getattr(view, "ttl", None) column_info = self.get_column_info(view) - node = SparkFilterNode("filter", column_info, self.spark_session, ttl, filter_expr, inputs=[input_node]) + node = SparkFilterNode( + "filter", + column_info, + self.spark_session, + ttl, + filter_expr, + inputs=[input_node], + ) return node def build_dedup_node(self, view, input_node): column_info = self.get_column_info(view) - node = SparkDedupNode("dedup", column_info, self.spark_session, inputs=[input_node]) + node = SparkDedupNode( + "dedup", column_info, self.spark_session, inputs=[input_node] + ) self.nodes.append(node) return node diff --git a/sdk/python/feast/infra/compute_engines/spark/nodes.py b/sdk/python/feast/infra/compute_engines/spark/nodes.py index 0ea53017f25..3333136e846 100644 --- a/sdk/python/feast/infra/compute_engines/spark/nodes.py +++ b/sdk/python/feast/infra/compute_engines/spark/nodes.py @@ -8,7 +8,7 @@ from feast.aggregation import Aggregation from feast.data_source import DataSource from feast.infra.common.serde import SerializedArtifacts -from feast.infra.compute_engines.dag.context import ExecutionContext, ColumnInfo +from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext from feast.infra.compute_engines.dag.model import DAGFormat from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.dag.value import DAGValue @@ -32,8 +32,7 @@ # Rename entity_df event_timestamp_col to 
match feature_df def rename_entity_ts_column( - spark_session: SparkSession, - entity_df: DataFrame + spark_session: SparkSession, entity_df: DataFrame ) -> DataFrame: # check if entity_ts_alias already exists if ENTITY_TS_ALIAS in entity_df.columns: @@ -54,13 +53,13 @@ def rename_entity_ts_column( class SparkReadNode(DAGNode): def __init__( - self, - name: str, - source: DataSource, - column_info: ColumnInfo, - spark_session: SparkSession, - start_time: Optional[datetime] = None, - end_time: Optional[datetime] = None, + self, + name: str, + source: DataSource, + column_info: ColumnInfo, + spark_session: SparkSession, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, ): super().__init__(name) self.source = source @@ -69,8 +68,7 @@ def __init__( self.start_time = start_time self.end_time = end_time - def execute(self, - context: ExecutionContext) -> DAGValue: + def execute(self, context: ExecutionContext) -> DAGValue: retrieval_job = create_offline_store_retrieval_job( data_source=self.source, column_info=self.column_info, @@ -98,20 +96,19 @@ def execute(self, class SparkAggregationNode(DAGNode): def __init__( - self, - name: str, - aggregations: List[Aggregation], - group_by_keys: List[str], - timestamp_col: str, - inputs=None, + self, + name: str, + aggregations: List[Aggregation], + group_by_keys: List[str], + timestamp_col: str, + inputs=None, ): super().__init__(name, inputs=inputs) self.aggregations = aggregations self.group_by_keys = group_by_keys self.timestamp_col = timestamp_col - def execute(self, - context: ExecutionContext) -> DAGValue: + def execute(self, context: ExecutionContext) -> DAGValue: input_value = self.get_single_input_value(context) input_value.assert_format(DAGFormat.SPARK) input_df: DataFrame = input_value.data @@ -153,17 +150,18 @@ def execute(self, class SparkJoinNode(DAGNode): - def __init__(self, - name: str, - column_info: ColumnInfo, - spark_session: SparkSession, - inputs=None): + def __init__( + self, + name: str, + column_info: ColumnInfo, + spark_session: SparkSession, + inputs=None, + ): super().__init__(name, inputs=inputs) self.column_info = column_info self.spark_session = spark_session - def execute(self, - context: ExecutionContext) -> DAGValue: + def execute(self, context: ExecutionContext) -> DAGValue: feature_value = self.get_single_input_value(context) feature_value.assert_format(DAGFormat.SPARK) feature_df: DataFrame = feature_value.data @@ -195,21 +193,22 @@ def execute(self, class SparkFilterNode(DAGNode): - def __init__(self, - name: str, - column_info: ColumnInfo, - spark_session: SparkSession, - ttl: Optional[timedelta] = None, - filter_condition: Optional[str] = None, - inputs=None): + def __init__( + self, + name: str, + column_info: ColumnInfo, + spark_session: SparkSession, + ttl: Optional[timedelta] = None, + filter_condition: Optional[str] = None, + inputs=None, + ): super().__init__(name, inputs=inputs) self.column_info = column_info self.spark_session = spark_session self.ttl = ttl self.filter_condition = filter_condition - def execute(self, - context: ExecutionContext) -> DAGValue: + def execute(self, context: ExecutionContext) -> DAGValue: input_value = self.get_single_input_value(context) input_value.assert_format(DAGFormat.SPARK) input_df: DataFrame = input_value.data @@ -245,17 +244,18 @@ def execute(self, class SparkDedupNode(DAGNode): - def __init__(self, - name: str, - column_info: ColumnInfo, - spark_session: SparkSession, - inputs=None): + def __init__( + self, + name: str, + column_info: 
ColumnInfo, + spark_session: SparkSession, + inputs=None, + ): super().__init__(name, inputs=inputs) self.column_info = column_info self.spark_session = spark_session - def execute(self, - context: ExecutionContext) -> DAGValue: + def execute(self, context: ExecutionContext) -> DAGValue: input_value = self.get_single_input_value(context) input_value.assert_format(DAGFormat.SPARK) input_df: DataFrame = input_value.data @@ -284,15 +284,16 @@ def execute(self, class SparkWriteNode(DAGNode): - def __init__(self, - name: str, - feature_view: Union[BatchFeatureView, StreamFeatureView], - inputs=None): + def __init__( + self, + name: str, + feature_view: Union[BatchFeatureView, StreamFeatureView], + inputs=None, + ): super().__init__(name, inputs=inputs) self.feature_view = feature_view - def execute(self, - context: ExecutionContext) -> DAGValue: + def execute(self, context: ExecutionContext) -> DAGValue: spark_df: DataFrame = self.get_single_input_value(context).data serialized_artifacts = SerializedArtifacts.serialize( feature_view=self.feature_view, repo_config=context.repo_config @@ -334,15 +335,11 @@ def execute(self, class SparkTransformationNode(DAGNode): - def __init__(self, - name: str, - udf: callable, - inputs=None): + def __init__(self, name: str, udf: callable, inputs=None): super().__init__(name, inputs) self.udf = udf - def execute(self, - context: ExecutionContext) -> DAGValue: + def execute(self, context: ExecutionContext) -> DAGValue: input_val = self.get_single_input_value(context) input_val.assert_format(DAGFormat.SPARK) print("[SparkTransformationNode] Input schema:", input_val.data.columns) diff --git a/sdk/python/feast/infra/compute_engines/utils.py b/sdk/python/feast/infra/compute_engines/utils.py index 6ccc78118de..20a3dae981d 100644 --- a/sdk/python/feast/infra/compute_engines/utils.py +++ b/sdk/python/feast/infra/compute_engines/utils.py @@ -2,7 +2,7 @@ from typing import Optional from feast.data_source import DataSource -from feast.infra.compute_engines.dag.context import ExecutionContext, ColumnInfo +from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext from feast.infra.offline_stores.offline_store import RetrievalJob diff --git a/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py b/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py index 5711d92a3da..ef7402b5a4e 100644 --- a/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py +++ b/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py @@ -1,22 +1,34 @@ -import pandas as pd -import pytest from datetime import datetime, timedelta -from typing import cast from unittest.mock import MagicMock + +import pandas as pd +import pytest from pyspark.sql import DataFrame +from tqdm import tqdm from feast import BatchFeatureView, Entity, Field -from feast.types import Float32, Int64, Int32 from feast.aggregation import Aggregation -from feast.infra.common.materialization_job import MaterializationJobStatus, MaterializationTask +from feast.infra.common.materialization_job import ( + MaterializationJobStatus, + MaterializationTask, +) from feast.infra.compute_engines.spark.compute import SparkComputeEngine -from feast.infra.offline_stores.contrib.spark_offline_store.spark import SparkOfflineStore -from feast.infra.offline_stores.contrib.spark_offline_store.tests.data_source import SparkDataSourceCreator -from tests.integration.feature_repos.repo_configuration import construct_test_environment -from 
tests.integration.feature_repos.integration_test_repo_config import IntegrationTestRepoConfig -from tests.integration.feature_repos.universal.online_store.redis import RedisOnlineStoreCreator -from tqdm import tqdm - +from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( + SparkOfflineStore, +) +from feast.infra.offline_stores.contrib.spark_offline_store.tests.data_source import ( + SparkDataSourceCreator, +) +from feast.types import Float32, Int32, Int64 +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) +from tests.integration.feature_repos.repo_configuration import ( + construct_test_environment, +) +from tests.integration.feature_repos.universal.online_store.redis import ( + RedisOnlineStoreCreator, +) now = datetime.now() today = datetime.today() @@ -42,24 +54,26 @@ def spark_env(): def create_sample_datasource(spark_environment): - df = pd.DataFrame([ - { - "driver_id": 1001, - "event_timestamp": today - timedelta(days=1), - "created": now - timedelta(hours=2), - "conv_rate": 0.8, - "acc_rate": 0.5, - "avg_daily_trips": 15, - }, - { - "driver_id": 1002, - "event_timestamp": today - timedelta(days=1), - "created": now - timedelta(hours=2), - "conv_rate": 0.7, - "acc_rate": 0.4, - "avg_daily_trips": 12, - }, - ]) + df = pd.DataFrame( + [ + { + "driver_id": 1001, + "event_timestamp": today - timedelta(days=1), + "created": now - timedelta(hours=2), + "conv_rate": 0.8, + "acc_rate": 0.5, + "avg_daily_trips": 15, + }, + { + "driver_id": 1002, + "event_timestamp": today - timedelta(days=1), + "created": now - timedelta(hours=2), + "conv_rate": 0.7, + "acc_rate": 0.4, + "avg_daily_trips": 12, + }, + ] + ) ds = spark_environment.data_source_creator.create_data_source( df, spark_environment.feature_store.project, @@ -111,7 +125,7 @@ def transform(df: DataFrame) -> DataFrame: "feature_cols": "sum_conv_rate", "ts_col": "event_timestamp", "created_ts_col": "created", - } + }, ) diff --git a/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py index 73194a9ea15..c472a359815 100644 --- a/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py +++ b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py @@ -1,21 +1,22 @@ from unittest.mock import MagicMock -import pytest -from feast.infra.compute_engines.feature_builder import FeatureBuilder from feast.infra.compute_engines.dag.plan import ExecutionPlan - +from feast.infra.compute_engines.feature_builder import FeatureBuilder # --------------------------- # Mock Feature View Definitions # --------------------------- + class MockFeatureView: - def __init__(self, - name, - source=None, - source_view=None, - aggregations=None, - feature_transformation=None): + def __init__( + self, + name, + source=None, + source_view=None, + aggregations=None, + feature_transformation=None, + ): self.name = name self.source = source self.source_view = source_view @@ -29,8 +30,7 @@ def __init__(self, class MockTransformation: - def __init__(self, - name): + def __init__(self, name): self.name = name self.udf = lambda df: df @@ -58,47 +58,35 @@ def __init__(self, # Mock FeatureBuilder # --------------------------- + class MockFeatureBuilder(FeatureBuilder): - def __init__(self, - feature_view): - super().__init__(registry=MagicMock(), feature_view=feature_view, task=MagicMock()) + def __init__(self, feature_view): + super().__init__( + registry=MagicMock(), feature_view=feature_view, 
task=MagicMock() + ) - def build_source_node(self, - source): + def build_source_node(self, source): return f"SourceNode({source})" - def build_join_node(self, - view, - input_node): + def build_join_node(self, view, input_node): return f"JoinNode({view.name} <- {input_node})" - def build_filter_node(self, - view, - input_node): + def build_filter_node(self, view, input_node): return f"FilterNode({view.name} <- {input_node})" - def build_aggregation_node(self, - view, - input_node): + def build_aggregation_node(self, view, input_node): return f"AggregationNode({view.name} <- {input_node})" - def build_dedup_node(self, - view, - input_node): + def build_dedup_node(self, view, input_node): return f"DedupNode({view.name} <- {input_node})" - def build_transformation_node(self, - view, - input_node): + def build_transformation_node(self, view, input_node): return f"TransformNode({view.name} <- {input_node})" - def build_validation_node(self, - view, - input_node): + def build_validation_node(self, view, input_node): return f"ValidationNode({view.name} <- {input_node})" - def build_output_nodes(self, - final_node): + def build_output_nodes(self, final_node): self.nodes.append(f"OutputNode({final_node})") @@ -106,6 +94,7 @@ def build_output_nodes(self, # Test # --------------------------- + def test_recursive_featureview_build(): builder = MockFeatureBuilder(daily_driver_stats) execution_plan: ExecutionPlan = builder.build() From 89bac63ea1dfce8a00fd99a5343c4ccc1ea718f4 Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Mon, 7 Jul 2025 00:12:54 -0700 Subject: [PATCH 03/12] Checkpoint Signed-off-by: HaoXuAI --- protos/feast/core/FeatureView.proto | 2 +- sdk/python/feast/batch_feature_view.py | 18 +--- sdk/python/feast/feature_view.py | 63 +++++------ .../infra/compute_engines/algorithms/topo.py | 30 ++---- .../feast/infra/compute_engines/dag/plan.py | 34 +++++- .../infra/compute_engines/feature_builder.py | 101 +++++++++++------- .../infra/compute_engines/feature_resolver.py | 71 ++++++------ .../compute_engines/local/feature_builder.py | 5 +- .../compute_engines/spark/feature_builder.py | 16 ++- .../infra/compute_engines/spark/nodes.py | 57 +++++----- .../protos/feast/core/FeatureView_pb2.py | 20 ++-- .../protos/feast/core/FeatureView_pb2.pyi | 10 +- .../infra/compute_engines/spark/test_nodes.py | 47 ++++---- 13 files changed, 252 insertions(+), 222 deletions(-) diff --git a/protos/feast/core/FeatureView.proto b/protos/feast/core/FeatureView.proto index 58d641d40da..ff750acccc2 100644 --- a/protos/feast/core/FeatureView.proto +++ b/protos/feast/core/FeatureView.proto @@ -80,7 +80,7 @@ message FeatureViewSpec { // Whether these features should be written to the offline store bool offline = 13; - FeatureViewSpec source_view = 14; + repeated FeatureViewSpec source_views = 14; } message FeatureViewMeta { diff --git a/sdk/python/feast/batch_feature_view.py b/sdk/python/feast/batch_feature_view.py index 03e927d40c7..bac3d10da28 100644 --- a/sdk/python/feast/batch_feature_view.py +++ b/sdk/python/feast/batch_feature_view.py @@ -74,8 +74,7 @@ def __init__( *, name: str, mode: Union[TransformationMode, str] = TransformationMode.PYTHON, - source: Optional[DataSource] = None, - source_view: Optional["FeatureView"] = None, + source: Union[DataSource, "BatchFeatureView", List["BatchFeatureView"]], entities: Optional[List[Entity]] = None, ttl: Optional[timedelta] = None, tags: Optional[Dict[str, str]] = None, @@ -97,20 +96,6 @@ def __init__( RuntimeWarning, ) - if source is not None: - if ( - type(source).__name__ 
not in SUPPORTED_BATCH_SOURCES - and source.to_proto().type != DataSourceProto.SourceType.CUSTOM_SOURCE - ): - raise ValueError( - f"Batch feature views need a batch source, expected one of {SUPPORTED_BATCH_SOURCES} " - f"or CUSTOM_SOURCE, got {type(source).__name__}: {source.name} instead " - ) - elif source_view is None: - raise ValueError( - "BatchFeatureView must have either 'source' or 'source_view'." - ) - self.mode = mode self.udf = udf self.udf_string = udf_string @@ -131,7 +116,6 @@ def __init__( owner=owner, schema=schema, source=source, - source_view=source_view, ) def get_feature_transformation(self) -> Optional[Transformation]: diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index df011c10eb5..8d745443f36 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -14,7 +14,7 @@ import copy import warnings from datetime import datetime, timedelta -from typing import Dict, List, Optional, Tuple, Type +from typing import Dict, List, Optional, Tuple, Type, Union from google.protobuf.duration_pb2 import Duration from google.protobuf.message import Message @@ -90,7 +90,7 @@ class FeatureView(BaseFeatureView): ttl: Optional[timedelta] batch_source: DataSource stream_source: Optional[DataSource] - source_view: Optional["FeatureView"] + source_views: Optional[List["FeatureView"]] entity_columns: List[Field] features: List[Field] online: bool @@ -104,8 +104,7 @@ def __init__( self, *, name: str, - source: Optional[DataSource] = None, - source_view: Optional["FeatureView"] = None, + source: Union[DataSource, "FeatureView", List["FeatureView"]], schema: Optional[List[Field]] = None, entities: Optional[List[Entity]] = None, ttl: Optional[timedelta] = timedelta(days=0), @@ -146,27 +145,35 @@ def __init__( self.ttl = ttl schema = schema or [] - if (source is None) == (source_view is None): - raise ValueError( - "FeatureView must have exactly one of 'source' or 'source_view', not both/neither." - ) + # Normalize source + self.data_source: Optional[DataSource] = None + self.source_views: List[FeatureView] = [] - # Initialize data sources. + if isinstance(source, DataSource): + self.data_source = source + elif isinstance(source, FeatureView): + self.source_views = [source] + elif isinstance(source, list) and all(isinstance(sv, FeatureView) for sv in source): + self.source_views = source + else: + raise TypeError("source must be a DataSource, a FeatureView, or a list of FeatureViews.") + + # Set up stream/batch sources if ( - isinstance(source, PushSource) - or isinstance(source, KafkaSource) - or isinstance(source, KinesisSource) + isinstance(self.data_source, PushSource) + or isinstance(self.data_source, KafkaSource) + or isinstance(self.data_source, KinesisSource) ): self.stream_source = source - if not source.batch_source: + if not self.data_source.batch_source: raise ValueError( f"A batch_source needs to be specified for stream source `{source.name}`" ) else: - self.batch_source = source.batch_source + self.batch_source = self.data_source.batch_source else: self.stream_source = None - self.batch_source = source + self.batch_source = self.data_source # Initialize features and entity columns. 
features: List[Field] = [] @@ -227,12 +234,11 @@ def __init__( description=description, tags=tags, owner=owner, - source=source, + source=self.batch_source, ) self.online = online self.offline = offline self.materialization_intervals = [] - self.source_view = source_view def __hash__(self): return super().__hash__() @@ -366,9 +372,9 @@ def to_proto(self) -> FeatureViewProto: if self.stream_source: stream_source_proto = self.stream_source.to_proto() stream_source_proto.data_source_class_type = f"{self.stream_source.__class__.__module__}.{self.stream_source.__class__.__name__}" - source_view_proto = None - if self.source_view: - source_view_proto = self.source_view.to_proto().spec + source_view_protos = None + if self.source_views: + source_view_protos = [view.to_proto().spec for view in self.source_views] spec = FeatureViewSpecProto( name=self.name, entities=self.entities, @@ -382,7 +388,7 @@ def to_proto(self) -> FeatureViewProto: offline=self.offline, batch_source=batch_source_proto, stream_source=stream_source_proto, - source_view=source_view_proto, + source_views=source_view_protos, ) return FeatureViewProto(spec=spec, meta=meta) @@ -428,13 +434,11 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): if feature_view_proto.spec.HasField("stream_source") else None ) - source_view = ( - FeatureView.from_proto( - FeatureViewProto(spec=feature_view_proto.spec.source_view, meta=None) - ) - if feature_view_proto.spec.HasField("source_view") - else None - ) + source_views = [ + FeatureView.from_proto(FeatureViewProto(spec=view_spec, meta=None)) + for view_spec in feature_view_proto.spec.source_views + ] + feature_view = cls( name=feature_view_proto.spec.name, description=feature_view_proto.spec.description, @@ -447,8 +451,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): if feature_view_proto.spec.ttl.ToNanoseconds() == 0 else feature_view_proto.spec.ttl.ToTimedelta() ), - source=batch_source, - source_view=source_view, + source=batch_source if batch_source else source_views ) if stream_source: feature_view.stream_source = stream_source diff --git a/sdk/python/feast/infra/compute_engines/algorithms/topo.py b/sdk/python/feast/infra/compute_engines/algorithms/topo.py index 6deab652e92..8b2e7aebc13 100644 --- a/sdk/python/feast/infra/compute_engines/algorithms/topo.py +++ b/sdk/python/feast/infra/compute_engines/algorithms/topo.py @@ -1,5 +1,3 @@ -# feast/infra/compute_engines/dag/utils.py - from typing import List, Set from feast.infra.compute_engines.dag.node import DAGNode @@ -7,32 +5,26 @@ def topo_sort(root: DAGNode) -> List[DAGNode]: """ - Topologically sort a DAGNode graph starting from root. + Topologically sort a DAG starting from a single root node. + + Args: + root: The root DAGNode. Returns: - List of DAGNodes in execution-safe order (dependencies first). + A list of DAGNodes in topological order (dependencies first). """ - visited: Set[int] = set() - ordered: List[DAGNode] = [] - - def dfs(node: DAGNode): - if id(node) in visited: - return - visited.add(id(node)) - for input_node in node.inputs: - dfs(input_node) - ordered.append(node) - - dfs(root) - return ordered + return topo_sort_multiple([root]) def topo_sort_multiple(roots: List[DAGNode]) -> List[DAGNode]: """ - Topologically sort a DAG with multiple roots (e.g., multiple write nodes). + Topologically sort a DAG with multiple roots. + + Args: + roots: List of root DAGNodes. Returns: - List of all reachable DAGNodes in execution order. + A list of all reachable DAGNodes in execution-safe order. 
""" visited: Set[int] = set() ordered: List[DAGNode] = [] diff --git a/sdk/python/feast/infra/compute_engines/dag/plan.py b/sdk/python/feast/infra/compute_engines/dag/plan.py index 130a894bda8..6bfb871c104 100644 --- a/sdk/python/feast/infra/compute_engines/dag/plan.py +++ b/sdk/python/feast/infra/compute_engines/dag/plan.py @@ -31,7 +31,8 @@ class ExecutionPlan: Example: DAG: - ReadNode -> AggregateNode -> JoinNode -> TransformNode -> WriteNode + ReadNode -> TransformNode -> AggregateNode -> -> WriteNode + -> JoinNode -> Execution proceeds step by step, passing intermediate DAGValues through the plan while respecting node dependencies and formats. @@ -47,10 +48,6 @@ def execute(self, context: ExecutionContext) -> DAGValue: context.node_outputs = {} for node in self.nodes: - for input_node in node.inputs: - if input_node.name not in context.node_outputs: - context.node_outputs[input_node.name] = input_node.execute(context) - output = node.execute(context) context.node_outputs[node.name] = output @@ -62,3 +59,30 @@ def to_sql(self, context: ExecutionContext) -> str: This is a placeholder and should be implemented in subclasses. """ raise NotImplementedError("SQL generation is not implemented yet.") + + def to_dag(self): + """ + Generate a textual DAG representation for debugging. + + Returns: + str: A multi-line string showing the DAG structure. + """ + lines = [] + seen = set() + + def dfs(node: DAGNode, indent=0): + prefix = " " * indent + if node.name in seen: + lines.append(f"{prefix}- {node.name} (visited)") + return + seen.add(node.name) + lines.append(f"{prefix}- {node.name}") + for input_node in node.inputs: + dfs(input_node, indent + 1) + + for node in self.nodes: + dfs(node) + + return "\n".join(lines) + + diff --git a/sdk/python/feast/infra/compute_engines/feature_builder.py b/sdk/python/feast/infra/compute_engines/feature_builder.py index 0414c970656..5197a1adad5 100644 --- a/sdk/python/feast/infra/compute_engines/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/feature_builder.py @@ -1,6 +1,5 @@ from abc import ABC, abstractmethod -from typing import List, Optional, Union - +from typing import List, Optional, Union, Dict from feast import BatchFeatureView, FeatureView, StreamFeatureView from feast.infra.common.materialization_job import MaterializationTask from feast.infra.common.retrieval_task import HistoricalRetrievalTask @@ -9,10 +8,10 @@ from feast.infra.compute_engines.dag.plan import ExecutionPlan from feast.infra.compute_engines.feature_resolver import ( FeatureResolver, - FeatureViewNode, ) from feast.infra.registry.base_registry import BaseRegistry from feast.utils import _get_column_names +from feast.infra.compute_engines.algorithms.topo import topo_sort class FeatureBuilder(ABC): @@ -28,11 +27,11 @@ def __init__( task: Union[MaterializationTask, HistoricalRetrievalTask], ): self.registry = registry + self.feature_view = feature_view self.task = task self.nodes: List[DAGNode] = [] - self.resolver = FeatureResolver() - self.dag_root = self.resolver.resolve(feature_view) - self.sorted_nodes = self.resolver.topo_sort(self.dag_root) + self.feature_resolver = FeatureResolver() + self.dag_root = self.feature_resolver.resolve(self.feature_view) @abstractmethod def build_source_node(self, view): @@ -43,7 +42,7 @@ def build_aggregation_node(self, view, input_node): raise NotImplementedError @abstractmethod - def build_join_node(self, view, input_node): + def build_join_node(self, view, input_nodes): raise NotImplementedError @abstractmethod @@ -55,7 +54,7 @@ 
def build_dedup_node(self, view, input_node): raise NotImplementedError @abstractmethod - def build_transformation_node(self, view, input_node): + def build_transformation_node(self, view, input_nodes): raise NotImplementedError @abstractmethod @@ -78,39 +77,67 @@ def _should_validate(self, view): def _should_dedupe(self, view): return isinstance(self.task, HistoricalRetrievalTask) or self.task.only_latest - def _build(self, current_node: FeatureViewNode) -> DAGNode: - current_view = current_node.view + def _build(self, view, input_nodes: Optional[List[DAGNode]]) -> DAGNode: + + # Step 1: build source node + if view.data_source: + last_node = self.build_source_node(view) + + if self._should_transform(view): + # Transform applied to the source data + last_node = self.build_transformation_node(view, [last_node]) - # Step 1: build source or parent join - if current_node.parent: - parent_node = self._build(current_node.parent) - last_node = self.build_join_node(current_view, parent_node) + # If there are input nodes, transform or join them + elif input_nodes: + # User-defined transform handles the merging of input views + if self._should_transform(view): + last_node = self.build_transformation_node(view, input_nodes) + # Default join + else: + last_node = self.build_join_node(view, input_nodes) else: - last_node = self.build_source_node(current_view) + raise ValueError(f"FeatureView {view.name} has no valid source or inputs") # Step 2: filter - last_node = self.build_filter_node(current_view, last_node) + last_node = self.build_filter_node(view, last_node) # Step 3: aggregate or dedupe - if self._should_aggregate(current_view): - last_node = self.build_aggregation_node(current_view, last_node) - elif self._should_dedupe(current_view): - last_node = self.build_dedup_node(current_view, last_node) + if self._should_aggregate(view): + last_node = self.build_aggregation_node(view, last_node) + elif self._should_dedupe(view): + last_node = self.build_dedup_node(view, last_node) - # Step 4: transform - if self._should_transform(current_view): - last_node = self.build_transformation_node(current_view, last_node) - - # Step 5: validate - if self._should_validate(current_view): - last_node = self.build_validation_node(current_view, last_node) + # Step 4: validate + if self._should_validate(view): + last_node = self.build_validation_node(view, last_node) return last_node def build(self) -> ExecutionPlan: - final_node = self._build(self.dag_root) - self.build_output_nodes(final_node) - return ExecutionPlan(self.nodes) + # Step 1: Topo sort the FeatureViewNode DAG (Logical DAG) + logical_nodes = self.feature_resolver.topo_sort(self.dag_root) + + # Step 2: For each FeatureView, build its corresponding execution DAGNode + view_to_node: Dict[str, DAGNode] = {} + + for node in logical_nodes: + view = node.view + parent_dag_nodes = [ + view_to_node[parent.view.name] + for parent in node.inputs + if parent.view.name in view_to_node + ] + dag_node = self._build(view, parent_dag_nodes) + view_to_node[view.name] = dag_node + + # Step 3: Build output node + final_node = self.build_output_nodes(view_to_node[self.feature_view.name]) + + # Step 4: Topo sort the final DAG from the output node (Physical DAG) + sorted_nodes = topo_sort(final_node) + + # Step 5: Return sorted execution plan + return ExecutionPlan(sorted_nodes) def get_column_info( self, @@ -120,17 +147,9 @@ def get_column_info( for entity_name in view.entities: entities.append(self.registry.get_entity(entity_name, self.task.project)) - if view.source_view: 
- # If the view has a source_view, the column information come from the tags dict - # unpack to get those values - join_keys = view.source_view.tags.get("join_keys", []) - feature_cols = view.source_view.tags.get("feature_cols", []) - ts_col = view.source_view.tags.get("ts_col", None) - created_ts_col = view.source_view.tags.get("created_ts_col", None) - else: - join_keys, feature_cols, ts_col, created_ts_col = _get_column_names( - view, entities - ) + join_keys, feature_cols, ts_col, created_ts_col = _get_column_names( + view, entities + ) field_mapping = self.get_field_mapping(self.task.feature_view) return ColumnInfo( diff --git a/sdk/python/feast/infra/compute_engines/feature_resolver.py b/sdk/python/feast/infra/compute_engines/feature_resolver.py index ec1cf59b4f2..2edd8d71b73 100644 --- a/sdk/python/feast/infra/compute_engines/feature_resolver.py +++ b/sdk/python/feast/infra/compute_engines/feature_resolver.py @@ -1,16 +1,26 @@ from typing import List, Optional, Set from feast.feature_view import FeatureView +from feast.infra.compute_engines.dag.node import DAGNode +from feast.infra.compute_engines.algorithms.topo import topo_sort +from feast.infra.compute_engines.dag.context import ExecutionContext +from feast.infra.compute_engines.dag.value import DAGValue -class FeatureViewNode: +class FeatureViewNode(DAGNode): """ Logical representation of a node in the FeatureView dependency DAG. """ def __init__(self, view: FeatureView): + super().__init__(name=view.name) self.view: FeatureView = view - self.parent: Optional["FeatureViewNode"] = None + self.inputs: List["FeatureViewNode"] = [] + + def execute(self, context: ExecutionContext) -> DAGValue: + raise NotImplementedError( + f"FeatureViewNode '{self.name}' does not implement execute method." + ) class FeatureResolver: @@ -21,9 +31,9 @@ class FeatureResolver: """ def __init__(self): - # Used to detect and prevent cycles in the FeatureView graph. - self.visited: Set[str] = set() - self.resolution_path: List[str] = [] + self._visited: Set[str] = set() + self._resolution_path: List[str] = [] + self._node_cache: dict[str, FeatureViewNode] = {} def resolve(self, feature_view: FeatureView) -> FeatureViewNode: """ @@ -35,11 +45,9 @@ def resolve(self, feature_view: FeatureView) -> FeatureViewNode: Returns: A FeatureViewNode representing the root of the logical dependency DAG. """ - root = FeatureViewNode(feature_view) - self._walk(root) - return root + return self._walk(feature_view) - def _walk(self, node: FeatureViewNode): + def _walk(self, view: FeatureView): """ Recursive traversal of the FeatureView graph. @@ -47,37 +55,28 @@ def _walk(self, node: FeatureViewNode): Cycles are detected using the visited set. Args: - node: The current FeatureViewNode being processed. + view: The FeatureView to process. 
""" - view = node.view - if view.name in self.visited: - cycle_path = " โ†’ ".join(self.resolution_path + [view.name]) - raise ValueError(f"Cycle detected in FeatureView graph: {cycle_path}") - self.visited.add(view.name) - self.resolution_path.append(view.name) + if view.name in self._resolution_path: + cycle = " โ†’ ".join(self._resolution_path + [view.name]) + raise ValueError(f"Cycle detected in FeatureView DAG: {cycle}") - # TODO: Only one parent is allowed via source_view, support more than one - if view.source_view: - parent_node = FeatureViewNode(view.source_view) - node.parent = parent_node - self._walk(parent_node) + if view.name in self._node_cache: + return self._node_cache[view.name] - self.resolution_path.pop() + node = FeatureViewNode(view) + self._node_cache[view.name] = node - def topo_sort(self, root: FeatureViewNode) -> List[FeatureViewNode]: - visited = set() - ordered: List[FeatureViewNode] = [] + self._resolution_path.append(view.name) + for upstream_view in view.source_views: + input_node = self._walk(upstream_view) + node.inputs.append(input_node) + self._resolution_path.pop() - def dfs(node: FeatureViewNode): - if id(node) in visited: - return - visited.add(id(node)) - if node.parent: - dfs(node.parent) - ordered.append(node) + return node - dfs(root) - return ordered + def topo_sort(self, root: FeatureViewNode) -> List[FeatureViewNode]: + return topo_sort(root) def debug_dag(self, node: FeatureViewNode, depth=0): """ @@ -89,5 +88,5 @@ def debug_dag(self, node: FeatureViewNode, depth=0): """ indent = " " * depth print(f"{indent}- {node.view.name}") - if node.parent: - self.debug_dag(node.parent, depth + 1) + for input_node in node.inputs: + self.debug_dag(input_node, depth + 1) diff --git a/sdk/python/feast/infra/compute_engines/local/feature_builder.py b/sdk/python/feast/infra/compute_engines/local/feature_builder.py index 1787da75b55..1ef5a7574a5 100644 --- a/sdk/python/feast/infra/compute_engines/local/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/local/feature_builder.py @@ -68,10 +68,10 @@ def build_dedup_node(self, view, input_node): self.nodes.append(node) return node - def build_transformation_node(self, view, input_node): + def build_transformation_node(self, view, input_nodes): transform_config = view.feature_transformation node = LocalTransformationNode( - "transform", transform_config, self.backend, inputs=[input_node] + "transform", transform_config, self.backend, inputs=input_nodes ) self.nodes.append(node) return node @@ -87,6 +87,7 @@ def build_validation_node(self, view, input_node): def build_output_nodes(self, input_node): node = LocalOutputNode("output", self.dag_root.view, inputs=[input_node]) self.nodes.append(node) + return node @staticmethod def _get_aggregate_operations(agg_specs): diff --git a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py index ca6138b8273..d60a1ec0d21 100644 --- a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py @@ -48,10 +48,16 @@ def build_aggregation_node(self, view, input_node): self.nodes.append(node) return node - def build_join_node(self, view, input_node): + def build_join_node(self, + view, + input_nodes): column_info = self.get_column_info(view) node = SparkJoinNode( - "join", column_info, self.spark_session, inputs=[input_node] + name=f"{view.name}_join", + column_info=column_info, + spark_session=self.spark_session, + 
inputs=input_nodes, + how="left", # You can make this configurable later ) self.nodes.append(node) return node @@ -68,6 +74,7 @@ def build_filter_node(self, view, input_node): filter_expr, inputs=[input_node], ) + self.nodes.append(node) return node def build_dedup_node(self, view, input_node): @@ -78,16 +85,17 @@ def build_dedup_node(self, view, input_node): self.nodes.append(node) return node - def build_transformation_node(self, view, input_node): + def build_transformation_node(self, view, input_nodes): udf_name = view.feature_transformation.name udf = view.feature_transformation.udf - node = SparkTransformationNode(udf_name, udf, inputs=[input_node]) + node = SparkTransformationNode(udf_name, udf, inputs=input_nodes) self.nodes.append(node) return node def build_output_nodes(self, input_node): node = SparkWriteNode("output", self.dag_root.view, inputs=[input_node]) self.nodes.append(node) + return node def build_validation_node(self, view, input_node): pass diff --git a/sdk/python/feast/infra/compute_engines/spark/nodes.py b/sdk/python/feast/infra/compute_engines/spark/nodes.py index 3333136e846..c19215329ce 100644 --- a/sdk/python/feast/infra/compute_engines/spark/nodes.py +++ b/sdk/python/feast/infra/compute_engines/spark/nodes.py @@ -155,40 +155,37 @@ def __init__( name: str, column_info: ColumnInfo, spark_session: SparkSession, - inputs=None, + inputs: Optional[List[DAGNode]] = None, + how: str = "left" ): - super().__init__(name, inputs=inputs) + super().__init__(name, inputs=inputs or []) self.column_info = column_info self.spark_session = spark_session + self.how = how def execute(self, context: ExecutionContext) -> DAGValue: - feature_value = self.get_single_input_value(context) - feature_value.assert_format(DAGFormat.SPARK) - feature_df: DataFrame = feature_value.data - print("[SparkJoinNode] Input schema:", feature_df.columns) + input_values = self.get_input_values(context) + for val in input_values: + val.assert_format(DAGFormat.SPARK) + # Join all input DataFrames on join_keys + joined_df = input_values[0].data + for dag_value in input_values[1:]: + joined_df = joined_df.join(dag_value.data, on=self.column_info.join_keys, how=self.how) + + # If entity_df is provided, join it in last entity_df = context.entity_df - if entity_df is None: - return DAGValue( - data=feature_df, - format=DAGFormat.SPARK, - metadata={"joined_on": None}, + if entity_df is not None: + entity_df = rename_entity_ts_column( + spark_session=self.spark_session, + entity_df=entity_df, ) - - # Rename entity_df event_timestamp_col to match feature_df - entity_df = rename_entity_ts_column( - spark_session=self.spark_session, - entity_df=entity_df, - ) - - # Perform left join on entity df - # TODO: give a config option to use other join types - joined = feature_df.join(entity_df, on=self.column_info.join_keys, how="left") + joined_df = joined_df.join(entity_df, on=self.column_info.join_keys, how=self.how) return DAGValue( - data=joined, + data=joined_df, format=DAGFormat.SPARK, - metadata={"joined_on": self.column_info.join_keys}, + metadata={"joined_on": self.column_info.join_keys, "join_type": self.how}, ) @@ -335,16 +332,20 @@ def execute(self, context: ExecutionContext) -> DAGValue: class SparkTransformationNode(DAGNode): - def __init__(self, name: str, udf: callable, inputs=None): + def __init__(self, name: str, udf: callable, inputs: List[DAGNode]): super().__init__(name, inputs) self.udf = udf def execute(self, context: ExecutionContext) -> DAGValue: - input_val = self.get_single_input_value(context) 
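# --- Aside (illustrative sketch, not from the patch): the multi-input
# SparkJoinNode above folds its input DataFrames pairwise on the view's join
# keys, then joins the entity frame last. A self-contained PySpark illustration;
# all data and names below are invented for the example.
from functools import reduce
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
conv = spark.createDataFrame([(1001, 0.8)], ["driver_id", "conv_rate"])
acc = spark.createDataFrame([(1001, 0.5)], ["driver_id", "acc_rate"])
entity_df = spark.createDataFrame([(1001,)], ["driver_id"])

# Equivalent to the loop in SparkJoinNode.execute: df0 JOIN df1 JOIN df2 ...
joined = reduce(lambda l, r: l.join(r, on=["driver_id"], how="left"), [conv, acc])
joined = joined.join(entity_df, on=["driver_id"], how="left")
joined.show()  # columns: driver_id | conv_rate | acc_rate
# --- end aside ---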
- input_val.assert_format(DAGFormat.SPARK) - print("[SparkTransformationNode] Input schema:", input_val.data.columns) + input_values = self.get_input_values(context) + for val in input_values: + val.assert_format(DAGFormat.SPARK) + + input_dfs: List[DataFrame] = [val.data for val in input_values] + + print(f"[SparkTransformationNode] Executing transform on {len(input_dfs)} input(s).") - transformed_df = self.udf(input_val.data) + transformed_df = self.udf(*input_dfs) return DAGValue( data=transformed_df, format=DAGFormat.SPARK, metadata={"transformed": True} diff --git a/sdk/python/feast/protos/feast/core/FeatureView_pb2.py b/sdk/python/feast/protos/feast/core/FeatureView_pb2.py index 71096b00d2b..632faa90cae 100644 --- a/sdk/python/feast/protos/feast/core/FeatureView_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureView_pb2.py @@ -17,7 +17,7 @@ from feast.protos.feast.core import Feature_pb2 as feast_dot_core_dot_Feature__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1c\x66\x65\x61st/core/FeatureView.proto\x12\nfeast.core\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1b\x66\x65\x61st/core/DataSource.proto\x1a\x18\x66\x65\x61st/core/Feature.proto\"c\n\x0b\x46\x65\x61tureView\x12)\n\x04spec\x18\x01 \x01(\x0b\x32\x1b.feast.core.FeatureViewSpec\x12)\n\x04meta\x18\x02 \x01(\x0b\x32\x1b.feast.core.FeatureViewMeta\"\x80\x04\n\x0f\x46\x65\x61tureViewSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07project\x18\x02 \x01(\t\x12\x10\n\x08\x65ntities\x18\x03 \x03(\t\x12+\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x31\n\x0e\x65ntity_columns\x18\x0c \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x13\n\x0b\x64\x65scription\x18\n \x01(\t\x12\x33\n\x04tags\x18\x05 \x03(\x0b\x32%.feast.core.FeatureViewSpec.TagsEntry\x12\r\n\x05owner\x18\x0b \x01(\t\x12&\n\x03ttl\x18\x06 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\x0c\x62\x61tch_source\x18\x07 \x01(\x0b\x32\x16.feast.core.DataSource\x12-\n\rstream_source\x18\t \x01(\x0b\x32\x16.feast.core.DataSource\x12\x0e\n\x06online\x18\x08 \x01(\x08\x12\x0f\n\x07offline\x18\r \x01(\x08\x12\x30\n\x0bsource_view\x18\x0e \x01(\x0b\x32\x1b.feast.core.FeatureViewSpec\x1a+\n\tTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xcc\x01\n\x0f\x46\x65\x61tureViewMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12:\n\x16last_updated_timestamp\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x46\n\x19materialization_intervals\x18\x03 \x03(\x0b\x32#.feast.core.MaterializationInterval\"w\n\x17MaterializationInterval\x12.\n\nstart_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x08\x65nd_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"@\n\x0f\x46\x65\x61tureViewList\x12-\n\x0c\x66\x65\x61tureviews\x18\x01 \x03(\x0b\x32\x17.feast.core.FeatureViewBU\n\x10\x66\x65\x61st.proto.coreB\x10\x46\x65\x61tureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/coreb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1c\x66\x65\x61st/core/FeatureView.proto\x12\nfeast.core\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1b\x66\x65\x61st/core/DataSource.proto\x1a\x18\x66\x65\x61st/core/Feature.proto\"c\n\x0b\x46\x65\x61tureView\x12)\n\x04spec\x18\x01 \x01(\x0b\x32\x1b.feast.core.FeatureViewSpec\x12)\n\x04meta\x18\x02 
\x01(\x0b\x32\x1b.feast.core.FeatureViewMeta\"\x81\x04\n\x0f\x46\x65\x61tureViewSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07project\x18\x02 \x01(\t\x12\x10\n\x08\x65ntities\x18\x03 \x03(\t\x12+\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x31\n\x0e\x65ntity_columns\x18\x0c \x03(\x0b\x32\x19.feast.core.FeatureSpecV2\x12\x13\n\x0b\x64\x65scription\x18\n \x01(\t\x12\x33\n\x04tags\x18\x05 \x03(\x0b\x32%.feast.core.FeatureViewSpec.TagsEntry\x12\r\n\x05owner\x18\x0b \x01(\t\x12&\n\x03ttl\x18\x06 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\x0c\x62\x61tch_source\x18\x07 \x01(\x0b\x32\x16.feast.core.DataSource\x12-\n\rstream_source\x18\t \x01(\x0b\x32\x16.feast.core.DataSource\x12\x0e\n\x06online\x18\x08 \x01(\x08\x12\x0f\n\x07offline\x18\r \x01(\x08\x12\x31\n\x0csource_views\x18\x0e \x03(\x0b\x32\x1b.feast.core.FeatureViewSpec\x1a+\n\tTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xcc\x01\n\x0f\x46\x65\x61tureViewMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12:\n\x16last_updated_timestamp\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x46\n\x19materialization_intervals\x18\x03 \x03(\x0b\x32#.feast.core.MaterializationInterval\"w\n\x17MaterializationInterval\x12.\n\nstart_time\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x08\x65nd_time\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"@\n\x0f\x46\x65\x61tureViewList\x12-\n\x0c\x66\x65\x61tureviews\x18\x01 \x03(\x0b\x32\x17.feast.core.FeatureViewBU\n\x10\x66\x65\x61st.proto.coreB\x10\x46\x65\x61tureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/coreb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -31,13 +31,13 @@ _globals['_FEATUREVIEW']._serialized_start=164 _globals['_FEATUREVIEW']._serialized_end=263 _globals['_FEATUREVIEWSPEC']._serialized_start=266 - _globals['_FEATUREVIEWSPEC']._serialized_end=778 - _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_start=735 - _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_end=778 - _globals['_FEATUREVIEWMETA']._serialized_start=781 - _globals['_FEATUREVIEWMETA']._serialized_end=985 - _globals['_MATERIALIZATIONINTERVAL']._serialized_start=987 - _globals['_MATERIALIZATIONINTERVAL']._serialized_end=1106 - _globals['_FEATUREVIEWLIST']._serialized_start=1108 - _globals['_FEATUREVIEWLIST']._serialized_end=1172 + _globals['_FEATUREVIEWSPEC']._serialized_end=779 + _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_start=736 + _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_end=779 + _globals['_FEATUREVIEWMETA']._serialized_start=782 + _globals['_FEATUREVIEWMETA']._serialized_end=986 + _globals['_MATERIALIZATIONINTERVAL']._serialized_start=988 + _globals['_MATERIALIZATIONINTERVAL']._serialized_end=1107 + _globals['_FEATUREVIEWLIST']._serialized_start=1109 + _globals['_FEATUREVIEWLIST']._serialized_end=1173 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi b/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi index fac7abdbbfe..d93c9b8f80f 100644 --- a/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi +++ b/sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi @@ -91,7 +91,7 @@ class FeatureViewSpec(google.protobuf.message.Message): STREAM_SOURCE_FIELD_NUMBER: builtins.int ONLINE_FIELD_NUMBER: builtins.int OFFLINE_FIELD_NUMBER: builtins.int - SOURCE_VIEW_FIELD_NUMBER: builtins.int + SOURCE_VIEWS_FIELD_NUMBER: builtins.int 
name: builtins.str """Name of the feature view. Must be unique. Not updated.""" project: builtins.str @@ -132,7 +132,7 @@ class FeatureViewSpec(google.protobuf.message.Message): offline: builtins.bool """Whether these features should be written to the offline store""" @property - def source_view(self) -> global___FeatureViewSpec: ... + def source_views(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___FeatureViewSpec]: ... def __init__( self, *, @@ -149,10 +149,10 @@ class FeatureViewSpec(google.protobuf.message.Message): stream_source: feast.core.DataSource_pb2.DataSource | None = ..., online: builtins.bool = ..., offline: builtins.bool = ..., - source_view: global___FeatureViewSpec | None = ..., + source_views: collections.abc.Iterable[global___FeatureViewSpec] | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "source_view", b"source_view", "stream_source", b"stream_source", "ttl", b"ttl"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "description", b"description", "entities", b"entities", "entity_columns", b"entity_columns", "features", b"features", "name", b"name", "offline", b"offline", "online", b"online", "owner", b"owner", "project", b"project", "source_view", b"source_view", "stream_source", b"stream_source", "tags", b"tags", "ttl", b"ttl"]) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "stream_source", b"stream_source", "ttl", b"ttl"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["batch_source", b"batch_source", "description", b"description", "entities", b"entities", "entity_columns", b"entity_columns", "features", b"features", "name", b"name", "offline", b"offline", "online", b"online", "owner", b"owner", "project", b"project", "source_views", b"source_views", "stream_source", b"stream_source", "tags", b"tags", "ttl", b"ttl"]) -> None: ... 
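# --- Aside (illustrative sketch, not from the patch): with the regenerated
# stubs above, `source_views` is a repeated message field, so the standard
# repeated-composite container operations apply. View names are invented.
from feast.protos.feast.core.FeatureView_pb2 import FeatureViewSpec

base = FeatureViewSpec(name="hourly_driver_stats")
derived = FeatureViewSpec(name="daily_driver_stats")
derived.source_views.append(base)  # copies `base` into the repeated field
derived.source_views.add(name="weekly_driver_stats")  # construct in place
assert [v.name for v in derived.source_views] == [
    "hourly_driver_stats",
    "weekly_driver_stats",
]
# --- end aside ---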
global___FeatureViewSpec = FeatureViewSpec diff --git a/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py b/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py index 3f681017e89..6ebe3b52f6c 100644 --- a/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py +++ b/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py @@ -58,19 +58,15 @@ def strip_extra_spaces(df): online_store=MagicMock(), entity_defs=MagicMock(), entity_df=None, - column_info=ColumnInfo( - join_keys=["name"], - feature_cols=["age"], - ts_col="", - created_ts_col="", - ), node_outputs={"source": input_value}, ) + # Prepare mock input node + input_node = MagicMock() + input_node.name = "source" + # Create and run the node - node = SparkTransformationNode("transform", udf=strip_extra_spaces) - node.add_input(MagicMock()) - node.inputs[0].name = "source" + node = SparkTransformationNode("transform", udf=strip_extra_spaces, inputs=[input_node]) result = node.execute(context) # Assert output @@ -104,12 +100,6 @@ def test_spark_aggregation_node_executes_correctly(spark_session): online_store=MagicMock(), entity_defs=[], entity_df=None, - column_info=ColumnInfo( - join_keys=["user_id"], - feature_cols=["value"], - ts_col="", - created_ts_col="", - ), node_outputs={"source": input_value}, ) @@ -188,23 +178,26 @@ def test_spark_join_node_executes_point_in_time_join(spark_session): entity_defs=[driver], entity_df=entity_df, node_outputs={ - "feature_node": feature_val, + "source": feature_val, }, - column_info=ColumnInfo( - join_keys=["driver_id"], - feature_cols=["conv_rate", "acc_rate", "avg_daily_trips"], - ts_col="event_timestamp", - created_ts_col="created", - ), ) + # Prepare mock input node + input_node = MagicMock() + input_node.name = "source" + # Create the node and add input join_node = SparkJoinNode( name="join", spark_session=spark_session, + inputs=[input_node], + column_info=ColumnInfo( + join_keys=["driver_id"], + feature_cols=["conv_rate", "acc_rate", "avg_daily_trips"], + ts_col="event_timestamp", + created_ts_col="created", + ), ) - join_node.add_input(MagicMock()) - join_node.inputs[0].name = "feature_node" # Execute the node output = join_node.execute(context) @@ -213,6 +206,12 @@ def test_spark_join_node_executes_point_in_time_join(spark_session): dedup_node = SparkDedupNode( name="dedup", spark_session=spark_session, + column_info=ColumnInfo( + join_keys=["driver_id"], + feature_cols=["conv_rate", "acc_rate", "avg_daily_trips"], + ts_col="event_timestamp", + created_ts_col="created", + ), ) dedup_node.add_input(MagicMock()) dedup_node.inputs[0].name = "join" From 8ef4f4f9bf4cd339832f2db66b197aefd73c5b63 Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Mon, 7 Jul 2025 21:27:53 -0700 Subject: [PATCH 04/12] Checkpoint Signed-off-by: HaoXuAI --- sdk/python/feast/batch_feature_view.py | 8 +- sdk/python/feast/feature_view.py | 33 ++- .../feast/infra/compute_engines/dag/plan.py | 27 +-- .../infra/compute_engines/feature_builder.py | 6 +- .../infra/compute_engines/feature_resolver.py | 19 +- .../infra/compute_engines/local/nodes.py | 1 + .../compute_engines/spark/feature_builder.py | 4 +- .../infra/compute_engines/spark/nodes.py | 18 +- .../compute_engines/spark/test_compute.py | 139 +------------ .../compute_engines/spark/test_compute_dag.py | 189 +++++++----------- .../compute_engines/spark/utils.py | 133 ++++++++++++ .../infra/compute_engines/spark/test_nodes.py | 4 +- .../compute_engines/test_feature_builder.py | 87 +++++--- 13 files changed, 339 insertions(+), 329 
deletions(-) create mode 100644 sdk/python/tests/integration/compute_engines/spark/utils.py diff --git a/sdk/python/feast/batch_feature_view.py b/sdk/python/feast/batch_feature_view.py index bac3d10da28..f4b0833b19f 100644 --- a/sdk/python/feast/batch_feature_view.py +++ b/sdk/python/feast/batch_feature_view.py @@ -11,7 +11,6 @@ from feast.entity import Entity from feast.feature_view import FeatureView from feast.field import Field -from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto from feast.transformation.base import Transformation from feast.transformation.mode import TransformationMode @@ -53,6 +52,7 @@ class BatchFeatureView(FeatureView): entities: List[str] ttl: Optional[timedelta] source: DataSource + sink_source: Optional[DataSource] = None schema: List[Field] entity_columns: List[Field] features: List[Field] @@ -75,6 +75,7 @@ def __init__( name: str, mode: Union[TransformationMode, str] = TransformationMode.PYTHON, source: Union[DataSource, "BatchFeatureView", List["BatchFeatureView"]], + sink_source: Optional[DataSource] = None, entities: Optional[List[Entity]] = None, ttl: Optional[timedelta] = None, tags: Optional[Dict[str, str]] = None, @@ -115,12 +116,13 @@ def __init__( description=description, owner=owner, schema=schema, - source=source, + source=source, # type: ignore[arg-type] + sink_source=sink_source, ) def get_feature_transformation(self) -> Optional[Transformation]: if not self.udf: - return + return None if self.mode in ( TransformationMode.PANDAS, TransformationMode.PYTHON, diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 8d745443f36..e195b219f77 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -105,6 +105,7 @@ def __init__( *, name: str, source: Union[DataSource, "FeatureView", List["FeatureView"]], + sink_source: Optional[DataSource] = None, schema: Optional[List[Field]] = None, entities: Optional[List[Entity]] = None, ttl: Optional[timedelta] = timedelta(days=0), @@ -146,6 +147,7 @@ def __init__( schema = schema or [] # Normalize source + self.stream_source = None self.data_source: Optional[DataSource] = None self.source_views: List[FeatureView] = [] @@ -153,27 +155,36 @@ def __init__( self.data_source = source elif isinstance(source, FeatureView): self.source_views = [source] - elif isinstance(source, list) and all(isinstance(sv, FeatureView) for sv in source): + elif isinstance(source, list) and all( + isinstance(sv, FeatureView) for sv in source + ): self.source_views = source else: - raise TypeError("source must be a DataSource, a FeatureView, or a list of FeatureViews.") + raise TypeError( + "source must be a DataSource, a FeatureView, or a list of FeatureViews." 
+ ) - # Set up stream/batch sources + # Set up stream, batch and derived view sources if ( isinstance(self.data_source, PushSource) or isinstance(self.data_source, KafkaSource) or isinstance(self.data_source, KinesisSource) ): - self.stream_source = source + # Stream source definition + self.stream_source = self.data_source if not self.data_source.batch_source: raise ValueError( - f"A batch_source needs to be specified for stream source `{source.name}`" + f"A batch_source needs to be specified for stream source `{self.data_source.name}`" ) - else: - self.batch_source = self.data_source.batch_source - else: - self.stream_source = None + self.batch_source = self.data_source.batch_source + elif self.data_source: + # Batch source definition self.batch_source = self.data_source + else: + # Derived view source definition + if not sink_source: + raise ValueError("Derived FeatureView must specify `sink_source`.") + self.batch_source = sink_source # Initialize features and entity columns. features: List[Field] = [] @@ -215,7 +226,7 @@ def __init__( ) # TODO(felixwang9817): Add more robust validation of features. - if source is not None: + if self.batch_source is not None: cols = [field.name for field in schema] for col in cols: if ( @@ -451,7 +462,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): if feature_view_proto.spec.ttl.ToNanoseconds() == 0 else feature_view_proto.spec.ttl.ToTimedelta() ), - source=batch_source if batch_source else source_views + source=batch_source if batch_source else source_views, ) if stream_source: feature_view.stream_source = stream_source diff --git a/sdk/python/feast/infra/compute_engines/dag/plan.py b/sdk/python/feast/infra/compute_engines/dag/plan.py index 6bfb871c104..31db551e635 100644 --- a/sdk/python/feast/infra/compute_engines/dag/plan.py +++ b/sdk/python/feast/infra/compute_engines/dag/plan.py @@ -60,29 +60,16 @@ def to_sql(self, context: ExecutionContext) -> str: """ raise NotImplementedError("SQL generation is not implemented yet.") - def to_dag(self): + def to_dag(self) -> str: """ - Generate a textual DAG representation for debugging. - - Returns: - str: A multi-line string showing the DAG structure. + Render the DAG as a multiline string with full node expansion (no visited shortcut). 
""" - lines = [] - seen = set() - def dfs(node: DAGNode, indent=0): + def walk(node: DAGNode, indent: int = 0) -> List[str]: prefix = " " * indent - if node.name in seen: - lines.append(f"{prefix}- {node.name} (visited)") - return - seen.add(node.name) - lines.append(f"{prefix}- {node.name}") + lines = [f"{prefix}- {node.name}"] for input_node in node.inputs: - dfs(input_node, indent + 1) - - for node in self.nodes: - dfs(node) - - return "\n".join(lines) - + lines.extend(walk(input_node, indent + 1)) + return lines + return "\n".join(walk(self.nodes[-1])) diff --git a/sdk/python/feast/infra/compute_engines/feature_builder.py b/sdk/python/feast/infra/compute_engines/feature_builder.py index 5197a1adad5..320328bc6c1 100644 --- a/sdk/python/feast/infra/compute_engines/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/feature_builder.py @@ -1,8 +1,10 @@ from abc import ABC, abstractmethod -from typing import List, Optional, Union, Dict +from typing import Dict, List, Optional, Union + from feast import BatchFeatureView, FeatureView, StreamFeatureView from feast.infra.common.materialization_job import MaterializationTask from feast.infra.common.retrieval_task import HistoricalRetrievalTask +from feast.infra.compute_engines.algorithms.topo import topo_sort from feast.infra.compute_engines.dag.context import ColumnInfo from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.dag.plan import ExecutionPlan @@ -11,7 +13,6 @@ ) from feast.infra.registry.base_registry import BaseRegistry from feast.utils import _get_column_names -from feast.infra.compute_engines.algorithms.topo import topo_sort class FeatureBuilder(ABC): @@ -78,7 +79,6 @@ def _should_dedupe(self, view): return isinstance(self.task, HistoricalRetrievalTask) or self.task.only_latest def _build(self, view, input_nodes: Optional[List[DAGNode]]) -> DAGNode: - # Step 1: build source node if view.data_source: last_node = self.build_source_node(view) diff --git a/sdk/python/feast/infra/compute_engines/feature_resolver.py b/sdk/python/feast/infra/compute_engines/feature_resolver.py index 2edd8d71b73..ae2f505c1d7 100644 --- a/sdk/python/feast/infra/compute_engines/feature_resolver.py +++ b/sdk/python/feast/infra/compute_engines/feature_resolver.py @@ -1,9 +1,9 @@ from typing import List, Optional, Set from feast.feature_view import FeatureView -from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.algorithms.topo import topo_sort from feast.infra.compute_engines.dag.context import ExecutionContext +from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.dag.value import DAGValue @@ -12,10 +12,12 @@ class FeatureViewNode(DAGNode): Logical representation of a node in the FeatureView dependency DAG. 
""" - def __init__(self, view: FeatureView): + def __init__( + self, view: FeatureView, inputs: Optional[List["FeatureViewNode"]] = None + ): super().__init__(name=view.name) self.view: FeatureView = view - self.inputs: List["FeatureViewNode"] = [] + self.inputs: List["FeatureViewNode"] = inputs or [] # type: ignore def execute(self, context: ExecutionContext) -> DAGValue: raise NotImplementedError( @@ -68,15 +70,16 @@ def _walk(self, view: FeatureView): self._node_cache[view.name] = node self._resolution_path.append(view.name) - for upstream_view in view.source_views: - input_node = self._walk(upstream_view) - node.inputs.append(input_node) + if view.source_views: + for upstream_view in view.source_views: + input_node = self._walk(upstream_view) + node.inputs.append(input_node) self._resolution_path.pop() return node def topo_sort(self, root: FeatureViewNode) -> List[FeatureViewNode]: - return topo_sort(root) + return topo_sort(root) # type: ignore def debug_dag(self, node: FeatureViewNode, depth=0): """ @@ -89,4 +92,4 @@ def debug_dag(self, node: FeatureViewNode, depth=0): indent = " " * depth print(f"{indent}- {node.view.name}") for input_node in node.inputs: - self.debug_dag(input_node, depth + 1) + self.debug_dag(input_node, depth + 1) # type: ignore diff --git a/sdk/python/feast/infra/compute_engines/local/nodes.py b/sdk/python/feast/infra/compute_engines/local/nodes.py index 6211dd11da5..e11ae63f404 100644 --- a/sdk/python/feast/infra/compute_engines/local/nodes.py +++ b/sdk/python/feast/infra/compute_engines/local/nodes.py @@ -41,6 +41,7 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: context=context, start_time=self.start_time, end_time=self.end_time, + column_info=self.column_info, ) arrow_table = retrieval_job.to_arrow() if self.column_info.field_mapping: diff --git a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py index d60a1ec0d21..c4dc37f34e4 100644 --- a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py @@ -48,9 +48,7 @@ def build_aggregation_node(self, view, input_node): self.nodes.append(node) return node - def build_join_node(self, - view, - input_nodes): + def build_join_node(self, view, input_nodes): column_info = self.get_column_info(view) node = SparkJoinNode( name=f"{view.name}_join", diff --git a/sdk/python/feast/infra/compute_engines/spark/nodes.py b/sdk/python/feast/infra/compute_engines/spark/nodes.py index c19215329ce..fc7315ccf22 100644 --- a/sdk/python/feast/infra/compute_engines/spark/nodes.py +++ b/sdk/python/feast/infra/compute_engines/spark/nodes.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta -from typing import List, Optional, Union, cast +from typing import Callable, List, Optional, Union, cast from pyspark.sql import DataFrame, SparkSession, Window from pyspark.sql import functions as F @@ -156,7 +156,7 @@ def __init__( column_info: ColumnInfo, spark_session: SparkSession, inputs: Optional[List[DAGNode]] = None, - how: str = "left" + how: str = "left", ): super().__init__(name, inputs=inputs or []) self.column_info = column_info @@ -171,7 +171,9 @@ def execute(self, context: ExecutionContext) -> DAGValue: # Join all input DataFrames on join_keys joined_df = input_values[0].data for dag_value in input_values[1:]: - joined_df = joined_df.join(dag_value.data, on=self.column_info.join_keys, how=self.how) + joined_df = joined_df.join( + dag_value.data, 
on=self.column_info.join_keys, how=self.how + ) # If entity_df is provided, join it in last entity_df = context.entity_df @@ -180,7 +182,9 @@ def execute(self, context: ExecutionContext) -> DAGValue: spark_session=self.spark_session, entity_df=entity_df, ) - joined_df = joined_df.join(entity_df, on=self.column_info.join_keys, how=self.how) + joined_df = joined_df.join( + entity_df, on=self.column_info.join_keys, how=self.how + ) return DAGValue( data=joined_df, @@ -332,7 +336,7 @@ def execute(self, context: ExecutionContext) -> DAGValue: class SparkTransformationNode(DAGNode): - def __init__(self, name: str, udf: callable, inputs: List[DAGNode]): + def __init__(self, name: str, udf: Callable, inputs: List[DAGNode]): super().__init__(name, inputs) self.udf = udf @@ -343,7 +347,9 @@ def execute(self, context: ExecutionContext) -> DAGValue: input_dfs: List[DataFrame] = [val.data for val in input_values] - print(f"[SparkTransformationNode] Executing transform on {len(input_dfs)} input(s).") + print( + f"[SparkTransformationNode] Executing transform on {len(input_dfs)} input(s)." + ) transformed_df = self.udf(*input_dfs) diff --git a/sdk/python/tests/integration/compute_engines/spark/test_compute.py b/sdk/python/tests/integration/compute_engines/spark/test_compute.py index 5254db1e690..7535bec0587 100644 --- a/sdk/python/tests/integration/compute_engines/spark/test_compute.py +++ b/sdk/python/tests/integration/compute_engines/spark/test_compute.py @@ -1,15 +1,13 @@ -from datetime import datetime, timedelta +from datetime import timedelta from typing import cast from unittest.mock import MagicMock -import pandas as pd import pytest from pyspark.sql import DataFrame from tqdm import tqdm -from feast import BatchFeatureView, Entity, Field +from feast import BatchFeatureView, Field from feast.aggregation import Aggregation -from feast.data_source import DataSource from feast.infra.common.materialization_job import ( MaterializationJobStatus, MaterializationTask, @@ -20,101 +18,18 @@ from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( SparkOfflineStore, ) -from feast.infra.offline_stores.contrib.spark_offline_store.tests.data_source import ( - SparkDataSourceCreator, -) from feast.types import Float32, Int32, Int64 -from tests.integration.feature_repos.integration_test_repo_config import ( - IntegrationTestRepoConfig, -) -from tests.integration.feature_repos.repo_configuration import ( - construct_test_environment, -) -from tests.integration.feature_repos.universal.online_store.redis import ( - RedisOnlineStoreCreator, -) - -now = datetime.now() -today = datetime.today() - -driver = Entity( - name="driver_id", - description="driver id", +from tests.integration.compute_engines.spark.utils import ( + _check_offline_features, + _check_online_features, + create_entity_df, + create_feature_dataset, + create_spark_environment, + driver, + now, ) -def create_feature_dataset(spark_environment) -> DataSource: - yesterday = today - timedelta(days=1) - last_week = today - timedelta(days=7) - df = pd.DataFrame( - [ - { - "driver_id": 1001, - "event_timestamp": yesterday, - "created": now - timedelta(hours=2), - "conv_rate": 0.8, - "acc_rate": 0.5, - "avg_daily_trips": 15, - }, - { - "driver_id": 1001, - "event_timestamp": last_week, - "created": now - timedelta(hours=3), - "conv_rate": 0.75, - "acc_rate": 0.9, - "avg_daily_trips": 14, - }, - { - "driver_id": 1002, - "event_timestamp": yesterday, - "created": now - timedelta(hours=2), - "conv_rate": 0.7, - "acc_rate": 0.4, - 
"avg_daily_trips": 12, - }, - { - "driver_id": 1002, - "event_timestamp": yesterday - timedelta(days=1), - "created": now - timedelta(hours=2), - "conv_rate": 0.3, - "acc_rate": 0.6, - "avg_daily_trips": 12, - }, - ] - ) - ds = spark_environment.data_source_creator.create_data_source( - df, - spark_environment.feature_store.project, - timestamp_field="event_timestamp", - created_timestamp_column="created", - ) - return ds - - -def create_entity_df() -> pd.DataFrame: - entity_df = pd.DataFrame( - [ - {"driver_id": 1001, "event_timestamp": today}, - {"driver_id": 1002, "event_timestamp": today}, - ] - ) - return entity_df - - -def create_spark_environment(): - spark_config = IntegrationTestRepoConfig( - provider="local", - online_store_creator=RedisOnlineStoreCreator, - offline_store_creator=SparkDataSourceCreator, - batch_engine={"type": "spark.engine", "partitions": 10}, - ) - spark_environment = construct_test_environment( - spark_config, None, entity_key_serialization_version=2 - ) - spark_environment.setup() - return spark_environment - - @pytest.mark.integration def test_spark_compute_engine_get_historical_features(): spark_environment = create_spark_environment() @@ -277,39 +192,5 @@ def tqdm_builder(length): spark_environment.teardown() -def _check_online_features( - fs, - driver_id, - feature, - expected_value, - full_feature_names: bool = True, -): - online_response = fs.get_online_features( - features=[feature], - entity_rows=[{"driver_id": driver_id}], - full_feature_names=full_feature_names, - ).to_dict() - - feature_ref = "__".join(feature.split(":")) - - assert len(online_response["driver_id"]) == 1 - assert online_response["driver_id"][0] == driver_id - assert abs(online_response[feature_ref][0] - expected_value < 1e-6), ( - "Transformed result" - ) - - -def _check_offline_features( - fs, - feature, - entity_df, -): - offline_df = fs.get_historical_features( - entity_df=entity_df, - features=[feature], - ).to_df() - assert len(offline_df) == 4 - - if __name__ == "__main__": test_spark_compute_engine_get_historical_features() diff --git a/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py b/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py index ef7402b5a4e..df786882df9 100644 --- a/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py +++ b/sdk/python/tests/integration/compute_engines/spark/test_compute_dag.py @@ -1,13 +1,11 @@ -from datetime import datetime, timedelta +from datetime import timedelta from unittest.mock import MagicMock -import pandas as pd import pytest from pyspark.sql import DataFrame from tqdm import tqdm -from feast import BatchFeatureView, Entity, Field -from feast.aggregation import Aggregation +from feast import BatchFeatureView, Field from feast.infra.common.materialization_job import ( MaterializationJobStatus, MaterializationTask, @@ -16,82 +14,25 @@ from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( SparkOfflineStore, ) -from feast.infra.offline_stores.contrib.spark_offline_store.tests.data_source import ( - SparkDataSourceCreator, +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, ) from feast.types import Float32, Int32, Int64 -from tests.integration.feature_repos.integration_test_repo_config import ( - IntegrationTestRepoConfig, -) -from tests.integration.feature_repos.repo_configuration import ( - construct_test_environment, -) -from tests.integration.feature_repos.universal.online_store.redis import ( - 
RedisOnlineStoreCreator, -) - -now = datetime.now() -today = datetime.today() - -driver = Entity( - name="driver_id", - description="driver id", +from tests.integration.compute_engines.spark.utils import ( + _check_offline_features, + _check_online_features, + create_entity_df, + create_feature_dataset, + create_spark_environment, + driver, + now, ) -@pytest.fixture(scope="module") -def spark_env(): - config = IntegrationTestRepoConfig( - provider="local", - online_store_creator=RedisOnlineStoreCreator, - offline_store_creator=SparkDataSourceCreator, - batch_engine={"type": "spark.engine", "partitions": 10}, - ) - env = construct_test_environment(config, None, entity_key_serialization_version=2) - env.setup() - yield env - env.teardown() - - -def create_sample_datasource(spark_environment): - df = pd.DataFrame( - [ - { - "driver_id": 1001, - "event_timestamp": today - timedelta(days=1), - "created": now - timedelta(hours=2), - "conv_rate": 0.8, - "acc_rate": 0.5, - "avg_daily_trips": 15, - }, - { - "driver_id": 1002, - "event_timestamp": today - timedelta(days=1), - "created": now - timedelta(hours=2), - "conv_rate": 0.7, - "acc_rate": 0.4, - "avg_daily_trips": 12, - }, - ] - ) - ds = spark_environment.data_source_creator.create_data_source( - df, - spark_environment.feature_store.project, - timestamp_field="event_timestamp", - created_timestamp_column="created", - ) - return ds - - def create_base_feature_view(source): return BatchFeatureView( name="hourly_driver_stats", entities=[driver], - aggregations=[ - Aggregation(column="conv_rate", function="sum"), - Aggregation(column="acc_rate", function="avg"), - ], - ttl=timedelta(days=3), schema=[ Field(name="conv_rate", dtype=Float32), Field(name="acc_rate", dtype=Float32), @@ -105,69 +46,83 @@ def create_chained_feature_view(base_fv: BatchFeatureView): - def transform(df: DataFrame) -> DataFrame: - return df.withColumn("sum_conv_rate", df["sum_conv_rate"] * 10) + def transform_feature(df: DataFrame) -> DataFrame: + df = df.withColumn("conv_rate", df["conv_rate"] * 2) + df = df.withColumn("acc_rate", df["acc_rate"] * 2) + return df return BatchFeatureView( name="daily_driver_stats", entities=[driver], - udf=transform, + udf=transform_feature, udf_string="transform", schema=[ - Field(name="sum_conv_rate", dtype=Float32), + Field(name="conv_rate", dtype=Float32), Field(name="driver_id", dtype=Int32), ], online=True, offline=True, - source_view=base_fv, - tags={ - "join_keys": "driver_id", - "feature_cols": "sum_conv_rate", - "ts_col": "event_timestamp", - "created_ts_col": "created", - }, + source=base_fv, + sink_source=SparkSource( + name="daily_driver_stats_sink", + path="/tmp/daily_driver_stats_sink", + file_format="parquet", + timestamp_field="event_timestamp", + created_timestamp_column="created", + ), ) -def _tqdm_builder(length): - return tqdm(total=length, ncols=100) - - @pytest.mark.integration -def test_spark_dag_materialize_recursive_view(spark_env): +def test_spark_dag_materialize_recursive_view(): + spark_env = create_spark_environment() fs = spark_env.feature_store registry = fs.registry - source = create_sample_datasource(spark_env) + source = create_feature_dataset(spark_env) base_fv = create_base_feature_view(source) chained_fv = create_chained_feature_view(base_fv) - fs.apply([driver, base_fv, chained_fv]) - - # 🧪 Materialize top-level view; DAG will include base_fv implicitly - task = MaterializationTask( - project=fs.project, - feature_view=chained_fv, - start_time=now -
timedelta(days=2), - end_time=now, - tqdm_builder=_tqdm_builder, - ) - - engine = SparkComputeEngine( - repo_config=spark_env.config, - offline_store=SparkOfflineStore(), - online_store=MagicMock(), - registry=registry, - ) - - jobs = engine.materialize(registry, task) - - # ✅ Validate jobs ran - assert len(jobs) == 1 - assert jobs[0].status() == MaterializationJobStatus.SUCCEEDED - - # ✅ Verify output exists in offline store - df = jobs[0].to_df() - assert "sum_conv_rate" in df.columns - assert sorted(df["driver_id"].tolist()) == [1001, 1002] - assert abs(df["sum_conv_rate"].iloc[0] - 16.0) < 1e-6 # (0.8 + 0.8) * 10 + def tqdm_builder(length): + return tqdm(total=length, ncols=100) + + try: + fs.apply([driver, base_fv, chained_fv]) + + # 🧪 Materialize top-level view; DAG will include base_fv implicitly + task = MaterializationTask( + project=fs.project, + feature_view=chained_fv, + start_time=now - timedelta(days=2), + end_time=now, + tqdm_builder=tqdm_builder, + ) + + engine = SparkComputeEngine( + repo_config=spark_env.config, + offline_store=SparkOfflineStore(), + online_store=MagicMock(), + registry=registry, + ) + + jobs = engine.materialize(registry, task) + + # ✅ Validate jobs ran + assert len(jobs) == 1 + assert jobs[0].status() == MaterializationJobStatus.SUCCEEDED + + _check_online_features( + fs=fs, + driver_id=1001, + feature="daily_driver_stats:conv_rate", + expected_value=1.6, + full_feature_names=True, + ) + + entity_df = create_entity_df() + + _check_offline_features( + fs=fs, feature="hourly_driver_stats:conv_rate", entity_df=entity_df, size=2 + ) + finally: + spark_env.teardown() diff --git a/sdk/python/tests/integration/compute_engines/spark/utils.py b/sdk/python/tests/integration/compute_engines/spark/utils.py new file mode 100644 index 00000000000..766e4cbe9a5 --- /dev/null +++ b/sdk/python/tests/integration/compute_engines/spark/utils.py @@ -0,0 +1,133 @@ +from datetime import datetime, timedelta + +import pandas as pd + +from feast import Entity +from feast.data_source import DataSource +from feast.infra.offline_stores.contrib.spark_offline_store.tests.data_source import ( + SparkDataSourceCreator, +) +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) +from tests.integration.feature_repos.repo_configuration import ( + construct_test_environment, +) +from tests.integration.feature_repos.universal.online_store.redis import ( + RedisOnlineStoreCreator, +) + +now = datetime.now() +today = datetime.today() + +driver = Entity( + name="driver_id", + description="driver id", +) + + +def create_entity_df() -> pd.DataFrame: + entity_df = pd.DataFrame( + [ + {"driver_id": 1001, "event_timestamp": today}, + {"driver_id": 1002, "event_timestamp": today}, + ] + ) + return entity_df + + +def create_feature_dataset(spark_environment) -> DataSource: + yesterday = today - timedelta(days=1) + last_week = today - timedelta(days=7) + df = pd.DataFrame( + [ + { + "driver_id": 1001, + "event_timestamp": yesterday, + "created": now - timedelta(hours=2), + "conv_rate": 0.8, + "acc_rate": 0.5, + "avg_daily_trips": 15, + }, + { + "driver_id": 1001, + "event_timestamp": last_week, + "created": now - timedelta(hours=3), + "conv_rate": 0.75, + "acc_rate": 0.9, + "avg_daily_trips": 14, + }, + { + "driver_id": 1002, + "event_timestamp": yesterday, + "created": now - timedelta(hours=2), + "conv_rate": 0.7, + "acc_rate": 0.4, + "avg_daily_trips": 12, + }, + { + "driver_id": 1002, + "event_timestamp": yesterday - timedelta(days=1),
"created": now - timedelta(hours=2), + "conv_rate": 0.3, + "acc_rate": 0.6, + "avg_daily_trips": 12, + }, + ] + ) + ds = spark_environment.data_source_creator.create_data_source( + df, + spark_environment.feature_store.project, + timestamp_field="event_timestamp", + created_timestamp_column="created", + ) + return ds + + +def create_spark_environment(): + spark_config = IntegrationTestRepoConfig( + provider="local", + online_store_creator=RedisOnlineStoreCreator, + offline_store_creator=SparkDataSourceCreator, + batch_engine={"type": "spark.engine", "partitions": 10}, + ) + spark_environment = construct_test_environment( + spark_config, None, entity_key_serialization_version=2 + ) + spark_environment.setup() + return spark_environment + + +def _check_online_features( + fs, + driver_id, + feature, + expected_value, + full_feature_names: bool = True, +): + online_response = fs.get_online_features( + features=[feature], + entity_rows=[{"driver_id": driver_id}], + full_feature_names=full_feature_names, + ).to_dict() + + feature_ref = "__".join(feature.split(":")) + + assert len(online_response["driver_id"]) == 1 + assert online_response["driver_id"][0] == driver_id + assert abs(online_response[feature_ref][0] - expected_value < 1e-6), ( + "Transformed result" + ) + + +def _check_offline_features( + fs, + feature, + entity_df, + size: int = 4, +): + offline_df = fs.get_historical_features( + entity_df=entity_df, + features=[feature], + ).to_df() + assert len(offline_df) == size diff --git a/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py b/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py index 6ebe3b52f6c..52ba3dd47f3 100644 --- a/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py +++ b/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py @@ -66,7 +66,9 @@ def strip_extra_spaces(df): input_node.name = "source" # Create and run the node - node = SparkTransformationNode("transform", udf=strip_extra_spaces, inputs=[input_node]) + node = SparkTransformationNode( + "transform", udf=strip_extra_spaces, inputs=[input_node] + ) result = node.execute(context) # Assert output diff --git a/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py index c472a359815..d0a40b78ba7 100644 --- a/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py +++ b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py @@ -1,8 +1,26 @@ from unittest.mock import MagicMock +from feast.data_source import DataSource +from feast.infra.compute_engines.dag.context import ExecutionContext +from feast.infra.compute_engines.dag.model import DAGFormat +from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.dag.plan import ExecutionPlan +from feast.infra.compute_engines.dag.value import DAGValue from feast.infra.compute_engines.feature_builder import FeatureBuilder +# --------------------------- +# Minimal Mock DAGNode for testing +# --------------------------- + + +class MockDAGNode(DAGNode): + def __init__(self, name, inputs=None): + super().__init__(name, inputs=inputs or []) + + def execute(self, context: ExecutionContext) -> DAGValue: + return DAGValue(data=None, format=DAGFormat.SPARK, metadata={}) + + # --------------------------- # Mock Feature View Definitions # --------------------------- @@ -13,13 +31,22 @@ def __init__( self, name, source=None, - source_view=None, aggregations=None, feature_transformation=None, ): self.name = 
name self.source = source - self.source_view = source_view + + # Internal resolution (emulating what real FeatureView.__init__ would do) + self.data_source = source if isinstance(source, DataSource) else None + self.source_views = ( + [source] + if isinstance(source, MockFeatureView) + else source + if isinstance(source, list) + else [] + ) + self.aggregations = aggregations or [] self.feature_transformation = feature_transformation self.ttl = None @@ -27,6 +54,8 @@ def __init__( self.enable_validation = False self.entities = ["driver_id"] self.batch_source = type("BatchSource", (), {"timestamp_field": "ts"}) + self.stream_source = None + self.tags = {} class MockTransformation: @@ -35,20 +64,22 @@ def __init__(self, name): self.udf = lambda df: df +mock_source = MagicMock(spec=DataSource) + # --------------------------- # Mock DAG # --------------------------- hourly_driver_stats = MockFeatureView( name="hourly_driver_stats", - source="hourly_source", + source=mock_source, aggregations=[{"function": "sum", "column": "trips"}], feature_transformation=MockTransformation("hourly_tf"), ) daily_driver_stats = MockFeatureView( name="daily_driver_stats", - source_view=hourly_driver_stats, + source=hourly_driver_stats, aggregations=[{"function": "mean", "column": "trips"}], feature_transformation=MockTransformation("daily_tf"), ) @@ -65,29 +96,31 @@ def __init__(self, feature_view): registry=MagicMock(), feature_view=feature_view, task=MagicMock() ) - def build_source_node(self, source): - return f"SourceNode({source})" + def build_source_node(self, view): + return MockDAGNode(f"Source({view.name})") - def build_join_node(self, view, input_node): - return f"JoinNode({view.name} <- {input_node})" + def build_join_node(self, view, input_nodes): + return MockDAGNode(f"Join({view.name})", inputs=input_nodes) def build_filter_node(self, view, input_node): - return f"FilterNode({view.name} <- {input_node})" + return MockDAGNode(f"Filter({view.name})", inputs=[input_node]) def build_aggregation_node(self, view, input_node): - return f"AggregationNode({view.name} <- {input_node})" + return MockDAGNode(f"Agg({view.name})", inputs=[input_node]) def build_dedup_node(self, view, input_node): - return f"DedupNode({view.name} <- {input_node})" + return MockDAGNode(f"Dedup({view.name})", inputs=[input_node]) - def build_transformation_node(self, view, input_node): - return f"TransformNode({view.name} <- {input_node})" + def build_transformation_node(self, view, input_nodes): + return MockDAGNode(f"Transform({view.name})", inputs=input_nodes) def build_validation_node(self, view, input_node): - return f"ValidationNode({view.name} <- {input_node})" + return MockDAGNode(f"Validate({view.name})", inputs=[input_node]) def build_output_nodes(self, final_node): - self.nodes.append(f"OutputNode({final_node})") + output_node = MockDAGNode(f"Output({final_node.name})", inputs=[final_node]) + self.nodes.append(output_node) + return output_node # --------------------------- @@ -99,16 +132,14 @@ def test_recursive_featureview_build(): builder = MockFeatureBuilder(daily_driver_stats) execution_plan: ExecutionPlan = builder.build() - expected_final_node = ( - "TransformNode(daily_driver_stats <- " - "AggregationNode(daily_driver_stats <- " - "FilterNode(daily_driver_stats <- " - "JoinNode(daily_driver_stats <- " - "TransformNode(hourly_driver_stats <- " - "AggregationNode(hourly_driver_stats <- " - "FilterNode(hourly_driver_stats <- " - "SourceNode(hourly_source))))))))" - ) - expected_output_node = 
f"OutputNode({expected_final_node})" - - assert execution_plan.nodes[-1] == expected_output_node + expected_output = """\ +- Output(Agg(daily_driver_stats)) + - Agg(daily_driver_stats) + - Filter(daily_driver_stats) + - Transform(daily_driver_stats) + - Agg(hourly_driver_stats) + - Filter(hourly_driver_stats) + - Transform(hourly_driver_stats) + - Source(hourly_driver_stats)""" + + assert execution_plan.to_dag() == expected_output From 74a5dee5ab99af3a939c6d4532ed6d140a1d348d Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Mon, 7 Jul 2025 21:53:53 -0700 Subject: [PATCH 05/12] Checkpoint Signed-off-by: HaoXuAI --- .../protos/feast/core/Aggregation_pb2.py | 6 +-- .../feast/protos/feast/core/DataFormat_pb2.py | 6 +-- .../feast/protos/feast/core/DataSource_pb2.py | 18 +++---- .../protos/feast/core/DatastoreTable_pb2.py | 6 +-- .../feast/protos/feast/core/Entity_pb2.py | 10 ++-- .../protos/feast/core/FeatureService_pb2.py | 14 +++--- .../protos/feast/core/FeatureTable_pb2.py | 10 ++-- .../feast/core/FeatureViewProjection_pb2.py | 10 ++-- .../protos/feast/core/FeatureView_pb2.py | 10 ++-- .../feast/protos/feast/core/Feature_pb2.py | 10 ++-- .../protos/feast/core/InfraObject_pb2.py | 6 +-- .../feast/core/OnDemandFeatureView_pb2.py | 22 ++++---- .../feast/protos/feast/core/Permission_pb2.py | 14 +++--- .../feast/protos/feast/core/Policy_pb2.py | 6 +-- .../feast/protos/feast/core/Project_pb2.py | 10 ++-- .../feast/protos/feast/core/Registry_pb2.py | 10 ++-- .../protos/feast/core/SavedDataset_pb2.py | 10 ++-- .../protos/feast/core/SqliteTable_pb2.py | 6 +-- .../feast/protos/feast/core/Store_pb2.py | 6 +-- .../feast/core/StreamFeatureView_pb2.py | 14 +++--- .../protos/feast/core/Transformation_pb2.py | 6 +-- .../feast/core/ValidationProfile_pb2.py | 10 ++-- .../feast/registry/RegistryServer_pb2.py | 50 +++++++++---------- .../protos/feast/serving/Connector_pb2.py | 6 +-- .../protos/feast/serving/GrpcServer_pb2.py | 14 +++--- .../feast/serving/ServingService_pb2.py | 18 +++---- .../serving/TransformationService_pb2.py | 6 +-- .../feast/protos/feast/storage/Redis_pb2.py | 6 +-- .../feast/protos/feast/types/EntityKey_pb2.py | 6 +-- .../feast/protos/feast/types/Field_pb2.py | 10 ++-- .../feast/protos/feast/types/Value_pb2.py | 6 +-- 31 files changed, 171 insertions(+), 171 deletions(-) diff --git a/sdk/python/feast/protos/feast/core/Aggregation_pb2.py b/sdk/python/feast/protos/feast/core/Aggregation_pb2.py index 25b68bfc49d..922f8f40aa2 100644 --- a/sdk/python/feast/protos/feast/core/Aggregation_pb2.py +++ b/sdk/python/feast/protos/feast/core/Aggregation_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Aggregation.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,9 +21,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Aggregation_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020AggregationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020AggregationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_AGGREGATION']._serialized_start=77 _globals['_AGGREGATION']._serialized_end=223 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/DataFormat_pb2.py b/sdk/python/feast/protos/feast/core/DataFormat_pb2.py index 401ebf32c49..a3883dcec3b 100644 --- a/sdk/python/feast/protos/feast/core/DataFormat_pb2.py +++ b/sdk/python/feast/protos/feast/core/DataFormat_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/DataFormat.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,9 +20,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.DataFormat_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\017DataFormatProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\017DataFormatProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_FILEFORMAT']._serialized_start=44 _globals['_FILEFORMAT']._serialized_end=222 _globals['_FILEFORMAT_PARQUETFORMAT']._serialized_start=182 diff --git a/sdk/python/feast/protos/feast/core/DataSource_pb2.py b/sdk/python/feast/protos/feast/core/DataSource_pb2.py index 6d815222a41..ae03c7d0c42 100644 --- a/sdk/python/feast/protos/feast/core/DataSource_pb2.py +++ b/sdk/python/feast/protos/feast/core/DataSource_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/DataSource.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -24,15 +25,14 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.DataSource_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\017DataSourceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _DATASOURCE_TAGSENTRY._options = None - _DATASOURCE_TAGSENTRY._serialized_options = b'8\001' - _DATASOURCE_FIELDMAPPINGENTRY._options = None - _DATASOURCE_FIELDMAPPINGENTRY._serialized_options = b'8\001' - _DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY._options = None - _DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\017DataSourceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_DATASOURCE_TAGSENTRY']._options = None + _globals['_DATASOURCE_TAGSENTRY']._serialized_options = b'8\001' + _globals['_DATASOURCE_FIELDMAPPINGENTRY']._options = None + _globals['_DATASOURCE_FIELDMAPPINGENTRY']._serialized_options = b'8\001' + _globals['_DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY']._options = None + _globals['_DATASOURCE_REQUESTDATAOPTIONS_DEPRECATEDSCHEMAENTRY']._serialized_options = b'8\001' _globals['_DATASOURCE']._serialized_start=189 _globals['_DATASOURCE']._serialized_end=3107 _globals['_DATASOURCE_TAGSENTRY']._serialized_start=1436 diff --git a/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py b/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py index 54650c1365f..c5dbc3ec64a 100644 --- a/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py +++ b/sdk/python/feast/protos/feast/core/DatastoreTable_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/DatastoreTable.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,9 +21,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.DatastoreTable_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\023DatastoreTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\023DatastoreTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_DATASTORETABLE']._serialized_start=80 _globals['_DATASTORETABLE']._serialized_end=274 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/Entity_pb2.py b/sdk/python/feast/protos/feast/core/Entity_pb2.py index 682a2b127d1..2b3e7806736 100644 --- a/sdk/python/feast/protos/feast/core/Entity_pb2.py +++ b/sdk/python/feast/protos/feast/core/Entity_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Entity.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,11 +22,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Entity_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\013EntityProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _ENTITYSPECV2_TAGSENTRY._options = None - _ENTITYSPECV2_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\013EntityProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_ENTITYSPECV2_TAGSENTRY']._options = None + _globals['_ENTITYSPECV2_TAGSENTRY']._serialized_options = b'8\001' _globals['_ENTITY']._serialized_start=97 _globals['_ENTITY']._serialized_end=183 _globals['_ENTITYSPECV2']._serialized_start=186 diff --git a/sdk/python/feast/protos/feast/core/FeatureService_pb2.py b/sdk/python/feast/protos/feast/core/FeatureService_pb2.py index 7ea598df9ad..7ef36079691 100644 --- a/sdk/python/feast/protos/feast/core/FeatureService_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureService_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/FeatureService.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,13 +22,12 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureService_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\023FeatureServiceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _FEATURESERVICESPEC_TAGSENTRY._options = None - _FEATURESERVICESPEC_TAGSENTRY._serialized_options = b'8\001' - _LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY._options = None - _LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\023FeatureServiceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_FEATURESERVICESPEC_TAGSENTRY']._options = None + _globals['_FEATURESERVICESPEC_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY']._options = None + _globals['_LOGGINGCONFIG_CUSTOMDESTINATION_CONFIGENTRY']._serialized_options = b'8\001' _globals['_FEATURESERVICE']._serialized_start=120 _globals['_FEATURESERVICE']._serialized_end=228 _globals['_FEATURESERVICESPEC']._serialized_start=231 diff --git a/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py b/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py index c1539d767b6..713e72b5d33 100644 --- a/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureTable_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/FeatureTable.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -23,11 +24,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureTable_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\021FeatureTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _FEATURETABLESPEC_LABELSENTRY._options = None - _FEATURETABLESPEC_LABELSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\021FeatureTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_FEATURETABLESPEC_LABELSENTRY']._options = None + _globals['_FEATURETABLESPEC_LABELSENTRY']._serialized_options = b'8\001' _globals['_FEATURETABLE']._serialized_start=165 _globals['_FEATURETABLE']._serialized_end=267 _globals['_FEATURETABLESPEC']._serialized_start=270 diff --git a/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py b/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py index 85c7a141a3a..b47d4fe392f 100644 --- a/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureViewProjection_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/FeatureViewProjection.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,11 +22,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureViewProjection_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\025FeatureReferenceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _FEATUREVIEWPROJECTION_JOINKEYMAPENTRY._options = None - _FEATUREVIEWPROJECTION_JOINKEYMAPENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\025FeatureReferenceProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_FEATUREVIEWPROJECTION_JOINKEYMAPENTRY']._options = None + _globals['_FEATUREVIEWPROJECTION_JOINKEYMAPENTRY']._serialized_options = b'8\001' _globals['_FEATUREVIEWPROJECTION']._serialized_start=110 _globals['_FEATUREVIEWPROJECTION']._serialized_end=552 _globals['_FEATUREVIEWPROJECTION_JOINKEYMAPENTRY']._serialized_start=503 diff --git a/sdk/python/feast/protos/feast/core/FeatureView_pb2.py b/sdk/python/feast/protos/feast/core/FeatureView_pb2.py index 632faa90cae..702335f1166 100644 --- a/sdk/python/feast/protos/feast/core/FeatureView_pb2.py +++ b/sdk/python/feast/protos/feast/core/FeatureView_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/FeatureView.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -23,11 +24,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.FeatureView_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020FeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _FEATUREVIEWSPEC_TAGSENTRY._options = None - _FEATUREVIEWSPEC_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020FeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_FEATUREVIEWSPEC_TAGSENTRY']._options = None + _globals['_FEATUREVIEWSPEC_TAGSENTRY']._serialized_options = b'8\001' _globals['_FEATUREVIEW']._serialized_start=164 _globals['_FEATUREVIEW']._serialized_end=263 _globals['_FEATUREVIEWSPEC']._serialized_start=266 diff --git a/sdk/python/feast/protos/feast/core/Feature_pb2.py b/sdk/python/feast/protos/feast/core/Feature_pb2.py index 19634f926ec..a02bb7ff403 100644 --- a/sdk/python/feast/protos/feast/core/Feature_pb2.py +++ b/sdk/python/feast/protos/feast/core/Feature_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/Feature.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,11 +21,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Feature_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\014FeatureProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _FEATURESPECV2_TAGSENTRY._options = None - _FEATURESPECV2_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\014FeatureProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_FEATURESPECV2_TAGSENTRY']._options = None + _globals['_FEATURESPECV2_TAGSENTRY']._serialized_options = b'8\001' _globals['_FEATURESPECV2']._serialized_start=66 _globals['_FEATURESPECV2']._serialized_end=336 _globals['_FEATURESPECV2_TAGSENTRY']._serialized_start=293 diff --git a/sdk/python/feast/protos/feast/core/InfraObject_pb2.py b/sdk/python/feast/protos/feast/core/InfraObject_pb2.py index 3ce634f304c..aeea27f2e00 100644 --- a/sdk/python/feast/protos/feast/core/InfraObject_pb2.py +++ b/sdk/python/feast/protos/feast/core/InfraObject_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/InfraObject.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,9 +22,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.InfraObject_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020InfraObjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020InfraObjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_INFRA']._serialized_start=107 _globals['_INFRA']._serialized_end=162 _globals['_INFRAOBJECT']._serialized_start=165 diff --git a/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py b/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py index 5d49136be01..926b54df288 100644 --- a/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py +++ b/sdk/python/feast/protos/feast/core/OnDemandFeatureView_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/OnDemandFeatureView.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -25,17 +26,16 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.OnDemandFeatureView_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\030OnDemandFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY._options = None - _ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY._serialized_options = b'8\001' - _ONDEMANDFEATUREVIEWSPEC_TAGSENTRY._options = None - _ONDEMANDFEATUREVIEWSPEC_TAGSENTRY._serialized_options = b'8\001' - _ONDEMANDFEATUREVIEWSPEC.fields_by_name['user_defined_function']._options = None - _ONDEMANDFEATUREVIEWSPEC.fields_by_name['user_defined_function']._serialized_options = b'\030\001' - _USERDEFINEDFUNCTION._options = None - _USERDEFINEDFUNCTION._serialized_options = b'\030\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\030OnDemandFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY']._options = None + _globals['_ONDEMANDFEATUREVIEWSPEC_SOURCESENTRY']._serialized_options = b'8\001' + _globals['_ONDEMANDFEATUREVIEWSPEC_TAGSENTRY']._options = None + _globals['_ONDEMANDFEATUREVIEWSPEC_TAGSENTRY']._serialized_options = b'8\001' + _globals['_ONDEMANDFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._options = None + _globals['_ONDEMANDFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._serialized_options = b'\030\001' + _globals['_USERDEFINEDFUNCTION']._options = None + _globals['_USERDEFINEDFUNCTION']._serialized_options = b'\030\001' _globals['_ONDEMANDFEATUREVIEW']._serialized_start=243 _globals['_ONDEMANDFEATUREVIEW']._serialized_end=366 _globals['_ONDEMANDFEATUREVIEWSPEC']._serialized_start=369 diff --git a/sdk/python/feast/protos/feast/core/Permission_pb2.py 
b/sdk/python/feast/protos/feast/core/Permission_pb2.py index 19951e6b227..706fd2eec47 100644 --- a/sdk/python/feast/protos/feast/core/Permission_pb2.py +++ b/sdk/python/feast/protos/feast/core/Permission_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/Permission.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,13 +22,12 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Permission_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\017PermissionProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _PERMISSIONSPEC_REQUIREDTAGSENTRY._options = None - _PERMISSIONSPEC_REQUIREDTAGSENTRY._serialized_options = b'8\001' - _PERMISSIONSPEC_TAGSENTRY._options = None - _PERMISSIONSPEC_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\017PermissionProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_PERMISSIONSPEC_REQUIREDTAGSENTRY']._options = None + _globals['_PERMISSIONSPEC_REQUIREDTAGSENTRY']._serialized_options = b'8\001' + _globals['_PERMISSIONSPEC_TAGSENTRY']._options = None + _globals['_PERMISSIONSPEC_TAGSENTRY']._serialized_options = b'8\001' _globals['_PERMISSION']._serialized_start=101 _globals['_PERMISSION']._serialized_end=197 _globals['_PERMISSIONSPEC']._serialized_start=200 diff --git a/sdk/python/feast/protos/feast/core/Policy_pb2.py b/sdk/python/feast/protos/feast/core/Policy_pb2.py index bea067c8be7..2fac866115c 100644 --- a/sdk/python/feast/protos/feast/core/Policy_pb2.py +++ b/sdk/python/feast/protos/feast/core/Policy_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/Policy.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,9 +20,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Policy_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\013PolicyProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\013PolicyProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_POLICY']._serialized_start=39 _globals['_POLICY']._serialized_end=151 _globals['_ROLEBASEDPOLICY']._serialized_start=153 diff --git a/sdk/python/feast/protos/feast/core/Project_pb2.py b/sdk/python/feast/protos/feast/core/Project_pb2.py index 189af6f00b1..cfbf1220143 100644 --- a/sdk/python/feast/protos/feast/core/Project_pb2.py +++ b/sdk/python/feast/protos/feast/core/Project_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Project.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,11 +21,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Project_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\014ProjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _PROJECTSPEC_TAGSENTRY._options = None - _PROJECTSPEC_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\014ProjectProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_PROJECTSPEC_TAGSENTRY']._options = None + _globals['_PROJECTSPEC_TAGSENTRY']._serialized_options = b'8\001' _globals['_PROJECT']._serialized_start=73 _globals['_PROJECT']._serialized_end=160 _globals['_PROJECTSPEC']._serialized_start=163 diff --git a/sdk/python/feast/protos/feast/core/Registry_pb2.py b/sdk/python/feast/protos/feast/core/Registry_pb2.py index 82aa75325f2..671958d80c7 100644 --- a/sdk/python/feast/protos/feast/core/Registry_pb2.py +++ b/sdk/python/feast/protos/feast/core/Registry_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/Registry.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -32,11 +33,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Registry_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\rRegistryProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _REGISTRY.fields_by_name['project_metadata']._options = None - _REGISTRY.fields_by_name['project_metadata']._serialized_options = b'\030\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\rRegistryProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_REGISTRY'].fields_by_name['project_metadata']._options = None + _globals['_REGISTRY'].fields_by_name['project_metadata']._serialized_options = b'\030\001' _globals['_REGISTRY']._serialized_start=449 _globals['_REGISTRY']._serialized_end=1216 _globals['_PROJECTMETADATA']._serialized_start=1218 diff --git a/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py b/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py index 535d2f5772f..fe1e2d49eac 100644 --- a/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py +++ b/sdk/python/feast/protos/feast/core/SavedDataset_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/SavedDataset.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,11 +22,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.SavedDataset_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\021SavedDatasetProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _SAVEDDATASETSPEC_TAGSENTRY._options = None - _SAVEDDATASETSPEC_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\021SavedDatasetProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_SAVEDDATASETSPEC_TAGSENTRY']._options = None + _globals['_SAVEDDATASETSPEC_TAGSENTRY']._serialized_options = b'8\001' _globals['_SAVEDDATASETSPEC']._serialized_start=108 _globals['_SAVEDDATASETSPEC']._serialized_end=401 _globals['_SAVEDDATASETSPEC_TAGSENTRY']._serialized_start=358 diff --git a/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py b/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py index a61f866b514..8cc14781c72 100644 --- a/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py +++ b/sdk/python/feast/protos/feast/core/SqliteTable_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/SqliteTable.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,9 +20,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.SqliteTable_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\020SqliteTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\020SqliteTableProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_SQLITETABLE']._serialized_start=44 _globals['_SQLITETABLE']._serialized_end=85 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/core/Store_pb2.py b/sdk/python/feast/protos/feast/core/Store_pb2.py index 4169921bc23..7d24e11947f 100644 --- a/sdk/python/feast/protos/feast/core/Store_pb2.py +++ b/sdk/python/feast/protos/feast/core/Store_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Store.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,9 +20,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Store_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\nStoreProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\nStoreProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_STORE']._serialized_start=39 _globals['_STORE']._serialized_end=932 _globals['_STORE_REDISCONFIG']._serialized_start=286 diff --git a/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py b/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py index f79ae56c700..ba19088edd6 100644 --- a/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py +++ b/sdk/python/feast/protos/feast/core/StreamFeatureView_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/StreamFeatureView.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -26,13 +27,12 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.StreamFeatureView_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\026StreamFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' - _STREAMFEATUREVIEWSPEC_TAGSENTRY._options = None - _STREAMFEATUREVIEWSPEC_TAGSENTRY._serialized_options = b'8\001' - _STREAMFEATUREVIEWSPEC.fields_by_name['user_defined_function']._options = None - _STREAMFEATUREVIEWSPEC.fields_by_name['user_defined_function']._serialized_options = b'\030\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\026StreamFeatureViewProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_STREAMFEATUREVIEWSPEC_TAGSENTRY']._options = None + _globals['_STREAMFEATUREVIEWSPEC_TAGSENTRY']._serialized_options = b'8\001' + _globals['_STREAMFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._options = None + _globals['_STREAMFEATUREVIEWSPEC'].fields_by_name['user_defined_function']._serialized_options = b'\030\001' _globals['_STREAMFEATUREVIEW']._serialized_start=268 _globals['_STREAMFEATUREVIEW']._serialized_end=379 _globals['_STREAMFEATUREVIEWSPEC']._serialized_start=382 diff --git a/sdk/python/feast/protos/feast/core/Transformation_pb2.py b/sdk/python/feast/protos/feast/core/Transformation_pb2.py index d02127a3207..9fd11d3026b 100644 --- a/sdk/python/feast/protos/feast/core/Transformation_pb2.py +++ b/sdk/python/feast/protos/feast/core/Transformation_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/core/Transformation.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,9 +20,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.Transformation_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\032FeatureTransformationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\032FeatureTransformationProtoZ/github.com/feast-dev/feast/go/protos/feast/core' _globals['_USERDEFINEDFUNCTIONV2']._serialized_start=47 _globals['_USERDEFINEDFUNCTIONV2']._serialized_end=117 _globals['_FEATURETRANSFORMATIONV2']._serialized_start=120 diff --git a/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py b/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py index a82d7a4f0b7..0fb27ceab16 100644 --- a/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py +++ b/sdk/python/feast/protos/feast/core/ValidationProfile_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/core/ValidationProfile.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,11 +20,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.core.ValidationProfile_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\020feast.proto.coreB\021ValidationProfileZ/github.com/feast-dev/feast/go/protos/feast/core' - _VALIDATIONREFERENCE_TAGSENTRY._options = None - _VALIDATIONREFERENCE_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\020feast.proto.coreB\021ValidationProfileZ/github.com/feast-dev/feast/go/protos/feast/core' + _globals['_VALIDATIONREFERENCE_TAGSENTRY']._options = None + _globals['_VALIDATIONREFERENCE_TAGSENTRY']._serialized_options = b'8\001' _globals['_GEVALIDATIONPROFILER']._serialized_start=51 _globals['_GEVALIDATIONPROFILER']._serialized_end=182 _globals['_GEVALIDATIONPROFILER_USERDEFINEDPROFILER']._serialized_start=147 diff --git a/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py b/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py index 8ccff12f791..2d5f7b020ab 100644 --- a/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py +++ b/sdk/python/feast/protos/feast/registry/RegistryServer_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/registry/RegistryServer.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -33,31 +34,30 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.registry.RegistryServer_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z3github.com/feast-dev/feast/go/protos/feast/registry' - _LISTENTITIESREQUEST_TAGSENTRY._options = None - _LISTENTITIESREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTDATASOURCESREQUEST_TAGSENTRY._options = None - _LISTDATASOURCESREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTFEATUREVIEWSREQUEST_TAGSENTRY._options = None - _LISTFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTALLFEATUREVIEWSREQUEST_TAGSENTRY._options = None - _LISTALLFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY._options = None - _LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY._options = None - _LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTFEATURESERVICESREQUEST_TAGSENTRY._options = None - _LISTFEATURESERVICESREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTSAVEDDATASETSREQUEST_TAGSENTRY._options = None - _LISTSAVEDDATASETSREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY._options = None - _LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTPERMISSIONSREQUEST_TAGSENTRY._options = None - _LISTPERMISSIONSREQUEST_TAGSENTRY._serialized_options = b'8\001' - _LISTPROJECTSREQUEST_TAGSENTRY._options = None - _LISTPROJECTSREQUEST_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z3github.com/feast-dev/feast/go/protos/feast/registry' + _globals['_LISTENTITIESREQUEST_TAGSENTRY']._options = None + _globals['_LISTENTITIESREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTDATASOURCESREQUEST_TAGSENTRY']._options = None + _globals['_LISTDATASOURCESREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTFEATUREVIEWSREQUEST_TAGSENTRY']._options = None + _globals['_LISTFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTALLFEATUREVIEWSREQUEST_TAGSENTRY']._options = None + _globals['_LISTALLFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY']._options = None + _globals['_LISTSTREAMFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY']._options = None + _globals['_LISTONDEMANDFEATUREVIEWSREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTFEATURESERVICESREQUEST_TAGSENTRY']._options = None + _globals['_LISTFEATURESERVICESREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTSAVEDDATASETSREQUEST_TAGSENTRY']._options = None + _globals['_LISTSAVEDDATASETSREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY']._options = None + _globals['_LISTVALIDATIONREFERENCESREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTPERMISSIONSREQUEST_TAGSENTRY']._options = None + 
_globals['_LISTPERMISSIONSREQUEST_TAGSENTRY']._serialized_options = b'8\001' + _globals['_LISTPROJECTSREQUEST_TAGSENTRY']._options = None + _globals['_LISTPROJECTSREQUEST_TAGSENTRY']._serialized_options = b'8\001' _globals['_REFRESHREQUEST']._serialized_start=487 _globals['_REFRESHREQUEST']._serialized_end=520 _globals['_UPDATEINFRAREQUEST']._serialized_start=522 diff --git a/sdk/python/feast/protos/feast/serving/Connector_pb2.py b/sdk/python/feast/protos/feast/serving/Connector_pb2.py index 8b5516eabaa..b38471dea8d 100644 --- a/sdk/python/feast/protos/feast/serving/Connector_pb2.py +++ b/sdk/python/feast/protos/feast/serving/Connector_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/serving/Connector.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -23,9 +24,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.Connector_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' _globals['_CONNECTORFEATURE']._serialized_start=173 _globals['_CONNECTORFEATURE']._serialized_end=327 _globals['_CONNECTORFEATURELIST']._serialized_start=329 diff --git a/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py b/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py index c03681f2dc8..ce4db37a658 100644 --- a/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py +++ b/sdk/python/feast/protos/feast/serving/GrpcServer_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/serving/GrpcServer.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,13 +21,12 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.GrpcServer_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' - _PUSHREQUEST_FEATURESENTRY._options = None - _PUSHREQUEST_FEATURESENTRY._serialized_options = b'8\001' - _WRITETOONLINESTOREREQUEST_FEATURESENTRY._options = None - _WRITETOONLINESTOREREQUEST_FEATURESENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z2github.com/feast-dev/feast/go/protos/feast/serving' + _globals['_PUSHREQUEST_FEATURESENTRY']._options = None + _globals['_PUSHREQUEST_FEATURESENTRY']._serialized_options = b'8\001' + _globals['_WRITETOONLINESTOREREQUEST_FEATURESENTRY']._options = None + _globals['_WRITETOONLINESTOREREQUEST_FEATURESENTRY']._serialized_options = b'8\001' _globals['_PUSHREQUEST']._serialized_start=71 _globals['_PUSHREQUEST']._serialized_end=250 _globals['_PUSHREQUEST_FEATURESENTRY']._serialized_start=203 diff --git a/sdk/python/feast/protos/feast/serving/ServingService_pb2.py b/sdk/python/feast/protos/feast/serving/ServingService_pb2.py index 1ad4f29c68d..fa866640577 100644 --- a/sdk/python/feast/protos/feast/serving/ServingService_pb2.py +++ b/sdk/python/feast/protos/feast/serving/ServingService_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/serving/ServingService.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -21,15 +22,14 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.ServingService_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\023feast.proto.servingB\017ServingAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' - _GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY._options = None - _GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY._serialized_options = b'8\001' - _GETONLINEFEATURESREQUEST_ENTITIESENTRY._options = None - _GETONLINEFEATURESREQUEST_ENTITIESENTRY._serialized_options = b'8\001' - _GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY._options = None - _GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\023feast.proto.servingB\017ServingAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' + _globals['_GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY']._options = None + _globals['_GETONLINEFEATURESREQUESTV2_ENTITYROW_FIELDSENTRY']._serialized_options = b'8\001' + _globals['_GETONLINEFEATURESREQUEST_ENTITIESENTRY']._options = None + _globals['_GETONLINEFEATURESREQUEST_ENTITIESENTRY']._serialized_options = b'8\001' + _globals['_GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY']._options = None + _globals['_GETONLINEFEATURESREQUEST_REQUESTCONTEXTENTRY']._serialized_options = b'8\001' _globals['_FIELDSTATUS']._serialized_start=1560 _globals['_FIELDSTATUS']._serialized_end=1651 _globals['_GETFEASTSERVINGINFOREQUEST']._serialized_start=111 diff --git a/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py b/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py index 0416d84b6f0..bc060e9a776 100644 --- a/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py +++ b/sdk/python/feast/protos/feast/serving/TransformationService_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/serving/TransformationService.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,9 +20,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.serving.TransformationService_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\023feast.proto.servingB\035TransformationServiceAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\023feast.proto.servingB\035TransformationServiceAPIProtoZ2github.com/feast-dev/feast/go/protos/feast/serving' _globals['_TRANSFORMATIONSERVICETYPE']._serialized_start=529 _globals['_TRANSFORMATIONSERVICETYPE']._serialized_end=677 _globals['_VALUETYPE']._serialized_start=60 diff --git a/sdk/python/feast/protos/feast/storage/Redis_pb2.py b/sdk/python/feast/protos/feast/storage/Redis_pb2.py index c7c6e967a41..37d59c9df5a 100644 --- a/sdk/python/feast/protos/feast/storage/Redis_pb2.py +++ b/sdk/python/feast/protos/feast/storage/Redis_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/storage/Redis.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,9 +21,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.storage.Redis_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\023feast.proto.storageB\nRedisProtoZ2github.com/feast-dev/feast/go/protos/feast/storage' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\023feast.proto.storageB\nRedisProtoZ2github.com/feast-dev/feast/go/protos/feast/storage' _globals['_REDISKEYV2']._serialized_start=69 _globals['_REDISKEYV2']._serialized_end=163 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/EntityKey_pb2.py b/sdk/python/feast/protos/feast/types/EntityKey_pb2.py index 34480eb8a34..a6e1abf7302 100644 --- a/sdk/python/feast/protos/feast/types/EntityKey_pb2.py +++ b/sdk/python/feast/protos/feast/types/EntityKey_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/types/EntityKey.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,9 +21,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.types.EntityKey_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\021feast.proto.typesB\016EntityKeyProtoZ0github.com/feast-dev/feast/go/protos/feast/types' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\016EntityKeyProtoZ0github.com/feast-dev/feast/go/protos/feast/types' _globals['_ENTITYKEY']._serialized_start=69 _globals['_ENTITYKEY']._serialized_end=142 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/Field_pb2.py b/sdk/python/feast/protos/feast/types/Field_pb2.py index f85b67245bd..973fdc6cdea 100644 --- a/sdk/python/feast/protos/feast/types/Field_pb2.py +++ b/sdk/python/feast/protos/feast/types/Field_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: feast/types/Field.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,11 +21,10 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.types.Field_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\021feast.proto.typesB\nFieldProtoZ0github.com/feast-dev/feast/go/protos/feast/types' - _FIELD_TAGSENTRY._options = None - _FIELD_TAGSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nFieldProtoZ0github.com/feast-dev/feast/go/protos/feast/types' + _globals['_FIELD_TAGSENTRY']._options = None + _globals['_FIELD_TAGSENTRY']._serialized_options = b'8\001' _globals['_FIELD']._serialized_start=66 _globals['_FIELD']._serialized_end=241 _globals['_FIELD_TAGSENTRY']._serialized_start=198 diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.py b/sdk/python/feast/protos/feast/types/Value_pb2.py index 942359f7d79..18ee3311808 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.py +++ b/sdk/python/feast/protos/feast/types/Value_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: feast/types/Value.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -19,9 +20,8 @@ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'feast.types.Value_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types' _globals['_NULL']._serialized_start=1200 _globals['_NULL']._serialized_end=1216 _globals['_VALUETYPE']._serialized_start=41 From 53bb6fb3c62f9b67bfd7bf5235f832cdda00d431 Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Mon, 7 Jul 2025 23:39:41 -0700 Subject: [PATCH 06/12] fix testing Signed-off-by: HaoXuAI --- sdk/python/feast/feature_view.py | 2 +- .../compute_engines/local/feature_builder.py | 4 +- .../infra/compute_engines/local/nodes.py | 66 ++++++++++++------- .../infra/compute_engines/local/test_nodes.py | 8 +-- sdk/python/tests/unit/permissions/conftest.py | 10 ++- 5 files changed, 53 insertions(+), 37 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index e195b219f77..16e786bced8 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -161,7 +161,7 @@ def __init__( self.source_views = source else: raise TypeError( - "source must be a DataSource, a FeatureView, or a list of FeatureViews." + "source must be a DataSource, a FeatureView, or a list of FeatureView." 
+            "source must be a DataSource, a FeatureView, or a list of FeatureViews."
) # Set up stream, batch and derived view sources diff --git a/sdk/python/feast/infra/compute_engines/local/feature_builder.py b/sdk/python/feast/infra/compute_engines/local/feature_builder.py index 1ef5a7574a5..143b1926ad7 100644 --- a/sdk/python/feast/infra/compute_engines/local/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/local/feature_builder.py @@ -36,9 +36,9 @@ def build_source_node(self, view): self.nodes.append(node) return node - def build_join_node(self, view, input_node): + def build_join_node(self, view, input_nodes): column_info = self.get_column_info(view) - node = LocalJoinNode("join", column_info, self.backend, inputs=[input_node]) + node = LocalJoinNode("join", column_info, self.backend, inputs=input_nodes) self.nodes.append(node) return node diff --git a/sdk/python/feast/infra/compute_engines/local/nodes.py b/sdk/python/feast/infra/compute_engines/local/nodes.py index e11ae63f404..fcf9f21fcce 100644 --- a/sdk/python/feast/infra/compute_engines/local/nodes.py +++ b/sdk/python/feast/infra/compute_engines/local/nodes.py @@ -1,11 +1,13 @@ from datetime import datetime, timedelta -from typing import Optional, Union +from typing import List, Optional, Union import pyarrow as pa from feast import BatchFeatureView, StreamFeatureView from feast.data_source import DataSource from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext +from feast.infra.compute_engines.dag.model import DAGFormat +from feast.infra.compute_engines.dag.node import DAGNode from feast.infra.compute_engines.local.arrow_table_value import ArrowTableValue from feast.infra.compute_engines.local.backends.base import DataFrameBackend from feast.infra.compute_engines.local.local_node import LocalNode @@ -56,40 +58,55 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: class LocalJoinNode(LocalNode): def __init__( - self, name: str, column_info: ColumnInfo, backend: DataFrameBackend, inputs=None + self, + name: str, + column_info: ColumnInfo, + backend: DataFrameBackend, + inputs: Optional[List["DAGNode"]] = None, + how: str = "left", ): - super().__init__(name, inputs=inputs) + super().__init__(name, inputs or []) self.column_info = column_info self.backend = backend + self.how = how def execute(self, context: ExecutionContext) -> ArrowTableValue: - feature_table = self.get_single_table(context).data + input_values = self.get_input_values(context) + for val in input_values: + val.assert_format(DAGFormat.ARROW) + + # Convert all upstream ArrowTables to backend DataFrames + joined_df = self.backend.from_arrow(input_values[0].data) + for val in input_values[1:]: + next_df = self.backend.from_arrow(val.data) + joined_df = self.backend.join( + joined_df, + next_df, + on=self.column_info.join_keys, + how=self.how, + ) - if context.entity_df is None: - output = ArrowTableValue(feature_table) - context.node_outputs[self.name] = output - return output + # If entity_df is provided, join it in last + if context.entity_df is not None: + entity_df = self.backend.from_arrow(pa.Table.from_pandas(context.entity_df)) - entity_table = pa.Table.from_pandas(context.entity_df) - feature_df = self.backend.from_arrow(feature_table) - entity_df = self.backend.from_arrow(entity_table) + entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) + entity_ts_col = infer_event_timestamp_from_entity_df(entity_schema) - entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) - entity_df_event_timestamp_col = infer_event_timestamp_from_entity_df( - entity_schema - ) + if 
entity_ts_col != ENTITY_TS_ALIAS: + entity_df = self.backend.rename_columns( + entity_df, {entity_ts_col: ENTITY_TS_ALIAS} + ) - entity_df = self.backend.rename_columns( - entity_df, {entity_df_event_timestamp_col: ENTITY_TS_ALIAS} - ) + joined_df = self.backend.join( + joined_df, + entity_df, + on=self.column_info.join_keys, + how=self.how, + ) - joined_df = self.backend.join( - feature_df, entity_df, on=self.column_info.join_keys, how="left" - ) result = self.backend.to_arrow(joined_df) - output = ArrowTableValue(result) - context.node_outputs[self.name] = output - return output + return ArrowTableValue(result) class LocalFilterNode(LocalNode): @@ -155,7 +172,6 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue: grouped_df = self.backend.groupby_agg(df, self.group_keys, self.agg_ops) result = self.backend.to_arrow(grouped_df) output = ArrowTableValue(result) - context.node_outputs[self.name] = output return output diff --git a/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py b/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py index c486b4148fc..a074367cbe1 100644 --- a/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py +++ b/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py @@ -4,7 +4,7 @@ import pandas as pd import pyarrow as pa -from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext +from feast.infra.compute_engines.dag.context import ExecutionContext from feast.infra.compute_engines.local.arrow_table_value import ArrowTableValue from feast.infra.compute_engines.local.backends.pandas_backend import PandasBackend from feast.infra.compute_engines.local.nodes import ( @@ -45,12 +45,6 @@ def create_context(node_outputs): entity_defs=MagicMock(), entity_df=entity_df, node_outputs=node_outputs, - column_info=ColumnInfo( - join_keys=["entity_id"], - feature_cols=["value"], - ts_col="event_timestamp", - created_ts_col=None, - ), ) diff --git a/sdk/python/tests/unit/permissions/conftest.py b/sdk/python/tests/unit/permissions/conftest.py index ba277d13b49..fceb9f0b197 100644 --- a/sdk/python/tests/unit/permissions/conftest.py +++ b/sdk/python/tests/unit/permissions/conftest.py @@ -1,8 +1,9 @@ -from unittest.mock import Mock +from unittest.mock import MagicMock, Mock import pytest from feast import FeatureView +from feast.data_source import DataSource from feast.entity import Entity from feast.infra.registry.base_registry import BaseRegistry from feast.permissions.decorator import require_permissions @@ -17,9 +18,14 @@ class SecuredFeatureView(FeatureView): def __init__(self, name, tags): + mock_source = MagicMock(spec=DataSource) + mock_source.created_timestamp_column = None + mock_source.timestamp_field = None + mock_source.date_partition_column = None + super().__init__( name=name, - source=Mock(), + source=mock_source, tags=tags, ) From ea50f43da4b7cac5a58a8e4396db349729f83b9d Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Tue, 8 Jul 2025 08:03:20 -0700 Subject: [PATCH 07/12] fix testing Signed-off-by: HaoXuAI --- .../infra/compute_engines/feature_builder.py | 6 +- .../compute_engines/local/feature_builder.py | 2 +- .../infra/compute_engines/local/nodes.py | 2 +- .../compute_engines/spark/feature_builder.py | 25 +++-- .../infra/compute_engines/spark/nodes.py | 36 +++++-- .../compute_engines/spark/test_compute_dag.py | 96 +++++++++++++++++++ .../compute_engines/test_feature_builder.py | 2 +- 7 files changed, 149 insertions(+), 20 deletions(-) diff --git 
a/sdk/python/feast/infra/compute_engines/feature_builder.py b/sdk/python/feast/infra/compute_engines/feature_builder.py index 320328bc6c1..26f3703c794 100644 --- a/sdk/python/feast/infra/compute_engines/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/feature_builder.py @@ -59,7 +59,7 @@ def build_transformation_node(self, view, input_nodes): raise NotImplementedError @abstractmethod - def build_output_nodes(self, final_node): + def build_output_nodes(self, view, final_node): raise NotImplementedError @abstractmethod @@ -131,7 +131,9 @@ def build(self) -> ExecutionPlan: view_to_node[view.name] = dag_node # Step 3: Build output node - final_node = self.build_output_nodes(view_to_node[self.feature_view.name]) + final_node = self.build_output_nodes( + self.feature_view, view_to_node[self.feature_view.name] + ) # Step 4: Topo sort the final DAG from the output node (Physical DAG) sorted_nodes = topo_sort(final_node) diff --git a/sdk/python/feast/infra/compute_engines/local/feature_builder.py b/sdk/python/feast/infra/compute_engines/local/feature_builder.py index 143b1926ad7..8cecaa431df 100644 --- a/sdk/python/feast/infra/compute_engines/local/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/local/feature_builder.py @@ -84,7 +84,7 @@ def build_validation_node(self, view, input_node): self.nodes.append(node) return node - def build_output_nodes(self, input_node): + def build_output_nodes(self, view, input_node): node = LocalOutputNode("output", self.dag_root.view, inputs=[input_node]) self.nodes.append(node) return node diff --git a/sdk/python/feast/infra/compute_engines/local/nodes.py b/sdk/python/feast/infra/compute_engines/local/nodes.py index fcf9f21fcce..2efb4c3192d 100644 --- a/sdk/python/feast/infra/compute_engines/local/nodes.py +++ b/sdk/python/feast/infra/compute_engines/local/nodes.py @@ -63,7 +63,7 @@ def __init__( column_info: ColumnInfo, backend: DataFrameBackend, inputs: Optional[List["DAGNode"]] = None, - how: str = "left", + how: str = "inner", ): super().__init__(name, inputs or []) self.column_info = column_info diff --git a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py index c4dc37f34e4..e042bb000dc 100644 --- a/sdk/python/feast/infra/compute_engines/spark/feature_builder.py +++ b/sdk/python/feast/infra/compute_engines/spark/feature_builder.py @@ -33,7 +33,12 @@ def build_source_node(self, view): source = view.batch_source column_info = self.get_column_info(view) node = SparkReadNode( - "source", source, column_info, self.spark_session, start_time, end_time + f"{view.name}:source", + source, + column_info, + self.spark_session, + start_time, + end_time, ) self.nodes.append(node) return node @@ -43,7 +48,11 @@ def build_aggregation_node(self, view, input_node): group_by_keys = view.entities timestamp_col = view.batch_source.timestamp_field node = SparkAggregationNode( - "agg", agg_specs, group_by_keys, timestamp_col, inputs=[input_node] + f"{view.name}:agg", + agg_specs, + group_by_keys, + timestamp_col, + inputs=[input_node], ) self.nodes.append(node) return node @@ -51,7 +60,7 @@ def build_aggregation_node(self, view, input_node): def build_join_node(self, view, input_nodes): column_info = self.get_column_info(view) node = SparkJoinNode( - name=f"{view.name}_join", + name=f"{view.name}:join", column_info=column_info, spark_session=self.spark_session, inputs=input_nodes, @@ -65,7 +74,7 @@ def build_filter_node(self, view, input_node): ttl = getattr(view, "ttl", 
diff --git a/sdk/python/feast/infra/compute_engines/spark/nodes.py b/sdk/python/feast/infra/compute_engines/spark/nodes.py
index fc7315ccf22..8e70637af8c 100644
--- a/sdk/python/feast/infra/compute_engines/spark/nodes.py
+++ b/sdk/python/feast/infra/compute_engines/spark/nodes.py
@@ -156,7 +156,7 @@ def __init__(
         column_info: ColumnInfo,
         spark_session: SparkSession,
         inputs: Optional[List[DAGNode]] = None,
-        how: str = "left",
+        how: str = "inner",
     ):
         super().__init__(name, inputs=inputs or [])
         self.column_info = column_info
@@ -169,11 +169,28 @@ def execute(self, context: ExecutionContext) -> DAGValue:
             val.assert_format(DAGFormat.SPARK)
 
         # Join all input DataFrames on join_keys
-        joined_df = input_values[0].data
-        for dag_value in input_values[1:]:
-            joined_df = joined_df.join(
-                dag_value.data, on=self.column_info.join_keys, how=self.how
-            )
+        joined_df = None
+        for i, dag_value in enumerate(input_values):
+            df = dag_value.data
+
+            # Use original FeatureView name if available
+            fv_name = self.inputs[i].name.split(":")[0]
+            prefix = fv_name + "__"
+
+            # Skip renaming join keys to preserve join compatibility
+            renamed_cols = [
+                F.col(c).alias(f"{prefix}{c}")
+                if c not in self.column_info.join_keys
+                else F.col(c)
+                for c in df.columns
+            ]
+            df = df.select(*renamed_cols)
+            if joined_df is None:
+                joined_df = df
+            else:
+                joined_df = joined_df.join(
+                    df, on=self.column_info.join_keys, how=self.how
+                )
 
         # If entity_df is provided, join it in last
         entity_df = context.entity_df
@@ -182,8 +199,11 @@
             spark_session=self.spark_session,
             entity_df=entity_df,
         )
-        joined_df = joined_df.join(
-            entity_df, on=self.column_info.join_keys, how=self.how
+        if joined_df is None:
+            raise RuntimeError("No input features available to join with entity_df")
+
+        joined_df = entity_df.join(
+            joined_df, on=self.column_info.join_keys, how="left"
         )
 
         return DAGValue(
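Inside SparkJoinNode, feature columns from each upstream view are aliased with a `<view>__` prefix so that same-named features from different views cannot collide, while join keys keep their names; the entity frame is then joined last, from the entity side, with how="left". A standalone sketch of the prefixing step under those assumptions (function and argument names are illustrative):

    from typing import List

    from pyspark.sql import DataFrame, functions as F

    def prefix_feature_columns(
        df: DataFrame, view_name: str, join_keys: List[str]
    ) -> DataFrame:
        # Alias every non-key column with the producing view's name; two
        # upstream views that both emit "conv_rate" stay distinguishable.
        return df.select(
            *[
                F.col(c) if c in join_keys else F.col(c).alias(f"{view_name}__{c}")
                for c in df.columns
            ]
        )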
name="agg_hourly_driver_stats", + entities=[driver], + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), + Field(name="driver_id", dtype=Int32), + ], + online=True, + offline=True, + source=source, + aggregations=[ + Aggregation(column="conv_rate", function="sum"), + Aggregation(column="acc_rate", function="avg"), + ], + ) + + def create_chained_feature_view(base_fv: BatchFeatureView): def transform_feature(df: DataFrame) -> DataFrame: df = df.withColumn("conv_rate", df["conv_rate"] * 2) @@ -126,3 +147,78 @@ def tqdm_builder(length): ) finally: spark_env.teardown() + + +@pytest.mark.integration +def test_spark_dag_materialize_multi_views(): + spark_env = create_spark_environment() + fs = spark_env.feature_store + registry = fs.registry + source = create_feature_dataset(spark_env) + + base_fv = create_base_feature_view(source) + chained_fv = create_chained_feature_view(base_fv) + + multi_view = BatchFeatureView( + name="multi_view", + entities=[driver], + schema=[ + Field(name="driver_id", dtype=Int32), + Field(name="daily_driver_stats__conv_rate", dtype=Float32), + Field(name="daily_driver_stats__acc_rate", dtype=Float32), + ], + online=True, + offline=True, + source=[base_fv, chained_fv], + sink_source=SparkSource( + name="multi_view_sink", + path="/tmp/multi_view_sink", + file_format="parquet", + timestamp_field="daily_driver_stats__event_timestamp", + created_timestamp_column="daily_driver_stats__created", + ), + ) + + def tqdm_builder(length): + return tqdm(total=length, ncols=100) + + try: + fs.apply([driver, base_fv, chained_fv, multi_view]) + + # ๐Ÿงช Materialize multi-view + task = MaterializationTask( + project=fs.project, + feature_view=multi_view, + start_time=now - timedelta(days=2), + end_time=now, + tqdm_builder=tqdm_builder, + ) + + engine = SparkComputeEngine( + repo_config=spark_env.config, + offline_store=SparkOfflineStore(), + online_store=MagicMock(), + registry=registry, + ) + + jobs = engine.materialize(registry, task) + + # โœ… Validate jobs ran + assert len(jobs) == 1 + assert jobs[0].status() == MaterializationJobStatus.SUCCEEDED + + _check_online_features( + fs=fs, + driver_id=1001, + feature="multi_view:daily_driver_stats__conv_rate", + expected_value=1.6, + full_feature_names=True, + ) + + entity_df = create_entity_df() + + _check_offline_features( + fs=fs, feature="hourly_driver_stats:conv_rate", entity_df=entity_df, size=2 + ) + finally: + spark_env.teardown() diff --git a/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py index d0a40b78ba7..b78ef15299c 100644 --- a/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py +++ b/sdk/python/tests/unit/infra/compute_engines/test_feature_builder.py @@ -117,7 +117,7 @@ def build_transformation_node(self, view, input_nodes): def build_validation_node(self, view, input_node): return MockDAGNode(f"Validate({view.name})", inputs=[input_node]) - def build_output_nodes(self, final_node): + def build_output_nodes(self, view, final_node): output_node = MockDAGNode(f"Output({final_node.name})", inputs=[final_node]) self.nodes.append(output_node) return output_node From 5e74aa80185c5a0c1d1b9c3df50ebafda8535f36 Mon Sep 17 00:00:00 2001 From: HaoXuAI Date: Tue, 8 Jul 2025 08:05:16 -0700 Subject: [PATCH 08/12] fix testing Signed-off-by: HaoXuAI --- sdk/python/feast/infra/compute_engines/local/nodes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
From 5e74aa80185c5a0c1d1b9c3df50ebafda8535f36 Mon Sep 17 00:00:00 2001
From: HaoXuAI
Date: Tue, 8 Jul 2025 08:05:16 -0700
Subject: [PATCH 08/12] fix testing

Signed-off-by: HaoXuAI
---
 sdk/python/feast/infra/compute_engines/local/nodes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/feast/infra/compute_engines/local/nodes.py b/sdk/python/feast/infra/compute_engines/local/nodes.py
index 2efb4c3192d..870a098261d 100644
--- a/sdk/python/feast/infra/compute_engines/local/nodes.py
+++ b/sdk/python/feast/infra/compute_engines/local/nodes.py
@@ -99,10 +99,10 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue:
             )
 
         joined_df = self.backend.join(
-            joined_df,
             entity_df,
+            joined_df,
             on=self.column_info.join_keys,
-            how=self.how,
+            how="left",
         )
 
         result = self.backend.to_arrow(joined_df)
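The argument swap above is about which side of the left join survives: with entity_df first, every requested entity row comes back, carrying nulls where no feature matched. A small pandas illustration of the difference, with made-up ids and values:

    import pandas as pd

    entity_df = pd.DataFrame({"driver_id": [1001, 1002, 1003]})
    features = pd.DataFrame({"driver_id": [1001, 1002], "conv_rate": [0.8, 0.7]})

    # entity_df on the left: all three requested rows survive; driver 1003
    # comes back with conv_rate == NaN.
    out = entity_df.merge(features, on="driver_id", how="left")

    # features on the left (the old argument order): driver 1003 is dropped.
    lossy = features.merge(entity_df, on="driver_id", how="left")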
From 07f5e0b068e11725dceed7b4b3504c69ce1fcd76 Mon Sep 17 00:00:00 2001
From: HaoXuAI
Date: Tue, 8 Jul 2025 08:33:23 -0700
Subject: [PATCH 09/12] fix testing

Signed-off-by: HaoXuAI
---
 .../infra/compute_engines/spark/nodes.py      |  7 ------
 .../infra/compute_engines/local/test_nodes.py | 25 +++++++++++++++++--
 .../infra/compute_engines/spark/test_nodes.py | 18 +++++++------
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/sdk/python/feast/infra/compute_engines/spark/nodes.py b/sdk/python/feast/infra/compute_engines/spark/nodes.py
index 8e70637af8c..7c2b0bd7916 100644
--- a/sdk/python/feast/infra/compute_engines/spark/nodes.py
+++ b/sdk/python/feast/infra/compute_engines/spark/nodes.py
@@ -142,8 +142,6 @@ def execute(self, context: ExecutionContext) -> DAGValue:
             *self.group_by_keys,
         ).agg(*agg_exprs)
 
-        print("[SparkAggregationNode] Output schema:", grouped.columns)
-
         return DAGValue(
             data=grouped, format=DAGFormat.SPARK, metadata={"aggregated": True}
         )
@@ -233,7 +231,6 @@ def execute(self, context: ExecutionContext) -> DAGValue:
         input_value = self.get_single_input_value(context)
         input_value.assert_format(DAGFormat.SPARK)
         input_df: DataFrame = input_value.data
-        print("[SparkFilterNode] Input schema:", input_df.columns)
 
         # Get timestamp fields from feature view
         timestamp_column = self.column_info.timestamp_column
@@ -367,10 +364,6 @@ def execute(self, context: ExecutionContext) -> DAGValue:
 
         input_dfs: List[DataFrame] = [val.data for val in input_values]
 
-        print(
-            f"[SparkTransformationNode] Executing transform on {len(input_dfs)} input(s)."
-        )
-
         transformed_df = self.udf(*input_dfs)
 
         return DAGValue(
diff --git a/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py b/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py
index a074367cbe1..20e23c35e03 100644
--- a/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py
+++ b/sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import pyarrow as pa
 
-from feast.infra.compute_engines.dag.context import ExecutionContext
+from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext
 from feast.infra.compute_engines.local.arrow_table_value import ArrowTableValue
 from feast.infra.compute_engines.local.backends.pandas_backend import PandasBackend
 from feast.infra.compute_engines.local.nodes import (
@@ -58,6 +58,12 @@ def test_local_filter_node():
         name="filter",
         backend=backend,
         filter_expr="value > 15",
+        column_info=ColumnInfo(
+            join_keys=["entity_id"],
+            feature_cols=["value"],
+            ts_col="event_timestamp",
+            created_ts_col=None,
+        ),
     )
     filter_node.add_input(MagicMock())
     filter_node.inputs[0].name = "source"
@@ -104,6 +110,12 @@ def test_local_join_node():
     join_node = LocalJoinNode(
         name="join",
         backend=backend,
+        column_info=ColumnInfo(
+            join_keys=["entity_id"],
+            feature_cols=["value"],
+            ts_col="event_timestamp",
+            created_ts_col=None,
+        ),
     )
     join_node.add_input(MagicMock())
     join_node.inputs[0].name = "source"
@@ -150,7 +162,16 @@ def test_local_dedup_node():
     context.entity_timestamp_col = "event_timestamp"
 
     # Build node
-    node = LocalDedupNode(name="dedup", backend=backend)
+    node = LocalDedupNode(
+        name="dedup",
+        backend=backend,
+        column_info=ColumnInfo(
+            join_keys=["entity_id"],
+            feature_cols=["value"],
+            ts_col="event_timestamp",
+            created_ts_col="created_ts",
+        ),
+    )
     node.add_input(MagicMock())
     node.inputs[0].name = "source"
diff --git a/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py b/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py
index 52ba3dd47f3..61824074ae1 100644
--- a/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py
+++ b/sdk/python/tests/unit/infra/compute_engines/spark/test_nodes.py
@@ -210,9 +210,13 @@ def test_spark_join_node_executes_point_in_time_join(spark_session):
         spark_session=spark_session,
         column_info=ColumnInfo(
             join_keys=["driver_id"],
-            feature_cols=["conv_rate", "acc_rate", "avg_daily_trips"],
-            ts_col="event_timestamp",
-            created_ts_col="created",
+            feature_cols=[
+                "source__conv_rate",
+                "source__acc_rate",
+                "source__avg_daily_trips",
+            ],
+            ts_col="source__event_timestamp",
+            created_ts_col="source__created",
         ),
     )
     dedup_node.add_input(MagicMock())
@@ -226,10 +230,10 @@
 
     # Validate result for driver_id = 1001
     assert result_df[0]["driver_id"] == 1001
-    assert abs(result_df[0]["conv_rate"] - 0.8) < 1e-6
-    assert result_df[0]["avg_daily_trips"] == 15
+    assert abs(result_df[0]["source__conv_rate"] - 0.8) < 1e-6
+    assert result_df[0]["source__avg_daily_trips"] == 15
 
     # Validate result for driver_id = 1002
     assert result_df[1]["driver_id"] == 1002
-    assert abs(result_df[1]["conv_rate"] - 0.7) < 1e-6
-    assert result_df[1]["avg_daily_trips"] == 12
+    assert abs(result_df[1]["source__conv_rate"] - 0.7) < 1e-6
+    assert result_df[1]["source__avg_daily_trips"] == 12
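The test updates above spell out the other half of the prefixing contract: once a source node is named `<view>:source`, the ColumnInfo handed to downstream dedup and join nodes must refer to the prefixed columns. Constructed in isolation, with the field values taken from the updated test; any ColumnInfo options beyond these four are out of scope here:

    from feast.infra.compute_engines.dag.context import ColumnInfo

    # Timestamp and feature columns carry the "<view>__" prefix; join keys
    # stay unprefixed so the point-in-time join can still match entities.
    info = ColumnInfo(
        join_keys=["driver_id"],
        feature_cols=["source__conv_rate", "source__acc_rate"],
        ts_col="source__event_timestamp",
        created_ts_col="source__created",
    )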
From d2eb23f7a57baf913d56b6d096d876046b079f27 Mon Sep 17 00:00:00 2001
From: HaoXuAI
Date: Tue, 8 Jul 2025 08:50:49 -0700
Subject: [PATCH 10/12] fix testing

Signed-off-by: HaoXuAI
---
 sdk/python/feast/batch_feature_view.py           | 10 ++++++++++
 .../feast/infra/compute_engines/spark/compute.py |  1 -
 sdk/python/feast/stream_feature_view.py          |  9 ++++++---
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/sdk/python/feast/batch_feature_view.py b/sdk/python/feast/batch_feature_view.py
index f4b0833b19f..85a71f01c43 100644
--- a/sdk/python/feast/batch_feature_view.py
+++ b/sdk/python/feast/batch_feature_view.py
@@ -11,6 +11,7 @@
 from feast.entity import Entity
 from feast.feature_view import FeatureView
 from feast.field import Field
+from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
 from feast.transformation.base import Transformation
 from feast.transformation.mode import TransformationMode
 
@@ -97,6 +98,15 @@ def __init__(
             RuntimeWarning,
         )
 
+        if isinstance(source, DataSource) and (
+            type(source).__name__ not in SUPPORTED_BATCH_SOURCES
+            and source.to_proto().type != DataSourceProto.SourceType.CUSTOM_SOURCE
+        ):
+            raise ValueError(
+                f"Batch feature views need a batch source, expected one of {SUPPORTED_BATCH_SOURCES} "
+                f"or CUSTOM_SOURCE, got {type(source).__name__}: {source.name} instead "
+            )
+
         self.mode = mode
         self.udf = udf
         self.udf_string = udf_string
diff --git a/sdk/python/feast/infra/compute_engines/spark/compute.py b/sdk/python/feast/infra/compute_engines/spark/compute.py
index 4753ddb9516..59a271a926e 100644
--- a/sdk/python/feast/infra/compute_engines/spark/compute.py
+++ b/sdk/python/feast/infra/compute_engines/spark/compute.py
@@ -129,7 +129,6 @@ def _materialize_one(
             )
 
         except Exception as e:
-            raise e
             # 🛑 Handle failure
             return SparkMaterializationJob(
                 job_id=job_id, status=MaterializationJobStatus.ERROR, error=e
diff --git a/sdk/python/feast/stream_feature_view.py b/sdk/python/feast/stream_feature_view.py
index e3608b10354..dcbbd33df7c 100644
--- a/sdk/python/feast/stream_feature_view.py
+++ b/sdk/python/feast/stream_feature_view.py
@@ -69,6 +69,7 @@ class StreamFeatureView(FeatureView):
     entities: List[str]
     ttl: Optional[timedelta]
     source: DataSource
+    sink_source: Optional[DataSource] = None
     schema: List[Field]
     entity_columns: List[Field]
     features: List[Field]
@@ -90,7 +91,8 @@ def __init__(
         self,
         *,
         name: str,
-        source: DataSource,
+        source: Union[DataSource, "StreamFeatureView", List["StreamFeatureView"]],
+        sink_source: Optional[DataSource] = None,
         entities: Optional[List[Entity]] = None,
         ttl: timedelta = timedelta(days=0),
         tags: Optional[Dict[str, str]] = None,
@@ -114,7 +116,7 @@ def __init__(
             RuntimeWarning,
         )
 
-        if (
+        if isinstance(source, DataSource) and (
             type(source).__name__ not in SUPPORTED_STREAM_SOURCES
             and source.to_proto().type != DataSourceProto.SourceType.CUSTOM_SOURCE
         ):
@@ -148,7 +150,8 @@ def __init__(
             description=description,
             owner=owner,
             schema=schema,
-            source=source,  # type: ignore[arg-type]
+            source=source,  # type: ignore[arg-type]
+            sink_source=sink_source,
         )
 
     def get_feature_transformation(self) -> Optional[Transformation]:
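With the stray `raise e` removed above, a failing task surfaces as a job in ERROR state instead of an exception escaping materialize. A hedged sketch of how a caller can branch on that, using the engine and task objects from the earlier integration test; how the stored `error=e` is read back from the job object is left open:

    jobs = engine.materialize(registry, task)
    failed = [j for j in jobs if j.status() == MaterializationJobStatus.ERROR]
    if failed:
        # The exception is attached at construction time via `error=e`;
        # surfacing it (logging, re-raising) is up to the caller.
        raise RuntimeError(f"{len(failed)} materialization job(s) failed")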
From 9122f4b00dab89a1d91aa13f8460cf98d899598f Mon Sep 17 00:00:00 2001
From: HaoXuAI
Date: Tue, 8 Jul 2025 09:06:16 -0700
Subject: [PATCH 11/12] fix testing

Signed-off-by: HaoXuAI
---
 .../tests/integration/compute_engines/spark/test_compute.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/tests/integration/compute_engines/spark/test_compute.py b/sdk/python/tests/integration/compute_engines/spark/test_compute.py
index 7535bec0587..806abf8002d 100644
--- a/sdk/python/tests/integration/compute_engines/spark/test_compute.py
+++ b/sdk/python/tests/integration/compute_engines/spark/test_compute.py
@@ -38,8 +38,8 @@ def test_spark_compute_engine_get_historical_features():
     data_source = create_feature_dataset(spark_environment)
 
     def transform_feature(df: DataFrame) -> DataFrame:
-        df = df.withColumn("sum_conv_rate", df["sum_conv_rate"] * 2)
-        df = df.withColumn("avg_acc_rate", df["avg_acc_rate"] * 2)
+        df = df.withColumn("conv_rate", df["conv_rate"] * 2)
+        df = df.withColumn("acc_rate", df["acc_rate"] * 2)
         return df
 
     driver_stats_fv = BatchFeatureView(

From 15f8bdecc53c7191e396c36bc88e47fd1740f4d2 Mon Sep 17 00:00:00 2001
From: HaoXuAI
Date: Tue, 8 Jul 2025 09:22:26 -0700
Subject: [PATCH 12/12] fix testing

Signed-off-by: HaoXuAI
---
 .../tests/integration/compute_engines/spark/test_compute.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/tests/integration/compute_engines/spark/test_compute.py b/sdk/python/tests/integration/compute_engines/spark/test_compute.py
index 806abf8002d..e0855ae31f3 100644
--- a/sdk/python/tests/integration/compute_engines/spark/test_compute.py
+++ b/sdk/python/tests/integration/compute_engines/spark/test_compute.py
@@ -91,9 +91,9 @@ def transform_feature(df: DataFrame) -> DataFrame:
 
     # ✅ Assert output
     assert df_out.driver_id.to_list() == [1001, 1002]
-    assert abs(df_out["sum_conv_rate"].to_list()[0] - 1.6) < 1e-6
+    assert abs(df_out["sum_conv_rate"].to_list()[0] - 3.1) < 1e-6
     assert abs(df_out["sum_conv_rate"].to_list()[1] - 2.0) < 1e-6
-    assert abs(df_out["avg_acc_rate"].to_list()[0] - 1.0) < 1e-6
+    assert abs(df_out["avg_acc_rate"].to_list()[0] - 1.4) < 1e-6
     assert abs(df_out["avg_acc_rate"].to_list()[1] - 1.0) < 1e-6
 
     finally: