def test_on_demand_string_features():
    """Check that a pandas-mode on-demand feature view accepts String inputs.

    Two String fields from a RequestSource are copied straight into the two
    output columns; after inference both output features must be String.
    """
    # Request-time inputs, both declared as String.
    request_fields = [
        Field(name="val_to_add", dtype=String),
        Field(name="val_to_add_2", dtype=String),
    ]
    input_request = RequestSource(name="vals_to_add", schema=request_fields)

    # Declared output schema of the on-demand view.
    output_fields = [
        Field(name="conv_rate_plus_val1", dtype=String),
        Field(name="conv_rate_plus_val2", dtype=String),
    ]

    @on_demand_feature_view(
        sources=[input_request],
        schema=output_fields,
        mode="pandas",
    )
    def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
        # Pass-through transformation: each output column is one input column.
        out = pd.DataFrame()
        out["conv_rate_plus_val1"] = features_df["val_to_add"]
        out["conv_rate_plus_val2"] = features_df["val_to_add_2"]
        return out

    # Inference is expected to succeed rather than raise a ValueError about
    # the 'object' type.
    transformed_conv_rate.infer_features()

    # Both inferred features must keep their declared name and String dtype.
    inferred = transformed_conv_rate.features
    assert len(inferred) == 2
    expected_names = ("conv_rate_plus_val1", "conv_rate_plus_val2")
    for feature, expected_name in zip(inferred, expected_names):
        assert feature.name == expected_name
        assert feature.dtype == String