From d4a649dce4de318754421490d2b03f04e9bb4cd1 Mon Sep 17 00:00:00 2001 From: Aniket Paluskar Date: Tue, 13 Jan 2026 02:47:19 +0530 Subject: [PATCH 1/2] fix: Search API to return all matching tags in matched_tags field Signed-off-by: Aniket Paluskar --- .../feature-servers/registry-server.md | 13 ++-- .../feast/api/registry/rest/rest_utils.py | 66 ++++++++++--------- sdk/python/tests/unit/api/test_search_api.py | 56 ++++++++-------- 3 files changed, 70 insertions(+), 65 deletions(-) diff --git a/docs/reference/feature-servers/registry-server.md b/docs/reference/feature-servers/registry-server.md index dbcf661fc86..496eaa8badc 100644 --- a/docs/reference/feature-servers/registry-server.md +++ b/docs/reference/feature-servers/registry-server.md @@ -1207,28 +1207,33 @@ Please refer the [page](./../../../docs/getting-started/concepts/permission.md) "name": "user_id", "description": "Primary identifier for users", "project": "project1", - "match_score": 100 + "match_score": 100, + "matched_tags": {} }, { "type": "featureView", "name": "user_features", "description": "User demographic and behavioral features", "project": "project1", - "match_score": 100 + "match_score": 100, + "matched_tags": {"team": "user_analytics"} }, { "type": "feature", "name": "user_age", "description": "Age of the user in years", "project": "project1", - "match_score": 80 + "featureView": "user_features", + "match_score": 80, + "matched_tags": {} }, { "type": "dataSource", "name": "user_analytics", "description": "Analytics data for user behavior tracking", "project": "project2", - "match_score": 80 + "match_score": 80, + "matched_tags": {"source": "user_data"} } ], "pagination": { diff --git a/sdk/python/feast/api/registry/rest/rest_utils.py b/sdk/python/feast/api/registry/rest/rest_utils.py index dac6b7ccf7b..4c517b0abdf 100644 --- a/sdk/python/feast/api/registry/rest/rest_utils.py +++ b/sdk/python/feast/api/registry/rest/rest_utils.py @@ -18,7 +18,6 @@ MATCH_SCORE_NAME = 100 
MATCH_SCORE_DESCRIPTION = 80 MATCH_SCORE_TAGS = 60 -MATCH_SCORE_PARTIAL = 40 def grpc_call(handler_fn, request): @@ -537,12 +536,42 @@ def filter_search_results_and_match_score( ) -> List[Dict]: """Filter search results based on query string""" if not query: + # Add all tags as matched_tags when no query (all tags match) + for result in results: + result["matched_tags"] = result.get("tags", {}) return results query_lower = query.lower() filtered_results = [] for result in results: + matched_tags = {} + best_fuzzy_tag_score = 0.0 + + # Collect all matching tags (exact and fuzzy) upfront + tags = result.get("tags", {}) + has_exact_tag_match = False + + for key, value in tags.items(): + key_lower = str(key).lower() + value_str = str(value).lower() + tag_combined = f"{key_lower}={value_str}" + + # Exact (substring) match against the combined "key=value" string + if query_lower in tag_combined: + has_exact_tag_match = True + matched_tags[key] = value + else: + # Fuzzy match for tags (on combined "key=value" string) + tag_fuzzy_score = fuzzy_match(query_lower, tag_combined) + + if tag_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD: + matched_tags[key] = value + if tag_fuzzy_score > best_fuzzy_tag_score: + best_fuzzy_tag_score = tag_fuzzy_score + + result["matched_tags"] = matched_tags + # Search in name if query_lower in result.get("name", "").lower(): result["match_score"] = MATCH_SCORE_NAME @@ -555,42 +584,15 @@ def filter_search_results_and_match_score( filtered_results.append(result) continue - # Search in tags - tags = result.get("tags", {}) - tag_match = False - matched_tag = None - best_fuzzy_score = 0.0 - best_fuzzy_tag = None - - for key, value in tags.items(): - key_lower = key.lower() - value_str = str(value).lower() - - # Exact match in key or value - if query_lower in key_lower or query_lower in value_str: - tag_match = True - # Store the matched tag as a dictionary - matched_tag = {key: value} - break - - # Fuzzy match for tags (on combined "key:value" string) - tag_combined = 
f"{key_lower}={value_str}" - tag_fuzzy_score = fuzzy_match(query_lower, tag_combined) - - if tag_fuzzy_score > best_fuzzy_score: - best_fuzzy_score = tag_fuzzy_score - best_fuzzy_tag = {key: value} - - if tag_match: + # Exact tag match + if has_exact_tag_match: result["match_score"] = MATCH_SCORE_TAGS - result["matched_tag"] = matched_tag filtered_results.append(result) continue # Fuzzy tag match - if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD: - result["match_score"] = best_fuzzy_score * 100 - result["matched_tag"] = best_fuzzy_tag + if best_fuzzy_tag_score >= MATCH_SCORE_DEFAULT_THRESHOLD: + result["match_score"] = best_fuzzy_tag_score * 100 filtered_results.append(result) continue diff --git a/sdk/python/tests/unit/api/test_search_api.py b/sdk/python/tests/unit/api/test_search_api.py index 9116db1c59b..80755ac0d1f 100644 --- a/sdk/python/tests/unit/api/test_search_api.py +++ b/sdk/python/tests/unit/api/test_search_api.py @@ -734,8 +734,8 @@ def test_search_by_tags(self, shared_search_responses): f"Expected to find some of {expected_resources} but found none in {found_resources}" ) - def test_search_matched_tag_exact_match(self, search_test_app): - """Test that matched_tag field is present when a tag matches exactly""" + def test_search_matched_tags_exact_match(self, search_test_app): + """Test that matched_tags field is present when a tag matches exactly""" # Search for "data" which should match tag key "team" with value "data" response = search_test_app.get("/search?query=data") assert response.status_code == 200 @@ -745,34 +745,33 @@ def test_search_matched_tag_exact_match(self, search_test_app): # Find results that matched via tags (match_score = 60) tag_matched_results = [ - r for r in results if r.get("match_score") == 60 and "matched_tag" in r + r for r in results if r.get("match_score") == 60 and "matched_tags" in r ] assert len(tag_matched_results) > 0, ( - "Expected to find at least one result with matched_tag from tag matching" + "Expected to 
find at least one result with matched_tags from tag matching" ) - # Verify matched_tag is present and has a valid dictionary value + # Verify matched_tags is present and has a valid dictionary value for result in tag_matched_results: - matched_tag = result.get("matched_tag") - assert matched_tag is not None, ( - f"matched_tag should not be None for result {result['name']}" + matched_tags = result.get("matched_tags") + assert matched_tags is not None, ( + f"matched_tags should not be None for result {result['name']}" ) - assert isinstance(matched_tag, dict), ( - f"matched_tag should be a dictionary, got {type(matched_tag)}" + assert isinstance(matched_tags, dict), ( + f"matched_tags should be a dictionary, got {type(matched_tags)}" ) - # matched_tag should be a dictionary with key:value format - assert len(matched_tag) > 0, "matched_tag should not be empty" - assert len(matched_tag) == 1, ( - f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}" + # matched_tags should be a non-empty dict for tag-matched results + assert len(matched_tags) > 0, ( + "matched_tags should not be empty for tag matches" ) logger.debug( - f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}" + f"Found {len(tag_matched_results)} results with matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) for r in tag_matched_results]}" ) - def test_search_matched_tag_fuzzy_match(self, search_test_app): - """Test that matched_tag field is present when a tag matches via fuzzy matching""" + def test_search_matched_tags_fuzzy_match(self, search_test_app): + """Test that matched_tags field is present when a tag matches via fuzzy matching""" # Search for "te" which should fuzzy match tag key "team" # "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold) # Try "tea" which should fuzzy match "team" better @@ -789,7 +788,7 @@ def 
test_search_matched_tag_fuzzy_match(self, search_test_app): for r in results if r.get("match_score", 0) >= 40 and r.get("match_score", 0) < 60 - and "matched_tag" in r + and "matched_tags" in r ] # If we don't find fuzzy matches, try a different query that's more likely to match @@ -805,22 +804,21 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app): for r in results if r.get("match_score", 0) >= 40 and r.get("match_score", 0) < 60 - and "matched_tag" in r + and "matched_tags" in r ] if len(fuzzy_tag_matched_results) > 0: - # Verify matched_tag is present for fuzzy matches + # Verify matched_tags is present for fuzzy matches for result in fuzzy_tag_matched_results: - matched_tag = result.get("matched_tag") - assert matched_tag is not None, ( - f"matched_tag should not be None for fuzzy-matched result {result['name']}" + matched_tags = result.get("matched_tags") + assert matched_tags is not None, ( + f"matched_tags should not be None for fuzzy-matched result {result['name']}" ) - assert isinstance(matched_tag, dict), ( - f"matched_tag should be a dictionary, got {type(matched_tag)}" + assert isinstance(matched_tags, dict), ( + f"matched_tags should be a dictionary, got {type(matched_tags)}" ) - assert len(matched_tag) > 0, "matched_tag should not be empty" - assert len(matched_tag) == 1, ( - f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}" + assert len(matched_tags) > 0, ( + "matched_tags should not be empty for fuzzy tag matches" ) # Verify the match_score is in the fuzzy range assert 40 <= result.get("match_score", 0) < 60, ( @@ -828,7 +826,7 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app): ) logger.debug( - f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}" + f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tags: 
{[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}" ) def test_search_sorting_functionality(self, shared_search_responses): From 2f421f306233c1f16833ed37c2c3bca3e64726ea Mon Sep 17 00:00:00 2001 From: Aniket Paluskar Date: Tue, 13 Jan 2026 17:05:41 +0530 Subject: [PATCH 2/2] Added unit tests to check if multiple tags are returned if they are matching the query Signed-off-by: Aniket Paluskar --- sdk/python/tests/unit/api/test_search_api.py | 42 ++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sdk/python/tests/unit/api/test_search_api.py b/sdk/python/tests/unit/api/test_search_api.py index 80755ac0d1f..f0d7c3942e8 100644 --- a/sdk/python/tests/unit/api/test_search_api.py +++ b/sdk/python/tests/unit/api/test_search_api.py @@ -770,6 +770,48 @@ def test_search_matched_tags_exact_match(self, search_test_app): f"Found {len(tag_matched_results)} results with matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) for r in tag_matched_results]}" ) + def test_search_matched_tags_multiple_tags(self, search_test_app): + """Test that multiple matching tags are returned in matched_tags""" + # Search for "a" which should match: + # - Names containing "a" (e.g., user_training_dataset, data sources) + # - Tags where key/value contains "a": "team" (key), "data" (value), "training" (value) + response = search_test_app.get("/search?query=a") + logger.info(response.json()) + assert response.status_code == 200 + + data = response.json() + results = data["results"] + + # Find user_training_dataset which has tags: {"environment": "test", "purpose": "training", "team": "data"} + # "team" contains "a", "data" contains "a", "training" contains "a" + # So matched_tags should have at least 2 entries: "purpose" and "team" + dataset_results = [ + r for r in results if r.get("name") == "user_training_dataset" + ] + + assert len(dataset_results) > 0, ( + "Expected to 
find user_training_dataset in results" + ) + + dataset_result = dataset_results[0] + matched_tags = dataset_result.get("matched_tags", {}) + + assert isinstance(matched_tags, dict), ( + f"matched_tags should be a dictionary, got {type(matched_tags)}" + ) + + # Should have multiple matching tags: "purpose" and "team" + assert len(matched_tags) >= 2, ( + f"Expected at least 2 matching tags for 'a' query, got {len(matched_tags)}: {matched_tags}" + ) + + # Verify the expected tags are present + assert "team" in matched_tags and "purpose" in matched_tags, ( + f"Expected 'team' and 'purpose' in matched_tags, got: {matched_tags}" + ) + + logger.debug(f"user_training_dataset matched_tags: {matched_tags}") + def test_search_matched_tags_fuzzy_match(self, search_test_app): """Test that matched_tags field is present when a tag matches via fuzzy matching""" # Search for "te" which should fuzzy match tag key "team"