Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions docs/reference/feature-servers/registry-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -1207,28 +1207,33 @@ Please refer to the [page](./../../../docs/getting-started/concepts/permission.md)
"name": "user_id",
"description": "Primary identifier for users",
"project": "project1",
"match_score": 100
"match_score": 100,
"matched_tags": {}
},
{
"type": "featureView",
"name": "user_features",
"description": "User demographic and behavioral features",
"project": "project1",
"match_score": 100
"match_score": 100,
"matched_tags": {"team": "user_analytics"}
},
{
"type": "feature",
"name": "user_age",
"description": "Age of the user in years",
"project": "project1",
"match_score": 80
"featureView": "user_features",
"match_score": 80,
"matched_tags": {}
},
{
"type": "dataSource",
"name": "user_analytics",
"description": "Analytics data for user behavior tracking",
"project": "project2",
"match_score": 80
"match_score": 80,
"matched_tags": {"source": "user_data"}
}
],
"pagination": {
Expand Down
66 changes: 34 additions & 32 deletions sdk/python/feast/api/registry/rest/rest_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
MATCH_SCORE_NAME = 100
MATCH_SCORE_DESCRIPTION = 80
MATCH_SCORE_TAGS = 60
MATCH_SCORE_PARTIAL = 40


def grpc_call(handler_fn, request):
Expand Down Expand Up @@ -537,12 +536,42 @@ def filter_search_results_and_match_score(
) -> List[Dict]:
"""Filter search results based on query string"""
if not query:
# Add all tags as matched_tags when no query (all tags match)
for result in results:
result["matched_tags"] = result.get("tags", {})
return results

query_lower = query.lower()
filtered_results = []

for result in results:
matched_tags = {}
best_fuzzy_tag_score = 0.0

# Collect all matching tags (exact and fuzzy) upfront
tags = result.get("tags", {})
has_exact_tag_match = False

for key, value in tags.items():
key_lower = str(key).lower()
value_str = str(value).lower()
tag_combined = f"{key_lower}={value_str}"

# Exact (substring) match against the combined "key=value" string
if query_lower in tag_combined:
has_exact_tag_match = True
matched_tags[key] = value
else:
# Fuzzy match for tags (on combined "key=value" string)
tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)

if tag_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
matched_tags[key] = value
if tag_fuzzy_score > best_fuzzy_tag_score:
best_fuzzy_tag_score = tag_fuzzy_score

result["matched_tags"] = matched_tags

# Search in name
if query_lower in result.get("name", "").lower():
result["match_score"] = MATCH_SCORE_NAME
Expand All @@ -555,42 +584,15 @@ def filter_search_results_and_match_score(
filtered_results.append(result)
continue

# Search in tags
tags = result.get("tags", {})
tag_match = False
matched_tag = None
best_fuzzy_score = 0.0
best_fuzzy_tag = None

for key, value in tags.items():
key_lower = key.lower()
value_str = str(value).lower()

# Exact match in key or value
if query_lower in key_lower or query_lower in value_str:
tag_match = True
# Store the matched tag as a dictionary
matched_tag = {key: value}
break

# Fuzzy match for tags (on combined "key=value" string)
tag_combined = f"{key_lower}={value_str}"
tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)

if tag_fuzzy_score > best_fuzzy_score:
best_fuzzy_score = tag_fuzzy_score
best_fuzzy_tag = {key: value}

if tag_match:
# Exact tag match
if has_exact_tag_match:
result["match_score"] = MATCH_SCORE_TAGS
result["matched_tag"] = matched_tag
filtered_results.append(result)
continue

# Fuzzy tag match
if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
result["match_score"] = best_fuzzy_score * 100
result["matched_tag"] = best_fuzzy_tag
if best_fuzzy_tag_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
result["match_score"] = best_fuzzy_tag_score * 100
filtered_results.append(result)
continue

Expand Down
98 changes: 69 additions & 29 deletions sdk/python/tests/unit/api/test_search_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,8 +734,8 @@ def test_search_by_tags(self, shared_search_responses):
f"Expected to find some of {expected_resources} but found none in {found_resources}"
)

def test_search_matched_tag_exact_match(self, search_test_app):
"""Test that matched_tag field is present when a tag matches exactly"""
def test_search_matched_tags_exact_match(self, search_test_app):
"""Test that matched_tags field is present when a tag matches exactly"""
# Search for "data" which should match tag key "team" with value "data"
response = search_test_app.get("/search?query=data")
assert response.status_code == 200
Expand All @@ -745,34 +745,75 @@ def test_search_matched_tag_exact_match(self, search_test_app):

# Find results that matched via tags (match_score = 60)
tag_matched_results = [
r for r in results if r.get("match_score") == 60 and "matched_tag" in r
r for r in results if r.get("match_score") == 60 and "matched_tags" in r
]

assert len(tag_matched_results) > 0, (
"Expected to find at least one result with matched_tag from tag matching"
"Expected to find at least one result with matched_tags from tag matching"
)

# Verify matched_tag is present and has a valid dictionary value
# Verify matched_tags is present and has a valid dictionary value
for result in tag_matched_results:
matched_tag = result.get("matched_tag")
assert matched_tag is not None, (
f"matched_tag should not be None for result {result['name']}"
matched_tags = result.get("matched_tags")
assert matched_tags is not None, (
f"matched_tags should not be None for result {result['name']}"
)
assert isinstance(matched_tag, dict), (
f"matched_tag should be a dictionary, got {type(matched_tag)}"
assert isinstance(matched_tags, dict), (
f"matched_tags should be a dictionary, got {type(matched_tags)}"
)
# matched_tag should be a dictionary with key:value format
assert len(matched_tag) > 0, "matched_tag should not be empty"
assert len(matched_tag) == 1, (
f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
# matched_tags should be a non-empty dict for tag-matched results
assert len(matched_tags) > 0, (
"matched_tags should not be empty for tag matches"
)

logger.debug(
f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"
f"Found {len(tag_matched_results)} results with matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) for r in tag_matched_results]}"
)

def test_search_matched_tag_fuzzy_match(self, search_test_app):
"""Test that matched_tag field is present when a tag matches via fuzzy matching"""
def test_search_matched_tags_multiple_tags(self, search_test_app):
    """Check that every tag matching the query is reported in ``matched_tags``.

    The query "a" is issued against ``/search`` and the entry named
    ``user_training_dataset`` is inspected. Per the test data described
    below, that entry is expected to carry the tags
    {"environment": "test", "purpose": "training", "team": "data"}, so both
    the "purpose" tag (value "training" contains "a") and the "team" tag
    (key "team" and value "data" contain "a") should be reported.
    """
    response = search_test_app.get("/search?query=a")
    logger.info(response.json())
    assert response.status_code == 200

    payload = response.json()

    # Keep only the entries for the dataset whose tags we want to inspect.
    dataset_hits = [
        entry
        for entry in payload["results"]
        if entry.get("name") == "user_training_dataset"
    ]
    assert len(dataset_hits) > 0, (
        "Expected to find user_training_dataset in results"
    )

    matched_tags = dataset_hits[0].get("matched_tags", {})

    assert isinstance(matched_tags, dict), (
        f"matched_tags should be a dictionary, got {type(matched_tags)}"
    )

    # More than one tag contains "a", so a single-entry dict would mean
    # the search stopped at the first matching tag.
    assert len(matched_tags) >= 2, (
        f"Expected at least 2 matching tags for 'a' query, got {len(matched_tags)}: {matched_tags}"
    )

    # Both specifically expected tag keys must be reported.
    assert all(tag_key in matched_tags for tag_key in ("team", "purpose")), (
        f"Expected 'team' and 'purpose' in matched_tags, got: {matched_tags}"
    )

    logger.debug(f"user_training_dataset matched_tags: {matched_tags}")

def test_search_matched_tags_fuzzy_match(self, search_test_app):
"""Test that matched_tags field is present when a tag matches via fuzzy matching"""
# Search for "te" which should fuzzy match tag key "team"
# "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)
# Try "tea" which should fuzzy match "team" better
Expand All @@ -789,7 +830,7 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
for r in results
if r.get("match_score", 0) >= 40
and r.get("match_score", 0) < 60
and "matched_tag" in r
and "matched_tags" in r
]

# If we don't find fuzzy matches, try a different query that's more likely to match
Expand All @@ -805,30 +846,29 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
for r in results
if r.get("match_score", 0) >= 40
and r.get("match_score", 0) < 60
and "matched_tag" in r
and "matched_tags" in r
]

if len(fuzzy_tag_matched_results) > 0:
# Verify matched_tag is present for fuzzy matches
# Verify matched_tags is present for fuzzy matches
for result in fuzzy_tag_matched_results:
matched_tag = result.get("matched_tag")
assert matched_tag is not None, (
f"matched_tag should not be None for fuzzy-matched result {result['name']}"
matched_tags = result.get("matched_tags")
assert matched_tags is not None, (
f"matched_tags should not be None for fuzzy-matched result {result['name']}"
)
assert isinstance(matched_tag, dict), (
f"matched_tag should be a dictionary, got {type(matched_tag)}"
assert isinstance(matched_tags, dict), (
f"matched_tags should be a dictionary, got {type(matched_tags)}"
)
assert len(matched_tag) > 0, "matched_tag should not be empty"
assert len(matched_tag) == 1, (
f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
assert len(matched_tags) > 0, (
"matched_tags should not be empty for fuzzy tag matches"
)
# Verify the match_score is in the fuzzy range
assert 40 <= result.get("match_score", 0) < 60, (
f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
)

logger.debug(
f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
)

def test_search_sorting_functionality(self, shared_search_responses):
Expand Down
Loading