fix: Search API to return all matching tags in matched_tags field (#5… · feast-dev/feast@de37f66

@@ -734,8 +734,8 @@ def test_search_by_tags(self, shared_search_responses):

734734

f"Expected to find some of {expected_resources} but found none in {found_resources}"

735735

)

736736737-

def test_search_matched_tag_exact_match(self, search_test_app):

738-

"""Test that matched_tag field is present when a tag matches exactly"""

737+

def test_search_matched_tags_exact_match(self, search_test_app):

738+

"""Test that matched_tags field is present when a tag matches exactly"""

739739

# Search for "data" which should match tag key "team" with value "data"

740740

response = search_test_app.get("/search?query=data")

741741

assert response.status_code == 200

@@ -745,34 +745,75 @@ def test_search_matched_tag_exact_match(self, search_test_app):

745745746746

# Find results that matched via tags (match_score = 60)

747747

tag_matched_results = [

748-

r for r in results if r.get("match_score") == 60 and "matched_tag" in r

748+

r for r in results if r.get("match_score") == 60 and "matched_tags" in r

749749

]

750750751751

assert len(tag_matched_results) > 0, (

752-

"Expected to find at least one result with matched_tag from tag matching"

752+

"Expected to find at least one result with matched_tags from tag matching"

753753

)

754754755-

# Verify matched_tag is present and has a valid dictionary value

755+

# Verify matched_tags is present and has a valid dictionary value

756756

for result in tag_matched_results:

757-

matched_tag = result.get("matched_tag")

758-

assert matched_tag is not None, (

759-

f"matched_tag should not be None for result {result['name']}"

757+

matched_tags = result.get("matched_tags")

758+

assert matched_tags is not None, (

759+

f"matched_tags should not be None for result {result['name']}"

760760

)

761-

assert isinstance(matched_tag, dict), (

762-

f"matched_tag should be a dictionary, got {type(matched_tag)}"

761+

assert isinstance(matched_tags, dict), (

762+

f"matched_tags should be a dictionary, got {type(matched_tags)}"

763763

)

764-

# matched_tag should be a dictionary with key:value format

765-

assert len(matched_tag) > 0, "matched_tag should not be empty"

766-

assert len(matched_tag) == 1, (

767-

f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"

764+

# matched_tags should be a non-empty dict for tag-matched results

765+

assert len(matched_tags) > 0, (

766+

"matched_tags should not be empty for tag matches"

768767

)

769768770769

logger.debug(

771-

f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"

770+

f"Found {len(tag_matched_results)} results with matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) for r in tag_matched_results]}"

772771

)

773772774-

def test_search_matched_tag_fuzzy_match(self, search_test_app):

775-

"""Test that matched_tag field is present when a tag matches via fuzzy matching"""

773+

def test_search_matched_tags_multiple_tags(self, search_test_app):
    """Test that multiple matching tags are returned in matched_tags.

    Issues ``GET /search?query=a`` and inspects the result named
    ``user_training_dataset``. The test expects that result's
    ``matched_tags`` to be a dict holding at least the ``team`` and
    ``purpose`` keys.

    Args:
        search_test_app: Test client fixture exposing ``get(url)``; the
            returned response must provide ``status_code`` and ``json()``.
    """
    # Search for "a" which should match:
    # - Names containing "a" (e.g., user_training_dataset, data sources)
    # - Tags where key/value contains "a": "team" (key), "data" (value), "training" (value)
    response = search_test_app.get("/search?query=a")

    # Check the status first: decoding an error body as JSON before this
    # assertion would hide the real failure behind a decode error.
    assert response.status_code == 200

    # Decode the payload once and reuse it for logging and assertions.
    data = response.json()
    # DEBUG level (with deferred formatting) keeps the full payload out of
    # normal test output, matching the other matched_tags tests.
    logger.debug("Search response for query 'a': %s", data)
    results = data["results"]

    # Find user_training_dataset which, per the test fixtures (not visible
    # here), has tags: {"environment": "test", "purpose": "training", "team": "data"}
    # "team" contains "a", "data" contains "a", "training" contains "a"
    # So matched_tags should have at least 2 entries: "purpose" and "team"
    dataset_results = [
        r for r in results if r.get("name") == "user_training_dataset"
    ]

    assert dataset_results, (
        "Expected to find user_training_dataset in results"
    )

    # Only the first result with that name is inspected.
    dataset_result = dataset_results[0]
    matched_tags = dataset_result.get("matched_tags", {})

    assert isinstance(matched_tags, dict), (
        f"matched_tags should be a dictionary, got {type(matched_tags)}"
    )

    # Should have multiple matching tags: "purpose" and "team"
    assert len(matched_tags) >= 2, (
        f"Expected at least 2 matching tags for 'a' query, got {len(matched_tags)}: {matched_tags}"
    )

    # Verify the expected tags are present
    missing_tags = {"team", "purpose"} - matched_tags.keys()
    assert not missing_tags, (
        f"Expected 'team' and 'purpose' in matched_tags, got: {matched_tags}"
    )

    logger.debug("user_training_dataset matched_tags: %s", matched_tags)

814+815+

def test_search_matched_tags_fuzzy_match(self, search_test_app):

816+

"""Test that matched_tags field is present when a tag matches via fuzzy matching"""

776817

# Search for "te" which should fuzzy match tag key "team"

777818

# "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)

778819

# Try "tea" which should fuzzy match "team" better

@@ -789,7 +830,7 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):

789830

for r in results

790831

if r.get("match_score", 0) >= 40

791832

and r.get("match_score", 0) < 60

792-

and "matched_tag" in r

833+

and "matched_tags" in r

793834

]

794835795836

# If we don't find fuzzy matches, try a different query that's more likely to match

@@ -805,30 +846,29 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):

805846

for r in results

806847

if r.get("match_score", 0) >= 40

807848

and r.get("match_score", 0) < 60

808-

and "matched_tag" in r

849+

and "matched_tags" in r

809850

]

810851811852

if len(fuzzy_tag_matched_results) > 0:

812-

# Verify matched_tag is present for fuzzy matches

853+

# Verify matched_tags is present for fuzzy matches

813854

for result in fuzzy_tag_matched_results:

814-

matched_tag = result.get("matched_tag")

815-

assert matched_tag is not None, (

816-

f"matched_tag should not be None for fuzzy-matched result {result['name']}"

855+

matched_tags = result.get("matched_tags")

856+

assert matched_tags is not None, (

857+

f"matched_tags should not be None for fuzzy-matched result {result['name']}"

817858

)

818-

assert isinstance(matched_tag, dict), (

819-

f"matched_tag should be a dictionary, got {type(matched_tag)}"

859+

assert isinstance(matched_tags, dict), (

860+

f"matched_tags should be a dictionary, got {type(matched_tags)}"

820861

)

821-

assert len(matched_tag) > 0, "matched_tag should not be empty"

822-

assert len(matched_tag) == 1, (

823-

f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"

862+

assert len(matched_tags) > 0, (

863+

"matched_tags should not be empty for fuzzy tag matches"

824864

)

825865

# Verify the match_score is in the fuzzy range

826866

assert 40 <= result.get("match_score", 0) < 60, (

827867

f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"

828868

)

829869830870

logger.debug(

831-

f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"

871+

f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"

832872

)

833873834874

def test_search_sorting_functionality(self, shared_search_responses):