fix: Search API to return all matching tags in matched_tags field (#5… · feast-dev/feast@de37f66
@@ -734,8 +734,8 @@ def test_search_by_tags(self, shared_search_responses):
734734f"Expected to find some of {expected_resources} but found none in {found_resources}"
735735 )
736736737-def test_search_matched_tag_exact_match(self, search_test_app):
738-"""Test that matched_tag field is present when a tag matches exactly"""
737+def test_search_matched_tags_exact_match(self, search_test_app):
738+"""Test that matched_tags field is present when a tag matches exactly"""
739739# Search for "data" which should match tag key "team" with value "data"
740740response = search_test_app.get("/search?query=data")
741741assert response.status_code == 200
@@ -745,34 +745,75 @@ def test_search_matched_tag_exact_match(self, search_test_app):
745745746746# Find results that matched via tags (match_score = 60)
747747tag_matched_results = [
748-r for r in results if r.get("match_score") == 60 and "matched_tag" in r
748+r for r in results if r.get("match_score") == 60 and "matched_tags" in r
749749 ]
750750751751assert len(tag_matched_results) > 0, (
752-"Expected to find at least one result with matched_tag from tag matching"
752+"Expected to find at least one result with matched_tags from tag matching"
753753 )
754754755-# Verify matched_tag is present and has a valid dictionary value
755+# Verify matched_tags is present and is a non-empty dictionary
756756for result in tag_matched_results:
757-matched_tag = result.get("matched_tag")
758-assert matched_tag is not None, (
759-f"matched_tag should not be None for result {result['name']}"
757+matched_tags = result.get("matched_tags")
758+assert matched_tags is not None, (
759+f"matched_tags should not be None for result {result['name']}"
760760 )
761-assert isinstance(matched_tag, dict), (
762-f"matched_tag should be a dictionary, got {type(matched_tag)}"
761+assert isinstance(matched_tags, dict), (
762+f"matched_tags should be a dictionary, got {type(matched_tags)}"
763763 )
764-# matched_tag should be a dictionary with key:value format
765-assert len(matched_tag) > 0, "matched_tag should not be empty"
766-assert len(matched_tag) == 1, (
767-f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
764+# matched_tags should be a non-empty dict for tag-matched results
765+assert len(matched_tags) > 0, (
766+"matched_tags should not be empty for tag matches"
768767 )
769768770769logger.debug(
771-f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"
770+f"Found {len(tag_matched_results)} results with matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) for r in tag_matched_results]}"
772771 )
773772774-def test_search_matched_tag_fuzzy_match(self, search_test_app):
775-"""Test that matched_tag field is present when a tag matches via fuzzy matching"""
773+def test_search_matched_tags_multiple_tags(self, search_test_app):
774+"""Test that multiple matching tags are returned in matched_tags"""
775+# Search for "a" which should match:
776+# - Names containing "a" (e.g., user_training_dataset, data sources)
777+# - Tags where key/value contains "a": "team" (key), "data" (value), "training" (value)
778+response = search_test_app.get("/search?query=a")
779+logger.info(response.json())
780+assert response.status_code == 200
781+782+data = response.json()
783+results = data["results"]
784+785+# Find user_training_dataset which has tags: {"environment": "test", "purpose": "training", "team": "data"}
786+# "team" contains "a", "data" contains "a", "training" contains "a"
787+# So matched_tags should have at least 2 entries: "purpose" and "team"
788+dataset_results = [
789+r for r in results if r.get("name") == "user_training_dataset"
790+ ]
791+792+assert len(dataset_results) > 0, (
793+"Expected to find user_training_dataset in results"
794+ )
795+796+dataset_result = dataset_results[0]
797+matched_tags = dataset_result.get("matched_tags", {})
798+799+assert isinstance(matched_tags, dict), (
800+f"matched_tags should be a dictionary, got {type(matched_tags)}"
801+ )
802+803+# Should have multiple matching tags: "purpose" and "team"
804+assert len(matched_tags) >= 2, (
805+f"Expected at least 2 matching tags for 'a' query, got {len(matched_tags)}: {matched_tags}"
806+ )
807+808+# Verify the expected tags are present
809+assert "team" in matched_tags and "purpose" in matched_tags, (
810+f"Expected 'team' and 'purpose' in matched_tags, got: {matched_tags}"
811+ )
812+813+logger.debug(f"user_training_dataset matched_tags: {matched_tags}")
814+815+def test_search_matched_tags_fuzzy_match(self, search_test_app):
816+"""Test that matched_tags field is present when a tag matches via fuzzy matching"""
776817# Searching for "te" is not enough to fuzzy match tag key "team":
777818# "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)
778819# Try "tea" instead, which should fuzzy match "team" better
@@ -789,7 +830,7 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
789830for r in results
790831if r.get("match_score", 0) >= 40
791832and r.get("match_score", 0) < 60
792-and "matched_tag" in r
833+and "matched_tags" in r
793834 ]
794835795836# If we don't find fuzzy matches, try a different query that's more likely to match
@@ -805,30 +846,29 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
805846for r in results
806847if r.get("match_score", 0) >= 40
807848and r.get("match_score", 0) < 60
808-and "matched_tag" in r
849+and "matched_tags" in r
809850 ]
810851811852if len(fuzzy_tag_matched_results) > 0:
812-# Verify matched_tag is present for fuzzy matches
853+# Verify matched_tags is present for fuzzy matches
813854for result in fuzzy_tag_matched_results:
814-matched_tag = result.get("matched_tag")
815-assert matched_tag is not None, (
816-f"matched_tag should not be None for fuzzy-matched result {result['name']}"
855+matched_tags = result.get("matched_tags")
856+assert matched_tags is not None, (
857+f"matched_tags should not be None for fuzzy-matched result {result['name']}"
817858 )
818-assert isinstance(matched_tag, dict), (
819-f"matched_tag should be a dictionary, got {type(matched_tag)}"
859+assert isinstance(matched_tags, dict), (
860+f"matched_tags should be a dictionary, got {type(matched_tags)}"
820861 )
821-assert len(matched_tag) > 0, "matched_tag should not be empty"
822-assert len(matched_tag) == 1, (
823-f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
862+assert len(matched_tags) > 0, (
863+"matched_tags should not be empty for fuzzy tag matches"
824864 )
825865# Verify the match_score is in the fuzzy range
826866assert 40 <= result.get("match_score", 0) < 60, (
827867f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
828868 )
829869830870logger.debug(
831-f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
871+f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
832872 )
833873834874def test_search_sorting_functionality(self, shared_search_responses):