feat: Added matched_tag field search api results with fuzzy search ca… · feast-dev/feast@4a9ffae
@@ -734,6 +734,103 @@ def test_search_by_tags(self, shared_search_responses):
734734f"Expected to find some of {expected_resources} but found none in {found_resources}"
735735 )
def test_search_matched_tag_exact_match(self, search_test_app):
    """Verify that exact tag hits expose a single-entry ``matched_tag`` dict.

    Issues ``GET /search?query=data`` against ``search_test_app`` and checks
    every result that has ``match_score == 60`` and a ``matched_tag`` key.
    The query "data" is expected to hit the tag ``team: data`` in the test
    fixtures (fixture contents are not visible here -- confirm if this test
    starts failing).
    """

    def _is_exact_tag_hit(entry):
        # This test treats a score of exactly 60 as "matched via a tag".
        return entry.get("match_score") == 60 and "matched_tag" in entry

    response = search_test_app.get("/search?query=data")
    assert response.status_code == 200

    payload = response.json()
    tag_matched_results = list(filter(_is_exact_tag_hit, payload["results"]))

    # At least one result must have been matched through its tags.
    assert tag_matched_results, (
        "Expected to find at least one result with matched_tag from tag matching"
    )

    # Each tag hit must report exactly one matching key/value pair.
    for result in tag_matched_results:
        matched_tag = result.get("matched_tag")
        assert matched_tag is not None, (
            f"matched_tag should not be None for result {result['name']}"
        )
        assert isinstance(matched_tag, dict), (
            f"matched_tag should be a dictionary, got {type(matched_tag)}"
        )
        pair_count = len(matched_tag)
        assert pair_count != 0, "matched_tag should not be empty"
        assert pair_count == 1, (
            f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
        )

    logger.debug(
        f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"
    )

def test_search_matched_tag_fuzzy_match(self, search_test_app):
    """Verify that fuzzy tag hits expose a single-entry ``matched_tag`` dict.

    Tries ``/search?query=tea`` first and falls back to
    ``/search?query=dat`` when the first query yields no fuzzy tag hits.
    A fuzzy tag hit is a result with ``40 <= match_score < 60`` that carries
    a ``matched_tag`` key.

    NOTE(review): when neither query yields a fuzzy tag hit the test passes
    without checking anything; this leniency is intentional in the original.
    """
    # "tea" is expected to fuzzy-match the tag key "team", and "dat" the tag
    # value "data". The exact similarity metric and threshold live in the
    # search implementation -- TODO confirm against it.
    candidate_paths = ("/search?query=tea", "/search?query=dat")

    fuzzy_tag_matched_results = []
    for path in candidate_paths:
        response = search_test_app.get(path)
        assert response.status_code == 200

        fuzzy_tag_matched_results = [
            hit
            for hit in response.json()["results"]
            if 40 <= hit.get("match_score", 0) < 60 and "matched_tag" in hit
        ]
        if fuzzy_tag_matched_results:
            # The first query that produces fuzzy hits wins; skip the fallback.
            break

    if not fuzzy_tag_matched_results:
        return

    # Each fuzzy hit must report exactly one matching key/value pair.
    for result in fuzzy_tag_matched_results:
        matched_tag = result.get("matched_tag")
        assert matched_tag is not None, (
            f"matched_tag should not be None for fuzzy-matched result {result['name']}"
        )
        assert isinstance(matched_tag, dict), (
            f"matched_tag should be a dictionary, got {type(matched_tag)}"
        )
        pair_count = len(matched_tag)
        assert pair_count != 0, "matched_tag should not be empty"
        assert pair_count == 1, (
            f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
        )
        # Re-check the score band used by the filter above.
        assert 40 <= result.get("match_score", 0) < 60, (
            f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
        )

    logger.debug(
        f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
    )

833+737834def test_search_sorting_functionality(self, shared_search_responses):
738835"""Test search results sorting using pre-computed responses"""
739836# Test match_score descending sort