feat: Added matched_tag field search api results with fuzzy search ca… · feast-dev/feast@4a9ffae

@@ -734,6 +734,103 @@ def test_search_by_tags(self, shared_search_responses):

734734

f"Expected to find some of {expected_resources} but found none in {found_resources}"

735735

)

736736737+

def test_search_matched_tag_exact_match(self, search_test_app):
    """Check that tag-matched search results expose a one-entry ``matched_tag`` dict.

    Issues ``/search?query=data`` and inspects every result whose
    ``match_score`` is exactly 60 and that carries a ``matched_tag`` key.
    At least one such result must exist, and each one must hold a
    non-empty dictionary with exactly one key-value pair.
    """
    # The query "data" is expected to hit a tag (key "team", value "data").
    resp = search_test_app.get("/search?query=data")
    assert resp.status_code == 200

    payload = resp.json()

    # This test treats a match_score of exactly 60 as "matched via tags".
    tag_hits = []
    for entry in payload["results"]:
        if entry.get("match_score") == 60 and "matched_tag" in entry:
            tag_hits.append(entry)

    assert len(tag_hits) > 0, (
        "Expected to find at least one result with matched_tag from tag matching"
    )

    for hit in tag_hits:
        tag = hit.get("matched_tag")
        assert tag is not None, (
            f"matched_tag should not be None for result {hit['name']}"
        )
        assert isinstance(tag, dict), (
            f"matched_tag should be a dictionary, got {type(tag)}"
        )
        # The field must describe exactly one tag as a single key/value pair.
        assert len(tag) > 0, "matched_tag should not be empty"
        assert len(tag) == 1, (
            f"matched_tag should contain exactly one key-value pair, got {len(tag)}"
        )

    # Summarise "name -> matched_tag" for each hit to aid debugging.
    summary = [
        hit["name"] + " -> " + str(hit.get("matched_tag", "N/A")) for hit in tag_hits
    ]
    logger.debug(f"Found {len(tag_hits)} results with matched_tag: {summary}")

773+774+

def test_search_matched_tag_fuzzy_match(self, search_test_app):
    """Test that matched_tag field is present when a tag matches via fuzzy matching.

    Candidate queries are tried in order until one returns results whose
    ``match_score`` lies in the fuzzy range [40, 60) and that carry a
    ``matched_tag`` key. The test fails if no candidate produces such a
    result, so it cannot pass without exercising the fuzzy path. Every
    fuzzy result must hold a dictionary with exactly one key-value pair.
    """

    def fuzzy_tag_matches(query):
        """Return results for *query* scored in [40, 60) that include matched_tag."""
        response = search_test_app.get(f"/search?query={query}")
        assert response.status_code == 200
        return [
            r
            for r in response.json()["results"]
            if 40 <= r.get("match_score", 0) < 60 and "matched_tag" in r
        ]

    # Candidate queries, assuming fuzzy matching compares character sets
    # (overlap / union) -- TODO confirm against the search implementation:
    #   "tea" vs "team": {'t','e','a'} / {'t','e','a','m'} = 3/4 = 75%
    #   "dat" vs "data": both reduce to {'d','a','t'}, i.e. 3/3 = 100%
    # ("te" vs "team" would only reach 2/4 = 50%, so it is not used.)
    candidate_queries = ("tea", "dat")

    fuzzy_tag_matched_results = []
    for query in candidate_queries:
        fuzzy_tag_matched_results = fuzzy_tag_matches(query)
        if fuzzy_tag_matched_results:
            break

    # Fail loudly instead of silently skipping every check below.
    assert fuzzy_tag_matched_results, (
        "Expected at least one fuzzy tag match (match_score in [40, 60) with "
        f"matched_tag) for queries {candidate_queries}"
    )

    # Verify matched_tag is present and well-formed for fuzzy matches
    for result in fuzzy_tag_matched_results:
        matched_tag = result.get("matched_tag")
        assert matched_tag is not None, (
            f"matched_tag should not be None for fuzzy-matched result {result['name']}"
        )
        assert isinstance(matched_tag, dict), (
            f"matched_tag should be a dictionary, got {type(matched_tag)}"
        )
        assert len(matched_tag) == 1, (
            f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
        )
        # Holds by construction of the filter; kept as a guard in case the
        # filter above is ever relaxed.
        assert 40 <= result.get("match_score", 0) < 60, (
            f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
        )

    logger.debug(
        f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
    )

833+737834

def test_search_sorting_functionality(self, shared_search_responses):

738835

"""Test search results sorting using pre-computed responses"""

739836

# Test match_score descending sort