langchain/libs/community/tests/unit_tests/utilities/test_you.py
Scott Nath a32798abd7
community: Add you.com utility, update you retriever integration docs (#17014)
<!-- Thank you for contributing to LangChain!

Please title your PR "<package>: <description>", where <package> is
whichever of langchain, community, core, experimental, etc. is being
modified.

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes if applicable,
  - **Dependencies:** any dependencies required for this change,
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` from the root
of the package you've modified to check this locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc: https://python.langchain.com/docs/contributing/

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

- **Description: changes to you.com files** 
    - general cleanup
    - adds community/utilities/you.py, moving the bulk of the code from the
      retriever into the utility
    - removes `snippet` as endpoint
    - adds `news` as endpoint
    - adds more tests

<s>**Description: update community MAKE file** 
    - adds `integration_tests`
    - adds `coverage`</s>

- **Issue:** the issue # it fixes if applicable,
- [For New Contributors: Update Integration
Documentation](https://github.com/langchain-ai/langchain/issues/15664#issuecomment-1920099868)
- **Dependencies:** n/a
- **Twitter handle:** @scottnath
- **Mastodon handle:** scottnath@mastodon.social

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-02-08 13:47:50 -08:00

191 lines
5.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from typing import Any, Dict, List, Optional, Union
import responses
from langchain_core.documents import Document
from langchain_community.utilities.you import YouSearchAPIWrapper
# Host under which the mocked HTTP routes in this module are registered.
TEST_ENDPOINT = "https://api.ydc-index.io"

# Canned search payload used as the mocked response body:
# two hits, each carrying two snippets.
MOCK_RESPONSE_RAW: Dict[str, List[Dict[str, Union[str, List[str]]]]] = {
    "hits": [
        {
            "description": "Test description",
            "snippets": ["yo", "bird up"],
            "thumbnail_url": "https://example.com/image.gif",
            "title": "Test title 1",
            "url": "https://example.com/article.html",
        },
        {
            "description": "Test description 2",
            "snippets": ["worst show", "on tv"],
            "thumbnail_url": "https://example.com/image2.gif",
            "title": "Test title 2",
            "url": "https://example.com/article2.html",
        },
    ]
}
def generate_parsed_metadata(num: Optional[int] = 0) -> Dict[Any, Any]:
    """Build the metadata expected on documents parsed from one mock hit.

    Args:
        num: Index into ``MOCK_RESPONSE_RAW["hits"]``; ``None`` is treated as 0.

    Returns:
        A dict with the hit's ``url``, ``thumbnail_url``, ``title`` and
        ``description``. The hit's ``snippets`` entry is left out.
    """
    index = 0 if num is None else num
    source: Dict[str, Union[str, List[str]]] = MOCK_RESPONSE_RAW["hits"][index]
    wanted_keys = ("url", "thumbnail_url", "title", "description")
    return {key: source[key] for key in wanted_keys}
def generate_parsed_output(num: Optional[int] = 0) -> List[Document]:
    """Build the documents expected from parsing one mock hit.

    Args:
        num: Index into ``MOCK_RESPONSE_RAW["hits"]``; ``None`` is treated as 0.

    Returns:
        One ``Document`` per snippet of the hit, in snippet order. Each
        document gets its own metadata dict from ``generate_parsed_metadata``.
    """
    index = 0 if num is None else num
    source: Dict[str, Union[str, List[str]]] = MOCK_RESPONSE_RAW["hits"][index]
    return [
        # Metadata is rebuilt per snippet so that documents never share a dict.
        Document(page_content=snippet, metadata=generate_parsed_metadata(index))
        for snippet in source["snippets"]
    ]
# Expected documents when every snippet of both mock hits is parsed.
MOCK_PARSED_OUTPUT = generate_parsed_output() + generate_parsed_output(1)

# Expected documents when only the first snippet of each mock hit is kept.
LIMITED_PARSED_OUTPUT = [
    generate_parsed_output(hit_index)[0] for hit_index in (0, 1)
]
# Canned news payload (taken from the you.com API docs) used as the mocked
# response body for the news route: a single result nested under news.results.
NEWS_RESPONSE_RAW = {
    "news": {
        "results": [
            {
                "age": "18 hours ago",
                "breaking": True,
                "description": "Search on YDC for the news",
                "meta_url": {
                    "hostname": "www.reuters.com",
                    "netloc": "reuters.com",
                    "path": " 2023 10 18 politics inflation index.html",
                    "scheme": "https",
                },
                "page_age": "2 days",
                "page_fetched": "2023-10-12T23:00:00Z",
                "thumbnail": {"original": "https://reuters.com/news.jpg"},
                "title": "Breaking News about the World's Greatest Search Engine!",
                "type": "news",
                "url": "https://news.you.com",
            }
        ]
    }
}
# Expected documents for the news payload: the description (coerced to str)
# is the page content and the complete raw result dict is the metadata.
NEWS_RESPONSE_PARSED = [
    Document(page_content=str(news_item["description"]), metadata=news_item)
    for news_item in NEWS_RESPONSE_RAW["news"]["results"]
]
@responses.activate
def test_raw_results() -> None:
    """raw_results() with endpoint_type="snippet" should equal the mocked payload.

    Only the /search route is mocked, so the request is expected to go there.
    """
    responses.add(
        responses.GET,
        f"{TEST_ENDPOINT}/search",
        json=MOCK_RESPONSE_RAW,
        status=200,
    )

    wrapper = YouSearchAPIWrapper(endpoint_type="snippet", ydc_api_key="test")
    payload = wrapper.raw_results("Test query text")

    assert payload == MOCK_RESPONSE_RAW
@responses.activate
def test_raw_results_defaults() -> None:
    """raw_results() on a wrapper given only an API key should equal the mocked payload.

    Only the /search route is mocked, so a wrapper without an explicit
    endpoint_type is expected to query it.
    """
    responses.add(
        responses.GET,
        f"{TEST_ENDPOINT}/search",
        json=MOCK_RESPONSE_RAW,
        status=200,
    )

    wrapper = YouSearchAPIWrapper(ydc_api_key="test")
    payload = wrapper.raw_results("Test query text")

    assert payload == MOCK_RESPONSE_RAW
@responses.activate
def test_raw_results_news() -> None:
    """raw_results() with endpoint_type="news" should equal the mocked news payload.

    Only the /news route is mocked, so the request is expected to go there.
    """
    responses.add(
        responses.GET,
        f"{TEST_ENDPOINT}/news",
        json=NEWS_RESPONSE_RAW,
        status=200,
    )

    wrapper = YouSearchAPIWrapper(endpoint_type="news", ydc_api_key="test")
    payload = wrapper.raw_results("Test news text")

    assert payload == NEWS_RESPONSE_RAW
@responses.activate
def test_results() -> None:
    """results() with default settings should yield one document per mock snippet."""
    responses.add(
        responses.GET,
        f"{TEST_ENDPOINT}/search",
        json=MOCK_RESPONSE_RAW,
        status=200,
    )

    wrapper = YouSearchAPIWrapper(ydc_api_key="test")
    documents = wrapper.results("Test query text")

    assert documents == MOCK_PARSED_OUTPUT
@responses.activate
def test_results_max_docs() -> None:
    """results() with k=2 should yield only the two documents of the first hit."""
    responses.add(
        responses.GET,
        f"{TEST_ENDPOINT}/search",
        json=MOCK_RESPONSE_RAW,
        status=200,
    )

    wrapper = YouSearchAPIWrapper(k=2, ydc_api_key="test")
    documents = wrapper.results("Test query text")

    # The first mock hit has two snippets, i.e. exactly k documents.
    assert documents == generate_parsed_output()
@responses.activate
def test_results_limit_snippets() -> None:
    """results() with n_snippets_per_hit=1 should keep one document per hit."""
    responses.add(
        responses.GET,
        f"{TEST_ENDPOINT}/search",
        json=MOCK_RESPONSE_RAW,
        status=200,
    )

    wrapper = YouSearchAPIWrapper(n_snippets_per_hit=1, ydc_api_key="test")
    documents = wrapper.results("Test query text")

    assert documents == LIMITED_PARSED_OUTPUT
@responses.activate
def test_results_news() -> None:
    """results() with endpoint_type="news" should yield the parsed news documents."""
    responses.add(
        responses.GET,
        f"{TEST_ENDPOINT}/news",
        json=NEWS_RESPONSE_RAW,
        status=200,
    )

    wrapper = YouSearchAPIWrapper(endpoint_type="news", ydc_api_key="test")
    documents = wrapper.results("Test news text")

    assert documents == NEWS_RESPONSE_PARSED
# @todo test async methods