From c7a55047897eabcd15a6d85ff0e5dac405330d17 Mon Sep 17 00:00:00 2001 From: KyrianC <67210837+KyrianC@users.noreply.github.com> Date: Fri, 1 Sep 2023 17:26:56 +0200 Subject: [PATCH] Add EdenAI Tools (#9764) This PR follows the Eden AI (LLM + embeddings) integration. #8633 We added different Tools to empower agents with new capabilities : - text: explicit content detection - image: explicit content detection - image: object detection - OCR: invoice parsing - OCR: ID parsing - audio: speech to text - audio: text to speech We plan to add more in the future (like translation, language detection, + others). Usage: ```python llm=EdenAI(feature="text",provider="openai", params={"temperature" : 0.2,"max_tokens" : 250}) tools = [ EdenAiTextModerationTool(providers=["openai"],language="en"), EdenAiObjectDetectionTool(providers=["google","api4ai"]), EdenAiTextToSpeechTool(providers=["amazon"],language="en",voice="MALE"), EdenAiExplicitImageTool(providers=["amazon","google"]), EdenAiSpeechToTextTool(providers=["amazon"]), EdenAiParsingIDTool(providers=["amazon","klippa"],language="en"), EdenAiParsingInvoiceTool(providers=["amazon","google"],language="en"), ] agent_chain = initialize_agent( tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, return_intermediate_steps=True, ) result = agent_chain(""" i have this text : 'i want to slap you' first : i want to know if this text contains explicit content or not . second : if it does contain explicit content i want to know what is the explicit content in this text, third : i want to make the text into speech . if there is URL in the observations , you will always put it in the output (final answer) . """) ``` output: > Entering new AgentExecutor chain... 
> I need to extract the information from the ID and then convert it to text and then to speech > Action: edenai_identity_parsing > Action Input: "https://www.citizencard.com/images/citizencard-uk-id-card-2023.jpg" > Observation: last_name : > value : ANGELA > given_names : > value : GREENE > birth_place : > birth_date : > value : 2000-11-09 > issuance_date : > expire_date : > document_id : > issuing_state : > address : > age : > country : > document_type : > value : DRIVER LICENSE FRONT > gender : > image_id : > image_signature : > mrz : > nationality : > Thought: I now need to convert the information to text and then to speech > Action: edenai_text_to_speech > Action Input: "Welcome Angela Greene!" > Observation: https://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5yHAJjf657u7Z1lFTBMoXGBuw1VYmyno-3TAiPeUcVlQXPueJ-ymZXmwaITmGOfH7HipZngZBziofRAFdhMYbIjYhegu5jS7TxHwRuox32A__&Key-Pair-Id=K1F55BTI9AHGIK > Thought: I now know the final answer > Final Answer: https://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5y > > Finished chain. Other examples are available in the jupyter notebook. This PR is made in parallel with EdenAI LLM update #8963 I apologize for the messy PR. While working on implementing the Tools, we realized there were a few problems we needed to fix on the LLM as well. 
Ping: @hwchase17, @baskaryan --------- Co-authored-by: RedhaWassim --- .../integrations/tools/edenai_tools.ipynb | 550 ++++++++++++++++++ libs/langchain/langchain/tools/__init__.py | 18 + .../langchain/tools/edenai/__init__.py | 34 ++ .../tools/edenai/audio_speech_to_text.py | 103 ++++ .../tools/edenai/audio_text_to_speech.py | 116 ++++ .../tools/edenai/edenai_base_tool.py | 160 +++++ .../tools/edenai/image_explicitcontent.py | 67 +++ .../tools/edenai/image_objectdetection.py | 75 +++ .../tools/edenai/ocr_identityparser.py | 68 +++ .../tools/edenai/ocr_invoiceparser.py | 72 +++ .../langchain/tools/edenai/text_moderation.py | 72 +++ .../tools/edenai/__init__.py | 0 .../tools/edenai/test_audio_speech_to_text.py | 25 + .../tools/edenai/test_audio_text_to_speech.py | 29 + .../edenai/test_image_explicitcontent.py | 23 + .../edenai/test_image_objectdetection.py | 23 + .../tools/edenai/test_ocr_identityparser.py | 25 + .../tools/edenai/test_ocr_invoiceparser.py | 23 + .../tools/edenai/test_text_moderation.py | 24 + .../unit_tests/tools/eden_ai/__init__.py | 0 .../unit_tests/tools/eden_ai/test_tools.py | 103 ++++ .../tests/unit_tests/tools/test_public_api.py | 8 + 22 files changed, 1618 insertions(+) create mode 100644 docs/extras/integrations/tools/edenai_tools.ipynb create mode 100644 libs/langchain/langchain/tools/edenai/__init__.py create mode 100644 libs/langchain/langchain/tools/edenai/audio_speech_to_text.py create mode 100644 libs/langchain/langchain/tools/edenai/audio_text_to_speech.py create mode 100644 libs/langchain/langchain/tools/edenai/edenai_base_tool.py create mode 100644 libs/langchain/langchain/tools/edenai/image_explicitcontent.py create mode 100644 libs/langchain/langchain/tools/edenai/image_objectdetection.py create mode 100644 libs/langchain/langchain/tools/edenai/ocr_identityparser.py create mode 100644 libs/langchain/langchain/tools/edenai/ocr_invoiceparser.py create mode 100644 libs/langchain/langchain/tools/edenai/text_moderation.py create mode 
100644 libs/langchain/tests/integration_tests/tools/edenai/__init__.py create mode 100644 libs/langchain/tests/integration_tests/tools/edenai/test_audio_speech_to_text.py create mode 100644 libs/langchain/tests/integration_tests/tools/edenai/test_audio_text_to_speech.py create mode 100644 libs/langchain/tests/integration_tests/tools/edenai/test_image_explicitcontent.py create mode 100644 libs/langchain/tests/integration_tests/tools/edenai/test_image_objectdetection.py create mode 100644 libs/langchain/tests/integration_tests/tools/edenai/test_ocr_identityparser.py create mode 100644 libs/langchain/tests/integration_tests/tools/edenai/test_ocr_invoiceparser.py create mode 100644 libs/langchain/tests/integration_tests/tools/edenai/test_text_moderation.py create mode 100644 libs/langchain/tests/unit_tests/tools/eden_ai/__init__.py create mode 100644 libs/langchain/tests/unit_tests/tools/eden_ai/test_tools.py diff --git a/docs/extras/integrations/tools/edenai_tools.ipynb b/docs/extras/integrations/tools/edenai_tools.ipynb new file mode 100644 index 0000000000..b8d8467232 --- /dev/null +++ b/docs/extras/integrations/tools/edenai_tools.ipynb @@ -0,0 +1,550 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# EDEN AI TOOLS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This Jupyter Notebook demonstrates how to use the Edenai tools with an Agent.\n", + "\n", + "Eden AI is revolutionizing the AI landscape by uniting the best AI providers, empowering users to unlock limitless possibilities and tap into the true potential of artificial intelligence. With an all-in-one comprehensive and hassle-free platform, it allows users to deploy AI features to production lightning fast, enabling effortless access to the full breadth of AI capabilities via a single API. 
(website: https://edenai.co/)\n", + "\n", + "\n", + "By including an Eden AI tool in the list of tools provided to an Agent, you can grant your Agent the ability to do multiple tasks, such as:\n", + "\n", + "- speech to text\n", + "\n", + "\n", + "- text to speech\n", + "\n", + "\n", + "- text explicit content detection\n", + "\n", + "\n", + "- image explicit content detection\n", + "\n", + "\n", + "- object detection\n", + "\n", + "\n", + "- OCR invoice parsing\n", + "\n", + "\n", + "- OCR ID parsing\n", + "\n", + "\n", + "\n", + "In this example, we will go through the process of using the Eden AI tools to create an Agent that can perform some of the tasks listed above.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---------------------------------------------------------------------------\n", + "Accessing the Eden AI API requires an API key,\n", + "\n", + "which you can get by creating an account at https://app.edenai.run/user/register and then going to https://app.edenai.run/admin/account/settings\n", + "\n", + "Once we have a key we'll want to set it as an environment variable by running:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "export EDENAI_API_KEY=\"...\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you'd prefer not to set an environment variable, you can pass the key in directly via the `edenai_api_key` named parameter\n", + "\n", + "when instantiating the Eden AI tools:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools.edenai.text_moderation import (\n", + " EdenAiTextModerationTool,\n", + ")\n", + "tools = [\n", + " EdenAiTextModerationTool(edenai_api_key=\"...\")\n", + "\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools.edenai 
import (\n", + " EdenAiSpeechToTextTool,\n", + " EdenAiTextToSpeechTool,\n", + " EdenAiExplicitImageTool,\n", + " EdenAiObjectDetectionTool,\n", + " EdenAiParsingIDTool,\n", + " EdenAiParsingInvoiceTool,\n", + " EdenAiTextModerationTool,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import EdenAI\n", + "from langchain.agents import initialize_agent, AgentType\n", + "\n", + "llm=EdenAI(feature=\"text\",provider=\"openai\", params={\"temperature\" : 0.2,\"max_tokens\" : 250})\n", + "\n", + "tools = [\n", + " EdenAiTextModerationTool(providers=[\"openai\"],language=\"en\"),\n", + " EdenAiObjectDetectionTool(providers=[\"google\",\"api4ai\"]),\n", + " EdenAiTextToSpeechTool(providers=[\"amazon\"],language=\"en\",voice=\"MALE\"),\n", + " EdenAiExplicitImageTool(providers=[\"amazon\",\"google\"]),\n", + " EdenAiSpeechToTextTool(providers=[\"amazon\"]),\n", + " EdenAiParsingIDTool(providers=[\"amazon\",\"klippa\"],language=\"en\"),\n", + " EdenAiParsingInvoiceTool(providers=[\"amazon\",\"google\"],language=\"en\"),\n", + "]\n", + "agent_chain = initialize_agent(\n", + " tools,\n", + " llm,\n", + " agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n", + " verbose=True,\n", + " return_intermediate_steps=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example with text" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to scan the text for explicit content and then convert it to speech\n", + "Action: edenai_explicit_content_detection_text\n", + "Action Input: 'i want to slap you'\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mnsfw_likelihood: 3\n", + "\"sexual\": 1\n", + "\"hate\": 1\n", + "\"harassment\": 1\n", + 
"\"self-harm\": 1\n", + "\"sexual/minors\": 1\n", + "\"hate/threatening\": 1\n", + "\"violence/graphic\": 1\n", + "\"self-harm/intent\": 1\n", + "\"self-harm/instructions\": 1\n", + "\"harassment/threatening\": 1\n", + "\"violence\": 3\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now need to convert the text to speech\n", + "Action: edenai_text_to_speech\n", + "Action Input: 'i want to slap you'\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mhttps://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tnXX1lV2DGc5PNB66Lqrr0Fpe2trVJj2k8cLduIb8dbtqLPNIDCsV0N4QT10utZmhZcPpcSIBsdomw1Os1IjdG4nA8ZTIddAcLMCWJznttzl66vHPk26rjDpG5doMTTsPEz8ZKILQ__&Key-Pair-Id=K1F55BTI9AHGIK\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The text contains explicit content of violence with a likelihood of 3. 
The audio file of the text can be found at https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tn\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "result=agent_chain(\"\"\" i have this text : 'i want to slap you' \n", + " first : i want to know if this text contains explicit content or not .\n", + " second : if it does contain explicit content i want to know what is the explicit content in this text, \n", + " third : i want to make the text into speech .\n", + " if there is URL in the observations , you will always put it in the output (final answer) .\n", + "\n", + " \"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "you can have more details of the execution by printing the result " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'The text contains explicit content of violence with a likelihood of 3. 
The audio file of the text can be found at https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tn'" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['output']" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input': \" i have this text : 'i want to slap you' \\n first : i want to know if this text contains explicit content or not .\\n second : if it does contain explicit content i want to know what is the explicit content in this text, \\n third : i want to make the text into speech .\\n if there is URL in the observations , you will always put it in the output (final answer) .\\n\\n \",\n", + " 'output': 'The text contains explicit content of violence with a likelihood of 3. 
The audio file of the text can be found at https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tn',\n", + " 'intermediate_steps': [(AgentAction(tool='edenai_explicit_content_detection_text', tool_input=\"'i want to slap you'\", log=\" I need to scan the text for explicit content and then convert it to speech\\nAction: edenai_explicit_content_detection_text\\nAction Input: 'i want to slap you'\"),\n", + " 'nsfw_likelihood: 3\\n\"sexual\": 1\\n\"hate\": 1\\n\"harassment\": 1\\n\"self-harm\": 1\\n\"sexual/minors\": 1\\n\"hate/threatening\": 1\\n\"violence/graphic\": 1\\n\"self-harm/intent\": 1\\n\"self-harm/instructions\": 1\\n\"harassment/threatening\": 1\\n\"violence\": 3'),\n", + " (AgentAction(tool='edenai_text_to_speech', tool_input=\"'i want to slap you'\", log=\" I now need to convert the text to speech\\nAction: edenai_text_to_speech\\nAction Input: 'i want to slap you'\"),\n", + " 'https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tnXX1lV2DGc5PNB66Lqrr0Fpe2trVJj2k8cLduIb8dbtqLPNIDCsV0N4QT10utZmhZcPpcSIBsdomw1Os1IjdG4nA8ZTIddAcLMCWJznttzl66vHPk26rjDpG5doMTTsPEz8ZKILQ__&Key-Pair-Id=K1F55BTI9AHGIK')]}" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example with images" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to determine if the image contains objects, if any of them are harmful, and then convert the text to speech.\n", + "Action: edenai_object_detection\n", + "Action Input: https://static.javatpoint.com/images/objects.jpg\u001b[0m\n", + "Observation: \u001b[33;1m\u001b[1;3mApple - Confidence 0.94003654\n", + "Apple - Confidence 0.94003654\n", + "Apple - Confidence 0.94003654\n", + "Backpack - Confidence 0.7481894\n", + "Backpack - Confidence 0.7481894\n", + "Backpack - Confidence 0.7481894\n", + "Luggage & bags - Confidence 0.70691586\n", + "Luggage & bags - Confidence 0.70691586\n", + "Luggage & bags - Confidence 0.70691586\n", + "Container - Confidence 0.654727\n", + "Container - Confidence 0.654727\n", + "Container - Confidence 0.654727\n", + "Luggage & bags - Confidence 0.5871518\n", + "Luggage & bags - Confidence 0.5871518\n", + "Luggage & bags - Confidence 0.5871518\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I need to check if any of the objects are harmful.\n", + "Action: edenai_explicit_content_detection_text\n", + "Action Input: Apple, Backpack, Luggage & bags, Container\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mnsfw_likelihood: 2\n", + "\"sexually explicit\": 1\n", + "\"sexually suggestive\": 2\n", + "\"offensive\": 1\n", + "nsfw_likelihood: 1\n", + "\"sexual\": 1\n", + "\"hate\": 1\n", + "\"harassment\": 1\n", + "\"self-harm\": 1\n", + "\"sexual/minors\": 1\n", + "\"hate/threatening\": 1\n", + "\"violence/graphic\": 1\n", + "\"self-harm/intent\": 1\n", + "\"self-harm/instructions\": 1\n", + "\"harassment/threatening\": 1\n", + "\"violence\": 1\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m None of the objects are harmful.\n", + "Action: edenai_text_to_speech\n", + "Action Input: 'this item is safe'\u001b[0m\n", + "Observation: 
\u001b[38;5;200m\u001b[1;3mhttps://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eytV0CrnHrTs~eXZkSnOdD2Fu0ECaKvFHlsF4IDLI8efRvituSk0X3ygdec4HQojl5vmBXJzi1TuhKWOX8UxeQle8pdjjqUPSJ9thTHpucdPy6UbhZOH0C9rbtLrCfvK5rzrT4D~gKy9woICzG34tKRxNxHYVVUPqx2BiInA__&Key-Pair-Id=K1F55BTI9AHGIK\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", + "Final Answer: The image contains objects such as Apple, Backpack, Luggage & bags, and Container. None of them are harmful. The text 'this item is safe' can be found in the audio file at https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eyt\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "result=agent_chain(\"\"\" i have this url of an image : \"https://static.javatpoint.com/images/objects.jpg\"\n", + " first : i want to know if the image contain objects .\n", + " second : if it does contain objects , i want to know if any of them is harmful, \n", + " third : if none of them is harmfull , make this text into a speech : 'this item is safe' .\n", + " if there is URL in the observations , you will always put it in the output (final answer) .\n", + " \"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"The image contains objects such as Apple, Backpack, Luggage & bags, and Container. None of them are harmful. 
The text 'this item is safe' can be found in the audio file at https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eyt\"" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['output']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "you can have more details of the execution by printing the result " + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input': ' i have this url of an image : \"https://static.javatpoint.com/images/objects.jpg\"\\n first : i want to know if the image contain objects .\\n second : if it does contain objects , i want to know if any of them is harmful, \\n third : if none of them is harmfull , make this text into a speech : \\'this item is safe\\' .\\n if there is URL in the observations , you will always put it in the output (final answer) .\\n ',\n", + " 'output': \"The image contains objects such as Apple, Backpack, Luggage & bags, and Container. None of them are harmful. 
The text 'this item is safe' can be found in the audio file at https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eyt\",\n", + " 'intermediate_steps': [(AgentAction(tool='edenai_object_detection', tool_input='https://static.javatpoint.com/images/objects.jpg', log=' I need to determine if the image contains objects, if any of them are harmful, and then convert the text to speech.\\nAction: edenai_object_detection\\nAction Input: https://static.javatpoint.com/images/objects.jpg'),\n", + " 'Apple - Confidence 0.94003654\\nApple - Confidence 0.94003654\\nApple - Confidence 0.94003654\\nBackpack - Confidence 0.7481894\\nBackpack - Confidence 0.7481894\\nBackpack - Confidence 0.7481894\\nLuggage & bags - Confidence 0.70691586\\nLuggage & bags - Confidence 0.70691586\\nLuggage & bags - Confidence 0.70691586\\nContainer - Confidence 0.654727\\nContainer - Confidence 0.654727\\nContainer - Confidence 0.654727\\nLuggage & bags - Confidence 0.5871518\\nLuggage & bags - Confidence 0.5871518\\nLuggage & bags - Confidence 0.5871518'),\n", + " (AgentAction(tool='edenai_explicit_content_detection_text', tool_input='Apple, Backpack, Luggage & bags, Container', log=' I need to check if any of the objects are harmful.\\nAction: edenai_explicit_content_detection_text\\nAction Input: Apple, Backpack, Luggage & bags, Container'),\n", + " 'nsfw_likelihood: 2\\n\"sexually explicit\": 1\\n\"sexually suggestive\": 2\\n\"offensive\": 1\\nnsfw_likelihood: 1\\n\"sexual\": 1\\n\"hate\": 1\\n\"harassment\": 1\\n\"self-harm\": 1\\n\"sexual/minors\": 1\\n\"hate/threatening\": 1\\n\"violence/graphic\": 1\\n\"self-harm/intent\": 1\\n\"self-harm/instructions\": 1\\n\"harassment/threatening\": 1\\n\"violence\": 1'),\n", + " (AgentAction(tool='edenai_text_to_speech', 
tool_input=\"'this item is safe'\", log=\" None of the objects are harmful.\\nAction: edenai_text_to_speech\\nAction Input: 'this item is safe'\"),\n", + " 'https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eytV0CrnHrTs~eXZkSnOdD2Fu0ECaKvFHlsF4IDLI8efRvituSk0X3ygdec4HQojl5vmBXJzi1TuhKWOX8UxeQle8pdjjqUPSJ9thTHpucdPy6UbhZOH0C9rbtLrCfvK5rzrT4D~gKy9woICzG34tKRxNxHYVVUPqx2BiInA__&Key-Pair-Id=K1F55BTI9AHGIK')]}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example with OCR images" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to extract the information from the ID and then convert it to text and then to speech\n", + "Action: edenai_identity_parsing\n", + "Action Input: \"https://www.citizencard.com/images/citizencard-uk-id-card-2023.jpg\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mlast_name : \n", + " value : ANGELA\n", + "given_names : \n", + " value : GREENE\n", + "birth_place : \n", + "birth_date : \n", + " value : 2000-11-09\n", + "issuance_date : \n", + "expire_date : \n", + "document_id : \n", + "issuing_state : \n", + "address : \n", + "age : \n", + "country : \n", + "document_type : \n", + " value : DRIVER LICENSE FRONT\n", + "gender : \n", + "image_id : \n", + "image_signature : \n", + "mrz : \n", + "nationality : \u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now need to convert the information to text and then to speech\n", + "Action: 
edenai_text_to_speech\n", + "Action Input: \"Welcome Angela Greene!\"\u001b[0m\n", + "Observation: \u001b[38;5;200m\u001b[1;3mhttps://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5yHAJjf657u7Z1lFTBMoXGBuw1VYmyno-3TAiPeUcVlQXPueJ-ymZXmwaITmGOfH7HipZngZBziofRAFdhMYbIjYhegu5jS7TxHwRuox32A__&Key-Pair-Id=K1F55BTI9AHGIK\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: https://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5y\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "result=agent_chain(\"\"\" i have this url of an id: \"https://www.citizencard.com/images/citizencard-uk-id-card-2023.jpg\"\n", + " i want to extract the information in it.\n", + " create a text welcoming the person by his name and make it into speech \n", + " if there is URL in the observations , you will always put it in the output (final answer) .\n", + " \"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5y'" + ] + 
}, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['output']" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m I need to extract information from the invoice document\n", + "Action: edenai_invoice_parsing\n", + "Action Input: \"https://app.edenai.run/assets/img/data_1.72e3bdcc.png\"\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3mcustomer_information : \n", + " customer_name : Damita J Goldsmith\n", + " customer_address : 201 Stan Fey Dr,Upper Marlboro, MD 20774\n", + " customer_shipping_address : 201 Stan Fey Drive,Upper Marlboro\n", + "merchant_information : \n", + " merchant_name : SNG Engineering Inc\n", + " merchant_address : 344 Main St #200 Gaithersburg, MD 20878 USA\n", + " merchant_phone : +1 301 548 0055\n", + "invoice_number : 014-03\n", + "taxes : \n", + "payment_term : on receipt of service\n", + "date : 2003-01-20\n", + "po_number : \n", + "locale : \n", + "bank_informations : \n", + "item_lines : \n", + " description : Field inspection of construction on 1/19/2003 deficiencies in house,construction, Garage drive way & legal support to Attorney to\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the answer to the questions\n", + "Final Answer: The customer is Damita J Goldsmith and the company name is SNG Engineering Inc.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "result=agent_chain(\"\"\" i have this url of an invoice document: \"https://app.edenai.run/assets/img/data_1.72e3bdcc.png\"\n", + " i want to extract the information in it.\n", + " and answer these questions :\n", + " who is the customer ?\n", + " what is the company name ? 
\n", + " \n", + " \"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'The customer is Damita J Goldsmith and the company name is SNG Engineering Inc.'" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['output']" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/langchain/langchain/tools/__init__.py b/libs/langchain/langchain/tools/__init__.py index 14e34d7320..56958d90b2 100644 --- a/libs/langchain/langchain/tools/__init__.py +++ b/libs/langchain/langchain/tools/__init__.py @@ -34,6 +34,16 @@ from langchain.tools.bing_search.tool import BingSearchResults, BingSearchRun from langchain.tools.brave_search.tool import BraveSearch from langchain.tools.convert_to_openai import format_tool_to_openai_function from langchain.tools.ddg_search.tool import DuckDuckGoSearchResults, DuckDuckGoSearchRun +from langchain.tools.edenai import ( + EdenAiExplicitImageTool, + EdenAiObjectDetectionTool, + EdenAiParsingIDTool, + EdenAiParsingInvoiceTool, + EdenAiSpeechToTextTool, + EdenAiTextModerationTool, + EdenAiTextToSpeechTool, + EdenaiTool, +) from langchain.tools.file_management import ( CopyFileTool, DeleteFileTool, @@ -149,6 +159,14 @@ __all__ = [ "DeleteFileTool", "DuckDuckGoSearchResults", "DuckDuckGoSearchRun", + "EdenAiExplicitImageTool", + "EdenAiObjectDetectionTool", + "EdenAiParsingIDTool", + "EdenAiParsingInvoiceTool", + "EdenAiTextToSpeechTool", + "EdenAiSpeechToTextTool", + "EdenAiTextModerationTool", + 
"EdenaiTool", "ExtractHyperlinksTool", "ExtractTextTool", "FileSearchTool", diff --git a/libs/langchain/langchain/tools/edenai/__init__.py b/libs/langchain/langchain/tools/edenai/__init__.py new file mode 100644 index 0000000000..b1c254ba5f --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/__init__.py @@ -0,0 +1,34 @@ +"""Edenai Tools.""" +from langchain.tools.edenai.audio_speech_to_text import ( + EdenAiSpeechToTextTool, +) +from langchain.tools.edenai.audio_text_to_speech import ( + EdenAiTextToSpeechTool, +) +from langchain.tools.edenai.edenai_base_tool import EdenaiTool +from langchain.tools.edenai.image_explicitcontent import ( + EdenAiExplicitImageTool, +) +from langchain.tools.edenai.image_objectdetection import ( + EdenAiObjectDetectionTool, +) +from langchain.tools.edenai.ocr_identityparser import ( + EdenAiParsingIDTool, +) +from langchain.tools.edenai.ocr_invoiceparser import ( + EdenAiParsingInvoiceTool, +) +from langchain.tools.edenai.text_moderation import ( + EdenAiTextModerationTool, +) + +__all__ = [ + "EdenAiExplicitImageTool", + "EdenAiObjectDetectionTool", + "EdenAiParsingIDTool", + "EdenAiParsingInvoiceTool", + "EdenAiTextToSpeechTool", + "EdenAiSpeechToTextTool", + "EdenAiTextModerationTool", + "EdenaiTool", +] diff --git a/libs/langchain/langchain/tools/edenai/audio_speech_to_text.py b/libs/langchain/langchain/tools/edenai/audio_speech_to_text.py new file mode 100644 index 0000000000..28dbce47d0 --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/audio_speech_to_text.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import json +import logging +import time +from typing import List, Optional + +import requests + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.pydantic_v1 import validator +from langchain.tools.edenai.edenai_base_tool import EdenaiTool + +logger = logging.getLogger(__name__) + + +class EdenAiSpeechToTextTool(EdenaiTool): + """Tool that queries the Eden AI Speech To Text 
API. + + for api reference check edenai documentation: + https://app.edenai.run/bricks/speech/asynchronous-speech-to-text. + + To use, you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + + """ + + edenai_api_key: Optional[str] = None + + name = "edenai_speech_to_text" + description = ( + "A wrapper around edenai Services speech to text " + "Useful for when you have to convert audio to text." + "Input should be a url to an audio file." + ) + is_async = True + + language: Optional[str] = "en" + speakers: Optional[int] + profanity_filter: bool = False + custom_vocabulary: Optional[List[str]] + + feature: str = "audio" + subfeature: str = "speech_to_text_async" + base_url = "https://api.edenai.run/v2/audio/speech_to_text_async/" + + @validator("providers") + def check_only_one_provider_selected(cls, v: List[str]) -> List[str]: + """ + This tool has no feature to combine providers results. + Therefore we only allow one provider + """ + if len(v) > 1: + raise ValueError( + "Please select only one provider. " + "The feature to combine providers results is not available " + "for this tool." 
+ ) + return v + + def _wait_processing(self, url: str) -> requests.Response: + for _ in range(10): + time.sleep(1) + audio_analysis_result = self._get_edenai(url) + temp = audio_analysis_result.json() + if temp["status"] == "finished": + if temp["results"][self.providers[0]]["error"] is not None: + raise Exception( + f"""EdenAI returned an unexpected response + {temp['results'][self.providers[0]]['error']}""" + ) + else: + return audio_analysis_result + + raise Exception("Edenai speech to text job id processing Timed out") + + def _parse_response(self, response: dict) -> str: + return response["public_id"] + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + all_params = { + "file_url": query, + "language": self.language, + "speakers": self.speakers, + "profanity_filter": self.profanity_filter, + "custom_vocabulary": self.custom_vocabulary, + } + + # filter so we don't send val to api when val is `None` + query_params = {k: v for k, v in all_params.items() if v is not None} + + job_id = self._call_eden_ai(query_params) + url = self.base_url + job_id + audio_analysis_result = self._wait_processing(url) + result = audio_analysis_result.text + formatted_text = json.loads(result) + return formatted_text["results"][self.providers[0]]["text"] diff --git a/libs/langchain/langchain/tools/edenai/audio_text_to_speech.py b/libs/langchain/langchain/tools/edenai/audio_text_to_speech.py new file mode 100644 index 0000000000..968d03c1a2 --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/audio_text_to_speech.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import logging +from typing import Dict, List, Literal, Optional + +import requests + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.pydantic_v1 import Field, root_validator, validator +from langchain.tools.edenai.edenai_base_tool import EdenaiTool + +logger = logging.getLogger(__name__) + + +class 
EdenAiTextToSpeechTool(EdenaiTool): + """Tool that queries the Eden AI Text to speech API. + for api reference check edenai documentation: + https://docs.edenai.co/reference/audio_text_to_speech_create. + + To use, you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + + """ + + name = "edenai_text_to_speech" + description = ( + "A wrapper around edenai Services text to speech." + "Useful for when you need to convert text to speech." + """the output is a string representing the URL of the audio file, + or the path to the downloaded wav file """ + ) + + language: Optional[str] = "en" + """ + language of the text passed to the model. + """ + + # optional params see api documentation for more info + return_type: Literal["url", "wav"] = "url" + rate: Optional[int] + pitch: Optional[int] + volume: Optional[int] + audio_format: Optional[str] + sampling_rate: Optional[int] + voice_models: Dict[str, str] = Field(default_factory=dict) + + voice: Literal["MALE", "FEMALE"] + """voice option : 'MALE' or 'FEMALE' """ + + feature: str = "audio" + subfeature: str = "text_to_speech" + + @validator("providers") + def check_only_one_provider_selected(cls, v: List[str]) -> List[str]: + """ + This tool has no feature to combine providers results. + Therefore we only allow one provider + """ + if len(v) > 1: + raise ValueError( + "Please select only one provider. " + "The feature to combine providers results is not available " + "for this tool." 
+ ) + return v + + @root_validator + def check_voice_models_key_is_provider_name(cls, values: dict) -> dict: + for key in values.get("voice_models", {}).keys(): + if key not in values.get("providers", []): + raise ValueError( + "voice_model should be formatted like this " + "{<provider_name>: <its_voice_model>}" + ) + return values + + def _download_wav(self, url: str, save_path: str) -> None: + response = requests.get(url) + if response.status_code == 200: + with open(save_path, "wb") as f: + f.write(response.content) + else: + raise ValueError("Error while downloading wav file") + + def _parse_response(self, response: list) -> str: + result = response[0] + if self.return_type == "url": + return result["audio_resource_url"] + else: + self._download_wav(result["audio_resource_url"], "audio.wav") + return "audio.wav" + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + all_params = { + "text": query, + "language": self.language, + "option": self.voice, + "return_type": self.return_type, + "rate": self.rate, + "pitch": self.pitch, + "volume": self.volume, + "audio_format": self.audio_format, + "sampling_rate": self.sampling_rate, + "settings": self.voice_models, + } + + # filter so we don't send val to api when val is `None` + query_params = {k: v for k, v in all_params.items() if v is not None} + + return self._call_eden_ai(query_params) diff --git a/libs/langchain/langchain/tools/edenai/edenai_base_tool.py b/libs/langchain/langchain/tools/edenai/edenai_base_tool.py new file mode 100644 index 0000000000..e0ebc543a8 --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/edenai_base_tool.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +import logging +from abc import abstractmethod +from typing import Any, Dict, List, Optional + +import requests + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.pydantic_v1 import root_validator +from langchain.tools.base import BaseTool +from 
langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class EdenaiTool(BaseTool): + + """ + the base tool for all the EdenAI Tools . + you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + """ + + feature: str + subfeature: str + edenai_api_key: Optional[str] = None + is_async: bool = False + + providers: List[str] + """provider to use for the API call.""" + + @root_validator(allow_reuse=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + values["edenai_api_key"] = get_from_dict_or_env( + values, "edenai_api_key", "EDENAI_API_KEY" + ) + return values + + @staticmethod + def get_user_agent() -> str: + from langchain import __version__ + + return f"langchain/{__version__}" + + def _call_eden_ai(self, query_params: Dict[str, Any]) -> str: + """ + Make an API call to the EdenAI service with the specified query parameters. + + Args: + query_params (dict): The parameters to include in the API call. + + Returns: + str: The JSON response of the API call, as formatted by `_parse_response`. 
+ + """ + + # make the API call + + headers = { + "Authorization": f"Bearer {self.edenai_api_key}", + "User-Agent": self.get_user_agent(), + } + + url = f"https://api.edenai.run/v2/{self.feature}/{self.subfeature}" + + payload = { + "providers": str(self.providers), + "response_as_dict": False, + "attributes_as_list": True, + "show_original_response": False, + } + + payload.update(query_params) + + response = requests.post(url, json=payload, headers=headers) + + self._raise_on_error(response) + + try: + return self._parse_response(response.json()) + except Exception as e: + raise RuntimeError(f"An error occurred while running tool: {e}") + + def _raise_on_error(self, response: requests.Response) -> None: + if response.status_code >= 500: + raise Exception(f"EdenAI Server: Error {response.status_code}") + elif response.status_code >= 400: + raise ValueError(f"EdenAI received an invalid payload: {response.text}") + elif response.status_code != 200: + raise Exception( + f"EdenAI returned an unexpected response with status " + f"{response.status_code}: {response.text}" + ) + + # case where edenai call succeeded but provider returned an error + # (eg: rate limit, server error, etc.) 
+ if self.is_async is False: + # async calls are different and only return a job_id, + # not the provider response directly + provider_response = response.json()[0] + if provider_response.get("status") == "fail": + err_msg = provider_response["error"]["message"] + raise ValueError(err_msg) + + @abstractmethod + def _run( + self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None + ) -> str: + pass + + @abstractmethod + def _parse_response(self, response: Any) -> str: + """Take a dict response and condense its data in a human-readable string""" + pass + + def _get_edenai(self, url: str) -> requests.Response: + headers = { + "accept": "application/json", + "authorization": f"Bearer {self.edenai_api_key}", + "User-Agent": self.get_user_agent(), + } + + response = requests.get(url, headers=headers) + + self._raise_on_error(response) + + return response + + def _parse_json_multilevel( + self, extracted_data: dict, formatted_list: list, level: int = 0 + ) -> None: + for section, subsections in extracted_data.items(): + indentation = " " * level + if isinstance(subsections, str): + subsections = subsections.replace("\n", ",") + formatted_list.append(f"{indentation}{section} : {subsections}") + + elif isinstance(subsections, list): + formatted_list.append(f"{indentation}{section} : ") + self._list_handling(subsections, formatted_list, level + 1) + + elif isinstance(subsections, dict): + formatted_list.append(f"{indentation}{section} : ") + self._parse_json_multilevel(subsections, formatted_list, level + 1) + + def _list_handling( + self, subsection_list: list, formatted_list: list, level: int + ) -> None: + for list_item in subsection_list: + if isinstance(list_item, dict): + self._parse_json_multilevel(list_item, formatted_list, level) + + elif isinstance(list_item, list): + self._list_handling(list_item, formatted_list, level + 1) + + else: + formatted_list.append(f"{' ' * level}{list_item}") diff --git 
a/libs/langchain/langchain/tools/edenai/image_explicitcontent.py b/libs/langchain/langchain/tools/edenai/image_explicitcontent.py new file mode 100644 index 0000000000..e1367a8567 --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/image_explicitcontent.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import logging +from typing import Optional + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.tools.edenai.edenai_base_tool import EdenaiTool + +logger = logging.getLogger(__name__) + + +class EdenAiExplicitImageTool(EdenaiTool): + + """Tool that queries the Eden AI Explicit image detection. + + for api reference check edenai documentation: + https://docs.edenai.co/reference/image_explicit_content_create. + + To use, you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + + """ + + name = "edenai_image_explicit_content_detection" + + description = ( + "A wrapper around edenai Services Explicit image detection. " + """Useful for when you have to extract Explicit Content from images. + it detects adult only content in images, + that is generally inappropriate for people under + the age of 18 and includes nudity, sexual activity, + pornography, violence, gore content, etc.""" + "Input should be the string url of the image ." 
+ ) + + combine_available = True + feature = "image" + subfeature = "explicit_content" + + def _parse_json(self, json_data: dict) -> str: + result_str = f"nsfw_likelihood: {json_data['nsfw_likelihood']}\n" + for idx, found_obj in enumerate(json_data["items"]): + label = found_obj["label"].lower() + likelihood = found_obj["likelihood"] + result_str += f"{idx}: {label} likelihood {likelihood},\n" + + return result_str[:-2] + + def _parse_response(self, json_data: list) -> str: + if len(json_data) == 1: + result = self._parse_json(json_data[0]) + else: + for entry in json_data: + if entry.get("provider") == "eden-ai": + result = self._parse_json(entry) + + return result + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + query_params = {"file_url": query, "attributes_as_list": False} + return self._call_eden_ai(query_params) diff --git a/libs/langchain/langchain/tools/edenai/image_objectdetection.py b/libs/langchain/langchain/tools/edenai/image_objectdetection.py new file mode 100644 index 0000000000..764d976851 --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/image_objectdetection.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import logging +from typing import Optional + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.tools.edenai.edenai_base_tool import EdenaiTool + +logger = logging.getLogger(__name__) + + +class EdenAiObjectDetectionTool(EdenaiTool): + """Tool that queries the Eden AI Object detection API. + + for api reference check edenai documentation: + https://docs.edenai.co/reference/image_object_detection_create. + + To use, you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + + """ + + name = "edenai_object_detection" + + description = ( + "A wrapper around edenai Services Object Detection . 
" + """Useful for when you have to do an to identify and locate + (with bounding boxes) objects in an image """ + "Input should be the string url of the image to identify." + ) + + show_positions: bool = False + + feature = "image" + subfeature = "object_detection" + + def _parse_json(self, json_data: dict) -> str: + result = [] + label_info = [] + + for found_obj in json_data["items"]: + label_str = f"{found_obj['label']} - Confidence {found_obj['confidence']}" + x_min = found_obj.get("x_min") + x_max = found_obj.get("x_max") + y_min = found_obj.get("y_min") + y_max = found_obj.get("y_max") + if self.show_positions and all( + [x_min, x_max, y_min, y_max] + ): # some providers don't return positions + label_str += f""",at the position x_min: {x_min}, x_max: {x_max}, + y_min: {y_min}, y_max: {y_max}""" + label_info.append(label_str) + + result.append("\n".join(label_info)) + return "\n\n".join(result) + + def _parse_response(self, response: list) -> str: + if len(response) == 1: + result = self._parse_json(response[0]) + else: + for entry in response: + if entry.get("provider") == "eden-ai": + result = self._parse_json(entry) + + return result + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + query_params = {"file_url": query, "attributes_as_list": False} + return self._call_eden_ai(query_params) diff --git a/libs/langchain/langchain/tools/edenai/ocr_identityparser.py b/libs/langchain/langchain/tools/edenai/ocr_identityparser.py new file mode 100644 index 0000000000..55ccb477e2 --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/ocr_identityparser.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import logging +from typing import Optional + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.tools.edenai.edenai_base_tool import EdenaiTool + +logger = logging.getLogger(__name__) + + +class EdenAiParsingIDTool(EdenaiTool): + """Tool that 
queries the Eden AI Identity parsing API. + + for api reference check edenai documentation: + https://docs.edenai.co/reference/ocr_identity_parser_create. + + To use, you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + + """ + + name = "edenai_identity_parsing" + + description = ( + "A wrapper around edenai Services Identity parsing. " + "Useful for when you have to extract information from an ID Document " + "Input should be the string url of the document to parse." + ) + + feature = "ocr" + subfeature = "identity_parser" + + language: Optional[str] = None + """ + language of the text passed to the model. + """ + + def _parse_response(self, response: list) -> str: + formatted_list: list = [] + + if len(response) == 1: + self._parse_json_multilevel( + response[0]["extracted_data"][0], formatted_list + ) + else: + for entry in response: + if entry.get("provider") == "eden-ai": + self._parse_json_multilevel( + entry["extracted_data"][0], formatted_list + ) + + return "\n".join(formatted_list) + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + query_params = { + "file_url": query, + "language": self.language, + "attributes_as_list": False, + } + + return self._call_eden_ai(query_params) diff --git a/libs/langchain/langchain/tools/edenai/ocr_invoiceparser.py b/libs/langchain/langchain/tools/edenai/ocr_invoiceparser.py new file mode 100644 index 0000000000..4bbf9ee923 --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/ocr_invoiceparser.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import logging +from typing import Optional + +from langchain.callbacks.manager import CallbackManagerForToolRun +from langchain.tools.edenai.edenai_base_tool import EdenaiTool + +logger = logging.getLogger(__name__) + + +class EdenAiParsingInvoiceTool(EdenaiTool): + """Tool that 
queries the Eden AI Invoice parsing API. + + for api reference check edenai documentation: + https://docs.edenai.co/reference/ocr_invoice_parser_create. + + To use, you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + + """ + + name = "edenai_invoice_parsing" + + description = ( + "A wrapper around edenai Services invoice parsing. " + """Useful for when you have to extract information from + an image it enables to take invoices + in a variety of formats and returns the data in contains + (items, prices, addresses, vendor name, etc.) + in a structured format to automate the invoice processing """ + "Input should be the string url of the document to parse." + ) + + language: Optional[str] = None + """ + language of the image passed to the model. + """ + + feature = "ocr" + subfeature = "invoice_parser" + + def _parse_response(self, response: list) -> str: + formatted_list: list = [] + + if len(response) == 1: + self._parse_json_multilevel( + response[0]["extracted_data"][0], formatted_list + ) + else: + for entry in response: + if entry.get("provider") == "eden-ai": + self._parse_json_multilevel( + entry["extracted_data"][0], formatted_list + ) + + return "\n".join(formatted_list) + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + query_params = { + "file_url": query, + "language": self.language, + "attributes_as_list": False, + } + + return self._call_eden_ai(query_params) diff --git a/libs/langchain/langchain/tools/edenai/text_moderation.py b/libs/langchain/langchain/tools/edenai/text_moderation.py new file mode 100644 index 0000000000..f03f1d58ae --- /dev/null +++ b/libs/langchain/langchain/tools/edenai/text_moderation.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import logging +from typing import Optional + +from langchain.callbacks.manager import 
CallbackManagerForToolRun +from langchain.tools.edenai.edenai_base_tool import EdenaiTool + +logger = logging.getLogger(__name__) + + +class EdenAiTextModerationTool(EdenaiTool): + """Tool that queries the Eden AI Explicit text detection. + + for api reference check edenai documentation: + https://docs.edenai.co/reference/image_explicit_content_create. + + To use, you should have + the environment variable ``EDENAI_API_KEY`` set with your API token. + You can find your token here: https://app.edenai.run/admin/account/settings + + """ + + name = "edenai_explicit_content_detection_text" + + description = ( + "A wrapper around edenai Services explicit content detection for text. " + """Useful for when you have to scan text for offensive, + sexually explicit or suggestive content, + it checks also if there is any content of self-harm, + violence, racist or hate speech.""" + """the structure of the output is : + 'the type of the explicit content : the likelihood of it being explicit' + the likelihood is a number + between 1 and 5, 1 being the lowest and 5 the highest. + something is explicit if the likelihood is equal or higher than 3. + for example : + nsfw_likelihood: 1 + this is not explicit. + for example : + nsfw_likelihood: 3 + this is explicit. + """ + "Input should be a string." 
+ ) + + language: str + + feature: str = "text" + subfeature: str = "moderation" + + def _parse_response(self, response: list) -> str: + formatted_result = [] + for result in response: + if "nsfw_likelihood" in result.keys(): + formatted_result.append( + "nsfw_likelihood: " + str(result["nsfw_likelihood"]) + ) + + for label, likelihood in zip(result["label"], result["likelihood"]): + formatted_result.append(f'"{label}": {str(likelihood)}') + + return "\n".join(formatted_result) + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + query_params = {"text": query, "language": self.language} + return self._call_eden_ai(query_params) diff --git a/libs/langchain/tests/integration_tests/tools/edenai/__init__.py b/libs/langchain/tests/integration_tests/tools/edenai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/langchain/tests/integration_tests/tools/edenai/test_audio_speech_to_text.py b/libs/langchain/tests/integration_tests/tools/edenai/test_audio_speech_to_text.py new file mode 100644 index 0000000000..14375cd60a --- /dev/null +++ b/libs/langchain/tests/integration_tests/tools/edenai/test_audio_speech_to_text.py @@ -0,0 +1,25 @@ +"""Test EdenAi's speech to text Tool . + +In order to run this test, you need to have an EdenAI api key. +You can get it by registering for free at https://app.edenai.run/user/register. +A test key can be found at https://app.edenai.run/admin/account/settings by +clicking on the 'sandbox' toggle. +(calls will be free, and will return dummy results) + +You'll then need to set EDENAI_API_KEY environment variable to your api key. 
+""" +from langchain.tools.edenai import EdenAiSpeechToTextTool + + +def test_edenai_call() -> None: + """Test simple call to edenai's speech to text endpoint.""" + speech2text = EdenAiSpeechToTextTool(providers=["amazon"]) + + output = speech2text( + "https://audio-samples.github.io/samples/mp3/blizzard_unconditional/sample-0.mp3" + ) + + assert speech2text.name == "edenai_speech_to_text" + assert speech2text.feature == "audio" + assert speech2text.subfeature == "speech_to_text_async" + assert isinstance(output, str) diff --git a/libs/langchain/tests/integration_tests/tools/edenai/test_audio_text_to_speech.py b/libs/langchain/tests/integration_tests/tools/edenai/test_audio_text_to_speech.py new file mode 100644 index 0000000000..02fa848dbf --- /dev/null +++ b/libs/langchain/tests/integration_tests/tools/edenai/test_audio_text_to_speech.py @@ -0,0 +1,29 @@ +"""Test EdenAi's text to speech Tool . + +In order to run this test, you need to have an EdenAI api key. +You can get it by registering for free at https://app.edenai.run/user/register. +A test key can be found at https://app.edenai.run/admin/account/settings by +clicking on the 'sandbox' toggle. +(calls will be free, and will return dummy results) + +You'll then need to set EDENAI_API_KEY environment variable to your api key. 
+""" +from urllib.parse import urlparse + +from langchain.tools.edenai import EdenAiTextToSpeechTool + + +def test_edenai_call() -> None: + """Test simple call to edenai's text to speech endpoint.""" + text2speech = EdenAiTextToSpeechTool( + providers=["amazon"], language="en", voice="MALE" + ) + + output = text2speech("hello") + parsed_url = urlparse(output) + + assert text2speech.name == "edenai_text_to_speech" + assert text2speech.feature == "audio" + assert text2speech.subfeature == "text_to_speech" + assert isinstance(output, str) + assert parsed_url.scheme in ["http", "https"] diff --git a/libs/langchain/tests/integration_tests/tools/edenai/test_image_explicitcontent.py b/libs/langchain/tests/integration_tests/tools/edenai/test_image_explicitcontent.py new file mode 100644 index 0000000000..1fb883c88d --- /dev/null +++ b/libs/langchain/tests/integration_tests/tools/edenai/test_image_explicitcontent.py @@ -0,0 +1,23 @@ +"""Test EdenAi's image moderation Tool . + +In order to run this test, you need to have an EdenAI api key. +You can get it by registering for free at https://app.edenai.run/user/register. +A test key can be found at https://app.edenai.run/admin/account/settings by +clicking on the 'sandbox' toggle. +(calls will be free, and will return dummy results) + +You'll then need to set EDENAI_API_KEY environment variable to your api key. 
+""" +from langchain.tools.edenai import EdenAiExplicitImageTool + + +def test_edenai_call() -> None: + """Test simple call to edenai's image moderation endpoint.""" + image_moderation = EdenAiExplicitImageTool(providers=["amazon"]) + + output = image_moderation("https://static.javatpoint.com/images/objects.jpg") + + assert image_moderation.name == "edenai_image_explicit_content_detection" + assert image_moderation.feature == "image" + assert image_moderation.subfeature == "explicit_content" + assert isinstance(output, str) diff --git a/libs/langchain/tests/integration_tests/tools/edenai/test_image_objectdetection.py b/libs/langchain/tests/integration_tests/tools/edenai/test_image_objectdetection.py new file mode 100644 index 0000000000..4950543b42 --- /dev/null +++ b/libs/langchain/tests/integration_tests/tools/edenai/test_image_objectdetection.py @@ -0,0 +1,23 @@ +"""Test EdenAi's object detection Tool . + +In order to run this test, you need to have an EdenAI api key. +You can get it by registering for free at https://app.edenai.run/user/register. +A test key can be found at https://app.edenai.run/admin/account/settings by +clicking on the 'sandbox' toggle. +(calls will be free, and will return dummy results) + +You'll then need to set EDENAI_API_KEY environment variable to your api key. 
+""" +from langchain.tools.edenai import EdenAiObjectDetectionTool + + +def test_edenai_call() -> None: + """Test simple call to edenai's object detection endpoint.""" + object_detection = EdenAiObjectDetectionTool(providers=["google"]) + + output = object_detection("https://static.javatpoint.com/images/objects.jpg") + + assert object_detection.name == "edenai_object_detection" + assert object_detection.feature == "image" + assert object_detection.subfeature == "object_detection" + assert isinstance(output, str) diff --git a/libs/langchain/tests/integration_tests/tools/edenai/test_ocr_identityparser.py b/libs/langchain/tests/integration_tests/tools/edenai/test_ocr_identityparser.py new file mode 100644 index 0000000000..cfd8a1cc7b --- /dev/null +++ b/libs/langchain/tests/integration_tests/tools/edenai/test_ocr_identityparser.py @@ -0,0 +1,25 @@ +"""Test EdenAi's identity parser Tool . + +In order to run this test, you need to have an EdenAI api key. +You can get it by registering for free at https://app.edenai.run/user/register. +A test key can be found at https://app.edenai.run/admin/account/settings by +clicking on the 'sandbox' toggle. +(calls will be free, and will return dummy results) + +You'll then need to set EDENAI_API_KEY environment variable to your api key. 
+""" +from langchain.tools.edenai import EdenAiParsingIDTool + + +def test_edenai_call() -> None: + """Test simple call to edenai's identity parser endpoint.""" + id_parser = EdenAiParsingIDTool(providers=["amazon"], language="en") + + output = id_parser( + "https://www.citizencard.com/images/citizencard-uk-id-card-2023.jpg" + ) + + assert id_parser.name == "edenai_identity_parsing" + assert id_parser.feature == "ocr" + assert id_parser.subfeature == "identity_parser" + assert isinstance(output, str) diff --git a/libs/langchain/tests/integration_tests/tools/edenai/test_ocr_invoiceparser.py b/libs/langchain/tests/integration_tests/tools/edenai/test_ocr_invoiceparser.py new file mode 100644 index 0000000000..813ad448b4 --- /dev/null +++ b/libs/langchain/tests/integration_tests/tools/edenai/test_ocr_invoiceparser.py @@ -0,0 +1,23 @@ +"""Test EdenAi's invoice parser Tool . + +In order to run this test, you need to have an EdenAI api key. +You can get it by registering for free at https://app.edenai.run/user/register. +A test key can be found at https://app.edenai.run/admin/account/settings by +clicking on the 'sandbox' toggle. +(calls will be free, and will return dummy results) + +You'll then need to set EDENAI_API_KEY environment variable to your api key. 
+""" +from langchain.tools.edenai import EdenAiParsingInvoiceTool + + +def test_edenai_call() -> None: + """Test simple call to edenai's invoice parser endpoint.""" + invoice_parser = EdenAiParsingInvoiceTool(providers=["amazon"], language="en") + + output = invoice_parser("https://app.edenai.run/assets/img/data_1.72e3bdcc.png") + + assert invoice_parser.name == "edenai_invoice_parsing" + assert invoice_parser.feature == "ocr" + assert invoice_parser.subfeature == "invoice_parser" + assert isinstance(output, str) diff --git a/libs/langchain/tests/integration_tests/tools/edenai/test_text_moderation.py b/libs/langchain/tests/integration_tests/tools/edenai/test_text_moderation.py new file mode 100644 index 0000000000..b713b58231 --- /dev/null +++ b/libs/langchain/tests/integration_tests/tools/edenai/test_text_moderation.py @@ -0,0 +1,24 @@ +"""Test EdenAi's text moderation Tool . + +In order to run this test, you need to have an EdenAI api key. +You can get it by registering for free at https://app.edenai.run/user/register. +A test key can be found at https://app.edenai.run/admin/account/settings by +clicking on the 'sandbox' toggle. +(calls will be free, and will return dummy results) + +You'll then need to set EDENAI_API_KEY environment variable to your api key. 
+""" +from langchain.tools.edenai.text_moderation import EdenAiTextModerationTool + + +def test_edenai_call() -> None: + """Test simple call to edenai's text moderation endpoint.""" + + text_moderation = EdenAiTextModerationTool(providers=["openai"], language="en") + + output = text_moderation("i hate you") + + assert text_moderation.name == "edenai_explicit_content_detection_text" + assert text_moderation.feature == "text" + assert text_moderation.subfeature == "moderation" + assert isinstance(output, str) diff --git a/libs/langchain/tests/unit_tests/tools/eden_ai/__init__.py b/libs/langchain/tests/unit_tests/tools/eden_ai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/langchain/tests/unit_tests/tools/eden_ai/test_tools.py b/libs/langchain/tests/unit_tests/tools/eden_ai/test_tools.py new file mode 100644 index 0000000000..425dbe1b5a --- /dev/null +++ b/libs/langchain/tests/unit_tests/tools/eden_ai/test_tools.py @@ -0,0 +1,103 @@ +from collections.abc import Generator +from unittest.mock import MagicMock, patch + +import pytest + +from langchain.tools.edenai import EdenAiTextModerationTool + +tool = EdenAiTextModerationTool( + providers=["openai"], language="en", edenai_api_key="fake_key" +) + + +@pytest.fixture +def mock_post() -> Generator: + with patch("langchain.tools.edenai.edenai_base_tool.requests.post") as mock: + yield mock + + +def test_provider_not_available(mock_post: MagicMock) -> None: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + { + "error": { + "message": """Amazon has returned an error: + An error occurred (TextSizeLimitExceededException) + when calling the DetectTargetedSentiment + operation: Input text size exceeds limit. 
+ Max length of request text allowed is 5000 bytes + while in this request the text size is 47380 bytes""", + "type": "ProviderInvalidInputTextLengthError", + }, + "status": "fail", + "provider": "amazon", + "provider_status_code": 400, + "cost": 0.0, + } + ] + mock_post.return_value = mock_response + + with pytest.raises(ValueError): + tool._run("some query") + + +def test_unexpected_response(mock_post: MagicMock) -> None: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + { + "status": "success", + } + ] + mock_post.return_value = mock_response + with pytest.raises(RuntimeError): + tool._run("some query") + + +def test_incomplete_response(mock_post: MagicMock) -> None: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + { + "status": "success", + "provider": "microsoft", + "nsfw_likelihood": 5, + "cost": 0.001, + "label": ["sexually explicit", "sexually suggestive", "offensive"], + } + ] + + mock_post.return_value = mock_response + with pytest.raises(RuntimeError): + tool._run("some query") + + +def test_invalid_payload(mock_post: MagicMock) -> None: + mock_response = MagicMock() + mock_response.status_code = 400 + mock_response.json.return_value = {} + mock_post.return_value = mock_response + + with pytest.raises(ValueError): + tool._run("some query") + + +def test_parse_response_format(mock_post: MagicMock) -> None: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + { + "status": "success", + "provider": "microsoft", + "nsfw_likelihood": 5, + "cost": 0.001, + "label": ["offensive", "hate_speech"], + "likelihood": [4, 5], + } + ] + mock_post.return_value = mock_response + + result = tool("some query") + + assert result == 'nsfw_likelihood: 5\n"offensive": 4\n"hate_speech": 5' diff --git a/libs/langchain/tests/unit_tests/tools/test_public_api.py b/libs/langchain/tests/unit_tests/tools/test_public_api.py 
index 6d58dd92ef..e7fd784587 100644 --- a/libs/langchain/tests/unit_tests/tools/test_public_api.py +++ b/libs/langchain/tests/unit_tests/tools/test_public_api.py @@ -28,6 +28,14 @@ _EXPECTED = [ "DeleteFileTool", "DuckDuckGoSearchResults", "DuckDuckGoSearchRun", + "EdenAiExplicitImageTool", + "EdenAiObjectDetectionTool", + "EdenAiParsingIDTool", + "EdenAiParsingInvoiceTool", + "EdenAiSpeechToTextTool", + "EdenAiTextModerationTool", + "EdenAiTextToSpeechTool", + "EdenaiTool", "ExtractHyperlinksTool", "ExtractTextTool", "FileSearchTool",