Update the nlpcloud connector after some changes on the NLP Cloud API (#9586)

- Description: remove some deprecated text generation parameters and update the embeddings doc
- Tag maintainer: @rlancemartin
2024-11-06 03:20:49 +00:00 · 2023-08-23 11:35:08 -07:00 · 2023-08-23 11:35:08 -07:00 · a40c12bb88
commit a40c12bb88
parent e2e582f1f6 f1072cc31f
2 changed files with 9 additions and 20 deletions
--- a/docs/extras/integrations/text_embedding/nlp_cloud.ipynb
+++ b/docs/extras/integrations/text_embedding/nlp_cloud.ipynb
@ -9,13 +9,9 @@
    "\n",
    "NLP Cloud is an artificial intelligence platform that allows you to use the most advanced AI engines, and even train your own engines with your own data. \n",
    "\n",
-    "The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers several models:\n",
+    "The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers the following model:\n",
    "\n",
-    "* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here).\n",
-    "\n",
-    "* `gpt-j`: GPT-J returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower.\n",
-    "\n",
-    "* `dolphin`: Dolphin returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower. It natively understands the following languages: Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, French, German, Hungarian, Italian, Japanese, Polish, Portuguese, Romanian, Russian, Serbian, Slovenian, Spanish, Swedish, and Ukrainian."
+    "* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here)."
   ]
  },
  {
@ -84,7 +80,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.11.2 64-bit",
   "language": "python",
   "name": "python3"
  },
@ -98,7 +94,12 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.11.2"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
  }
 },
 "nbformat": 4,
--- a/libs/langchain/langchain/llms/nlpcloud.py
+++ b/libs/langchain/langchain/llms/nlpcloud.py
@ -28,8 +28,6 @@ class NLPCloud(LLM):
    """Language to use (multilingual addon)"""
    temperature: float = 0.7
    """What sampling temperature to use."""
-    min_length: int = 1
-    """The minimum number of tokens to generate in the completion."""
    max_length: int = 256
    """The maximum number of tokens to generate in the completion."""
    length_no_input: bool = True
@ -46,14 +44,8 @@ class NLPCloud(LLM):
    """The number of highest probability tokens to keep for top-k filtering."""
    repetition_penalty: float = 1.0
    """Penalizes repeated tokens. 1.0 means no penalty."""
-    length_penalty: float = 1.0
-    """Exponential penalty to the length."""
-    do_sample: bool = True
-    """Whether to use sampling (True) or greedy decoding."""
    num_beams: int = 1
    """Number of beams for beam search."""
-    early_stopping: bool = False
-    """Whether to stop beam search at num_beams sentences."""
    num_return_sequences: int = 1
    """How many completions to generate for each prompt."""

@ -91,7 +83,6 @@ class NLPCloud(LLM):
        """Get the default parameters for calling NLPCloud API."""
        return {
            "temperature": self.temperature,
-            "min_length": self.min_length,
            "max_length": self.max_length,
            "length_no_input": self.length_no_input,
            "remove_input": self.remove_input,
@ -100,10 +91,7 @@ class NLPCloud(LLM):
            "top_p": self.top_p,
            "top_k": self.top_k,
            "repetition_penalty": self.repetition_penalty,
-            "length_penalty": self.length_penalty,
-            "do_sample": self.do_sample,
            "num_beams": self.num_beams,
-            "early_stopping": self.early_stopping,
            "num_return_sequences": self.num_return_sequences,
        }