Update the nlpcloud connector after some changes on the NLP Cloud API (#9586)

- Description: remove some text generation deprecated parameters and
update the embeddings doc,
- Tag maintainer: @rlancemartin
This commit is contained in:
Bagatur 2023-08-23 11:35:08 -07:00 committed by GitHub
commit a40c12bb88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 20 deletions

View File

@ -9,13 +9,9 @@
"\n",
"NLP Cloud is an artificial intelligence platform that allows you to use the most advanced AI engines, and even train your own engines with your own data. \n",
"\n",
"The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers several models:\n",
"The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers the following model:\n",
"\n",
"* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here).\n",
"\n",
"* `gpt-j`: GPT-J returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower.\n",
"\n",
"* `dolphin`: Dolphin returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower. It natively understands the following languages: Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, French, German, Hungarian, Italian, Japanese, Polish, Portuguese, Romanian, Russian, Serbian, Slovenian, Spanish, Swedish, and Ukrainian."
"* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here)."
]
},
{
@ -84,7 +80,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.11.2 64-bit",
"language": "python",
"name": "python3"
},
@ -98,7 +94,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.11.2"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,

View File

@ -28,8 +28,6 @@ class NLPCloud(LLM):
"""Language to use (multilingual addon)"""
temperature: float = 0.7
"""What sampling temperature to use."""
min_length: int = 1
"""The minimum number of tokens to generate in the completion."""
max_length: int = 256
"""The maximum number of tokens to generate in the completion."""
length_no_input: bool = True
@ -46,14 +44,8 @@ class NLPCloud(LLM):
"""The number of highest probability tokens to keep for top-k filtering."""
repetition_penalty: float = 1.0
"""Penalizes repeated tokens. 1.0 means no penalty."""
length_penalty: float = 1.0
"""Exponential penalty to the length."""
do_sample: bool = True
"""Whether to use sampling (True) or greedy decoding."""
num_beams: int = 1
"""Number of beams for beam search."""
early_stopping: bool = False
"""Whether to stop beam search at num_beams sentences."""
num_return_sequences: int = 1
"""How many completions to generate for each prompt."""
@ -91,7 +83,6 @@ class NLPCloud(LLM):
"""Get the default parameters for calling NLPCloud API."""
return {
"temperature": self.temperature,
"min_length": self.min_length,
"max_length": self.max_length,
"length_no_input": self.length_no_input,
"remove_input": self.remove_input,
@ -100,10 +91,7 @@ class NLPCloud(LLM):
"top_p": self.top_p,
"top_k": self.top_k,
"repetition_penalty": self.repetition_penalty,
"length_penalty": self.length_penalty,
"do_sample": self.do_sample,
"num_beams": self.num_beams,
"early_stopping": self.early_stopping,
"num_return_sequences": self.num_return_sequences,
}