docs: model parameter mandatory for cohere embedding and rerank (#23349)

The latest langchain-cohere SDK requires the model parameter to be passed when initializing the Embeddings and Reranker classes.

This PR updates the docs to reflect these changes.
This commit is contained in:
Anirudh31415926535 2024-07-13 07:07:28 +08:00 committed by GitHub
parent aee55eda39
commit 7677ceea60
5 changed files with 15 additions and 11 deletions


@@ -67,15 +67,16 @@ If you'd prefer not to set an environment variable you can pass the key in direc
 ```python
 from langchain_cohere import CohereEmbeddings
-embeddings_model = CohereEmbeddings(cohere_api_key="...")
+embeddings_model = CohereEmbeddings(cohere_api_key="...", model='embed-english-v3.0')
 ```
-Otherwise you can initialize without any params:
+Otherwise you can initialize simply as shown below:
 ```python
 from langchain_cohere import CohereEmbeddings
-embeddings_model = CohereEmbeddings()
+embeddings_model = CohereEmbeddings(model='embed-english-v3.0')
 ```
+Do note that it is mandatory to pass the model parameter while initializing the CohereEmbeddings class.
 </TabItem>
 <TabItem value="huggingface" label="Hugging Face">
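The mandatory-parameter behavior described above can be illustrated offline. A minimal sketch, using a hypothetical stand-in class rather than the real langchain-cohere implementation, of how a constructor whose `model` field has no default behaves:

```python
from dataclasses import dataclass


# Hypothetical stand-in for CohereEmbeddings: `model` has no default,
# so omitting it raises a TypeError at construction time.
@dataclass
class RequiredModelEmbeddings:
    model: str
    cohere_api_key: str = "..."


ok = RequiredModelEmbeddings(model="embed-english-v3.0")
print(ok.model)

try:
    RequiredModelEmbeddings()  # no model passed
except TypeError as exc:
    print("missing model:", exc)
```

The real class enforces the same contract: constructing it without `model` fails immediately instead of deferring the error to the first embedding call.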


@@ -309,9 +309,9 @@
 "documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
 "texts = text_splitter.split_documents(documents)\n",
-"retriever = FAISS.from_documents(texts, CohereEmbeddings()).as_retriever(\n",
-" search_kwargs={\"k\": 20}\n",
-")\n",
+"retriever = FAISS.from_documents(\n",
+" texts, CohereEmbeddings(model=\"embed-english-v3.0\")\n",
+").as_retriever(search_kwargs={\"k\": 20})\n",
 "\n",
 "query = \"What did the president say about Ketanji Brown Jackson\"\n",
 "docs = retriever.invoke(query)\n",
@@ -324,7 +324,8 @@
 "metadata": {},
 "source": [
 "## Doing reranking with CohereRerank\n",
-"Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll add an `CohereRerank`, uses the Cohere rerank endpoint to rerank the returned results."
+"Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll add a `CohereRerank`, which uses the Cohere rerank endpoint to rerank the returned results.\n",
+"Do note that it is mandatory to specify the model name in CohereRerank!"
 ]
},
{
@@ -339,7 +340,7 @@
 "from langchain_community.llms import Cohere\n",
 "\n",
 "llm = Cohere(temperature=0)\n",
-"compressor = CohereRerank()\n",
+"compressor = CohereRerank(model=\"rerank-english-v3.0\")\n",
 "compression_retriever = ContextualCompressionRetriever(\n",
 " base_compressor=compressor, base_retriever=retriever\n",
 ")\n",
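The rerank step configured in the snippet above can be sketched without calling the Cohere endpoint. A toy `rerank` function (hypothetical scoring by query-term overlap, standing in for the real rerank model) that reorders documents and keeps the top `top_n`:

```python
def rerank(query: str, docs: list[str], top_n: int = 3) -> list[str]:
    """Toy reranker: score each doc by how many query terms it contains."""
    terms = set(query.lower().split())
    # Sort by descending overlap with the query's terms; sort is stable,
    # so ties keep the retriever's original ordering.
    scored = sorted(docs, key=lambda d: -len(terms & set(d.lower().split())))
    return scored[:top_n]


docs = [
    "The president praised Ketanji Brown Jackson.",
    "The economy grew last quarter.",
    "Judge Jackson was nominated to the Supreme Court.",
]
top = rerank("What did the president say about Ketanji Brown Jackson", docs, top_n=2)
print(top)  # the document mentioning the president ranks first
```

The real `CohereRerank` plays the same role inside the `ContextualCompressionRetriever`: it rescores the retriever's candidates against the query and returns only the best few.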


@@ -40,7 +40,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"embeddings = CohereEmbeddings(model=\"embed-english-light-v3.0\")"
+"embeddings = CohereEmbeddings(\n",
+" model=\"embed-english-light-v3.0\"\n",
+") # It is mandatory to pass a model parameter to initialize the CohereEmbeddings object"
 ]
},
{


@@ -78,7 +78,7 @@
 "# See docker command above to launch a postgres instance with pgvector enabled.\n",
 "connection = \"postgresql+psycopg://langchain:langchain@localhost:6024/langchain\" # Uses psycopg3!\n",
 "collection_name = \"my_docs\"\n",
-"embeddings = CohereEmbeddings()\n",
+"embeddings = CohereEmbeddings(model=\"embed-english-v3.0\")\n",
 "\n",
 "vectorstore = PGVector(\n",
 " embeddings=embeddings,\n",


@@ -23,7 +23,7 @@ parsed_data = [
 ]
 parsed_data[1]
-embeddings = CohereEmbeddings()
+embeddings = CohereEmbeddings(model="embed-english-v3.0")
 docsearch = Chroma.from_texts(
     [x["title"] for x in parsed_data], embeddings, metadatas=parsed_data