mirror of
https://github.com/hwchase17/langchain
synced 2024-11-20 03:25:56 +00:00
ed58eeb9c5
Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
93 lines
2.8 KiB
Python
93 lines
2.8 KiB
Python
import os
|
|
import sys
|
|
from typing import Any, List
|
|
|
|
from langchain_core.embeddings import Embeddings
|
|
from langchain_core.pydantic_v1 import BaseModel, Extra
|
|
|
|
|
|
class JohnSnowLabsEmbeddings(BaseModel, Embeddings):
    """JohnSnowLabs embedding models

    To use, you should have the ``johnsnowlabs`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings

            embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert')
            output = embedding.embed_query("foo bar")
    """  # noqa: E501

    # Holds the default model reference until ``__init__`` replaces it with
    # the loaded pipeline object that ``embed_documents`` calls ``predict`` on.
    model: Any = "embed_sentence.bert"

    def __init__(
        self,
        model: Any = "embed_sentence.bert",
        hardware_target: str = "cpu",
        **kwargs: Any,
    ):
        """Initialize the johnsnowlabs model.

        Args:
            model: Either a string handed to ``nlp.load``, an ``NLUPipeline``
                instance that is used as-is, or any other object, which is
                converted through ``nlp.to_nlu_pipe``.
            hardware_target: Forwarded to ``nlp.start``.
            **kwargs: Forwarded to the pydantic base-class initializer.

        Raises:
            ImportError: If ``johnsnowlabs`` / ``nlu`` cannot be imported.
            Exception: If ``nlp.start`` fails ("Failure starting Spark
                Session") or the model cannot be loaded ("Failure loading
                model"); the underlying error is chained as the cause.
        """
        super().__init__(**kwargs)

        # 1) Check imports
        try:
            from johnsnowlabs import nlp
            from nlu.pipe.pipeline import NLUPipeline
        except ImportError as exc:
            raise ImportError(
                "Could not import johnsnowlabs python package. "
                "Please install it with `pip install johnsnowlabs`."
            ) from exc

        # 2) Start a Spark Session
        try:
            # Point PySpark workers and driver at the running interpreter.
            os.environ["PYSPARK_PYTHON"] = sys.executable
            os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
            nlp.start(hardware_target=hardware_target)
        except Exception as exc:
            raise Exception("Failure starting Spark Session") from exc

        # 3) Load the model
        try:
            if isinstance(model, str):
                self.model = nlp.load(model)
            elif isinstance(model, NLUPipeline):
                self.model = model
            else:
                self.model = nlp.to_nlu_pipe(model)
        except Exception as exc:
            raise Exception("Failure loading model") from exc

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a JohnSnowLabs transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text. An empty list or tuple of
            texts yields an empty list without invoking the pipeline.

        Raises:
            KeyError: If the prediction result has no column whose name
                contains ``"embedding"``.
        """
        # Nothing to embed: avoid running the pipeline on an empty batch.
        if isinstance(texts, (list, tuple)) and not texts:
            return []

        df = self.model.predict(texts, output_level="document")

        # When several column names contain "embedding", the last one wins.
        # Non-string column labels are skipped instead of raising TypeError.
        emb_col = None
        for c in df.columns:
            if isinstance(c, str) and "embedding" in c:
                emb_col = c

        if emb_col is None:
            # Previously this surfaced as an opaque ``KeyError: None`` from
            # the ``df[None]`` lookup; keep the type, explain the cause.
            raise KeyError(
                "No embedding column found in model output; "
                f"available columns: {list(df.columns)}"
            )

        return [vec.tolist() for vec in df[emb_col].tolist()]

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a JohnSnowLabs transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        # A query is embedded as a one-element document batch.
        return self.embed_documents([text])[0]
|