from pathlib import Path
from typing import Any, Dict, List

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra, Field

DEFAULT_QUERY_INSTRUCTION = (
    "Represent the question for retrieving supporting documents: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_EN = (
    "Represent this question for searching relevant passages: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章:"


class OpenVINOEmbeddings(BaseModel, Embeddings):
    """OpenVINO embedding models.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import OpenVINOEmbeddings

            model_name = "sentence-transformers/all-mpnet-base-v2"
            model_kwargs = {'device': 'CPU'}
            encode_kwargs = {'normalize_embeddings': True}
            ov = OpenVINOEmbeddings(
                model_name_or_path=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    """

    ov_model: Any
    """OpenVINO model object."""
    tokenizer: Any
    """Tokenizer for embedding model."""
    model_name_or_path: str
    """HuggingFace model id."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass to the model."""
    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method of the model."""
    show_progress: bool = False
    """Whether to show a progress bar."""

    def __init__(self, **kwargs: Any):
        """Initialize the sentence_transformer."""
        super().__init__(**kwargs)

        try:
            from optimum.intel.openvino import OVModelForFeatureExtraction
        except ImportError as e:
            raise ImportError(
                "Could not import optimum-intel python package. "
                "Please install it with: "
                "pip install -U 'optimum[openvino,nncf]'"
            ) from e

        try:
            from huggingface_hub import HfApi
        except ImportError as e:
            raise ImportError(
                "Could not import huggingface_hub python package. "
                "Please install it with: "
                "`pip install -U huggingface_hub`."
            ) from e

        def require_model_export(
            model_id: str, revision: Any = None, subfolder: Any = None
        ) -> bool:
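            """Return True if the model still needs conversion to OpenVINO IR.

            `model_id` may be a local directory or a HuggingFace Hub repo; it
            only counts as ready when both `openvino_model.xml` and
            `openvino_model.bin` are present. Lookup failures conservatively
            return True and trigger an export.
            """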
            model_dir = Path(model_id)
            if subfolder is not None:
                model_dir = model_dir / subfolder
            if model_dir.is_dir():
                return (
                    not (model_dir / "openvino_model.xml").exists()
                    or not (model_dir / "openvino_model.bin").exists()
                )
            hf_api = HfApi()
            try:
                model_info = hf_api.model_info(model_id, revision=revision or "main")
                normalized_subfolder = (
                    None if subfolder is None else Path(subfolder).as_posix()
                )
                model_files = [
                    file.rfilename
                    for file in model_info.siblings
                    if normalized_subfolder is None
                    or file.rfilename.startswith(normalized_subfolder)
                ]
                ov_model_path = (
                    "openvino_model.xml"
                    if subfolder is None
                    else f"{normalized_subfolder}/openvino_model.xml"
                )
                return (
                    ov_model_path not in model_files
                    or ov_model_path.replace(".xml", ".bin") not in model_files
                )
            except Exception:
                return True

        if require_model_export(self.model_name_or_path):
            # no OpenVINO IR found: convert the model on the fly
            self.ov_model = OVModelForFeatureExtraction.from_pretrained(
                self.model_name_or_path, export=True, **self.model_kwargs
            )
        else:
            # reuse the existing OpenVINO IR files
            self.ov_model = OVModelForFeatureExtraction.from_pretrained(
                self.model_name_or_path, **self.model_kwargs
            )

        try:
            from transformers import AutoTokenizer
        except ImportError as e:
            raise ImportError(
                "Unable to import transformers, please install with "
                "`pip install -U transformers`."
            ) from e
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)

    def _text_length(self, text: Any) -> int:
        """
        Helper function to get the length of the input text. Text can be either
        a list of ints (which means a single tokenized text), or a tuple of
        lists of ints (representing several tokenized texts for the model).
        """
        if isinstance(text, dict):  # {key: value} case
            return len(next(iter(text.values())))
        elif not hasattr(text, "__len__"):  # Object has no len() method
            return 1
        # Empty string or list of ints
        elif len(text) == 0 or isinstance(text[0], int):
            return len(text)
        else:
            # Sum of length of individual strings
            return sum([len(t) for t in text])
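
    # Illustrative values (not from the original source): _text_length("abc") == 3,
    # _text_length([1, 2, 3]) == 3, and _text_length([[1, 2], [3, 4, 5]]) == 5.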

    def encode(
        self,
        sentences: Any,
        batch_size: int = 4,
        show_progress_bar: bool = False,
        convert_to_numpy: bool = True,
        convert_to_tensor: bool = False,
        mean_pooling: bool = False,
        normalize_embeddings: bool = True,
    ) -> Any:
        """
        Computes sentence embeddings.

        :param sentences: the sentences to embed.
        :param batch_size: the batch size used for the computation.
        :param show_progress_bar: Whether to output a progress bar.
        :param convert_to_numpy: Whether the output should be a list of numpy vectors.
        :param convert_to_tensor: Whether the output should be one large tensor.
        :param mean_pooling: Whether to mean-pool the token embeddings instead of
            using the [CLS] token embedding.
        :param normalize_embeddings: Whether to L2-normalize returned vectors.

        :return: By default, a 2d numpy array with shape [num_inputs, output_dimension].
        """
        try:
            import numpy as np
        except ImportError as e:
            raise ImportError(
                "Unable to import numpy, please install with `pip install -U numpy`."
            ) from e
        try:
            from tqdm import trange
        except ImportError as e:
            raise ImportError(
                "Unable to import tqdm, please install with `pip install -U tqdm`."
            ) from e
        try:
            import torch
        except ImportError as e:
            raise ImportError(
                "Unable to import torch, please install with `pip install -U torch`."
            ) from e

        def run_mean_pooling(model_output: Any, attention_mask: Any) -> Any:
            # First element of model_output contains all token embeddings
            token_embeddings = model_output[0]
            input_mask_expanded = (
                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            )
            return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
                input_mask_expanded.sum(1), min=1e-9
            )

        if convert_to_tensor:
            convert_to_numpy = False

        input_was_string = False
        if isinstance(sentences, str) or not hasattr(
            sentences, "__len__"
        ):  # Cast an individual sentence to a list with length 1
            sentences = [sentences]
            input_was_string = True

        all_embeddings: Any = []
        length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences])
        sentences_sorted = [sentences[idx] for idx in length_sorted_idx]

        for start_index in trange(
            0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar
        ):
            sentences_batch = sentences_sorted[start_index : start_index + batch_size]
            features = self.tokenizer(
                sentences_batch, padding=True, truncation=True, return_tensors="pt"
            )

            out_features = self.ov_model(**features)
            if mean_pooling:
                embeddings = run_mean_pooling(out_features, features["attention_mask"])
            else:
                # default pooling: the embedding of the first ([CLS]) token
                embeddings = out_features[0][:, 0]
            if normalize_embeddings:
                embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

            # fixes for #522 and #487 to avoid oom problems on gpu with large datasets
            if convert_to_numpy:
                embeddings = embeddings.cpu()

            all_embeddings.extend(embeddings)

        # restore the original input order
        all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]

        if convert_to_tensor:
            if len(all_embeddings):
                all_embeddings = torch.stack(all_embeddings)
            else:
                all_embeddings = torch.Tensor()
        elif convert_to_numpy:
            all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])

        if input_was_string:
            all_embeddings = all_embeddings[0]

        return all_embeddings
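
    # Illustrative (not from the original source): given an initialized instance
    # `ov`, ov.encode(["hello"]) returns a numpy array of shape (1, hidden_size),
    # while convert_to_tensor=True yields a single stacked torch.Tensor instead.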

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        texts = list(map(lambda x: x.replace("\n", " "), texts))
        embeddings = self.encode(
            texts, show_progress_bar=self.show_progress, **self.encode_kwargs
        )
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]

    def save_model(
        self,
        model_path: str,
    ) -> bool:
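        """Compress the model weights to fp16 and save the OpenVINO IR and
        tokenizer to `model_path`."""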
        # compress weights to fp16 before serializing the OpenVINO IR
        self.ov_model.half()
        self.ov_model.save_pretrained(model_path)
        self.tokenizer.save_pretrained(model_path)
        return True


class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
    """OpenVINO BGE embedding models.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import OpenVINOBgeEmbeddings

            model_name = "BAAI/bge-large-en"
            model_kwargs = {'device': 'CPU'}
            encode_kwargs = {'normalize_embeddings': True}
            ov = OpenVINOBgeEmbeddings(
                model_name_or_path=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    """

    query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
    """Instruction to use for embedding query."""
    embed_instruction: str = ""
    """Instruction to use for embedding document."""

    def __init__(self, **kwargs: Any):
        """Initialize the sentence_transformer."""
        super().__init__(**kwargs)

        # Chinese BGE checkpoints expect the Chinese query instruction
        if "-zh" in self.model_name_or_path:
            self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        texts = [self.embed_instruction + t.replace("\n", " ") for t in texts]
        embeddings = self.encode(texts, **self.encode_kwargs)
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        text = text.replace("\n", " ")
        embedding = self.encode(self.query_instruction + text, **self.encode_kwargs)
        return embedding.tolist()
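

# A minimal usage sketch (not part of the original module): constructing the
# embeddings requires `optimum[openvino,nncf]` and, for a remote model id,
# network access to download and convert the checkpoint on first use.
if __name__ == "__main__":
    ov = OpenVINOEmbeddings(
        model_name_or_path="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "CPU"},
        encode_kwargs={"normalize_embeddings": True},
    )
    doc_vectors = ov.embed_documents(["OpenVINO runs on CPU.", "Second document."])
    query_vector = ov.embed_query("What does OpenVINO run on?")
    print(len(doc_vectors), len(query_vector))  # document count, embedding dimension
    # The fp16 OpenVINO IR can be saved and later reloaded from a local directory:
    ov.save_model("./all-mpnet-base-v2-ov")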