From 6c3573e7f6379ec2bd1035ccb96675a352d6038f Mon Sep 17 00:00:00 2001
From: Harrison Chase <hw.chase.17@gmail.com>
Date: Thu, 3 Aug 2023 21:21:15 -0700
Subject: [PATCH] Harrison/aleph alpha (#8735)

Co-authored-by: PiotrMazurek <piotr.mazurek@aleph-alpha.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
---
 .../text_embedding/aleph_alpha.ipynb          | 28 ++++----
 .../langchain/embeddings/aleph_alpha.py       | 67 +++++++++++++++----
 libs/langchain/langchain/llms/aleph_alpha.py  | 56 ++++++++++++++--
 3 files changed, 119 insertions(+), 32 deletions(-)

diff --git a/docs/extras/integrations/text_embedding/aleph_alpha.ipynb b/docs/extras/integrations/text_embedding/aleph_alpha.ipynb
index f813329bfc..05fcebeaec 100644
--- a/docs/extras/integrations/text_embedding/aleph_alpha.ipynb
+++ b/docs/extras/integrations/text_embedding/aleph_alpha.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "id": "8a920a89",
    "metadata": {},
    "outputs": [],
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "id": "f2d04da3",
    "metadata": {},
    "outputs": [],
@@ -41,17 +41,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "e6ecde96",
    "metadata": {},
    "outputs": [],
    "source": [
-    "embeddings = AlephAlphaAsymmetricSemanticEmbedding()"
+    "embeddings = AlephAlphaAsymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "90e68411",
    "metadata": {},
    "outputs": [],
@@ -61,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "55903233",
    "metadata": {},
    "outputs": [],
@@ -79,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "eabb763a",
    "metadata": {},
    "outputs": [],
@@ -89,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
    "id": "0ad799f7",
    "metadata": {},
    "outputs": [],
@@ -99,17 +99,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "af86dc10",
    "metadata": {},
    "outputs": [],
    "source": [
-    "embeddings = AlephAlphaSymmetricSemanticEmbedding()"
+    "embeddings = AlephAlphaSymmetricSemanticEmbedding(normalize=True, compress_to_size=128)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "d292536f",
    "metadata": {},
    "outputs": [],
@@ -119,7 +119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "c704a7cf",
    "metadata": {},
    "outputs": [],
@@ -130,7 +130,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "33492471",
+   "id": "5d999f8f",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -152,7 +152,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.9.13"
   },
   "vscode": {
    "interpreter": {
diff --git a/libs/langchain/langchain/embeddings/aleph_alpha.py b/libs/langchain/langchain/embeddings/aleph_alpha.py
index f53d509a9c..1f5c4b8b19 100644
--- a/libs/langchain/langchain/embeddings/aleph_alpha.py
+++ b/libs/langchain/langchain/embeddings/aleph_alpha.py
@@ -16,10 +16,11 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
 
     Example:
         .. code-block:: python
-
             from aleph_alpha import AlephAlphaAsymmetricSemanticEmbedding
 
-            embeddings = AlephAlphaSymmetricSemanticEmbedding()
+            embeddings = AlephAlphaAsymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )
 
             document = "This is a content of the document"
             query = "What is the content of the document?"
@@ -30,24 +31,55 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
     """
 
     client: Any  #: :meta private:
-    """Aleph Alpha client."""
-    model: Optional[str] = "luminous-base"
+
+    # Embedding params
+    model: str = "luminous-base"
     """Model name to use."""
-    hosting: Optional[str] = "https://api.aleph-alpha.com"
-    """Optional parameter that specifies which datacenters may process the request."""
-    normalize: Optional[bool] = True
-    """Should returned embeddings be normalized"""
-    compress_to_size: Optional[int] = 128
+    compress_to_size: Optional[int] = None
     """Should the returned embeddings come back as an original 5120-dim vector, 
     or should it be compressed to 128-dim."""
+    normalize: Optional[bool] = None
+    """Should returned embeddings be normalized"""
     contextual_control_threshold: Optional[int] = None
     """Attention control parameters only apply to those tokens that have 
     explicitly been set in the request."""
-    control_log_additive: Optional[bool] = True
+    control_log_additive: bool = True
     """Apply controls on prompt items by adding the log(control_factor) 
     to attention scores."""
+
+    # Client params
     aleph_alpha_api_key: Optional[str] = None
     """API key for Aleph Alpha API."""
+    host: str = "https://api.aleph-alpha.com"
+    """The hostname of the API host.
+    The default one is "https://api.aleph-alpha.com"."""
+    hosting: Optional[str] = None
+    """Determines in which datacenters the request may be processed.
+    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
+    Not setting this value, or setting it to None, gives us maximal flexibility 
+    in processing your request in our
+    own datacenters and on servers hosted with other providers. 
+    Choose this option for maximal availability.
+    Setting it to "aleph-alpha" allows us to only process the request 
+    in our own datacenters.
+    Choose this option for maximal data privacy."""
+    request_timeout_seconds: int = 305
+    """Client timeout that will be set for HTTP requests in the 
+    `requests` library's API calls.
+    Server will close all requests after 300 seconds with an internal server error."""
+    total_retries: int = 8
+    """The number of retries made in case requests fail with certain retryable
+    status codes. If the last
+    retry fails, a corresponding exception is raised. Note that between retries
+    an exponential backoff
+    is applied, starting with 0.5 s after the first retry and doubling for each
+    retry made. So with the
+    default setting of 8 retries, a total wait time of 63.5 s is added between
+    the retries."""
+    nice: bool = False
+    """Setting this to True will signal to the API that you intend to be
+    nice to other users
+    by de-prioritizing your request below concurrent ones."""
 
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
@@ -57,12 +89,21 @@ class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
         )
         try:
             from aleph_alpha_client import Client
+
+            values["client"] = Client(
+                token=aleph_alpha_api_key,
+                host=values["host"],
+                hosting=values["hosting"],
+                request_timeout_seconds=values["request_timeout_seconds"],
+                total_retries=values["total_retries"],
+                nice=values["nice"],
+            )
         except ImportError:
             raise ValueError(
                 "Could not import aleph_alpha_client python package. "
                 "Please install it with `pip install aleph_alpha_client`."
             )
-        values["client"] = Client(token=aleph_alpha_api_key)
+
         return values
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
@@ -152,7 +193,9 @@ class AlephAlphaSymmetricSemanticEmbedding(AlephAlphaAsymmetricSemanticEmbedding
 
             from aleph_alpha import AlephAlphaSymmetricSemanticEmbedding
 
-            embeddings = AlephAlphaAsymmetricSemanticEmbedding()
+            embeddings = AlephAlphaSymmetricSemanticEmbedding(
+                normalize=True, compress_to_size=128
+            )
             text = "This is a test text"
 
             doc_result = embeddings.embed_documents([text])
diff --git a/libs/langchain/langchain/llms/aleph_alpha.py b/libs/langchain/langchain/llms/aleph_alpha.py
index cd4aec1682..1df17c3a87 100644
--- a/libs/langchain/langchain/llms/aleph_alpha.py
+++ b/libs/langchain/langchain/llms/aleph_alpha.py
@@ -125,12 +125,43 @@ class AlephAlpha(LLM):
     raw_completion: bool = False
     """Force the raw completion of the model to be returned."""
 
-    aleph_alpha_api_key: Optional[str] = None
-    """API key for Aleph Alpha API."""
-
     stop_sequences: Optional[List[str]] = None
     """Stop sequences to use."""
 
+    # Client params
+    aleph_alpha_api_key: Optional[str] = None
+    """API key for Aleph Alpha API."""
+    host: str = "https://api.aleph-alpha.com"
+    """The hostname of the API host.
+    The default one is "https://api.aleph-alpha.com"."""
+    hosting: Optional[str] = None
+    """Determines in which datacenters the request may be processed.
+    You can either set the parameter to "aleph-alpha" or omit it (defaulting to None).
+    Not setting this value, or setting it to None, gives us maximal 
+    flexibility in processing your request in our
+    own datacenters and on servers hosted with other providers. 
+    Choose this option for maximal availability.
+    Setting it to "aleph-alpha" allows us to only process the 
+    request in our own datacenters.
+    Choose this option for maximal data privacy."""
+    request_timeout_seconds: int = 305
+    """Client timeout that will be set for HTTP requests in the 
+    `requests` library's API calls.
+    Server will close all requests after 300 seconds with an internal server error."""
+    total_retries: int = 8
+    """The number of retries made in case requests fail with certain retryable
+    status codes. If the last
+    retry fails, a corresponding exception is raised. Note that between retries
+    an exponential backoff
+    is applied, starting with 0.5 s after the first retry and doubling for
+    each retry made. So with the
+    default setting of 8 retries, a total wait time of 63.5 s is added
+    between the retries."""
+    nice: bool = False
+    """Setting this to True will signal to the API that you intend to be
+    nice to other users
+    by de-prioritizing your request below concurrent ones."""
+
     class Config:
         """Configuration for this pydantic object."""
 
@@ -143,9 +174,16 @@ class AlephAlpha(LLM):
             values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY"
         )
         try:
-            import aleph_alpha_client
-
-            values["client"] = aleph_alpha_client.Client(token=aleph_alpha_api_key)
+            from aleph_alpha_client import Client
+
+            values["client"] = Client(
+                token=aleph_alpha_api_key,
+                host=values["host"],
+                hosting=values["hosting"],
+                request_timeout_seconds=values["request_timeout_seconds"],
+                total_retries=values["total_retries"],
+                nice=values["nice"],
+            )
         except ImportError:
             raise ImportError(
                 "Could not import aleph_alpha_client python package. "
@@ -241,3 +279,9 @@ class AlephAlpha(LLM):
         if stop is not None or self.stop_sequences is not None:
             text = enforce_stop_tokens(text, params["stop_sequences"])
         return text
+
+
+if __name__ == "__main__":  # NOTE(review): looks like a leftover smoke test
+    aa = AlephAlpha()  # constructed with default field values only
+
+    print(aa("How are you?"))