mirror of
https://github.com/hwchase17/langchain
synced 2024-11-13 19:10:52 +00:00
community[minor]: VectorStore Infinispan. Adding TLS and authentication (#23522)
**Description**: this PR enables VectorStore TLS and authentication (digest, basic) with HTTP/2 for Infinispan server. Based on httpx. Added docker-compose facilities for testing. Added documentation. **Dependencies:** requires `pip install httpx[http2]` if HTTP/2 is needed. **Twitter handle:** https://twitter.com/infinispan
This commit is contained in:
parent
ff925d2ddc
commit
7da2efd9d3
@ -5,9 +5,10 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Any, Iterable, List, Optional, Tuple, Type, cast
|
||||
import warnings
|
||||
from typing import Any, Iterable, List, Optional, Tuple, Type, Union, cast
|
||||
|
||||
import requests
|
||||
from httpx import Response
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.vectorstores import VectorStore
|
||||
@ -49,7 +50,7 @@ class InfinispanVS(VectorStore):
|
||||
embedding=RGBEmbeddings(),
|
||||
output_fields: ["texture", "color"],
|
||||
lambda_key: lambda text,meta: str(meta["_key"]),
|
||||
lambda_content: lambda item: item["color"]})
|
||||
lambda_content: lambda item: item["color"])
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@ -58,13 +59,48 @@ class InfinispanVS(VectorStore):
|
||||
ids: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
cache_name: str
|
||||
Embeddings cache name. Default "vector"
|
||||
entity_name: str
|
||||
Protobuf entity name for the embeddings. Default "vector"
|
||||
text_field: str
|
||||
Protobuf field name for text. Default "text"
|
||||
vector_field: str
|
||||
Protobuf field name for vector. Default "vector"
|
||||
lambda_content: lambda
|
||||
Lambda returning the content part of an item. Default returns text_field
|
||||
lambda_metadata: lambda
|
||||
Lambda returning the metadata part of an item. Default returns items
|
||||
fields except text_field, vector_field, _type
|
||||
output_fields: List[str]
|
||||
List of fields to be returned from item, if None return all fields.
|
||||
Default None
|
||||
kwargs: Any
|
||||
Rest of arguments passed to Infinispan. See docs"""
|
||||
self.ispn = Infinispan(**kwargs)
|
||||
self._configuration = kwargs
|
||||
self._cache_name = str(self._configuration.get("cache_name", "vector"))
|
||||
self._entity_name = str(self._configuration.get("entity_name", "vector"))
|
||||
self._embedding = embedding
|
||||
self._textfield = self._configuration.get("textfield", "text")
|
||||
self._vectorfield = self._configuration.get("vectorfield", "vector")
|
||||
self._textfield = self._configuration.get("textfield", "")
|
||||
if self._textfield == "":
|
||||
self._textfield = self._configuration.get("text_field", "text")
|
||||
else:
|
||||
warnings.warn(
|
||||
"`textfield` is deprecated. Please use `text_field` " "param.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
self._vectorfield = self._configuration.get("vectorfield", "")
|
||||
if self._vectorfield == "":
|
||||
self._vectorfield = self._configuration.get("vector_field", "vector")
|
||||
else:
|
||||
warnings.warn(
|
||||
"`vectorfield` is deprecated. Please use `vector_field` " "param.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
self._to_content = self._configuration.get(
|
||||
"lambda_content", lambda item: self._default_content(item)
|
||||
)
|
||||
@ -121,7 +157,7 @@ repeated float %s = 1;
|
||||
metadata_proto += "}\n"
|
||||
return metadata_proto
|
||||
|
||||
def schema_create(self, proto: str) -> requests.Response:
|
||||
def schema_create(self, proto: str) -> Response:
|
||||
"""Deploy the schema for the vector db
|
||||
Args:
|
||||
proto(str): protobuf schema
|
||||
@ -130,14 +166,14 @@ repeated float %s = 1;
|
||||
"""
|
||||
return self.ispn.schema_post(self._entity_name + ".proto", proto)
|
||||
|
||||
def schema_delete(self) -> requests.Response:
|
||||
def schema_delete(self) -> Response:
|
||||
"""Delete the schema for the vector db
|
||||
Returns:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
return self.ispn.schema_delete(self._entity_name + ".proto")
|
||||
|
||||
def cache_create(self, config: str = "") -> requests.Response:
|
||||
def cache_create(self, config: str = "") -> Response:
|
||||
"""Create the cache for the vector db
|
||||
Args:
|
||||
config(str): configuration of the cache.
|
||||
@ -172,14 +208,14 @@ repeated float %s = 1;
|
||||
)
|
||||
return self.ispn.cache_post(self._cache_name, config)
|
||||
|
||||
def cache_delete(self) -> requests.Response:
|
||||
def cache_delete(self) -> Response:
|
||||
"""Delete the cache for the vector db
|
||||
Returns:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
return self.ispn.cache_delete(self._cache_name)
|
||||
|
||||
def cache_clear(self) -> requests.Response:
|
||||
def cache_clear(self) -> Response:
|
||||
"""Clear the cache for the vector db
|
||||
Returns:
|
||||
An http Response containing the result of the operation
|
||||
@ -193,14 +229,14 @@ repeated float %s = 1;
|
||||
"""
|
||||
return self.ispn.cache_exists(self._cache_name)
|
||||
|
||||
def cache_index_clear(self) -> requests.Response:
|
||||
def cache_index_clear(self) -> Response:
|
||||
"""Clear the index for the vector db
|
||||
Returns:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
return self.ispn.index_clear(self._cache_name)
|
||||
|
||||
def cache_index_reindex(self) -> requests.Response:
|
||||
def cache_index_reindex(self) -> Response:
|
||||
"""Rebuild the index for the vector db
|
||||
Returns:
|
||||
An http Response containing the result of the operation
|
||||
@ -325,12 +361,16 @@ repeated float %s = 1;
|
||||
def configure(self, metadata: dict, dimension: int) -> None:
|
||||
schema = self.schema_builder(metadata, dimension)
|
||||
output = self.schema_create(schema)
|
||||
assert output.ok, "Unable to create schema. Already exists? "
|
||||
assert (
|
||||
output.status_code == self.ispn.Codes.OK
|
||||
), "Unable to create schema. Already exists? "
|
||||
"Consider using clear_old=True"
|
||||
assert json.loads(output.text)["error"] is None
|
||||
if not self.cache_exists():
|
||||
output = self.cache_create()
|
||||
assert output.ok, "Unable to create cache. Already exists? "
|
||||
assert (
|
||||
output.status_code == self.ispn.Codes.OK
|
||||
), "Unable to create cache. Already exists? "
|
||||
"Consider using clear_old=True"
|
||||
# Ensure index is clean
|
||||
self.cache_index_clear()
|
||||
@ -350,7 +390,24 @@ repeated float %s = 1;
|
||||
auto_config: Optional[bool] = True,
|
||||
**kwargs: Any,
|
||||
) -> InfinispanVS:
|
||||
"""Return VectorStore initialized from texts and embeddings."""
|
||||
"""Return VectorStore initialized from texts and embeddings.
|
||||
|
||||
In addition to parameters described by the super method, this
|
||||
implementation provides other configuration params if different
|
||||
configuration from default is needed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ids : List[str]
|
||||
Additional list of keys associated to the embedding. If not
|
||||
provided UUIDs will be generated
|
||||
clear_old : bool
|
||||
Whether old data must be deleted. Default True
|
||||
auto_config: bool
|
||||
Whether to do a complete server setup (caches,
|
||||
protobuf definition...). Default True
|
||||
kwargs: Any
|
||||
Rest of arguments passed to InfinispanVS. See docs"""
|
||||
infinispanvs = cls(embedding=embedding, ids=ids, **kwargs)
|
||||
if auto_config and len(metadatas or []) > 0:
|
||||
if clear_old:
|
||||
@ -381,20 +438,83 @@ class Infinispan:
|
||||
https://github.com/rigazilla/infinispan-vector#run-infinispan
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs: Any):
|
||||
self._configuration = kwargs
|
||||
self._schema = str(self._configuration.get("schema", "http"))
|
||||
self._host = str(self._configuration.get("hosts", ["127.0.0.1:11222"])[0])
|
||||
self._default_node = self._schema + "://" + self._host
|
||||
self._cache_url = str(self._configuration.get("cache_url", "/rest/v2/caches"))
|
||||
self._schema_url = str(self._configuration.get("cache_url", "/rest/v2/schemas"))
|
||||
self._use_post_for_query = str(
|
||||
self._configuration.get("use_post_for_query", True)
|
||||
)
|
||||
def __init__(
|
||||
self,
|
||||
schema: str = "http",
|
||||
user: str = "",
|
||||
password: str = "",
|
||||
hosts: List[str] = ["127.0.0.1:11222"],
|
||||
cache_url: str = "/rest/v2/caches",
|
||||
schema_url: str = "/rest/v2/schemas",
|
||||
use_post_for_query: bool = True,
|
||||
http2: bool = True,
|
||||
verify: bool = True,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
schema: str
|
||||
Schema for HTTP request: "http" or "https". Default "http"
|
||||
user, password: str
|
||||
User and password if auth is required. Default "" (no authentication)
|
||||
hosts: List[str]
|
||||
List of server addresses. Default ["127.0.0.1:11222"]
|
||||
cache_url: str
|
||||
URL endpoint for cache API. Default "/rest/v2/caches"
|
||||
schema_url: str
|
||||
URL endpoint for schema API. Default "/rest/v2/schemas"
|
||||
use_post_for_query: bool
|
||||
Whether POST method should be used for query. Default True
|
||||
http2: bool
|
||||
Whether HTTP/2 protocol should be used. `pip install "httpx[http2]"` is
|
||||
needed for HTTP/2. Default True
|
||||
verify: bool
|
||||
Whether TLS certificate must be verified. Default True
|
||||
"""
|
||||
|
||||
def req_query(
|
||||
self, query: str, cache_name: str, local: bool = False
|
||||
) -> requests.Response:
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import httpx python package. "
|
||||
"Please install it with `pip install httpx`"
|
||||
'or `pip install "httpx[http2]"` if you need HTTP/2.'
|
||||
)
|
||||
|
||||
self.Codes = httpx.codes
|
||||
|
||||
self._configuration = kwargs
|
||||
self._schema = schema
|
||||
self._user = user
|
||||
self._password = password
|
||||
self._host = hosts[0]
|
||||
self._default_node = self._schema + "://" + self._host
|
||||
self._cache_url = cache_url
|
||||
self._schema_url = schema_url
|
||||
self._use_post_for_query = use_post_for_query
|
||||
self._http2 = http2
|
||||
if self._user and self._password:
|
||||
if self._schema == "http":
|
||||
auth: Union[Tuple[str, str], httpx.DigestAuth] = httpx.DigestAuth(
|
||||
username=self._user, password=self._password
|
||||
)
|
||||
else:
|
||||
auth = (self._user, self._password)
|
||||
self._h2c = httpx.Client(
|
||||
http2=self._http2,
|
||||
http1=not self._http2,
|
||||
auth=auth,
|
||||
verify=verify,
|
||||
)
|
||||
else:
|
||||
self._h2c = httpx.Client(
|
||||
http2=self._http2,
|
||||
http1=not self._http2,
|
||||
verify=verify,
|
||||
)
|
||||
|
||||
def req_query(self, query: str, cache_name: str, local: bool = False) -> Response:
|
||||
"""Request a query
|
||||
Args:
|
||||
query(str): query requested
|
||||
@ -409,7 +529,7 @@ class Infinispan:
|
||||
|
||||
def _query_post(
|
||||
self, query_str: str, cache_name: str, local: bool = False
|
||||
) -> requests.Response:
|
||||
) -> Response:
|
||||
api_url = (
|
||||
self._default_node
|
||||
+ self._cache_url
|
||||
@ -420,9 +540,9 @@ class Infinispan:
|
||||
)
|
||||
data = {"query": query_str}
|
||||
data_json = json.dumps(data)
|
||||
response = requests.post(
|
||||
response = self._h2c.post(
|
||||
api_url,
|
||||
data_json,
|
||||
content=data_json,
|
||||
headers={"Content-Type": "application/json"},
|
||||
timeout=REST_TIMEOUT,
|
||||
)
|
||||
@ -430,7 +550,7 @@ class Infinispan:
|
||||
|
||||
def _query_get(
|
||||
self, query_str: str, cache_name: str, local: bool = False
|
||||
) -> requests.Response:
|
||||
) -> Response:
|
||||
api_url = (
|
||||
self._default_node
|
||||
+ self._cache_url
|
||||
@ -441,10 +561,10 @@ class Infinispan:
|
||||
+ "&local="
|
||||
+ str(local)
|
||||
)
|
||||
response = requests.get(api_url, timeout=REST_TIMEOUT)
|
||||
response = self._h2c.get(api_url, timeout=REST_TIMEOUT)
|
||||
return response
|
||||
|
||||
def post(self, key: str, data: str, cache_name: str) -> requests.Response:
|
||||
def post(self, key: str, data: str, cache_name: str) -> Response:
|
||||
"""Post an entry
|
||||
Args:
|
||||
key(str): key of the entry
|
||||
@ -454,15 +574,15 @@ class Infinispan:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
|
||||
response = requests.post(
|
||||
response = self._h2c.post(
|
||||
api_url,
|
||||
data,
|
||||
content=data,
|
||||
headers={"Content-Type": "application/json"},
|
||||
timeout=REST_TIMEOUT,
|
||||
)
|
||||
return response
|
||||
|
||||
def put(self, key: str, data: str, cache_name: str) -> requests.Response:
|
||||
def put(self, key: str, data: str, cache_name: str) -> Response:
|
||||
"""Put an entry
|
||||
Args:
|
||||
key(str): key of the entry
|
||||
@ -472,15 +592,15 @@ class Infinispan:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
|
||||
response = requests.put(
|
||||
response = self._h2c.put(
|
||||
api_url,
|
||||
data,
|
||||
content=data,
|
||||
headers={"Content-Type": "application/json"},
|
||||
timeout=REST_TIMEOUT,
|
||||
)
|
||||
return response
|
||||
|
||||
def get(self, key: str, cache_name: str) -> requests.Response:
|
||||
def get(self, key: str, cache_name: str) -> Response:
|
||||
"""Get an entry
|
||||
Args:
|
||||
key(str): key of the entry
|
||||
@ -489,12 +609,12 @@ class Infinispan:
|
||||
An http Response containing the entry or errors
|
||||
"""
|
||||
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
|
||||
response = requests.get(
|
||||
response = self._h2c.get(
|
||||
api_url, headers={"Content-Type": "application/json"}, timeout=REST_TIMEOUT
|
||||
)
|
||||
return response
|
||||
|
||||
def schema_post(self, name: str, proto: str) -> requests.Response:
|
||||
def schema_post(self, name: str, proto: str) -> Response:
|
||||
"""Deploy a schema
|
||||
Args:
|
||||
name(str): name of the schema. Will be used as a key
|
||||
@ -503,10 +623,10 @@ class Infinispan:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
api_url = self._default_node + self._schema_url + "/" + name
|
||||
response = requests.post(api_url, proto, timeout=REST_TIMEOUT)
|
||||
response = self._h2c.post(api_url, content=proto, timeout=REST_TIMEOUT)
|
||||
return response
|
||||
|
||||
def cache_post(self, name: str, config: str) -> requests.Response:
|
||||
def cache_post(self, name: str, config: str) -> Response:
|
||||
"""Create a cache
|
||||
Args:
|
||||
name(str): name of the cache.
|
||||
@ -515,15 +635,15 @@ class Infinispan:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
api_url = self._default_node + self._cache_url + "/" + name
|
||||
response = requests.post(
|
||||
response = self._h2c.post(
|
||||
api_url,
|
||||
config,
|
||||
content=config,
|
||||
headers={"Content-Type": "application/json"},
|
||||
timeout=REST_TIMEOUT,
|
||||
)
|
||||
return response
|
||||
|
||||
def schema_delete(self, name: str) -> requests.Response:
|
||||
def schema_delete(self, name: str) -> Response:
|
||||
"""Delete a schema
|
||||
Args:
|
||||
name(str): name of the schema.
|
||||
@ -531,10 +651,10 @@ class Infinispan:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
api_url = self._default_node + self._schema_url + "/" + name
|
||||
response = requests.delete(api_url, timeout=REST_TIMEOUT)
|
||||
response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
|
||||
return response
|
||||
|
||||
def cache_delete(self, name: str) -> requests.Response:
|
||||
def cache_delete(self, name: str) -> Response:
|
||||
"""Delete a cache
|
||||
Args:
|
||||
name(str): name of the cache.
|
||||
@ -542,10 +662,10 @@ class Infinispan:
|
||||
An http Response containing the result of the operation
|
||||
"""
|
||||
api_url = self._default_node + self._cache_url + "/" + name
|
||||
response = requests.delete(api_url, timeout=REST_TIMEOUT)
|
||||
response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
|
||||
return response
|
||||
|
||||
def cache_clear(self, cache_name: str) -> requests.Response:
|
||||
def cache_clear(self, cache_name: str) -> Response:
|
||||
"""Clear a cache
|
||||
Args:
|
||||
cache_name(str): name of the cache.
|
||||
@ -555,7 +675,7 @@ class Infinispan:
|
||||
api_url = (
|
||||
self._default_node + self._cache_url + "/" + cache_name + "?action=clear"
|
||||
)
|
||||
response = requests.post(api_url, timeout=REST_TIMEOUT)
|
||||
response = self._h2c.post(api_url, timeout=REST_TIMEOUT)
|
||||
return response
|
||||
|
||||
def cache_exists(self, cache_name: str) -> bool:
|
||||
@ -570,18 +690,17 @@ class Infinispan:
|
||||
)
|
||||
return self.resource_exists(api_url)
|
||||
|
||||
@staticmethod
|
||||
def resource_exists(api_url: str) -> bool:
|
||||
def resource_exists(self, api_url: str) -> bool:
|
||||
"""Check if a resource exists
|
||||
Args:
|
||||
api_url(str): url of the resource.
|
||||
Returns:
|
||||
true if resource exists
|
||||
"""
|
||||
response = requests.head(api_url, timeout=REST_TIMEOUT)
|
||||
return response.ok
|
||||
response = self._h2c.head(api_url, timeout=REST_TIMEOUT)
|
||||
return response.status_code == self.Codes.OK
|
||||
|
||||
def index_clear(self, cache_name: str) -> requests.Response:
|
||||
def index_clear(self, cache_name: str) -> Response:
|
||||
"""Clear an index on a cache
|
||||
Args:
|
||||
cache_name(str): name of the cache.
|
||||
@ -595,9 +714,9 @@ class Infinispan:
|
||||
+ cache_name
|
||||
+ "/search/indexes?action=clear"
|
||||
)
|
||||
return requests.post(api_url, timeout=REST_TIMEOUT)
|
||||
return self._h2c.post(api_url, timeout=REST_TIMEOUT)
|
||||
|
||||
def index_reindex(self, cache_name: str) -> requests.Response:
|
||||
def index_reindex(self, cache_name: str) -> Response:
|
||||
"""Rebuild index on a cache
|
||||
Args:
|
||||
cache_name(str): name of the cache.
|
||||
@ -611,4 +730,4 @@ class Infinispan:
|
||||
+ cache_name
|
||||
+ "/search/indexes?action=reindex"
|
||||
)
|
||||
return requests.post(api_url, timeout=REST_TIMEOUT)
|
||||
return self._h2c.post(api_url, timeout=REST_TIMEOUT)
|
||||
|
@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Start the Infinispan server defined by the compose file in ./infinispan.
# Run this script from the directory that contains the `infinispan/` folder.

# Stop here if the folder is missing, so `docker compose up` is never run
# against whatever compose file happens to be in the current directory.
cd infinispan || exit 1
docker compose up
|
@ -0,0 +1,2 @@
|
||||
#Fri May 03 10:19:58 CEST 2024
|
||||
user=ADMIN,admin
|
@ -0,0 +1,62 @@
|
||||
<infinispan
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="urn:infinispan:config:15.0 https://infinispan.org/schemas/infinispan-config-15.0.xsd
|
||||
urn:infinispan:server:15.0 https://infinispan.org/schemas/infinispan-server-15.0.xsd"
|
||||
xmlns="urn:infinispan:config:15.0"
|
||||
xmlns:server="urn:infinispan:server:15.0">
|
||||
|
||||
<cache-container name="default" statistics="true">
|
||||
<transport cluster="${infinispan.cluster.name:cluster}" stack="${infinispan.cluster.stack:tcp}" node-name="${infinispan.node.name:}"/>
|
||||
</cache-container>
|
||||
|
||||
<server xmlns="urn:infinispan:server:15.0">
|
||||
<interfaces>
|
||||
<interface name="public">
|
||||
<inet-address value="${infinispan.bind.address:127.0.0.1}"/>
|
||||
</interface>
|
||||
</interfaces>
|
||||
|
||||
<socket-bindings default-interface="public" port-offset="${infinispan.socket.binding.port-offset:0}">
|
||||
<socket-binding name="default" port="${infinispan.bind.port:11222}"/>
|
||||
<socket-binding name="authenticated" port="11232"/>
|
||||
<socket-binding name="auth-tls" port="11242"/>
|
||||
</socket-bindings>
|
||||
|
||||
<security>
|
||||
<credential-stores>
|
||||
<credential-store name="credentials" path="credentials.pfx">
|
||||
<clear-text-credential clear-text="secret"/>
|
||||
</credential-store>
|
||||
</credential-stores>
|
||||
<security-realms>
|
||||
<security-realm name="default">
|
||||
<properties-realm groups-attribute="Roles">
|
||||
<user-properties path="/user-config/users.properties"/>
|
||||
<group-properties path="/user-config/groups.properties"/>
|
||||
</properties-realm>
|
||||
</security-realm>
|
||||
<security-realm name="tls">
|
||||
<!-- TLS is enabled on this realm through the server identity below -->
|
||||
<server-identities>
|
||||
<ssl>
|
||||
<keystore path="application.keystore"
|
||||
password="password" alias="server"
|
||||
generate-self-signed-certificate-host="localhost"/>
|
||||
|
||||
</ssl>
|
||||
</server-identities>
|
||||
<properties-realm groups-attribute="Roles">
|
||||
<user-properties path="/user-config/users.properties"/>
|
||||
<group-properties path="/user-config/groups.properties"/>
|
||||
</properties-realm>
|
||||
</security-realm>
|
||||
</security-realms>
|
||||
</security>
|
||||
|
||||
<endpoints>
|
||||
<endpoint socket-binding="default"/>
|
||||
<endpoint socket-binding="authenticated" security-realm="default"/>
|
||||
<endpoint socket-binding="auth-tls" security-realm="tls"/>
|
||||
</endpoints>
|
||||
</server>
|
||||
</infinispan>
|
@ -0,0 +1,4 @@
|
||||
#$REALM_NAME=default$
|
||||
#$ALGORITHM=encrypted$
|
||||
#Fri May 03 10:19:58 CEST 2024
|
||||
user=scram-sha-1\:BYGcIAws2gznU/kpezoSb1VQNVd+YMX9r+9SAINFoZtPHaHTAQ\=\=;scram-sha-256\:BYGcIAwRiWiD+8f7dyQEs1Wsum/64MOcjGJ2UcmZFQB6DZJqwRDJ4NrvII4NttmxlA\=\=;scram-sha-384\:BYGcIAz+Eud65N8GWK4TMwhSCZpeE5EFSdynywdryQj3ZwBEgv+KF8hRUuGxiq3EyRxsby6w7DHK3CICGZLsPrM\=;scram-sha-512\:BYGcIAwWxVY9DHn42kHydivyU3s9LSPmyfPPJkIFYyt/XsMASFHGoy5rzk4ahX4HjpJgb+NjdCwhGfi33CY0azUIrn439s62Yg5mq9i+ISto;digest-md5\:AgR1c2VyB2RlZmF1bHSYYyzPjRDR7MhrsdFSK03P;digest-sha\:AgR1c2VyB2RlZmF1bHTga5gDNnNYh7/2HqhBVOdUHjBzhw\=\=;digest-sha-256\:AgR1c2VyB2RlZmF1bHTig5qZQIxqtJBTUp3EMh5UIFoS4qOhz9Uk5aOW9ZKCfw\=\=;digest-sha-384\:AgR1c2VyB2RlZmF1bHT01pAN/pRMLS5afm4Q9S0kuLlA0NokuP8F0AISTwXCb1E8RMsFHlBVPOa5rC6Nyso\=;digest-sha-512\:AgR1c2VyB2RlZmF1bHTi+cHn1Ez2Ze41CvPXb9eP/7JmRys7m1f5qPMQWhAmDOuuUXNWEG4yKSI9k2EZgQvMKTd5hDbR24ul1BsYP8X5;
|
@ -0,0 +1,16 @@
|
||||
version: "3.7"
|
||||
|
||||
services:
|
||||
infinispan:
|
||||
image: quay.io/infinispan/server:15.0
|
||||
ports:
|
||||
- '11222:11222'
|
||||
- '11232:11232'
|
||||
- '11242:11242'
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 25Gb
|
||||
volumes:
|
||||
- ./conf:/user-config
|
||||
command: -c /user-config/infinispan.xml
|
@ -1,7 +1,9 @@
|
||||
"""Test Infinispan functionality."""
|
||||
|
||||
import warnings
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@ -11,9 +13,18 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
fake_texts,
|
||||
)
|
||||
|
||||
"""
|
||||
cd tests/integration_tests/vectorstores/docker-compose
|
||||
./infinispan.sh
|
||||
|
||||
def _infinispan_setup_noautoconf() -> None:
|
||||
ispnvs = InfinispanVS(auto_config=False)
|
||||
Current Infinispan implementation relies on httpx: `pip install "httpx[http2]"`
|
||||
if not installed. HTTP/2 is enabled by default, if it's not
|
||||
wanted use `pip install "httpx"`.
|
||||
"""
|
||||
|
||||
|
||||
def _infinispan_setup_noautoconf(**kwargs: Any) -> None:
|
||||
ispnvs = InfinispanVS(http2=_hasHttp2(), auto_config=False, **kwargs)
|
||||
ispnvs.cache_delete()
|
||||
ispnvs.schema_delete()
|
||||
proto = """
|
||||
@ -54,64 +65,104 @@ def _infinispanvs_from_texts(
|
||||
ids=ids,
|
||||
clear_old=clear_old,
|
||||
auto_config=auto_config,
|
||||
http2=_hasHttp2(),
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def _hasHttp2() -> bool:
    """Report whether an HTTP/2-enabled httpx client can be created.

    Returns:
        True if ``httpx.Client(http2=True)`` can be constructed, False if
        httpx raises ImportError because HTTP/2 support is not installed.
    """
    try:
        # Use the client as a context manager so the probe is closed
        # immediately instead of leaking an open client.
        with httpx.Client(http2=True):
            return True
    except ImportError:
        return False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("autoconfig", [False, True])
|
||||
@pytest.mark.parametrize(
|
||||
"conn_opts",
|
||||
[
|
||||
{},
|
||||
{
|
||||
"user": "user",
|
||||
"password": "password",
|
||||
"hosts": ["localhost:11232"],
|
||||
"schema": "http",
|
||||
},
|
||||
{
|
||||
"user": "user",
|
||||
"password": "password",
|
||||
"hosts": ["localhost:11242"],
|
||||
"schema": "https",
|
||||
"verify": False,
|
||||
},
|
||||
],
|
||||
)
|
||||
class TestBasic:
|
||||
def test_infinispan(self, autoconfig: bool) -> None:
|
||||
def test_infinispan(self, autoconfig: bool, conn_opts: dict) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
docsearch = _infinispanvs_from_texts(auto_config=autoconfig)
|
||||
_infinispan_setup_noautoconf(**conn_opts)
|
||||
docsearch = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
def test_infinispan_with_metadata(self, autoconfig: bool) -> None:
|
||||
def test_infinispan_with_auth(self, autoconfig: bool, conn_opts: dict) -> None:
    """Build a store with the given connection options and search it.

    When ``autoconfig`` is false the server is prepared by hand first,
    using the same connection options.
    """
    manual_setup = not autoconfig
    if manual_setup:
        _infinispan_setup_noautoconf(**conn_opts)
    store = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
    hits = store.similarity_search("foo", k=1)
    assert hits == [Document(page_content="foo")]
|
||||
|
||||
def test_infinispan_with_metadata(self, autoconfig: bool, conn_opts: dict) -> None:
|
||||
"""Test with metadata"""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
_infinispan_setup_noautoconf(**conn_opts)
|
||||
meta = []
|
||||
for _ in range(len(fake_texts)):
|
||||
meta.append({"label": "test"})
|
||||
docsearch = _infinispanvs_from_texts(metadatas=meta, auto_config=autoconfig)
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=meta, auto_config=autoconfig, **conn_opts
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"label": "test"})]
|
||||
|
||||
def test_infinispan_with_metadata_with_output_fields(
|
||||
self, autoconfig: bool
|
||||
self, autoconfig: bool, conn_opts: dict
|
||||
) -> None:
|
||||
"""Test with metadata"""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
_infinispan_setup_noautoconf(**conn_opts)
|
||||
metadatas = [
|
||||
{"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))
|
||||
]
|
||||
c = {"output_fields": ["label", "page", "text"]}
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, configuration=c, auto_config=autoconfig
|
||||
metadatas=metadatas, configuration=c, auto_config=autoconfig, **conn_opts
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [
|
||||
Document(page_content="foo", metadata={"label": "label0", "page": 0})
|
||||
]
|
||||
|
||||
def test_infinispanvs_with_id(self, autoconfig: bool) -> None:
|
||||
def test_infinispanvs_with_id(self, autoconfig: bool, conn_opts: dict) -> None:
|
||||
"""Test with ids"""
|
||||
ids = ["id_" + str(i) for i in range(len(fake_texts))]
|
||||
docsearch = _infinispanvs_from_texts(ids=ids, auto_config=autoconfig)
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
ids=ids, auto_config=autoconfig, **conn_opts
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
def test_infinispan_with_score(self, autoconfig: bool) -> None:
|
||||
def test_infinispan_with_score(self, autoconfig: bool, conn_opts: dict) -> None:
|
||||
"""Test end to end construction and search with scores and IDs."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
_infinispan_setup_noautoconf(**conn_opts)
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, auto_config=autoconfig
|
||||
metadatas=metadatas, auto_config=autoconfig, **conn_opts
|
||||
)
|
||||
output = docsearch.similarity_search_with_score("foo", k=3)
|
||||
docs = [o[0] for o in output]
|
||||
@ -123,14 +174,14 @@ class TestBasic:
|
||||
]
|
||||
assert scores[0] >= scores[1] >= scores[2]
|
||||
|
||||
def test_infinispan_add_texts(self, autoconfig: bool) -> None:
|
||||
def test_infinispan_add_texts(self, autoconfig: bool, conn_opts: dict) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
_infinispan_setup_noautoconf(**conn_opts)
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, auto_config=autoconfig
|
||||
metadatas=metadatas, auto_config=autoconfig, **conn_opts
|
||||
)
|
||||
|
||||
docsearch.add_texts(texts, metadatas)
|
||||
@ -138,19 +189,22 @@ class TestBasic:
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
assert len(output) == 6
|
||||
|
||||
def test_infinispan_no_clear_old(self, autoconfig: bool) -> None:
|
||||
def test_infinispan_no_clear_old(self, autoconfig: bool, conn_opts: dict) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
_infinispan_setup_noautoconf(**conn_opts)
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, auto_config=autoconfig
|
||||
metadatas=metadatas, auto_config=autoconfig, **conn_opts
|
||||
)
|
||||
del docsearch
|
||||
try:
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, clear_old=False, auto_config=autoconfig
|
||||
metadatas=metadatas,
|
||||
clear_old=False,
|
||||
auto_config=autoconfig,
|
||||
**conn_opts,
|
||||
)
|
||||
except AssertionError:
|
||||
if autoconfig:
|
||||
@ -159,3 +213,12 @@ class TestBasic:
|
||||
raise
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
assert len(output) == 6
|
||||
|
||||
|
||||
class TestHttp2:
    """Check whether an HTTP/2-enabled httpx client can be created."""

    def test_http2(self) -> None:
        """Warn, rather than fail, when HTTP/2 support is not installed."""
        try:
            # Close the probe client right away so it is not leaked.
            httpx.Client(http2=True).close()
        except ImportError:
            warnings.warn('pip install "httpx[http2]" if you need HTTP/2')
|
||||
|
Loading…
Reference in New Issue
Block a user