community[minor]: VectorStore Infinispan. Adding TLS and authentication (#23522)

**Description**:
this PR enable VectorStore TLS and authentication (digest, basic) with
HTTP/2 for Infinispan server.
Based on httpx.

Added docker-compose facilities for testing
Added documentation

**Dependencies:**
requires `pip install httpx[http2]` if HTTP2 is needed

**Twitter handle:**
https://twitter.com/infinispan
This commit is contained in:
Vittorio Rigamonti 2024-10-09 16:51:39 +02:00 committed by GitHub
parent ff925d2ddc
commit 7da2efd9d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 353 additions and 83 deletions

View File

@ -5,9 +5,10 @@ from __future__ import annotations
import json
import logging
import uuid
from typing import Any, Iterable, List, Optional, Tuple, Type, cast
import warnings
from typing import Any, Iterable, List, Optional, Tuple, Type, Union, cast
import requests
from httpx import Response
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore
@ -49,7 +50,7 @@ class InfinispanVS(VectorStore):
embedding=RGBEmbeddings(),
output_fields: ["texture", "color"],
lambda_key: lambda text,meta: str(meta["_key"]),
lambda_content: lambda item: item["color"]})
lambda_content: lambda item: item["color"])
"""
def __init__(
@ -58,13 +59,48 @@ class InfinispanVS(VectorStore):
ids: Optional[List[str]] = None,
**kwargs: Any,
):
"""
Parameters
----------
cache_name: str
Embeddings cache name. Default "vector"
entity_name: str
Protobuf entity name for the embeddings. Default "vector"
text_field: str
Protobuf field name for text. Default "text"
vector_field: str
Protobuf field name for vector. Default "vector"
lambda_content: lambda
Lambda returning the content part of an item. Default returns text_field
lambda_metadata: lambda
Lambda returning the metadata part of an item. Default returns items
fields excepts text_field, vector_field, _type
output_fields: List[str]
List of fields to be returned from item, if None return all fields.
Default None
kwargs: Any
Rest of arguments passed to Infinispan. See docs"""
self.ispn = Infinispan(**kwargs)
self._configuration = kwargs
self._cache_name = str(self._configuration.get("cache_name", "vector"))
self._entity_name = str(self._configuration.get("entity_name", "vector"))
self._embedding = embedding
self._textfield = self._configuration.get("textfield", "text")
self._vectorfield = self._configuration.get("vectorfield", "vector")
self._textfield = self._configuration.get("textfield", "")
if self._textfield == "":
self._textfield = self._configuration.get("text_field", "text")
else:
warnings.warn(
"`textfield` is deprecated. Please use `text_field` " "param.",
DeprecationWarning,
)
self._vectorfield = self._configuration.get("vectorfield", "")
if self._vectorfield == "":
self._vectorfield = self._configuration.get("vector_field", "vector")
else:
warnings.warn(
"`vectorfield` is deprecated. Please use `vector_field` " "param.",
DeprecationWarning,
)
self._to_content = self._configuration.get(
"lambda_content", lambda item: self._default_content(item)
)
@ -121,7 +157,7 @@ repeated float %s = 1;
metadata_proto += "}\n"
return metadata_proto
def schema_create(self, proto: str) -> requests.Response:
def schema_create(self, proto: str) -> Response:
"""Deploy the schema for the vector db
Args:
proto(str): protobuf schema
@ -130,14 +166,14 @@ repeated float %s = 1;
"""
return self.ispn.schema_post(self._entity_name + ".proto", proto)
def schema_delete(self) -> requests.Response:
def schema_delete(self) -> Response:
"""Delete the schema for the vector db
Returns:
An http Response containing the result of the operation
"""
return self.ispn.schema_delete(self._entity_name + ".proto")
def cache_create(self, config: str = "") -> requests.Response:
def cache_create(self, config: str = "") -> Response:
"""Create the cache for the vector db
Args:
config(str): configuration of the cache.
@ -172,14 +208,14 @@ repeated float %s = 1;
)
return self.ispn.cache_post(self._cache_name, config)
def cache_delete(self) -> requests.Response:
def cache_delete(self) -> Response:
"""Delete the cache for the vector db
Returns:
An http Response containing the result of the operation
"""
return self.ispn.cache_delete(self._cache_name)
def cache_clear(self) -> requests.Response:
def cache_clear(self) -> Response:
"""Clear the cache for the vector db
Returns:
An http Response containing the result of the operation
@ -193,14 +229,14 @@ repeated float %s = 1;
"""
return self.ispn.cache_exists(self._cache_name)
def cache_index_clear(self) -> requests.Response:
def cache_index_clear(self) -> Response:
"""Clear the index for the vector db
Returns:
An http Response containing the result of the operation
"""
return self.ispn.index_clear(self._cache_name)
def cache_index_reindex(self) -> requests.Response:
def cache_index_reindex(self) -> Response:
"""Rebuild the for the vector db
Returns:
An http Response containing the result of the operation
@ -325,12 +361,16 @@ repeated float %s = 1;
def configure(self, metadata: dict, dimension: int) -> None:
schema = self.schema_builder(metadata, dimension)
output = self.schema_create(schema)
assert output.ok, "Unable to create schema. Already exists? "
assert (
output.status_code == self.ispn.Codes.OK
), "Unable to create schema. Already exists? "
"Consider using clear_old=True"
assert json.loads(output.text)["error"] is None
if not self.cache_exists():
output = self.cache_create()
assert output.ok, "Unable to create cache. Already exists? "
assert (
output.status_code == self.ispn.Codes.OK
), "Unable to create cache. Already exists? "
"Consider using clear_old=True"
# Ensure index is clean
self.cache_index_clear()
@ -350,7 +390,24 @@ repeated float %s = 1;
auto_config: Optional[bool] = True,
**kwargs: Any,
) -> InfinispanVS:
"""Return VectorStore initialized from texts and embeddings."""
"""Return VectorStore initialized from texts and embeddings.
In addition to parameters described by the super method, this
implementation provides other configuration params if different
configuration from default is needed.
Parameters
----------
ids : List[str]
Additional list of keys associated to the embedding. If not
provided UUIDs will be generated
clear_old : bool
Whether old data must be deleted. Default True
auto_config: bool
Whether to do a complete server setup (caches,
protobuf definition...). Default True
kwargs: Any
Rest of arguments passed to InfinispanVS. See docs"""
infinispanvs = cls(embedding=embedding, ids=ids, **kwargs)
if auto_config and len(metadatas or []) > 0:
if clear_old:
@ -381,20 +438,83 @@ class Infinispan:
https://github.com/rigazilla/infinispan-vector#run-infinispan
"""
def __init__(self, **kwargs: Any):
self._configuration = kwargs
self._schema = str(self._configuration.get("schema", "http"))
self._host = str(self._configuration.get("hosts", ["127.0.0.1:11222"])[0])
self._default_node = self._schema + "://" + self._host
self._cache_url = str(self._configuration.get("cache_url", "/rest/v2/caches"))
self._schema_url = str(self._configuration.get("cache_url", "/rest/v2/schemas"))
self._use_post_for_query = str(
self._configuration.get("use_post_for_query", True)
)
def __init__(
self,
schema: str = "http",
user: str = "",
password: str = "",
hosts: List[str] = ["127.0.0.1:11222"],
cache_url: str = "/rest/v2/caches",
schema_url: str = "/rest/v2/schemas",
use_post_for_query: bool = True,
http2: bool = True,
verify: bool = True,
**kwargs: Any,
):
"""
Parameters
----------
schema: str
Schema for HTTP request: "http" or "https". Default "http"
user, password: str
User and password if auth is required. Default None
hosts: List[str]
List of server addresses. Default ["127.0.0.1:11222"]
cache_url: str
URL endpoint for cache API. Default "/rest/v2/caches"
schema_url: str
URL endpoint for schema API. Default "/rest/v2/schemas"
use_post_for_query: bool
Whether POST method should be used for query. Default True
http2: bool
Whether HTTP/2 protocol should be used. `pip install "httpx[http2]"` is
needed for HTTP/2. Default True
verify: bool
Whether TLS certificate must be verified. Default True
"""
def req_query(
self, query: str, cache_name: str, local: bool = False
) -> requests.Response:
try:
import httpx
except ImportError:
raise ImportError(
"Could not import httpx python package. "
"Please install it with `pip install httpx`"
'or `pip install "httpx[http2]"` if you need HTTP/2.'
)
self.Codes = httpx.codes
self._configuration = kwargs
self._schema = schema
self._user = user
self._password = password
self._host = hosts[0]
self._default_node = self._schema + "://" + self._host
self._cache_url = cache_url
self._schema_url = schema_url
self._use_post_for_query = use_post_for_query
self._http2 = http2
if self._user and self._password:
if self._schema == "http":
auth: Union[Tuple[str, str], httpx.DigestAuth] = httpx.DigestAuth(
username=self._user, password=self._password
)
else:
auth = (self._user, self._password)
self._h2c = httpx.Client(
http2=self._http2,
http1=not self._http2,
auth=auth,
verify=verify,
)
else:
self._h2c = httpx.Client(
http2=self._http2,
http1=not self._http2,
verify=verify,
)
def req_query(self, query: str, cache_name: str, local: bool = False) -> Response:
"""Request a query
Args:
query(str): query requested
@ -409,7 +529,7 @@ class Infinispan:
def _query_post(
self, query_str: str, cache_name: str, local: bool = False
) -> requests.Response:
) -> Response:
api_url = (
self._default_node
+ self._cache_url
@ -420,9 +540,9 @@ class Infinispan:
)
data = {"query": query_str}
data_json = json.dumps(data)
response = requests.post(
response = self._h2c.post(
api_url,
data_json,
content=data_json,
headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT,
)
@ -430,7 +550,7 @@ class Infinispan:
def _query_get(
self, query_str: str, cache_name: str, local: bool = False
) -> requests.Response:
) -> Response:
api_url = (
self._default_node
+ self._cache_url
@ -441,10 +561,10 @@ class Infinispan:
+ "&local="
+ str(local)
)
response = requests.get(api_url, timeout=REST_TIMEOUT)
response = self._h2c.get(api_url, timeout=REST_TIMEOUT)
return response
def post(self, key: str, data: str, cache_name: str) -> requests.Response:
def post(self, key: str, data: str, cache_name: str) -> Response:
"""Post an entry
Args:
key(str): key of the entry
@ -454,15 +574,15 @@ class Infinispan:
An http Response containing the result of the operation
"""
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
response = requests.post(
response = self._h2c.post(
api_url,
data,
content=data,
headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT,
)
return response
def put(self, key: str, data: str, cache_name: str) -> requests.Response:
def put(self, key: str, data: str, cache_name: str) -> Response:
"""Put an entry
Args:
key(str): key of the entry
@ -472,15 +592,15 @@ class Infinispan:
An http Response containing the result of the operation
"""
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
response = requests.put(
response = self._h2c.put(
api_url,
data,
content=data,
headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT,
)
return response
def get(self, key: str, cache_name: str) -> requests.Response:
def get(self, key: str, cache_name: str) -> Response:
"""Get an entry
Args:
key(str): key of the entry
@ -489,12 +609,12 @@ class Infinispan:
An http Response containing the entry or errors
"""
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
response = requests.get(
response = self._h2c.get(
api_url, headers={"Content-Type": "application/json"}, timeout=REST_TIMEOUT
)
return response
def schema_post(self, name: str, proto: str) -> requests.Response:
def schema_post(self, name: str, proto: str) -> Response:
"""Deploy a schema
Args:
name(str): name of the schema. Will be used as a key
@ -503,10 +623,10 @@ class Infinispan:
An http Response containing the result of the operation
"""
api_url = self._default_node + self._schema_url + "/" + name
response = requests.post(api_url, proto, timeout=REST_TIMEOUT)
response = self._h2c.post(api_url, content=proto, timeout=REST_TIMEOUT)
return response
def cache_post(self, name: str, config: str) -> requests.Response:
def cache_post(self, name: str, config: str) -> Response:
"""Create a cache
Args:
name(str): name of the cache.
@ -515,15 +635,15 @@ class Infinispan:
An http Response containing the result of the operation
"""
api_url = self._default_node + self._cache_url + "/" + name
response = requests.post(
response = self._h2c.post(
api_url,
config,
content=config,
headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT,
)
return response
def schema_delete(self, name: str) -> requests.Response:
def schema_delete(self, name: str) -> Response:
"""Delete a schema
Args:
name(str): name of the schema.
@ -531,10 +651,10 @@ class Infinispan:
An http Response containing the result of the operation
"""
api_url = self._default_node + self._schema_url + "/" + name
response = requests.delete(api_url, timeout=REST_TIMEOUT)
response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
return response
def cache_delete(self, name: str) -> requests.Response:
def cache_delete(self, name: str) -> Response:
"""Delete a cache
Args:
name(str): name of the cache.
@ -542,10 +662,10 @@ class Infinispan:
An http Response containing the result of the operation
"""
api_url = self._default_node + self._cache_url + "/" + name
response = requests.delete(api_url, timeout=REST_TIMEOUT)
response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
return response
def cache_clear(self, cache_name: str) -> requests.Response:
def cache_clear(self, cache_name: str) -> Response:
"""Clear a cache
Args:
cache_name(str): name of the cache.
@ -555,7 +675,7 @@ class Infinispan:
api_url = (
self._default_node + self._cache_url + "/" + cache_name + "?action=clear"
)
response = requests.post(api_url, timeout=REST_TIMEOUT)
response = self._h2c.post(api_url, timeout=REST_TIMEOUT)
return response
def cache_exists(self, cache_name: str) -> bool:
@ -570,18 +690,17 @@ class Infinispan:
)
return self.resource_exists(api_url)
@staticmethod
def resource_exists(api_url: str) -> bool:
def resource_exists(self, api_url: str) -> bool:
"""Check if a resource exists
Args:
api_url(str): url of the resource.
Returns:
true if resource exists
"""
response = requests.head(api_url, timeout=REST_TIMEOUT)
return response.ok
response = self._h2c.head(api_url, timeout=REST_TIMEOUT)
return response.status_code == self.Codes.OK
def index_clear(self, cache_name: str) -> requests.Response:
def index_clear(self, cache_name: str) -> Response:
"""Clear an index on a cache
Args:
cache_name(str): name of the cache.
@ -595,9 +714,9 @@ class Infinispan:
+ cache_name
+ "/search/indexes?action=clear"
)
return requests.post(api_url, timeout=REST_TIMEOUT)
return self._h2c.post(api_url, timeout=REST_TIMEOUT)
def index_reindex(self, cache_name: str) -> requests.Response:
def index_reindex(self, cache_name: str) -> Response:
"""Rebuild index on a cache
Args:
cache_name(str): name of the cache.
@ -611,4 +730,4 @@ class Infinispan:
+ cache_name
+ "/search/indexes?action=reindex"
)
return requests.post(api_url, timeout=REST_TIMEOUT)
return self._h2c.post(api_url, timeout=REST_TIMEOUT)

View File

@ -0,0 +1,4 @@
#/bin/sh
cd infinispan
docker compose up

View File

@ -0,0 +1,2 @@
#Fri May 03 10:19:58 CEST 2024
user=ADMIN,admin

View File

@ -0,0 +1,62 @@
<infinispan
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:infinispan:config:15.0 https://infinispan.org/schemas/infinispan-config-15.0.xsd
urn:infinispan:server:15.0 https://infinispan.org/schemas/infinispan-server-15.0.xsd"
xmlns="urn:infinispan:config:15.0"
xmlns:server="urn:infinispan:server:15.0">
<cache-container name="default" statistics="true">
<transport cluster="${infinispan.cluster.name:cluster}" stack="${infinispan.cluster.stack:tcp}" node-name="${infinispan.node.name:}"/>
</cache-container>
<server xmlns="urn:infinispan:server:15.0">
<interfaces>
<interface name="public">
<inet-address value="${infinispan.bind.address:127.0.0.1}"/>
</interface>
</interfaces>
<socket-bindings default-interface="public" port-offset="${infinispan.socket.binding.port-offset:0}">
<socket-binding name="default" port="${infinispan.bind.port:11222}"/>
<socket-binding name="authenticated" port="11232"/>
<socket-binding name="auth-tls" port="11242"/>
</socket-bindings>
<security>
<credential-stores>
<credential-store name="credentials" path="credentials.pfx">
<clear-text-credential clear-text="secret"/>
</credential-store>
</credential-stores>
<security-realms>
<security-realm name="default">
<properties-realm groups-attribute="Roles">
<user-properties path="/user-config/users.properties"/>
<group-properties path="/user-config/groups.properties"/>
</properties-realm>
</security-realm>
<security-realm name="tls">
<!-- Uncomment to enable TLS on the realm -->
<server-identities>
<ssl>
<keystore path="application.keystore"
password="password" alias="server"
generate-self-signed-certificate-host="localhost"/>
</ssl>
</server-identities>
<properties-realm groups-attribute="Roles">
<user-properties path="/user-config/users.properties"/>
<group-properties path="/user-config/groups.properties"/>
</properties-realm>
</security-realm>
</security-realms>
</security>
<endpoints>
<endpoint socket-binding="default"/>
<endpoint socket-binding="authenticated" security-realm="default"/>
<endpoint socket-binding="auth-tls" security-realm="tls"/>
</endpoints>
</server>
</infinispan>

View File

@ -0,0 +1,4 @@
#$REALM_NAME=default$
#$ALGORITHM=encrypted$
#Fri May 03 10:19:58 CEST 2024
user=scram-sha-1\:BYGcIAws2gznU/kpezoSb1VQNVd+YMX9r+9SAINFoZtPHaHTAQ\=\=;scram-sha-256\:BYGcIAwRiWiD+8f7dyQEs1Wsum/64MOcjGJ2UcmZFQB6DZJqwRDJ4NrvII4NttmxlA\=\=;scram-sha-384\:BYGcIAz+Eud65N8GWK4TMwhSCZpeE5EFSdynywdryQj3ZwBEgv+KF8hRUuGxiq3EyRxsby6w7DHK3CICGZLsPrM\=;scram-sha-512\:BYGcIAwWxVY9DHn42kHydivyU3s9LSPmyfPPJkIFYyt/XsMASFHGoy5rzk4ahX4HjpJgb+NjdCwhGfi33CY0azUIrn439s62Yg5mq9i+ISto;digest-md5\:AgR1c2VyB2RlZmF1bHSYYyzPjRDR7MhrsdFSK03P;digest-sha\:AgR1c2VyB2RlZmF1bHTga5gDNnNYh7/2HqhBVOdUHjBzhw\=\=;digest-sha-256\:AgR1c2VyB2RlZmF1bHTig5qZQIxqtJBTUp3EMh5UIFoS4qOhz9Uk5aOW9ZKCfw\=\=;digest-sha-384\:AgR1c2VyB2RlZmF1bHT01pAN/pRMLS5afm4Q9S0kuLlA0NokuP8F0AISTwXCb1E8RMsFHlBVPOa5rC6Nyso\=;digest-sha-512\:AgR1c2VyB2RlZmF1bHTi+cHn1Ez2Ze41CvPXb9eP/7JmRys7m1f5qPMQWhAmDOuuUXNWEG4yKSI9k2EZgQvMKTd5hDbR24ul1BsYP8X5;

View File

@ -0,0 +1,16 @@
version: "3.7"
services:
infinispan:
image: quay.io/infinispan/server:15.0
ports:
- '11222:11222'
- '11232:11232'
- '11242:11242'
deploy:
resources:
limits:
memory: 25Gb
volumes:
- ./conf:/user-config
command: -c /user-config/infinispan.xml

View File

@ -1,7 +1,9 @@
"""Test Infinispan functionality."""
import warnings
from typing import Any, List, Optional
import httpx
import pytest
from langchain_core.documents import Document
@ -11,9 +13,18 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
fake_texts,
)
"""
cd tests/integration_tests/vectorstores/docker-compose
./infinispan.sh
def _infinispan_setup_noautoconf() -> None:
ispnvs = InfinispanVS(auto_config=False)
Current Infinispan implementation relies on httpx: `pip install "httpx[http2]"`
if not installed. HTTP/2 is enable by default, if it's not
wanted use `pip install "httpx"`.
"""
def _infinispan_setup_noautoconf(**kwargs: Any) -> None:
ispnvs = InfinispanVS(http2=_hasHttp2(), auto_config=False, **kwargs)
ispnvs.cache_delete()
ispnvs.schema_delete()
proto = """
@ -54,64 +65,104 @@ def _infinispanvs_from_texts(
ids=ids,
clear_old=clear_old,
auto_config=auto_config,
http2=_hasHttp2(),
**kwargs,
)
def _hasHttp2() -> bool:
try:
httpx.Client(http2=True)
return True
except Exception:
return False
@pytest.mark.parametrize("autoconfig", [False, True])
@pytest.mark.parametrize(
"conn_opts",
[
{},
{
"user": "user",
"password": "password",
"hosts": ["localhost:11232"],
"schema": "http",
},
{
"user": "user",
"password": "password",
"hosts": ["localhost:11242"],
"schema": "https",
"verify": False,
},
],
)
class TestBasic:
def test_infinispan(self, autoconfig: bool) -> None:
def test_infinispan(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and search."""
if not autoconfig:
_infinispan_setup_noautoconf()
docsearch = _infinispanvs_from_texts(auto_config=autoconfig)
_infinispan_setup_noautoconf(**conn_opts)
docsearch = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_infinispan_with_metadata(self, autoconfig: bool) -> None:
def test_infinispan_with_auth(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and search."""
if not autoconfig:
_infinispan_setup_noautoconf(**conn_opts)
docsearch = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_infinispan_with_metadata(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test with metadata"""
if not autoconfig:
_infinispan_setup_noautoconf()
_infinispan_setup_noautoconf(**conn_opts)
meta = []
for _ in range(len(fake_texts)):
meta.append({"label": "test"})
docsearch = _infinispanvs_from_texts(metadatas=meta, auto_config=autoconfig)
docsearch = _infinispanvs_from_texts(
metadatas=meta, auto_config=autoconfig, **conn_opts
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"label": "test"})]
def test_infinispan_with_metadata_with_output_fields(
self, autoconfig: bool
self, autoconfig: bool, conn_opts: dict
) -> None:
"""Test with metadata"""
if not autoconfig:
_infinispan_setup_noautoconf()
_infinispan_setup_noautoconf(**conn_opts)
metadatas = [
{"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))
]
c = {"output_fields": ["label", "page", "text"]}
docsearch = _infinispanvs_from_texts(
metadatas=metadatas, configuration=c, auto_config=autoconfig
metadatas=metadatas, configuration=c, auto_config=autoconfig, **conn_opts
)
output = docsearch.similarity_search("foo", k=1)
assert output == [
Document(page_content="foo", metadata={"label": "label0", "page": 0})
]
def test_infinispanvs_with_id(self, autoconfig: bool) -> None:
def test_infinispanvs_with_id(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test with ids"""
ids = ["id_" + str(i) for i in range(len(fake_texts))]
docsearch = _infinispanvs_from_texts(ids=ids, auto_config=autoconfig)
docsearch = _infinispanvs_from_texts(
ids=ids, auto_config=autoconfig, **conn_opts
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_infinispan_with_score(self, autoconfig: bool) -> None:
def test_infinispan_with_score(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and search with scores and IDs."""
if not autoconfig:
_infinispan_setup_noautoconf()
_infinispan_setup_noautoconf(**conn_opts)
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
docsearch = _infinispanvs_from_texts(
metadatas=metadatas, auto_config=autoconfig
metadatas=metadatas, auto_config=autoconfig, **conn_opts
)
output = docsearch.similarity_search_with_score("foo", k=3)
docs = [o[0] for o in output]
@ -123,14 +174,14 @@ class TestBasic:
]
assert scores[0] >= scores[1] >= scores[2]
def test_infinispan_add_texts(self, autoconfig: bool) -> None:
def test_infinispan_add_texts(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and MRR search."""
if not autoconfig:
_infinispan_setup_noautoconf()
_infinispan_setup_noautoconf(**conn_opts)
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
docsearch = _infinispanvs_from_texts(
metadatas=metadatas, auto_config=autoconfig
metadatas=metadatas, auto_config=autoconfig, **conn_opts
)
docsearch.add_texts(texts, metadatas)
@ -138,19 +189,22 @@ class TestBasic:
output = docsearch.similarity_search("foo", k=10)
assert len(output) == 6
def test_infinispan_no_clear_old(self, autoconfig: bool) -> None:
def test_infinispan_no_clear_old(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and MRR search."""
if not autoconfig:
_infinispan_setup_noautoconf()
_infinispan_setup_noautoconf(**conn_opts)
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
docsearch = _infinispanvs_from_texts(
metadatas=metadatas, auto_config=autoconfig
metadatas=metadatas, auto_config=autoconfig, **conn_opts
)
del docsearch
try:
docsearch = _infinispanvs_from_texts(
metadatas=metadatas, clear_old=False, auto_config=autoconfig
metadatas=metadatas,
clear_old=False,
auto_config=autoconfig,
**conn_opts,
)
except AssertionError:
if autoconfig:
@ -159,3 +213,12 @@ class TestBasic:
raise
output = docsearch.similarity_search("foo", k=10)
assert len(output) == 6
class TestHttp2:
def test_http2(self) -> None:
try:
httpx.Client(http2=True)
except Exception:
warnings.warn('pip install "httpx[http2]" if you need HTTP/2')
pass