mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
8021d2a2ab
Thank you for contributing to LangChain! - Oracle AI Vector Search Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads that allows you to query data based on semantics, rather than keywords. One of the biggest benefit of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems. - Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads that allows you to query data based on semantics, rather than keywords. One of the biggest benefit of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems. This Pull Requests Adds the following functionalities Oracle AI Vector Search : Vector Store Oracle AI Vector Search : Document Loader Oracle AI Vector Search : Document Splitter Oracle AI Vector Search : Summary Oracle AI Vector Search : Oracle Embeddings - We have added unit tests and have our own local unit test suite which verifies all the code is correct. We have made sure to add guides for each of the components and one end to end guide that shows how the entire thing runs. - We have made sure that make format and make lint run clean. Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17. --------- Co-authored-by: skmishraoracle <shailendra.mishra@oracle.com> Co-authored-by: hroyofc <harichandan.roy@oracle.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
202 lines
6.1 KiB
Python
202 lines
6.1 KiB
Python
# Authors:
|
|
# Harichandan Roy (hroy)
|
|
# David Jiang (ddjiang)
|
|
#
|
|
# -----------------------------------------------------------------------------
|
|
# oracleai.py
|
|
# -----------------------------------------------------------------------------
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import traceback
|
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
|
|
from langchain_core.documents import Document
|
|
|
|
if TYPE_CHECKING:
|
|
from oracledb import Connection
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
"""OracleSummary class"""
|
|
|
|
|
|
class OracleSummary:
|
|
"""Get Summary
|
|
Args:
|
|
conn: Oracle Connection,
|
|
params: Summary parameters,
|
|
proxy: Proxy
|
|
"""
|
|
|
|
def __init__(
|
|
self, conn: Connection, params: Dict[str, Any], proxy: Optional[str] = None
|
|
):
|
|
self.conn = conn
|
|
self.proxy = proxy
|
|
self.summary_params = params
|
|
|
|
def get_summary(self, docs: Any) -> List[str]:
|
|
"""Get the summary of the input docs.
|
|
Args:
|
|
docs: The documents to generate summary for.
|
|
Allowed input types: str, Document, List[str], List[Document]
|
|
Returns:
|
|
List of summary text, one for each input doc.
|
|
"""
|
|
|
|
try:
|
|
import oracledb
|
|
except ImportError as e:
|
|
raise ImportError(
|
|
"Unable to import oracledb, please install with "
|
|
"`pip install -U oracledb`."
|
|
) from e
|
|
|
|
if docs is None:
|
|
return []
|
|
|
|
results: List[str] = []
|
|
try:
|
|
oracledb.defaults.fetch_lobs = False
|
|
cursor = self.conn.cursor()
|
|
|
|
if self.proxy:
|
|
cursor.execute(
|
|
"begin utl_http.set_proxy(:proxy); end;", proxy=self.proxy
|
|
)
|
|
|
|
if isinstance(docs, str):
|
|
results = []
|
|
|
|
summary = cursor.var(oracledb.DB_TYPE_CLOB)
|
|
cursor.execute(
|
|
"""
|
|
declare
|
|
input clob;
|
|
begin
|
|
input := :data;
|
|
:summ := dbms_vector_chain.utl_to_summary(input, json(:params));
|
|
end;""",
|
|
data=docs,
|
|
params=json.dumps(self.summary_params),
|
|
summ=summary,
|
|
)
|
|
|
|
if summary is None:
|
|
results.append("")
|
|
else:
|
|
results.append(str(summary.getvalue()))
|
|
|
|
elif isinstance(docs, Document):
|
|
results = []
|
|
|
|
summary = cursor.var(oracledb.DB_TYPE_CLOB)
|
|
cursor.execute(
|
|
"""
|
|
declare
|
|
input clob;
|
|
begin
|
|
input := :data;
|
|
:summ := dbms_vector_chain.utl_to_summary(input, json(:params));
|
|
end;""",
|
|
data=docs.page_content,
|
|
params=json.dumps(self.summary_params),
|
|
summ=summary,
|
|
)
|
|
|
|
if summary is None:
|
|
results.append("")
|
|
else:
|
|
results.append(str(summary.getvalue()))
|
|
|
|
elif isinstance(docs, List):
|
|
results = []
|
|
|
|
for doc in docs:
|
|
summary = cursor.var(oracledb.DB_TYPE_CLOB)
|
|
if isinstance(doc, str):
|
|
cursor.execute(
|
|
"""
|
|
declare
|
|
input clob;
|
|
begin
|
|
input := :data;
|
|
:summ := dbms_vector_chain.utl_to_summary(input,
|
|
json(:params));
|
|
end;""",
|
|
data=doc,
|
|
params=json.dumps(self.summary_params),
|
|
summ=summary,
|
|
)
|
|
|
|
elif isinstance(doc, Document):
|
|
cursor.execute(
|
|
"""
|
|
declare
|
|
input clob;
|
|
begin
|
|
input := :data;
|
|
:summ := dbms_vector_chain.utl_to_summary(input,
|
|
json(:params));
|
|
end;""",
|
|
data=doc.page_content,
|
|
params=json.dumps(self.summary_params),
|
|
summ=summary,
|
|
)
|
|
|
|
else:
|
|
raise Exception("Invalid input type")
|
|
|
|
if summary is None:
|
|
results.append("")
|
|
else:
|
|
results.append(str(summary.getvalue()))
|
|
|
|
else:
|
|
raise Exception("Invalid input type")
|
|
|
|
cursor.close()
|
|
return results
|
|
|
|
except Exception as ex:
|
|
logger.info(f"An exception occurred :: {ex}")
|
|
traceback.print_exc()
|
|
cursor.close()
|
|
raise
|
|
|
|
|
|
# uncomment the following code block to run the test
|
|
|
|
"""
|
|
# A sample unit test.
|
|
|
|
''' get the Oracle connection '''
|
|
conn = oracledb.connect(
|
|
user="",
|
|
password="",
|
|
dsn="")
|
|
print("Oracle connection is established...")
|
|
|
|
''' params '''
|
|
summary_params = {"provider": "database","glevel": "S",
|
|
"numParagraphs": 1,"language": "english"}
|
|
proxy = ""
|
|
|
|
''' instance '''
|
|
summ = OracleSummary(conn=conn, params=summary_params, proxy=proxy)
|
|
|
|
summary = summ.get_summary("In the heart of the forest, " +
|
|
"a lone fox ventured out at dusk, seeking a lost treasure. " +
|
|
"With each step, memories flooded back, guiding its path. " +
|
|
"As the moon rose high, illuminating the night, the fox unearthed " +
|
|
"not gold, but a forgotten friendship, worth more than any riches.")
|
|
print(f"Summary generated by OracleSummary: {summary}")
|
|
|
|
conn.close()
|
|
print("Connection is closed.")
|
|
|
|
"""
|