mirror of https://github.com/hwchase17/langchain
Harrison/psychic (#5063)
Co-authored-by: Ayan Bandyopadhyay <ayanb9440@gmail.com> Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>pull/5064/head
parent
8c661baefb
commit
b0431c672b
@ -0,0 +1,134 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Psychic\n",
|
||||
"This notebook covers how to load documents from `Psychic`. See [here](../../../../ecosystem/psychic.md) for more details.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"1. Follow the Quick Start section in [this document](../../../../ecosystem/psychic.md)\n",
|
||||
"2. Log into the [Psychic dashboard](https://dashboard.psychic.dev/) and get your secret key\n",
|
||||
"3. Install the frontend React library into your web app and have a user authenticate a connection. The connection will be created using the connection id that you specify."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading documents\n",
|
||||
"\n",
|
||||
"Use the `PsychicLoader` class to load in documents from a connection. Each connection has a connector id (corresponding to the SaaS app that was connected) and a connection id (which you passed in to the frontend library)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Install psychicapi if you don't already have it installed\n",
|
||||
"!poetry run pip -q install psychicapi"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PsychicLoader\n",
|
||||
"from psychicapi import ConnectorId\n",
|
||||
"\n",
|
||||
"# Create a document loader for Google Drive. We can also load from other connectors by setting the connector_id to the appropriate value, e.g. ConnectorId.notion.value\n",
|
||||
"# This loader uses our test credentials\n",
|
||||
"google_drive_loader = PsychicLoader(\n",
|
||||
" api_key=\"7ddb61c1-8b6a-4d31-a58e-30d1c9ea480e\",\n",
|
||||
" connector_id=ConnectorId.gdrive.value,\n",
|
||||
" connection_id=\"google-test\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"documents = google_drive_loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Converting the docs to embeddings \n",
|
||||
"\n",
|
||||
"We can now convert these documents into embeddings and store them in a vector database like Chroma"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains import RetrievalQAWithSourcesChain\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"docsearch = Chroma.from_documents(texts, embeddings)\n",
|
||||
"chain = RetrievalQAWithSourcesChain.from_chain_type(OpenAI(temperature=0), chain_type=\"stuff\", retriever=docsearch.as_retriever())\n",
|
||||
"chain({\"question\": \"what is psychic?\"}, return_only_outputs=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
"""Loader that loads documents from Psychic.dev."""
|
||||
from typing import List
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class PsychicLoader(BaseLoader):
    """Loader that loads documents from Psychic.dev."""

    def __init__(self, api_key: str, connector_id: str, connection_id: str):
        """Initialize with API key, connector id, and connection id.

        Args:
            api_key: Secret key handed to the Psychic client as ``secret_key``.
            connector_id: Connector identifier; converted to a
                ``psychicapi.ConnectorId``.
            connection_id: Identifier of the connection to load documents from.

        Raises:
            ImportError: If the ``psychicapi`` package cannot be imported.
        """
        # Imported here rather than at module level so that importing this
        # module does not itself require `psychicapi` to be installed.
        try:
            from psychicapi import ConnectorId, Psychic
        except ImportError as exc:
            # Chain the original error so the underlying import failure
            # remains visible in the traceback.
            raise ImportError(
                "`psychicapi` package not found, please run `pip install psychicapi`"
            ) from exc
        self.psychic = Psychic(secret_key=api_key)
        self.connector_id = ConnectorId(connector_id)
        self.connection_id = connection_id

    def load(self) -> List[Document]:
        """Load documents.

        Returns:
            One ``Document`` per entry returned by the Psychic client, with the
            entry's ``content`` as page content and its ``title`` and ``uri``
            stored in the metadata under ``title`` and ``source``.
        """
        psychic_docs = self.psychic.get_documents(self.connector_id, self.connection_id)
        return [
            Document(
                page_content=doc["content"],
                metadata={"title": doc["title"], "source": doc["uri"]},
            )
            for doc in psychic_docs
        ]
|
@ -0,0 +1,66 @@
|
||||
from typing import Dict
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.psychic import PsychicLoader
|
||||
|
||||
|
||||
@pytest.fixture
def mock_psychic():  # type: ignore
    """Yield a mock that stands in for ``psychicapi.Psychic`` during a test."""
    with patch("psychicapi.Psychic") as patched_client_cls:
        yield patched_client_cls
|
||||
|
||||
|
||||
@pytest.fixture
def mock_connector_id():  # type: ignore
    """Yield a mock that stands in for ``psychicapi.ConnectorId`` during a test."""
    with patch("psychicapi.ConnectorId") as patched_connector_cls:
        yield patched_connector_cls
|
||||
|
||||
|
||||
@pytest.mark.requires("psychicapi")
class TestPsychicLoader:
    """Unit tests for ``PsychicLoader`` that run against a mocked Psychic client."""

    MOCK_API_KEY = "api_key"
    MOCK_CONNECTOR_ID = "notion"
    MOCK_CONNECTION_ID = "connection_id"

    def test_psychic_loader_initialization(
        self, mock_psychic: MagicMock, mock_connector_id: MagicMock
    ) -> None:
        """The constructor builds the client and connector id from its arguments."""
        PsychicLoader(
            api_key=self.MOCK_API_KEY,
            connector_id=self.MOCK_CONNECTOR_ID,
            connection_id=self.MOCK_CONNECTION_ID,
        )

        mock_psychic.assert_called_once_with(secret_key=self.MOCK_API_KEY)
        mock_connector_id.assert_called_once_with(self.MOCK_CONNECTOR_ID)

    def test_psychic_loader_load_data(self, mock_psychic: MagicMock) -> None:
        """``load`` maps every Psychic entry to a ``Document`` with metadata."""
        mock_psychic.get_documents.return_value = [
            self._get_mock_document("123"),
            self._get_mock_document("456"),
        ]

        psychic_loader = self._get_mock_psychic_loader(mock_psychic)

        documents = psychic_loader.load()

        # Check the arguments as well as the call count, so a loader that
        # queries the wrong connector or connection is caught.
        mock_psychic.get_documents.assert_called_once_with(
            psychic_loader.connector_id, self.MOCK_CONNECTION_ID
        )
        assert len(documents) == 2
        assert all(isinstance(doc, Document) for doc in documents)
        assert documents[0].page_content == "Content 123"
        assert documents[1].page_content == "Content 456"
        # The loader stores the entry's title and uri under "title"/"source".
        assert documents[0].metadata == {"title": "Title 123", "source": "123"}
        assert documents[1].metadata == {"title": "Title 456", "source": "456"}

    def _get_mock_psychic_loader(self, mock_psychic: MagicMock) -> PsychicLoader:
        """Build a loader whose ``psychic`` attribute is replaced by the mock."""
        psychic_loader = PsychicLoader(
            api_key=self.MOCK_API_KEY,
            connector_id=self.MOCK_CONNECTOR_ID,
            connection_id=self.MOCK_CONNECTION_ID,
        )
        # Overwrite the client directly so `load` calls go to the mock.
        psychic_loader.psychic = mock_psychic
        return psychic_loader

    def _get_mock_document(self, uri: str) -> Dict[str, str]:
        """Return a fake Psychic entry whose title and content derive from ``uri``."""
        return {"uri": uri, "title": f"Title {uri}", "content": f"Content {uri}"}
|
Loading…
Reference in New Issue