IMPROVEMENT: Conditionally import core type hints (#13700)

pull/11979/head^2
Bagatur 7 months ago committed by GitHub
parent d47ee1ae79
commit e327bb4ba4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -52,6 +52,7 @@
},
{
"cell_type": "markdown",
"id": "c6fb4903-f845-4907-ae14-df305891b0ff",
"metadata": {},
"source": [
"## Data Loading\n",
@ -76,17 +77,18 @@
{
"cell_type": "code",
"execution_count": 45,
"id": "fc0767d4-9155-4591-855c-ef2e14e0e10f",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import tempfile\n",
"from pathlib import Path\n",
"from pprint import pprint\n",
"import requests\n",
"import tempfile\n",
"from time import sleep\n",
"from typing import Dict, List\n",
"\n",
"import requests\n",
"from docugami import Docugami\n",
"from docugami.types import Document as DocugamiDocument\n",
"\n",
@ -166,6 +168,7 @@
{
"cell_type": "code",
"execution_count": 46,
"id": "ce0b2b21-7623-46e7-ae2c-3a9f67e8b9b9",
"metadata": {},
"outputs": [
{
@ -207,6 +210,7 @@
},
{
"cell_type": "markdown",
"id": "01f035e5-c3f8-4d23-9d1b-8d2babdea8e9",
"metadata": {},
"source": [
"If you are on the free Docugami tier, your files should be done in ~15 minutes or less depending on the number of pages uploaded and available resources (please contact Docugami for paid plans for faster processing). You can re-run the code above without reprocessing your files to continue waiting if your notebook is not continuously running (it does not re-upload)."
@ -225,6 +229,7 @@
{
"cell_type": "code",
"execution_count": 47,
"id": "05fcdd57-090f-44bf-a1fb-2c3609c80e34",
"metadata": {},
"outputs": [
{
@ -268,6 +273,7 @@
},
{
"cell_type": "markdown",
"id": "bfc1f2c9-e6d4-4d98-a799-6bc30bc61661",
"metadata": {},
"source": [
"The file processed by Docugami in the example above was [this one](https://data.ntsb.gov/carol-repgen/api/Aviation/ReportMain/GenerateNewestReport/192541/pdf) from the NTSB and you can look at the PDF side by side to compare the XML chunks above. \n",
@ -278,6 +284,7 @@
{
"cell_type": "code",
"execution_count": 48,
"id": "8a4b49e0-de78-4790-a930-ad7cf324697a",
"metadata": {},
"outputs": [
{
@ -326,6 +333,7 @@
},
{
"cell_type": "markdown",
"id": "1cfc06bc-67d2-46dd-b04d-95efa3619d0a",
"metadata": {},
"source": [
"## Docugami XML Deep Dive: Jane Doe NDA Example\n",
@ -336,6 +344,7 @@
{
"cell_type": "code",
"execution_count": 109,
"id": "7b697d30-1e94-47f0-87e8-f81d4b180da2",
"metadata": {},
"outputs": [
{
@ -361,6 +370,7 @@
{
"cell_type": "code",
"execution_count": 98,
"id": "14714576-6e1d-499b-bcc8-39140bb2fd78",
"metadata": {},
"outputs": [
{
@ -415,6 +425,7 @@
},
{
"cell_type": "markdown",
"id": "dc09ba64-4973-4471-9501-54294c1143fc",
"metadata": {},
"source": [
"The Docugami XML contains extremely detailed semantics and visual bounding boxes for all elements. The `dgml-utils` library parses text and non-text elements into formats appropriate to pass into LLMs (chunked text with XML semantic labels)"
@ -423,6 +434,7 @@
{
"cell_type": "code",
"execution_count": 100,
"id": "2b4ece00-2e43-4254-adc9-66dbb79139a6",
"metadata": {},
"outputs": [
{
@ -460,6 +472,7 @@
{
"cell_type": "code",
"execution_count": 101,
"id": "08350119-aa22-4ec1-8f65-b1316a0d4123",
"metadata": {},
"outputs": [
{
@ -476,6 +489,7 @@
},
{
"cell_type": "markdown",
"id": "dca87b46-c0c2-4973-94ec-689c18075653",
"metadata": {},
"source": [
"The XML markup contains structural as well as semantic tags, which provide additional semantics to the LLM for improved retrieval and generation.\n",
@ -486,6 +500,7 @@
{
"cell_type": "code",
"execution_count": 112,
"id": "bcac8294-c54a-4b6e-af9d-3911a69620b2",
"metadata": {},
"outputs": [
{
@ -539,8 +554,8 @@
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
")\n",
"from langchain.schema.output_parser import StrOutputParser"
]
@ -610,11 +625,12 @@
"outputs": [],
"source": [
"import uuid\n",
"from langchain.vectorstores.chroma import Chroma\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.schema.document import Document\n",
"\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.schema.document import Document\n",
"from langchain.storage import InMemoryStore\n",
"from langchain.vectorstores.chroma import Chroma\n",
"\n",
"\n",
"def build_retriever(text_elements, tables, table_summaries):\n",
@ -710,6 +726,7 @@
{
"cell_type": "code",
"execution_count": 120,
"id": "636e992f-823b-496b-a082-8b4fcd479de5",
"metadata": {},
"outputs": [
{
@ -743,6 +760,7 @@
},
{
"cell_type": "markdown",
"id": "86cad5db-81fe-4ae6-a20e-550b85fcbe96",
"metadata": {},
"source": [
"# RAG on Llama2 paper\n",
@ -753,6 +771,7 @@
{
"cell_type": "code",
"execution_count": 121,
"id": "0e4a2f43-dd48-4ae3-8e27-7e87d169965f",
"metadata": {},
"outputs": [
{
@ -777,6 +796,7 @@
{
"cell_type": "code",
"execution_count": 124,
"id": "56b78fb3-603d-4343-ae72-be54a3c5dd72",
"metadata": {},
"outputs": [
{
@ -801,6 +821,7 @@
{
"cell_type": "code",
"execution_count": 125,
"id": "d3cc5ba9-8553-4eda-a5d1-b799751186af",
"metadata": {},
"outputs": [],
"source": [
@ -812,6 +833,7 @@
{
"cell_type": "code",
"execution_count": 126,
"id": "d7c73faf-74cb-400d-8059-b69e2493de38",
"metadata": {},
"outputs": [],
"source": [
@ -823,6 +845,7 @@
{
"cell_type": "code",
"execution_count": 127,
"id": "4c553722-be42-42ce-83b8-76a17f323f1c",
"metadata": {},
"outputs": [],
"source": [
@ -832,6 +855,7 @@
{
"cell_type": "code",
"execution_count": 128,
"id": "65dce40b-f1c3-494a-949e-69a9c9544ddb",
"metadata": {},
"outputs": [
{
@ -851,6 +875,7 @@
},
{
"cell_type": "markdown",
"id": "59877edf-9a02-45db-95cb-b7f4234abfa3",
"metadata": {},
"source": [
"We can check the [trace](https://smith.langchain.com/public/5de100c3-bb40-4234-bf02-64bc708686a1/r) to see what chunks were retrieved.\n",
@ -939,6 +964,7 @@
},
{
"cell_type": "markdown",
"id": "0879349e-7298-4f2c-b246-f1142e97a8e5",
"metadata": {},
"source": []
}

@ -69,8 +69,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
"from langchain.prompts.prompt import PromptTemplate\n",
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
"\n",
"_PROMPT_TEMPLATE = \"\"\"If someone asks you to perform a task, your job is to come up with a series of bash commands that will perform the task. There is no need to put \"#!/bin/bash\" in your answer. Make sure to reason step by step, using this format:\n",
"Question: \"copy the files in the directory named 'target' into a new directory at the same level as target called 'myNewDirectory'\"\n",

@ -1,15 +1,16 @@
"""Base callback handler that can be used to handle callbacks in langchain."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Sequence, TypeVar, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, TypeVar, Union
from uuid import UUID
from tenacity import RetryCallState
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
class RetrieverManagerMixin:

@ -7,6 +7,7 @@ import uuid
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager, contextmanager
from typing import (
TYPE_CHECKING,
Any,
AsyncGenerator,
Coroutine,
@ -25,7 +26,6 @@ from uuid import UUID
from langsmith.run_helpers import get_run_tree_context
from tenacity import RetryCallState
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks.base import (
BaseCallbackHandler,
BaseCallbackManager,
@ -37,11 +37,14 @@ from langchain_core.callbacks.base import (
ToolManagerMixin,
)
from langchain_core.callbacks.stdout import StdOutCallbackHandler
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
from langchain_core.utils.env import env_var_is_set
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
logger = logging.getLogger(__name__)

@ -1,11 +1,15 @@
"""Callback Handler that prints to std out."""
from typing import Any, Dict, List, Optional
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from langchain_core.utils import print_text
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.outputs import LLMResult
class StdOutCallbackHandler(BaseCallbackHandler):
"""Callback Handler that prints to std out."""

@ -1,11 +1,15 @@
"""Callback Handler streams to stdout on new llm token."""
from __future__ import annotations
import sys
from typing import Any, Dict, List
from typing import TYPE_CHECKING, Any, Dict, List
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.messages import BaseMessage
from langchain_core.outputs import LLMResult
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.messages import BaseMessage
from langchain_core.outputs import LLMResult
class StreamingStdOutCallbackHandler(BaseCallbackHandler):

@ -3,9 +3,10 @@ from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from functools import partial
from typing import Any, Sequence
from typing import TYPE_CHECKING, Any, Sequence
from langchain_core.documents import Document
if TYPE_CHECKING:
from langchain_core.documents import Document
class BaseDocumentTransformer(ABC):

@ -1,13 +1,15 @@
"""Example selector that selects examples based on SemanticSimilarity."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Type
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
from langchain_core.embeddings import Embeddings
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.pydantic_v1 import BaseModel, Extra
from langchain_core.vectorstores import VectorStore
if TYPE_CHECKING:
from langchain_core.embeddings import Embeddings
def sorted_values(values: Dict[str, str]) -> List[Any]:
"""Return a list of values in dict sorted by key."""

@ -16,13 +16,13 @@ from typing import (
from typing_extensions import TypeAlias
from langchain_core.messages import AnyMessage, BaseMessage, get_buffer_string
from langchain_core.outputs import LLMResult
from langchain_core.prompt_values import PromptValue
from langchain_core.runnables import RunnableSerializable
from langchain_core.utils import get_pydantic_field_names
if TYPE_CHECKING:
from langchain_core.callbacks import Callbacks
from langchain_core.outputs import LLMResult
@lru_cache(maxsize=None) # Cache the tokenizer

@ -1,9 +1,12 @@
from __future__ import annotations
import asyncio
import inspect
import warnings
from abc import ABC, abstractmethod
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Dict,
@ -41,7 +44,9 @@ from langchain_core.outputs import (
)
from langchain_core.prompt_values import ChatPromptValue, PromptValue, StringPromptValue
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.runnables import RunnableConfig
if TYPE_CHECKING:
from langchain_core.runnables import RunnableConfig
def _get_verbosity() -> bool:

@ -37,10 +37,10 @@ from tenacity import (
wait_exponential,
)
from langchain_core.callbacks.base import BaseCallbackManager
from langchain_core.callbacks.manager import (
from langchain_core.callbacks import (
AsyncCallbackManager,
AsyncCallbackManagerForLLMRun,
BaseCallbackManager,
CallbackManager,
CallbackManagerForLLMRun,
Callbacks,

@ -4,6 +4,7 @@ import asyncio
import functools
from abc import ABC, abstractmethod
from typing import (
TYPE_CHECKING,
Any,
Dict,
Generic,
@ -18,9 +19,11 @@ from typing_extensions import get_args
from langchain_core.messages import AnyMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.prompt_values import PromptValue
from langchain_core.runnables import RunnableConfig, RunnableSerializable
if TYPE_CHECKING:
from langchain_core.prompt_values import PromptValue
T = TypeVar("T")

@ -1,6 +1,7 @@
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Iterator,
@ -16,7 +17,9 @@ from langchain_core.outputs import (
Generation,
GenerationChunk,
)
from langchain_core.runnables import RunnableConfig
if TYPE_CHECKING:
from langchain_core.runnables import RunnableConfig
class BaseTransformOutputParser(BaseOutputParser[T]):

@ -3,16 +3,32 @@ from __future__ import annotations
import json
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Callable, Dict, List, Mapping, Optional, Type, Union
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Mapping,
Optional,
Type,
Union,
)
import yaml
from langchain_core.documents import Document
from langchain_core.output_parsers.base import BaseOutputParser
from langchain_core.prompt_values import PromptValue
from langchain_core.prompt_values import (
ChatPromptValueConcrete,
PromptValue,
StringPromptValue,
)
from langchain_core.pydantic_v1 import BaseModel, Field, create_model, root_validator
from langchain_core.runnables import RunnableConfig, RunnableSerializable
if TYPE_CHECKING:
from langchain_core.documents import Document
class BasePromptTemplate(RunnableSerializable[Dict, PromptValue], ABC):
"""Base class for all prompt templates, returning a prompt."""
@ -40,11 +56,6 @@ class BasePromptTemplate(RunnableSerializable[Dict, PromptValue], ABC):
@property
def OutputType(self) -> Any:
from langchain_core.prompt_values import (
ChatPromptValueConcrete,
StringPromptValue,
)
return Union[StringPromptValue, ChatPromptValueConcrete]
def get_input_schema(

@ -31,17 +31,6 @@ from typing import (
from typing_extensions import Literal, get_args
if TYPE_CHECKING:
from langchain_core.callbacks.manager import (
AsyncCallbackManagerForChainRun,
CallbackManagerForChainRun,
)
from langchain_core.runnables.fallbacks import (
RunnableWithFallbacks as RunnableWithFallbacksT,
)
from langchain_core.tracers.log_stream import RunLog, RunLogPatch
from langchain_core.tracers.root_listeners import Listener
from langchain_core.load.dump import dumpd
from langchain_core.load.serializable import Serializable
from langchain_core.pydantic_v1 import BaseModel, Field, create_model
@ -75,6 +64,18 @@ from langchain_core.runnables.utils import (
from langchain_core.utils.aiter import atee, py_anext
from langchain_core.utils.iter import safetee
if TYPE_CHECKING:
from langchain_core.callbacks.manager import (
AsyncCallbackManagerForChainRun,
CallbackManagerForChainRun,
)
from langchain_core.runnables.fallbacks import (
RunnableWithFallbacks as RunnableWithFallbacksT,
)
from langchain_core.tracers.log_stream import RunLog, RunLogPatch
from langchain_core.tracers.root_listeners import Listener
Other = TypeVar("Other")

@ -9,10 +9,10 @@ from functools import partial
from inspect import signature
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple, Type, Union
from langchain_core.callbacks.base import BaseCallbackManager
from langchain_core.callbacks.manager import (
from langchain_core.callbacks import (
AsyncCallbackManager,
AsyncCallbackManagerForToolRun,
BaseCallbackManager,
CallbackManager,
CallbackManagerForToolRun,
Callbacks,

@ -4,13 +4,12 @@ from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Any, Dict, List, Optional, Sequence, Union, cast
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union, cast
from uuid import UUID
from tenacity import RetryCallState
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.documents import Document
from langchain_core.exceptions import TracerException
from langchain_core.load import dumpd
from langchain_core.outputs import (
@ -21,6 +20,9 @@ from langchain_core.outputs import (
)
from langchain_core.tracers.schemas import Run
if TYPE_CHECKING:
from langchain_core.documents import Document
logger = logging.getLogger(__name__)

@ -5,7 +5,7 @@ import logging
import weakref
from concurrent.futures import Future, ThreadPoolExecutor, wait
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
from uuid import UUID
from langsmith import Client
@ -19,10 +19,12 @@ from tenacity import (
from langchain_core.env import get_runtime_environment
from langchain_core.load import dumpd
from langchain_core.messages import BaseMessage
from langchain_core.tracers.base import BaseTracer
from langchain_core.tracers.schemas import Run
if TYPE_CHECKING:
from langchain_core.messages import BaseMessage
logger = logging.getLogger(__name__)
_LOGGED = set()
_TRACERS: weakref.WeakSet[LangChainTracer] = weakref.WeakSet()

@ -21,7 +21,6 @@ from typing import (
TypeVar,
)
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.retrievers import BaseRetriever
@ -31,6 +30,7 @@ if TYPE_CHECKING:
AsyncCallbackManagerForRetrieverRun,
CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
logger = logging.getLogger(__name__)

Loading…
Cancel
Save