diff --git a/langchain/utilities/loading.py b/langchain/utilities/loading.py index 45569e4a..f694c1a1 100644 --- a/langchain/utilities/loading.py +++ b/langchain/utilities/loading.py @@ -3,7 +3,7 @@ import os import re import tempfile -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import Any, Callable, Optional, Set, TypeVar, Union from urllib.parse import urljoin @@ -16,7 +16,6 @@ URL_BASE = os.environ.get( ) HUB_PATH_RE = re.compile(r"lc(?P@[^:]+)?://(?P.*)") - T = TypeVar("T") @@ -38,7 +37,13 @@ def try_load_from_hub( if remote_path.suffix[1:] not in valid_suffixes: raise ValueError("Unsupported file type.") - full_url = urljoin(URL_BASE.format(ref=ref), str(remote_path)) + # Using Path with URLs is not recommended, because on Windows + # the backslash is used as the path separator, which can cause issues + # when working with URLs that use forward slashes as the path separator. + # Instead, use PurePosixPath to ensure that forward slashes are used as the + # path separator, regardless of the operating system. + full_url = urljoin(URL_BASE.format(ref=ref), PurePosixPath(remote_path).__str__()) + r = requests.get(full_url, timeout=5) if r.status_code != 200: raise ValueError(f"Could not find file at {full_url}") diff --git a/poetry.lock b/poetry.lock index e4713722..0175549d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. [[package]] name = "absl-py" @@ -6314,14 +6314,14 @@ files = [ [[package]] name = "setuptools" -version = "67.6.0" +version = "67.6.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" category = "main" -optional = true +optional = false python-versions = ">=3.7" files = [ - {file = "setuptools-67.6.0-py3-none-any.whl", hash = "sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2"}, - {file = "setuptools-67.6.0.tar.gz", hash = "sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077"}, + {file = "setuptools-67.6.1-py3-none-any.whl", hash = "sha256:e728ca814a823bf7bf60162daf9db95b93d532948c4c0bea762ce62f60189078"}, + {file = "setuptools-67.6.1.tar.gz", hash = "sha256:257de92a9d50a60b8e22abfcbb771571fde0dbf3ec234463212027a4eeecbe9a"}, ] [package.extras] @@ -6826,7 +6826,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and platform_machine == \"aarch64\" or python_version >= \"3\" and platform_machine == \"ppc64le\" or python_version >= \"3\" and platform_machine == \"x86_64\" or python_version >= \"3\" and platform_machine == \"amd64\" or python_version >= \"3\" and platform_machine == \"AMD64\" or python_version >= \"3\" and platform_machine == \"win32\" or python_version >= \"3\" and platform_machine == \"WIN32\""} [package.extras] aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] @@ -8486,10 +8486,10 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [extras] -all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary"] -llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] +all = ["aleph-alpha-client", "anthropic", "beautifulsoup4", "boto3", "cohere", "deeplake", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "huggingface_hub", "jina", "jinja2", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pgvector", "pinecone-client", "psycopg2-binary", "pyowm", "pypdf", "qdrant-client", "redis", "sentence-transformers", "spacy", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] +llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "3dd7ff0edb145aff1ba1ea7e35ffa4b224fb71f934be0e13d4427fd796e54869" +content-hash = "439617c4bd1c55ff951f0b79c5953ff2002e50aadd6452449ecd9d9e900bde17" diff --git a/pyproject.toml b/pyproject.toml index 25407850..f1c7de6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ optional = true [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" playwright = "^1.28.0" +setuptools = "^67.6.1" [tool.poetry.extras] llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]