2023-07-31 04:27:20 +00:00
|
|
|
import os
|
|
|
|
|
|
|
|
import py
|
|
|
|
import pytest
|
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
from langchain_community.document_loaders import GitLoader
|
2023-07-31 04:27:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
def init_repo(tmpdir: py.path.local, dir_name: str) -> str:
    """Build a throwaway git repository holding a single committed file.

    A directory called ``dir_name`` is created under ``tmpdir`` and
    initialised as a git repository.  A ``main`` branch is checked out, a
    test identity is configured, and ``file.txt`` (containing ``content``)
    is added and committed.

    Args:
        tmpdir: Temporary directory in which the repository is created.
        dir_name: Name of the sub-directory that becomes the repository.

    Returns:
        The repository directory as a string, usable as a clone URL.
    """
    # Imported lazily so the module can still be collected without GitPython.
    from git import Repo

    repo_root = tmpdir.mkdir(dir_name)
    repository = Repo.init(repo_root)
    git_cmd = repository.git

    git_cmd.checkout(b="main")

    # Committing requires an author identity; set one on this repository.
    identity = (
        ("user.name", "Test User"),
        ("user.email", "test@example.com"),
    )
    for setting, value in identity:
        git_cmd.config(setting, value)

    tracked_name = "file.txt"
    tracked_path = os.path.join(repo_root, tracked_name)
    with open(tracked_path, "w") as handle:
        handle.write("content")

    git_cmd.add([tracked_name])
    git_cmd.commit(m="Initial commit")

    return str(repo_root)
|
2023-07-31 04:27:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.requires("git")
def test_load_twice(tmpdir: py.path.local) -> None:
    """
    Loading from the same repository two times in a row must succeed and
    yield the single committed document each time.
    """
    clone_url = init_repo(tmpdir, "remote_repo")

    local_checkout = tmpdir.mkdir("local_repo").strpath
    loader = GitLoader(repo_path=local_checkout, clone_url=clone_url)

    # Same loader, two consecutive loads: both must return exactly one
    # document.
    for _ in range(2):
        loaded = loader.load()
        assert len(loaded) == 1
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.requires("git")
def test_clone_different_repo(tmpdir: py.path.local) -> None:
    """
    Pointing a loader at a directory that already holds a clone of one
    repository, but with a different clone URL, must raise a ValueError.
    """
    first_url = init_repo(tmpdir, "remote_repo")

    shared_path = tmpdir.mkdir("local_repo").strpath
    first_loader = GitLoader(repo_path=shared_path, clone_url=first_url)

    # Populate the local directory with a clone of the first repository.
    first_docs = first_loader.load()
    assert len(first_docs) == 1

    # A second, unrelated repository targeting the same local path.
    second_url = init_repo(tmpdir, "other_remote_repo")
    second_loader = GitLoader(repo_path=shared_path, clone_url=second_url)
    with pytest.raises(ValueError):
        second_loader.load()
|