mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
3a2eb6e12b
Added noqa for existing prints. Can slowly remove / will prevent more being intro'd
36 lines
1.0 KiB
Python
36 lines
1.0 KiB
Python
import os
|
|
from pathlib import Path
|
|
|
|
from langchain_community.vectorstores import Chroma
|
|
from langchain_experimental.open_clip import OpenCLIPEmbeddings
|
|
|
|
def _relative_to_cwd(path: Path) -> Path:
    """Return *path* relative to the current working directory when possible.

    ``Path.relative_to`` raises ``ValueError`` when *path* is not located
    under the working directory (for example when this script is launched
    from an unrelated directory). In that case *path* is returned unchanged
    so the script keeps working instead of crashing before any ingestion.
    """
    try:
        return path.relative_to(Path.cwd())
    except ValueError:
        return path


# Load images: collect every ``.jpg`` file in the ``docs`` directory that sits
# next to this script, as a sorted list of path strings.
img_dump_path = Path(__file__).parent / "docs/"
rel_img_dump_path = _relative_to_cwd(img_dump_path)
image_uris = sorted(
    os.path.join(rel_img_dump_path, image_name)
    for image_name in os.listdir(rel_img_dump_path)
    if image_name.endswith(".jpg")
)

# Index: directory (next to this script) where the Chroma collection persists.
vectorstore = Path(__file__).parent / "chroma_db_multi_modal"
# NOTE: not used below; kept so the module-level names stay unchanged.
re_vectorstore_path = _relative_to_cwd(vectorstore)

# Load embedding function
print("Loading embedding function")  # noqa: T201
embedding = OpenCLIPEmbeddings(model_name="ViT-H-14", checkpoint="laion2b_s32b_b79k")

# Create chroma: reuse ``vectorstore`` rather than rebuilding the same path.
vectorstore_mmembd = Chroma(
    collection_name="multi-modal-rag",
    persist_directory=str(vectorstore),
    embedding_function=embedding,
)

# Add images
print("Embedding images")  # noqa: T201
vectorstore_mmembd.add_images(uris=image_uris)
|