|
|
|
@@ -30,29 +30,20 @@ def include_page(page):
|
|
|
|
|
return True # page['code'].startswith('2')
|
|
|
|
|
|
|
|
|
|
def to_page_content(page):
    """Serialize one page record into a JSON string used as a document's content.

    Args:
        page: a JSON-serializable page record (e.g. a dict of fields).

    Returns:
        The JSON encoding of ``page``.
    """
    # TODO(review): build richer JSON here — maybe store it in Mongo, or join
    # fields with punctuation / "\n" — instead of a plain dump.
    serialized = json.dumps(page)
    return serialized
|
|
|
|
|
|
|
|
|
|
# Processing documents from reading file to here takes 0.01 seconds

# Build one Document per page of noc_data that passes the include_page filter,
# with the page serialized to JSON as the document's page_content.
# NOTE(review): the extra [[...]] nesting appears deliberate — it mimics the
# list-of-lists shape a per-URL WebBaseLoader produces, so the later
# flattening step works for either data source; confirm before simplifying.
docs = [[Document(page_content=to_page_content(page)) for page in noc_data if include_page(page)]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sources
# https://www.youtube.com/watch?v=jENqvjpkwmw

# Local chat model served through Ollama.
# NOTE(review): presumably requires the "mistral" model to already be pulled
# locally (`ollama pull mistral`) — confirm.
model_local = ChatOllama(model="mistral")
|
|
|
|
|
|
|
|
|
|
# 1. Split data into chunks

# Candidate web pages for the WebBaseLoader ingestion path.
_OLLAMA_BASE = "https://ollama.com"
urls = [
    _OLLAMA_BASE,
    _OLLAMA_BASE + "/blog/windows-preview",
    _OLLAMA_BASE + "/blog/openai-compatibility",
]
|
|
|
|
|
|
|
|
|
|
# Alternative data source: load each URL as its own list of documents.
# docs = [WebBaseLoader(url).load() for url in urls];

# Collapse the list-of-lists into one flat list of documents.
flattened_docs = []
for doc_group in docs:
    flattened_docs.extend(doc_group)

print(flattened_docs)

print('total documents included = ', len(flattened_docs))
|
|
|
|
|
# TODO chunk by rows, so one row is one chunk; build a JSON blob for each chunk so we get the
# structure of the different fields
|
|
|
|
|