Merge pull request #46 from arc53/hub-loader-cache

load vectors from DocsHUB
This commit is contained in:
Alex 2023-02-07 21:55:47 +00:00 committed by GitHub
commit 82fad506ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 107 additions and 52 deletions

4
.gitignore vendored
View File

@ -131,4 +131,6 @@ dmypy.json
.idea/
# macOS
.DS_Store
.DS_Store
application/vectors/

View File

@ -8,6 +8,7 @@ import faiss
from langchain import OpenAI
from langchain.chains import VectorDBQAWithSourcesChain
from langchain.prompts import PromptTemplate
import requests
# Redirect PosixPath to WindowsPath on Windows
import platform
@ -40,11 +41,13 @@ def api_answer():
api_key = data["api_key"]
# check if the vectorstore is set
if "active_docs" in data:
vectorstore = "vectorstores/" + data["active_docs"]
vectorstore = "vectors/" + data["active_docs"]
if data['active_docs'] == "default":
vectorstore = ""
else:
vectorstore = ""
print(vectorstore)
# loading the index and the store and the prompt template
index = faiss.read_index(f"{vectorstore}docs.index")
@ -73,6 +76,30 @@ def api_answer():
return result
@app.route("/api/docs_check", methods=["POST"])
def check_docs():
    """Make sure the requested DocsHUB vector store is available locally.

    Expects a JSON body with a ``docs`` key: the store path relative to
    ``vectors/``, ending in ``/`` (the file names are appended directly to
    it, as ``api_answer`` does when loading the index).

    Returns:
        ``{"status": "exists"}`` if the store directory is already on disk
        (or ``docs`` is ``"default"``), ``{"status": "loaded"}`` after a
        successful download, or ``({"status": "error", ...}, http_code)``
        when the request is invalid or the download fails.
    """
    data = request.get_json(silent=True)
    docs = data.get("docs") if isinstance(data, dict) else None
    if not isinstance(docs, str) or not docs:
        return {"status": 'error', "message": "missing 'docs' value"}, 400

    # api_answer maps "default" to the empty prefix (the bundled index), so
    # there is nothing to download for it.
    if docs == "default":
        return {"status": 'exists'}

    vectorstore = "vectors/" + docs
    base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'

    # `docs` comes from the client: refuse anything that would resolve
    # outside the local vectors/ directory (e.g. "../../etc/").
    root = os.path.abspath("vectors")
    if not os.path.abspath(vectorstore).startswith(root + os.sep):
        return {"status": 'error', "message": "invalid 'docs' value"}, 400

    if os.path.exists(vectorstore):
        return {"status": 'exists'}

    # Download both files before touching the disk: the existence check above
    # is on the directory, so creating it ahead of a failed download would
    # make every later call report 'exists' for a broken store.
    # NOTE(review): faiss_store.pkl is presumably unpickled when answering;
    # confirm the DocsHUB repository is a trusted source.
    downloaded = {}
    try:
        for filename in ("docs.index", "faiss_store.pkl"):
            r = requests.get(base_path + vectorstore + filename, timeout=30)
            r.raise_for_status()
            downloaded[filename] = r.content
    except requests.RequestException as exc:
        return {"status": 'error', "message": str(exc)}, 502

    os.makedirs(vectorstore, exist_ok=True)
    for filename, content in downloaded.items():
        with open(vectorstore + filename, "wb") as f:
            f.write(content)

    return {"status": 'loaded'}
# handling CORS
@app.after_request

View File

@ -680,6 +680,11 @@ video {
background-color: rgb(59 130 246 / var(--tw-bg-opacity));
}
.bg-gray-50 {
--tw-bg-opacity: 1;
background-color: rgb(249 250 251 / var(--tw-bg-opacity));
}
.bg-gray-900 {
--tw-bg-opacity: 1;
background-color: rgb(17 24 39 / var(--tw-bg-opacity));
@ -695,11 +700,6 @@ video {
background-color: rgb(229 231 235 / var(--tw-bg-opacity));
}
.bg-gray-50 {
--tw-bg-opacity: 1;
background-color: rgb(249 250 251 / var(--tw-bg-opacity));
}
.p-2 {
padding: 0.5rem;
}
@ -785,6 +785,16 @@ video {
color: rgb(17 24 39 / var(--tw-text-opacity));
}
.text-green-500 {
--tw-text-opacity: 1;
color: rgb(34 197 94 / var(--tw-text-opacity));
}
.text-red-500 {
--tw-text-opacity: 1;
color: rgb(239 68 68 / var(--tw-text-opacity));
}
.opacity-75 {
opacity: 0.75;
}
@ -867,43 +877,6 @@ video {
--tw-ring-color: rgb(59 130 246 / var(--tw-ring-opacity));
}
@media (prefers-color-scheme: dark) {
.dark\:border-gray-600 {
--tw-border-opacity: 1;
border-color: rgb(75 85 99 / var(--tw-border-opacity));
}
.dark\:bg-gray-700 {
--tw-bg-opacity: 1;
background-color: rgb(55 65 81 / var(--tw-bg-opacity));
}
.dark\:text-white {
--tw-text-opacity: 1;
color: rgb(255 255 255 / var(--tw-text-opacity));
}
.dark\:placeholder-gray-400::-moz-placeholder {
--tw-placeholder-opacity: 1;
color: rgb(156 163 175 / var(--tw-placeholder-opacity));
}
.dark\:placeholder-gray-400::placeholder {
--tw-placeholder-opacity: 1;
color: rgb(156 163 175 / var(--tw-placeholder-opacity));
}
.dark\:focus\:border-blue-500:focus {
--tw-border-opacity: 1;
border-color: rgb(59 130 246 / var(--tw-border-opacity));
}
.dark\:focus\:ring-blue-500:focus {
--tw-ring-opacity: 1;
--tw-ring-color: rgb(59 130 246 / var(--tw-ring-opacity));
}
}
@media (min-width: 640px) {
.sm\:my-8 {
margin-top: 2rem;

View File

@ -1,3 +1,29 @@
document.getElementById("select-docs").addEventListener("change", function() {
localStorage.setItem('activeDocs', this.value)
});
/**
 * Download the latest DocsHUB index (combined.json) and cache it in
 * localStorage under the "docsIndex" key as a JSON string.
 *
 * @returns {Promise<void>} settles once the caching attempt has finished;
 *     failures are logged to the console rather than rethrown.
 */
function docsIndex() {
    return fetch('https://raw.githubusercontent.com/arc53/DocsHUB/main/combined.json')
        .then(response => {
            // An error page is not JSON; fail here with a clear message
            // instead of letting response.json() throw a parse error.
            if (!response.ok) {
                throw new Error('DocsHUB index request failed with status ' + response.status);
            }
            return response.json();
        })
        .then(data => {
            console.log('Success:', data);
            localStorage.setItem("docsIndex", JSON.stringify(data));
        })
        .catch(error => {
            console.error('Error:', error);
        });
}
// When the user picks a documentation set: remember the choice and ask the
// backend to make sure the matching vector store is available.
document.getElementById("select-docs").addEventListener("change", function() {
    localStorage.setItem('activeDocs', this.value)
    fetch('/api/docs_check', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({docs: this.value}),
    })
        .then(response => {
            // Surface HTTP errors instead of trying to parse an error page.
            if (!response.ok) {
                throw new Error('docs_check failed with status ' + response.status);
            }
            return response.json();
        })
        .then(data => {
            console.log('Success:', data);
        })
        .catch(error => {
            // Network failures and bad responses would otherwise be
            // unhandled promise rejections.
            console.error('Error:', error);
        });
});

View File

@ -61,12 +61,10 @@ This will return a new DataFrame with all the columns from both tables, and only
<p class="text-sm">The source code is available on <a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">Github</a></p><br>
<p class="text-sm">Currently It uses python pandas documentation, so it will respond to information relevant to pandas. If you want to train it on different documentation - <a href="https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation" class="text-blue-500 hover:text-blue-800"> please follow this guide </a></p><br>
<p class="text-sm">If you want to launch it on your own server - <a href="https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation" class="text-blue-500 hover:text-blue-800"> follow this guide </a></p><br>
<label class="block mb-2 text-sm font-medium text-gray-900">Select pre-loaded documentation</label>
<label class="block mb-2 text-sm font-medium text-gray-900">Select documentation from DocsHUB</label>
<select id="select-docs" class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5">
<option selected>Choose documentation</option>
<option value="ethereum/solidity/">Solidity</option>
<option value="python/pandas/">Pandas</option>
<option value="python/scikit-learn/">scikit-learn</option>
<option value="default">Default</option>
</select>
</div>
</div>
@ -103,6 +101,35 @@ This will return a new DataFrame with all the columns from both tables, and only
console.log("apiKey is not set")
document.getElementById('modal').classList.toggle('hidden')
}
// Fill the #select-docs dropdown with one <option> per entry of the DocsHUB
// index cached in localStorage under "docsIndex".
function populateDocsSelect() {
    // Keep the parsed index in its own variable: assigning to the bare name
    // `docsIndex` would overwrite the docsIndex() function (assuming it is a
    // global defined by another script on this page).
    var cachedIndex = null;
    try {
        cachedIndex = JSON.parse(localStorage.getItem('docsIndex'));
    } catch (err) {
        console.error("docsIndex in localStorage is not valid JSON", err);
    }
    var select = document.getElementById("select-docs");
    // for...in over null simply does nothing, so an empty cache is harmless.
    for (var key in cachedIndex) {
        var entry = cachedIndex[key];
        console.log(key)
        console.log(entry)
        var option = document.createElement("option");
        option.text = entry.name + " " + entry.version;
        // Entries whose name equals their language are stored under
        // "<name>/.project/<version>/"; all others under
        // "<language>/<name>/<version>/".
        if (entry.name == entry.language) {
            option.value = entry.name + "/" + ".project" + "/" + entry.version + "/";
        } else {
            option.value = entry.language + "/" + entry.name + "/" + entry.version + "/";
        }
        select.add(option);
    }
}

if (localStorage.getItem('docsIndex') === null) {
    console.log("docsIndex is not set")
    // The download is asynchronous, so the cache is still empty right after
    // this call. If docsIndex() hands back a promise, populate once it has
    // settled; otherwise the list is filled on the next page load.
    var pendingIndex = docsIndex()
    if (pendingIndex && typeof pendingIndex.then === "function") {
        pendingIndex.then(populateDocsSelect)
    }
} else {
    populateDocsSelect()
}
</script>
<script src="{{url_for('static',filename='src/authapi.js')}}"></script>
<script src="{{url_for('static',filename='src/chat.js')}}"></script>