You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DocsGPT/scripts/code_docs_gen.py

119 lines
3.4 KiB
Python

from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import pickle
import dotenv
import tiktoken
import sys
from argparse import ArgumentParser
import ast
import json
dotenv.load_dotenv()
ps = list(Path("inputs").glob("**/*.py"))
data = []
sources = []
for p in ps:
with open(p) as f:
data.append(f.read())
sources.append(p)
# with open('inputs/client.py', 'r') as f:
# tree = ast.parse(f.read())
# print(tree)
def get_functions_in_class(node):
functions = []
functions_code = []
for child in node.body:
if isinstance(child, ast.FunctionDef):
functions.append(child.name)
functions_code.append(ast.unparse(child))
return functions, functions_code
def get_classes_and_functions(source_code):
tree = ast.parse(source_code)
classes = {}
for node in tree.body:
if isinstance(node, ast.ClassDef):
class_name = node.name
function_name, function = get_functions_in_class(node)
# join function name and function code
functions = dict(zip(function_name, function))
classes[class_name] = functions
return classes
structure_dict = {}
c1 = 0
for code in data:
classes = get_classes_and_functions(ast.parse(code))
source = str(sources[c1])
structure_dict[source] = classes
c1 += 1
# save the structure dict as json
with open('structure_dict.json', 'w') as f:
json.dump(structure_dict, f)
# llm = OpenAI(temperature=0)
# prompt = PromptTemplate(
# input_variables=["code"],
# template="Code: {code}, Documentation: ",
# )
#
# print(prompt.format(code="print('hello world')"))
# print(llm(prompt.format(code="print('hello world')")))
if not Path("outputs").exists():
Path("outputs").mkdir()
c1 = len(structure_dict)
c2 = 0
for source, classes in structure_dict.items():
c2 += 1
print(f"Processing file {c2}/{c1}")
f1 = len(classes)
f2 = 0
for class_name, functions in classes.items():
f2 += 1
print(f"Processing class {f2}/{f1}")
source_w = source.replace("inputs/", "")
source_w = source_w.replace(".py", ".txt")
if not Path(f"outputs/{source_w}").exists():
with open(f"outputs/{source_w}", "w") as f:
f.write(f"Class: {class_name}")
else:
with open(f"outputs/{source_w}", "a") as f:
f.write(f"\n\nClass: {class_name}")
# append class name to the front
for function in functions:
b1 = len(functions)
b2 = 0
print(f"Processing function {b2}/{b1}")
b2 += 1
prompt = PromptTemplate(
input_variables=["code"],
template="Code: \n{code}, \nDocumentation: ",
)
llm = OpenAI(temperature=0)
response = llm(prompt.format(code=functions[function]))
if not Path(f"outputs/{source_w}").exists():
with open(f"outputs/{source_w}", "w") as f:
f.write(f"Function: {functions[function]}, \nDocumentation: {response}")
else:
with open(f"outputs/{source_w}", "a") as f:
f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")