mirror of https://github.com/arc53/DocsGPT
Calc + structure
parent
c92f5dba32
commit
0fb28e5213
@ -1,160 +0,0 @@
|
||||
from pathlib import Path
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
import faiss
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.prompts import PromptTemplate
|
||||
import pickle
|
||||
import dotenv
|
||||
import tiktoken
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import ast
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
|
||||
# Collect every Python file below inputs/ together with its text.
# `data[i]` holds the contents of `sources[i]`.
ps = list(Path("inputs").glob("**/*.py"))
data = []
sources = []
for p in ps:
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    data.append(p.read_text(encoding="utf-8"))
    sources.append(p)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def get_all_functions(source_code):
    """Return the top-level functions defined in ``source_code``.

    Args:
        source_code: Python source to analyse; it is handed to ``ast.parse``.

    Returns:
        A dict mapping each top-level function name to its source text as
        regenerated by ``ast.unparse``. Both ``def`` and ``async def``
        functions are included; functions nested in classes or other
        functions are not.
    """
    tree = ast.parse(source_code)
    return {
        node.name: ast.unparse(node)
        for node in tree.body
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    }
|
||||
|
||||
def get_all_functions_names(node):
    """Return the names of the functions defined directly in ``node``'s body.

    Args:
        node: An AST node with a ``body`` attribute (for example a class
            definition or a module).

    Returns:
        A list of names, in definition order, covering both ``def`` and
        ``async def`` children.
    """
    return [
        child.name
        for child in node.body
        if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
|
||||
|
||||
|
||||
|
||||
def get_classes(source_code):
    """Map every top-level class in ``source_code`` to its function names.

    The source is parsed with ``ast.parse``; for each class definition found
    directly in the module body, the value is whatever
    ``get_all_functions_names`` returns for that class node.
    """
    module = ast.parse(source_code)
    return {
        item.name: get_all_functions_names(item)
        for item in module.body
        if isinstance(item, ast.ClassDef)
    }
|
||||
|
||||
def get_functions_in_class(source_code, class_name):
    """Return the names of the methods defined in the class ``class_name``.

    Args:
        source_code: Python source to analyse; it is handed to ``ast.parse``.
        class_name: Name of a top-level class to look for.

    Returns:
        A list of method names in definition order, covering both ``def``
        and ``async def`` methods. The list is empty when no top-level class
        has that name. If several top-level classes share the name, the
        methods of all of them are collected.
    """
    functions = []
    for node in ast.parse(source_code).body:
        if not isinstance(node, ast.ClassDef) or node.name != class_name:
            continue
        functions.extend(
            member.name
            for member in node.body
            if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef))
        )
    return functions
|
||||
|
||||
|
||||
# Index the parsed results by source path (as a string):
#   functions_dict[path] -> whatever get_all_functions returns for that file
#   classes_dict[path]   -> whatever get_classes returns for that file
functions_dict = {}
classes_dict = {}
# `sources` and `data` are filled in lockstep, so pair them directly instead
# of tracking a manual index. The raw text is passed on as-is because the
# helpers parse it themselves.
for path, code in zip(sources, data):
    source = str(path)
    functions_dict[source] = get_all_functions(code)
    classes_dict[source] = get_classes(code)
|
||||
|
||||
|
||||
|
||||
# Make sure the output root exists before anything is written.
Path("outputs").mkdir(exist_ok=True)

# The prompt template and the LLM client are the same for every function,
# so they are built once instead of once per function.
prompt = PromptTemplate(
    input_variables=["code"],
    template="Code: \n{code}, \nDocumentation: ",
)
llm = OpenAI(temperature=0)

# NOTE: functions_dict must not be reset here; doing so would leave this loop
# with nothing to iterate over and no function would ever be documented.
c1 = len(functions_dict)
for c2, (source, functions) in enumerate(functions_dict.items(), start=1):
    print(f"Processing file {c2}/{c1}")
    f1 = len(functions)

    # inputs/<sub>/<file>.py -> outputs/<sub>/<file>.md
    relative = Path(source)
    if relative.is_relative_to("inputs"):
        relative = relative.relative_to("inputs")
    out_path = Path("outputs") / relative.with_suffix(".md")
    # Create any sub-folders the output file lives in.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    for f2, (name, function) in enumerate(functions.items(), start=1):
        print(f"Processing function {f2}/{f1}")
        response = llm(prompt.format(code=function))

        # Entries appended to an already existing file are separated from
        # the previous content by a blank line.
        separator = "\n\n" if out_path.exists() else ""
        with open(out_path, "a", encoding="utf-8") as f:
            f.write(f"{separator}# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
|
||||
# The prompt template and the LLM client are the same for every class,
# so they are built once instead of once per class.
prompt = PromptTemplate(
    input_variables=["class_name", "functions_names"],
    template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
)
llm = OpenAI(temperature=0)

c1 = len(classes_dict)
for c2, (source, classes) in enumerate(classes_dict.items(), start=1):
    print(f"Processing file {c2}/{c1}")
    f1 = len(classes)

    # inputs/<sub>/<file>.py -> outputs/<sub>/<file>.md
    relative = Path(source)
    if relative.is_relative_to("inputs"):
        relative = relative.relative_to("inputs")
    out_path = Path("outputs") / relative.with_suffix(".md")
    # Create the output folder and any sub-folders the file lives in.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Count from 1 so the progress line reads 1/N .. N/N, matching the
    # per-file progress line above.
    for f2, (name, function_names) in enumerate(classes.items(), start=1):
        print(f"Processing Class {f2}/{f1}")
        response = llm(prompt.format(class_name=name, functions_names=function_names))

        # Entries appended to an already existing file are separated from
        # the previous content by a blank line.
        separator = "\n\n" if out_path.exists() else ""
        with open(out_path, "a", encoding="utf-8") as f:
            f.write(f"{separator}# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,167 @@
|
||||
# Function name: get_functions_in_class
|
||||
|
||||
Function:
|
||||
```
|
||||
def get_functions_in_class(source_code, class_name):
|
||||
tree = ast.parse(source_code)
|
||||
functions = []
|
||||
for node in tree.body:
|
||||
if isinstance(node, ast.ClassDef):
|
||||
if node.name == class_name:
|
||||
for function in node.body:
|
||||
if isinstance(function, ast.FunctionDef):
|
||||
functions.append(function.name)
|
||||
return functions
|
||||
```,
|
||||
Documentation:
|
||||
|
||||
|
||||
get_functions_in_class(source_code, class_name)
|
||||
|
||||
Inputs:
|
||||
source_code (str): The source code of the program.
|
||||
class_name (str): The name of the class.
|
||||
|
||||
Outputs:
|
||||
functions (list): A list of the functions in the class.
|
||||
|
||||
Description:
|
||||
This function takes in a source code and a class name and returns a list of the functions in the class. It uses the ast module to parse the source code and find the class definition. It then iterates through the body of the class and checks if each node is a function definition. If it is, it adds the name of the function to the list of functions.
|
||||
|
||||
# Function name: process_functions
|
||||
|
||||
Function:
|
||||
```
|
||||
def process_functions(functions_dict):
|
||||
c1 = len(functions_dict)
|
||||
c2 = 0
|
||||
for (source, functions) in functions_dict.items():
|
||||
c2 += 1
|
||||
print(f'Processing file {c2}/{c1}')
|
||||
f1 = len(functions)
|
||||
f2 = 0
|
||||
source_w = source.replace('inputs/', '')
|
||||
source_w = source_w.replace('.py', '.md')
|
||||
create_subfolder(source_w)
|
||||
for (name, function) in functions.items():
|
||||
f2 += 1
|
||||
print(f'Processing function {f2}/{f1}')
|
||||
response = generate_response(function)
|
||||
write_output_file(source_w, name, function, response)
|
||||
```,
|
||||
Documentation:
|
||||
|
||||
|
||||
This function takes in a dictionary of functions and processes them. It takes the source file and the functions from the dictionary and creates a subfolder for the source file. It then generates a response for each function and writes the output file. The output file contains the function, the response, and the source file.
|
||||
|
||||
# Function name: get_functions_in_class
|
||||
|
||||
Function:
|
||||
```
|
||||
def get_functions_in_class(source_code, class_name):
|
||||
tree = ast.parse(source_code)
|
||||
functions = []
|
||||
for node in tree.body:
|
||||
if isinstance(node, ast.ClassDef):
|
||||
if node.name == class_name:
|
||||
for function in node.body:
|
||||
if isinstance(function, ast.FunctionDef):
|
||||
functions.append(function.name)
|
||||
return functions
|
||||
```,
|
||||
Documentation:
|
||||
|
||||
|
||||
get_functions_in_class(source_code, class_name)
|
||||
|
||||
Inputs:
|
||||
source_code (str): The source code of the program.
|
||||
class_name (str): The name of the class.
|
||||
|
||||
Outputs:
|
||||
functions (list): A list of the functions in the class.
|
||||
|
||||
Description:
|
||||
This function takes in a source code and a class name and returns a list of the functions in the class. It uses the ast module to parse the source code and find the class definition. It then iterates through the body of the class and checks if each node is a function definition. If it is, it adds the name of the function to the list of functions.
|
||||
|
||||
# Function name: process_functions
|
||||
|
||||
Function:
|
||||
```
|
||||
def process_functions(functions_dict):
|
||||
c1 = len(functions_dict)
|
||||
c2 = 0
|
||||
for (source, functions) in functions_dict.items():
|
||||
c2 += 1
|
||||
print(f'Processing file {c2}/{c1}')
|
||||
f1 = len(functions)
|
||||
f2 = 0
|
||||
source_w = source.replace('inputs/', '')
|
||||
source_w = source_w.replace('.py', '.md')
|
||||
create_subfolder(source_w)
|
||||
for (name, function) in functions.items():
|
||||
f2 += 1
|
||||
print(f'Processing function {f2}/{f1}')
|
||||
response = generate_response(function)
|
||||
write_output_file(source_w, name, function, response)
|
||||
```,
|
||||
Documentation:
|
||||
|
||||
|
||||
This function takes in a dictionary of functions and processes them. It takes the source file and the functions from the dictionary and creates a subfolder for the source file. It then generates a response for each function and writes the output file for each function.
|
||||
|
||||
# Function name: get_functions_in_class
|
||||
|
||||
Function:
|
||||
```
|
||||
def get_functions_in_class(source_code, class_name):
|
||||
tree = ast.parse(source_code)
|
||||
functions = []
|
||||
for node in tree.body:
|
||||
if isinstance(node, ast.ClassDef):
|
||||
if node.name == class_name:
|
||||
for function in node.body:
|
||||
if isinstance(function, ast.FunctionDef):
|
||||
functions.append(function.name)
|
||||
return functions
|
||||
```,
|
||||
Documentation:
|
||||
|
||||
|
||||
get_functions_in_class(source_code, class_name)
|
||||
|
||||
Inputs:
|
||||
source_code (str): The source code of the program.
|
||||
class_name (str): The name of the class.
|
||||
|
||||
Outputs:
|
||||
functions (list): A list of the functions in the class.
|
||||
|
||||
Description:
|
||||
This function takes in a source code and a class name and returns a list of the functions in the class. It uses the ast module to parse the source code and find the class definition. It then iterates through the body of the class and checks if each node is a function definition. If it is, it adds the name of the function to the list of functions.
|
||||
|
||||
# Function name: process_functions
|
||||
|
||||
Function:
|
||||
```
|
||||
def process_functions(functions_dict):
|
||||
c1 = len(functions_dict)
|
||||
c2 = 0
|
||||
for (source, functions) in functions_dict.items():
|
||||
c2 += 1
|
||||
print(f'Processing file {c2}/{c1}')
|
||||
f1 = len(functions)
|
||||
f2 = 0
|
||||
source_w = source.replace('inputs/', '')
|
||||
source_w = source_w.replace('.py', '.md')
|
||||
create_subfolder(source_w)
|
||||
for (name, function) in functions.items():
|
||||
f2 += 1
|
||||
print(f'Processing function {f2}/{f1}')
|
||||
response = generate_response(function)
|
||||
write_output_file(source_w, name, function, response)
|
||||
```,
|
||||
Documentation:
|
||||
|
||||
|
||||
This function takes in a dictionary of functions and processes them. It takes the source file and the functions from the dictionary and creates a subfolder for the source file. It then generates a response for each function and writes the output file for each function.
|
@ -0,0 +1,155 @@
|
||||
from pathlib import Path
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.prompts import PromptTemplate
|
||||
import dotenv
|
||||
import ast
|
||||
import typer
|
||||
import tiktoken
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
def get_functions(source_code):
    """Return the top-level functions defined in ``source_code``.

    Args:
        source_code: Python source to analyse; it is handed to ``ast.parse``.

    Returns:
        A dict mapping each top-level function name to its source text as
        regenerated by ``ast.unparse``. Both ``def`` and ``async def``
        functions are included; functions nested in classes or other
        functions are not.
    """
    tree = ast.parse(source_code)
    return {
        node.name: ast.unparse(node)
        for node in tree.body
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    }
|
||||
|
||||
def get_functions_names(node):
    """Return the names of the functions defined directly in ``node``'s body.

    Args:
        node: An AST node with a ``body`` attribute (for example a class
            definition or a module).

    Returns:
        A list of names, in definition order, covering both ``def`` and
        ``async def`` children.
    """
    return [
        child.name
        for child in node.body
        if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
|
||||
|
||||
|
||||
|
||||
def get_classes(source_code):
    """Map every top-level class in ``source_code`` to its function names.

    The source is parsed with ``ast.parse``; for each class definition found
    directly in the module body, the value is whatever
    ``get_functions_names`` returns for that class node.
    """
    module = ast.parse(source_code)
    return {
        item.name: get_functions_names(item)
        for item in module.body
        if isinstance(item, ast.ClassDef)
    }
|
||||
|
||||
def get_functions_in_class(source_code, class_name):
    """Return the names of the methods defined in the class ``class_name``.

    Args:
        source_code: Python source to analyse; it is handed to ``ast.parse``.
        class_name: Name of a top-level class to look for.

    Returns:
        A list of method names in definition order, covering both ``def``
        and ``async def`` methods. The list is empty when no top-level class
        has that name. If several top-level classes share the name, the
        methods of all of them are collected.
    """
    functions = []
    for node in ast.parse(source_code).body:
        if not isinstance(node, ast.ClassDef) or node.name != class_name:
            continue
        functions.extend(
            member.name
            for member in node.body
            if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef))
        )
    return functions
|
||||
|
||||
|
||||
def parse_functions(functions_dict):
    """Generate Markdown documentation for every function via the OpenAI LLM.

    For each source file, the function's code is sent to the LLM and the
    answer is appended to ``outputs/<relative path>.md``, where the relative
    path is the source path without its leading ``inputs`` folder and with
    the suffix replaced by ``.md``. Progress is printed to stdout.

    Args:
        functions_dict: Mapping of source path (str) to a mapping of
            function name to that function's source code.
    """
    # The prompt template and the LLM client are the same for every function,
    # so they are built once instead of once per function.
    prompt = PromptTemplate(
        input_variables=["code"],
        template="Code: \n{code}, \nDocumentation: ",
    )
    llm = OpenAI(temperature=0)

    file_total = len(functions_dict)
    for file_index, (source, functions) in enumerate(functions_dict.items(), start=1):
        print(f"Processing file {file_index}/{file_total}")
        function_total = len(functions)

        # inputs/<sub>/<file>.py -> outputs/<sub>/<file>.md
        relative = Path(source)
        if relative.is_relative_to("inputs"):
            relative = relative.relative_to("inputs")
        out_path = Path("outputs") / relative.with_suffix(".md")
        # Create any sub-folders the output file lives in.
        out_path.parent.mkdir(parents=True, exist_ok=True)

        for function_index, (name, function) in enumerate(functions.items(), start=1):
            print(f"Processing function {function_index}/{function_total}")
            response = llm(prompt.format(code=function))

            # Entries appended to an already existing file are separated
            # from the previous content by a blank line.
            separator = "\n\n" if out_path.exists() else ""
            with open(out_path, "a", encoding="utf-8") as f:
                f.write(f"{separator}# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
def parse_classes(classes_dict):
    """Generate Markdown documentation for every class via the OpenAI LLM.

    For each source file, the class name and its function names are sent to
    the LLM and the answer is appended to ``outputs/<relative path>.md``,
    where the relative path is the source path without its leading
    ``inputs`` folder and with the suffix replaced by ``.md``. Progress is
    printed to stdout.

    Args:
        classes_dict: Mapping of source path (str) to a mapping of class
            name to the list of function names defined in that class.
    """
    # The prompt template and the LLM client are the same for every class,
    # so they are built once instead of once per class.
    prompt = PromptTemplate(
        input_variables=["class_name", "functions_names"],
        template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
    )
    llm = OpenAI(temperature=0)

    file_total = len(classes_dict)
    for file_index, (source, classes) in enumerate(classes_dict.items(), start=1):
        print(f"Processing file {file_index}/{file_total}")
        class_total = len(classes)

        # inputs/<sub>/<file>.py -> outputs/<sub>/<file>.md
        relative = Path(source)
        if relative.is_relative_to("inputs"):
            relative = relative.relative_to("inputs")
        out_path = Path("outputs") / relative.with_suffix(".md")
        # Create any sub-folders the output file lives in.
        out_path.parent.mkdir(parents=True, exist_ok=True)

        # Count from 1 so the progress line reads 1/N .. N/N, consistent
        # with the per-file progress line above.
        for class_index, (name, function_names) in enumerate(classes.items(), start=1):
            print(f"Processing Class {class_index}/{class_total}")
            response = llm(prompt.format(class_name=name, functions_names=function_names))

            # Entries appended to an already existing file are separated
            # from the previous content by a blank line.
            separator = "\n\n" if out_path.exists() else ""
            with open(out_path, "a", encoding="utf-8") as f:
                f.write(f"{separator}# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
# User permission
def transform_to_docs(functions_dict, classes_dict, price_per_1k_tokens=0.02):
    """Show an estimated OpenAI cost and, if the user agrees, generate the docs.

    Both dictionaries are stringified and tokenised with tiktoken's
    ``cl100k_base`` encoding; the token count and the resulting approximate
    price are printed. The estimate covers only this stringified input. The
    user is then asked to confirm before ``parse_functions`` and
    ``parse_classes`` are run.

    Args:
        functions_dict: Mapping passed on to ``parse_functions``.
        classes_dict: Mapping passed on to ``parse_classes``.
        price_per_1k_tokens: Price in dollars used for the estimate, per
            1000 tokens.
    """
    # Convert both dicts to one string so its token count can be measured.
    parts = [str(key) + str(value) for key, value in functions_dict.items()]
    parts.extend(str(key) + str(value) for key, value in classes_dict.items())
    docs_content = "".join(parts)

    encoding = tiktoken.get_encoding("cl100k_base")
    num_tokens = len(encoding.encode(docs_content))
    total_price = (num_tokens / 1000) * price_per_1k_tokens

    # Print the number of tokens and the approximate cost, with thousands
    # separators and two decimals.
    print(f"Number of Tokens = {format(num_tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")

    # Ask for permission before calling the API.
    user_input = input("Price Okay? (Y/N) \n").lower()
    # An empty answer (just pressing Enter) is treated the same as "y".
    if user_input in ("y", ""):
        Path("outputs").mkdir(exist_ok=True)
        parse_functions(functions_dict)
        print("Functions done!")
        parse_classes(classes_dict)
        print("All done!")
    else:
        print("The API was not called. No money was spent.")
|
Loading…
Reference in New Issue