gpt4free/interference/app.py

import json
import random
import string
import time
from typing import Any

import requests
from flask import Flask, request
from flask_cors import CORS
from transformers import AutoTokenizer

from g4f import ChatCompletion

app = Flask(__name__)
CORS(app)


@app.route('/chat/completions', methods=['POST'])
def chat_completions():
    # Read the OpenAI-style request body; fall back to gpt-3.5-turbo.
    data = request.get_json()
    model = data.get('model', 'gpt-3.5-turbo')
    stream = data.get('stream', False)
    messages = data.get('messages')

    response = ChatCompletion.create(model=model, stream=stream,
                                     messages=messages)

    # Fabricate an OpenAI-style completion id and creation timestamp.
    completion_id = ''.join(random.choices(string.ascii_letters + string.digits, k=28))
    completion_timestamp = int(time.time())

    if not stream:
        return {
            'id': f'chatcmpl-{completion_id}',
            'object': 'chat.completion',
            'created': completion_timestamp,
            'model': model,
            'choices': [
                {
                    'index': 0,
                    'message': {
                        'role': 'assistant',
                        'content': response,
                    },
                    'finish_reason': 'stop',
                }
            ],
            'usage': {
                # g4f does not report token usage, so these stay null.
                'prompt_tokens': None,
                'completion_tokens': None,
                'total_tokens': None,
            },
        }

    def streaming():
        # Emit OpenAI-style server-sent events, one JSON chunk per event.
        for chunk in response:
            completion_data = {
                'id': f'chatcmpl-{completion_id}',
                'object': 'chat.completion.chunk',
                'created': completion_timestamp,
                'model': model,
                'choices': [
                    {
                        'index': 0,
                        'delta': {
                            'content': chunk,
                        },
                        'finish_reason': None,
                    }
                ],
            }

            content = json.dumps(completion_data, separators=(',', ':'))
            yield f'data: {content}\n\n'
            time.sleep(0.1)

        # Close the stream with an empty delta and a stop reason, the way
        # the OpenAI API terminates a streamed completion.
        end_completion_data: dict[str, Any] = {
            'id': f'chatcmpl-{completion_id}',
            'object': 'chat.completion.chunk',
            'created': completion_timestamp,
            'model': model,
            'choices': [
                {
                    'index': 0,
                    'delta': {},
                    'finish_reason': 'stop',
                }
            ],
        }

        content = json.dumps(end_completion_data, separators=(',', ':'))
        yield f'data: {content}\n\n'

    return app.response_class(streaming(), mimetype='text/event-stream')
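
# Example (a sketch, not part of the original file): consuming the streaming
# endpoint with the 'requests' library imported above. The URL assumes the
# default host/port configured in main() below.
#
#   resp = requests.post('http://localhost:1337/chat/completions',
#                        json={'model': 'gpt-3.5-turbo', 'stream': True,
#                              'messages': [{'role': 'user', 'content': 'Hi'}]},
#                        stream=True)
#   for line in resp.iter_lines():
#       if line.startswith(b'data: '):
#           print(json.loads(line[6:]))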


# Get an embedding from the Hugging Face hosted inference API.
def get_embedding(input_text, token):
    huggingface_token = token
    embedding_model = 'sentence-transformers/all-mpnet-base-v2'
    max_token_length = 500

    # Load the tokenizer for the 'all-mpnet-base-v2' model.
    tokenizer = AutoTokenizer.from_pretrained(embedding_model)

    # Tokenize the text and split the tokens into chunks of 500 tokens each.
    tokens = tokenizer.tokenize(input_text)
    token_chunks = [tokens[i:i + max_token_length]
                    for i in range(0, len(tokens), max_token_length)]

    # Create an embedding for each chunk.
    embeddings = []
    for chunk in token_chunks:
        # Convert the chunk tokens back to text and flatten newlines.
        chunk_text = tokenizer.convert_tokens_to_string(chunk)
        chunk_text = chunk_text.replace('\n', ' ')

        # Ask the Hugging Face feature-extraction pipeline for the embedding.
        api_url = f'https://api-inference.huggingface.co/pipeline/feature-extraction/{embedding_model}'
        headers = {'Authorization': f'Bearer {huggingface_token}'}
        response = requests.post(api_url, headers=headers, json={
            'inputs': chunk_text, 'options': {'wait_for_model': True}})

        # Parse the response and collect the chunk's embedding.
        chunk_embedding = response.json()
        embeddings.append(chunk_embedding)

    # Average the chunk embeddings into a single vector. A plain unweighted
    # mean is a crude way to combine chunks; a weighted variant is sketched
    # below.
    num_embeddings = len(embeddings)
    average_embedding = [sum(x) / num_embeddings for x in zip(*embeddings)]
    return average_embedding
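

# Illustrative alternative (a sketch, not the project's code): weight each
# chunk by its token count so that a short trailing chunk does not skew the
# mean. 'embeddings' and 'token_chunks' are assumed to be the same lists
# built in get_embedding above.
def get_embedding_weighted(embeddings, token_chunks):
    weights = [len(chunk) for chunk in token_chunks]
    total_weight = sum(weights)
    # For each output dimension, take the weight-scaled average across chunks.
    return [sum(w * value for w, value in zip(weights, dims)) / total_weight
            for dims in zip(*embeddings)]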


@app.route('/embeddings', methods=['POST'])
def embeddings():
    # Join the (possibly list-valued) OpenAI-style 'input' field into one string.
    input_text_list = request.get_json().get('input')
    input_text = ' '.join(map(str, input_text_list))

    # Forward the caller's bearer token to Hugging Face.
    token = request.headers.get('Authorization').replace('Bearer ', '')
    embedding = get_embedding(input_text, token)

    return {
        'data': [
            {
                'embedding': embedding,
                'index': 0,
                'object': 'embedding',
            }
        ],
        'model': 'text-embedding-ada-002',
        'object': 'list',
        'usage': {
            'prompt_tokens': None,
            'total_tokens': None,
        },
    }


def main():
    app.run(host='0.0.0.0', port=1337, debug=True)


if __name__ == '__main__':
    main()
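
# Example client usage (a sketch, not part of the file): once the server is
# running, point an OpenAI-compatible client (openai-python < 1.0 shown) at
# it by overriding the API base. The chat route never checks the key, so any
# placeholder value works.
#
#   import openai
#   openai.api_key = 'not-needed'
#   openai.api_base = 'http://localhost:1337'
#   response = openai.ChatCompletion.create(
#       model='gpt-3.5-turbo',
#       messages=[{'role': 'user', 'content': 'Hello'}],
#   )
#   print(response['choices'][0]['message']['content'])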