Adding GCP Bigquery vector search with ChatGPT cookbook (#1344)

Co-authored-by: Aaron Wilkowitz <157151487+aaronwilkowitz-openai@users.noreply.github.com>
pap-openai 2024-08-07 23:21:46 +01:00 committed by GitHub
parent 3f85e94f64
commit 872a322868
4 changed files with 1037 additions and 0 deletions

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,77 @@
from google.cloud import bigquery
import functions_framework
import os
from openai import OpenAI
import json

openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

embeddings_model = os.getenv('EMBEDDINGS_MODEL')
project_id = os.getenv('PROJECT_ID')
dataset_id = os.getenv('DATASET_ID')
table_id = os.getenv('TABLE_ID')


def generate_embeddings(text, model):
    print(f'Generating embedding for: {text}')
    # Generate embeddings for the provided text using the specified model
    embeddings_response = openai_client.embeddings.create(model=model, input=text)
    # Extract the embedding data from the response
    embedding = embeddings_response.data[0].embedding
    return embedding


@functions_framework.http
def openai_docs_search(request):
    print('received a request')
    client = bigquery.Client()

    request_json = request.get_json(silent=True)
    print(request_json)

    if not request_json:
        return json.dumps({"error": "Invalid JSON in request"}), 400, {'Content-Type': 'application/json'}

    query = request_json.get('query')
    top_k = request_json.get('top_k', 3)
    category = request_json.get('category', '')

    if not query:
        return json.dumps({"error": "Query parameter is required"}), 400, {'Content-Type': 'application/json'}

    # Embed the incoming query with the same model used for the stored documents
    embedding_query = generate_embeddings(query, embeddings_model)
    embedding_query_list = ', '.join(map(str, embedding_query))

    # Brute-force cosine VECTOR_SEARCH over the embeddings table, joined back to
    # the source rows to return the matching text, title, and category
    sql_query = f"""
    WITH search_results AS (
        SELECT query.id AS query_id, base.id AS base_id, distance
        FROM VECTOR_SEARCH(
            TABLE `{project_id}.{dataset_id}.{table_id}`, 'content_vector',
            (SELECT ARRAY[{embedding_query_list}] AS content_vector, 'query_vector' AS id),
            top_k => {top_k}, distance_type => 'COSINE', options => '{{"use_brute_force": true}}')
    )
    SELECT sr.query_id, sr.base_id, sr.distance, ed.text, ed.title, ed.category
    FROM search_results sr
    JOIN `{project_id}.{dataset_id}.{table_id}` ed ON sr.base_id = ed.id
    """

    if category:
        sql_query += f" WHERE ed.category = '{category}'"

    sql_query += " ORDER BY sr.distance;"

    query_job = client.query(sql_query)  # Make an API request.

    rows = []
    for row in query_job:
        print(row.title)
        rows.append({
            "text": row.text,
            "title": row.title,
            "distance": row.distance,
            "category": row.category
        })

    response = {
        "items": rows
    }

    print('sending response')
    print(len(rows))
    return json.dumps(response), 200
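
Once this function is deployed as an HTTP-triggered Cloud Function, it can be exercised with a plain JSON POST. Below is a minimal caller sketch, assuming a hypothetical function URL and using the `requests` library; the `top_k` and `category` fields are optional, mirroring the defaults handled above.

import requests

# Hypothetical URL of the deployed Cloud Function.
FUNCTION_URL = "https://REGION-PROJECT_ID.cloudfunctions.net/openai_docs_search"

payload = {
    "query": "How do I create embeddings with the OpenAI API?",  # example query text
    "top_k": 3,       # optional; the function defaults to 3
    "category": ""    # optional; filters on the table's `category` column when set
}

resp = requests.post(FUNCTION_URL, json=payload, timeout=60)
resp.raise_for_status()
for item in resp.json()["items"]:
    print(item["title"], item["distance"])

The response mirrors the `items` list built above, so the same shape applies whether the caller is a GPT Action or a local script.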

View File

@@ -0,0 +1,3 @@
google-cloud-bigquery
functions-framework
openai
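
For reference, the `VECTOR_SEARCH` query in the function above assumes an embeddings table exposing `id`, `text`, `title`, `category`, and a repeated `content_vector` column. A minimal sketch of creating such a table with the BigQuery Python client follows; the fully qualified table name is a placeholder.

from google.cloud import bigquery

client = bigquery.Client()

# Column names mirror those referenced by the SQL in the Cloud Function;
# the table name below is a placeholder.
schema = [
    bigquery.SchemaField("id", "STRING"),
    bigquery.SchemaField("text", "STRING"),
    bigquery.SchemaField("title", "STRING"),
    bigquery.SchemaField("category", "STRING"),
    # Embeddings stored as a repeated FLOAT64 column, as expected by VECTOR_SEARCH.
    bigquery.SchemaField("content_vector", "FLOAT64", mode="REPEATED"),
]

table = bigquery.Table("your-project.your_dataset.your_table", schema=schema)
client.create_table(table, exists_ok=True)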

View File

@@ -1457,6 +1457,18 @@
    - gpt-actions-library
    - chatgpt
- title: GCP BigQuery Vector Search with GCP Functions and GPT Actions in ChatGPT
  path: examples/chatgpt/rag-quickstart/gcp/Getting_started_with_bigquery_vector_search_and_openai.ipynb
  date: 2024-08-02
  authors:
    - pap-openai
    - maxreid-openai
  tags:
    - embeddings
    - chatgpt
    - tiktoken
    - completions
- title: GPT Actions library - Zapier
  path: examples/chatgpt/gpt_actions_library/gpt_action_zapier.ipynb
  date: 2024-08-05