mirror of
https://github.com/openai/openai-cookbook
synced 2024-11-11 13:11:02 +00:00
Adding GCP Bigquery vector search with ChatGPT cookbook (#1344)
Co-authored-by: Aaron Wilkowitz <157151487+aaronwilkowitz-openai@users.noreply.github.com>
This commit is contained in:
parent
3f85e94f64
commit
872a322868
File diff suppressed because one or more lines are too long
77
examples/chatgpt/rag-quickstart/gcp/main.py
Normal file
77
examples/chatgpt/rag-quickstart/gcp/main.py
Normal file
@ -0,0 +1,77 @@
|
||||
from google.cloud import bigquery
|
||||
import functions_framework
|
||||
import os
|
||||
from openai import OpenAI
|
||||
import json
|
||||
|
||||
# Module-level configuration, read once from the environment when the
# function instance is loaded. Each value is None when its variable is unset.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Name of the OpenAI embeddings model passed to generate_embeddings().
embeddings_model = os.environ.get('EMBEDDINGS_MODEL')

# The three parts of the fully-qualified BigQuery table name
# (`project.dataset.table`) that the search query reads from.
project_id = os.environ.get('PROJECT_ID')
dataset_id = os.environ.get('DATASET_ID')
table_id = os.environ.get('TABLE_ID')
|
||||
|
||||
def generate_embeddings(text, model):
    """Return the embedding vector for *text* produced by *model*.

    Calls the embeddings endpoint of the module-level ``openai_client`` and
    returns the ``embedding`` attribute of the first item in the response.

    Args:
        text: Input passed as ``input`` to the embeddings endpoint.
        model: Embeddings model name passed as ``model``.
    """
    print(f'Generating embedding for: {text}')
    response = openai_client.embeddings.create(model=model, input=text)
    # Only one input is sent, so the first data item is its embedding.
    return response.data[0].embedding
|
||||
|
||||
@functions_framework.http
def openai_docs_search(request):
    """HTTP entry point: vector search over the configured BigQuery table.

    The request body must be a JSON object with these keys:

    * ``query`` (required): text to embed and search for.
    * ``top_k`` (optional, default 3): number of nearest neighbours requested
      from ``VECTOR_SEARCH``; must be a positive integer.
    * ``category`` (optional): when non-empty, only rows whose ``category``
      column equals this value are returned. The filter is applied after
      the vector search, so fewer than ``top_k`` rows may come back.

    Returns:
        A ``(body, status, headers)`` tuple. On success the body is a JSON
        object ``{"items": [...]}`` whose items carry ``text``, ``title``,
        ``distance`` and ``category``, ordered by ascending distance. On
        invalid input the body is ``{"error": ...}`` with status 400.
    """
    json_headers = {'Content-Type': 'application/json'}

    print('received a request')
    client = bigquery.Client()

    request_json = request.get_json(silent=True)
    print(request_json)

    # A non-object JSON body (e.g. a list) has no .get(); reject it up front.
    if not request_json or not isinstance(request_json, dict):
        return json.dumps({"error": "Invalid JSON in request"}), 400, json_headers

    query = request_json.get('query')
    top_k = request_json.get('top_k', 3)
    category = request_json.get('category', '')

    if not query:
        return json.dumps({"error": "Query parameter is required"}), 400, json_headers

    # top_k is interpolated into the SQL text below, so it must be coerced to
    # a plain positive integer before it gets anywhere near the query string.
    try:
        if isinstance(top_k, bool):
            raise TypeError("top_k must not be a boolean")
        top_k = int(top_k)
    except (TypeError, ValueError, OverflowError):
        top_k = 0
    if top_k < 1:
        return json.dumps({"error": "top_k must be a positive integer"}), 400, json_headers

    embedding_query = generate_embeddings(query, embeddings_model)
    # The embedding values come from the embeddings API (not from the caller)
    # and are inlined as an ARRAY literal of numbers.
    embedding_query_list = ', '.join(map(str, embedding_query))

    sql_query = f"""
    WITH search_results AS (
        SELECT query.id AS query_id, base.id AS base_id, distance
        FROM VECTOR_SEARCH(
            TABLE `{project_id}.{dataset_id}.{table_id}`, 'content_vector',
            (SELECT ARRAY[{embedding_query_list}] AS content_vector, 'query_vector' AS id),
            top_k => {top_k}, distance_type => 'COSINE', options => '{{"use_brute_force": true}}')
    )
    SELECT sr.query_id, sr.base_id, sr.distance, ed.text, ed.title, ed.category
    FROM search_results sr
    JOIN `{project_id}.{dataset_id}.{table_id}` ed ON sr.base_id = ed.id
    """

    # The category comes straight from the request body, so it is bound as a
    # query parameter instead of being concatenated into the SQL string.
    query_parameters = []
    if category:
        sql_query += " WHERE ed.category = @category"
        query_parameters.append(
            bigquery.ScalarQueryParameter("category", "STRING", str(category))
        )

    sql_query += " ORDER BY sr.distance;"

    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    query_job = client.query(sql_query, job_config=job_config)  # Make an API request.

    rows = []
    for row in query_job:
        print(row.title)
        rows.append({
            "text": row.text,
            "title": row.title,
            "distance": row.distance,
            "category": row.category,
        })

    response = {"items": rows}
    print('sending response')
    print(len(rows))
    # Same Content-Type header as the error responses above.
    return json.dumps(response), 200, json_headers
|
3
examples/chatgpt/rag-quickstart/gcp/requirements.txt
Normal file
3
examples/chatgpt/rag-quickstart/gcp/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
google-cloud-bigquery
|
||||
functions-framework
|
||||
openai
|
@ -1457,6 +1457,18 @@
|
||||
- gpt-actions-library
|
||||
- chatgpt
|
||||
|
||||
- title: GCP BigQuery Vector Search with GCP Functions and GPT Actions in ChatGPT
|
||||
path: examples/chatgpt/rag-quickstart/gcp/Getting_started_with_bigquery_vector_search_and_openai.ipynb
|
||||
date: 2024-08-02
|
||||
authors:
|
||||
- pap-openai
|
||||
- maxreid-openai
|
||||
tags:
|
||||
- embeddings
|
||||
- chatgpt
|
||||
- tiktoken
|
||||
- completions
|
||||
|
||||
- title: GPT Actions library - Zapier
|
||||
path: examples/chatgpt/gpt_actions_library/gpt_action_zapier.ipynb
|
||||
date: 2024-08-05
|
||||
|
Loading…
Reference in New Issue
Block a user