add twitter chat loader doc (#9737)

pull/9745/head
Harrison Chase 1 year ago committed by GitHub
parent 87da56fb1e
commit ade482c17e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,77 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d86853d2",
"metadata": {},
"source": [
"# Twitter (via Apify)\n",
"\n",
"This notebook shows how to load chat messages from Twitter to finetune on. We do this by utilizing Apify. \n",
"\n",
"First, use Apify to export tweets. An example"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e5034b4e",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from langchain.schema import AIMessage\n",
"from langchain.adapters.openai import convert_message_to_dict"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8bf0fb93",
"metadata": {},
"outputs": [],
"source": [
"with open('example_data/dataset_twitter-scraper_2023-08-23_22-13-19-740.json') as f:\n",
" data = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "468124fa",
"metadata": {},
"outputs": [],
"source": [
"# Filter out tweets that reference other tweets, because it's a bit weird\n",
"tweets = [d[\"full_text\"] for d in data if \"t.co\" not in d['full_text']]\n",
"# Create them as AI messages\n",
"messages = [AIMessage(content=t) for t in tweets]\n",
"# Add in a system message at the start\n",
"# TODO: we could try to extract the subject from the tweets, and put that in the system message.\n",
"system_message = {\"role\": \"system\", \"content\": \"write a tweet\"}\n",
"data = [[system_message, convert_message_to_dict(m)] for m in messages]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading…
Cancel
Save