|
|
|
@ -58,7 +58,7 @@
|
|
|
|
|
"\n",
|
|
|
|
|
"retriever = GoogleDriveRetriever(\n",
|
|
|
|
|
" num_results=2,\n",
|
|
|
|
|
")"
|
|
|
|
|
")\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -66,25 +66,26 @@
|
|
|
|
|
"id": "fa339ca0-f478-440c-ba80-0e5f41a19ce1",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"By default, all files with these mime-type can be converted to `Document`.\n",
|
|
|
|
|
"- text/text\n",
|
|
|
|
|
"- text/plain\n",
|
|
|
|
|
"- text/html\n",
|
|
|
|
|
"- text/csv\n",
|
|
|
|
|
"- text/markdown\n",
|
|
|
|
|
"- image/png\n",
|
|
|
|
|
"- image/jpeg\n",
|
|
|
|
|
"- application/epub+zip\n",
|
|
|
|
|
"- application/pdf\n",
|
|
|
|
|
"- application/rtf\n",
|
|
|
|
|
"- application/vnd.google-apps.document (GDoc)\n",
|
|
|
|
|
"- application/vnd.google-apps.presentation (GSlide)\n",
|
|
|
|
|
"- application/vnd.google-apps.spreadsheet (GSheet)\n",
|
|
|
|
|
"- application/vnd.google.colaboratory (Notebook colab)\n",
|
|
|
|
|
"- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
|
|
|
|
|
"- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"It's possible to update or customize this. See the documentation of `GDriveRetriever`.\n",
|
|
|
|
|
"By default, all files with these MIME types can be converted to `Document`.\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"- `text/text`\n",
|
|
|
|
|
"- `text/plain`\n",
|
|
|
|
|
"- `text/html`\n",
|
|
|
|
|
"- `text/csv`\n",
|
|
|
|
|
"- `text/markdown`\n",
|
|
|
|
|
"- `image/png`\n",
|
|
|
|
|
"- `image/jpeg`\n",
|
|
|
|
|
"- `application/epub+zip`\n",
|
|
|
|
|
"- `application/pdf`\n",
|
|
|
|
|
"- `application/rtf`\n",
|
|
|
|
|
"- `application/vnd.google-apps.document` (GDoc)\n",
|
|
|
|
|
"- `application/vnd.google-apps.presentation` (GSlide)\n",
|
|
|
|
|
"- `application/vnd.google-apps.spreadsheet` (GSheet)\n",
|
|
|
|
|
"- `application/vnd.google.colaboratory` (Colab notebook)\n",
|
|
|
|
|
"- `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX)\n",
|
|
|
|
|
"- `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"It's possible to update or customize this. See the documentation of `GoogleDriveRetriever`.\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"However, the corresponding packages must be installed."
|
|
|
|
|
]
|
|
|
|
@ -96,7 +97,7 @@
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"#!pip install unstructured"
|
|
|
|
|
"#!pip install unstructured\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -112,7 +113,7 @@
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"retriever.get_relevant_documents(\"machine learning\")"
|
|
|
|
|
"retriever.get_relevant_documents(\"machine learning\")\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -121,16 +122,17 @@
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"You can customize the criteria used to select the files. A set of predefined filters is provided:\n",
|
|
|
|
|
"| template | description |\n",
|
|
|
|
|
"| -------------------------------------- | --------------------------------------------------------------------- |\n",
|
|
|
|
|
"| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n",
|
|
|
|
|
"| gdrive-query | Search `query` in all drives |\n",
|
|
|
|
|
"| gdrive-by-name | Search file with name `query`) |\n",
|
|
|
|
|
"| gdrive-query-in-folder | Search `query` in `folder_id` (and sub-folders in `_recursive=true`) |\n",
|
|
|
|
|
"| gdrive-mime-type | Search a specific `mime_type` |\n",
|
|
|
|
|
"| gdrive-mime-type-in-folder | Search a specific `mime_type` in `folder_id` |\n",
|
|
|
|
|
"| gdrive-query-with-mime-type | Search `query` with a specific `mime_type` |\n",
|
|
|
|
|
"| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id` |"
|
|
|
|
|
"\n",
|
|
|
|
|
"| Template | Description |\n",
|
|
|
|
|
"| -------------------------------------- | --------------------------------------------------------------------- |\n",
|
|
|
|
|
"| `gdrive-all-in-folder` | Return all compatible files from a `folder_id` |\n",
|
|
|
|
|
"| `gdrive-query` | Search `query` in all drives |\n",
|
|
|
|
|
"| `gdrive-by-name` | Search file with name `query` |\n",
|
|
|
|
|
"| `gdrive-query-in-folder` | Search `query` in `folder_id` (and sub-folders if `_recursive=true`) |\n",
|
|
|
|
|
"| `gdrive-mime-type` | Search a specific `mime_type` |\n",
|
|
|
|
|
"| `gdrive-mime-type-in-folder` | Search a specific `mime_type` in `folder_id` |\n",
|
|
|
|
|
"| `gdrive-query-with-mime-type` | Search `query` with a specific `mime_type` |\n",
|
|
|
|
|
"| `gdrive-query-with-mime-type-and-folder` | Search `query` with a specific `mime_type` and in `folder_id` |"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -148,7 +150,7 @@
|
|
|
|
|
")\n",
|
|
|
|
|
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
|
|
|
|
|
" print(\"---\")\n",
|
|
|
|
|
" print(doc.page_content.strip()[:60] + \"...\")"
|
|
|
|
|
" print(doc.page_content.strip()[:60] + \"...\")\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -187,7 +189,7 @@
|
|
|
|
|
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
|
|
|
|
|
" print(f\"{doc.metadata['name']}:\")\n",
|
|
|
|
|
" print(\"---\")\n",
|
|
|
|
|
" print(doc.page_content.strip()[:60] + \"...\")"
|
|
|
|
|
" print(doc.page_content.strip()[:60] + \"...\")\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
@ -219,7 +221,7 @@
|
|
|
|
|
" includeItemsFromAllDrives=False,\n",
|
|
|
|
|
" supportsAllDrives=False,\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"retriever.get_relevant_documents(\"machine learning\")"
|
|
|
|
|
"retriever.get_relevant_documents(\"machine learning\")\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|