|
|
|
@ -1,8 +1,12 @@
|
|
|
|
|
"""Loader that loads data from Sharepoint Document Library"""
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Iterator, Optional, Sequence, List
|
|
|
|
|
from typing import Iterator, List, Optional, Sequence
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from langchain_core.document_loaders import BaseLoader
|
|
|
|
|
from langchain_core.documents import Document
|
|
|
|
|
from langchain_core.pydantic_v1 import Field
|
|
|
|
|
|
|
|
|
@ -11,9 +15,6 @@ from langchain_community.document_loaders.base_o365 import (
|
|
|
|
|
_FileType,
|
|
|
|
|
)
|
|
|
|
|
from langchain_community.document_loaders.parsers.registry import get_parser
|
|
|
|
|
from langchain_core.document_loaders import BaseLoader
|
|
|
|
|
import requests
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SharePointLoader(O365BaseLoader, BaseLoader):
|
|
|
|
@ -64,8 +65,10 @@ class SharePointLoader(O365BaseLoader, BaseLoader):
|
|
|
|
|
raise ValueError(f"There isn't a folder with path {self.folder_path}.")
|
|
|
|
|
for blob in self._load_from_folder(target_folder):
|
|
|
|
|
for parsed_blob in blob_parser.lazy_parse(blob):
|
|
|
|
|
auth_identities = self.authorized_identities(self.document_library_id, self.file_id)
|
|
|
|
|
parsed_blob.metadata['authorized_identities'] = auth_identities
|
|
|
|
|
auth_identities = self.authorized_identities(
|
|
|
|
|
self.document_library_id, self.file_id
|
|
|
|
|
)
|
|
|
|
|
parsed_blob.metadata["authorized_identities"] = auth_identities
|
|
|
|
|
yield parsed_blob
|
|
|
|
|
if self.folder_id:
|
|
|
|
|
target_folder = drive.get_item(self.folder_id)
|
|
|
|
@ -84,46 +87,42 @@ class SharePointLoader(O365BaseLoader, BaseLoader):
|
|
|
|
|
yield from blob_parser.lazy_parse(blob)
|
|
|
|
|
|
|
|
|
|
def authorized_identities(self, document_library_id, file_id):
|
|
|
|
|
|
|
|
|
|
data = self._fetch_access_token()
|
|
|
|
|
|
|
|
|
|
access_token = data.get('access_token')
|
|
|
|
|
|
|
|
|
|
access_token = data.get("access_token")
|
|
|
|
|
|
|
|
|
|
url = (
|
|
|
|
|
f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/"
|
|
|
|
|
f"drives/{document_library_id}/items/{file_id}/permissions"
|
|
|
|
|
)
|
|
|
|
|
payload={}
|
|
|
|
|
payload = {}
|
|
|
|
|
|
|
|
|
|
headers = {
|
|
|
|
|
"Authorization": f"Bearer {access_token}"
|
|
|
|
|
}
|
|
|
|
|
headers = {"Authorization": f"Bearer {access_token}"}
|
|
|
|
|
|
|
|
|
|
response = requests.request("GET", url, headers=headers, data=payload)
|
|
|
|
|
groups_list = response.json()
|
|
|
|
|
|
|
|
|
|
group_names = []
|
|
|
|
|
|
|
|
|
|
for group_data in groups_list.get('value'):
|
|
|
|
|
if group_data.get('grantedToV2'):
|
|
|
|
|
if group_data.get('grantedToV2').get('siteGroup'):
|
|
|
|
|
site_data = group_data.get('grantedToV2').get('siteGroup')
|
|
|
|
|
|
|
|
|
|
for group_data in groups_list.get("value"):
|
|
|
|
|
if group_data.get("grantedToV2"):
|
|
|
|
|
if group_data.get("grantedToV2").get("siteGroup"):
|
|
|
|
|
site_data = group_data.get("grantedToV2").get("siteGroup")
|
|
|
|
|
# print(group_data)
|
|
|
|
|
group_names.append(site_data.get('displayName'))
|
|
|
|
|
elif group_data.get('grantedToV2').get('group') or (
|
|
|
|
|
group_data.get('grantedToV2').get('user')
|
|
|
|
|
group_names.append(site_data.get("displayName"))
|
|
|
|
|
elif group_data.get("grantedToV2").get("group") or (
|
|
|
|
|
group_data.get("grantedToV2").get("user")
|
|
|
|
|
):
|
|
|
|
|
site_data = group_data.get('grantedToV2').get('group') or (
|
|
|
|
|
group_data.get('grantedToV2').get('user')
|
|
|
|
|
site_data = group_data.get("grantedToV2").get("group") or (
|
|
|
|
|
group_data.get("grantedToV2").get("user")
|
|
|
|
|
)
|
|
|
|
|
# print(group_data)
|
|
|
|
|
group_names.append(site_data.get('displayName'))
|
|
|
|
|
|
|
|
|
|
group_names.append(site_data.get("displayName"))
|
|
|
|
|
|
|
|
|
|
return group_names
|
|
|
|
|
|
|
|
|
|
def _fetch_access_token(self):
|
|
|
|
|
|
|
|
|
|
def _fetch_access_token(self):
|
|
|
|
|
with open(self.token_path) as f:
|
|
|
|
|
s = f.read()
|
|
|
|
|
data = json.loads(s)
|
|
|
|
|
return data
|
|
|
|
|
return data
|
|
|
|
|