pull/21176/head
Harrison Chase 3 weeks ago
parent 1b191af362
commit 2e5322b8f3

@ -1,8 +1,12 @@
"""Loader that loads data from Sharepoint Document Library"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Iterator, Optional, Sequence, List
from typing import Iterator, List, Optional, Sequence
import requests
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import Field
@ -11,9 +15,6 @@ from langchain_community.document_loaders.base_o365 import (
_FileType,
)
from langchain_community.document_loaders.parsers.registry import get_parser
from langchain_core.document_loaders import BaseLoader
import requests
import json
class SharePointLoader(O365BaseLoader, BaseLoader):
@ -64,8 +65,10 @@ class SharePointLoader(O365BaseLoader, BaseLoader):
raise ValueError(f"There isn't a folder with path {self.folder_path}.")
for blob in self._load_from_folder(target_folder):
for parsed_blob in blob_parser.lazy_parse(blob):
auth_identities = self.authorized_identities(self.document_library_id, self.file_id)
parsed_blob.metadata['authorized_identities'] = auth_identities
auth_identities = self.authorized_identities(
self.document_library_id, self.file_id
)
parsed_blob.metadata["authorized_identities"] = auth_identities
yield parsed_blob
if self.folder_id:
target_folder = drive.get_item(self.folder_id)
@ -84,46 +87,42 @@ class SharePointLoader(O365BaseLoader, BaseLoader):
yield from blob_parser.lazy_parse(blob)
def authorized_identities(self, document_library_id, file_id):
data = self._fetch_access_token()
access_token = data.get('access_token')
access_token = data.get("access_token")
url = (
f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/"
f"drives/{document_library_id}/items/{file_id}/permissions"
)
payload={}
payload = {}
headers = {
"Authorization": f"Bearer {access_token}"
}
headers = {"Authorization": f"Bearer {access_token}"}
response = requests.request("GET", url, headers=headers, data=payload)
groups_list = response.json()
group_names = []
for group_data in groups_list.get('value'):
if group_data.get('grantedToV2'):
if group_data.get('grantedToV2').get('siteGroup'):
site_data = group_data.get('grantedToV2').get('siteGroup')
for group_data in groups_list.get("value"):
if group_data.get("grantedToV2"):
if group_data.get("grantedToV2").get("siteGroup"):
site_data = group_data.get("grantedToV2").get("siteGroup")
# print(group_data)
group_names.append(site_data.get('displayName'))
elif group_data.get('grantedToV2').get('group') or (
group_data.get('grantedToV2').get('user')
group_names.append(site_data.get("displayName"))
elif group_data.get("grantedToV2").get("group") or (
group_data.get("grantedToV2").get("user")
):
site_data = group_data.get('grantedToV2').get('group') or (
group_data.get('grantedToV2').get('user')
site_data = group_data.get("grantedToV2").get("group") or (
group_data.get("grantedToV2").get("user")
)
# print(group_data)
group_names.append(site_data.get('displayName'))
group_names.append(site_data.get("displayName"))
return group_names
def _fetch_access_token(self):
def _fetch_access_token(self):
with open(self.token_path) as f:
s = f.read()
data = json.loads(s)
return data
return data

Loading…
Cancel
Save