wrap confluence attachment processing with a try-except block (#11503)

Prevents document loading from erroring out when an attachment is not
found at the url.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/11498/head^2
April 9 months ago committed by GitHub
parent 17439daa6a
commit c14a8df2ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -541,26 +541,33 @@ class ConfluenceLoader(BaseLoader):
media_type = attachment["metadata"]["mediaType"]
absolute_url = self.base_url + attachment["_links"]["download"]
title = attachment["title"]
if media_type == "application/pdf":
text = title + self.process_pdf(absolute_url, ocr_languages)
elif (
media_type == "image/png"
or media_type == "image/jpg"
or media_type == "image/jpeg"
):
text = title + self.process_image(absolute_url, ocr_languages)
elif (
media_type == "application/vnd.openxmlformats-officedocument"
".wordprocessingml.document"
):
text = title + self.process_doc(absolute_url)
elif media_type == "application/vnd.ms-excel":
text = title + self.process_xls(absolute_url)
elif media_type == "image/svg+xml":
text = title + self.process_svg(absolute_url, ocr_languages)
else:
continue
texts.append(text)
try:
if media_type == "application/pdf":
text = title + self.process_pdf(absolute_url, ocr_languages)
elif (
media_type == "image/png"
or media_type == "image/jpg"
or media_type == "image/jpeg"
):
text = title + self.process_image(absolute_url, ocr_languages)
elif (
media_type == "application/vnd.openxmlformats-officedocument"
".wordprocessingml.document"
):
text = title + self.process_doc(absolute_url)
elif media_type == "application/vnd.ms-excel":
text = title + self.process_xls(absolute_url)
elif media_type == "image/svg+xml":
text = title + self.process_svg(absolute_url, ocr_languages)
else:
continue
texts.append(text)
except requests.HTTPError as e:
if e.response.status_code == 404:
print(f"Attachment not found at {absolute_url}")
continue
else:
raise
return texts

Loading…
Cancel
Save