Wrap Confluence attachment processing in a try-except block (#11503)

Prevents document loading from failing when an attachment is not found at
its URL (HTTP 404); such attachments are skipped, and any other HTTP error is still re-raised.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/11498/head^2
April 11 months ago committed by GitHub
parent 17439daa6a
commit c14a8df2ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -541,26 +541,33 @@ class ConfluenceLoader(BaseLoader):
media_type = attachment["metadata"]["mediaType"] media_type = attachment["metadata"]["mediaType"]
absolute_url = self.base_url + attachment["_links"]["download"] absolute_url = self.base_url + attachment["_links"]["download"]
title = attachment["title"] title = attachment["title"]
if media_type == "application/pdf": try:
text = title + self.process_pdf(absolute_url, ocr_languages) if media_type == "application/pdf":
elif ( text = title + self.process_pdf(absolute_url, ocr_languages)
media_type == "image/png" elif (
or media_type == "image/jpg" media_type == "image/png"
or media_type == "image/jpeg" or media_type == "image/jpg"
): or media_type == "image/jpeg"
text = title + self.process_image(absolute_url, ocr_languages) ):
elif ( text = title + self.process_image(absolute_url, ocr_languages)
media_type == "application/vnd.openxmlformats-officedocument" elif (
".wordprocessingml.document" media_type == "application/vnd.openxmlformats-officedocument"
): ".wordprocessingml.document"
text = title + self.process_doc(absolute_url) ):
elif media_type == "application/vnd.ms-excel": text = title + self.process_doc(absolute_url)
text = title + self.process_xls(absolute_url) elif media_type == "application/vnd.ms-excel":
elif media_type == "image/svg+xml": text = title + self.process_xls(absolute_url)
text = title + self.process_svg(absolute_url, ocr_languages) elif media_type == "image/svg+xml":
else: text = title + self.process_svg(absolute_url, ocr_languages)
continue else:
texts.append(text) continue
texts.append(text)
except requests.HTTPError as e:
if e.response.status_code == 404:
print(f"Attachment not found at {absolute_url}")
continue
else:
raise
return texts return texts

Loading…
Cancel
Save