From 1e5d25b93cccce3e43ace37db74bc9347fb64c9d Mon Sep 17 00:00:00 2001 From: Andrea Pinto <53018727+andreakiro@users.noreply.github.com> Date: Fri, 12 May 2023 19:03:39 +0200 Subject: [PATCH] Improve error messages formatting in doc loaders (#4586) # Cosmetic fixes to error message formatting Added the missing spaces between adjacent string literals in the `ImportError` messages of several document loaders (including Google Drive, YouTube, Confluence and others) to improve traceback readability. This change ensures that words in the error messages no longer run together, and that the `pip install xyz` commands can be easily copied to the clipboard from the terminal. ## Who can review? @eyurtsev --- langchain/document_loaders/confluence.py | 24 ++++++++++---------- langchain/document_loaders/googledrive.py | 4 ++-- langchain/document_loaders/image_captions.py | 6 ++--- langchain/document_loaders/onedrive.py | 2 +- langchain/document_loaders/pdf.py | 6 ++--- langchain/document_loaders/s3_file.py | 2 +- langchain/document_loaders/web_base.py | 2 +- langchain/document_loaders/youtube.py | 10 ++++---- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/langchain/document_loaders/confluence.py b/langchain/document_loaders/confluence.py index 7920ff2c..40ad9eb6 100644 --- a/langchain/document_loaders/confluence.py +++ b/langchain/document_loaders/confluence.py @@ -92,7 +92,7 @@ class ConfluenceLoader(BaseLoader): from atlassian import Confluence # noqa: F401 except ImportError: raise ImportError( - "`atlassian` package not found, please run" + "`atlassian` package not found, please run " "`pip install atlassian-python-api`" ) @@ -124,13 +124,13 @@ class ConfluenceLoader(BaseLoader): if (api_key and not username) or (username and not api_key): errors.append( - "If one of `api_key` or `username` is provided," + "If one of `api_key` or `username` is provided, " "the other must be as well." 
) if (api_key or username) and oauth2: errors.append( - "Cannot provide a value for `api_key` and/or" + "Cannot provide a value for `api_key` and/or " "`username` and provide a value for `oauth2`" ) @@ -141,8 +141,8 @@ class ConfluenceLoader(BaseLoader): "key_cert", ]: errors.append( - "You have either ommited require keys or added extra" - "keys to the oauth2 dictionary. key values should be" + "You have either ommited require keys or added extra " + "keys to the oauth2 dictionary. key values should be " "`['access_token', 'access_token_secret', 'consumer_key', 'key_cert']`" ) @@ -192,7 +192,7 @@ class ConfluenceLoader(BaseLoader): """ if not space_key and not page_ids and not label and not cql: raise ValueError( - "Must specify at least one among `space_key`, `page_ids`," + "Must specify at least one among `space_key`, `page_ids`, " "`label`, `cql` parameters." ) @@ -338,8 +338,8 @@ class ConfluenceLoader(BaseLoader): from bs4 import BeautifulSoup # type: ignore except ImportError: raise ImportError( - "`beautifulsoup4` package not found, please run" - " `pip install beautifulsoup4`" + "`beautifulsoup4` package not found, please run " + "`pip install beautifulsoup4`" ) if include_attachments: @@ -374,7 +374,7 @@ class ConfluenceLoader(BaseLoader): from PIL import Image # noqa: F401 except ImportError: raise ImportError( - "`pytesseract` or `pdf2image` or `Pillow` package not found," + "`pytesseract` or `pdf2image` or `Pillow` package not found, " "please run `pip install pytesseract pdf2image Pillow`" ) @@ -415,7 +415,7 @@ class ConfluenceLoader(BaseLoader): from pdf2image import convert_from_bytes # noqa: F401 except ImportError: raise ImportError( - "`pytesseract` or `pdf2image` package not found," + "`pytesseract` or `pdf2image` package not found, " "please run `pip install pytesseract pdf2image`" ) @@ -450,7 +450,7 @@ class ConfluenceLoader(BaseLoader): from PIL import Image # noqa: F401 except ImportError: raise ImportError( - "`pytesseract` or `Pillow` 
package not found," + "`pytesseract` or `Pillow` package not found, " "please run `pip install pytesseract Pillow`" ) @@ -531,7 +531,7 @@ class ConfluenceLoader(BaseLoader): from svglib.svglib import svg2rlg # noqa: F401 except ImportError: raise ImportError( - "`pytesseract`, `Pillow`, or `svglib` package not found," + "`pytesseract`, `Pillow`, or `svglib` package not found, " "please run `pip install pytesseract Pillow svglib`" ) diff --git a/langchain/document_loaders/googledrive.py b/langchain/document_loaders/googledrive.py index 4648b568..69fa190c 100644 --- a/langchain/document_loaders/googledrive.py +++ b/langchain/document_loaders/googledrive.py @@ -68,10 +68,10 @@ class GoogleDriveLoader(BaseLoader, BaseModel): from google_auth_oauthlib.flow import InstalledAppFlow except ImportError: raise ImportError( - "You must run" + "You must run " "`pip install --upgrade " "google-api-python-client google-auth-httplib2 " - "google-auth-oauthlib`" + "google-auth-oauthlib` " "to use the Google Drive loader." ) diff --git a/langchain/document_loaders/image_captions.py b/langchain/document_loaders/image_captions.py index f80d3aa7..1837ece6 100644 --- a/langchain/document_loaders/image_captions.py +++ b/langchain/document_loaders/image_captions.py @@ -40,8 +40,8 @@ class ImageCaptionLoader(BaseLoader): from transformers import BlipForConditionalGeneration, BlipProcessor except ImportError: raise ValueError( - "transformers package not found, please install with" - "`pip install transformers`" + "`transformers` package not found, please install with " + "`pip install transformers`." 
) processor = BlipProcessor.from_pretrained(self.blip_processor) @@ -67,7 +67,7 @@ class ImageCaptionLoader(BaseLoader): from PIL import Image except ImportError: raise ValueError( - "PIL package not found, please install with `pip install pillow`" + "`PIL` package not found, please install with `pip install pillow`" ) try: diff --git a/langchain/document_loaders/onedrive.py b/langchain/document_loaders/onedrive.py index 8c30e26a..0d49902e 100644 --- a/langchain/document_loaders/onedrive.py +++ b/langchain/document_loaders/onedrive.py @@ -197,7 +197,7 @@ class OneDriveLoader(BaseLoader, BaseModel): file = drive.get_item(object_id) if not file: logging.warning( - "There isn't a file with" + "There isn't a file with " f"object_id {object_id} in drive {drive}." ) continue diff --git a/langchain/document_loaders/pdf.py b/langchain/document_loaders/pdf.py index e2763205..fe84e0c0 100644 --- a/langchain/document_loaders/pdf.py +++ b/langchain/document_loaders/pdf.py @@ -194,7 +194,7 @@ class PDFMinerLoader(BasePDFLoader): from pdfminer.high_level import extract_text # noqa:F401 except ImportError: raise ValueError( - "pdfminer package not found, please install it with " + "`pdfminer` package not found, please install it with " "`pip install pdfminer.six`" ) @@ -222,7 +222,7 @@ class PDFMinerPDFasHTMLLoader(BasePDFLoader): from pdfminer.high_level import extract_text_to_fp # noqa:F401 except ImportError: raise ValueError( - "pdfminer package not found, please install it with " + "`pdfminer` package not found, please install it with " "`pip install pdfminer.six`" ) @@ -256,7 +256,7 @@ class PyMuPDFLoader(BasePDFLoader): import fitz # noqa:F401 except ImportError: raise ValueError( - "PyMuPDF package not found, please install it with " + "`PyMuPDF` package not found, please install it with " "`pip install pymupdf`" ) diff --git a/langchain/document_loaders/s3_file.py b/langchain/document_loaders/s3_file.py index ec872a72..3625ed0e 100644 --- 
a/langchain/document_loaders/s3_file.py +++ b/langchain/document_loaders/s3_file.py @@ -22,7 +22,7 @@ class S3FileLoader(BaseLoader): import boto3 except ImportError: raise ValueError( - "Could not import boto3 python package. " + "Could not import `boto3` python package. " "Please install it with `pip install boto3`." ) s3 = boto3.client("s3") diff --git a/langchain/document_loaders/web_base.py b/langchain/document_loaders/web_base.py index 1d4e90c5..f39f361f 100644 --- a/langchain/document_loaders/web_base.py +++ b/langchain/document_loaders/web_base.py @@ -76,7 +76,7 @@ class WebBaseLoader(BaseLoader): self.session.headers = dict(headers) except ImportError: logger.info( - "fake_useragent not found, using default user agent." + "fake_useragent not found, using default user agent. " "To get a realistic header for requests, `pip install fake_useragent`." ) diff --git a/langchain/document_loaders/youtube.py b/langchain/document_loaders/youtube.py index ef65fb11..4f586576 100644 --- a/langchain/document_loaders/youtube.py +++ b/langchain/document_loaders/youtube.py @@ -70,8 +70,8 @@ class GoogleApiClient: "You must run" "`pip install --upgrade " "google-api-python-client google-auth-httplib2 " - "google-auth-oauthlib" - "youtube-transcript-api`" + "google-auth-oauthlib " + "youtube-transcript-api` " "to use the Google Drive loader" ) @@ -239,8 +239,8 @@ class GoogleApiYoutubeLoader(BaseLoader): "You must run" "`pip install --upgrade " "google-api-python-client google-auth-httplib2 " - "google-auth-oauthlib" - "youtube-transcript-api`" + "google-auth-oauthlib " + "youtube-transcript-api` " "to use the Google Drive loader" ) @@ -305,7 +305,7 @@ class GoogleApiYoutubeLoader(BaseLoader): raise ImportError( "You must run" "`pip install --upgrade " - "youtube-transcript-api`" + "youtube-transcript-api` " "to use the youtube loader" )