fix: unable to read .pdf or .png files

Add a `supported_formats` parameter to `read_files` so that, by default, it only reads `.txt` and `.md` files and skips every other file type.
pull/10/head
sean1832 1 year ago
parent a64ac2665f
commit 9b90ac26c6

@ -77,21 +77,33 @@ def parse_data(data, delimiter='', force=False):
return data
def read_files(file_dir, delimiter='', force=False, single_string=True, exclude_dir: list = None):
def read_files(file_dir, delimiter='', force=False, single_string=True, exclude_dir: list = None, supported_formats: list = None):
contents = []
if exclude_dir is None:
exclude_dir = []
if supported_formats is None:
supported_formats = ['.txt', '.md']
# Read all files in a directory
for root, dirs, files in os.walk(file_dir):
# Check if root is in excluded directories
if any(dir in root for dir in exclude_dir):
continue
for file in files:
print(f'Processing {file}...')
# extract file path
filepath = os.path.join(root, file)
# extract filename with extension
filename = os.path.basename(filepath)
# extract filename without extension
filename = os.path.splitext(filename)[0]
# Check if filepath contains any excluded directories
if any(dir in filepath for dir in exclude_dir):
continue
# Check if file extension is in supported formats
if not any(file.endswith(format) for format in supported_formats):
continue
file_data = read_file(filepath, delimiter, force)
if force and file_data == '':
continue

Loading…
Cancel
Save