Added option --break-on-reject

and modified `--break-on-existing`
This commit is contained in:
pukkandan 2021-01-13 06:31:01 +05:30
parent 90505ff153
commit 8b0d7497d5
5 changed files with 82 additions and 54 deletions

View File

@ -250,8 +250,10 @@ ## Video Selection:
--download-archive FILE Download only videos not listed in the --download-archive FILE Download only videos not listed in the
archive file. Record the IDs of all archive file. Record the IDs of all
downloaded videos in it. downloaded videos in it.
--break-on-existing Stop the download process after attempting --break-on-existing Stop the download process when encountering
to download a file that's in the archive. a file that's in the archive.
--break-on-reject Stop the download process when encountering
a file that has been filtered out.
--no-download-archive Do not use archive file (default) --no-download-archive Do not use archive file (default)
--include-ads Download advertisements as well --include-ads Download advertisements as well
(experimental) (experimental)

View File

@ -58,6 +58,7 @@
encode_compat_str, encode_compat_str,
encodeFilename, encodeFilename,
error_to_compat_str, error_to_compat_str,
ExistingVideoReached,
expand_path, expand_path,
ExtractorError, ExtractorError,
format_bytes, format_bytes,
@ -81,6 +82,7 @@
register_socks_protocols, register_socks_protocols,
render_table, render_table,
replace_extension, replace_extension,
RejectedVideoReached,
SameFileError, SameFileError,
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
@ -232,6 +234,7 @@ class YoutubeDL(object):
again. again.
break_on_existing: Stop the download process after attempting to download a file that's break_on_existing: Stop the download process after attempting to download a file that's
in the archive. in the archive.
break_on_reject: Stop the download process when encountering a video that has been filtered out.
cookiefile: File name where cookies should be read from and dumped to. cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information. prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
@ -797,44 +800,53 @@ def prepare_filename(self, info_dict):
def _match_entry(self, info_dict, incomplete): def _match_entry(self, info_dict, incomplete):
""" Returns None if the file should be downloaded """ """ Returns None if the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video')) def check_filter():
if 'title' in info_dict: video_title = info_dict.get('title', info_dict.get('id', 'video'))
# This can happen when we're just evaluating the playlist if 'title' in info_dict:
title = info_dict['title'] # This can happen when we're just evaluating the playlist
matchtitle = self.params.get('matchtitle', False) title = info_dict['title']
if matchtitle: matchtitle = self.params.get('matchtitle', False)
if not re.search(matchtitle, title, re.IGNORECASE): if matchtitle:
return '"' + title + '" title did not match pattern "' + matchtitle + '"' if not re.search(matchtitle, title, re.IGNORECASE):
rejecttitle = self.params.get('rejecttitle', False) return '"' + title + '" title did not match pattern "' + matchtitle + '"'
if rejecttitle: rejecttitle = self.params.get('rejecttitle', False)
if re.search(rejecttitle, title, re.IGNORECASE): if rejecttitle:
return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' if re.search(rejecttitle, title, re.IGNORECASE):
date = info_dict.get('upload_date') return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
if date is not None: date = info_dict.get('upload_date')
dateRange = self.params.get('daterange', DateRange()) if date is not None:
if date not in dateRange: dateRange = self.params.get('daterange', DateRange())
return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) if date not in dateRange:
view_count = info_dict.get('view_count') return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
if view_count is not None: view_count = info_dict.get('view_count')
min_views = self.params.get('min_views') if view_count is not None:
if min_views is not None and view_count < min_views: min_views = self.params.get('min_views')
return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) if min_views is not None and view_count < min_views:
max_views = self.params.get('max_views') return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
if max_views is not None and view_count > max_views: max_views = self.params.get('max_views')
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if max_views is not None and view_count > max_views:
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
return 'Skipping "%s" because it is age restricted' % video_title if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
if self.in_download_archive(info_dict): return 'Skipping "%s" because it is age restricted' % video_title
return '%s has already been recorded in archive' % video_title if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
if not incomplete: if not incomplete:
match_filter = self.params.get('match_filter') match_filter = self.params.get('match_filter')
if match_filter is not None: if match_filter is not None:
ret = match_filter(info_dict) ret = match_filter(info_dict)
if ret is not None: if ret is not None:
return ret return ret
return None
return None reason = check_filter()
if reason is not None:
self.to_screen('[download] ' + reason)
if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing'):
raise ExistingVideoReached()
elif self.params.get('break_on_reject'):
raise RejectedVideoReached()
return reason
@staticmethod @staticmethod
def add_extra_info(info_dict, extra_info): def add_extra_info(info_dict, extra_info):
@ -895,7 +907,7 @@ def wrapper(self, *args, **kwargs):
self.report_error(msg) self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback()) self.report_error(compat_str(e), e.format_traceback())
except MaxDownloadsReached: except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
raise raise
except Exception as e: except Exception as e:
if self.params.get('ignoreerrors', False): if self.params.get('ignoreerrors', False):
@ -1098,14 +1110,7 @@ def report_download(num_entries):
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
} }
reason = self._match_entry(entry, incomplete=True) self._match_entry(entry, incomplete=True)
if reason is not None:
if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
break
else:
self.to_screen('[download] ' + reason)
continue
entry_result = self.__process_iterable_entry(entry, download, extra) entry_result = self.__process_iterable_entry(entry, download, extra)
# TODO: skip failed (empty) entries? # TODO: skip failed (empty) entries?
@ -1870,9 +1875,7 @@ def process_info(self, info_dict):
if 'format' not in info_dict: if 'format' not in info_dict:
info_dict['format'] = info_dict['ext'] info_dict['format'] = info_dict['ext']
reason = self._match_entry(info_dict, incomplete=False) if self._match_entry(info_dict, incomplete=False) is not None:
if reason is not None:
self.to_screen('[download] ' + reason)
return return
self._num_downloads += 1 self._num_downloads += 1
@ -2260,7 +2263,13 @@ def download(self, url_list):
except UnavailableVideoError: except UnavailableVideoError:
self.report_error('unable to download video') self.report_error('unable to download video')
except MaxDownloadsReached: except MaxDownloadsReached:
self.to_screen('[info] Maximum number of downloaded files reached.') self.to_screen('[info] Maximum number of downloaded files reached')
raise
except ExistingVideoReached:
self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
raise
except RejectedVideoReached:
self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
raise raise
else: else:
if self.params.get('dump_single_json', False): if self.params.get('dump_single_json', False):

View File

@ -26,11 +26,13 @@
decodeOption, decodeOption,
DEFAULT_OUTTMPL, DEFAULT_OUTTMPL,
DownloadError, DownloadError,
ExistingVideoReached,
expand_path, expand_path,
match_filter_func, match_filter_func,
MaxDownloadsReached, MaxDownloadsReached,
preferredencoding, preferredencoding,
read_batch_urls, read_batch_urls,
RejectedVideoReached,
SameFileError, SameFileError,
setproctitle, setproctitle,
std_headers, std_headers,
@ -449,6 +451,7 @@ def parse_retries(retries):
'age_limit': opts.age_limit, 'age_limit': opts.age_limit,
'download_archive': download_archive_fn, 'download_archive': download_archive_fn,
'break_on_existing': opts.break_on_existing, 'break_on_existing': opts.break_on_existing,
'break_on_reject': opts.break_on_reject,
'cookiefile': opts.cookiefile, 'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate, 'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure, 'prefer_insecure': opts.prefer_insecure,
@ -519,8 +522,8 @@ def parse_retries(retries):
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
else: else:
retcode = ydl.download(all_urls) retcode = ydl.download(all_urls)
except MaxDownloadsReached: except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
ydl.to_screen('--max-download limit reached, aborting.') ydl.to_screen('Aborting remaining downloads')
retcode = 101 retcode = 101
sys.exit(retcode) sys.exit(retcode)

View File

@ -367,7 +367,11 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
selection.add_option( selection.add_option(
'--break-on-existing', '--break-on-existing',
action='store_true', dest='break_on_existing', default=False, action='store_true', dest='break_on_existing', default=False,
help="Stop the download process after attempting to download a file that's in the archive.") help="Stop the download process when encountering a file that's in the archive.")
selection.add_option(
'--break-on-reject',
action='store_true', dest='break_on_reject', default=False,
help="Stop the download process when encountering a file that has been filtered out.")
selection.add_option( selection.add_option(
'--no-download-archive', '--no-download-archive',
dest='download_archive', action="store_const", const=None, dest='download_archive', action="store_const", const=None,

View File

@ -2433,6 +2433,16 @@ def __init__(self, msg):
self.msg = msg self.msg = msg
class ExistingVideoReached(YoutubeDLError):
""" --break-on-existing triggered: a video already in the archive was reached. """
pass
class RejectedVideoReached(YoutubeDLError):
""" --break-on-reject triggered: a video that was filtered out was reached. """
pass
class MaxDownloadsReached(YoutubeDLError): class MaxDownloadsReached(YoutubeDLError):
""" --max-downloads limit has been reached. """ """ --max-downloads limit has been reached. """
pass pass