Browse Source
[YoutubeDL] Skip malformed formats for better extraction robustness
master
Sergey M․
7 years ago
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with
12 additions and
4 deletions
-
youtube_dl/YoutubeDL.py
|
|
@ -1448,17 +1448,25 @@ class YoutubeDL(object): |
|
|
|
if not formats: |
|
|
|
raise ExtractorError('No video formats found!') |
|
|
|
|
|
|
|
def is_wellformed(f):
    """Tell whether format dict *f* carries a usable "url" entry.

    The result is truthy only when ``f.get('url')`` is a non-empty
    ``compat_str``.  In every other case a warning is emitted through
    ``self.report_warning`` (``self`` comes from the enclosing scope)
    and a falsy value is returned, so the function can be used directly
    as a ``filter`` predicate.
    """
    candidate = f.get('url')
    # Short-circuits on a missing/empty url, so the value may be
    # None, '' or a bool; callers should rely on truthiness only.
    usable = candidate and isinstance(candidate, compat_str)
    if usable:
        return usable
    self.report_warning(
        '"url" field is missing or empty - skipping format, '
        'there is an error in extractor')
    return usable
|
|
|
|
|
|
|
# Filter out malformed formats for better extraction robustness |
|
|
|
formats = list(filter(is_wellformed, formats)) |
|
|
|
|
|
|
|
formats_dict = {} |
|
|
|
|
|
|
|
# We check that all the formats have the format and format_id fields |
|
|
|
for i, format in enumerate(formats): |
|
|
|
if 'url' not in format: |
|
|
|
raise ExtractorError('Missing "url" key in result (index %d)' % i) |
|
|
|
|
|
|
|
sanitize_string_field(format, 'format_id') |
|
|
|
sanitize_numeric_fields(format) |
|
|
|
format['url'] = sanitize_url(format['url']) |
|
|
|
|
|
|
|
if format.get('format_id') is None: |
|
|
|
format['format_id'] = compat_str(i) |
|
|
|
else: |
|
|
|