Browse Source

New optoin --restrict-filenames

master
Philipp Hagemeister 12 years ago
parent
commit
1c469a9480
7 changed files with 77 additions and 41 deletions
  1. 2
      README.md
  2. 27
      test/test_utils.py
  3. 4
      youtube-dl.1
  4. 2
      youtube-dl.bash-completion
  5. 65
      youtube_dl/FileDownloader.py
  6. 4
      youtube_dl/__init__.py
  7. 14
      youtube_dl/utils.py

2
README.md

@ -47,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
%(extractor)s for the provider (youtube, metacafe, %(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout. percent. Use - to output to stdout.
--restrict-filenames Avoid some characters such as "&" and spaces in
filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin) -a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files -w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files -c, --continue resume partially downloaded files

27
test/test_utils.py

@ -30,11 +30,34 @@ class TestUtil(unittest.TestCase):
self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
self.assertEqual(u'this - that', sanitize_filename(u'this: that')) self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')
self.assertEqual(sanitize_filename(u'ä'), u'ä') self.assertEqual(sanitize_filename(u'ä'), u'ä')
self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
for forbidden in u'"\0\\/':
self.assertTrue(forbidden not in sanitize_filename(forbidden))
forbidden = u'"\0\\/'
for fc in forbidden:
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc))
def test_sanitize_filename_restricted(self):
self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc')
self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e')
self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
forbidden = u'"\0\\/&: \'\t\n'
for fc in forbidden:
print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
def test_ordered_set(self): def test_ordered_set(self):
self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])

4
youtube-dl.1

@ -59,6 +59,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore.
.PP .PP
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
some systems) or clone the git repo to see the code.
some systems) or clone the git repository, as laid out above.
If you modify the code, you can run it by executing the If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file. \f[C]__main__.py\f[] file.
To recompile the executable, run \f[C]make\ youtube-dl\f[]. To recompile the executable, run \f[C]make\ youtube-dl\f[].

2
youtube-dl.bash-completion

@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts local cur prev opts
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

65
youtube_dl/FileDownloader.py

@ -44,37 +44,38 @@ class FileDownloader(object):
Available options: Available options:
username: Username for authentication purposes.
password: Password for authentication purposes.
usenetrc: Use netrc for authentication instead.
quiet: Do not print messages to stdout.
forceurl: Force printing final URL.
forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
outtmpl: Template for output names.
ignoreerrors: Do not stop on download errors.
ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
username: Username for authentication purposes.
password: Password for authentication purposes.
usenetrc: Use netrc for authentication instead.
quiet: Do not print messages to stdout.
forceurl: Force printing final URL.
forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
""" """
params = None params = None
@ -349,7 +350,7 @@ class FileDownloader(object):
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor.""" """Process a single dictionary returned by an InfoExtractor."""
info_dict['stitle'] = sanitize_filename(info_dict['title'])
info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
reason = self._match_entry(info_dict) reason = self._match_entry(info_dict)
if reason is not None: if reason is not None:

4
youtube_dl/__init__.py

@ -272,6 +272,9 @@ def parseOpts():
help='number downloaded files starting from 00000', default=False) help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output', filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Avoid some characters such as "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file', filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites', filesystem.add_option('-w', '--no-overwrites',
@ -485,6 +488,7 @@ def _real_main():
or (opts.useid and u'%(id)s.%(ext)s') or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'), or u'%(id)s.%(ext)s'),
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors, 'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit, 'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites, 'nooverwrites': opts.nooverwrites,

14
youtube_dl/utils.py

@ -194,18 +194,22 @@ def timeconvert(timestr):
if timetuple is not None: if timetuple is not None:
timestamp = email.utils.mktime_tz(timetuple) timestamp = email.utils.mktime_tz(timetuple)
return timestamp return timestamp
def sanitize_filename(s):
"""Sanitizes a string so it could be used as part of a filename."""
def sanitize_filename(s, restricted=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
"""
def replace_insane(char): def replace_insane(char):
if char == '?' or ord(char) < 32 or ord(char) == 127: if char == '?' or ord(char) < 32 or ord(char) == 127:
return '' return ''
elif char == '"': elif char == '"':
return '\''
return '' if restricted else 'FOO\''
elif char == ':': elif char == ':':
return ' -'
return '_-' if restricted else ' -'
elif char in '\\/|*<>': elif char in '\\/|*<>':
return '-' return '-'
if restricted and (char in '&\'' or char.isspace()):
return '_'
return char return char
result = u''.join(map(replace_insane, s)) result = u''.join(map(replace_insane, s))

Loading…
Cancel
Save