New option --restrict-filenames

12 years ago · 1c469a9480
7 changed files with 77 additions and 41 deletions
--- a/README.md
+++ b/README.md
@ -47,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
                             %(extractor)s for the provider (youtube, metacafe,
                             etc), %(id)s for the video id and %% for a literal
                             percent. Use - to output to stdout.
    --restrict-filenames     Avoid some characters such as "&" and spaces in
                             filenames
    -a, --batch-file FILE    file containing URLs to download ('-' for stdin)
    -w, --no-overwrites      do not overwrite files
    -c, --continue           resume partially downloaded files
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -30,11 +30,34 @@ class TestUtil(unittest.TestCase):
 		self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
 		self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
 		self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')
 		self.assertEqual(sanitize_filename(u'ä'), u'ä')
 		self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
 		for forbidden in u'"\0\\/':
 			self.assertTrue(forbidden not in sanitize_filename(forbidden))
 		forbidden = u'"\0\\/'
 		for fc in forbidden:
 			for fbc in forbidden:
 				self.assertTrue(fbc not in sanitize_filename(fc))
 	def test_sanitize_filename_restricted(self):
 		"""Check sanitize_filename() output when called with restricted=True."""
 		# Plain alphanumerics, '_' and '-' pass through unchanged.
 		self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc')
 		self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e')
 		self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')

 		# Path separators and shell-special characters collapse to a single '-'
 		# and are dropped entirely at the end of the name.
 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
 		self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
 		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))

 		# Spaces become underscores; '?' is removed; ':' becomes '_-'.
 		self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
 		self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))

 		# Sanitizing any single forbidden character must never yield another
 		# forbidden character.
 		forbidden = u'"\0\\/&: \'\t\n'
 		for fc in forbidden:
 			sanitized = sanitize_filename(fc, restricted=True)
 			for fbc in forbidden:
 				self.assertTrue(fbc not in sanitized)
 	def test_ordered_set(self):
 		self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])
--- a/youtube-dl.1
+++ b/youtube-dl.1
@ -59,6 +59,8 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
 --restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
 -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
 -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
 -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore.
 .PP
 Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
 simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
 some systems) or clone the git repo to see the code.
 some systems) or clone the git repository, as laid out above.
 If you modify the code, you can run it by executing the
 \f[C]__main__.py\f[] file.
 To recompile the executable, run \f[C]make\ youtube-dl\f[].
--- a/youtube-dl.bash-completion
+++ b/youtube-dl.bash-completion
@ -3,7 +3,7 @@ __youtube-dl()
    local cur prev opts
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
    opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
    if [[ ${cur} == * ]] ; then
        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@ -44,37 +44,38 @@ class FileDownloader(object):
 	Available options:
 	username:         Username for authentication purposes.
 	password:         Password for authentication purposes.
 	usenetrc:         Use netrc for authentication instead.
 	quiet:            Do not print messages to stdout.
 	forceurl:         Force printing final URL.
 	forcetitle:       Force printing title.
 	forcethumbnail:   Force printing thumbnail URL.
 	forcedescription: Force printing description.
 	forcefilename:    Force printing final filename.
 	simulate:         Do not download the video files.
 	format:           Video format code.
 	format_limit:     Highest quality format to try.
 	outtmpl:          Template for output names.
 	ignoreerrors:     Do not stop on download errors.
 	ratelimit:        Download speed limit, in bytes/sec.
 	nooverwrites:     Prevent overwriting files.
 	retries:          Number of times to retry for HTTP error 5xx
 	continuedl:       Try to continue downloads if possible.
 	noprogress:       Do not print the progress bar.
 	playliststart:    Playlist item to start at.
 	playlistend:      Playlist item to end at.
 	matchtitle:       Download only matching titles.
 	rejecttitle:      Reject downloads for matching titles.
 	logtostderr:      Log messages to stderr instead of stdout.
 	consoletitle:     Display progress in console window's titlebar.
 	nopart:           Do not use temporary .part files.
 	updatetime:       Use the Last-modified header to set output file timestamps.
 	writedescription: Write the video description to a .description file
 	writeinfojson:    Write the video description to a .info.json file
 	writesubtitles:   Write the video subtitles to a .srt file
 	subtitleslang:    Language of the subtitles to download
 	username:          Username for authentication purposes.
 	password:          Password for authentication purposes.
 	usenetrc:          Use netrc for authentication instead.
 	quiet:             Do not print messages to stdout.
 	forceurl:          Force printing final URL.
 	forcetitle:        Force printing title.
 	forcethumbnail:    Force printing thumbnail URL.
 	forcedescription:  Force printing description.
 	forcefilename:     Force printing final filename.
 	simulate:          Do not download the video files.
 	format:            Video format code.
 	format_limit:      Highest quality format to try.
 	outtmpl:           Template for output names.
 	restrictfilenames: Do not allow "&" and spaces in file names
 	ignoreerrors:      Do not stop on download errors.
 	ratelimit:         Download speed limit, in bytes/sec.
 	nooverwrites:      Prevent overwriting files.
 	retries:           Number of times to retry for HTTP error 5xx
 	continuedl:        Try to continue downloads if possible.
 	noprogress:        Do not print the progress bar.
 	playliststart:     Playlist item to start at.
 	playlistend:       Playlist item to end at.
 	matchtitle:        Download only matching titles.
 	rejecttitle:       Reject downloads for matching titles.
 	logtostderr:       Log messages to stderr instead of stdout.
 	consoletitle:      Display progress in console window's titlebar.
 	nopart:            Do not use temporary .part files.
 	updatetime:        Use the Last-modified header to set output file timestamps.
 	writedescription:  Write the video description to a .description file
 	writeinfojson:     Write the video description to a .info.json file
 	writesubtitles:    Write the video subtitles to a .srt file
 	subtitleslang:     Language of the subtitles to download
 	"""
 	params = None
@ -349,7 +350,7 @@ class FileDownloader(object):
 	def process_info(self, info_dict):
 		"""Process a single dictionary returned by an InfoExtractor."""
 		info_dict['stitle'] = sanitize_filename(info_dict['title'])
 		info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
 		reason = self._match_entry(info_dict)
 		if reason is not None:
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -272,6 +272,9 @@ def parseOpts():
 			help='number downloaded files starting from 00000', default=False)
 	filesystem.add_option('-o', '--output',
 			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
 	filesystem.add_option('--restrict-filenames',
 			action='store_true', dest='restrictfilenames',
 			help='Avoid some characters such as "&" and spaces in filenames', default=False)
 	filesystem.add_option('-a', '--batch-file',
 			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
 	filesystem.add_option('-w', '--no-overwrites',
@ -485,6 +488,7 @@ def _real_main():
 			or (opts.useid and u'%(id)s.%(ext)s')
 			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
 			or u'%(id)s.%(ext)s'),
 		'restrictfilenames': opts.restrictfilenames,
 		'ignoreerrors': opts.ignoreerrors,
 		'ratelimit': opts.ratelimit,
 		'nooverwrites': opts.nooverwrites,
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -194,18 +194,22 @@ def timeconvert(timestr):
 	if timetuple is not None:
 		timestamp = email.utils.mktime_tz(timetuple)
 	return timestamp
 def sanitize_filename(s):
 	"""Sanitizes a string so it could be used as part of a filename."""
 def sanitize_filename(s, restricted=False):
 	"""Sanitizes a string so it could be used as part of a filename.
 	If restricted is set, use a stricter subset of allowed characters.
 	"""
 	def replace_insane(char):
 		"""Return the filename-safe replacement for a single character.

 		Reads the ``restricted`` flag from the enclosing scope.

 		* '?' and control characters (code points below 32, and 127) are removed.
 		* '"' becomes a single quote, or is removed when restricted.
 		* ':' becomes ' -', or '_-' when restricted.
 		* Any of the characters \\ / | * < > becomes '-'.
 		* When restricted, '&', a single quote and whitespace become '_'.
 		* Every other character is returned unchanged.
 		"""
 		if char == '?' or ord(char) < 32 or ord(char) == 127:
 			return ''
 		elif char == '"':
 			# A single quote is itself disallowed in restricted mode, so the
 			# double quote is dropped there instead of being downgraded.
 			return '' if restricted else '\''
 		elif char == ':':
 			return '_-' if restricted else ' -'
 		elif char in '\\/|*<>':
 			return '-'
 		if restricted and (char in '&\'' or char.isspace()):
 			return '_'
 		return char
 	result = u''.join(map(replace_insane, s))