Browse Source

[commonmistakes] Detect BOMs at the beginning of URLs

Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
master
Philipp Hagemeister 10 years ago
parent
commit
c73fae1e2e
2 changed files with 18 additions and 1 deletions
  1. 2
      youtube_dl/extractor/__init__.py
  2. 17
      youtube_dl/extractor/commonmistakes.py

2
youtube_dl/extractor/__init__.py

@@ -74,7 +74,7 @@ from .collegehumor import CollegeHumorIE
from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE

17
youtube_dl/extractor/commonmistakes.py

@@ -27,3 +27,20 @@ class CommonMistakesIE(InfoExtractor):
if not self._downloader.params.get('verbose'):
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
raise ExtractorError(msg, expected=True)
class UnicodeBOMIE(InfoExtractor):
    # Pseudo-extractor for URLs that begin with a Unicode byte order mark
    # (U+FEFF): it warns the user and hands the remainder of the URL back
    # through url_result().

    # NOTE(review): presumably False keeps this helper out of the public
    # extractor listing -- confirm against InfoExtractor.
    IE_DESC = False

    # One leading BOM, then everything after it captured as the "id" group.
    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

    _TESTS = [{
        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The value obtained from _match_id() is treated as the URL without
        # its leading BOM (presumably the "id" group of _VALID_URL).
        stripped_url = self._match_id(url)

        warning = (
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...') % stripped_url
        self.report_warning(warning)

        return self.url_result(stripped_url)
Loading…
Cancel
Save