Deal with implicitly UTF-16 encoded webpages

These webpages don't specify an encoding and rely on the byte order mark (BOM) instead.
11 years ago · b60016e831
1 changed files with 2 additions and 0 deletions
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -220,6 +220,8 @@ class InfoExtractor(object):
                          webpage_bytes[:1024])
            if m:
                encoding = m.group(1).decode('ascii')
+            elif webpage_bytes.startswith(b'\xff\xfe'):
+                encoding = 'utf-16'
            else:
                encoding = 'utf-8'
        if self._downloader.params.get('dump_intermediate_pages', False):