Browse Source

[utils] js_to_json: various improvements

JS object literals such as { /* " */ 0: ",]\xaa<\/p>", } are now correctly converted to JSON.
master
felix 8 years ago
committed by Sergey M․
parent
commit
bd1e484448
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
2 changed files with 28 additions and 14 deletions
  1. 12
      test/test_utils.py
  2. 30
      youtube_dl/utils.py

12
test/test_utils.py

@@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": "def",}') on = js_to_json('{"abc": "def",}')
self.assertEqual(json.loads(on), {'abc': 'def'}) self.assertEqual(json.loads(on), {'abc': 'def'})
on = js_to_json('{ 0: /* " \n */ ",]" , }')
self.assertEqual(json.loads(on), {'0': ',]'})
on = js_to_json(r'["<p>x<\/p>"]')
self.assertEqual(json.loads(on), ['<p>x</p>'])
on = js_to_json(r'["\xaa"]')
self.assertEqual(json.loads(on), ['\u00aa'])
on = js_to_json("['a\\\nb']")
self.assertEqual(json.loads(on), ['ab'])
def test_extract_attributes(self): def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})

30
youtube_dl/utils.py

@@ -1914,24 +1914,26 @@ def js_to_json(code):
v = m.group(0) v = m.group(0)
if v in ('true', 'false', 'null'): if v in ('true', 'false', 'null'):
return v return v
if v.startswith('"'):
v = re.sub(r"\\'", "'", v[1:-1])
elif v.startswith("'"):
v = v[1:-1]
v = re.sub(r"\\\\|\\'|\"", lambda m: {
'\\\\': '\\\\',
"\\'": "'",
elif v.startswith('/*') or v == ',':
return ""
if v[0] in ("'", '"'):
v = re.sub(r'(?s)\\.|"', lambda m: {
'"': '\\"', '"': '\\"',
}[m.group(0)], v)
"\\'": "'",
'\\\n': '',
'\\x': '\\u00',
}.get(m.group(0), m.group(0)), v[1:-1])
return '"%s"' % v return '"%s"' % v
res = re.sub(r'''(?x)
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
/\*.*?\*/|,(?=\s*[\]}])|
[a-zA-Z_][.a-zA-Z_0-9]*|
[0-9]+(?=\s*:)
''', fix_kv, code) ''', fix_kv, code)
res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
return res
def qualities(quality_ids): def qualities(quality_ids):

Loading…
Cancel
Save