Browse Source

[spankwire] Fix extraction

master
Sergey M․ 9 years ago
parent
commit
447053668f
1 changed file with 4 additions and 4 deletions
  1. 8
      youtube_dl/extractor/spankwire.py

8
youtube_dl/extractor/spankwire.py

@@ -27,7 +27,7 @@ class SpankwireIE(InfoExtractor):
'description': 'Crazy Bitch X rated music video.', 'description': 'Crazy Bitch X rated music video.',
'uploader': 'oreusz', 'uploader': 'oreusz',
'uploader_id': '124697', 'uploader_id': '124697',
'upload_date': '20070508',
'upload_date': '20070507',
'age_limit': 18, 'age_limit': 18,
} }
} }
@@ -44,7 +44,7 @@ class SpankwireIE(InfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
r'<h1>([^<]+)', webpage, 'title') r'<h1>([^<]+)', webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'<div\s+id="descriptionContent">([^<]+)<',
r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
webpage, 'description', fatal=False) webpage, 'description', fatal=False)
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']', r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
@@ -64,12 +64,12 @@ class SpankwireIE(InfoExtractor):
r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>', r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex( comment_count = str_to_int(self._html_search_regex(
r'Comments<span[^>]+>\s*\(([\d,\.]+)\)</span>',
r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
webpage, 'comment count', fatal=False)) webpage, 'comment count', fatal=False))
video_urls = list(map( video_urls = list(map(
compat_urllib_parse.unquote, compat_urllib_parse.unquote,
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1: if webpage.find('flashvars\.encrypted = "true"') != -1:
password = self._search_regex( password = self._search_regex(
r'flashvars\.video_title = "([^"]+)', r'flashvars\.video_title = "([^"]+)',

Loading…
Cancel
Save