Browse Source

[pornhub] Fix extraction (closes #12007)

master
Thomas Christlieb 7 years ago
committed by Sergey M․
parent
commit
e64b0fca14
No known key found for this signature in database. GPG Key ID: 2C393E0F18A9236D
1 changed file with 17 additions and 4 deletions
  1. 21
      youtube_dl/extractor/pornhub.py

21
youtube_dl/extractor/pornhub.py

@@ -156,11 +156,24 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count( comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_variables = {}
for video_variablename, quote, video_variable in re.findall(
r'(player_quality_[0-9]{3,4}p[0-9a-z]+?)=\s*(["\'])(.*?)\2;', webpage):
video_variables[video_variablename] = video_variable
encoded_video_urls = []
for encoded_video_url in re.findall(
r'player_quality_[0-9]{3,4}p\s*=(.*?);', webpage):
encoded_video_urls.append(encoded_video_url)
# Decode the URLs
video_urls = [] video_urls = []
for quote, video_url in re.findall(
r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
video_urls.append(compat_urllib_parse_unquote(re.sub(
r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
for url in encoded_video_urls:
for varname, varval in video_variables.items():
url = url.replace(varname, varval)
url = url.replace('+', '')
url = url.replace(' ', '')
video_urls.append(url)
if webpage.find('"encrypted":true') != -1: if webpage.find('"encrypted":true') != -1:
password = compat_urllib_parse_unquote_plus( password = compat_urllib_parse_unquote_plus(

Loading…
Cancel
Save