Browse Source

[pornhub:playlist] Improve extraction (closes #11594)

master
Sergey M․ 8 years ago
parent
commit
96d315c2be
No known key found for this signature in database. GPG Key ID: 2C393E0F18A9236D
1 changed file with 12 additions and 5 deletions
  1. 17
      youtube_dl/extractor/pornhub.py

17
youtube_dl/extractor/pornhub.py

@@ -229,7 +229,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
entries = self._extract_entries(webpage)
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
# https://github.com/rg3/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)
entries = self._extract_entries(container)
playlist = self._parse_json( playlist = self._parse_json(
self._search_regex( self._search_regex(
@@ -243,12 +250,12 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhub.com/playlist/6201671',
'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': { 'info_dict': {
'id': '6201671',
'title': 'P0p4',
'id': '4667351',
'title': 'Nataly Hot',
}, },
'playlist_mincount': 35,
'playlist_mincount': 2,
}] }]

Loading…
Cancel
Save