Browse Source

[wrzuta:playlist] Improve and simplify (Closes #9341)

master
Sergey M․ 8 years ago
parent
commit
1759672eed
No known key found for this signature in database. GPG Key ID: 2C393E0F18A9236D
2 changed files with 27 additions and 28 deletions
  1. 6
      youtube_dl/extractor/extractors.py
  2. 49
      youtube_dl/extractor/wrzuta.py

6
youtube_dl/extractor/extractors.py

@@ -980,8 +980,10 @@ from .weiqitv import WeiqiTVIE
from .wimp import WimpIE from .wimp import WimpIE
from .wistia import WistiaIE from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE from .worldstarhiphop import WorldStarHipHopIE
from .wrzuta import WrzutaIE
from .wrzuta import WrzutaPlaylistIE
from .wrzuta import (
WrzutaIE,
WrzutaPlaylistIE,
)
from .wsj import WSJIE from .wsj import WSJIE
from .xbef import XBefIE from .xbef import XBefIE
from .xboxclips import XboxClipsIE from .xboxclips import XboxClipsIE

49
youtube_dl/extractor/wrzuta.py

@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
qualities, qualities,
remove_start,
) )
@@ -82,10 +83,6 @@ class WrzutaIE(InfoExtractor):
} }
_ENTRY_PATTERN = r'<a href="(?P<playlist_entry_url>[^"]+)" target="_blank" class="playlist\-file\-page">'
_PLAYLIST_SIZE_PATTERN = r'<div class="playlist-counter">[0-9]+/([0-9]+)</div>'
class WrzutaPlaylistIE(InfoExtractor): class WrzutaPlaylistIE(InfoExtractor):
""" """
this class covers extraction of wrzuta playlist entries this class covers extraction of wrzuta playlist entries
@@ -101,10 +98,7 @@ class WrzutaPlaylistIE(InfoExtractor):
""" """
IE_NAME = 'wrzuta.pl:playlist' IE_NAME = 'wrzuta.pl:playlist'
_VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \
'(?P<id>[0-9a-zA-Z]+)/.*'
_VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza',
'playlist_mincount': 14, 'playlist_mincount': 14,
@@ -119,6 +113,9 @@ class WrzutaPlaylistIE(InfoExtractor):
'id': '6Nj3wQHx756', 'id': '6Nj3wQHx756',
'title': 'Lipiec - Lato 2015 Muzyka Świata', 'title': 'Lipiec - Lato 2015 Muzyka Świata',
}, },
}, {
'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -126,31 +123,31 @@ class WrzutaPlaylistIE(InfoExtractor):
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
uploader = mobj.group('uploader') uploader = mobj.group('uploader')
entries = []
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist')
playlist_size = int(playlist_size) if playlist_size else 0
playlist_size = int_or_none(self._html_search_regex(
(r'<div[^>]+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)',
r'<div[^>]+class=["\']all-counter["\'][^>]*>(.+?)</div>'),
webpage, 'playlist size', default=None))
playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1)
playlist_title = remove_start(
self._og_search_title(webpage), 'Playlista: ')
entries = []
if playlist_size: if playlist_size:
entries = list(map(
lambda entry_url: self.url_result(entry_url),
re.findall(_ENTRY_PATTERN, webpage)
))
entries = [
self.url_result(entry_url)
for _, entry_url in re.findall(
r'<a[^>]+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page',
webpage)]
if playlist_size > len(entries): if playlist_size > len(entries):
playlist_content = self._download_json( playlist_content = self._download_json(
'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format(
uploader_id=uploader,
playlist_id=playlist_id,
),
'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id),
playlist_id, playlist_id,
'Downloading playlist content as JSON metadata',
'Unable to download playlist content as JSON metadata',
)
entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']]
'Downloading playlist JSON',
'Unable to download playlist JSON')
entries.extend([
self.url_result(entry['filelink'])
for entry in playlist_content.get('files', []) if entry.get('filelink')])
return self.playlist_result(entries, playlist_id, playlist_title) return self.playlist_result(entries, playlist_id, playlist_title)
Loading…
Cancel
Save