Browse Source

[youtube] Add an extractor for the subscriptions feed (closes #498)

It can be downloaded using the ytsubscriptions keyword.
It needs the login information.
master
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
04cc96173c
2 changed files with 43 additions and 2 deletions
  1. 10
      youtube_dl/extractor/__init__.py
  2. 35
      youtube_dl/extractor/youtube.py

10
youtube_dl/extractor/__init__.py

@ -69,7 +69,15 @@ from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE from .youjizz import YouJizzIE
from .youku import YoukuIE from .youku import YoukuIE
from .youporn import YouPornIE from .youporn import YouPornIE
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
from .youtube import (
YoutubeIE,
YoutubePlaylistIE,
YoutubeSearchIE,
YoutubeUserIE,
YoutubeChannelIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
)
from .zdf import ZDFIE from .zdf import ZDFIE

35
youtube_dl/extractor/youtube.py

@ -4,6 +4,7 @@ import json
import netrc import netrc
import re import re
import socket import socket
import itertools
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..utils import ( from ..utils import (
@ -19,6 +20,7 @@ from ..utils import (
ExtractorError, ExtractorError,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
orderedSet,
) )
@ -122,7 +124,7 @@ class YoutubeIE(InfoExtractor):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE.""" """Receives a URL and returns True if suitable for this IE."""
if YoutubePlaylistIE.suitable(url): return False
if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def report_lang(self): def report_lang(self):
@ -864,3 +866,34 @@ class YoutubeShowIE(InfoExtractor):
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
class YoutubeSubscriptionsIE(YoutubeIE):
    """Extractor for the YouTube subscriptions feed (requires authentication).

    It's a subclass of YoutubeIE because we need to login.
    The feed is fetched page by page from the ``feed_ajax`` endpoint and
    returned as a playlist of references to the 'Youtube' extractor.
    """
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubscriptions" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|ytsubscriptions'
    IE_NAME = u'youtube:subscriptions'
    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
    # Increment of the ``paging`` offset between two consecutive feed pages
    _PAGING_STEP = 30

    _TESTS = []

    @classmethod
    def suitable(cls, url):
        """Return True if *url* is the subscriptions feed URL or keyword.

        Overrides YoutubeIE.suitable, which matches with re.VERBOSE and
        explicitly rejects URLs handled by this extractor.
        """
        return re.match(cls._VALID_URL, url) is not None

    def _real_extract(self, url):
        """Download every page of the feed and return it as a playlist.

        Pages are requested with an increasing ``paging`` offset until the
        JSON response reports ``paging`` as null.
        """
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            # Stop the id at the closing quote OR at the first '&', so that
            # extra query parameters in the link (e.g. "&amp;feature=...")
            # do not end up inside the video id.
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # The same video may be linked several times in one page
            # (presumably thumbnail and title); keep the first occurrence only.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube') for video_id in ids)
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
Loading…
Cancel
Save