
Merge branch 'chirbit' of https://github.com/skypher/youtube-dl into skypher-chirbit

master
Sergey M․ · 9 years ago
parent · commit 543ec2136b
3 changed files with 138 additions and 1 deletion

  1. youtube_dl/extractor/__init__.py (6 changed lines)
  2. youtube_dl/extractor/chirbit.py (97 changed lines)
  3. youtube_dl/extractor/soundgasm.py (36 changed lines)

youtube_dl/extractor/__init__.py (6 changed lines)

@@ -63,6 +63,7 @@ from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
+from .chirbit import ChirbitIE, ChirbitProfileIE
 from .cinchcast import CinchcastIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
@@ -425,7 +426,10 @@ from .soundcloud import (
     SoundcloudUserIE,
     SoundcloudPlaylistIE
 )
-from .soundgasm import SoundgasmIE
+from .soundgasm import (
+    SoundgasmIE,
+    SoundgasmProfileIE
+)
 from .southpark import (
     SouthParkIE,
     SouthparkDeIE,
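
Adding the classes to extractor/__init__.py is what makes them reachable: youtube-dl builds its extractor list from this module and uses the first extractor whose _VALID_URL matches the input URL. A minimal sketch of that lookup, assuming the public gen_extractors() helper and the test URL used further down (not part of this commit):

from __future__ import unicode_literals

from youtube_dl.extractor import gen_extractors

# Walk the registered extractors and report the one that claims a Chirbit clip URL.
for ie in gen_extractors():
    if ie.suitable('http://chirb.it/PrIPv5'):
        print(ie.IE_NAME)
        break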

youtube_dl/extractor/chirbit.py (new file, 97 lines)

@@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import clean_html


class ChirbitIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirb.it/PrIPv5',
        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
        'info_dict': {
            'id': 'PrIPv5',
            'display_id': 'kukushtv_1423231243',
            'ext': 'mp3',
            'title': 'Фасадстрой',
            'url': 'http://audio.chirbit.com/kukushtv_1423231243.mp3'
        }
    }

    def _real_extract(self, url):
        audio_linkid = self._match_id(url)
        webpage = self._download_webpage(url, audio_linkid)

        audio_title = self._html_search_regex(
            r'<h2\s+itemprop="name">(.*?)</h2>', webpage, 'title')
        audio_id = self._html_search_regex(
            r'\("setFile",\s+"http://audio\.chirbit\.com/(.*?)\.mp3"\)',
            webpage, 'audio ID')
        audio_url = 'http://audio.chirbit.com/' + audio_id + '.mp3'

        return {
            'id': audio_linkid,
            'display_id': audio_id,
            'title': audio_title,
            'url': audio_url,
        }


class ChirbitProfileIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?P<id>[^/]+)/?$'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'playlist_count': 3,
        'info_dict': {
            '_type': 'playlist',
            'title': 'ScarletBeauty',
            'id': 'ScarletBeauty'
        }
    }

    def _real_extract(self, url):
        profile_id = self._match_id(url)

        # Chirbit has a pretty weird "Last Page" navigation behavior.
        # We grab the profile's oldest entry to determine when to
        # stop fetching entries.
        oldest_page = self._download_webpage(url + '/24599', profile_id)
        oldest_page_entries = re.findall(
            r'soundFile:\s*"http://audio\.chirbit\.com/(.*?)\.mp3"',
            oldest_page)
        oldest_entry = clean_html(oldest_page_entries[-1])

        # Page through the profile until the oldest entry shows up.
        ids = []
        titles = []
        n = 0
        while True:
            page = self._download_webpage(url + '/' + str(n), profile_id)
            page_ids = re.findall(
                r'soundFile:\s*"http://audio\.chirbit\.com/(.*?)\.mp3"',
                page)
            page_titles = re.findall(
                r'<div\s+class="chirbit_title"\s*>(.*?)</div>',
                page)
            ids += page_ids
            titles += page_titles
            if oldest_entry in page_ids:
                break
            n += 1

        entries = [{
            'id': audio_id,
            'title': title,
            'url': 'http://audio.chirbit.com/' + audio_id + '.mp3',
        } for audio_id, title in zip(ids, titles)]

        return {
            '_type': 'playlist',
            'id': profile_id,
            'title': profile_id,
            'entries': entries,
        }
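
Both classes are ordinary InfoExtractors, so once registered they can be driven through youtube-dl's regular Python API. A minimal sketch, assuming the standard youtube_dl.YoutubeDL interface and reusing the URLs from the _TEST blocks above (not part of this commit):

from __future__ import unicode_literals

import youtube_dl

# skip_download keeps this to metadata extraction; no MP3 files are fetched.
with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
    # Single clip URL, dispatched to ChirbitIE.
    clip = ydl.extract_info('http://chirb.it/PrIPv5', download=False)
    print(clip['id'], clip['title'], clip['url'])

    # Profile URL, dispatched to ChirbitProfileIE, which returns a playlist dict.
    profile = ydl.extract_info('http://chirbit.com/ScarletBeauty', download=False)
    print(profile['id'], len(profile['entries']))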

youtube_dl/extractor/soundgasm.py (36 changed lines)

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import clean_html


class SoundgasmIE(InfoExtractor):

@@ -38,3 +39,38 @@ class SoundgasmIE(InfoExtractor):
            'title': audio_title,
            'description': description
        }

class SoundgasmProfileIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[0-9a-zA-Z_\-]+)/?$'
    _TEST = {
        'url': 'http://soundgasm.net/u/ytdl',
        'playlist_count': 1,
        'info_dict': {
            '_type': 'playlist',
            'id': 'ytdl',
            'title': 'ytdl'
        }
    }

    def _real_extract(self, url):
        profile_id = self._match_id(url)
        webpage = self._download_webpage(url, profile_id)

        # Every clip on a profile page links back to /u/<profile>/<slug>.
        ids = re.findall(
            r'<a\s+href=".+?/u/%s/([^/]+)">' % re.escape(profile_id),
            webpage)
        ids = [clean_html(entry_id) for entry_id in ids]

        # Hand each clip URL back to YoutubeDL so SoundgasmIE does the
        # actual audio extraction.
        entries = [{
            '_type': 'url',
            'url': 'http://soundgasm.net/u/%s/%s' % (profile_id, entry_id),
        } for entry_id in ids]

        return {
            '_type': 'playlist',
            'id': profile_id,
            'title': profile_id,
            'entries': entries,
        }
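
Unlike ChirbitProfileIE, which assembles complete entries itself, SoundgasmProfileIE returns '_type': 'url' entries and leaves the per-clip extraction to SoundgasmIE. A hedged sketch of that hand-off, assuming the standard extract_flat option and the profile URL from the _TEST block (not part of this commit):

from __future__ import unicode_literals

import youtube_dl

# extract_flat='in_playlist' keeps the url-type entries unresolved, so the
# output below is exactly what SoundgasmProfileIE itself produced.
opts = {'extract_flat': 'in_playlist', 'skip_download': True}
with youtube_dl.YoutubeDL(opts) as ydl:
    playlist = ydl.extract_info('http://soundgasm.net/u/ytdl', download=False)
    for entry in playlist['entries']:
        print(entry['url'])  # each URL matches SoundgasmIE._VALID_URL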