Browse Source

Push api updates to simplify audiomack, add support for albums

master
Your Name 10 years ago
parent
commit
defaf19f5d
2 changed files with 84 additions and 28 deletions
  1. 2
      youtube_dl/extractor/__init__.py
  2. 110
      youtube_dl/extractor/audiomack.py

2
youtube_dl/extractor/__init__.py

@ -26,7 +26,7 @@ from .arte import (
ArteTVEmbedIE, ArteTVEmbedIE,
) )
from .atresplayer import AtresPlayerIE from .atresplayer import AtresPlayerIE
from .audiomack import AudiomackIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
from .auengine import AUEngineIE from .auengine import AUEngineIE
from .azubu import AzubuIE from .azubu import AzubuIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE

110
youtube_dl/extractor/audiomack.py

@ -9,20 +9,21 @@ import time
class AudiomackIE(InfoExtractor): class AudiomackIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(song)/(?P<id>[\w/-]+)'
IE_NAME = 'audiomack' IE_NAME = 'audiomack'
_TESTS = [ _TESTS = [
# hosted on audiomack
# audiomack
{ {
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
'info_dict': 'info_dict':
{ {
'id': 'roosh-williams/extraordinary',
'ext': 'mp3',
'title': 'Roosh Williams - Extraordinary'
'id': '310086',
"ext": "mp3",
"artist": "Roosh Williams",
'title': 'Extraordinary'
} }
}, },
# hosted on soundcloud via audiomack
# audiomack through soundcloud
{ {
'add_ie': ['Soundcloud'], 'add_ie': ['Soundcloud'],
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
@ -37,33 +38,88 @@ class AudiomackIE(InfoExtractor):
}, },
] ]
@staticmethod
def create_song_dictionary(api_response, album_url_tag, track_no=0):
    """Build an info dict for one song from an audiomack API response.

    The keys "title", "artist", "id" and "url" are copied verbatim from
    ``api_response`` when present (the API uses the same key names as the
    info dict format).

    :param api_response: decoded JSON object returned by the audiomack API
    :param album_url_tag: the ``[uploader]/[title]`` part of the page URL,
        used as a fallback title when the API supplies none
    :param track_no: position of the song in its album, used as a fallback
        id when the API supplies none
    :returns: dict with the copied keys, always containing a string "id"
        and a "title"
    """
    entry = {
        key: api_response[key]
        for key in ("title", "artist", "id", "url")
        if key in api_response
    }

    # Fudge values in the face of missing metadata. A null id is treated
    # the same as a missing one.
    if entry.get("id") is None:
        entry["id"] = track_no
    # Ids are identifiers, not numbers: always hand back a string so the
    # fallback (an int track number) has the same type as a real API id.
    entry["id"] = "%s" % entry["id"]

    if "title" not in entry:
        entry["title"] = album_url_tag
    return entry
def _real_extract(self, url):
    """Extract a single audiomack song.

    Queries the audiomack song API for the page's url tag. If the media
    url returned by the API is one the Soundcloud extractor accepts, a
    ``url``-type result delegating to that extractor is returned;
    otherwise the song's info dict is built from the API response.

    :param url: audiomack song page URL matching ``_VALID_URL``
    :raises ExtractorError: when the API response has no usable "url"
        or reports an "error"
    """
    # URLs end with [uploader name]/[uploader title]
    # this title is whatever the user types in, and is rarely
    # the proper song title. Real metadata is in the api response
    album_url_tag = self._match_id(url)

    # Request the extended version of the api for extra fields like artist and title.
    # The trailing "_" parameter carries the current timestamp.
    api_response = self._download_json(
        "http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d" % (
            album_url_tag, time.time()),
        album_url_tag)

    # API is inconsistent with errors: the url may be missing, empty, or
    # accompanied by an "error" key.
    if "url" not in api_response or not api_response["url"] or "error" in api_response:
        # Format the url into the message; passing it as a second
        # positional argument would not interpolate it.
        raise ExtractorError("Invalid url %s" % url)

    # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
    # if so, pass the work off to the soundcloud extractor
    if SoundcloudIE.suitable(api_response["url"]):
        return {'_type': 'url', 'url': api_response["url"], 'ie_key': 'Soundcloud'}

    return self.create_song_dictionary(api_response, album_url_tag)
class AudiomackAlbumIE(InfoExtractor):
    """Extractor for audiomack album pages, returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
    IE_NAME = 'audiomack:album'
    _TESTS = [
        # Standard album playlist
        {
            'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
            "playlist_count": 15,
            'info_dict':
            {
                'id': "812251",
                'title': "Tha Tour: Part 2 (Official Mixtape)"
            }
        },
        # Album playlist ripped from fakeshoredrive with no metadata
        {
            "url": "http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project",
            "playlist_count": 10
        }
    ]

    def _real_extract(self, url):
        """Collect every track of an album into a playlist result.

        Tracks are requested one index at a time, starting at 0, until the
        API answers with an empty "url", which is taken as the end of the
        album.

        :param url: audiomack album page URL matching ``_VALID_URL``
        :returns: playlist dict with "entries" and, when the API provides
            them, the album "id" and "title"
        :raises ExtractorError: when a response lacks "url" or carries
            an "error"
        """
        # URLs end with [uploader name]/[uploader title]; that title is
        # free-form user input and rarely the proper title. Real metadata
        # is in the api response.
        album_url_tag = self._match_id(url)

        entries = []
        playlist = {"_type": "playlist", "entries": entries}

        # There is no single endpoint for album metadata - it is repeated
        # in each song's metadata. The track count is therefore unknown
        # up front, so keep requesting until a lookup comes back empty.
        track_no = 0
        while True:
            api_response = self._download_json(
                "http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d"
                % (album_url_tag, track_no, time.time()),
                album_url_tag)

            # Total failure, only expected when the url is totally wrong
            if "url" not in api_response or "error" in api_response:
                raise ExtractorError(
                    "Invalid url for track %d of album url %s" % (track_no, url))

            # URL is good but the track does not exist - usually the end
            # of the playlist
            if not api_response["url"]:
                break

            # Take the album metadata from the first song that carries it
            for playlist_key, api_key in (("id", "album_id"), ("title", "album_title")):
                if api_key in api_response:
                    playlist.setdefault(playlist_key, api_response[api_key])

            entries.append(
                AudiomackIE.create_song_dictionary(api_response, album_url_tag, track_no))
            track_no += 1

        return playlist
Loading…
Cancel
Save