Browse Source

Merge pull request #3167 from Schnouki/motherless

* mother/motherless:
  [Motherless] Add new extractor
master
Petr Půlpán 10 years ago
parent
commit
f3f1cd6b3b
2 changed files with 94 additions and 0 deletions
  1. 1
      youtube_dl/extractor/__init__.py
  2. 93
      youtube_dl/extractor/motherless.py

1
youtube_dl/extractor/__init__.py

@ -170,6 +170,7 @@ from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mooshare import MooshareIE from .mooshare import MooshareIE
from .morningstar import MorningstarIE from .morningstar import MorningstarIE
from .motherless import MotherlessIE
from .motorsport import MotorsportIE from .motorsport import MotorsportIE
from .moviezine import MoviezineIE from .moviezine import MoviezineIE
from .movshare import MovShareIE from .movshare import MovShareIE

93
youtube_dl/extractor/motherless.py

@ -0,0 +1,93 @@
from __future__ import unicode_literals
import datetime
import re
from .common import InfoExtractor
from ..utils import str_to_int
class MotherlessIE(InfoExtractor):
"""Information Extractor for Motherless"""
_VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
_TESTS = [
{
'url': 'http://motherless.com/AC3FFE1',
'md5': '5527fef81d2e529215dad3c2d744a7d9',
'info_dict': {
'id': 'AC3FFE1',
'ext': 'flv',
'title': 'Fucked in the ass while playing PS3',
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/AC3FFE1.jpg',
'age_limit': 18,
}
},
{
'url': 'http://motherless.com/532291B',
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
'info_dict': {
'id': '532291B',
'ext': 'mp4',
'title': 'Amazing girl playing the omegle game, PERFECT!',
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
'upload_date': '20140622',
'uploader_id': 'Sulivana7x',
'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/532291B.jpg',
'age_limit': 18,
}
}
]
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?) - MOTHERLESS.COM</title>', webpage, 'title')
video_url = self._search_regex(r"__fileurl = '(?P<video_url>[^']+)'", webpage, 'video_url')
thumbnail = self._og_search_thumbnail(webpage)
age_limit = self._rta_search(webpage) # Hint: it's 18 ;)
view_count = str_to_int(self._html_search_regex(r'<strong>Views</strong>(.+?)</h2>', webpage,
'view_count', flags=re.DOTALL))
like_count = str_to_int(self._html_search_regex(r'<strong>Favorited</strong>(.+?)</h2>', webpage,
'like_count', flags=re.DOTALL))
comment_count = webpage.count('class="media-comment-contents"')
uploader_id = self._html_search_regex(r'<div class="thumb-member-username">.*?<a [^>]*>(.+?)</a>',
webpage, 'uploader_id', flags=re.DOTALL)
categories = self._html_search_meta('keywords', webpage)
if categories is not None:
categories = [cat.strip() for cat in categories.split(',')]
upload_date = self._html_search_regex(r'<strong>Uploaded</strong>(.+?)</h2>', webpage,
'upload_date', flags=re.DOTALL)
mobj = re.search(r'(\d+) days? ago', upload_date, re.I)
if mobj is not None:
upload_date = datetime.datetime.now() - datetime.timedelta(days=int(mobj.group(1)))
else:
mobj = re.search(r'(\w+) (\d+)\w* (\d+)', upload_date, re.I)
if mobj is not None:
upload_date = datetime.datetime.strptime('%s %s %s' % mobj.groups(), '%b %d %Y').date()
else:
upload_date = None
if upload_date is not None:
upload_date = upload_date.strftime('%Y%m%d')
return {
'id': video_id,
'title': title,
'upload_date': upload_date,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'categories': categories,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
'age_limit': age_limit,
'url': video_url,
}
Loading…
Cancel
Save