Browse Source

[facebook] Fix login process

The login process was broken and didn't work in Python 3.
Also use `_download_webpage` instead of `compat_urllib_request.urlopen`.
master
Jaime Marquínez Ferrándiz 10 years ago
parent
commit
b74fa8cd2c
3 changed files with 20 additions and 8 deletions
  1. 5
      test/test_utils.py
  2. 19
      youtube_dl/extractor/facebook.py
  3. 4
      youtube_dl/utils.py

5
test/test_utils.py

@ -33,6 +33,7 @@ from youtube_dl.utils import (
unified_strdate, unified_strdate,
unsmuggle_url, unsmuggle_url,
url_basename, url_basename,
urlencode_postdata,
xpath_with_ns, xpath_with_ns,
) )
@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
bam''') bam''')
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
def test_urlencode_postdata(self):
    # The helper's result is used as a request body, so it has to be a
    # bytes object rather than text.
    form = {'username': 'foo@bar.com', 'password': '1234'}
    encoded = urlencode_postdata(form)
    self.assertTrue(isinstance(encoded, bytes))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

19
youtube_dl/extractor/facebook.py

@ -11,6 +11,7 @@ from ..utils import (
compat_urllib_error, compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
urlencode_postdata,
ExtractorError, ExtractorError,
) )
@ -51,8 +52,8 @@ class FacebookIE(InfoExtractor):
login_page_req = compat_urllib_request.Request(self._LOGIN_URL) login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US') login_page_req.add_header('Cookie', 'locale=en_US')
self.report_login()
login_page = self._download_webpage(login_page_req, None, note=False,
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
errnote='Unable to download login page') errnote='Unable to download login page')
lsd = self._search_regex( lsd = self._search_regex(
r'<input type="hidden" name="lsd" value="([^"]*)"', r'<input type="hidden" name="lsd" value="([^"]*)"',
@ -70,23 +71,25 @@ class FacebookIE(InfoExtractor):
'timezone': '-60', 'timezone': '-60',
'trynum': '1', 'trynum': '1',
} }
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try: try:
login_results = compat_urllib_request.urlopen(request).read()
login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page')
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return return
check_form = { check_form = {
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'), 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
'name_action_selected': 'dont_save', 'name_action_selected': 'dont_save',
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
} }
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = compat_urllib_request.urlopen(check_req).read()
check_response = self._download_webpage(check_req, None,
note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None: if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.') self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

4
youtube_dl/utils.py

@ -1263,3 +1263,7 @@ def read_batch_urls(batch_fd):
with contextlib.closing(batch_fd) as fd: with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url] return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it encoded with the ASCII codec.

    All positional and keyword arguments are forwarded unchanged to
    compat_urllib_parse.urlencode; the string it returns is then encoded
    with the 'ascii' codec before being handed back to the caller.
    """
    query = compat_urllib_parse.urlencode(*args, **kargs)
    return query.encode('ascii')
Loading…
Cancel
Save