You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

339 lines
11 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import base64
  3. import io
  4. import itertools
  5. import os
  6. import time
  7. import xml.etree.ElementTree as etree
  8. from .common import FileDownloader
  9. from .http import HttpFD
  10. from ..compat import (
  11. compat_urlparse,
  12. )
  13. from ..utils import (
  14. struct_pack,
  15. struct_unpack,
  16. format_bytes,
  17. encodeFilename,
  18. sanitize_open,
  19. xpath_text,
  20. )
  21. class FlvReader(io.BytesIO):
  22. """
  23. Reader for Flv files
  24. The file format is documented in https://www.adobe.com/devnet/f4v.html
  25. """
  26. # Utility functions for reading numbers and strings
  27. def read_unsigned_long_long(self):
  28. return struct_unpack('!Q', self.read(8))[0]
  29. def read_unsigned_int(self):
  30. return struct_unpack('!I', self.read(4))[0]
  31. def read_unsigned_char(self):
  32. return struct_unpack('!B', self.read(1))[0]
  33. def read_string(self):
  34. res = b''
  35. while True:
  36. char = self.read(1)
  37. if char == b'\x00':
  38. break
  39. res += char
  40. return res
  41. def read_box_info(self):
  42. """
  43. Read a box and return the info as a tuple: (box_size, box_type, box_data)
  44. """
  45. real_size = size = self.read_unsigned_int()
  46. box_type = self.read(4)
  47. header_end = 8
  48. if size == 1:
  49. real_size = self.read_unsigned_long_long()
  50. header_end = 16
  51. return real_size, box_type, self.read(real_size - header_end)
  52. def read_asrt(self):
  53. # version
  54. self.read_unsigned_char()
  55. # flags
  56. self.read(3)
  57. quality_entry_count = self.read_unsigned_char()
  58. # QualityEntryCount
  59. for i in range(quality_entry_count):
  60. self.read_string()
  61. segment_run_count = self.read_unsigned_int()
  62. segments = []
  63. for i in range(segment_run_count):
  64. first_segment = self.read_unsigned_int()
  65. fragments_per_segment = self.read_unsigned_int()
  66. segments.append((first_segment, fragments_per_segment))
  67. return {
  68. 'segment_run': segments,
  69. }
  70. def read_afrt(self):
  71. # version
  72. self.read_unsigned_char()
  73. # flags
  74. self.read(3)
  75. # time scale
  76. self.read_unsigned_int()
  77. quality_entry_count = self.read_unsigned_char()
  78. # QualitySegmentUrlModifiers
  79. for i in range(quality_entry_count):
  80. self.read_string()
  81. fragments_count = self.read_unsigned_int()
  82. fragments = []
  83. for i in range(fragments_count):
  84. first = self.read_unsigned_int()
  85. first_ts = self.read_unsigned_long_long()
  86. duration = self.read_unsigned_int()
  87. if duration == 0:
  88. discontinuity_indicator = self.read_unsigned_char()
  89. else:
  90. discontinuity_indicator = None
  91. fragments.append({
  92. 'first': first,
  93. 'ts': first_ts,
  94. 'duration': duration,
  95. 'discontinuity_indicator': discontinuity_indicator,
  96. })
  97. return {
  98. 'fragments': fragments,
  99. }
  100. def read_abst(self):
  101. # version
  102. self.read_unsigned_char()
  103. # flags
  104. self.read(3)
  105. self.read_unsigned_int() # BootstrapinfoVersion
  106. # Profile,Live,Update,Reserved
  107. self.read(1)
  108. # time scale
  109. self.read_unsigned_int()
  110. # CurrentMediaTime
  111. self.read_unsigned_long_long()
  112. # SmpteTimeCodeOffset
  113. self.read_unsigned_long_long()
  114. self.read_string() # MovieIdentifier
  115. server_count = self.read_unsigned_char()
  116. # ServerEntryTable
  117. for i in range(server_count):
  118. self.read_string()
  119. quality_count = self.read_unsigned_char()
  120. # QualityEntryTable
  121. for i in range(quality_count):
  122. self.read_string()
  123. # DrmData
  124. self.read_string()
  125. # MetaData
  126. self.read_string()
  127. segments_count = self.read_unsigned_char()
  128. segments = []
  129. for i in range(segments_count):
  130. box_size, box_type, box_data = self.read_box_info()
  131. assert box_type == b'asrt'
  132. segment = FlvReader(box_data).read_asrt()
  133. segments.append(segment)
  134. fragments_run_count = self.read_unsigned_char()
  135. fragments = []
  136. for i in range(fragments_run_count):
  137. box_size, box_type, box_data = self.read_box_info()
  138. assert box_type == b'afrt'
  139. fragments.append(FlvReader(box_data).read_afrt())
  140. return {
  141. 'segments': segments,
  142. 'fragments': fragments,
  143. }
  144. def read_bootstrap_info(self):
  145. total_size, box_type, box_data = self.read_box_info()
  146. assert box_type == b'abst'
  147. return FlvReader(box_data).read_abst()
  148. def read_bootstrap_info(bootstrap_bytes):
  149. return FlvReader(bootstrap_bytes).read_bootstrap_info()
  150. def build_fragments_list(boot_info):
  151. """ Return a list of (segment, fragment) for each fragment in the video """
  152. res = []
  153. segment_run_table = boot_info['segments'][0]
  154. # I've only found videos with one segment
  155. segment_run_entry = segment_run_table['segment_run'][0]
  156. n_frags = segment_run_entry[1]
  157. fragment_run_entry_table = boot_info['fragments'][0]['fragments']
  158. first_frag_number = fragment_run_entry_table[0]['first']
  159. for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)):
  160. res.append((1, frag_number))
  161. return res
  162. def write_flv_header(stream, metadata):
  163. """Writes the FLV header and the metadata to stream"""
  164. # FLV header
  165. stream.write(b'FLV\x01')
  166. stream.write(b'\x05')
  167. stream.write(b'\x00\x00\x00\x09')
  168. # FLV File body
  169. stream.write(b'\x00\x00\x00\x00')
  170. # FLVTAG
  171. # Script data
  172. stream.write(b'\x12')
  173. # Size of the metadata with 3 bytes
  174. stream.write(struct_pack('!L', len(metadata))[1:])
  175. stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
  176. stream.write(metadata)
  177. # Magic numbers extracted from the output files produced by AdobeHDS.php
  178. # (https://github.com/K-S-V/Scripts)
  179. stream.write(b'\x00\x00\x01\x73')
  180. def _add_ns(prop):
  181. return '{http://ns.adobe.com/f4m/1.0}%s' % prop
  182. class HttpQuietDownloader(HttpFD):
  183. def to_screen(self, *args, **kargs):
  184. pass
  185. class F4mFD(FileDownloader):
  186. """
  187. A downloader for f4m manifests or AdobeHDS.
  188. """
  189. def real_download(self, filename, info_dict):
  190. man_url = info_dict['url']
  191. requested_bitrate = info_dict.get('tbr')
  192. self.to_screen('[download] Downloading f4m manifest')
  193. manifest = self.ydl.urlopen(man_url).read()
  194. self.report_destination(filename)
  195. http_dl = HttpQuietDownloader(
  196. self.ydl,
  197. {
  198. 'continuedl': True,
  199. 'quiet': True,
  200. 'noprogress': True,
  201. 'ratelimit': self.params.get('ratelimit', None),
  202. 'test': self.params.get('test', False),
  203. }
  204. )
  205. doc = etree.fromstring(manifest)
  206. formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
  207. if requested_bitrate is None:
  208. # get the best format
  209. formats = sorted(formats, key=lambda f: f[0])
  210. rate, media = formats[-1]
  211. else:
  212. rate, media = list(filter(
  213. lambda f: int(f[0]) == requested_bitrate, formats))[0]
  214. base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
  215. bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
  216. if bootstrap_node.text is None:
  217. bootstrap_url = compat_urlparse.urljoin(
  218. base_url, bootstrap_node.attrib['url'])
  219. bootstrap = self.ydl.urlopen(bootstrap_url).read()
  220. else:
  221. bootstrap = base64.b64decode(bootstrap_node.text)
  222. metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
  223. boot_info = read_bootstrap_info(bootstrap)
  224. fragments_list = build_fragments_list(boot_info)
  225. if self.params.get('test', False):
  226. # We only download the first fragment
  227. fragments_list = fragments_list[:1]
  228. total_frags = len(fragments_list)
  229. # For some akamai manifests we'll need to add a query to the fragment url
  230. akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
  231. tmpfilename = self.temp_name(filename)
  232. (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
  233. write_flv_header(dest_stream, metadata)
  234. # This dict stores the download progress, it's updated by the progress
  235. # hook
  236. state = {
  237. 'downloaded_bytes': 0,
  238. 'frag_counter': 0,
  239. }
  240. start = time.time()
  241. def frag_progress_hook(status):
  242. frag_total_bytes = status.get('total_bytes', 0)
  243. estimated_size = (state['downloaded_bytes'] +
  244. (total_frags - state['frag_counter']) * frag_total_bytes)
  245. if status['status'] == 'finished':
  246. state['downloaded_bytes'] += frag_total_bytes
  247. state['frag_counter'] += 1
  248. progress = self.calc_percent(state['frag_counter'], total_frags)
  249. byte_counter = state['downloaded_bytes']
  250. else:
  251. frag_downloaded_bytes = status['downloaded_bytes']
  252. byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
  253. frag_progress = self.calc_percent(frag_downloaded_bytes,
  254. frag_total_bytes)
  255. progress = self.calc_percent(state['frag_counter'], total_frags)
  256. progress += frag_progress / float(total_frags)
  257. eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
  258. self.report_progress(progress, format_bytes(estimated_size),
  259. status.get('speed'), eta)
  260. http_dl.add_progress_hook(frag_progress_hook)
  261. frags_filenames = []
  262. for (seg_i, frag_i) in fragments_list:
  263. name = 'Seg%d-Frag%d' % (seg_i, frag_i)
  264. url = base_url + name
  265. if akamai_pv:
  266. url += '?' + akamai_pv.strip(';')
  267. frag_filename = '%s-%s' % (tmpfilename, name)
  268. success = http_dl.download(frag_filename, {'url': url})
  269. if not success:
  270. return False
  271. with open(frag_filename, 'rb') as down:
  272. down_data = down.read()
  273. reader = FlvReader(down_data)
  274. while True:
  275. _, box_type, box_data = reader.read_box_info()
  276. if box_type == b'mdat':
  277. dest_stream.write(box_data)
  278. break
  279. frags_filenames.append(frag_filename)
  280. dest_stream.close()
  281. self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
  282. self.try_rename(tmpfilename, filename)
  283. for frag_file in frags_filenames:
  284. os.remove(frag_file)
  285. fsize = os.path.getsize(encodeFilename(filename))
  286. self._hook_progress({
  287. 'downloaded_bytes': fsize,
  288. 'total_bytes': fsize,
  289. 'filename': filename,
  290. 'status': 'finished',
  291. })
  292. return True