You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

315 lines
10 KiB

10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import base64
  3. import io
  4. import itertools
  5. import os
  6. import time
  7. import xml.etree.ElementTree as etree
  8. from .common import FileDownloader
  9. from .http import HttpFD
  10. from ..utils import (
  11. struct_pack,
  12. struct_unpack,
  13. compat_urlparse,
  14. format_bytes,
  15. encodeFilename,
  16. sanitize_open,
  17. )
  18. class FlvReader(io.BytesIO):
  19. """
  20. Reader for Flv files
  21. The file format is documented in https://www.adobe.com/devnet/f4v.html
  22. """
  23. # Utility functions for reading numbers and strings
  24. def read_unsigned_long_long(self):
  25. return struct_unpack('!Q', self.read(8))[0]
  26. def read_unsigned_int(self):
  27. return struct_unpack('!I', self.read(4))[0]
  28. def read_unsigned_char(self):
  29. return struct_unpack('!B', self.read(1))[0]
  30. def read_string(self):
  31. res = b''
  32. while True:
  33. char = self.read(1)
  34. if char == b'\x00':
  35. break
  36. res += char
  37. return res
  38. def read_box_info(self):
  39. """
  40. Read a box and return the info as a tuple: (box_size, box_type, box_data)
  41. """
  42. real_size = size = self.read_unsigned_int()
  43. box_type = self.read(4)
  44. header_end = 8
  45. if size == 1:
  46. real_size = self.read_unsigned_long_long()
  47. header_end = 16
  48. return real_size, box_type, self.read(real_size-header_end)
  49. def read_asrt(self):
  50. # version
  51. self.read_unsigned_char()
  52. # flags
  53. self.read(3)
  54. quality_entry_count = self.read_unsigned_char()
  55. # QualityEntryCount
  56. for i in range(quality_entry_count):
  57. self.read_string()
  58. segment_run_count = self.read_unsigned_int()
  59. segments = []
  60. for i in range(segment_run_count):
  61. first_segment = self.read_unsigned_int()
  62. fragments_per_segment = self.read_unsigned_int()
  63. segments.append((first_segment, fragments_per_segment))
  64. return {
  65. 'segment_run': segments,
  66. }
  67. def read_afrt(self):
  68. # version
  69. self.read_unsigned_char()
  70. # flags
  71. self.read(3)
  72. # time scale
  73. self.read_unsigned_int()
  74. quality_entry_count = self.read_unsigned_char()
  75. # QualitySegmentUrlModifiers
  76. for i in range(quality_entry_count):
  77. self.read_string()
  78. fragments_count = self.read_unsigned_int()
  79. fragments = []
  80. for i in range(fragments_count):
  81. first = self.read_unsigned_int()
  82. first_ts = self.read_unsigned_long_long()
  83. duration = self.read_unsigned_int()
  84. if duration == 0:
  85. discontinuity_indicator = self.read_unsigned_char()
  86. else:
  87. discontinuity_indicator = None
  88. fragments.append({
  89. 'first': first,
  90. 'ts': first_ts,
  91. 'duration': duration,
  92. 'discontinuity_indicator': discontinuity_indicator,
  93. })
  94. return {
  95. 'fragments': fragments,
  96. }
  97. def read_abst(self):
  98. # version
  99. self.read_unsigned_char()
  100. # flags
  101. self.read(3)
  102. self.read_unsigned_int() # BootstrapinfoVersion
  103. # Profile,Live,Update,Reserved
  104. self.read(1)
  105. # time scale
  106. self.read_unsigned_int()
  107. # CurrentMediaTime
  108. self.read_unsigned_long_long()
  109. # SmpteTimeCodeOffset
  110. self.read_unsigned_long_long()
  111. self.read_string() # MovieIdentifier
  112. server_count = self.read_unsigned_char()
  113. # ServerEntryTable
  114. for i in range(server_count):
  115. self.read_string()
  116. quality_count = self.read_unsigned_char()
  117. # QualityEntryTable
  118. for i in range(quality_count):
  119. self.read_string()
  120. # DrmData
  121. self.read_string()
  122. # MetaData
  123. self.read_string()
  124. segments_count = self.read_unsigned_char()
  125. segments = []
  126. for i in range(segments_count):
  127. box_size, box_type, box_data = self.read_box_info()
  128. assert box_type == b'asrt'
  129. segment = FlvReader(box_data).read_asrt()
  130. segments.append(segment)
  131. fragments_run_count = self.read_unsigned_char()
  132. fragments = []
  133. for i in range(fragments_run_count):
  134. box_size, box_type, box_data = self.read_box_info()
  135. assert box_type == b'afrt'
  136. fragments.append(FlvReader(box_data).read_afrt())
  137. return {
  138. 'segments': segments,
  139. 'fragments': fragments,
  140. }
  141. def read_bootstrap_info(self):
  142. total_size, box_type, box_data = self.read_box_info()
  143. assert box_type == b'abst'
  144. return FlvReader(box_data).read_abst()
  145. def read_bootstrap_info(bootstrap_bytes):
  146. return FlvReader(bootstrap_bytes).read_bootstrap_info()
  147. def build_fragments_list(boot_info):
  148. """ Return a list of (segment, fragment) for each fragment in the video """
  149. res = []
  150. segment_run_table = boot_info['segments'][0]
  151. # I've only found videos with one segment
  152. segment_run_entry = segment_run_table['segment_run'][0]
  153. n_frags = segment_run_entry[1]
  154. fragment_run_entry_table = boot_info['fragments'][0]['fragments']
  155. first_frag_number = fragment_run_entry_table[0]['first']
  156. for (i, frag_number) in zip(range(1, n_frags+1), itertools.count(first_frag_number)):
  157. res.append((1, frag_number))
  158. return res
  159. def write_flv_header(stream, metadata):
  160. """Writes the FLV header and the metadata to stream"""
  161. # FLV header
  162. stream.write(b'FLV\x01')
  163. stream.write(b'\x05')
  164. stream.write(b'\x00\x00\x00\x09')
  165. # FLV File body
  166. stream.write(b'\x00\x00\x00\x00')
  167. # FLVTAG
  168. # Script data
  169. stream.write(b'\x12')
  170. # Size of the metadata with 3 bytes
  171. stream.write(struct_pack('!L', len(metadata))[1:])
  172. stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
  173. stream.write(metadata)
  174. # Magic numbers extracted from the output files produced by AdobeHDS.php
  175. #(https://github.com/K-S-V/Scripts)
  176. stream.write(b'\x00\x00\x01\x73')
  177. def _add_ns(prop):
  178. return '{http://ns.adobe.com/f4m/1.0}%s' % prop
  179. class HttpQuietDownloader(HttpFD):
  180. def to_screen(self, *args, **kargs):
  181. pass
  182. class F4mFD(FileDownloader):
  183. """
  184. A downloader for f4m manifests or AdobeHDS.
  185. """
  186. def real_download(self, filename, info_dict):
  187. man_url = info_dict['url']
  188. self.to_screen('[download] Downloading f4m manifest')
  189. manifest = self.ydl.urlopen(man_url).read()
  190. self.report_destination(filename)
  191. http_dl = HttpQuietDownloader(self.ydl,
  192. {
  193. 'continuedl': True,
  194. 'quiet': True,
  195. 'noprogress': True,
  196. 'test': self.params.get('test', False),
  197. })
  198. doc = etree.fromstring(manifest)
  199. formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
  200. formats = sorted(formats, key=lambda f: f[0])
  201. rate, media = formats[-1]
  202. base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
  203. bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
  204. metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
  205. boot_info = read_bootstrap_info(bootstrap)
  206. fragments_list = build_fragments_list(boot_info)
  207. if self.params.get('test', False):
  208. # We only download the first fragment
  209. fragments_list = fragments_list[:1]
  210. total_frags = len(fragments_list)
  211. tmpfilename = self.temp_name(filename)
  212. (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
  213. write_flv_header(dest_stream, metadata)
  214. # This dict stores the download progress, it's updated by the progress
  215. # hook
  216. state = {
  217. 'downloaded_bytes': 0,
  218. 'frag_counter': 0,
  219. }
  220. start = time.time()
  221. def frag_progress_hook(status):
  222. frag_total_bytes = status.get('total_bytes', 0)
  223. estimated_size = (state['downloaded_bytes'] +
  224. (total_frags - state['frag_counter']) * frag_total_bytes)
  225. if status['status'] == 'finished':
  226. state['downloaded_bytes'] += frag_total_bytes
  227. state['frag_counter'] += 1
  228. progress = self.calc_percent(state['frag_counter'], total_frags)
  229. byte_counter = state['downloaded_bytes']
  230. else:
  231. frag_downloaded_bytes = status['downloaded_bytes']
  232. byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
  233. frag_progress = self.calc_percent(frag_downloaded_bytes,
  234. frag_total_bytes)
  235. progress = self.calc_percent(state['frag_counter'], total_frags)
  236. progress += frag_progress / float(total_frags)
  237. eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
  238. self.report_progress(progress, format_bytes(estimated_size),
  239. status.get('speed'), eta)
  240. http_dl.add_progress_hook(frag_progress_hook)
  241. frags_filenames = []
  242. for (seg_i, frag_i) in fragments_list:
  243. name = 'Seg%d-Frag%d' % (seg_i, frag_i)
  244. url = base_url + name
  245. frag_filename = '%s-%s' % (tmpfilename, name)
  246. success = http_dl.download(frag_filename, {'url': url})
  247. if not success:
  248. return False
  249. with open(frag_filename, 'rb') as down:
  250. down_data = down.read()
  251. reader = FlvReader(down_data)
  252. while True:
  253. _, box_type, box_data = reader.read_box_info()
  254. if box_type == b'mdat':
  255. dest_stream.write(box_data)
  256. break
  257. frags_filenames.append(frag_filename)
  258. dest_stream.close()
  259. self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
  260. self.try_rename(tmpfilename, filename)
  261. for frag_file in frags_filenames:
  262. os.remove(frag_file)
  263. fsize = os.path.getsize(encodeFilename(filename))
  264. self._hook_progress({
  265. 'downloaded_bytes': fsize,
  266. 'total_bytes': fsize,
  267. 'filename': filename,
  268. 'status': 'finished',
  269. })
  270. return True