You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

327 lines
11 KiB

10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import base64
  3. import io
  4. import itertools
  5. import os
  6. import time
  7. import xml.etree.ElementTree as etree
  8. from .common import FileDownloader
  9. from .http import HttpFD
  10. from ..utils import (
  11. struct_pack,
  12. struct_unpack,
  13. compat_urlparse,
  14. format_bytes,
  15. encodeFilename,
  16. sanitize_open,
  17. xpath_text,
  18. )
  class FlvReader(io.BytesIO):
      """
      Reader for Flv files
      The file format is documented in https://www.adobe.com/devnet/f4v.html

      Every ``read_*`` method consumes bytes starting at the current stream
      position, so the call order inside each parser mirrors the field order
      of the structure being parsed.
      """

      # Utility functions for reading numbers and strings
      def read_unsigned_long_long(self):
          """Consume 8 bytes and decode them with the '!Q' struct format."""
          return struct_unpack('!Q', self.read(8))[0]

      def read_unsigned_int(self):
          """Consume 4 bytes and decode them with the '!I' struct format."""
          return struct_unpack('!I', self.read(4))[0]

      def read_unsigned_char(self):
          """Consume 1 byte and decode it with the '!B' struct format."""
          return struct_unpack('!B', self.read(1))[0]

      def read_string(self):
          """
          Read a NUL-terminated byte string and return it without the NUL.

          The terminator is consumed. If the stream ends before a NUL byte is
          found, the bytes read so far are returned instead of looping forever.
          """
          chars = []
          while True:
              char = self.read(1)
              # read() returns b'' at end of stream; that never equals
              # b'\x00', so EOF has to be checked explicitly.
              if not char or char == b'\x00':
                  break
              chars.append(char)
          return b''.join(chars)

      def read_box_info(self):
          """
          Read a box and return the info as a tuple: (box_size, box_type, box_data)

          A 32-bit size field equal to 1 means the real size follows the box
          type as a 64-bit integer, which makes the header 16 bytes long
          instead of 8.
          """
          real_size = size = self.read_unsigned_int()
          box_type = self.read(4)
          header_end = 8
          if size == 1:
              real_size = self.read_unsigned_long_long()
              header_end = 16
          return real_size, box_type, self.read(real_size - header_end)

      def read_asrt(self):
          """
          Parse the payload of an 'asrt' box.

          Returns a dict whose 'segment_run' key holds a list of
          (first_segment, fragments_per_segment) tuples.
          """
          # version
          self.read_unsigned_char()
          # flags
          self.read(3)
          quality_entry_count = self.read_unsigned_char()
          # QualityEntryCount
          for _ in range(quality_entry_count):
              self.read_string()

          segment_run_count = self.read_unsigned_int()
          segments = []
          for _ in range(segment_run_count):
              first_segment = self.read_unsigned_int()
              fragments_per_segment = self.read_unsigned_int()
              segments.append((first_segment, fragments_per_segment))

          return {
              'segment_run': segments,
          }

      def read_afrt(self):
          """
          Parse the payload of an 'afrt' box.

          Returns a dict whose 'fragments' key holds one dict per entry with
          the keys 'first', 'ts', 'duration' and 'discontinuity_indicator'.
          """
          # version
          self.read_unsigned_char()
          # flags
          self.read(3)
          # time scale
          self.read_unsigned_int()

          quality_entry_count = self.read_unsigned_char()
          # QualitySegmentUrlModifiers
          for _ in range(quality_entry_count):
              self.read_string()

          fragments_count = self.read_unsigned_int()
          fragments = []
          for _ in range(fragments_count):
              first = self.read_unsigned_int()
              first_ts = self.read_unsigned_long_long()
              duration = self.read_unsigned_int()
              # The discontinuity byte is only present in the stream when the
              # duration is zero.
              if duration == 0:
                  discontinuity_indicator = self.read_unsigned_char()
              else:
                  discontinuity_indicator = None
              fragments.append({
                  'first': first,
                  'ts': first_ts,
                  'duration': duration,
                  'discontinuity_indicator': discontinuity_indicator,
              })

          return {
              'fragments': fragments,
          }

      def read_abst(self):
          """
          Parse the payload of an 'abst' box.

          Returns a dict with 'segments' (one read_asrt() result per nested
          'asrt' box) and 'fragments' (one read_afrt() result per nested
          'afrt' box). Header fields that are not needed are read only to
          advance the stream position.
          """
          # version
          self.read_unsigned_char()
          # flags
          self.read(3)

          self.read_unsigned_int()  # BootstrapinfoVersion
          # Profile,Live,Update,Reserved
          self.read(1)
          # time scale
          self.read_unsigned_int()
          # CurrentMediaTime
          self.read_unsigned_long_long()
          # SmpteTimeCodeOffset
          self.read_unsigned_long_long()

          self.read_string()  # MovieIdentifier
          server_count = self.read_unsigned_char()
          # ServerEntryTable
          for _ in range(server_count):
              self.read_string()
          quality_count = self.read_unsigned_char()
          # QualityEntryTable
          for _ in range(quality_count):
              self.read_string()
          # DrmData
          self.read_string()
          # MetaData
          self.read_string()

          segments_count = self.read_unsigned_char()
          segments = []
          for _ in range(segments_count):
              box_size, box_type, box_data = self.read_box_info()
              assert box_type == b'asrt'
              segment = FlvReader(box_data).read_asrt()
              segments.append(segment)

          fragments_run_count = self.read_unsigned_char()
          fragments = []
          for _ in range(fragments_run_count):
              box_size, box_type, box_data = self.read_box_info()
              assert box_type == b'afrt'
              fragments.append(FlvReader(box_data).read_afrt())

          return {
              'segments': segments,
              'fragments': fragments,
          }

      def read_bootstrap_info(self):
          """Read one top-level box, require it to be 'abst', and parse it."""
          total_size, box_type, box_data = self.read_box_info()
          assert box_type == b'abst'
          return FlvReader(box_data).read_abst()
  def read_bootstrap_info(bootstrap_bytes):
      """
      Parse raw bootstrap bytes and return the parsed 'abst' info.

      Wraps bootstrap_bytes in a FlvReader and delegates to its
      read_bootstrap_info() method.
      """
      return FlvReader(bootstrap_bytes).read_bootstrap_info()
  def build_fragments_list(boot_info):
      """
      Return a list of (segment, fragment) for each fragment in the video

      boot_info is the dict produced by read_bootstrap_info(). Only the first
      segment run entry and the first fragment run entry are consulted, and
      every fragment is attributed to segment 1.
      """
      segment_run_table = boot_info['segments'][0]
      # I've only found videos with one segment
      segment_run_entry = segment_run_table['segment_run'][0]
      n_frags = segment_run_entry[1]
      fragment_run_entry_table = boot_info['fragments'][0]['fragments']
      first_frag_number = fragment_run_entry_table[0]['first']
      # Fragment numbers are consecutive, starting at the first entry's number.
      return [
          (1, frag_number)
          for frag_number in range(first_frag_number, first_frag_number + n_frags)
      ]
  def write_flv_header(stream, metadata):
      """
      Writes the FLV header and the metadata to stream

      stream must provide write() accepting bytes; metadata is the raw bytes
      payload of the script-data tag. The tag size field is only 3 bytes
      wide, so the high byte of the packed length is dropped.
      """
      # FLV header
      stream.write(b'FLV\x01')
      stream.write(b'\x05')
      stream.write(b'\x00\x00\x00\x09')
      # FLV File body
      stream.write(b'\x00\x00\x00\x00')
      # FLVTAG
      # Script data
      stream.write(b'\x12')
      # Size of the metadata with 3 bytes
      stream.write(struct_pack('!L', len(metadata))[1:])
      stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
      stream.write(metadata)
      # PreviousTagSize: the 11 tag-header bytes written above (1 type byte,
      # 3 size bytes, 7 zero bytes) plus the payload. This used to be the
      # constant 0x173 (371 = 11 + 360), which is only right for a 360-byte
      # metadata payload.
      stream.write(struct_pack('!L', 11 + len(metadata)))
  def _add_ns(prop):
      """Return prop prefixed with the f4m 1.0 XML namespace in '{uri}tag' form."""
      return '{http://ns.adobe.com/f4m/1.0}%s' % prop
  class HttpQuietDownloader(HttpFD):
      """HttpFD variant whose to_screen() discards every message."""

      def to_screen(self, *args, **kargs):
          """Accept any arguments and print nothing."""
          pass
  class F4mFD(FileDownloader):
      """
      A downloader for f4m manifests or AdobeHDS.
      """

      def real_download(self, filename, info_dict):
          """
          Download the stream described by the f4m manifest at info_dict['url'].

          The manifest is fetched and parsed, one media entry is selected (the
          one whose bitrate equals info_dict['tbr'], or the highest bitrate
          when 'tbr' is absent), and every fragment is downloaded to its own
          temporary file. The 'mdat' payload of each fragment is appended to
          the destination file after an FLV header.

          Returns True on success and False as soon as one fragment download
          reports failure. The destination stream is closed on every exit
          path, including exceptions.
          """
          man_url = info_dict['url']
          requested_bitrate = info_dict.get('tbr')
          self.to_screen('[download] Downloading f4m manifest')
          manifest = self.ydl.urlopen(man_url).read()
          self.report_destination(filename)
          http_dl = HttpQuietDownloader(
              self.ydl,
              {
                  'continuedl': True,
                  'quiet': True,
                  'noprogress': True,
                  'test': self.params.get('test', False),
              })

          doc = etree.fromstring(manifest)
          formats = [(int(f.attrib.get('bitrate', -1)), f)
                     for f in doc.findall(_add_ns('media'))]
          if requested_bitrate is None:
              # get the best format
              media = sorted(formats, key=lambda f: f[0])[-1][1]
          else:
              # Raises IndexError when no media entry has the requested bitrate.
              media = [f for f in formats if f[0] == requested_bitrate][0][1]

          base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
          bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
          metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
          boot_info = read_bootstrap_info(bootstrap)
          fragments_list = build_fragments_list(boot_info)
          if self.params.get('test', False):
              # We only download the first fragment
              fragments_list = fragments_list[:1]
          total_frags = len(fragments_list)
          # For some akamai manifests we'll need to add a query to the fragment url
          akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

          tmpfilename = self.temp_name(filename)
          (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
          frags_filenames = []
          # From here on dest_stream is open: make sure it is closed even when
          # a fragment fails or an exception is raised.
          try:
              write_flv_header(dest_stream, metadata)

              # This dict stores the download progress, it's updated by the progress
              # hook
              state = {
                  'downloaded_bytes': 0,
                  'frag_counter': 0,
              }
              start = time.time()

              def frag_progress_hook(status):
                  frag_total_bytes = status.get('total_bytes', 0)
                  # Extrapolate the total size by assuming every remaining
                  # fragment is as large as the current one.
                  estimated_size = (
                      state['downloaded_bytes'] +
                      (total_frags - state['frag_counter']) * frag_total_bytes)
                  if status['status'] == 'finished':
                      state['downloaded_bytes'] += frag_total_bytes
                      state['frag_counter'] += 1
                      progress = self.calc_percent(state['frag_counter'], total_frags)
                      byte_counter = state['downloaded_bytes']
                  else:
                      frag_downloaded_bytes = status['downloaded_bytes']
                      byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                      frag_progress = self.calc_percent(frag_downloaded_bytes,
                                                        frag_total_bytes)
                      progress = self.calc_percent(state['frag_counter'], total_frags)
                      progress += frag_progress / float(total_frags)

                  eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
                  self.report_progress(progress, format_bytes(estimated_size),
                                       status.get('speed'), eta)
              http_dl.add_progress_hook(frag_progress_hook)

              for (seg_i, frag_i) in fragments_list:
                  name = 'Seg%d-Frag%d' % (seg_i, frag_i)
                  url = base_url + name
                  if akamai_pv:
                      url += '?' + akamai_pv.strip(';')
                  frag_filename = '%s-%s' % (tmpfilename, name)
                  success = http_dl.download(frag_filename, {'url': url})
                  if not success:
                      return False
                  with open(frag_filename, 'rb') as down:
                      down_data = down.read()
                  reader = FlvReader(down_data)
                  # Skip boxes until the 'mdat' box and copy only its payload.
                  while True:
                      _, box_type, box_data = reader.read_box_info()
                      if box_type == b'mdat':
                          dest_stream.write(box_data)
                          break
                  frags_filenames.append(frag_filename)
          finally:
              dest_stream.close()

          self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

          self.try_rename(tmpfilename, filename)
          for frag_file in frags_filenames:
              os.remove(frag_file)

          fsize = os.path.getsize(encodeFilename(filename))
          self._hook_progress({
              'downloaded_bytes': fsize,
              'total_bytes': fsize,
              'filename': filename,
              'status': 'finished',
          })

          return True