You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

415 lines
11 KiB

  1. 'use strict'
  2. const Promise = require('bluebird')
  3. const md = require('markdown-it')
  4. const mdEmoji = require('markdown-it-emoji')
  5. const mdTaskLists = require('markdown-it-task-lists')
  6. const mdAbbr = require('markdown-it-abbr')
  7. const mdAnchor = require('markdown-it-anchor')
  8. const mdFootnote = require('markdown-it-footnote')
  9. const mdExternalLinks = require('markdown-it-external-links')
  10. const mdExpandTabs = require('markdown-it-expand-tabs')
  11. const mdAttrs = require('markdown-it-attrs')
  12. const mdMathjax = require('markdown-it-mathjax')()
  13. const mathjax = require('mathjax-node')
  14. const hljs = require('highlight.js')
  15. const cheerio = require('cheerio')
  16. const _ = require('lodash')
  17. const mdRemove = require('remove-markdown')
  // Load plugins

  /**
   * Shared markdown-it instance used to render page content.
   *
   * Raw HTML is allowed, bare URLs are linkified, and line-break handling
   * follows the `appconfig.features.linebreaks` setting. Fenced code blocks are
   * highlighted with highlight.js when the language is known; otherwise the
   * code is HTML-escaped and emitted as plain preformatted text.
   */
  const mkdown = md({
    html: true,
    breaks: appconfig.features.linebreaks,
    linkify: true,
    // markdown-it's option key is `typographer`; an unknown key such as
    // `typography` is silently ignored, leaving typographic replacements off.
    typographer: true,
    highlight (str, lang) {
      if (lang && hljs.getLanguage(lang)) {
        try {
          return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>'
        } catch (err) {
          // Best effort: a highlighter failure falls through to the escaped
          // plain-text rendering below instead of aborting the whole render.
        }
      }
      return '<pre><code>' + _.escape(str) + '</code></pre>'
    }
  })
    .use(mdEmoji)
    .use(mdTaskLists)
    .use(mdAbbr)
    .use(mdAnchor, {
      slugify: _.kebabCase,
      permalink: true,
      permalinkClass: 'toc-anchor nc-icon-outline location_bookmark-add',
      permalinkSymbol: '',
      permalinkBefore: true
    })
    .use(mdFootnote)
    .use(mdExternalLinks, {
      externalClassName: 'external-link',
      internalClassName: 'internal-link'
    })
    .use(mdExpandTabs, {
      tabWidth: 4
    })
    .use(mdAttrs)

  // The MathJax markdown plugin is only registered when the feature is enabled.
  if (appconfig.features.mathjax) {
    mkdown.use(mdMathjax)
  }
  // Rendering rules

  // Emoji tokens become an <i> element whose class is `twa-` followed by the
  // token markup, with every underscore turned into a dash.
  mkdown.renderer.rules.emoji = (tokens, index) => {
    const iconName = _.replace(tokens[index].markup, /_/g, '-')
    return `<i class="twa twa-${iconName}"></i>`
  }
  // Video rules

  /**
   * Rules used to turn specially-classed links into embedded players.
   *
   * Each rule has:
   * - `selector`: CSS selector of the links to replace,
   * - `regexp`: pattern whose last non-empty capture is the video id, or
   *   `false` when the link's href is used as-is,
   * - `output`: HTML template in which `{0}` is replaced by the id / href.
   */
  const videoRules = [
    {
      selector: 'a.youtube',
      regexp: /(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|&v(?:i)?=))([^#&?]*).*/i,
      output: '<iframe width="640" height="360" src="https://www.youtube.com/embed/{0}?rel=0" frameborder="0" allowfullscreen></iframe>'
    },
    {
      selector: 'a.vimeo',
      // The dot is escaped so that only a literal "vimeo.com" host matches.
      regexp: /vimeo\.com\/(?:channels\/(?:\w+\/)?|groups\/(?:[^/]*)\/videos\/|album\/(?:\d+)\/video\/|)(\d+)(?:$|\/|\?)/i,
      output: '<iframe src="https://player.vimeo.com/video/{0}" width="640" height="360" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
    },
    {
      selector: 'a.dailymotion',
      regexp: /(?:dailymotion\.com(?:\/embed)?(?:\/video|\/hub)|dai\.ly)\/([0-9a-z]+)(?:[-_0-9a-zA-Z]+(?:#video=)?([a-z0-9]+)?)?/i,
      output: '<iframe width="640" height="360" src="//www.dailymotion.com/embed/video/{0}?endscreen-enable=false" frameborder="0" allowfullscreen></iframe>'
    },
    {
      selector: 'a.video',
      regexp: false,
      output: '<video width="640" height="360" controls preload="metadata"><source src="{0}" type="video/mp4"></video>'
    }
  ]
  // Regex

  // Matches whole words made of lowercase letters, digits, dashes, dots and
  // commas, plus whatever the `cjk` and `arabic` fragments from appdata.regex
  // add to the character class.
  const textRegex = new RegExp(`\\b[a-z0-9-.,${appdata.regex.cjk}${appdata.regex.arabic}]+\\b`, 'g')

  // Math expression patterns, tried in this order. `format` is the value
  // passed to MathJax as the input format for whatever `regex` finds.
  const mathRegex = [
    { format: 'TeX', regex: /\\\[([\s\S]*?)\\\]/g },
    { format: 'inline-TeX', regex: /\\\((.*?)\\\)/g },
    { format: 'MathML', regex: /<math([\s\S]*?)<\/math>/g }
  ]
  // MathJax

  // Settings handed to mathjax-node: TeX and MathML inputs, SVG output.
  const mathjaxSettings = {
    jax: ['input/TeX', 'input/MathML', 'output/SVG'],
    extensions: ['tex2jax.js', 'mml2jax.js'],
    TeX: {
      extensions: ['AMSmath.js', 'AMSsymbols.js', 'noErrors.js', 'noUndefined.js']
    },
    SVG: {
      scale: 120,
      font: 'STIX-Web'
    }
  }

  mathjax.config({ MathJax: mathjaxSettings })
  mathjax.start()
  /**
   * Parse markdown content and build TOC tree
   *
   * Only level-1 and level-2 headings are kept. Each level-1 heading becomes a
   * root entry and each level-2 heading is attached to the most recent level-1
   * entry. A level-2 heading that appears before any level-1 heading has no
   * parent and is left out.
   *
   * @param {String} content Markdown content
   * @return {Array} TOC tree, as a list of `{ content, anchor, nodes }` entries
   */
  const parseTree = (content) => {
    // Strip HTML comments first. `[\s\S]` spans newlines with a single
    // unambiguous alternative, so an unterminated `<!--` cannot trigger
    // exponential backtracking.
    const stripped = content.replace(/<!--[\s\S]*?-->/g, '')
    const tokens = md().parse(stripped, {})

    // -> Extract headings and their respective levels
    const headings = []
    for (let i = 1; i < tokens.length; i++) {
      const closing = tokens[i]
      const inline = tokens[i - 1]
      if (closing.type !== 'heading_close' || inline.type !== 'inline') {
        continue
      }

      // -> Exclude levels deeper than 2
      const level = Number(closing.tag.slice(1))
      if (level > 2) {
        continue
      }

      const children = inline.children || []
      let title
      let anchor
      if (children.length > 0 && children[0].type === 'link_open') {
        // Heading that starts with a link: use the token right after the link opening
        title = mdRemove(children[1].content)
        anchor = _.kebabCase(title)
      } else {
        title = mdRemove(inline.content)
        anchor = _.kebabCase(children.reduce((acc, t) => acc + t.content, ''))
      }
      headings.push({ content: title, anchor, level })
    }

    // -> Build tree from flat array
    const tree = []
    for (const heading of headings) {
      const node = { content: heading.content, anchor: heading.anchor, nodes: [] }
      if (heading.level < 2) {
        tree.push(node)
      } else if (tree.length > 0) {
        tree[tree.length - 1].nodes.push(node)
      }
    }
    return tree
  }
  /**
   * Parse markdown content to HTML
   *
   * Renders the markdown, then post-processes the resulting DOM: tidies the
   * first paragraph, unwraps links inside headings, moves blockquote styling
   * classes to the blockquote, indents content below h2/h3 headings, replaces
   * video links with embeds and, when enabled, typesets math expressions.
   *
   * @param {String} content Markdown content
   * @return {Promise<String>} Promise resolving to the rendered HTML
   */
  const parseContent = (content) => {
    const cr = cheerio.load(mkdown.render(content))

    // Always hand back a promise, including for empty documents, so callers
    // can chain on the result unconditionally.
    if (cr.root().children().length < 1) {
      return Promise.resolve('')
    }

    // -> Check for empty first element
    const firstElm = cr.root().children().first()[0]
    if (firstElm.type === 'tag' && firstElm.name === 'p') {
      const firstElmChildren = firstElm.children
      if (firstElmChildren.length < 1) {
        // firstElm is a raw DOM node; removal goes through the cheerio wrapper
        cr(firstElm).remove()
      } else if (firstElmChildren.length === 1 && firstElmChildren[0].type === 'tag' && firstElmChildren[0].name === 'img') {
        cr(firstElm).addClass('is-gapless')
      }
    }

    // -> Remove links in headers
    cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {
      cr(elm).replaceWith(cr(elm).text())
    })

    // -> Re-attach blockquote styling classes to their parents
    cr('blockquote').each((i, elm) => {
      if (cr(elm).children().length < 1) {
        return
      }
      const bqLastChild = cr(elm).children().last()[0]
      const bqLastChildClasses = cr(bqLastChild).attr('class')
      if (bqLastChildClasses && bqLastChildClasses.length > 0) {
        cr(bqLastChild).removeAttr('class')
        cr(elm).addClass(bqLastChildClasses)
      }
    })

    // -> Enclose content below headers
    // Moves every sibling that follows a heading, up to the next heading
    // matched by stopSelector, into a new `indent-<tag>` div placed right
    // after that heading.
    const encloseBelow = (headingTag, stopSelector) => {
      const wrapperClass = 'indent-' + headingTag
      cr(headingTag).each((i, elm) => {
        const following = cr(elm).nextUntil(stopSelector)
        cr(elm).after('<div class="' + wrapperClass + '"></div>')
        const container = cr(elm).next('.' + wrapperClass)
        _.forEach(following, (ch) => {
          container.append(ch)
        })
      })
    }
    encloseBelow('h2', 'h1, h2')
    encloseBelow('h3', 'h1, h2, h3')

    // Replace video links with embeds
    _.forEach(videoRules, (vrule) => {
      cr(vrule.selector).each((i, elm) => {
        let originLink = cr(elm).attr('href')
        if (!_.isString(originLink)) {
          // No href to work with: leave the element untouched
          return
        }
        if (vrule.regexp) {
          const vidMatches = originLink.match(vrule.regexp)
          if (vidMatches) {
            // The last non-empty capture is the video id
            originLink = _.last(_.filter(vidMatches, (f) => f && _.isString(f)))
          }
        }
        // The value is spliced into an HTML attribute, so it is escaped; the
        // function replacer keeps `$` sequences in the link literal.
        const processedLink = vrule.output.replace('{0}', () => _.escape(originLink))
        cr(elm).replaceWith(processedLink)
      })
    })

    // Apply align-center to parent
    cr('img.align-center').each((i, elm) => {
      cr(elm).parent().addClass('align-center')
      cr(elm).removeClass('align-center')
    })

    // Mathjax Post-processor
    const html = cr.html()
    return appconfig.features.mathjax ? processMathjax(html) : Promise.resolve(html)
  }
  /**
   * Process MathJax expressions
   *
   * Finds every expression matched by the `mathRegex` patterns, typesets each
   * one to SVG through mathjax-node, and substitutes the SVG for the original
   * expression text.
   *
   * @param {String} content HTML content
   * @returns {Promise<String>} Promise resolving to the content with math
   *   rendered as SVG; rejects with an Error when MathJax reports errors
   */
  const processMathjax = (content) => {
    // State object passed to every typeset call of this run
    const mathjaxState = {}

    // Wraps the callback-style typeset API in a promise
    const typeset = (math, format) => new Promise((resolve, reject) => {
      mathjax.typeset({
        math,
        format,
        speakText: false,
        svg: true,
        state: mathjaxState
      }, (result) => {
        if (!result.errors) {
          resolve(result.svg)
        } else {
          reject(new Error(result.errors.join(', ')))
        }
      })
    })

    const found = []
    mathRegex.forEach((mode) => {
      // The patterns are shared /g regexes; start each scan from the beginning
      mode.regex.lastIndex = 0
      let currentMatch
      while ((currentMatch = mode.regex.exec(content)) !== null) {
        // MathML is typeset from the full markup, TeX from the captured body
        const math = (mode.format === 'MathML') ? currentMatch[0] : currentMatch[1]
        found.push({ source: currentMatch[0], svg: typeset(math, mode.format) })
      }
    })

    if (found.length < 1) {
      return Promise.resolve(content)
    }

    return Promise.all(found.map((entry) => entry.svg)).then((results) => {
      // A function replacer inserts the SVG verbatim, so `$&`, `$1` or `$$`
      // inside the output are not treated as replacement patterns.
      return found.reduce((html, entry, idx) => html.replace(entry.source, () => results[idx]), content)
    })
  }
  /**
   * Parse meta-data tags from content
   *
   * Meta tags are HTML comments of the form `<!-- KEY: value -->`. Keys are
   * lower-cased and values are trimmed. When a key appears more than once, the
   * last occurrence wins.
   *
   * @param {String} content Markdown content
   * @return {Object} Properties found in the content and their values
   */
  const parseMeta = (content) => {
    // The value is matched lazily so that several meta comments on the same
    // line are read as separate entries instead of one merged value.
    const commentMeta = /<!-- ?([a-zA-Z]+):(.*?)-->/g
    const results = {}
    let match
    while ((match = commentMeta.exec(content)) !== null) {
      results[match[1].toLowerCase()] = match[2].trim()
    }
    return results
  }
  /**
   * Strips non-text elements from Markdown content
   *
   * Meta comments, fenced and inline code, and URLs are dropped; the rest is
   * deburred, lower-cased, stripped of markdown syntax and reduced to the
   * words matched by `textRegex`, joined with single spaces.
   *
   * @param {String} content Markdown-formatted content
   * @return {String} Text-only version
   */
  const removeMarkdown = (content) => {
    const urlPattern = new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g')

    // Drop meta comments, fenced code, inline code and URLs, in that order
    const withoutMeta = _.replace(content, /<!-- ?([a-zA-Z]+):(.*)-->/g, '')
    const withoutFences = _.replace(withoutMeta, /```([^`]|`)+?```/g, '')
    const withoutInlineCode = _.replace(withoutFences, /`[^`]+`/g, '')
    const withoutUrls = _.replace(withoutInlineCode, urlPattern, '')

    const normalized = _.toLower(_.deburr(withoutUrls))

    // Flatten line breaks, then keep only the word-like runs
    const words = mdRemove(normalized).replace(/\r?\n|\r/g, ' ').match(textRegex)
    return _.join(words, ' ')
  }
  345. module.exports = {
  346. /**
  347. * Parse content and return all data
  348. *
  349. * @param {String} content Markdown-formatted content
  350. * @return {Object} Object containing meta, html and tree data
  351. */
  352. parse(content) {
  353. return parseContent(content).then(html => {
  354. return {
  355. meta: parseMeta(content),
  356. html,
  357. tree: parseTree(content)
  358. }
  359. })
  360. },
  361. parseContent,
  362. parseMeta,
  363. parseTree,
  364. removeMarkdown
  365. }