You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

420 lines
12 KiB

  1. 'use strict'
  2. /* global wiki */
  3. const Promise = require('bluebird')
  4. const md = require('markdown-it')
  5. const mdEmoji = require('markdown-it-emoji')
  6. const mdTaskLists = require('markdown-it-task-lists')
  7. const mdAbbr = require('markdown-it-abbr')
  8. const mdAnchor = require('markdown-it-anchor')
  9. const mdFootnote = require('markdown-it-footnote')
  10. const mdExternalLinks = require('markdown-it-external-links')
  11. const mdExpandTabs = require('markdown-it-expand-tabs')
  12. const mdAttrs = require('markdown-it-attrs')
  13. const mdMathjax = require('markdown-it-mathjax')()
  14. const mathjax = require('mathjax-node')
  15. const hljs = require('highlight.js')
  16. const cheerio = require('cheerio')
  17. const _ = require('lodash')
  18. const mdRemove = require('remove-markdown')
  19. // Load plugins
  20. var mkdown = md({
  21. html: true,
  22. // breaks: wiki.config.features.linebreaks,
  23. breaks: true,
  24. linkify: true,
  25. typography: true,
  26. highlight(str, lang) {
  27. if (wiki.config.theme.code.colorize && lang && hljs.getLanguage(lang)) {
  28. try {
  29. return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>'
  30. } catch (err) {
  31. return '<pre><code>' + _.escape(str) + '</code></pre>'
  32. }
  33. }
  34. return '<pre><code>' + _.escape(str) + '</code></pre>'
  35. }
  36. })
  37. .use(mdEmoji)
  38. .use(mdTaskLists)
  39. .use(mdAbbr)
  40. .use(mdAnchor, {
  41. slugify: _.kebabCase,
  42. permalink: true,
  43. permalinkClass: 'toc-anchor nc-icon-outline location_bookmark-add',
  44. permalinkSymbol: '',
  45. permalinkBefore: true
  46. })
  47. .use(mdFootnote)
  48. .use(mdExternalLinks, {
  49. externalClassName: 'external-link',
  50. internalClassName: 'internal-link'
  51. })
  52. .use(mdExpandTabs, {
  53. tabWidth: 4
  54. })
  55. .use(mdAttrs)
  56. // if (wiki.config.features.mathjax) {
  57. if (true) {
  58. mkdown.use(mdMathjax)
  59. }
  60. // Rendering rules
  61. mkdown.renderer.rules.emoji = function (token, idx) {
  62. return '<i class="twa twa-' + _.replace(token[idx].markup, /_/g, '-') + '"></i>'
  63. }
  64. // Video rules
  65. const videoRules = [
  66. {
  67. selector: 'a.youtube',
  68. regexp: new RegExp(/(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|&v(?:i)?=))([^#&?]*).*/i),
  69. output: '<iframe width="640" height="360" src="https://www.youtube.com/embed/{0}?rel=0" frameborder="0" allowfullscreen></iframe>'
  70. },
  71. {
  72. selector: 'a.vimeo',
  73. regexp: new RegExp(/vimeo.com\/(?:channels\/(?:\w+\/)?|groups\/(?:[^/]*)\/videos\/|album\/(?:\d+)\/video\/|)(\d+)(?:$|\/|\?)/i),
  74. output: '<iframe src="https://player.vimeo.com/video/{0}" width="640" height="360" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
  75. },
  76. {
  77. selector: 'a.dailymotion',
  78. regexp: new RegExp(/(?:dailymotion\.com(?:\/embed)?(?:\/video|\/hub)|dai\.ly)\/([0-9a-z]+)(?:[-_0-9a-zA-Z]+(?:#video=)?([a-z0-9]+)?)?/i),
  79. output: '<iframe width="640" height="360" src="//www.dailymotion.com/embed/video/{0}?endscreen-enable=false" frameborder="0" allowfullscreen></iframe>'
  80. },
  81. {
  82. selector: 'a.video',
  83. regexp: false,
  84. output: '<video width="640" height="360" controls preload="metadata"><source src="{0}" type="video/mp4"></video>'
  85. }
  86. ]
  87. // Regex
  88. const textRegex = new RegExp('\\b[a-z0-9-.,' + wiki.data.regex.cjk + wiki.data.regex.arabic + ']+\\b', 'g')
  89. const mathRegex = [
  90. {
  91. format: 'TeX',
  92. regex: /\\\[([\s\S]*?)\\\]/g
  93. },
  94. {
  95. format: 'inline-TeX',
  96. regex: /\\\((.*?)\\\)/g
  97. },
  98. {
  99. format: 'MathML',
  100. regex: /<math([\s\S]*?)<\/math>/g
  101. }
  102. ]
  103. // MathJax
  104. mathjax.config({
  105. MathJax: {
  106. jax: ['input/TeX', 'input/MathML', 'output/SVG'],
  107. extensions: ['tex2jax.js', 'mml2jax.js'],
  108. TeX: {
  109. extensions: ['AMSmath.js', 'AMSsymbols.js', 'noErrors.js', 'noUndefined.js']
  110. },
  111. SVG: {
  112. scale: 120,
  113. font: 'STIX-Web'
  114. }
  115. }
  116. })
  117. /**
  118. * Parse markdown content and build TOC tree
  119. *
  120. * @param {(Function|string)} content Markdown content
  121. * @return {Array} TOC tree
  122. */
  123. const parseTree = (content) => {
  124. content = content.replace(/<!--(.|\t|\n|\r)*?-->/g, '')
  125. let tokens = md().parse(content, {})
  126. let tocArray = []
  127. // -> Extract headings and their respective levels
  128. for (let i = 0; i < tokens.length; i++) {
  129. if (tokens[i].type !== 'heading_close') {
  130. continue
  131. }
  132. const heading = tokens[i - 1]
  133. const headingclose = tokens[i]
  134. if (heading.type === 'inline') {
  135. let content = ''
  136. let anchor = ''
  137. if (heading.children && heading.children.length > 0 && heading.children[0].type === 'link_open') {
  138. content = mdRemove(heading.children[1].content)
  139. anchor = _.kebabCase(content)
  140. } else {
  141. content = mdRemove(heading.content)
  142. anchor = _.kebabCase(heading.children.reduce((acc, t) => acc + t.content, ''))
  143. }
  144. tocArray.push({
  145. content,
  146. anchor,
  147. level: +headingclose.tag.substr(1, 1)
  148. })
  149. }
  150. }
  151. // -> Exclude levels deeper than 2
  152. _.remove(tocArray, (n) => { return n.level > 2 })
  153. // -> Build tree from flat array
  154. return _.reduce(tocArray, (tree, v) => {
  155. let treeLength = tree.length - 1
  156. if (v.level < 2) {
  157. tree.push({
  158. content: v.content,
  159. anchor: v.anchor,
  160. nodes: []
  161. })
  162. } else {
  163. let lastNodeLevel = 1
  164. let GetNodePath = (startPos) => {
  165. lastNodeLevel++
  166. if (_.isEmpty(startPos)) {
  167. startPos = 'nodes'
  168. }
  169. if (lastNodeLevel === v.level) {
  170. return startPos
  171. } else {
  172. return GetNodePath(startPos + '[' + (_.at(tree[treeLength], startPos).length - 1) + '].nodes')
  173. }
  174. }
  175. let lastNodePath = GetNodePath()
  176. let lastNode = _.get(tree[treeLength], lastNodePath)
  177. if (lastNode) {
  178. lastNode.push({
  179. content: v.content,
  180. anchor: v.anchor,
  181. nodes: []
  182. })
  183. _.set(tree[treeLength], lastNodePath, lastNode)
  184. }
  185. }
  186. return tree
  187. }, [])
  188. }
  189. /**
  190. * Parse markdown content to HTML
  191. *
  192. * @param {String} content Markdown content
  193. * @return {Promise<String>} Promise
  194. */
  195. const parseContent = (content) => {
  196. let cr = cheerio.load(mkdown.render(content))
  197. if (cr.root().children().length < 1) {
  198. return ''
  199. }
  200. // -> Check for empty first element
  201. let firstElm = cr.root().children().first()[0]
  202. if (firstElm.type === 'tag' && firstElm.name === 'p') {
  203. let firstElmChildren = firstElm.children
  204. if (firstElmChildren.length < 1) {
  205. firstElm.remove()
  206. } else if (firstElmChildren.length === 1 && firstElmChildren[0].type === 'tag' && firstElmChildren[0].name === 'img') {
  207. cr(firstElm).addClass('is-gapless')
  208. }
  209. }
  210. // -> Remove links in headers
  211. cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {
  212. let txtLink = cr(elm).text()
  213. cr(elm).replaceWith(txtLink)
  214. })
  215. // -> Re-attach blockquote styling classes to their parents
  216. cr('blockquote').each((i, elm) => {
  217. if (cr(elm).children().length > 0) {
  218. let bqLastChild = cr(elm).children().last()[0]
  219. let bqLastChildClasses = cr(bqLastChild).attr('class')
  220. if (bqLastChildClasses && bqLastChildClasses.length > 0) {
  221. cr(bqLastChild).removeAttr('class')
  222. cr(elm).addClass(bqLastChildClasses)
  223. }
  224. }
  225. })
  226. // -> Enclose content below headers
  227. cr('h2').each((i, elm) => {
  228. let subH2Content = cr(elm).nextUntil('h1, h2')
  229. cr(elm).after('<div class="indent-h2"></div>')
  230. let subH2Container = cr(elm).next('.indent-h2')
  231. _.forEach(subH2Content, (ch) => {
  232. cr(subH2Container).append(ch)
  233. })
  234. })
  235. cr('h3').each((i, elm) => {
  236. let subH3Content = cr(elm).nextUntil('h1, h2, h3')
  237. cr(elm).after('<div class="indent-h3"></div>')
  238. let subH3Container = cr(elm).next('.indent-h3')
  239. _.forEach(subH3Content, (ch) => {
  240. cr(subH3Container).append(ch)
  241. })
  242. })
  243. // Replace video links with embeds
  244. _.forEach(videoRules, (vrule) => {
  245. cr(vrule.selector).each((i, elm) => {
  246. let originLink = cr(elm).attr('href')
  247. if (vrule.regexp) {
  248. let vidMatches = originLink.match(vrule.regexp)
  249. if ((vidMatches && _.isArray(vidMatches))) {
  250. vidMatches = _.filter(vidMatches, (f) => {
  251. return f && _.isString(f)
  252. })
  253. originLink = _.last(vidMatches)
  254. }
  255. }
  256. let processedLink = _.replace(vrule.output, '{0}', originLink)
  257. cr(elm).replaceWith(processedLink)
  258. })
  259. })
  260. // Apply align-center to parent
  261. cr('img.align-center').each((i, elm) => {
  262. cr(elm).parent().addClass('align-center')
  263. cr(elm).removeClass('align-center')
  264. })
  265. // Mathjax Post-processor
  266. if (wiki.config.features.mathjax) {
  267. return processMathjax(cr.html())
  268. } else {
  269. return Promise.resolve(cr.html())
  270. }
  271. }
  272. /**
  273. * Process MathJax expressions
  274. *
  275. * @param {String} content HTML content
  276. * @returns {Promise<String>} Promise
  277. */
  278. const processMathjax = (content) => {
  279. let matchStack = []
  280. let replaceStack = []
  281. let currentMatch
  282. let mathjaxState = {}
  283. _.forEach(mathRegex, mode => {
  284. do {
  285. currentMatch = mode.regex.exec(content)
  286. if (currentMatch) {
  287. matchStack.push(currentMatch[0])
  288. replaceStack.push(
  289. new Promise((resolve, reject) => {
  290. mathjax.typeset({
  291. math: (mode.format === 'MathML') ? currentMatch[0] : currentMatch[1],
  292. format: mode.format,
  293. speakText: false,
  294. svg: true,
  295. state: mathjaxState,
  296. timeout: 30 * 1000
  297. }, result => {
  298. if (!result.errors) {
  299. resolve(result.svg)
  300. } else {
  301. resolve(currentMatch[0])
  302. wiki.logger.warn(result.errors.join(', '))
  303. }
  304. })
  305. })
  306. )
  307. }
  308. } while (currentMatch)
  309. })
  310. return (matchStack.length > 0) ? Promise.all(replaceStack).then(results => {
  311. _.forEach(matchStack, (repMatch, idx) => {
  312. content = content.replace(repMatch, results[idx])
  313. })
  314. return content
  315. }) : Promise.resolve(content)
  316. }
  317. /**
  318. * Parse meta-data tags from content
  319. *
  320. * @param {String} content Markdown content
  321. * @return {Object} Properties found in the content and their values
  322. */
  323. const parseMeta = (content) => {
  324. let commentMeta = new RegExp('<!-- ?([a-zA-Z]+):(.*)-->', 'g')
  325. let results = {}
  326. let match
  327. while ((match = commentMeta.exec(content)) !== null) {
  328. results[_.toLower(match[1])] = _.trim(match[2])
  329. }
  330. return results
  331. }
  332. /**
  333. * Strips non-text elements from Markdown content
  334. *
  335. * @param {String} content Markdown-formatted content
  336. * @return {String} Text-only version
  337. */
  338. const removeMarkdown = (content) => {
  339. return _.join(mdRemove(_.chain(content)
  340. .replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')
  341. .replace(/```([^`]|`)+?```/g, '')
  342. .replace(/`[^`]+`/g, '')
  343. .replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '')
  344. .deburr()
  345. .toLower()
  346. .value()
  347. ).replace(/\r?\n|\r/g, ' ').match(textRegex), ' ')
  348. }
  349. module.exports = {
  350. /**
  351. * Parse content and return all data
  352. *
  353. * @param {String} content Markdown-formatted content
  354. * @return {Object} Object containing meta, html and tree data
  355. */
  356. parse(content) {
  357. return parseContent(content).then(html => {
  358. return {
  359. meta: parseMeta(content),
  360. html,
  361. tree: parseTree(content)
  362. }
  363. })
  364. },
  365. parseContent,
  366. parseMeta,
  367. parseTree,
  368. removeMarkdown
  369. }