You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

317 lines
8.6 KiB

  1. "use strict";
  2. var Promise = require('bluebird'),
  3. md = require('markdown-it'),
  4. mdEmoji = require('markdown-it-emoji'),
  5. mdTaskLists = require('markdown-it-task-lists'),
  6. mdAbbr = require('markdown-it-abbr'),
  7. mdAnchor = require('markdown-it-anchor'),
  8. mdFootnote = require('markdown-it-footnote'),
  9. mdExternalLinks = require('markdown-it-external-links'),
  10. mdExpandTabs = require('markdown-it-expand-tabs'),
  11. mdAttrs = require('markdown-it-attrs'),
  12. hljs = require('highlight.js'),
  13. cheerio = require('cheerio'),
  14. _ = require('lodash'),
  15. mdRemove = require('remove-markdown');
  // Load plugins

  /**
   * Shared markdown-it instance used to render page content to HTML.
   *
   * Raw HTML is allowed (`html: true`), bare URLs are auto-linked and the
   * typographer is enabled. Fenced code blocks go through highlight.js when
   * the language is known; otherwise the code is emitted HTML-escaped.
   */
  const mkdown = md({
    html: true,
    linkify: true,
    // markdown-it names this option `typographer`; the former `typography`
    // key was not recognised and therefore had no effect.
    typographer: true,
    highlight(str, lang) {
      if (lang && hljs.getLanguage(lang)) {
        try {
          return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>';
        } catch (err) {
          // Best effort: a highlighter failure must not break rendering,
          // fall through to the plain (escaped) output below.
        }
      }
      // markdown-it uses a return value starting with "<pre" verbatim, so the
      // source has to be escaped here or "<" / "&" in code would become markup.
      return '<pre><code>' + mkdown.utils.escapeHtml(str) + '</code></pre>';
    }
  })
    .use(mdEmoji)
    .use(mdTaskLists)
    .use(mdAbbr)
    .use(mdAnchor, {
      slugify: _.kebabCase,
      permalink: true,
      permalinkClass: 'toc-anchor',
      permalinkSymbol: '#',
      permalinkBefore: true
    })
    .use(mdFootnote)
    .use(mdExternalLinks, {
      externalClassName: 'external-link',
      internalClassName: 'internal-link'
    })
    .use(mdExpandTabs, {
      tabWidth: 4
    })
    .use(mdAttrs);
  // Rendering rules

  /**
   * Render an emoji token as an empty <i> element whose class is derived from
   * the token markup, with underscores turned into dashes.
   *
   * @param {Array} tokens Token list being rendered
   * @param {Number} idx Index of the emoji token in the list
   * @return {String} HTML for the emoji icon
   */
  mkdown.renderer.rules.emoji = (tokens, idx) => {
    const iconName = _.replace(tokens[idx].markup, /_/g, '-');
    return '<i class="twa twa-' + iconName + '"></i>';
  };
  // Video rules

  /**
   * Rules used to turn specially-classed links into embedded players.
   *
   * Each rule has:
   *  - selector: CSS selector of the links the rule applies to
   *  - regexp:   pattern whose capture group(s) hold the video id, or `false`
   *              when the link target is to be used as-is
   *  - output:   HTML template; `{0}` is the placeholder for the id / link
   */
  const videoRules = [
    {
      selector: 'a.youtube',
      regexp: /(?:(?:youtu\.be\/|v\/|vi\/|u\/\w\/|embed\/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*/i,
      output: '<iframe width="640" height="360" src="https://www.youtube.com/embed/{0}?rel=0" frameborder="0" allowfullscreen></iframe>'
    },
    {
      selector: 'a.vimeo',
      // The dot of "vimeo.com" is escaped so it only matches a literal "."
      regexp: /vimeo\.com\/(?:channels\/(?:\w+\/)?|groups\/(?:[^\/]*)\/videos\/|album\/(?:\d+)\/video\/|)(\d+)(?:$|\/|\?)/i,
      output: '<iframe src="https://player.vimeo.com/video/{0}" width="640" height="360" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
    },
    {
      selector: 'a.dailymotion',
      regexp: /(?:dailymotion\.com(?:\/embed)?(?:\/video|\/hub)|dai\.ly)\/([0-9a-z]+)(?:[\-_0-9a-zA-Z]+(?:#video=)?([a-z0-9]+)?)?/i,
      output: '<iframe width="640" height="360" src="//www.dailymotion.com/embed/video/{0}?endscreen-enable=false" frameborder="0" allowfullscreen></iframe>'
    },
    {
      selector: 'a.video',
      regexp: false,
      output: '<video width="640" height="360" controls preload="metadata"><source src="{0}" type="video/mp4"></video>'
    }
  ];
  /**
   * Parse markdown content and build TOC tree
   *
   * Only level-1 and level-2 headings are kept. Every level-1 heading becomes
   * a root node; a level-2 heading is attached to the `nodes` of the most
   * recent level-1 heading. A level-2 heading that appears before any level-1
   * heading has no parent and is left out of the tree.
   *
   * @param {String} content Markdown content
   * @return {Array} TOC tree, a list of `{ content, anchor, nodes }` objects
   */
  const parseTree = (content) => {
    const tokens = md().parse(content, {});

    //-> Extract headings and their respective levels
    const headings = [];
    tokens.forEach((token, idx) => {
      if (token.type !== 'heading_close' || idx === 0) {
        return;
      }
      const inline = tokens[idx - 1];
      if (inline.type !== 'inline') {
        return;
      }

      // An empty heading has an empty children list; guard it so that looking
      // at the first child cannot throw.
      const children = inline.children || [];
      let title = '';
      let anchorSource = '';
      if (children.length > 0 && children[0].type === 'link_open') {
        // Heading starts with a link: use the token right after link_open
        title = children.length > 1 ? children[1].content : '';
        anchorSource = title;
      } else {
        title = inline.content;
        anchorSource = children.reduce((acc, t) => acc + t.content, '');
      }

      headings.push({
        content: title,
        anchor: _.kebabCase(anchorSource),
        // tag is "h1".."h6"; the character after "h" is the level
        level: Number(token.tag.slice(1, 2))
      });
    });

    //-> Exclude levels deeper than 2, then build tree from flat array
    return headings
      .filter((h) => !(h.level > 2))
      .reduce((tree, h) => {
        const node = {
          content: h.content,
          anchor: h.anchor,
          nodes: []
        };
        if (h.level < 2) {
          tree.push(node);
        } else if (tree.length > 0) {
          tree[tree.length - 1].nodes.push(node);
        }
        return tree;
      }, []);
  };
  /**
   * Parse markdown content to HTML
   *
   * Renders the markdown, then post-processes the resulting HTML:
   *  - drops an empty leading paragraph / flags an image-only leading paragraph
   *  - replaces links inside h1-h3 (other than the TOC anchor) by their text
   *  - moves styling classes from a blockquote's last child to the blockquote
   *  - wraps the content following h2 / h3 headers in indent containers
   *  - replaces video links (see videoRules) with embedded players
   *
   * @param {String} content Markdown content
   * @return {String} HTML formatted content
   */
  const parseContent = (content) => {
    const cr = cheerio.load(mkdown.render(content));

    //-> Check for empty first element
    // firstElm is undefined when nothing was rendered (e.g. empty content)
    const firstElm = cr.root().children().first()[0];
    if (firstElm && firstElm.type === 'tag' && firstElm.name === 'p') {
      const firstElmChildren = firstElm.children;
      if (firstElmChildren.length < 1) {
        // firstElm is a raw DOM node; removal has to go through the cheerio wrapper
        cr(firstElm).remove();
      } else if (firstElmChildren.length === 1 && firstElmChildren[0].type === 'tag' && firstElmChildren[0].name === 'img') {
        cr(firstElm).addClass('is-gapless');
      }
    }

    //-> Remove links in headers
    cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {
      const txtLink = cr(elm).text();
      cr(elm).replaceWith(txtLink);
    });

    //-> Re-attach blockquote styling classes to their parents
    cr.root().children('blockquote').each((i, elm) => {
      if (cr(elm).children().length > 0) {
        const bqLastChild = cr(elm).children().last()[0];
        const bqLastChildClasses = cr(bqLastChild).attr('class');
        if (bqLastChildClasses && bqLastChildClasses.length > 0) {
          cr(bqLastChild).removeAttr('class');
          cr(elm).addClass(bqLastChildClasses);
        }
      }
    });

    //-> Enclose content below headers
    // Moves every sibling following a header (up to the next header matched by
    // stopSelector) into a new <div class="wrapperClass"> placed after it.
    const encloseBelow = (headerTag, stopSelector, wrapperClass) => {
      cr(headerTag).each((i, elm) => {
        const following = cr(elm).nextUntil(stopSelector);
        cr(elm).after('<div class="' + wrapperClass + '"></div>');
        const container = cr(elm).next('.' + wrapperClass);
        _.forEach(following, (ch) => {
          cr(container).append(ch);
        });
      });
    };
    encloseBelow('h2', 'h1, h2', 'indent-h2');
    encloseBelow('h3', 'h1, h2, h3', 'indent-h3');

    // Replace video links with embeds
    _.forEach(videoRules, (vrule) => {
      cr(vrule.selector).each((i, elm) => {
        let videoRef = cr(elm).attr('href');
        if (!_.isString(videoRef) || videoRef.length < 1) {
          // No target to embed: leave the element untouched
          return;
        }
        if (vrule.regexp) {
          const vidMatches = videoRef.match(vrule.regexp);
          const captures = vidMatches ? _.filter(vidMatches.slice(1), (f) => f && _.isString(f)) : [];
          if (captures.length < 1) {
            // The URL does not contain a recognisable video id; keeping the
            // plain link is better than emitting a broken player.
            return;
          }
          // The last non-empty capture group holds the id
          videoRef = _.last(captures);
        }
        // Escape the value before placing it in an attribute, and use a
        // replacer function so "$" sequences in it are not interpreted.
        const embedHtml = vrule.output.replace('{0}', () => _.escape(videoRef));
        cr(elm).replaceWith(embedHtml);
      });
    });

    return cr.html();
  };
  /**
   * Parse meta-data tags from content
   *
   * Meta-data tags are HTML comments of the form `<!-- NAME: value -->`.
   * Names consist of letters only and are lower-cased in the result; values
   * are trimmed. When a name occurs more than once, the last value wins.
   *
   * @param {String} content Markdown content
   * @return {Object} Properties found in the content and their values
   */
  const parseMeta = (content) => {
    // The value capture is lazy so that several tags on the same line are
    // matched one by one instead of being merged into a single value.
    const commentMeta = /<!-- ?([a-zA-Z]+):(.*?)-->/g;
    const results = {};
    let match;
    while ((match = commentMeta.exec(content)) !== null) {
      results[match[1].toLowerCase()] = match[2].trim();
    }
    return results;
  };
  235. module.exports = {
  236. /**
  237. * Parse content and return all data
  238. *
  239. * @param {String} content Markdown-formatted content
  240. * @return {Object} Object containing meta, html and tree data
  241. */
  242. parse(content) {
  243. return {
  244. meta: parseMeta(content),
  245. html: parseContent(content),
  246. tree: parseTree(content)
  247. };
  248. },
  249. parseContent,
  250. parseMeta,
  251. parseTree,
  252. /**
  253. * Strips non-text elements from Markdown content
  254. *
  255. * @param {String} content Markdown-formatted content
  256. * @return {String} Text-only version
  257. */
  258. removeMarkdown(content) {
  259. return mdRemove(_.chain(content)
  260. .replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')
  261. .replace(/```[^`]+```/g, '')
  262. .replace(/`[^`]+`/g, '')
  263. .replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '')
  264. .replace(/\r?\n|\r/g, ' ')
  265. .deburr()
  266. .toLower()
  267. .replace(/(\b([^a-z]+)\b)/g, ' ')
  268. .replace(/[^a-z]+/g, ' ')
  269. .replace(/(\b(\w{1,2})\b(\W|$))/g, '')
  270. .replace(/\s\s+/g, ' ')
  271. .value()
  272. );
  273. }
  274. };