You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

240 lines
5.9 KiB

  1. "use strict";
  2. var Promise = require('bluebird'),
  3. md = require('markdown-it'),
  4. mdEmoji = require('markdown-it-emoji'),
  5. mdTaskLists = require('markdown-it-task-lists'),
  6. mdAbbr = require('markdown-it-abbr'),
  7. mdAnchor = require('markdown-it-anchor'),
  8. mdFootnote = require('markdown-it-footnote'),
  9. mdExternalLinks = require('markdown-it-external-links'),
  10. mdExpandTabs = require('markdown-it-expand-tabs'),
  11. mdAttrs = require('markdown-it-attrs'),
  12. hljs = require('highlight.js'),
  13. cheerio = require('cheerio'),
  14. _ = require('lodash'),
  15. mdRemove = require('remove-markdown');
  // Load plugins
  //-> Shared markdown-it instance used by parseContent(); raw HTML and
  //   automatic link detection are enabled.
  const mkdown = md({
    html: true,
    linkify: true,
    // NOTE(review): markdown-it documents this option as `typographer`;
    // confirm whether `typography` has any effect before renaming it, since
    // enabling typographic replacements would change rendered output.
    typography: true,
    /**
     * Render the content of a fenced code block.
     *
     * @param {String} str Raw code block content
     * @param {String} lang Language name given on the fence, if any
     * @return {String} Complete `<pre><code>` HTML for the block
     */
    highlight(str, lang) {
      if (lang && hljs.getLanguage(lang)) {
        try {
          return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>';
        } catch (err) {
          // Highlighting failed: fall through to the plain rendering below.
        }
      }
      // The returned string is used as final HTML, so unhighlighted code must
      // be escaped here or `<`, `>` and `&` in the code would be parsed as markup.
      return '<pre><code>' + mkdown.utils.escapeHtml(str) + '</code></pre>';
    }
  })
    .use(mdEmoji)
    .use(mdTaskLists)
    .use(mdAbbr)
    .use(mdAnchor, {
      // parseTree() derives its anchors with the same slug function.
      slugify: _.kebabCase,
      permalink: true,
      permalinkClass: 'toc-anchor',
      permalinkSymbol: '#',
      permalinkBefore: true
    })
    .use(mdFootnote)
    .use(mdExternalLinks, {
      externalClassName: 'external-link',
      internalClassName: 'internal-link'
    })
    .use(mdExpandTabs, {
      tabWidth: 4
    })
    .use(mdAttrs);
  // Rendering rules
  //-> Emoji tokens are rendered as an <i> element whose class is derived
  //   from the token's markup value.
  mkdown.renderer.rules.emoji = (tokens, idx) => {
    const emojiName = tokens[idx].markup;
    return `<i class="twa twa-${emojiName}"></i>`;
  };
  /**
   * Parse markdown content and build TOC tree
   *
   * Only level-1 and level-2 headings are kept. Each level-1 heading becomes a
   * root entry; each level-2 heading is appended to the `nodes` of the most
   * recent level-1 entry. A level-2 heading that appears before any level-1
   * heading is dropped.
   *
   * @param {String} content Markdown content
   * @return {Array} TOC tree, a list of `{ content, anchor, nodes }` entries
   */
  const parseTree = (content) => {
    const tokens = md().parse(content, {});
    const tree = [];

    // Start at 1: the inline token holding the heading text is read from the
    // position just before each heading_close token.
    for (let i = 1; i < tokens.length; i++) {
      const closing = tokens[i];
      const heading = tokens[i - 1];
      if (closing.type !== 'heading_close' || heading.type !== 'inline') {
        continue;
      }

      //-> Exclude levels deeper than 2
      const level = Number(closing.tag.slice(1));
      if (level > 2) {
        continue;
      }

      //-> Extract heading text and anchor
      // An empty heading has no children; guard before indexing into them.
      const children = heading.children || [];
      let text;
      let anchor;
      if (children.length > 1 && children[0].type === 'link_open') {
        // Heading starts with a link: use the token right after link_open.
        text = children[1].content;
        anchor = _.kebabCase(text);
      } else {
        text = heading.content;
        anchor = _.kebabCase(children.reduce((acc, t) => acc + t.content, ''));
      }

      //-> Attach to the tree
      const entry = { content: text, anchor, nodes: [] };
      if (level < 2) {
        tree.push(entry);
      } else if (tree.length > 0) {
        tree[tree.length - 1].nodes.push(entry);
      }
    }

    return tree;
  };
  /**
   * Parse markdown content to HTML
   *
   * Renders the markdown, then post-processes the HTML: an empty leading
   * paragraph is removed, a leading paragraph holding a single image gets the
   * `is-gapless` class, and links directly inside h1-h3 headings (other than
   * the `toc-anchor` permalink) are replaced by their text.
   *
   * @param {String} content Markdown content
   * @return {String} HTML formatted content
   */
  const parseContent = (content) => {
    const cr = cheerio.load(mkdown.render(content));

    //-> Check for empty first element
    // firstElm is a raw DOM node, and is undefined when nothing was rendered.
    const firstElm = cr.root().children().first()[0];
    if (firstElm && firstElm.type === 'tag' && firstElm.name === 'p') {
      const firstElmChildren = firstElm.children;
      if (firstElmChildren.length < 1) {
        // Raw nodes have no remove(); go through the cheerio wrapper.
        cr(firstElm).remove();
      } else if (
        firstElmChildren.length === 1 &&
        firstElmChildren[0].type === 'tag' &&
        firstElmChildren[0].name === 'img'
      ) {
        cr(firstElm).addClass('is-gapless');
      }
    }

    //-> Remove links in headers
    cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {
      const link = cr(elm);
      link.replaceWith(link.text());
    });

    return cr.html();
  };
  /**
   * Parse meta-data tags from content
   *
   * Meta tags are HTML comments of the form `<!-- KEY: value -->`, where KEY
   * consists of ASCII letters only. Keys are lower-cased and values trimmed.
   * When a key occurs more than once, the last occurrence wins.
   *
   * @param {String} content Markdown content
   * @return {Object} Properties found in the content and their values
   */
  const parseMeta = (content) => {
    // Lazy `(.*?)` ends each match at the first `-->`, so several meta
    // comments on the same line are read as separate entries.
    const commentMeta = /<!-- ?([a-zA-Z]+):(.*?)-->/g;
    const results = {};
    let match;
    while ((match = commentMeta.exec(content)) !== null) {
      results[match[1].toLowerCase()] = match[2].trim();
    }
    return results;
  };
  167. module.exports = {
  168. /**
  169. * Parse content and return all data
  170. *
  171. * @param {String} content Markdown-formatted content
  172. * @return {Object} Object containing meta, html and tree data
  173. */
  174. parse(content) {
  175. return {
  176. meta: parseMeta(content),
  177. html: parseContent(content),
  178. tree: parseTree(content)
  179. };
  180. },
  181. parseContent,
  182. parseMeta,
  183. parseTree,
  184. /**
  185. * Strips non-text elements from Markdown content
  186. *
  187. * @param {String} content Markdown-formatted content
  188. * @return {String} Text-only version
  189. */
  190. removeMarkdown(content) {
  191. return mdRemove(_.chain(content)
  192. .replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')
  193. .replace(/```[^`]+```/g, '')
  194. .replace(/`[^`]+`/g, '')
  195. .replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '')
  196. .replace(/\r?\n|\r/g, ' ')
  197. .deburr()
  198. .toLower()
  199. .replace(/(\b([^a-z]+)\b)/g, ' ')
  200. .replace(/[^a-z]+/g, ' ')
  201. .replace(/(\b(\w{1,2})\b(\W|$))/g, '')
  202. .replace(/\s\s+/g, ' ')
  203. .value()
  204. );
  205. }
  206. };