|
|
@ -7,6 +7,7 @@ const fs = require('fs-extra') |
|
|
|
const yaml = require('js-yaml') |
|
|
|
const striptags = require('striptags') |
|
|
|
const emojiRegex = require('emoji-regex') |
|
|
|
const he = require('he') |
|
|
|
|
|
|
|
/* global WIKI */ |
|
|
|
|
|
|
@ -17,7 +18,7 @@ const frontmatterRegex = { |
|
|
|
} |
|
|
|
|
|
|
|
// Matches a run of one or more of the listed ASCII punctuation/symbol characters,
// or a period that is immediately followed or immediately preceded by a whitespace
// character (the whitespace is part of the match). A period with no adjacent
// whitespace, e.g. inside "3.14", is not matched.
const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig |
|
|
|
// Matches numeric HTML character references of two fixed shapes only:
// "&#" + exactly three decimal digits + ";", or "&#x" + exactly two characters
// from [a-zA-Z0-9] + ";". Named entities such as "&amp;" and numeric references
// with any other digit count are not matched.
const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig |
|
|
|
// const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig
|
|
|
|
|
|
|
|
/** |
|
|
|
* Pages model |
|
|
@ -663,9 +664,10 @@ module.exports = class Page extends Model { |
|
|
|
* @returns {string} Cleaned Content Text |
|
|
|
*/ |
|
|
|
static cleanHTML(rawHTML = '') { |
|
|
|
return striptags(rawHTML || '') |
|
|
|
let data = striptags(rawHTML || '') |
|
|
|
.replace(emojiRegex(), '') |
|
|
|
.replace(htmlEntitiesRegex, '') |
|
|
|
// .replace(htmlEntitiesRegex, '')
|
|
|
|
return he.decode(data) |
|
|
|
.replace(punctuationRegex, ' ') |
|
|
|
.replace(/(\r\n|\n|\r)/gm, ' ') |
|
|
|
.replace(/\s\s+/g, ' ') |
|
|
|