You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

370 lines
10 KiB

  1. const _ = require('lodash')
  2. const AWS = require('aws-sdk')
  3. const stream = require('stream')
  4. const Promise = require('bluebird')
  5. const pipeline = Promise.promisify(stream.pipeline)
  6. /* global WIKI */
  7. module.exports = {
  8. async activate() {
  9. // not used
  10. },
  11. async deactivate() {
  12. // not used
  13. },
  14. /**
  15. * INIT
  16. */
  17. async init() {
  18. WIKI.logger.info(`(SEARCH/AWS) Initializing...`)
  19. this.client = new AWS.CloudSearch({
  20. apiVersion: '2013-01-01',
  21. accessKeyId: this.config.accessKeyId,
  22. secretAccessKey: this.config.secretAccessKey,
  23. region: this.config.region
  24. })
  25. this.clientDomain = new AWS.CloudSearchDomain({
  26. apiVersion: '2013-01-01',
  27. endpoint: this.config.endpoint,
  28. accessKeyId: this.config.accessKeyId,
  29. secretAccessKey: this.config.secretAccessKey,
  30. region: this.config.region
  31. })
  32. let rebuildIndex = false
  33. // -> Define Analysis Schemes
  34. const schemes = await this.client.describeAnalysisSchemes({
  35. DomainName: this.config.domain,
  36. AnalysisSchemeNames: ['default_anlscheme']
  37. }).promise()
  38. if (_.get(schemes, 'AnalysisSchemes', []).length < 1) {
  39. WIKI.logger.info(`(SEARCH/AWS) Defining Analysis Scheme...`)
  40. await this.client.defineAnalysisScheme({
  41. DomainName: this.config.domain,
  42. AnalysisScheme: {
  43. AnalysisSchemeLanguage: this.config.AnalysisSchemeLang,
  44. AnalysisSchemeName: 'default_anlscheme'
  45. }
  46. }).promise()
  47. rebuildIndex = true
  48. }
  49. // -> Define Index Fields
  50. const fields = await this.client.describeIndexFields({
  51. DomainName: this.config.domain
  52. }).promise()
  53. if (_.get(fields, 'IndexFields', []).length < 1) {
  54. WIKI.logger.info(`(SEARCH/AWS) Defining Index Fields...`)
  55. await this.client.defineIndexField({
  56. DomainName: this.config.domain,
  57. IndexField: {
  58. IndexFieldName: 'id',
  59. IndexFieldType: 'literal'
  60. }
  61. }).promise()
  62. await this.client.defineIndexField({
  63. DomainName: this.config.domain,
  64. IndexField: {
  65. IndexFieldName: 'path',
  66. IndexFieldType: 'literal'
  67. }
  68. }).promise()
  69. await this.client.defineIndexField({
  70. DomainName: this.config.domain,
  71. IndexField: {
  72. IndexFieldName: 'locale',
  73. IndexFieldType: 'literal'
  74. }
  75. }).promise()
  76. await this.client.defineIndexField({
  77. DomainName: this.config.domain,
  78. IndexField: {
  79. IndexFieldName: 'title',
  80. IndexFieldType: 'text',
  81. TextOptions: {
  82. ReturnEnabled: true,
  83. AnalysisScheme: 'default_anlscheme'
  84. }
  85. }
  86. }).promise()
  87. await this.client.defineIndexField({
  88. DomainName: this.config.domain,
  89. IndexField: {
  90. IndexFieldName: 'description',
  91. IndexFieldType: 'text',
  92. TextOptions: {
  93. ReturnEnabled: true,
  94. AnalysisScheme: 'default_anlscheme'
  95. }
  96. }
  97. }).promise()
  98. await this.client.defineIndexField({
  99. DomainName: this.config.domain,
  100. IndexField: {
  101. IndexFieldName: 'content',
  102. IndexFieldType: 'text',
  103. TextOptions: {
  104. ReturnEnabled: false,
  105. AnalysisScheme: 'default_anlscheme'
  106. }
  107. }
  108. }).promise()
  109. rebuildIndex = true
  110. }
  111. // -> Define suggester
  112. const suggesters = await this.client.describeSuggesters({
  113. DomainName: this.config.domain,
  114. SuggesterNames: ['default_suggester']
  115. }).promise()
  116. if (_.get(suggesters, 'Suggesters', []).length < 1) {
  117. WIKI.logger.info(`(SEARCH/AWS) Defining Suggester...`)
  118. await this.client.defineSuggester({
  119. DomainName: this.config.domain,
  120. Suggester: {
  121. SuggesterName: 'default_suggester',
  122. DocumentSuggesterOptions: {
  123. SourceField: 'title',
  124. FuzzyMatching: 'high'
  125. }
  126. }
  127. }).promise()
  128. rebuildIndex = true
  129. }
  130. // -> Rebuild Index
  131. if (rebuildIndex) {
  132. WIKI.logger.info(`(SEARCH/AWS) Requesting Index Rebuild...`)
  133. await this.client.indexDocuments({
  134. DomainName: this.config.domain
  135. }).promise()
  136. }
  137. WIKI.logger.info(`(SEARCH/AWS) Initialization completed.`)
  138. },
  139. /**
  140. * QUERY
  141. *
  142. * @param {String} q Query
  143. * @param {Object} opts Additional options
  144. */
  145. async query(q, opts) {
  146. try {
  147. let suggestions = []
  148. const results = await this.clientDomain.search({
  149. query: q,
  150. partial: true,
  151. size: 50
  152. }).promise()
  153. if (results.hits.found < 5) {
  154. const suggestResults = await this.clientDomain.suggest({
  155. query: q,
  156. suggester: 'default_suggester',
  157. size: 5
  158. }).promise()
  159. suggestions = suggestResults.suggest.suggestions.map(s => s.suggestion)
  160. }
  161. return {
  162. results: _.map(results.hits.hit, r => ({
  163. id: r.id,
  164. path: _.head(r.fields.path),
  165. locale: _.head(r.fields.locale),
  166. title: _.head(r.fields.title) || '',
  167. description: _.head(r.fields.description) || ''
  168. })),
  169. suggestions: suggestions,
  170. totalHits: results.hits.found
  171. }
  172. } catch (err) {
  173. WIKI.logger.warn('Search Engine Error:')
  174. WIKI.logger.warn(err)
  175. }
  176. },
  177. /**
  178. * CREATE
  179. *
  180. * @param {Object} page Page to create
  181. */
  182. async created(page) {
  183. await this.clientDomain.uploadDocuments({
  184. contentType: 'application/json',
  185. documents: JSON.stringify([
  186. {
  187. type: 'add',
  188. id: page.hash,
  189. fields: {
  190. locale: page.localeCode,
  191. path: page.path,
  192. title: page.title,
  193. description: page.description,
  194. content: page.safeContent
  195. }
  196. }
  197. ])
  198. }).promise()
  199. },
  200. /**
  201. * UPDATE
  202. *
  203. * @param {Object} page Page to update
  204. */
  205. async updated(page) {
  206. await this.clientDomain.uploadDocuments({
  207. contentType: 'application/json',
  208. documents: JSON.stringify([
  209. {
  210. type: 'add',
  211. id: page.hash,
  212. fields: {
  213. locale: page.localeCode,
  214. path: page.path,
  215. title: page.title,
  216. description: page.description,
  217. content: page.safeContent
  218. }
  219. }
  220. ])
  221. }).promise()
  222. },
  223. /**
  224. * DELETE
  225. *
  226. * @param {Object} page Page to delete
  227. */
  228. async deleted(page) {
  229. await this.clientDomain.uploadDocuments({
  230. contentType: 'application/json',
  231. documents: JSON.stringify([
  232. {
  233. type: 'delete',
  234. id: page.hash
  235. }
  236. ])
  237. }).promise()
  238. },
  239. /**
  240. * RENAME
  241. *
  242. * @param {Object} page Page to rename
  243. */
  244. async renamed(page) {
  245. await this.clientDomain.uploadDocuments({
  246. contentType: 'application/json',
  247. documents: JSON.stringify([
  248. {
  249. type: 'delete',
  250. id: page.sourceHash
  251. }
  252. ])
  253. }).promise()
  254. await this.clientDomain.uploadDocuments({
  255. contentType: 'application/json',
  256. documents: JSON.stringify([
  257. {
  258. type: 'add',
  259. id: page.destinationHash,
  260. fields: {
  261. locale: page.localeCode,
  262. path: page.destinationPath,
  263. title: page.title,
  264. description: page.description,
  265. content: page.safeContent
  266. }
  267. }
  268. ])
  269. }).promise()
  270. },
  271. /**
  272. * REBUILD INDEX
  273. */
  274. async rebuild() {
  275. WIKI.logger.info(`(SEARCH/AWS) Rebuilding Index...`)
  276. const MAX_DOCUMENT_BYTES = Math.pow(2, 20)
  277. const MAX_INDEXING_BYTES = 5 * Math.pow(2, 20) - Buffer.from('[').byteLength - Buffer.from(']').byteLength
  278. const MAX_INDEXING_COUNT = 1000
  279. const COMMA_BYTES = Buffer.from(',').byteLength
  280. let chunks = []
  281. let bytes = 0
  282. const processDocument = async (cb, doc) => {
  283. try {
  284. if (doc) {
  285. const docBytes = Buffer.from(JSON.stringify(doc)).byteLength
  286. // -> Document too large
  287. if (docBytes >= MAX_DOCUMENT_BYTES) {
  288. throw new Error('Document exceeds maximum size allowed by AWS CloudSearch.')
  289. }
  290. // -> Current batch exceeds size hard limit, flush
  291. if (docBytes + COMMA_BYTES + bytes >= MAX_INDEXING_BYTES) {
  292. await flushBuffer()
  293. }
  294. if (chunks.length > 0) {
  295. bytes += COMMA_BYTES
  296. }
  297. bytes += docBytes
  298. chunks.push(doc)
  299. // -> Current batch exceeds count soft limit, flush
  300. if (chunks.length >= MAX_INDEXING_COUNT) {
  301. await flushBuffer()
  302. }
  303. } else {
  304. // -> End of stream, flush
  305. await flushBuffer()
  306. }
  307. cb()
  308. } catch (err) {
  309. cb(err)
  310. }
  311. }
  312. const flushBuffer = async () => {
  313. WIKI.logger.info(`(SEARCH/AWS) Sending batch of ${chunks.length}...`)
  314. try {
  315. await this.clientDomain.uploadDocuments({
  316. contentType: 'application/json',
  317. documents: JSON.stringify(_.map(chunks, doc => ({
  318. type: 'add',
  319. id: doc.id,
  320. fields: {
  321. locale: doc.locale,
  322. path: doc.path,
  323. title: doc.title,
  324. description: doc.description,
  325. content: WIKI.models.pages.cleanHTML(doc.render)
  326. }
  327. })))
  328. }).promise()
  329. } catch (err) {
  330. WIKI.logger.warn('(SEARCH/AWS) Failed to send batch to AWS CloudSearch: ', err)
  331. }
  332. chunks.length = 0
  333. bytes = 0
  334. }
  335. await pipeline(
  336. WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render').select().from('pages').where({
  337. isPublished: true,
  338. isPrivate: false
  339. }).stream(),
  340. new stream.Transform({
  341. objectMode: true,
  342. transform: async (chunk, enc, cb) => processDocument(cb, chunk),
  343. flush: async (cb) => processDocument(cb)
  344. })
  345. )
  346. WIKI.logger.info(`(SEARCH/AWS) Requesting Index Rebuild...`)
  347. await this.client.indexDocuments({
  348. DomainName: this.config.domain
  349. }).promise()
  350. WIKI.logger.info(`(SEARCH/AWS) Index rebuilt successfully.`)
  351. }
  352. }