You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

172 lines
5.9 KiB

  1. const tsquery = require('pg-tsquery')()
  2. const stream = require('stream')
  3. const Promise = require('bluebird')
  4. const pipeline = Promise.promisify(stream.pipeline)
  5. /* global WIKI */
  6. module.exports = {
  7. async activate() {
  8. if (WIKI.config.db.type !== 'postgres') {
  9. throw new WIKI.Error.SearchActivationFailed('Must use PostgreSQL database to activate this engine!')
  10. }
  11. },
  12. async deactivate() {
  13. WIKI.logger.info(`(SEARCH/POSTGRES) Dropping index tables...`)
  14. await WIKI.models.knex.schema.dropTable('pagesWords')
  15. await WIKI.models.knex.schema.dropTable('pagesVector')
  16. WIKI.logger.info(`(SEARCH/POSTGRES) Index tables have been dropped.`)
  17. },
  18. /**
  19. * INIT
  20. */
  21. async init() {
  22. WIKI.logger.info(`(SEARCH/POSTGRES) Initializing...`)
  23. // -> Create Search Index
  24. const indexExists = await WIKI.models.knex.schema.hasTable('pagesVector')
  25. if (!indexExists) {
  26. WIKI.logger.info(`(SEARCH/POSTGRES) Creating Pages Vector table...`)
  27. await WIKI.models.knex.schema.createTable('pagesVector', table => {
  28. table.increments()
  29. table.string('path')
  30. table.string('locale')
  31. table.string('title')
  32. table.string('description')
  33. table.specificType('tokens', 'TSVECTOR')
  34. table.text('content')
  35. })
  36. }
  37. // -> Create Words Index
  38. const wordsExists = await WIKI.models.knex.schema.hasTable('pagesWords')
  39. if (!wordsExists) {
  40. WIKI.logger.info(`(SEARCH/POSTGRES) Creating Words Suggestion Index...`)
  41. await WIKI.models.knex.raw(`
  42. CREATE TABLE "pagesWords" AS SELECT word FROM ts_stat(
  43. 'SELECT to_tsvector(''simple'', "title") || to_tsvector(''simple'', "description") || to_tsvector(''simple'', "content") FROM "pagesVector"'
  44. )`)
  45. await WIKI.models.knex.raw('CREATE EXTENSION IF NOT EXISTS pg_trgm')
  46. await WIKI.models.knex.raw(`CREATE INDEX "pageWords_idx" ON "pagesWords" USING GIN (word gin_trgm_ops)`)
  47. }
  48. WIKI.logger.info(`(SEARCH/POSTGRES) Initialization completed.`)
  49. },
  50. /**
  51. * QUERY
  52. *
  53. * @param {String} q Query
  54. * @param {Object} opts Additional options
  55. */
  56. async query(q, opts) {
  57. try {
  58. let suggestions = []
  59. const results = await WIKI.models.knex.raw(`
  60. SELECT id, path, locale, title, description
  61. FROM "pagesVector", to_tsquery(?) query
  62. WHERE query @@ "tokens"
  63. ORDER BY ts_rank(tokens, query) DESC
  64. `, [tsquery(q)])
  65. if (results.rows.length < 5) {
  66. const suggestResults = await WIKI.models.knex.raw(`SELECT word, word <-> ? AS rank FROM "pagesWords" WHERE similarity(word, ?) > 0.2 ORDER BY rank LIMIT 5;`, [q, q])
  67. suggestions = suggestResults.rows.map(r => r.word)
  68. }
  69. return {
  70. results: results.rows,
  71. suggestions,
  72. totalHits: results.rows.length
  73. }
  74. } catch (err) {
  75. WIKI.logger.warn('Search Engine Error:')
  76. WIKI.logger.warn(err)
  77. }
  78. },
  79. /**
  80. * CREATE
  81. *
  82. * @param {Object} page Page to create
  83. */
  84. async created(page) {
  85. await WIKI.models.knex.raw(`
  86. INSERT INTO "pagesVector" (path, locale, title, description, "tokens") VALUES (
  87. ?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
  88. )
  89. `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.safeContent])
  90. },
  91. /**
  92. * UPDATE
  93. *
  94. * @param {Object} page Page to update
  95. */
  96. async updated(page) {
  97. await WIKI.models.knex.raw(`
  98. UPDATE "pagesVector" SET
  99. title = ?,
  100. description = ?,
  101. tokens = (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') ||
  102. setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') ||
  103. setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
  104. WHERE path = ? AND locale = ?
  105. `, [page.title, page.description, page.title, page.description, page.safeContent, page.path, page.localeCode])
  106. },
  107. /**
  108. * DELETE
  109. *
  110. * @param {Object} page Page to delete
  111. */
  112. async deleted(page) {
  113. await WIKI.models.knex('pagesVector').where({
  114. locale: page.localeCode,
  115. path: page.path
  116. }).del().limit(1)
  117. },
  118. /**
  119. * RENAME
  120. *
  121. * @param {Object} page Page to rename
  122. */
  123. async renamed(page) {
  124. await WIKI.models.knex('pagesVector').where({
  125. locale: page.localeCode,
  126. path: page.sourcePath
  127. }).update({
  128. locale: page.localeCode,
  129. path: page.destinationPath
  130. })
  131. },
  132. /**
  133. * REBUILD INDEX
  134. */
  135. async rebuild() {
  136. WIKI.logger.info(`(SEARCH/POSTGRES) Rebuilding Index...`)
  137. await WIKI.models.knex('pagesVector').truncate()
  138. await WIKI.models.knex('pagesWords').truncate()
  139. await pipeline(
  140. WIKI.models.knex.column('path', 'localeCode', 'title', 'description', 'render').select().from('pages').where({
  141. isPublished: true,
  142. isPrivate: false
  143. }).stream(),
  144. new stream.Transform({
  145. objectMode: true,
  146. transform: async (page, enc, cb) => {
  147. const content = WIKI.models.pages.cleanHTML(page.render)
  148. await WIKI.models.knex.raw(`
  149. INSERT INTO "pagesVector" (path, locale, title, description, "tokens", content) VALUES (
  150. ?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')), ?
  151. )
  152. `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, content, content])
  153. cb()
  154. }
  155. })
  156. )
  157. await WIKI.models.knex.raw(`
  158. INSERT INTO "pagesWords" (word)
  159. SELECT word FROM ts_stat(
  160. 'SELECT to_tsvector(''simple'', "title") || to_tsvector(''simple'', "description") || to_tsvector(''simple'', "content") FROM "pagesVector"'
  161. )
  162. `)
  163. WIKI.logger.info(`(SEARCH/POSTGRES) Index rebuilt successfully.`)
  164. }
  165. }