From c9648371e615221567dfe8c1b862341bb65445c3 Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 13 Mar 2019 02:52:08 -0400 Subject: [PATCH] feat: aws cloudsearch engine (wip) --- package.json | 1 + server/modules/search/aws/definition.yml | 84 +++++++- server/modules/search/aws/engine.js | 240 +++++++++++++++++++++-- server/modules/search/azure/engine.js | 5 + server/modules/search/postgres/engine.js | 8 + 5 files changed, 321 insertions(+), 17 deletions(-) diff --git a/package.json b/package.json index 6197d275..7f0e860c 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "apollo-server": "2.3.3", "apollo-server-express": "2.3.3", "auto-load": "3.0.4", + "aws-sdk": "2.420.0", "axios": "0.18.0", "azure-search-client": "3.1.5", "bcryptjs-then": "1.0.1", diff --git a/server/modules/search/aws/definition.yml b/server/modules/search/aws/definition.yml index 64b1af66..f7f87f92 100644 --- a/server/modules/search/aws/definition.yml +++ b/server/modules/search/aws/definition.yml @@ -4,5 +4,85 @@ description: Amazon CloudSearch is a managed service in the AWS Cloud that makes author: requarks.io logo: https://static.requarks.io/logo/aws-cloudsearch.svg website: https://aws.amazon.com/cloudsearch/ -isAvailable: false -props: {} +isAvailable: true +props: + domain: + type: String + title: Search Domain + hint: The name of your CloudSearch service. + order: 1 + endpoint: + type: String + title: Document Endpoint + hint: The Document Endpoint specified in the domain AWS console dashboard. + order: 2 + region: + type: String + title: Region + hint: The AWS datacenter region where the instance was created. + default: us-east-1 + enum: + - ap-northeast-1 + - ap-northeast-2 + - ap-southeast-1 + - ap-southeast-2 + - eu-central-1 + - eu-west-1 + - sa-east-1 + - us-east-1 + - us-west-1 + - us-west-2 + order: 3 + accessKeyId: + type: String + title: Access Key ID + hint: The Access Key ID with CloudSearchFullAccess role access to the CloudSearch instance. 
+ order: 4 + secretAccessKey : + type: String + title: Secret Access Key + hint: The Secret Access Key for the Access Key ID provided above. + order: 5 + AnalysisSchemeLang: + type: String + title: Analysis Scheme Language + hint: The language used to analyse content. + default: en + enum: + - 'ar' + - 'bg' + - 'ca' + - 'cs' + - 'da' + - 'de' + - 'el' + - 'en' + - 'es' + - 'eu' + - 'fa' + - 'fi' + - 'fr' + - 'ga' + - 'gl' + - 'he' + - 'hi' + - 'hu' + - 'hy' + - 'id' + - 'it' + - 'ja' + - 'ko' + - 'lv' + - 'mul' + - 'nl' + - 'no' + - 'pt' + - 'ro' + - 'ru' + - 'sv' + - 'th' + - 'tr' + - 'zh-Hans' + - 'zh-Hant' + order: 6 + diff --git a/server/modules/search/aws/engine.js b/server/modules/search/aws/engine.js index e7369ccd..6c05496b 100644 --- a/server/modules/search/aws/engine.js +++ b/server/modules/search/aws/engine.js @@ -1,26 +1,236 @@ -module.exports = { - activate() { +const _ = require('lodash') +const AWS = require('aws-sdk') +const { pipeline } = require('stream') +module.exports = { + async activate() { + // not used }, - deactivate() { - + async deactivate() { + // not used }, - query() { + /** + * INIT + */ + async init() { + WIKI.logger.info(`(SEARCH/AWS) Initializing...`) + this.client = new AWS.CloudSearch({ + apiVersion: '2013-01-01', + accessKeyId: this.config.accessKeyId, + secretAccessKey: this.config.secretAccessKey, + region: this.config.region + }) - }, - created() { + let rebuildIndex = false - }, - updated() { + // -> Define Analysis Schemes + const schemes = await this.client.describeAnalysisSchemes({ + DomainName: this.config.domain, + AnalysisSchemeNames: ['default_anlscheme'] + }).promise() + if (_.get(schemes, 'AnalysisSchemes', []).length < 1) { + WIKI.logger.info(`(SEARCH/AWS) Defining Analysis Scheme...`) + await this.client.defineAnalysisScheme({ + DomainName: this.config.domain, + AnalysisScheme: { + AnalysisSchemeLanguage: this.config.AnalysisSchemeLang, + AnalysisSchemeName: 'default_anlscheme' + } + }).promise() + rebuildIndex = 
true + } - }, - deleted() { + // -> Define Index Fields + const fields = await this.client.describeIndexFields({ + DomainName: this.config.domain + }).promise() + if (_.get(fields, 'IndexFields', []).length < 1) { + WIKI.logger.info(`(SEARCH/AWS) Defining Index Fields...`) + await this.client.defineIndexField({ + DomainName: this.config.domain, + IndexField: { + IndexFieldName: 'id', + IndexFieldType: 'literal' + } + }).promise() + await this.client.defineIndexField({ + DomainName: this.config.domain, + IndexField: { + IndexFieldName: 'path', + IndexFieldType: 'literal' + } + }).promise() + await this.client.defineIndexField({ + DomainName: this.config.domain, + IndexField: { + IndexFieldName: 'locale', + IndexFieldType: 'literal' + } + }).promise() + await this.client.defineIndexField({ + DomainName: this.config.domain, + IndexField: { + IndexFieldName: 'title', + IndexFieldType: 'text', + TextOptions: { + ReturnEnabled: true, + AnalysisScheme: 'default_anlscheme' + } + } + }).promise() + await this.client.defineIndexField({ + DomainName: this.config.domain, + IndexField: { + IndexFieldName: 'description', + IndexFieldType: 'text', + TextOptions: { + ReturnEnabled: true, + AnalysisScheme: 'default_anlscheme' + } + } + }).promise() + await this.client.defineIndexField({ + DomainName: this.config.domain, + IndexField: { + IndexFieldName: 'content', + IndexFieldType: 'text', + TextOptions: { + ReturnEnabled: false, + AnalysisScheme: 'default_anlscheme' + } + } + }).promise() + rebuildIndex = true + } - }, - renamed() { + //-> Define suggester + const suggesters = await this.client.describeSuggesters({ + DomainName: this.config.domain, + SuggesterNames: ['default_suggester'] + }).promise() + if(_.get(suggesters, 'Suggesters', []).length < 1) { + WIKI.logger.info(`(SEARCH/AWS) Defining Suggester...`) + await this.client.defineSuggester({ + DomainName: this.config.domain, + Suggester: { + SuggesterName: 'default_suggester', + DocumentSuggesterOptions: { + SourceField: 
'title', + FuzzyMatching: 'high' + } + } + }).promise() + rebuildIndex = true + } - }, - rebuild() { + // -> Rebuild Index + if (rebuildIndex) { + WIKI.logger.info(`(SEARCH/AWS) Requesting Index Rebuild...`) + await this.client.indexDocuments({ + DomainName: this.config.domain + }).promise() + } + WIKI.logger.info(`(SEARCH/AWS) Initialization completed.`) + }, + /** + * QUERY + * + * @param {String} q Query + * @param {Object} opts Additional options + */ + async query(q, opts) { + try { + return { + results: [], + suggestions: [], + totalHits: 0 + } + } catch (err) { + WIKI.logger.warn('Search Engine Error:') + WIKI.logger.warn(err) + } + }, + /** + * CREATE + * + * @param {Object} page Page to create + */ + async created(page) { + await this.client.indexes.use(this.config.indexName).index([ + { + id: page.hash, + locale: page.localeCode, + path: page.path, + title: page.title, + description: page.description, + content: page.content + } + ]) + }, + /** + * UPDATE + * + * @param {Object} page Page to update + */ + async updated(page) { + await this.client.indexes.use(this.config.indexName).index([ + { + id: page.hash, + locale: page.localeCode, + path: page.path, + title: page.title, + description: page.description, + content: page.content + } + ]) + }, + /** + * DELETE + * + * @param {Object} page Page to delete + */ + async deleted(page) { + await this.client.indexes.use(this.config.indexName).index([ + { + '@search.action': 'delete', + id: page.hash + } + ]) + }, + /** + * RENAME + * + * @param {Object} page Page to rename + */ + async renamed(page) { + await this.client.indexes.use(this.config.indexName).index([ + { + '@search.action': 'delete', + id: page.sourceHash + } + ]) + await this.client.indexes.use(this.config.indexName).index([ + { + id: page.destinationHash, + locale: page.localeCode, + path: page.destinationPath, + title: page.title, + description: page.description, + content: page.content + } + ]) + }, + /** + * REBUILD INDEX + */ + async 
rebuild() { + await pipeline( + WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({ + isPublished: true, + isPrivate: false + }).stream(), + this.client.indexes.use(this.config.indexName).createIndexingStream() + ) } } diff --git a/server/modules/search/azure/engine.js b/server/modules/search/azure/engine.js index 6bd2df8c..0a9c0ef7 100644 --- a/server/modules/search/azure/engine.js +++ b/server/modules/search/azure/engine.js @@ -14,11 +14,13 @@ module.exports = { * INIT */ async init() { + WIKI.logger.info(`(SEARCH/AZURE) Initializing...`) this.client = new SearchService(this.config.serviceName, this.config.adminKey) // -> Create Search Index const indexes = await this.client.indexes.list() if (!_.find(_.get(indexes, 'result.value', []), ['name', this.config.indexName])) { + WIKI.logger.info(`(SEARCH/AZURE) Creating index...`) await this.client.indexes.create({ name: this.config.indexName, fields: [ @@ -75,6 +77,7 @@ module.exports = { ], }) } + WIKI.logger.info(`(SEARCH/AZURE) Initialization completed.`) }, /** * QUERY @@ -202,6 +205,7 @@ module.exports = { * REBUILD INDEX */ async rebuild() { + WIKI.logger.info(`(SEARCH/AZURE) Rebuilding Index...`) await pipeline( WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({ isPublished: true, @@ -209,5 +213,6 @@ module.exports = { }).stream(), this.client.indexes.use(this.config.indexName).createIndexingStream() ) + WIKI.logger.info(`(SEARCH/AZURE) Index rebuilt successfully.`) } } diff --git a/server/modules/search/postgres/engine.js b/server/modules/search/postgres/engine.js index e35be778..1e4eba59 100644 --- a/server/modules/search/postgres/engine.js +++ b/server/modules/search/postgres/engine.js @@ -14,9 +14,12 @@ module.exports = { * INIT */ async init() { + WIKI.logger.info(`(SEARCH/POSTGRES) Initializing...`) + // -> Create Search Index const
indexExists = await WIKI.models.knex.schema.hasTable('pagesVector') if (!indexExists) { + WIKI.logger.info(`(SEARCH/POSTGRES) Creating Pages Vector table...`) await WIKI.models.knex.schema.createTable('pagesVector', table => { table.increments() table.string('path') @@ -29,6 +32,7 @@ module.exports = { // -> Create Words Index const wordsExists = await WIKI.models.knex.schema.hasTable('pagesWords') if (!wordsExists) { + WIKI.logger.info(`(SEARCH/POSTGRES) Creating Words Suggestion Index...`) await WIKI.models.knex.raw(` CREATE TABLE "pagesWords" AS SELECT word FROM ts_stat( 'SELECT to_tsvector(''simple'', pages."title") || to_tsvector(''simple'', pages."description") || to_tsvector(''simple'', pages."content") FROM pages WHERE pages."isPublished" AND NOT pages."isPrivate"' @@ -36,6 +40,8 @@ module.exports = { await WIKI.models.knex.raw('CREATE EXTENSION IF NOT EXISTS pg_trgm') await WIKI.models.knex.raw(`CREATE INDEX "pageWords_idx" ON "pagesWords" USING GIN (word gin_trgm_ops)`) } + + WIKI.logger.info(`(SEARCH/POSTGRES) Initialization completed.`) }, /** * QUERY @@ -124,6 +130,7 @@ module.exports = { * REBUILD INDEX */ async rebuild() { + WIKI.logger.info(`(SEARCH/POSTGRES) Rebuilding Index...`) await WIKI.models.knex('pagesVector').truncate() await WIKI.models.knex.raw(` INSERT INTO "pagesVector" (path, locale, title, description, "tokens") @@ -133,5 +140,6 @@ module.exports = { setweight(to_tsvector('${this.config.dictLanguage}', content), 'C')) AS tokens FROM "pages" WHERE pages."isPublished" AND NOT pages."isPrivate"`) + WIKI.logger.info(`(SEARCH/POSTGRES) Index rebuilt successfully.`) } }