mirror of https://github.com/Requarks/wiki.git
2 changed files with 299 additions and 20 deletions
Unified View
Diff Options
-
20server/modules/search/elasticsearch/definition.yml
-
299server/modules/search/elasticsearch/engine.js
@ -1,26 +1,297 @@ |
|||||
module.exports = { |
|
||||
activate() { |
|
||||
|
const _ = require('lodash') |
||||
|
const elasticsearch = require('elasticsearch') |
||||
|
const { pipeline, Transform } = require('stream') |
||||
|
|
||||
}, |
|
||||
deactivate() { |
|
||||
|
/* global WIKI */ |
||||
|
|
||||
|
module.exports = { |
||||
|
async activate() { |
||||
|
// not used
|
||||
}, |
}, |
||||
query() { |
|
||||
|
|
||||
|
async deactivate() { |
||||
|
// not used
|
||||
}, |
}, |
||||
created() { |
|
||||
|
/** |
||||
|
* INIT |
||||
|
*/ |
||||
|
async init() { |
||||
|
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Initializing...`) |
||||
|
this.client = new elasticsearch.Client({ |
||||
|
apiVersion: this.config.apiVersion, |
||||
|
hosts: this.config.hosts.split(',').map(_.trim), |
||||
|
httpAuth: (this.config.user.length > 0) ? `${this.config.user}:${this.config.pass}` : null, |
||||
|
sniffOnStart: this.config.sniffOnStart, |
||||
|
sniffInterval: (this.config.sniffInterval > 0) ? this.config.sniffInterval : false |
||||
|
}) |
||||
|
|
||||
}, |
|
||||
updated() { |
|
||||
|
// -> Create Search Index
|
||||
|
await this.createIndex() |
||||
|
|
||||
|
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Initialization completed.`) |
||||
}, |
}, |
||||
deleted() { |
|
||||
|
|
||||
|
/** |
||||
|
* Create Index |
||||
|
*/ |
||||
|
async createIndex() { |
||||
|
const indexExists = await this.client.indices.exists({ index: this.config.indexName }) |
||||
|
if (!indexExists) { |
||||
|
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Creating index...`) |
||||
|
await this.client.indices.create({ |
||||
|
index: this.config.indexName, |
||||
|
body: { |
||||
|
mappings: { |
||||
|
_doc: { |
||||
|
properties: { |
||||
|
suggest: { type: 'completion' }, |
||||
|
title: { type: 'text', boost: 4.0 }, |
||||
|
description: { type: 'text', boost: 3.0 }, |
||||
|
content: { type: 'text', boost: 1.0 }, |
||||
|
locale: { type: 'keyword' }, |
||||
|
path: { type: 'text' } |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
}) |
||||
|
} |
||||
}, |
}, |
||||
renamed() { |
|
||||
|
|
||||
|
/** |
||||
|
* QUERY |
||||
|
* |
||||
|
* @param {String} q Query |
||||
|
* @param {Object} opts Additional options |
||||
|
*/ |
||||
|
async query(q, opts) { |
||||
|
try { |
||||
|
const results = await this.client.search({ |
||||
|
index: this.config.indexName, |
||||
|
body: { |
||||
|
query: { |
||||
|
simple_query_string: { |
||||
|
query: q |
||||
|
} |
||||
|
}, |
||||
|
from: 0, |
||||
|
size: 50, |
||||
|
_source: ['title', 'description', 'path', 'locale'], |
||||
|
suggest: { |
||||
|
suggestions: { |
||||
|
text: q, |
||||
|
completion: { |
||||
|
field: 'suggest', |
||||
|
size: 5, |
||||
|
skip_duplicates: true, |
||||
|
fuzzy: true |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
}) |
||||
|
return { |
||||
|
results: _.get(results, 'hits.hits', []).map(r => ({ |
||||
|
id: r._id, |
||||
|
locale: r._source.locale, |
||||
|
path: r._source.path, |
||||
|
title: r._source.title, |
||||
|
description: r._source.description |
||||
|
})), |
||||
|
suggestions: _.reject(_.get(results, 'suggest.suggestions', []).map(s => _.get(s, 'options[0].text', false)), s => !s), |
||||
|
totalHits: results.hits.total |
||||
|
} |
||||
|
} catch (err) { |
||||
|
WIKI.logger.warn('Search Engine Error:') |
||||
|
WIKI.logger.warn(err) |
||||
|
} |
||||
|
}, |
||||
|
/** |
||||
|
* Build suggest field |
||||
|
*/ |
||||
|
buildSuggest(page) { |
||||
|
return _.uniq(_.concat( |
||||
|
page.title.split(' ').map(s => ({ |
||||
|
input: s, |
||||
|
weight: 4 |
||||
|
})), |
||||
|
page.description.split(' ').map(s => ({ |
||||
|
input: s, |
||||
|
weight: 3 |
||||
|
})), |
||||
|
page.content.split(' ').map(s => ({ |
||||
|
input: s, |
||||
|
weight: 1 |
||||
|
})) |
||||
|
)) |
||||
|
}, |
||||
|
/** |
||||
|
* CREATE |
||||
|
* |
||||
|
* @param {Object} page Page to create |
||||
|
*/ |
||||
|
async created(page) { |
||||
|
await this.client.index({ |
||||
|
index: this.config.indexName, |
||||
|
type: '_doc', |
||||
|
id: page.hash, |
||||
|
body: { |
||||
|
suggest: this.buildSuggest(page), |
||||
|
locale: page.localeCode, |
||||
|
path: page.path, |
||||
|
title: page.title, |
||||
|
description: page.description, |
||||
|
content: page.content |
||||
|
}, |
||||
|
refresh: true |
||||
|
}) |
||||
|
}, |
||||
|
/** |
||||
|
* UPDATE |
||||
|
* |
||||
|
* @param {Object} page Page to update |
||||
|
*/ |
||||
|
async updated(page) { |
||||
|
await this.client.index({ |
||||
|
index: this.config.indexName, |
||||
|
type: '_doc', |
||||
|
id: page.hash, |
||||
|
body: { |
||||
|
suggest: this.buildSuggest(page), |
||||
|
locale: page.localeCode, |
||||
|
path: page.path, |
||||
|
title: page.title, |
||||
|
description: page.description, |
||||
|
content: page.content |
||||
|
}, |
||||
|
refresh: true |
||||
|
}) |
||||
}, |
}, |
||||
rebuild() { |
|
||||
|
/** |
||||
|
* DELETE |
||||
|
* |
||||
|
* @param {Object} page Page to delete |
||||
|
*/ |
||||
|
async deleted(page) { |
||||
|
await this.client.delete({ |
||||
|
index: this.config.indexName, |
||||
|
type: '_doc', |
||||
|
id: page.hash, |
||||
|
refresh: true |
||||
|
}) |
||||
|
}, |
||||
|
/** |
||||
|
* RENAME |
||||
|
* |
||||
|
* @param {Object} page Page to rename |
||||
|
*/ |
||||
|
async renamed(page) { |
||||
|
await this.client.delete({ |
||||
|
index: this.config.indexName, |
||||
|
type: '_doc', |
||||
|
id: page.sourceHash, |
||||
|
refresh: true |
||||
|
}) |
||||
|
await this.client.index({ |
||||
|
index: this.config.indexName, |
||||
|
type: '_doc', |
||||
|
id: page.destinationHash, |
||||
|
body: { |
||||
|
suggest: this.buildSuggest(page), |
||||
|
locale: page.localeCode, |
||||
|
path: page.destinationPath, |
||||
|
title: page.title, |
||||
|
description: page.description, |
||||
|
content: page.content |
||||
|
}, |
||||
|
refresh: true |
||||
|
}) |
||||
|
}, |
||||
|
/** |
||||
|
* REBUILD INDEX |
||||
|
*/ |
||||
|
async rebuild() { |
||||
|
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Rebuilding Index...`) |
||||
|
await this.client.indices.delete({ index: this.config.indexName }) |
||||
|
await this.createIndex() |
||||
|
|
||||
|
const MAX_INDEXING_BYTES = 10 * Math.pow(2, 20) - Buffer.from('[').byteLength - Buffer.from(']').byteLength // 10 MB
|
||||
|
const MAX_INDEXING_COUNT = 1000 |
||||
|
const COMMA_BYTES = Buffer.from(',').byteLength |
||||
|
|
||||
|
let chunks = [] |
||||
|
let bytes = 0 |
||||
|
|
||||
|
const processDocument = async (cb, doc) => { |
||||
|
try { |
||||
|
if (doc) { |
||||
|
const docBytes = Buffer.from(JSON.stringify(doc)).byteLength |
||||
|
|
||||
|
// -> Current batch exceeds size limit, flush
|
||||
|
if (docBytes + COMMA_BYTES + bytes >= MAX_INDEXING_BYTES) { |
||||
|
await flushBuffer() |
||||
|
} |
||||
|
|
||||
|
if (chunks.length > 0) { |
||||
|
bytes += COMMA_BYTES |
||||
|
} |
||||
|
bytes += docBytes |
||||
|
chunks.push(doc) |
||||
|
|
||||
|
// -> Current batch exceeds count limit, flush
|
||||
|
if (chunks.length >= MAX_INDEXING_COUNT) { |
||||
|
await flushBuffer() |
||||
|
} |
||||
|
} else { |
||||
|
// -> End of stream, flush
|
||||
|
await flushBuffer() |
||||
|
} |
||||
|
cb() |
||||
|
} catch (err) { |
||||
|
cb(err) |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
const flushBuffer = async () => { |
||||
|
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Sending batch of ${chunks.length}...`) |
||||
|
try { |
||||
|
await this.client.bulk({ |
||||
|
index: this.config.indexName, |
||||
|
body: _.reduce(chunks, (result, doc) => { |
||||
|
result.push({ |
||||
|
index: { |
||||
|
_index: this.config.indexName, |
||||
|
_type: '_doc', |
||||
|
_id: doc.id |
||||
|
} |
||||
|
}) |
||||
|
result.push({ |
||||
|
suggest: this.buildSuggest(doc), |
||||
|
locale: doc.locale, |
||||
|
path: doc.path, |
||||
|
title: doc.title, |
||||
|
description: doc.description, |
||||
|
content: doc.content |
||||
|
}) |
||||
|
return result |
||||
|
}, []), |
||||
|
refresh: true |
||||
|
}) |
||||
|
} catch (err) { |
||||
|
WIKI.logger.warn('(SEARCH/ELASTICSEARCH) Failed to send batch to elasticsearch: ', err) |
||||
|
} |
||||
|
chunks.length = 0 |
||||
|
bytes = 0 |
||||
|
} |
||||
|
|
||||
|
await pipeline( |
||||
|
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({ |
||||
|
isPublished: true, |
||||
|
isPrivate: false |
||||
|
}).stream(), |
||||
|
new Transform({ |
||||
|
objectMode: true, |
||||
|
transform: async (chunk, enc, cb) => processDocument(cb, chunk), |
||||
|
flush: async (cb) => processDocument(cb) |
||||
|
}) |
||||
|
) |
||||
|
WIKI.logger.info(`(SEARCH/ELASTICSEARCH) Index rebuilt successfully.`) |
||||
} |
} |
||||
} |
} |
Write
Preview
Loading…
Cancel
Save