From f34e98625d8759c652535f98aafc467e032543a8 Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Thu, 16 Jan 2020 04:31:53 +0800 Subject: [PATCH 01/10] apply patch from original submission --- .gitignore | 1 + docs/es-db-compare.md | 21 ++ package.json | 7 +- scripts/es-db-compare/compareProjects.js | 279 +++++++++++++++++++++++ scripts/es-db-compare/constants.js | 13 ++ scripts/es-db-compare/index.js | 191 ++++++++++++++++ scripts/es-db-compare/report.mustache | 89 ++++++++ scripts/es-db-compare/util.js | 180 +++++++++++++++ 8 files changed, 779 insertions(+), 2 deletions(-) create mode 100644 docs/es-db-compare.md create mode 100644 scripts/es-db-compare/compareProjects.js create mode 100644 scripts/es-db-compare/constants.js create mode 100644 scripts/es-db-compare/index.js create mode 100644 scripts/es-db-compare/report.mustache create mode 100644 scripts/es-db-compare/util.js diff --git a/.gitignore b/.gitignore index edd85b28..e59603ad 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ jspm_packages !.elasticbeanstalk/*.global.yml .DS_Store .idea +report.html diff --git a/docs/es-db-compare.md b/docs/es-db-compare.md new file mode 100644 index 00000000..a27a7d98 --- /dev/null +++ b/docs/es-db-compare.md @@ -0,0 +1,21 @@ +# es-db-compare + +## Configuration +The following properties can be set from env variables: + +- PROJECT_START_ID: if set, only projects with id that large than or equal to the value are compared. +- PROJECT_END_ID: if set, only projects with id that less than or equal to the value are compared. +- PROJECT_LAST_ACTIVITY_AT: if set, only projects with property lastActivityAt that large than or equal to the value are compared. + +There could be some fields that always mismatch in ES and DB. +The variable named `ignoredProperties` at `scripts/es-db-compare/constants.js` maintains a list of fields which will be ignored +during the comparation. You may need to modify/add/delete items in the list. 
+ +### Note +- `PROJECT_START_ID` and `PROJECT_END_ID` must exist together. +- At least one of `PROJECT_START_ID(also PROJECT_END_ID)` and `PROJECT_LAST_ACTIVITY_AT` needs be set before running the script. + +## Usage + +Set up configuration and execute command `npm run es-db-compare` on the command line. +It will then generate a HTML report with name `report.html` under the current directory. diff --git a/package.json b/package.json index a7e72a9c..68962e0b 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,8 @@ "test": "NODE_ENV=test npm run lint && NODE_ENV=test npm run sync:es && NODE_ENV=test npm run sync:db && NODE_ENV=test ./node_modules/.bin/istanbul cover ./node_modules/mocha/bin/_mocha -- --timeout 10000 --require babel-core/register $(find src -path '*spec.js*') --exit", "test:watch": "NODE_ENV=test ./node_modules/.bin/mocha -w --require babel-core/register $(find src -path '*spec.js*')", "seed": "babel-node src/tests/seed.js --presets es2015", - "demo-data": "babel-node local/seed" + "demo-data": "babel-node local/seed", + "es-db-compare": "babel-node scripts/es-db-compare" }, "repository": { "type": "git", @@ -53,8 +54,11 @@ "express-request-id": "^1.1.0", "express-sanitizer": "^1.0.2", "express-validation": "^0.6.0", + "handlebars": "^4.5.3", "http-aws-es": "^4.0.0", "joi": "^8.0.5", + "jsondiffpatch": "^0.4.1", + "jsonpath": "^1.0.2", "jsonwebtoken": "^8.3.0", "lodash": "^4.17.11", "memwatch-next": "^0.3.0", @@ -64,7 +68,6 @@ "pg": "^7.11.0", "pg-native": "^3.0.0", "sequelize": "^5.8.7", - "jsonpath": "^1.0.2", "swagger-ui-express": "^4.0.6", "tc-core-library-js": "appirio-tech/tc-core-library-js.git#v2.6.3", "traverse": "^0.6.6", diff --git a/scripts/es-db-compare/compareProjects.js b/scripts/es-db-compare/compareProjects.js new file mode 100644 index 00000000..787bccdd --- /dev/null +++ b/scripts/es-db-compare/compareProjects.js @@ -0,0 +1,279 @@ +/* eslint-disable no-console */ +/* eslint-disable consistent-return */ +/* eslint-disable 
no-restricted-syntax */ +/* + * Compare the data from database and data from ES. + * Specific to project-related data. + * + * Please consider decouple some reusable logics from this module before create + * modules to compare other models. + */ + +const Diff = require('jsondiffpatch'); +const lodash = require('lodash'); +const scriptUtil = require('./util'); +const scriptConstants = require('./constants'); + +const associations = { + phases: 'Phase', + members: 'Member', + invites: 'Invite', + attachment: 'Attachment', +}; + +const differ = Diff.create({ + objectHash: obj => obj.id, + propertyFilter: (name) => { + if (scriptConstants.ignoredProperties.includes(name)) { + return false; + } + return true; + }, +}); + +/** + * Process diff delta to extract project-related data. + * + * @param {Object} delta the diff delta. See `util.flatten()` + * @param {Object} esData the data from ES + * @param {Object} dbData the data from DB + * @param {Object} finalData the data patched + * @returns {Object} Object project diff delta in a specific data structure + */ +function processDelta(delta, esData, dbData, finalData) { + const processMissingObject = (item, option) => { + if (item.type === 'delete') { + const projectId = lodash.get(dbData, lodash.slice(item.path, 0, 1)).id; + console.log(`one dbOnly found for ${option.modelName} with id ${item.originalValue.id}`); + return { + type: 'dbOnly', + projectId, + modelName: option.modelName, + id: item.originalValue.id, + dbCopy: item.originalValue, + }; + } + if (item.type === 'add') { + const projectId = lodash.get(esData, lodash.slice(item.path, 0, 1)).id; + console.log(`one esOnly found for ${option.modelName} with id ${item.value.id}`); + return { + type: 'esOnly', + projectId, + modelName: option.modelName, + id: item.value.id, + esCopy: item.value, + }; + } + }; + + const processProduct = (item) => { + const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 5) }); + if 
(itemNew.dataType === 'array') { + return processMissingObject(item, { modelName: 'Product' }); + } + if (['add', 'delete', 'modify'].includes(itemNew.type)) { + const path = scriptUtil.generateJSONPath(itemNew.path); + const id = lodash.get(finalData, lodash.slice(item.path, 0, 5)).id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; + const phaseId = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; + const dbCopy = lodash.find( + lodash.find( + lodash.find(dbData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id }, + ); + const esCopy = lodash.find( + lodash.find( + lodash.find(esData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id }, + ); + console.log(`one mismatch found for Product with id ${id}`); + return { + type: 'mismatch', + projectId, + id, + modelName: 'Product', + path, + dbCopy, + esCopy, + }; + } + }; + + const processAssociation = (item, option) => { + if (item.path[1] === 'phases' && item.path[3] === 'products') { + return processProduct(item); + } + const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 2) }); + if (itemNew.dataType === 'array') { + return processMissingObject(item, option); + } + if (['add', 'delete', 'modify'].includes(itemNew.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(itemNew.path, 1)); + const id = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; + const dbCopy = lodash.find( + lodash.find(dbData, { id: projectId })[option.refPath], + { id }, + ); + const esCopy = lodash.find( + lodash.find(esData, { id: projectId })[option.refPath], + { id }, + ); + console.log(`one mismatch found for ${option.modelName} with id ${id}`); + return { + type: 'mismatch', + projectId, + modelName: option.modelName, + id, + path, + dbCopy, + esCopy, + }; + } + }; + + if (delta.path.length > 2 && associations[delta.path[1]]) { + 
return processAssociation(delta, { modelName: associations[delta.path[1]], refPath: delta.path[1] }); + } + if (delta.dataType === 'array') { + return processMissingObject(delta, { modelName: 'Project' }); + } + if (['add', 'delete', 'modify'].includes(delta.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(delta.path, 1)); + const id = lodash.get(finalData, lodash.slice(delta.path, 0, 1)).id; + const dbCopy = lodash.find(dbData, { id }); + const esCopy = lodash.find(esData, { id }); + console.log(`one mismatch found for Project with id ${id}`); + return { + type: 'mismatch', + projectId: id, + modelName: 'Project', + id, + path, + dbCopy, + esCopy, + }; + } +} + +/** + * Compare Project data from ES and DB. + * + * @param {Object} esData the data from ES + * @param {Object} dbData the data from DB + * @returns {Object} the data to feed handlebars template + */ +function compareProjects(esData, dbData) { + const data = { + project: { + rootMismatch: {}, + esOnly: [], + dbOnly: [], + }, + meta: { + esCopies: [], + dbCopies: [], + counts: { + Project: 0, + }, + uniqueDeltas: [], + }, + }; + + const storeDelta = (root, delta) => { + if (delta.modelName === 'Project') { + if (delta.type === 'esOnly') { + data[root].esOnly.push(delta); + return; + } + if (delta.type === 'dbOnly') { + data[root].dbOnly.push(delta); + return; + } + } + if (!data[root].rootMismatch[delta.projectId]) { + data[root].rootMismatch[delta.projectId] = { project: [], associations: {} }; + } + if (delta.modelName === 'Project') { + data[root].rootMismatch[delta.projectId].project.push(delta); + return; + } + const currentAssociations = data[root].rootMismatch[delta.projectId].associations; + if (!Object.keys(currentAssociations).includes(delta.modelName)) { + currentAssociations[delta.modelName] = { + mismatches: {}, + esOnly: [], + dbOnly: [], + }; + } + if (delta.type === 'mismatch') { + const mismatches = currentAssociations[delta.modelName].mismatches; + if 
(!mismatches[delta.id]) { + mismatches[delta.id] = []; + } + mismatches[delta.id].push(delta); + return; + } + currentAssociations[delta.modelName][delta.type].push(delta); + }; + + const collectDataCopies = (delta) => { + if (delta.dbCopy) { + if (!lodash.find(data.meta.dbCopies, lodash.pick(delta, ['modelName', 'id']))) { + data.meta.dbCopies.push(delta); + } + } + if (delta.esCopy) { + if (!lodash.find(data.meta.esCopies, lodash.pick(delta, ['modelName', 'id']))) { + data.meta.esCopies.push(delta); + } + } + }; + + const countInconsistencies = () => { + lodash.set( + data.project, + 'meta.totalObjects', + data.project.dbOnly.length + data.project.esOnly.length, + ); + lodash.set( + data.project, + 'meta.totalProjects', + Object.keys(data.project.rootMismatch).length + data.project.dbOnly.length + data.project.esOnly.length, + ); + lodash.map(data.project.rootMismatch, (value) => { + const currentValue = value; + lodash.set(currentValue, 'meta.counts', currentValue.project.length ? 1 : 0); + lodash.map(currentValue.associations, (subObject) => { + lodash.set( + subObject, + 'meta.counts', + Object.keys(subObject.mismatches).length + subObject.dbOnly.length + subObject.esOnly.length, + ); + currentValue.meta.counts += subObject.meta.counts; + }); + data.project.meta.totalObjects += currentValue.meta.counts; + }); + }; + + const result = differ.diff(dbData, esData); + const finalData = differ.patch(Diff.clone(dbData), result); + const flattenedResult = scriptUtil.flatten(result); + for (const item of flattenedResult) { + const delta = processDelta(item, esData, dbData, finalData); + if (delta) { + collectDataCopies(delta); + storeDelta('project', delta); + } + } + countInconsistencies(); + return data; +} + +module.exports = { + compareProjects, +}; diff --git a/scripts/es-db-compare/constants.js b/scripts/es-db-compare/constants.js new file mode 100644 index 00000000..9529931f --- /dev/null +++ b/scripts/es-db-compare/constants.js @@ -0,0 +1,13 @@ +/* + * 
Constants used in the script + */ + +module.exports = { + ignoredProperties: [ + 'createdAt', + 'updatedAt', + 'deletedAt', + 'deletedBy', + 'projectUrl', + ], +}; diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js new file mode 100644 index 00000000..4a3314a4 --- /dev/null +++ b/scripts/es-db-compare/index.js @@ -0,0 +1,191 @@ +/* eslint-disable no-console */ +/* + * Compare data between DB and ES and generate a report to be uploaded + * to AWS S3. + */ + +import Joi from 'joi'; +import lodash from 'lodash'; +import config from 'config'; + +import models from '../../src/models'; +import util from '../../src/util'; +import { INVITE_STATUS } from '../../src/constants'; + +const handlebars = require('handlebars'); +const path = require('path'); +const fs = require('fs'); +const { compareProjects } = require('./compareProjects'); + +const scriptConfig = { + PROJECT_START_ID: process.env.PROJECT_START_ID, + PROJECT_END_ID: process.env.PROJECT_END_ID, + PROJECT_LAST_ACTIVITY_AT: process.env.PROJECT_LAST_ACTIVITY_AT, +}; + +const reportPathname = './report.html'; + +const configSchema = Joi.object().keys({ + PROJECT_START_ID: Joi.number().integer().positive().optional(), + PROJECT_END_ID: Joi.number().integer().positive().optional(), + PROJECT_LAST_ACTIVITY_AT: Joi.date().optional(), +}) + .with('PROJECT_START_ID', 'PROJECT_END_ID') + .or('PROJECT_START_ID', 'PROJECT_LAST_ACTIVITY_AT'); + +try { + Joi.attempt(scriptConfig, configSchema); +} catch (err) { + console.error(err.message); + process.exit(); +} + +const es = util.getElasticSearchClient(); + +const ES_PROJECT_INDEX = config.get('elasticsearchConfig.indexName'); +const ES_PROJECT_TYPE = config.get('elasticsearchConfig.docType'); + +/** + * Get es search criteria. 
+ * + * @returns {Object} the search criteria + */ +function getESSearchCriteria() { + const filters = []; + if (!lodash.isNil(scriptConfig.PROJECT_START_ID)) { + filters.push({ + filtered: { + filter: { + range: { + id: { + gte: scriptConfig.PROJECT_START_ID, + lte: scriptConfig.PROJECT_END_ID, + }, + }, + }, + }, + }); + } + if (!lodash.isNil(scriptConfig.PROJECT_LAST_ACTIVITY_AT)) { + filters.push({ + filtered: { + filter: { + range: { + lastActivityAt: { + gte: scriptConfig.PROJECT_LAST_ACTIVITY_AT, + }, + }, + }, + }, + }); + } + const searchCriteria = { + index: ES_PROJECT_INDEX, + type: ES_PROJECT_TYPE, + body: { + query: { + bool: { + must: filters, + }, + }, + }, + }; + return searchCriteria; +} + +/** + * Get handlebars template. + * + * @returns {Object} the template + */ +function getTemplate() { + handlebars.registerHelper('getValue', (data, key) => data[key]); + handlebars.registerHelper('toJSON', obj => JSON.stringify(obj, null, 2)); + const template = handlebars.compile(fs.readFileSync(path.join(__dirname, 'report.mustache')).toString()); + return template; +} + +/** + * Get ES data. + * + * @returns {Promise} the ES data + */ +async function getESData() { + const searchCriteria = getESSearchCriteria(); + return es.search(searchCriteria) + .then((docs) => { + const rows = lodash.map(docs.hits.hits, single => single._source); // eslint-disable-line no-underscore-dangle + return rows; + }); +} + +/** + * Get DB data. 
+ * + * @returns {Promise} the DB data + */ +async function getDBData() { + const filter = {}; + if (!lodash.isNil(scriptConfig.PROJECT_START_ID)) { + filter.id = { $between: [scriptConfig.PROJECT_START_ID, scriptConfig.PROJECT_END_ID] }; + } + if (!lodash.isNil(scriptConfig.PROJECT_LAST_ACTIVITY_AT)) { + filter.lastActivityAt = { $gte: scriptConfig.PROJECT_LAST_ACTIVITY_AT }; + } + return models.Project.findAll({ + where: filter, + raw: false, + include: [{ + model: models.ProjectPhase, + as: 'phases', + include: [{ + model: models.PhaseProduct, + as: 'products', + }], + }, { + model: models.ProjectMemberInvite, + as: 'invites', + where: { status: { $in: [INVITE_STATUS.PENDING, INVITE_STATUS.REQUESTED] } }, + required: false, + }, { + model: models.ProjectAttachment, + as: 'attachments', + }], + }).then((_projects) => { + const projects = _projects.map((_project) => { + if (!_project) { + return Promise.resolve(null); + } + const project = _project.toJSON(); + return models.ProjectMember.getActiveProjectMembers(project.id) + .then((currentProjectMembers) => { + project.members = currentProjectMembers; + return project; + }); + }); + return Promise.all(projects); + }); +} + +/** + * Main function. + * + * @returns {Promise} void + */ +async function main() { + const esData = await getESData(); + const dbData = await getDBData(); + const template = getTemplate(); + const data = compareProjects(esData, dbData); + const report = template(data); + fs.writeFileSync(reportPathname, report); + console.log(`report is written to ${reportPathname}`); +} + +main().then(() => { + console.log('done!'); + process.exit(); +}).catch((err) => { + console.log(err.message); + process.exit(); +}); diff --git a/scripts/es-db-compare/report.mustache b/scripts/es-db-compare/report.mustache new file mode 100644 index 00000000..f4f186e8 --- /dev/null +++ b/scripts/es-db-compare/report.mustache @@ -0,0 +1,89 @@ + + Topcoder Project Service - ES/DB Comparison Report + + + + +

Summary

+There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{ project.meta.totalProjects }} projects. +

List

+

Project ({{ project.meta.totalObjects }})

+{{#each project.rootMismatch }} +

Project - id: {{ @key }} ({{ this.meta.counts }})

+ {{#if this.project.length}} + + +

{{ this.project.length }} mismatches:

+ {{/if}} + + {{#each this.project }} +
  • {{ this.path }}
  • + {{/each}} +
    + + {{#each this.associations }} +

    {{ @key }} ({{ this.meta.counts }})

    + {{#each this.mismatches }} +
    {{ @../key }} - id: {{ @key }}
    + + +

    {{ this.length }} mismatches:

    + + {{#each this }} +
  • {{ this.path }}
  • + {{/each}} +
    + {{/each}} + {{#each this.dbOnly }} +
    {{ @../key }} - id: {{ this.id }} (1)
    + +

    Found in DB but not in ES.

    + {{/each}} + + {{#each this.esOnly }} +
    {{ @../key }} - id: {{ this.id }} (1)
    + +

    Found in ES but not in DB.

    + {{/each}} + {{/each}} +{{/each}} + +{{#each project.dbOnly }} +

    Project - id: {{ this.id }} (1)

    + +

    Found in DB but not in ES.

    +{{/each}} + +{{#each project.esOnly }} +

    Project - id: {{ this.id }} (1)

    + +

    Found in ES but not in DB.

    +{{/each}} + +

    Data

    +{{#each meta.dbCopies }} + +{{/each}} +{{#each meta.esCopies }} + +{{/each}} + diff --git a/scripts/es-db-compare/util.js b/scripts/es-db-compare/util.js new file mode 100644 index 00000000..704e88b8 --- /dev/null +++ b/scripts/es-db-compare/util.js @@ -0,0 +1,180 @@ +/* eslint-disable no-underscore-dangle */ +/* eslint-disable no-use-before-define */ +/* eslint-disable no-restricted-syntax */ +/* + * Util functions used in the script. + */ + +const _ = require('lodash'); +const moment = require('moment'); + +/** + * Sub-function for the flatten function that process object assets in the delta. + * + * @param {Object} delta the diff delta + * @param {Array} path the JSON path + * @returns {Array} flattened delta + */ +function flattenObject(delta, path) { + let result = []; + _.map(delta, (value, key) => { + const currentPath = _.concat(path, key); + if (value instanceof Array) { + if (value.length === 2) { + result.push({ + path: currentPath, + type: 'modify', + dataType: 'object', + originalValue: value[0], + currentValue: value[1], + }); + return; + } + if (value.length === 1) { + result.push({ + path: currentPath, + type: 'add', + dataType: 'object', + value: value[0], + }); + return; + } + if (value.length === 3) { + if (value[2] === 3) { + result.push({ + path: currentPath, + type: 'move', + dataType: 'object', + value: value[0], + }); + return; + } + result.push({ + path: currentPath, + type: 'delete', + dataType: 'object', + value: value[0], + }); + return; + } + } + result = _.concat(result, flatten(value, _.clone(currentPath))); + }); + return result; +} + +/** + * Sub-function for the flatten function that process array assets in the delta. 
+ * + * @param {Object} delta the diff delta + * @param {Array} path the JSON path + * @returns {Array} flattened delta + */ +function flattenArray(delta, path) { + let result = []; + _.map(_.omit(delta, ['_t']), (value, key) => { + if (value instanceof Array) { + if (key.startsWith('_')) { + const index = key.substring(1); + const currentPath = [...path, index]; + if (value[2] === 3) { + result.push({ + path: currentPath, + type: 'move', + dataType: 'array', + index, + originalValue: value[0], + }); + return; + } + result.push({ + path: currentPath, + type: 'delete', + dataType: 'array', + index, + originalValue: value[0], + }); + return; + } + const currentPath = _.concat(path, key); + if (value[2] === 3) { + result.push({ + path: currentPath, + type: 'move', + dataType: 'array', + index: key, + value: value[0], + }); + return; + } + result.push({ + path: currentPath, + type: 'add', + dataType: 'array', + index: key, + value: value[0], + }); + return; + } + const currentPath = _.concat(path, key); + if (key >= 0) { + result = _.concat(result, flattenObject(value, _.clone(currentPath))); + return; + } + throw new Error(`Unhandled case at ${currentPath}`); + }); + return result; +} + +/** + * Flatten delta from json diff patch so that it can be easily manipulated. + * + * @param {Object} delta the diff delta + * @param {Array} path the JSON path + * @returns {Array} flattened delta + */ +function flatten(delta, path = []) { + if (delta._t === 'a') { + return flattenArray(delta, path); + } + return flattenObject(delta, path); +} + +/** + * Generate a JSON path from array format. 
+ * Example: `generateJSONPath([ 'members', '0', 'key' ])` will output `members[0].key` + * + * @param {Array} path path in array format + * @returns {String} the JSON path + */ +function generateJSONPath(path) { + let result = ''; + for (const item of path) { + if (!isNaN(item)) { + result += `[${item}]`; + continue; // eslint-disable-line no-continue + } + if (result) { + result += '.'; + } + result += item; + } + return result; +} + +/** + * Generate a sensible filename for the report. + * + * @returns {String} the result filename + */ +function generateFilename() { + const nodeEnv = process.env.NODE_ENV || 'default'; + const date = moment().format('DD-MM-YYYY-HH-MM-SS'); + return `es-db-report-${nodeEnv}-${date}.html`; +} + +module.exports = { + flatten, + generateJSONPath, + generateFilename, +}; From 5073ad608486f1edd5a0d809b93b03fd851db54f Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Thu, 16 Jan 2020 04:32:37 +0800 Subject: [PATCH 02/10] fix 4 issues --- docs/es-db-compare.md | 2 +- scripts/es-db-compare/compareProjects.js | 36 ++++++------ scripts/es-db-compare/constants.js | 34 ++++++++--- scripts/es-db-compare/index.js | 2 + scripts/es-db-compare/report.mustache | 72 +++++++++++++++--------- scripts/es-db-compare/util.js | 23 ++++++++ 6 files changed, 116 insertions(+), 53 deletions(-) diff --git a/docs/es-db-compare.md b/docs/es-db-compare.md index a27a7d98..ff66909a 100644 --- a/docs/es-db-compare.md +++ b/docs/es-db-compare.md @@ -8,7 +8,7 @@ The following properties can be set from env variables: - PROJECT_LAST_ACTIVITY_AT: if set, only projects with property lastActivityAt that large than or equal to the value are compared. There could be some fields that always mismatch in ES and DB. 
-The variable named `ignoredProperties` at `scripts/es-db-compare/constants.js` maintains a list of fields which will be ignored +The variable named `ignoredPaths` at `scripts/es-db-compare/constants.js` maintains a list of json paths which will be ignored during the comparation. You may need to modify/add/delete items in the list. ### Note diff --git a/scripts/es-db-compare/compareProjects.js b/scripts/es-db-compare/compareProjects.js index 787bccdd..5669014b 100644 --- a/scripts/es-db-compare/compareProjects.js +++ b/scripts/es-db-compare/compareProjects.js @@ -12,23 +12,16 @@ const Diff = require('jsondiffpatch'); const lodash = require('lodash'); const scriptUtil = require('./util'); -const scriptConstants = require('./constants'); const associations = { phases: 'Phase', members: 'Member', invites: 'Invite', - attachment: 'Attachment', + attachments: 'Attachment', }; const differ = Diff.create({ objectHash: obj => obj.id, - propertyFilter: (name) => { - if (scriptConstants.ignoredProperties.includes(name)) { - return false; - } - return true; - }, }); /** @@ -67,12 +60,12 @@ function processDelta(delta, esData, dbData, finalData) { }; const processProduct = (item) => { - const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 5) }); - if (itemNew.dataType === 'array') { + const subPath = lodash.slice(item.path, 4); + if (item.dataType === 'array' && subPath.length === 1) { return processMissingObject(item, { modelName: 'Product' }); } - if (['add', 'delete', 'modify'].includes(itemNew.type)) { - const path = scriptUtil.generateJSONPath(itemNew.path); + if (['add', 'delete', 'modify'].includes(item.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 1)); const id = lodash.get(finalData, lodash.slice(item.path, 0, 5)).id; const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; const phaseId = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; @@ -93,6 +86,8 @@ function 
processDelta(delta, esData, dbData, finalData) { console.log(`one mismatch found for Product with id ${id}`); return { type: 'mismatch', + kind: item.type, + dataType: item.dataType, projectId, id, modelName: 'Product', @@ -107,12 +102,12 @@ function processDelta(delta, esData, dbData, finalData) { if (item.path[1] === 'phases' && item.path[3] === 'products') { return processProduct(item); } - const itemNew = Object.assign({}, lodash.omit(item, ['path']), { path: lodash.slice(item.path, 2) }); - if (itemNew.dataType === 'array') { + const subPath = lodash.slice(item.path, 2); + if (item.dataType === 'array' && subPath.length === 1) { return processMissingObject(item, option); } - if (['add', 'delete', 'modify'].includes(itemNew.type)) { - const path = scriptUtil.generateJSONPath(lodash.slice(itemNew.path, 1)); + if (['add', 'delete', 'modify'].includes(item.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 1)); const id = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; const dbCopy = lodash.find( @@ -126,6 +121,8 @@ function processDelta(delta, esData, dbData, finalData) { console.log(`one mismatch found for ${option.modelName} with id ${id}`); return { type: 'mismatch', + kind: item.type, + dataType: item.dataType, projectId, modelName: option.modelName, id, @@ -139,7 +136,7 @@ function processDelta(delta, esData, dbData, finalData) { if (delta.path.length > 2 && associations[delta.path[1]]) { return processAssociation(delta, { modelName: associations[delta.path[1]], refPath: delta.path[1] }); } - if (delta.dataType === 'array') { + if (delta.dataType === 'array' && delta.path.length === 1) { return processMissingObject(delta, { modelName: 'Project' }); } if (['add', 'delete', 'modify'].includes(delta.type)) { @@ -150,6 +147,8 @@ function processDelta(delta, esData, dbData, finalData) { console.log(`one mismatch found for Project with id ${id}`); return { 
type: 'mismatch', + kind: delta.type, + dataType: delta.dataType, projectId: id, modelName: 'Project', id, @@ -264,6 +263,9 @@ function compareProjects(esData, dbData) { const finalData = differ.patch(Diff.clone(dbData), result); const flattenedResult = scriptUtil.flatten(result); for (const item of flattenedResult) { + if (scriptUtil.isIgnoredPath('project', item.path)) { + continue; // eslint-disable-line no-continue + } const delta = processDelta(item, esData, dbData, finalData); if (delta) { collectDataCopies(delta); diff --git a/scripts/es-db-compare/constants.js b/scripts/es-db-compare/constants.js index 9529931f..130f1c1b 100644 --- a/scripts/es-db-compare/constants.js +++ b/scripts/es-db-compare/constants.js @@ -3,11 +3,31 @@ */ module.exports = { - ignoredProperties: [ - 'createdAt', - 'updatedAt', - 'deletedAt', - 'deletedBy', - 'projectUrl', - ], + // currently support only a subset of jsonpath notations + // "*" means any index number + ignoredPaths: [ + 'project.projectUrl', + 'project.utm', + + 'project.deletedAt', + 'project.phases[*].deletedAt', + 'project.phases[*].products[*].deletedAt', + 'project.invites[*].deletedAt', + 'project.members[*].deletedAt', + 'project.attachments[*].deletedAt', + + 'project.updatedAt', + 'project.phases[*].updatedAt', + 'project.phases[*].products[*].updatedAt', + 'project.invites[*].updatedAt', + 'project.members[*].updatedAt', + 'project.attachments[*].updatedAt', + + 'project.deletedBy', + 'project.phases[*].deletedBy', + 'project.phases[*].products[*].deletedBy', + 'project.invites[*].deletedBy', + 'project.members[*].deletedBy', + 'project.attachments[*].deletedBy', + ] }; diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js index 4a3314a4..adc1a2a6 100644 --- a/scripts/es-db-compare/index.js +++ b/scripts/es-db-compare/index.js @@ -164,6 +164,8 @@ async function getDBData() { }); }); return Promise.all(projects); + }).then(projects => { + return JSON.parse(JSON.stringify(projects)); }); 
} diff --git a/scripts/es-db-compare/report.mustache b/scripts/es-db-compare/report.mustache index f4f186e8..3d539bbc 100644 --- a/scripts/es-db-compare/report.mustache +++ b/scripts/es-db-compare/report.mustache @@ -1,21 +1,37 @@ Topcoder Project Service - ES/DB Comparison Report + @@ -26,13 +42,13 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{#each project.rootMismatch }}

    Project - id: {{ @key }} ({{ this.meta.counts }})

    {{#if this.project.length}} - - + +

    {{ this.project.length }} mismatches:

    {{/if}} {{#each this.project }} -
  • {{ this.path }}
  • +
  • {{ this.path }} (kind: {{ this.kind }})
  • {{/each}}
    @@ -40,24 +56,24 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in

    {{ @key }} ({{ this.meta.counts }})

    {{#each this.mismatches }}
    {{ @../key }} - id: {{ @key }}
    - - + +

    {{ this.length }} mismatches:

    {{#each this }} -
  • {{ this.path }}
  • +
  • {{ this.path }} (kind: {{ this.kind }})
  • {{/each}}
    {{/each}} {{#each this.dbOnly }}
    {{ @../key }} - id: {{ this.id }} (1)
    - +

    Found in DB but not in ES.

    {{/each}} {{#each this.esOnly }}
    {{ @../key }} - id: {{ this.id }} (1)
    - +

    Found in ES but not in DB.

    {{/each}} {{/each}} @@ -65,25 +81,25 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{#each project.dbOnly }}

    Project - id: {{ this.id }} (1)

    - +

    Found in DB but not in ES.

    {{/each}} {{#each project.esOnly }}

    Project - id: {{ this.id }} (1)

    - +

    Found in ES but not in DB.

    {{/each}} - -

    Data

    -{{#each meta.dbCopies }} - -{{/each}} -{{#each meta.esCopies }} - -{{/each}} +
    + {{#each meta.dbCopies }} + + {{/each}} + {{#each meta.esCopies }} + + {{/each}} +
    diff --git a/scripts/es-db-compare/util.js b/scripts/es-db-compare/util.js index 704e88b8..4b378588 100644 --- a/scripts/es-db-compare/util.js +++ b/scripts/es-db-compare/util.js @@ -8,6 +8,8 @@ const _ = require('lodash'); const moment = require('moment'); +const constants = require('./constants'); + /** * Sub-function for the flatten function that process object assets in the delta. * @@ -162,6 +164,26 @@ function generateJSONPath(path) { return result; } +/** + * Check if the json path of a delta should be ignored. + * Low-budget version. + * + * @param {String} root the model name, one of "project" and "metadata" + * @param {Array} path the path to be verified + * @returns {Boolean} the result + */ +function isIgnoredPath(root, path) { + const jsonPath = generateJSONPath(_.slice(path, 1)); + if (jsonPath === '') { + return false; + } + const expr = jsonPath.replace(/\[\d+\]/g, '[*]').replace(/^/, `${root}.`) + if (constants.ignoredPaths.includes(expr)) { + return true; + } + return false; +} + /** * Generate a sensible filename for the report. 
* @@ -177,4 +199,5 @@ module.exports = { flatten, generateJSONPath, generateFilename, + isIgnoredPath, }; From 93723a5f5deaf56ad8fb8b56708ba0dcd171e0c0 Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Sun, 19 Jan 2020 14:12:41 +0800 Subject: [PATCH 03/10] fix issues on array comparison --- scripts/es-db-compare/compareProjects.js | 39 +++++++++++++++++++++++- scripts/es-db-compare/constants.js | 2 +- scripts/es-db-compare/index.js | 4 +-- scripts/es-db-compare/util.js | 2 +- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/scripts/es-db-compare/compareProjects.js b/scripts/es-db-compare/compareProjects.js index 5669014b..35377747 100644 --- a/scripts/es-db-compare/compareProjects.js +++ b/scripts/es-db-compare/compareProjects.js @@ -24,6 +24,43 @@ const differ = Diff.create({ objectHash: obj => obj.id, }); +/** + * The json diff patch may contains deltas with same path, + * one is "added to array", the other is "deleted from array". + * In such case they can be combined and treated as "modified at an index in the array". + * + * @param {Array} deltas the data to be filtered + * @returns {Array} filtered data + */ +function processSamePath(deltas) { + const result = []; + const groups = lodash.groupBy(deltas, 'path'); + for (const value of Object.values(groups)) { + if (value.length === 1) { + result.push(value[0]); + continue; // eslint-disable-line no-continue + } + if (value.length === 2) { + result.push(Object.assign({ type: 'modify' }, lodash.omit(value[0], 'type'))); + continue; // eslint-disable-line no-continue + } + throw new Error('Internal Error'); + } + return result; +} + +/** + * Transform or filter deltas before any further proccess. + * + * @param {Array} deltas the data to be processed + * @returns {Array} the result + */ +function preProcessDeltas(deltas) { + return processSamePath( + scriptUtil.flatten(deltas), + ); +} + /** * Process diff delta to extract project-related data. 
* @@ -261,7 +298,7 @@ function compareProjects(esData, dbData) { const result = differ.diff(dbData, esData); const finalData = differ.patch(Diff.clone(dbData), result); - const flattenedResult = scriptUtil.flatten(result); + const flattenedResult = preProcessDeltas(result); for (const item of flattenedResult) { if (scriptUtil.isIgnoredPath('project', item.path)) { continue; // eslint-disable-line no-continue diff --git a/scripts/es-db-compare/constants.js b/scripts/es-db-compare/constants.js index 130f1c1b..36b51fbb 100644 --- a/scripts/es-db-compare/constants.js +++ b/scripts/es-db-compare/constants.js @@ -29,5 +29,5 @@ module.exports = { 'project.invites[*].deletedBy', 'project.members[*].deletedBy', 'project.attachments[*].deletedBy', - ] + ], }; diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js index adc1a2a6..51aab132 100644 --- a/scripts/es-db-compare/index.js +++ b/scripts/es-db-compare/index.js @@ -164,9 +164,7 @@ async function getDBData() { }); }); return Promise.all(projects); - }).then(projects => { - return JSON.parse(JSON.stringify(projects)); - }); + }).then(projects => JSON.parse(JSON.stringify(projects))); } /** diff --git a/scripts/es-db-compare/util.js b/scripts/es-db-compare/util.js index 4b378588..6d02040b 100644 --- a/scripts/es-db-compare/util.js +++ b/scripts/es-db-compare/util.js @@ -177,7 +177,7 @@ function isIgnoredPath(root, path) { if (jsonPath === '') { return false; } - const expr = jsonPath.replace(/\[\d+\]/g, '[*]').replace(/^/, `${root}.`) + const expr = jsonPath.replace(/\[\d+\]/g, '[*]').replace(/^/, `${root}.`); if (constants.ignoredPaths.includes(expr)) { return true; } From dc8339f3875455940b6d8464292623fa9f4e5847 Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Sun, 19 Jan 2020 14:16:01 +0800 Subject: [PATCH 04/10] add descriptions to each kind of mismatches --- scripts/es-db-compare/index.js | 12 ++++++++++++ scripts/es-db-compare/report.mustache | 4 ++-- 2 files changed, 14 insertions(+), 2 
deletions(-) diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js index 51aab132..f74e152d 100644 --- a/scripts/es-db-compare/index.js +++ b/scripts/es-db-compare/index.js @@ -101,6 +101,18 @@ function getESSearchCriteria() { function getTemplate() { handlebars.registerHelper('getValue', (data, key) => data[key]); handlebars.registerHelper('toJSON', obj => JSON.stringify(obj, null, 2)); + handlebars.registerHelper('describeKind', (kind) => { + if (kind === 'modify') { + return 'values differ'; + } + if (kind === 'add') { + return 'missed in DB'; + } + if (kind === 'delete') { + return 'missed in ES'; + } + return 'unknown'; + }); const template = handlebars.compile(fs.readFileSync(path.join(__dirname, 'report.mustache')).toString()); return template; } diff --git a/scripts/es-db-compare/report.mustache b/scripts/es-db-compare/report.mustache index 3d539bbc..a69fa3cf 100644 --- a/scripts/es-db-compare/report.mustache +++ b/scripts/es-db-compare/report.mustache @@ -48,7 +48,7 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in {{/if}} {{#each this.project }} -
  • {{ this.path }} (kind: {{ this.kind }})
  • +
  • {{ this.path }} ({{describeKind this.kind }})
  • {{/each}}
    @@ -61,7 +61,7 @@ There are {{ project.meta.totalObjects }} objects with inconsistencies found in

    {{ this.length }} mismatches:

    {{#each this }} -
  • {{ this.path }} (kind: {{ this.kind }})
  • +
  • {{ this.path }} ({{ this.kind }})
  • {{/each}}
    {{/each}} From abdba62b599435eca984f89f36e31b392d64c436 Mon Sep 17 00:00:00 2001 From: imcaizheng Date: Mon, 27 Jan 2020 20:35:33 +0800 Subject: [PATCH 05/10] initial commit --- scripts/es-db-compare/compareMetadata.js | 152 +++++++++++++ scripts/es-db-compare/compareProjects.js | 269 +++++++++++++++-------- scripts/es-db-compare/constants.js | 72 +++++- scripts/es-db-compare/index.js | 132 ++++++++++- scripts/es-db-compare/report.mustache | 83 ++++--- scripts/es-db-compare/util.js | 124 +++++++++-- 6 files changed, 660 insertions(+), 172 deletions(-) create mode 100644 scripts/es-db-compare/compareMetadata.js diff --git a/scripts/es-db-compare/compareMetadata.js b/scripts/es-db-compare/compareMetadata.js new file mode 100644 index 00000000..3a828efa --- /dev/null +++ b/scripts/es-db-compare/compareMetadata.js @@ -0,0 +1,152 @@ +/* eslint-disable no-console */ +/* eslint-disable consistent-return */ +/* eslint-disable no-restricted-syntax */ +/* eslint-disable no-param-reassign */ +/* + * Compare metadata between ES and DB. + */ +const lodash = require('lodash'); + +const scriptUtil = require('./util'); +const scriptConstants = require('./constants'); + +const hashKeyMapping = { + ProjectTemplate: 'id', + ProductTemplate: 'id', + ProjectType: 'key', + ProductCategory: 'key', + MilestoneTemplate: 'id', + OrgConfig: 'id', + Form: 'id', + PlanConfig: 'id', + PriceConfig: 'id', + BuildingBlock: 'id', +}; + +/** + * Process a single delta. + * + * @param {String} modelName the model name the delta belongs to + * @param {Object} delta the diff delta. 
+ * @param {Object} dbData the data from DB + * @param {Object} esData the data from ES + * @param {Object} finalData the data patched + * @returns {undefined} + */ +function processDelta(modelName, delta, dbData, esData, finalData) { + const hashKey = hashKeyMapping[modelName]; + if (delta.dataType === 'array' && delta.path.length === 1) { + if (delta.type === 'delete') { + console.log(`one dbOnly found for ${modelName} with ${hashKey} ${delta.originalValue[hashKey]}`); + return { + type: 'dbOnly', + modelName, + hashKey, + hashValue: delta.originalValue[hashKey], + dbCopy: delta.originalValue, + }; + } + if (delta.type === 'add') { + console.log(`one esOnly found for ${modelName} with ${hashKey} ${delta.value[hashKey]}`); + return { + type: 'esOnly', + modelName, + hashKey, + hashValue: delta.value[hashKey], + esCopy: delta.value, + }; + } + } + if (['add', 'delete', 'modify'].includes(delta.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(delta.path, 1)); + const hashValue = lodash.get(finalData, lodash.slice(delta.path, 0, 1))[hashKey]; + const hashObject = lodash.set({}, hashKey, hashValue); + const dbCopy = lodash.find(dbData, hashObject); + const esCopy = lodash.find(esData, hashObject); + console.log(`one mismatch found for ${modelName} with ${hashKey} ${hashValue}`); + return { + type: 'mismatch', + kind: delta.type, + modelName, + hashKey, + hashValue, + path, + dbCopy, + esCopy, + }; + } +} + + +/** + * Compare Metadata data from ES and DB. 
+ * + * @param {Object} dbData the data from DB + * @param {Object} esData the data from ES + * @returns {Object} the data to feed handlebars template + */ +function compareMetadata(dbData, esData) { + const data = { + nestedModels: {}, + }; + + const countInconsistencies = () => { + lodash.set(data, 'meta.totalObjects', 0); + lodash.map(data.nestedModels, (model) => { + const counts = Object.keys(model.mismatches).length + model.dbOnly.length + model.esOnly.length; + lodash.set(model, 'meta.counts', counts); + data.meta.totalObjects += counts; + }); + }; + + const storeDelta = (modelName, delta) => { + if (lodash.isUndefined(data.nestedModels[modelName])) { + data.nestedModels[modelName] = { + mismatches: {}, + dbOnly: [], + esOnly: [], + }; + } + if (delta.type === 'mismatch') { + if (lodash.isUndefined(data.nestedModels[modelName].mismatches[delta.hashValue])) { + data.nestedModels[modelName].mismatches[delta.hashValue] = []; + } + data.nestedModels[modelName].mismatches[delta.hashValue].push(delta); + return; + } + if (delta.type === 'dbOnly') { + data.nestedModels[modelName].dbOnly.push(delta); + return; + } + if (delta.type === 'esOnly') { + data.nestedModels[modelName].esOnly.push(delta); + } + }; + + for (const refPath of Object.keys(scriptConstants.associations.metadata)) { + const modelName = scriptConstants.associations.metadata[refPath]; + const { deltas, finalData } = scriptUtil.diffData( + dbData[refPath], + esData[refPath], + { + hashKey: hashKeyMapping[modelName], + modelPathExprssions: lodash.set({}, modelName, '[*]'), + }, + ); + for (const delta of deltas) { + if (scriptUtil.isIgnoredPath(`metadata.${refPath}`, delta.path)) { + continue; // eslint-disable-line no-continue + } + const deltaWithCopy = processDelta(modelName, delta, dbData[refPath], esData[refPath], finalData); + if (deltaWithCopy) { + storeDelta(modelName, deltaWithCopy); + } + } + } + countInconsistencies(); + return data; +} + +module.exports = { + compareMetadata, +}; diff --git 
a/scripts/es-db-compare/compareProjects.js b/scripts/es-db-compare/compareProjects.js index 35377747..f42a6e1b 100644 --- a/scripts/es-db-compare/compareProjects.js +++ b/scripts/es-db-compare/compareProjects.js @@ -9,7 +9,6 @@ * modules to compare other models. */ -const Diff = require('jsondiffpatch'); const lodash = require('lodash'); const scriptUtil = require('./util'); @@ -18,62 +17,22 @@ const associations = { members: 'Member', invites: 'Invite', attachments: 'Attachment', + timelines: 'Timeline', }; -const differ = Diff.create({ - objectHash: obj => obj.id, -}); - -/** - * The json diff patch may contains deltas with same path, - * one is "added to array", the other is "deleted from array". - * In such case they can be combined and treated as "modified at an index in the array". - * - * @param {Array} deltas the data to be filtered - * @returns {Array} filtered data - */ -function processSamePath(deltas) { - const result = []; - const groups = lodash.groupBy(deltas, 'path'); - for (const value of Object.values(groups)) { - if (value.length === 1) { - result.push(value[0]); - continue; // eslint-disable-line no-continue - } - if (value.length === 2) { - result.push(Object.assign({ type: 'modify' }, lodash.omit(value[0], 'type'))); - continue; // eslint-disable-line no-continue - } - throw new Error('Internal Error'); - } - return result; -} - -/** - * Transform or filter deltas before any further proccess. - * - * @param {Array} deltas the data to be processed - * @returns {Array} the result - */ -function preProcessDeltas(deltas) { - return processSamePath( - scriptUtil.flatten(deltas), - ); -} - /** * Process diff delta to extract project-related data. * - * @param {Object} delta the diff delta. See `util.flatten()` - * @param {Object} esData the data from ES + * @param {Object} delta the diff delta. 
* @param {Object} dbData the data from DB + * @param {Object} esData the data from ES * @param {Object} finalData the data patched * @returns {Object} Object project diff delta in a specific data structure */ -function processDelta(delta, esData, dbData, finalData) { +function processDelta(delta, dbData, esData, finalData) { const processMissingObject = (item, option) => { if (item.type === 'delete') { - const projectId = lodash.get(dbData, lodash.slice(item.path, 0, 1)).id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; console.log(`one dbOnly found for ${option.modelName} with id ${item.originalValue.id}`); return { type: 'dbOnly', @@ -84,7 +43,7 @@ function processDelta(delta, esData, dbData, finalData) { }; } if (item.type === 'add') { - const projectId = lodash.get(esData, lodash.slice(item.path, 0, 1)).id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; console.log(`one esOnly found for ${option.modelName} with id ${item.value.id}`); return { type: 'esOnly', @@ -96,6 +55,112 @@ function processDelta(delta, esData, dbData, finalData) { } }; + const processMilestone = (item) => { + const subPath = lodash.slice(item.path, 7); + if (item.dataType === 'array' && subPath.length === 1) { + return processMissingObject(item, { modelName: 'Milestone' }); + } + if (['add', 'delete', 'modify'].includes(item.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 1)); + const id = lodash.get(finalData, lodash.slice(item.path, 0, 8)).id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; + const phaseId = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; + const productId = lodash.get(finalData, lodash.slice(item.path, 0, 5)).id; + const dbCopy = lodash.find( + lodash.find( + lodash.find( + lodash.find(dbData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id: productId }, + ).timeline.milestones, + { id }, + ); + const esCopy = lodash.find( + 
lodash.find( + lodash.find( + lodash.find(esData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id: productId }, + ).timeline.milestones, + { id }, + ); + console.log(`one mismatch found for Milestone with id ${id}`); + return { + type: 'mismatch', + kind: item.type, + dataType: item.dataType, + projectId, + id, + modelName: 'Milestone', + path, + dbCopy, + esCopy, + }; + } + }; + + const processTimeline = (item) => { + if (item.path.length === 6 && item.type === 'modify') { + if (lodash.isNil(item.originalValue)) { + console.log(`one esOnly found for Timeline with id ${item.currentValue.id}`); + return { + type: 'esOnly', + projectId: lodash.get(finalData, lodash.slice(item.path, 0, 1)).id, + modelName: 'Timeline', + id: item.currentValue.id, + esCopy: item.currentValue, + }; + } + if (lodash.isNil(item.currentValue)) { + console.log(`one dbOnly found for Timeline with id ${item.originalValue.id}`); + return { + type: 'dbOnly', + projectId: lodash.get(finalData, lodash.slice(item.path, 0, 1)).id, + modelName: 'Timeline', + id: item.originalValue.id, + dbCopy: item.originalValue, + }; + } + throw new Error('Internal Error'); + } + const subPath = lodash.slice(item.path, 4); + if (['add', 'delete', 'modify'].includes(item.type)) { + const path = scriptUtil.generateJSONPath(lodash.slice(subPath, 2)); + const id = lodash.get(finalData, lodash.slice(item.path, 0, 5)).timeline.id; + const projectId = lodash.get(finalData, lodash.slice(item.path, 0, 1)).id; + const phaseId = lodash.get(finalData, lodash.slice(item.path, 0, 3)).id; + const productId = lodash.get(finalData, lodash.slice(item.path, 0, 5)).id; + const dbCopy = lodash.find( + lodash.find( + lodash.find(dbData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id: productId }, + ).timeline; + const esCopy = lodash.find( + lodash.find( + lodash.find(esData, { id: projectId }).phases, + { id: phaseId }, + ).products, + { id: productId }, + ).timeline; + console.log(`one mismatch 
found for Timeline with id ${id}`); + return { + type: 'mismatch', + kind: item.type, + dataType: item.dataType, + projectId, + id, + modelName: 'Timeline', + path, + dbCopy, + esCopy, + }; + } + }; + const processProduct = (item) => { const subPath = lodash.slice(item.path, 4); if (item.dataType === 'array' && subPath.length === 1) { @@ -137,6 +202,12 @@ function processDelta(delta, esData, dbData, finalData) { const processAssociation = (item, option) => { if (item.path[1] === 'phases' && item.path[3] === 'products') { + if (item.path[5] === 'timeline') { + if (item.path[6] === 'milestones') { + return processMilestone(item); + } + return processTimeline(item); + } return processProduct(item); } const subPath = lodash.slice(item.path, 2); @@ -174,7 +245,24 @@ function processDelta(delta, esData, dbData, finalData) { return processAssociation(delta, { modelName: associations[delta.path[1]], refPath: delta.path[1] }); } if (delta.dataType === 'array' && delta.path.length === 1) { - return processMissingObject(delta, { modelName: 'Project' }); + if (delta.type === 'delete') { + console.log(`one dbOnly found for Project with id ${delta.originalValue.id}`); + return { + type: 'dbOnly', + modelName: 'Project', + id: delta.originalValue.id, + dbCopy: delta.originalValue, + }; + } + if (delta.type === 'add') { + console.log(`one esOnly found for Project with id ${delta.value.id}`); + return { + type: 'esOnly', + modelName: 'Project', + id: delta.value.id, + esCopy: delta.value, + }; + } } if (['add', 'delete', 'modify'].includes(delta.type)) { const path = scriptUtil.generateJSONPath(lodash.slice(delta.path, 1)); @@ -199,46 +287,36 @@ function processDelta(delta, esData, dbData, finalData) { /** * Compare Project data from ES and DB. 
* - * @param {Object} esData the data from ES * @param {Object} dbData the data from DB + * @param {Object} esData the data from ES * @returns {Object} the data to feed handlebars template */ -function compareProjects(esData, dbData) { +function compareProjects(dbData, esData) { const data = { - project: { - rootMismatch: {}, - esOnly: [], - dbOnly: [], - }, - meta: { - esCopies: [], - dbCopies: [], - counts: { - Project: 0, - }, - uniqueDeltas: [], - }, + rootMismatch: {}, + esOnly: [], + dbOnly: [], }; - const storeDelta = (root, delta) => { + const storeDelta = (delta) => { if (delta.modelName === 'Project') { if (delta.type === 'esOnly') { - data[root].esOnly.push(delta); + data.esOnly.push(delta); return; } if (delta.type === 'dbOnly') { - data[root].dbOnly.push(delta); + data.dbOnly.push(delta); return; } } - if (!data[root].rootMismatch[delta.projectId]) { - data[root].rootMismatch[delta.projectId] = { project: [], associations: {} }; + if (!data.rootMismatch[delta.projectId]) { + data.rootMismatch[delta.projectId] = { project: [], associations: {} }; } if (delta.modelName === 'Project') { - data[root].rootMismatch[delta.projectId].project.push(delta); + data.rootMismatch[delta.projectId].project.push(delta); return; } - const currentAssociations = data[root].rootMismatch[delta.projectId].associations; + const currentAssociations = data.rootMismatch[delta.projectId].associations; if (!Object.keys(currentAssociations).includes(delta.modelName)) { currentAssociations[delta.modelName] = { mismatches: {}, @@ -257,31 +335,18 @@ function compareProjects(esData, dbData) { currentAssociations[delta.modelName][delta.type].push(delta); }; - const collectDataCopies = (delta) => { - if (delta.dbCopy) { - if (!lodash.find(data.meta.dbCopies, lodash.pick(delta, ['modelName', 'id']))) { - data.meta.dbCopies.push(delta); - } - } - if (delta.esCopy) { - if (!lodash.find(data.meta.esCopies, lodash.pick(delta, ['modelName', 'id']))) { - data.meta.esCopies.push(delta); - } - } 
- }; - const countInconsistencies = () => { lodash.set( - data.project, + data, 'meta.totalObjects', - data.project.dbOnly.length + data.project.esOnly.length, + data.dbOnly.length + data.esOnly.length, ); lodash.set( - data.project, + data, 'meta.totalProjects', - Object.keys(data.project.rootMismatch).length + data.project.dbOnly.length + data.project.esOnly.length, + Object.keys(data.rootMismatch).length + data.dbOnly.length + data.esOnly.length, ); - lodash.map(data.project.rootMismatch, (value) => { + lodash.map(data.rootMismatch, (value) => { const currentValue = value; lodash.set(currentValue, 'meta.counts', currentValue.project.length ? 1 : 0); lodash.map(currentValue.associations, (subObject) => { @@ -292,21 +357,33 @@ function compareProjects(esData, dbData) { ); currentValue.meta.counts += subObject.meta.counts; }); - data.project.meta.totalObjects += currentValue.meta.counts; + data.meta.totalObjects += currentValue.meta.counts; }); }; - const result = differ.diff(dbData, esData); - const finalData = differ.patch(Diff.clone(dbData), result); - const flattenedResult = preProcessDeltas(result); - for (const item of flattenedResult) { + const { deltas, finalData } = scriptUtil.diffData( + dbData, + esData, + { + hashKey: 'id', + modelPathExprssions: { + Project: '[*]', + Phase: '[*].phases[*]', + Product: '[*].phases[*].products[*]', + Milestone: '[*].phases[*].products[*].timeline.milestones[*]', + Invite: '[*].invites[*]', + Member: '[*].members[*]', + Attachment: '[*].attachments[*]', + }, + }, + ); + for (const item of deltas) { if (scriptUtil.isIgnoredPath('project', item.path)) { continue; // eslint-disable-line no-continue } - const delta = processDelta(item, esData, dbData, finalData); + const delta = processDelta(item, dbData, esData, finalData); if (delta) { - collectDataCopies(delta); - storeDelta('project', delta); + storeDelta(delta); } } countInconsistencies(); diff --git a/scripts/es-db-compare/constants.js 
b/scripts/es-db-compare/constants.js index 36b51fbb..a8dbd734 100644 --- a/scripts/es-db-compare/constants.js +++ b/scripts/es-db-compare/constants.js @@ -8,26 +8,82 @@ module.exports = { ignoredPaths: [ 'project.projectUrl', 'project.utm', + 'metadata.milestoneTemplates.order', - 'project.deletedAt', - 'project.phases[*].deletedAt', - 'project.phases[*].products[*].deletedAt', - 'project.invites[*].deletedAt', - 'project.members[*].deletedAt', - 'project.attachments[*].deletedAt', - + // all project updatedAt 'project.updatedAt', 'project.phases[*].updatedAt', 'project.phases[*].products[*].updatedAt', + 'project.phases[*].products[*].timeline.updatedAt', + 'project.phases[*].products[*].timeline.milestones[*].updatedAt', 'project.invites[*].updatedAt', 'project.members[*].updatedAt', 'project.attachments[*].updatedAt', - + // all project deletedAt + 'project.deletedAt', + 'project.phases[*].deletedAt', + 'project.phases[*].products[*].deletedAt', + 'project.phases[*].products[*].timeline.deletedAt', + 'project.phases[*].products[*].timeline.milestones[*].deletedAt', + 'project.invites[*].deletedAt', + 'project.members[*].deletedAt', + 'project.attachments[*].deletedAt', + // all project deletedBy 'project.deletedBy', 'project.phases[*].deletedBy', 'project.phases[*].products[*].deletedBy', + 'project.phases[*].products[*].timeline.deletedBy', + 'project.phases[*].products[*].timeline.milestones[*].deletedBy', 'project.invites[*].deletedBy', 'project.members[*].deletedBy', 'project.attachments[*].deletedBy', + + // all metadata updatedAt + 'metadata.projectTemplates.updatedAt', + 'metadata.productTemplates.updatedAt', + 'metadata.projectTypes.updatedAt', + 'metadata.productCategories.updatedAt', + 'metadata.milestoneTemplates.updatedAt', + 'metadata.orgConfigs.updatedAt', + 'metadata.forms.updatedAt', + 'metadata.planConfigs.updatedAt', + 'metadata.priceConfigs.updatedAt', + 'metadata.buildingBlocks.updatedAt', + // all metadata deletedAt + 
'metadata.projectTemplates.deletedAt', + 'metadata.productTemplates.deletedAt', + 'metadata.projectTypes.deletedAt', + 'metadata.productCategories.deletedAt', + 'metadata.milestoneTemplates.deletedAt', + 'metadata.orgConfigs.deletedAt', + 'metadata.forms.deletedAt', + 'metadata.planConfigs.deletedAt', + 'metadata.priceConfigs.deletedAt', + 'metadata.buildingBlocks.deletedAt', + // all metadata deletedBy + 'metadata.projectTemplates.deletedBy', + 'metadata.productTemplates.deletedBy', + 'metadata.projectTypes.deletedBy', + 'metadata.productCategories.deletedBy', + 'metadata.milestoneTemplates.deletedBy', + 'metadata.orgConfigs.deletedBy', + 'metadata.forms.deletedBy', + 'metadata.planConfigs.deletedBy', + 'metadata.priceConfigs.deletedBy', + 'metadata.buildingBlocks.deletedBy', ], + associations: { + metadata: { + projectTemplates: 'ProjectTemplate', + productTemplates: 'ProductTemplate', + projectTypes: 'ProjectType', + productCategories: 'ProductCategory', + milestoneTemplates: 'MilestoneTemplate', + orgConfigs: 'OrgConfig', + forms: 'Form', + planConfigs: 'PlanConfig', + priceConfigs: 'PriceConfig', + buildingBlocks: 'BuildingBlock', + }, + }, }; diff --git a/scripts/es-db-compare/index.js b/scripts/es-db-compare/index.js index f74e152d..46bec184 100644 --- a/scripts/es-db-compare/index.js +++ b/scripts/es-db-compare/index.js @@ -1,4 +1,5 @@ /* eslint-disable no-console */ +/* eslint-disable no-param-reassign */ /* * Compare data between DB and ES and generate a report to be uploaded * to AWS S3. 
@@ -15,7 +16,9 @@ import { INVITE_STATUS } from '../../src/constants'; const handlebars = require('handlebars'); const path = require('path'); const fs = require('fs'); +const { compareMetadata } = require('./compareMetadata'); const { compareProjects } = require('./compareProjects'); +const scriptConstants = require('./constants'); const scriptConfig = { PROJECT_START_ID: process.env.PROJECT_START_ID, @@ -44,13 +47,17 @@ const es = util.getElasticSearchClient(); const ES_PROJECT_INDEX = config.get('elasticsearchConfig.indexName'); const ES_PROJECT_TYPE = config.get('elasticsearchConfig.docType'); +const ES_METADATA_INDEX = config.get('elasticsearchConfig.metadataIndexName'); +const ES_METADATA_TYPE = config.get('elasticsearchConfig.metadataDocType'); +const ES_TIMELINE_INDEX = config.get('elasticsearchConfig.timelineIndexName'); +const ES_TIMELINE_TYPE = config.get('elasticsearchConfig.timelineDocType'); /** * Get es search criteria. * * @returns {Object} the search criteria */ -function getESSearchCriteria() { +function getESSearchCriteriaForProject() { const filters = []; if (!lodash.isNil(scriptConfig.PROJECT_START_ID)) { filters.push({ @@ -118,12 +125,22 @@ function getTemplate() { } /** - * Get ES data. + * Get product timelines from ES. * * @returns {Promise} the ES data */ -async function getESData() { - const searchCriteria = getESSearchCriteria(); +async function getProductTimelinesFromES() { + const searchCriteria = { + index: ES_TIMELINE_INDEX, + type: ES_TIMELINE_TYPE, + body: { + query: { + match_phrase: { + reference: 'product', + }, + }, + }, + }; return es.search(searchCriteria) .then((docs) => { const rows = lodash.map(docs.hits.hits, single => single._source); // eslint-disable-line no-underscore-dangle @@ -132,17 +149,65 @@ async function getESData() { } /** - * Get DB data. + * Get projects from ES. 
+ * + * @returns {Promise} the ES data + */ +async function getProjectsFromES() { + const searchCriteria = getESSearchCriteriaForProject(); + const projects = await es.search(searchCriteria) + .then((docs) => { + const rows = lodash.map(docs.hits.hits, single => single._source); // eslint-disable-line no-underscore-dangle + return rows; + }); + const timelines = await getProductTimelinesFromES(); + const timelinesGroup = lodash.groupBy(timelines, 'referenceId'); + lodash.map(projects, (project) => { + lodash.map(project.phases, (phase) => { + lodash.map(phase.products, (product) => { + product.timeline = lodash.get(timelinesGroup, [product.id, '0']) || null; + }); + }); + }); + return projects; +} + +/** + * Get metadata from ES (an empty list per metadata key if nothing is indexed). + * + * @returns {Promise} the first metadata document found in ES, or the empty fallback + */ +async function getMetadataFromES() { + const searchCriteria = { + index: ES_METADATA_INDEX, + type: ES_METADATA_TYPE, + }; + return es.search(searchCriteria) + .then((docs) => { + const rows = lodash.map(docs.hits.hits, single => single._source); // eslint-disable-line no-underscore-dangle + if (!rows.length) { + return lodash.reduce( + Object.keys(scriptConstants.associations.metadata), + (result, refPath) => { result[refPath] = []; return result; }, + {}, + ); + } + return rows[0]; + }); +} + +/** + * Get projects from DB.
* * @returns {Promise} the DB data */ -async function getDBData() { +async function getProjectsFromDB() { const filter = {}; if (!lodash.isNil(scriptConfig.PROJECT_START_ID)) { filter.id = { $between: [scriptConfig.PROJECT_START_ID, scriptConfig.PROJECT_END_ID] }; } if (!lodash.isNil(scriptConfig.PROJECT_LAST_ACTIVITY_AT)) { - filter.lastActivityAt = { $gte: scriptConfig.PROJECT_LAST_ACTIVITY_AT }; + filter.lastActivityAt = { $gte: new Date(scriptConfig.PROJECT_LAST_ACTIVITY_AT).toISOString() }; } return models.Project.findAll({ where: filter, @@ -172,24 +237,67 @@ async function getDBData() { return models.ProjectMember.getActiveProjectMembers(project.id) .then((currentProjectMembers) => { project.members = currentProjectMembers; - return project; + }).then(() => { + const promises = []; + lodash.map(project.phases, (phase) => { + lodash.map(phase.products, (product) => { + promises.push( + models.Timeline.findOne({ + where: { + reference: 'product', + referenceId: product.id, + }, + include: [{ + model: models.Milestone, + as: 'milestones', + }], + }).then((timeline) => { + product.timeline = timeline || null; + }), + ); + }); + }); + return Promise.all(promises) + .then(() => project); }); }); return Promise.all(projects); }).then(projects => JSON.parse(JSON.stringify(projects))); } +/** + * Get metadata from DB. + * + * @returns {Promise} the DB data + */ +async function getMetadataFromDB() { + const metadataAssociations = scriptConstants.associations.metadata; + const results = await Promise.all(lodash.map( + Object.values(metadataAssociations), + modelName => models[modelName].findAll(), + )); + return lodash.zipObject(Object.keys(metadataAssociations), JSON.parse(JSON.stringify(results))); +} + /** * Main function. 
* * @returns {Promise} void */ async function main() { - const esData = await getESData(); - const dbData = await getDBData(); + console.log('Processing Project...'); + const projectsFromDB = await getProjectsFromDB(); + const projectsFromES = await getProjectsFromES(); + const dataForProject = compareProjects(projectsFromDB, projectsFromES); + console.log('Processing Metadata...'); + const metadataFromDB = await getMetadataFromDB(); + const metadataFromES = await getMetadataFromES(); + const dataForMetadata = compareMetadata(metadataFromDB, metadataFromES); const template = getTemplate(); - const data = compareProjects(esData, dbData); - const report = template(data); + const report = template({ + metadata: dataForMetadata, + project: dataForProject, + }); fs.writeFileSync(reportPathname, report); console.log(`report is written to ${reportPathname}`); } diff --git a/scripts/es-db-compare/report.mustache b/scripts/es-db-compare/report.mustache index a69fa3cf..2ecd5ae9 100644 --- a/scripts/es-db-compare/report.mustache +++ b/scripts/es-db-compare/report.mustache @@ -1,21 +1,17 @@ Topcoder Project Service - ES/DB Comparison Report