From 42c90178b10be5166edbe8eb76f8528b311e33f0 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Mon, 8 Jan 2024 16:14:56 +0900 Subject: [PATCH 1/2] Add BRIDGE --- README.md | 2 +- js/model_selector.js | 1 + js/view/bridge.js | 29 +++++++ lib/model/bridge.js | 134 +++++++++++++++++++++++++++++++++ tests/gui/view/bridge.test.js | 46 +++++++++++ tests/lib/model/bridge.test.js | 23 ++++++ 6 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 js/view/bridge.js create mode 100644 lib/model/bridge.js create mode 100644 tests/gui/view/bridge.test.js create mode 100644 tests/lib/model/bridge.test.js diff --git a/README.md b/README.md index 9a57ae5d3..dc62a03e7 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) { | task | model | | ---- | ----- | -| clustering | (Soft / Kernel / Genetic / Weighted) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, DBCLASD, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder | +| clustering | (Soft / Kernel / Genetic / Weighted) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, 
GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder | | classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, Ordered logistic, Ordered probit, PRank, OAP-BPM, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, LMNN | | semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network | | regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median | diff --git 
a/js/model_selector.js b/js/model_selector.js index 5bfc03e8e..5a93bbfbc 100644 --- a/js/model_selector.js +++ b/js/model_selector.js @@ -106,6 +106,7 @@ const AIMethods = [ { value: 'hdbscan', title: 'HDBSCAN' }, { value: 'denclue', title: 'DENCLUE' }, { value: 'dbclasd', title: 'DBCLASD' }, + { value: 'bridge', title: 'BRIDGE' }, ], '': [ { value: 'mutual_knn', title: 'Mutual kNN' }, diff --git a/js/view/bridge.js b/js/view/bridge.js new file mode 100644 index 000000000..65ad6c492 --- /dev/null +++ b/js/view/bridge.js @@ -0,0 +1,29 @@ +import BRIDGE from '../../lib/model/bridge.js' +import Controller from '../controller.js' + +export default function (platform) { + platform.setting.ml.usage = 'Click and add data point. Then, click "Fit" button.' + platform.setting.ml.reference = { + author: 'M. Dash, H. Liu, X. Xu', + title: '1 + 1 > 2: Merging distance and density based clustering', + year: 2001, + } + const controller = new Controller(platform) + + const fitModel = () => { + const model = new BRIDGE(k.value, e_core.value, e_den.value) + const pred = model.predict(platform.trainInput) + platform.trainResult = pred.map(v => v + 1) + clusters.value = new Set(pred).size + } + + const k = controller.input.number({ label: 'k', min: 1, max: 100, value: 5 }).on('change', fitModel) + const e_core = controller.input + .number({ label: 'e-core', min: 0, max: 10, value: 0.1, step: 0.01 }) + .on('change', fitModel) + const e_den = controller.input + .number({ label: 'e density', min: 0, max: 10, value: 0.05, step: 0.01 }) + .on('change', fitModel) + controller.input.button('Fit').on('click', fitModel) + const clusters = controller.text({ label: ' Clusters: ' }) +} diff --git a/lib/model/bridge.js b/lib/model/bridge.js new file mode 100644 index 000000000..d2f89ecd0 --- /dev/null +++ b/lib/model/bridge.js @@ -0,0 +1,134 @@ +import { KMeanspp } from './kmeans.js' +import DBSCAN from './dbscan.js' + +const logGamma = z => { + // 
https://ja.wikipedia.org/wiki/%E3%82%AC%E3%83%B3%E3%83%9E%E9%96%A2%E6%95%B0 + let x = 0 + if (Number.isInteger(z)) { + for (let i = 2; i < z; i++) { + x += Math.log(i) + } + } else { + const n = z - 0.5 + x = Math.log(Math.sqrt(Math.PI)) - Math.log(2) * n + for (let i = 2 * n - 1; i > 0; i -= 2) { + x += Math.log(i) + } + } + return x +} + +/** + * BRIDGE + */ +export default class BRIDGE { + // http://i2pc.es/coss/Docencia/SignalProcessingReviews/Murtagh2012.pdf + // 1 + 1 > 2: Merging distance and density based clustering + // https://www.comp.nus.edu.sg/~lingtw/dasfaa_proceedings/dasfaa2001/00916361.pdf + /** + * @param {number} k K-means clustering size + * @param {number} e_core e for core distance + * @param {number} e_den e for density base clustering + */ + constructor(k, e_core, e_den) { + this._k = k + this._e_core = e_core + this._e_den = e_den + } + + _distance(a, b) { + return Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0)) + } + + /** + * Returns predicted categories. 
+ * + * @param {Array<Array<number>>} datas Training data + * @returns {number[]} Predicted values + */ + predict(datas) { + const n = datas.length + const dim = datas[0].length + + const kmeans = new KMeanspp() + for (let i = 0; i < this._k; i++) { + kmeans.add(datas) + } + while (kmeans.fit(datas) > 0); + const cranges = [] + const coredist = Array(this._k).fill(Infinity) + for (let i = 0; i < this._k; i++) { + for (let j = i + 1; j < this._k; j++) { + const d = this._distance(kmeans.centroids[i], kmeans.centroids[j]) / 2 + if (d < coredist[i]) { + coredist[i] = d + } + if (d < coredist[j]) { + coredist[j] = d + } + } + cranges[i] = Array.from({ length: dim }, () => [Infinity, -Infinity]) + } + const p = kmeans.predict(datas) + const nk = Array(this._k).fill(0) + const core_points = Array.from({ length: this._k }, () => []) + const ecore_points = Array.from({ length: this._k }, () => []) + const noncore_points = [] + const trange = Array.from({ length: dim }, () => [Infinity, -Infinity]) + for (let i = 0; i < n; i++) { + const k = p[i] + nk[k]++ + for (let j = 0; j < dim; j++) { + cranges[k][j][0] = Math.min(cranges[k][j][0], datas[i][j]) + cranges[k][j][1] = Math.max(cranges[k][j][1], datas[i][j]) + trange[j][0] = Math.min(trange[j][0], datas[i][j]) + trange[j][1] = Math.max(trange[j][1], datas[i][j]) + } + const d = this._distance(kmeans.centroids[k], datas[i]) + if (d < coredist[k] - this._e_core) { + core_points[k].push(i) + } else if (d < coredist[k] + this._e_core) { + ecore_points[k].push(i) + } else { + noncore_points.push(i) + } + } + + const pd = Array(n).fill(-1) + const evol = Math.exp(dim * Math.log(this._e_den) - logGamma(dim / 2 + 1) + (dim / 2) * Math.log(Math.PI)) + let offset = 0 + for (let k = 0; k < this._k; k++) { + const tvol = cranges[k].reduce((s, v) => s * (v[1] - v[0]), 1) + const minpts = (evol / tvol) * nk[k] + const dbscan = new DBSCAN(this._e_den, minpts) + + const cp = core_points[k].concat(ecore_points[k]) + const p = dbscan.predict(cp.map(i => 
datas[i])) + let max_p = offset + for (let i = 0; i < cp.length; i++) { + if (p[i] >= 0) { + pd[cp[i]] = offset + p[i] + max_p = Math.max(max_p, offset + p[i]) + } + } + offset = max_p + 1 + } + + const tvol = trange.reduce((s, v) => s * (v[1] - v[0]), 1) + const dbscan = new DBSCAN(this._e_den, (evol / tvol) * n) + const ecp = ecore_points.reduce((p, e) => p.concat(e), noncore_points) + const pe = dbscan.predict(ecp.map(i => datas[i])) + const match = [] + for (let i = 0; i < ecp.length; i++) { + if (pd[ecp[i]] >= 0 && pe[i] >= 0 && match[pe[i]] == null) { + match[pe[i]] = pd[ecp[i]] + } + } + for (let i = 0; i < ecp.length; i++) { + if (pd[ecp[i]] < 0 && pe[i] >= 0) { + pd[ecp[i]] = match[pe[i]] + } + } + return (this._clusters = pd) + } +} diff --git a/tests/gui/view/bridge.test.js b/tests/gui/view/bridge.test.js new file mode 100644 index 000000000..6f7a08d46 --- /dev/null +++ b/tests/gui/view/bridge.test.js @@ -0,0 +1,46 @@ +import { getPage } from '../helper/browser' + +describe('clustering', () => { + /** @type {Awaited<ReturnType<typeof getPage>>} */ + let page + beforeEach(async () => { + page = await getPage() + }) + + afterEach(async () => { + await page?.close() + }) + + test('initialize', async () => { + const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select') + await taskSelectBox.selectOption('CT') + const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp') + await modelSelectBox.selectOption('bridge') + const methodMenu = await page.waitForSelector('#ml_selector #method_menu') + const buttons = await methodMenu.waitForSelector('.buttons') + + const k = await buttons.waitForSelector('input:nth-of-type(1)') + await expect((await k.getProperty('value')).jsonValue()).resolves.toBe('5') + const ecore = await buttons.waitForSelector('input:nth-of-type(2)') + await expect((await ecore.getProperty('value')).jsonValue()).resolves.toBe('0.1') + const eden = await 
buttons.waitForSelector('input:nth-of-type(3)') + await expect((await eden.getProperty('value')).jsonValue()).resolves.toBe('0.05') + }) + + test('learn', async () => { + const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select') + await taskSelectBox.selectOption('CT') + const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp') + await modelSelectBox.selectOption('bridge') + const methodMenu = await page.waitForSelector('#ml_selector #method_menu') + const buttons = await methodMenu.waitForSelector('.buttons') + + const clusters = await buttons.waitForSelector('span:last-child', { state: 'attached' }) + await expect(clusters.evaluate(el => el.textContent)).resolves.toBe('') + + const fitButton = await buttons.waitForSelector('input[value=Fit]') + await fitButton.evaluate(el => el.click()) + + await expect(clusters.evaluate(el => el.textContent)).resolves.toMatch(/^[0-9]+$/) + }) +}) diff --git a/tests/lib/model/bridge.test.js b/tests/lib/model/bridge.test.js new file mode 100644 index 000000000..bf9f6fda7 --- /dev/null +++ b/tests/lib/model/bridge.test.js @@ -0,0 +1,23 @@ +import Matrix from '../../../lib/util/matrix.js' +import BRIDGE from '../../../lib/model/bridge.js' + +import { randIndex } from '../../../lib/evaluate/clustering.js' + +test.each([2, 3, 4])('clustering %d', dim => { + const model = new BRIDGE(2, 0.2, 1) + const n = 50 + const x = Matrix.concat( + Matrix.concat(Matrix.randn(n, dim, 0, 0.2), Matrix.randn(n, dim, 5, 0.2)), + Matrix.randn(n, dim, [0, ...Array(dim - 1).fill(5)], 0.2) + ).toArray() + + const y = model.predict(x) + expect(y).toHaveLength(x.length) + + const t = [] + for (let i = 0; i < x.length; i++) { + t[i] = Math.floor(i / n) + } + const ri = randIndex(y, t) + expect(ri).toBeGreaterThan(0.9) +}) From ab287352cb04b4c8c330a7a1c5ae49cc028e5c45 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Mon, 8 Jan 2024 17:26:10 +0900 Subject: [PATCH 2/2] Change eVolume 
calculation --- lib/model/bridge.js | 28 ++++++---------- tests/lib/model/bridge.test.js | 58 ++++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/lib/model/bridge.js b/lib/model/bridge.js index d2f89ecd0..461c607dc 100644 --- a/lib/model/bridge.js +++ b/lib/model/bridge.js @@ -1,23 +1,6 @@ import { KMeanspp } from './kmeans.js' import DBSCAN from './dbscan.js' -const logGamma = z => { - // https://ja.wikipedia.org/wiki/%E3%82%AC%E3%83%B3%E3%83%9E%E9%96%A2%E6%95%B0 - let x = 0 - if (Number.isInteger(z)) { - for (let i = 2; i < z; i++) { - x += Math.log(i) - } - } else { - const n = z - 0.5 - x = Math.log(Math.sqrt(Math.PI)) - Math.log(2) * n - for (let i = 2 * n - 1; i > 0; i -= 2) { - x += Math.log(i) - } - } - return x -} - /** * BRIDGE */ @@ -95,7 +78,16 @@ export default class BRIDGE { } const pd = Array(n).fill(-1) - const evol = Math.exp(dim * Math.log(this._e_den) - logGamma(dim / 2 + 1) + (dim / 2) * Math.log(Math.PI)) + let evol = 1 + if (dim === 1) { + evol = 2 * this._e_den + } else if (dim === 2) { + evol = Math.PI * this._e_den ** 2 + } else if (dim === 3) { + evol = (4 / 3) * Math.PI * this._e_den ** 3 + } else { + evol = (2 * this._e_den) ** dim + } let offset = 0 for (let k = 0; k < this._k; k++) { const tvol = cranges[k].reduce((s, v) => s * (v[1] - v[0]), 1) diff --git a/tests/lib/model/bridge.test.js b/tests/lib/model/bridge.test.js index bf9f6fda7..b0602a151 100644 --- a/tests/lib/model/bridge.test.js +++ b/tests/lib/model/bridge.test.js @@ -1,23 +1,47 @@ +import { jest } from '@jest/globals' +jest.retryTimes(3) + import Matrix from '../../../lib/util/matrix.js' import BRIDGE from '../../../lib/model/bridge.js' import { randIndex } from '../../../lib/evaluate/clustering.js' -test.each([2, 3, 4])('clustering %d', dim => { - const model = new BRIDGE(2, 0.2, 1) - const n = 50 - const x = Matrix.concat( - Matrix.concat(Matrix.randn(n, dim, 0, 0.2), Matrix.randn(n, dim, 5, 0.2)), - Matrix.randn(n, dim, 
[0, ...Array(dim - 1).fill(5)], 0.2) - ).toArray() - - const y = model.predict(x) - expect(y).toHaveLength(x.length) - - const t = [] - for (let i = 0; i < x.length; i++) { - t[i] = Math.floor(i / n) - } - const ri = randIndex(y, t) - expect(ri).toBeGreaterThan(0.9) +describe('clustering', () => { + test('1', () => { + const model = new BRIDGE(2, 0.2, 1) + const n = 50 + const x = Matrix.concat( + Matrix.concat(Matrix.randn(n, 1, 0, 0.2), Matrix.randn(n, 1, 5, 0.2)), + Matrix.randn(n, 1, 10, 0.2) + ).toArray() + + const y = model.predict(x) + expect(y).toHaveLength(x.length) + + const t = [] + for (let i = 0; i < x.length; i++) { + t[i] = Math.floor(i / n) + } + const ri = randIndex(y, t) + expect(ri).toBeGreaterThan(0.9) + }) + + test.each([2, 3, 4])('%d', dim => { + const model = new BRIDGE(2, 0.2, 1) + const n = 50 + const x = Matrix.concat( + Matrix.concat(Matrix.randn(n, dim, 0, 0.2), Matrix.randn(n, dim, 5, 0.2)), + Matrix.randn(n, dim, [0, ...Array(dim - 1).fill(5)], 0.2) + ).toArray() + + const y = model.predict(x) + expect(y).toHaveLength(x.length) + + const t = [] + for (let i = 0; i < x.length; i++) { + t[i] = Math.floor(i / n) + } + const ri = randIndex(y, t) + expect(ri).toBeGreaterThan(0.9) + }) })