Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {

| task | model |
| ---- | ----- |
| clustering | (Soft / Kernel / Genetic / Weighted) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, DBCLASD, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder |
| clustering | (Soft / Kernel / Genetic / Weighted) k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder |
| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, Ordered logistic, Ordered probit, PRank, OAP-BPM, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, LMNN |
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
Expand Down
1 change: 1 addition & 0 deletions js/model_selector.js
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ const AIMethods = [
{ value: 'hdbscan', title: 'HDBSCAN' },
{ value: 'denclue', title: 'DENCLUE' },
{ value: 'dbclasd', title: 'DBCLASD' },
{ value: 'bridge', title: 'BRIDGE' },
],
'': [
{ value: 'mutual_knn', title: 'Mutual kNN' },
Expand Down
29 changes: 29 additions & 0 deletions js/view/bridge.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import BRIDGE from '../../lib/model/bridge.js'
import Controller from '../controller.js'

/**
 * Builds the BRIDGE clustering controls on the given platform.
 *
 * Registers the usage text and the paper reference, creates the three numeric
 * inputs (k, e-core, e density), a "Fit" button and a cluster-count read-out,
 * and refits the model whenever an input changes or the button is clicked.
 *
 * @param {object} platform Platform object supplying `setting.ml`, `trainInput` and `trainResult`
 */
export default function (platform) {
	platform.setting.ml.usage = 'Click and add data point. Then, click "Fit" button.'
	platform.setting.ml.reference = {
		author: 'M. Dash, H. Liu, X. Xu',
		title: '1 + 1 > 2: Merging distance and density based clustering',
		year: 2001,
	}
	const controller = new Controller(platform)

	// Declared as a hoisted function so the inputs below can reference it;
	// it only reads the input elements when it is actually invoked.
	function refit() {
		const bridge = new BRIDGE(kInput.value, eCoreInput.value, eDenInput.value)
		const labels = bridge.predict(platform.trainInput)
		// Shift every label up by one before handing it to the platform.
		platform.trainResult = labels.map(label => label + 1)
		const distinctLabels = new Set(labels)
		clusterCount.value = distinctLabels.size
	}

	const kInput = controller.input.number({ label: 'k', min: 1, max: 100, value: 5 }).on('change', refit)
	const eCoreInput = controller.input
		.number({ label: 'e-core', min: 0, max: 10, value: 0.1, step: 0.01 })
		.on('change', refit)
	const eDenInput = controller.input
		.number({ label: 'e density', min: 0, max: 10, value: 0.05, step: 0.01 })
		.on('change', refit)
	controller.input.button('Fit').on('click', refit)
	const clusterCount = controller.text({ label: ' Clusters: ' })
}
126 changes: 126 additions & 0 deletions lib/model/bridge.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import { KMeanspp } from './kmeans.js'
import DBSCAN from './dbscan.js'

/**
 * BRIDGE
 *
 * Combines a k-means++ partition with DBSCAN: DBSCAN is run inside every
 * k-means cluster on its core and e-core points, and then once more over the
 * e-core and non-core points so that labels can spread across cluster borders.
 */
export default class BRIDGE {
	// http://i2pc.es/coss/Docencia/SignalProcessingReviews/Murtagh2012.pdf
	// 1 + 1 > 2: Merging distance and density based clustering
	// https://www.comp.nus.edu.sg/~lingtw/dasfaa_proceedings/dasfaa2001/00916361.pdf
	/**
	 * @param {number} k K-means clustering size
	 * @param {number} e_core e for core distance
	 * @param {number} e_den e for density base clustering
	 */
	constructor(k, e_core, e_den) {
		this._k = k
		this._e_core = e_core
		this._e_den = e_den
	}

	/**
	 * Euclidean distance between two vectors of the same length.
	 *
	 * @param {number[]} a First vector
	 * @param {number[]} b Second vector
	 * @returns {number} Distance between a and b
	 */
	_distance(a, b) {
		return Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0))
	}

	/**
	 * Returns predicted categories.
	 *
	 * Points that end up in no cluster are labelled -1. An empty input yields
	 * an empty result.
	 *
	 * @param {Array<Array<number>>} datas Training data
	 * @returns {number[]} Predicted values
	 */
	predict(datas) {
		const n = datas.length
		if (n === 0) {
			// Nothing to cluster; also avoids reading datas[0] below.
			return (this._clusters = [])
		}
		const dim = datas[0].length

		// Distance based stage: k-means++ with k centroids, iterated until
		// fit() no longer reports a positive value.
		const kmeans = new KMeanspp()
		for (let i = 0; i < this._k; i++) {
			kmeans.add(datas)
		}
		while (kmeans.fit(datas) > 0);

		// coredist[i]: half the distance from centroid i to its nearest other centroid.
		// cranges[i]: per-dimension [min, max] bounding box of cluster i.
		const cranges = []
		const coredist = Array(this._k).fill(Infinity)
		for (let i = 0; i < this._k; i++) {
			for (let j = i + 1; j < this._k; j++) {
				const d = this._distance(kmeans.centroids[i], kmeans.centroids[j]) / 2
				if (d < coredist[i]) {
					coredist[i] = d
				}
				if (d < coredist[j]) {
					coredist[j] = d
				}
			}
			cranges[i] = Array.from({ length: dim }, () => [Infinity, -Infinity])
		}

		const p = kmeans.predict(datas)
		const nk = Array(this._k).fill(0)
		const core_points = Array.from({ length: this._k }, () => [])
		const ecore_points = Array.from({ length: this._k }, () => [])
		const noncore_points = []
		// trange: per-dimension [min, max] bounding box of the whole data set.
		const trange = Array.from({ length: dim }, () => [Infinity, -Infinity])
		for (let i = 0; i < n; i++) {
			const k = p[i]
			nk[k]++
			for (let j = 0; j < dim; j++) {
				cranges[k][j][0] = Math.min(cranges[k][j][0], datas[i][j])
				cranges[k][j][1] = Math.max(cranges[k][j][1], datas[i][j])
				trange[j][0] = Math.min(trange[j][0], datas[i][j])
				trange[j][1] = Math.max(trange[j][1], datas[i][j])
			}
			// Classify the point by its distance to its own centroid:
			// well inside the core distance, within e_core of it, or beyond.
			const d = this._distance(kmeans.centroids[k], datas[i])
			if (d < coredist[k] - this._e_core) {
				core_points[k].push(i)
			} else if (d < coredist[k] + this._e_core) {
				ecore_points[k].push(i)
			} else {
				noncore_points.push(i)
			}
		}

		const pd = Array(n).fill(-1)
		// Volume of the e_den neighbourhood: interval length in 1D, disc area
		// in 2D, ball volume in 3D, and a hypercube of side 2 * e_den otherwise.
		let evol = 1
		if (dim === 1) {
			evol = 2 * this._e_den
		} else if (dim === 2) {
			evol = Math.PI * this._e_den ** 2
		} else if (dim === 3) {
			evol = (4 / 3) * Math.PI * this._e_den ** 3
		} else {
			evol = (2 * this._e_den) ** dim
		}

		// Density based stage inside every k-means cluster. `offset` keeps the
		// labels produced for different k-means clusters disjoint.
		let offset = 0
		for (let k = 0; k < this._k; k++) {
			// minpts scales the cluster size by the ratio of the neighbourhood
			// volume to the cluster's bounding-box volume.
			const tvol = cranges[k].reduce((s, v) => s * (v[1] - v[0]), 1)
			const minpts = (evol / tvol) * nk[k]
			const dbscan = new DBSCAN(this._e_den, minpts)

			const cp = core_points[k].concat(ecore_points[k])
			const labels = dbscan.predict(cp.map(i => datas[i]))
			let max_p = offset
			for (let i = 0; i < cp.length; i++) {
				// Negative DBSCAN labels are treated as "unassigned".
				if (labels[i] >= 0) {
					pd[cp[i]] = offset + labels[i]
					max_p = Math.max(max_p, offset + labels[i])
				}
			}
			offset = max_p + 1
		}

		// Density based stage over the e-core and non-core points, using the
		// bounding box and size of the whole data set for minpts.
		const tvol = trange.reduce((s, v) => s * (v[1] - v[0]), 1)
		const dbscan = new DBSCAN(this._e_den, (evol / tvol) * n)
		const ecp = ecore_points.reduce((acc, e) => acc.concat(e), noncore_points)
		const pe = dbscan.predict(ecp.map(i => datas[i]))

		// match[c]: the first already-assigned label found among the members
		// of density cluster c.
		const match = []
		for (let i = 0; i < ecp.length; i++) {
			if (pd[ecp[i]] >= 0 && pe[i] >= 0 && match[pe[i]] == null) {
				match[pe[i]] = pd[ecp[i]]
			}
		}
		for (let i = 0; i < ecp.length; i++) {
			// Only adopt a label when one was actually recorded for this
			// density cluster; otherwise the point stays -1 instead of
			// becoming undefined.
			if (pd[ecp[i]] < 0 && pe[i] >= 0 && match[pe[i]] != null) {
				pd[ecp[i]] = match[pe[i]]
			}
		}
		return (this._clusters = pd)
	}
}
46 changes: 46 additions & 0 deletions tests/gui/view/bridge.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { getPage } from '../helper/browser'

describe('clustering', () => {
	/** @type {Awaited<ReturnType<getPage>>} */
	let page
	beforeEach(async () => {
		page = await getPage()
	})

	afterEach(async () => {
		await page?.close()
	})

	// Selects the 'CT' task and the 'bridge' model, then resolves to the
	// '.buttons' container of the method menu.
	const openBridgeButtons = async () => {
		const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
		await taskSelectBox.selectOption('CT')
		const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
		await modelSelectBox.selectOption('bridge')
		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
		return methodMenu.waitForSelector('.buttons')
	}

	// Resolves to the `value` property of the element matching `selector` inside `buttons`.
	const readValue = async (buttons, selector) => {
		const element = await buttons.waitForSelector(selector)
		const property = await element.getProperty('value')
		return property.jsonValue()
	}

	test('initialize', async () => {
		const buttons = await openBridgeButtons()

		// Defaults of k, e-core and e density, in the order the view creates them.
		await expect(readValue(buttons, 'input:nth-of-type(1)')).resolves.toBe('5')
		await expect(readValue(buttons, 'input:nth-of-type(2)')).resolves.toBe('0.1')
		await expect(readValue(buttons, 'input:nth-of-type(3)')).resolves.toBe('0.05')
	})

	test('learn', async () => {
		const buttons = await openBridgeButtons()

		const clusters = await buttons.waitForSelector('span:last-child', { state: 'attached' })
		const clusterText = () => clusters.evaluate(el => el.textContent)
		// The read-out is empty until the model has been fitted.
		await expect(clusterText()).resolves.toBe('')

		const fitButton = await buttons.waitForSelector('input[value=Fit]')
		await fitButton.evaluate(el => el.click())

		await expect(clusterText()).resolves.toMatch(/^[0-9]+$/)
	})
})
47 changes: 47 additions & 0 deletions tests/lib/model/bridge.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { jest } from '@jest/globals'
// Allow up to 3 retries of a failing test in this file — presumably because
// the inputs below are drawn at random with Matrix.randn (confirm).
jest.retryTimes(3)

import Matrix from '../../../lib/util/matrix.js'
import BRIDGE from '../../../lib/model/bridge.js'

import { randIndex } from '../../../lib/evaluate/clustering.js'

describe('clustering', () => {
	// Number of samples drawn per group.
	const n = 50

	// Clusters `x` with BRIDGE(2, 0.2, 1) and checks the result against the
	// ground truth in which every consecutive run of `n` rows is one group.
	const expectGroupsRecovered = x => {
		const model = new BRIDGE(2, 0.2, 1)

		const y = model.predict(x)
		expect(y).toHaveLength(x.length)

		const t = Array.from(x, (_, i) => Math.floor(i / n))
		const ri = randIndex(y, t)
		expect(ri).toBeGreaterThan(0.9)
	}

	test('1', () => {
		const firstTwo = Matrix.concat(Matrix.randn(n, 1, 0, 0.2), Matrix.randn(n, 1, 5, 0.2))
		const x = Matrix.concat(firstTwo, Matrix.randn(n, 1, 10, 0.2)).toArray()

		expectGroupsRecovered(x)
	})

	test.each([2, 3, 4])('%d', dim => {
		const firstTwo = Matrix.concat(Matrix.randn(n, dim, 0, 0.2), Matrix.randn(n, dim, 5, 0.2))
		const thirdMean = [0, ...Array(dim - 1).fill(5)]
		const x = Matrix.concat(firstTwo, Matrix.randn(n, dim, thirdMean, 0.2)).toArray()

		expectGroupsRecovered(x)
	})
})