Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {

| task | model |
| ---- | ----- |
| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamerly, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, NMF, Autoencoder |
| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamerly, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, DiSH, NMF, Autoencoder |
| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, ELM, LMNN |
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
Expand Down
1 change: 1 addition & 0 deletions js/model_selector.js
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ const AIMethods = [
{ value: 'orclus', title: 'ORCLUS' },
{ value: 'findit', title: 'FINDIT' },
{ value: 'doc', title: 'DOC / FastDOC' },
{ value: 'dish', title: 'DiSH' },
{ value: 'plsa', title: 'PLSA' },
{ value: 'latent_dirichlet_allocation', title: 'Latent Dirichlet Allocation' },
{ value: 'nmf', title: 'NMF' },
Expand Down
24 changes: 24 additions & 0 deletions js/view/dish.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import DiSH from '../../lib/model/dish.js'
import Controller from '../controller.js'

/**
 * Builds the DiSH clustering panel: publishes the usage text and paper reference,
 * creates the `mu` / `e` numeric inputs and wires the "Fit" button.
 *
 * @param {*} platform Platform object; this function writes `setting.ml.usage`,
 * `setting.ml.reference` and `trainResult`, and reads `trainInput`.
 */
export default function (platform) {
	platform.setting.ml.usage = 'Click and add data point. Then, click "Fit" button.'
	platform.setting.ml.reference = {
		author: 'E. Achtert, C. Bohm, H. P. Kriegel, P. Kroger, I. Muller-Gorman, A. Zimek',
		title: 'Detection and Visualization of Subspace Cluster Hierarchies',
		year: 2007,
	}

	const controller = new Controller(platform)
	const mu = controller.input.number({ label: ' mu ', min: 1, max: 1000, value: 20 })
	const e = controller.input.number({ label: ' e ', min: 0, max: 100, step: 0.1, value: 0.1 })

	// Clusters the current training input with the values entered in the inputs.
	// The labels returned by the model are shifted by one before being stored.
	function fit() {
		const labels = new DiSH(mu.value, e.value).predict(platform.trainInput)
		platform.trainResult = labels.map(label => label + 1)
	}

	controller.input.button('Fit').on('click', fit)
}
238 changes: 238 additions & 0 deletions lib/model/dish.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
/**
 * Small priority queue that stores `[value, priority]` pairs in ascending priority order.
 *
 * The backing array is re-sorted after every insertion or priority change, and
 * `Array.prototype.sort` is stable, so entries with equal priority keep their
 * current relative order.
 */
class PriorityQueue {
	/**
	 * @param {Array<[*, number]>} [arr] Initial `[value, priority]` pairs. The array is used as-is: it is neither copied nor sorted here.
	 */
	constructor(arr) {
		this._value = arr || []
	}

	/**
	 * Number of queued entries.
	 * @type {number}
	 */
	get length() {
		return this._value.length
	}

	/**
	 * Iterates over the `[value, priority]` pairs in their current order.
	 * @returns {Iterator<[*, number]>} Iterator over the stored pairs
	 */
	[Symbol.iterator]() {
		return this._value[Symbol.iterator]()
	}

	/**
	 * Re-sorts the entries by ascending priority.
	 */
	_sort() {
		this._value.sort((a, b) => a[1] - b[1])
	}

	/**
	 * Adds a value with the given priority.
	 * @param {*} value Value to store
	 * @param {number} priority Priority of the value (smaller comes out first)
	 */
	push(value, priority) {
		this._value.push([value, priority])
		this._sort()
	}

	/**
	 * Changes the priority of the first entry holding `value` (compared with `===`).
	 * When no entry holds it, the value is added instead.
	 * @param {*} value Value to look up
	 * @param {number} priority New priority
	 */
	move(value, priority) {
		const entry = this._value.find(pair => pair[0] === value)
		if (entry === undefined) {
			this.push(value, priority)
			return
		}
		entry[1] = priority
		this._sort()
	}

	/**
	 * Removes and returns the value at the front of the queue.
	 * @returns {*} Removed value, or `undefined` when the queue is empty
	 */
	shift() {
		const head = this._value.shift()
		// Array.prototype.shift yields undefined on an empty array; destructuring it would throw.
		if (head === undefined) {
			return undefined
		}
		return head[0]
	}
}

/**
 * Detecting Subspace cluster Hierarchies
 */
export default class DiSH {
	// Detection and Visualization of Subspace Cluster Hierarchies
	// https://imada.sdu.dk/u/zimek/publications/DASFAA2007/detection.pdf
	/**
	 * @param {number} mu Number of neighborhood
	 * @param {number} e Neighborhood range
	 */
	constructor(mu, e) {
		this._mu = mu
		this._e = e
	}

	/**
	 * Returns predicted categories.
	 *
	 * Steps performed:
	 * 1. For every point, pick the attributes (its preference vector) along which at least
	 *    `mu` points lie within `e`, growing the attribute set greedily.
	 * 2. Compute pairwise subspace distances from the preference vectors.
	 * 3. Build a cluster order with a priority queue.
	 * 4. Walk the cluster order and assign each point to the first matching cluster, or open a new one.
	 * 5. Record parent candidates between clusters in `this._clusters`.
	 *
	 * @param {Array<Array<number>>} x Training data
	 * @returns {number[]} Predicted values (cluster index for each row of `x`; empty for empty input)
	 */
	predict(x) {
		const n = x.length
		if (n === 0) {
			// Nothing to cluster; also avoids reading x[0] below.
			this._clusters = []
			return []
		}
		const a = x[0].length

		// w[i][k] is 1 when attribute k is part of the preferred subspace of point i, otherwise 0.
		const w = []
		for (let i = 0; i < n; i++) {
			// nears[k]: indices of points whose k-th coordinate is within e of x[i][k].
			const nears = []
			for (let k = 0; k < a; k++) {
				nears[k] = new Set()
				for (let j = 0; j < n; j++) {
					if (Math.abs(x[i][k] - x[j][k]) <= this._e) {
						nears[k].add(j)
					}
				}
			}

			// Attributes with at least mu neighbors are candidates; one of them seeds the subspace.
			// NOTE(review): the seed is the candidate with the SMALLEST neighborhood (first one on ties).
			// The original variable names (max_n / max_k) suggest the largest was intended - confirm against the paper.
			const c = new Set()
			let seedSize = -1
			let seedAttr = -1
			for (let k = 0; k < a; k++) {
				if (nears[k].size < this._mu) {
					continue
				}
				if (seedSize < 0) {
					seedSize = nears[k].size
					seedAttr = k
				} else if (nears[k].size < seedSize) {
					c.add(seedAttr)
					seedSize = nears[k].size
					seedAttr = k
				} else {
					c.add(k)
				}
			}
			w[i] = Array(a).fill(0)
			if (seedAttr < 0) {
				// No attribute has enough neighbors: the preference vector stays all zero.
				continue
			}
			w[i][seedAttr] = 1

			// Greedily add the candidate whose neighborhood, intersected with the current one,
			// stays largest, as long as at least mu points remain.
			// (`c` is a Set, so its element count is `size`; `length` is undefined and would skip this loop.)
			let inter = nears[seedAttr]
			while (c.size > 0) {
				let bestSize = -1
				let bestAttr = -1
				for (const k of c) {
					const narrowed = new Set()
					for (const j of inter) {
						if (nears[k].has(j)) {
							narrowed.add(j)
						}
					}
					nears[k] = narrowed
					if (narrowed.size >= bestSize) {
						bestSize = narrowed.size
						bestAttr = k
					}
				}
				if (bestSize < this._mu) {
					break
				}
				w[i][bestAttr] = 1
				c.delete(bestAttr)
				inter = nears[bestAttr]
			}
		}

		const queue = new PriorityQueue()
		for (let i = 0; i < n; i++) {
			queue.push(i, Infinity)
		}

		// sdists[i][j] = [j, d]: d counts the attributes missing from the combined preference vector,
		// plus 1 when the points are farther apart than 2e inside the shared subspace.
		const sdists = []
		for (let i = 0; i < n; i++) {
			sdists[i] = []
			sdists[i][i] = [i, 0]
			for (let j = 0; j < i; j++) {
				let lambda = 0
				let sd = 0
				for (let k = 0; k < a; k++) {
					if (w[i][k] === 0 || w[j][k] === 0) {
						lambda++
					}
					if (w[i][k] === 1 && w[j][k] === 1) {
						sd += (x[i][k] - x[j][k]) ** 2
					}
				}
				const d = lambda + (Math.sqrt(sd) > 2 * this._e ? 1 : 0)
				sdists[i][j] = [j, d]
				sdists[j][i] = [i, d]
			}
		}

		// Rank of the reference neighbor, clamped so that data sets with mu or fewer points
		// (or a fractional mu) still yield a valid entry instead of throwing.
		const rank = Math.max(0, Math.min(Math.floor(this._mu), n - 1))

		// co: cluster order (data indices in the order they leave the queue).
		const co = []
		while (queue.length > 0) {
			const o = queue.shift()
			const ss = sdists[o].concat()
			ss.sort((s, t) => s[1] - t[1])
			const [r] = ss[rank]
			// Iterate over a snapshot because move() re-sorts the queue.
			for (const [p] of [...queue]) {
				const sr = Math.max(sdists[o][r][1], sdists[o][p][1])
				queue.move(p, sr)
			}
			co.push(o)
		}

		const clusters = []
		for (let i = 0; i < n; i++) {
			const idx = co[i]
			let c = null
			// For i === 0 there are no clusters yet, so co[i - 1] is never read out of range.
			for (let t = 0; t < clusters.length; t++) {
				if (clusters[t].w.some((v, k) => v !== w[idx][k] * w[co[i - 1]][k])) {
					continue
				}
				let dist = 0
				for (let k = 0; k < a; k++) {
					if (clusters[t].w[k] === 1) {
						dist += (clusters[t].center[k] - x[idx][k]) ** 2
					}
				}
				if (Math.sqrt(dist) > 2 * this._e) {
					continue
				}
				c = t
				break
			}
			if (c == null) {
				c = clusters.length
				clusters.push({
					i: [],
					center: Array(a).fill(0),
					w: w[idx].concat(),
					l: w[idx].reduce((s, v) => s + (v === 0 ? 1 : 0), 0),
					parents: [],
				})
			}
			// Running mean of the member points.
			const size = clusters[c].i.length
			clusters[c].center = clusters[c].center.map((v, k) => (v * size + x[idx][k]) / (size + 1))
			// Store the data index (not the position in the cluster order) so labels map back to rows of x.
			clusters[c].i.push(idx)
		}

		// Collect parent candidates: clusters with more unselected attributes (larger l).
		for (let i = 0; i < clusters.length; i++) {
			for (let j = 0; j < clusters.length; j++) {
				if (clusters[j].l <= clusters[i].l) {
					continue
				}
				if (clusters[j].l === a) {
					// A cluster with no preferred attribute is always recorded as a parent.
					clusters[i].parents.push(j)
					continue
				}
				let dist = 0
				for (let k = 0; k < a; k++) {
					if (clusters[i].w[k] === 1 && clusters[j].w[k] === 1) {
						dist += (clusters[i].center[k] - clusters[j].center[k]) ** 2
					}
				}
				if (Math.sqrt(dist) > 2 * this._e) {
					continue
				}

				// Skip j when an already recorded parent has a smaller l than j.
				const target = clusters[i].parents.every(t => clusters[t].l >= clusters[j].l)
				if (target) {
					clusters[i].parents.push(j)
				}
			}
		}
		this._clusters = clusters

		const p = []
		for (let c = 0; c < clusters.length; c++) {
			for (const idx of clusters[c].i) {
				p[idx] = c
			}
		}
		return p
	}
}
45 changes: 45 additions & 0 deletions tests/gui/view/dish.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { getPage } from '../helper/browser'

// GUI test for the DiSH view: selects the clustering task and the "dish" model,
// then checks the default inputs and that fitting colors the points.
describe('clustering', () => {
	/** @type {Awaited<ReturnType<getPage>>} */
	let page

	// Resolves the element inside the method menu that holds the inputs and buttons.
	const findButtons = async () => {
		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
		return methodMenu.waitForSelector('.buttons')
	}

	beforeEach(async () => {
		page = await getPage()
		const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
		await taskSelectBox.selectOption('CT')
		const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
		await modelSelectBox.selectOption('dish')
	})

	afterEach(async () => {
		await page?.close()
	})

	test('initialize', async () => {
		const buttons = await findButtons()

		const muInput = await buttons.waitForSelector('input:nth-of-type(1)')
		await expect(muInput.getAttribute('value')).resolves.toBe('20')
		const eInput = await buttons.waitForSelector('input:nth-of-type(2)')
		await expect(eInput.getAttribute('value')).resolves.toBe('0.1')
	})

	test('learn', async () => {
		const buttons = await findButtons()

		const fitButton = await buttons.waitForSelector('input[value=Fit]')
		await fitButton.evaluate(el => el.click())

		const svg = await page.waitForSelector('#plot-area svg')
		await svg.waitForSelector('.datas circle')
		const circles = await svg.$$('.datas circle')

		// Collect the fill of every plotted point, one at a time.
		const fills = []
		for (const circle of circles) {
			fills.push(await circle.evaluate(el => el.getAttribute('fill')))
		}
		// More than one distinct fill means more than one cluster was drawn.
		expect(new Set(fills).size).toBeGreaterThan(1)
	})
})
20 changes: 20 additions & 0 deletions tests/lib/model/dish.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import Matrix from '../../../lib/util/matrix.js'
import DiSH from '../../../lib/model/dish.js'

import { randIndex } from '../../../lib/evaluate/clustering.js'

// Two 6-dimensional gaussian blobs (means 0 and 5) should be separated almost perfectly.
test('clustering', () => {
	const n = 50
	const model = new DiSH(5, 2.0)
	const x = Matrix.concat(Matrix.randn(n, 6, 0, 0.1), Matrix.randn(n, 6, 5, 0.1)).toArray()

	const y = model.predict(x)
	expect(y).toHaveLength(x.length)

	// Ground truth: the first n rows belong to blob 0, the remaining rows to blob 1.
	const t = Array.from({ length: x.length }, (_, i) => Math.floor(i / n))
	expect(randIndex(y, t)).toBeGreaterThan(0.9)
})