Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {

| task | model |
| ---- | ----- |
| clustering | k-means, k-means++, k-medois, k-medians, x-means, G-means, Weighted k-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
| clustering | k-means, k-means++, k-medoids, k-medians, x-means, G-means, Weighted k-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Genetic k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
| classification | Linear discriminant (FLD, LDA), Quadratic discriminant, Mixture discriminant, Least squares, Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, k-nearest neighbor, Radius neighbor, Fuzzy k-nearest neighbor, Nearest centroid, DANN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, ROMMA, Online gradient descent, Passive aggressive, RLS, Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, Perceptron, ADALINE, MLP, LMNN |
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, Ladder network |
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, spline, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
Expand Down
1 change: 1 addition & 0 deletions js/model_selector.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ const AIMethods = [
{ value: 'fuzzy_cmeans', title: 'Fuzzy C-Means' },
{ value: 'pcm', title: 'Possibilistic C-Means' },
{ value: 'kernel_kmeans', title: 'Kernel K-Means' },
{ value: 'genetic_kmeans', title: 'Genetic k-means' },
{ value: 'lbg', title: 'Linde-Buzo-Gray' },
{ value: 'pam', title: 'PAM / CLARA' },
{ value: 'clarans', title: 'CLARANS' },
Expand Down
39 changes: 39 additions & 0 deletions js/view/genetic_kmeans.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import GeneticKMeans from '../../lib/model/genetic_kmeans.js'

/**
 * Builds the Genetic k-means controls and wires them to the platform.
 *
 * Appends a "k" label and a numeric input to `elm`, then registers an
 * initializer and a step handler on the platform's ML controller.
 *
 * @param {*} elm Selection-like element that receives the controls
 * @param {*} platform Platform object providing data, controller and drawing hooks
 */
const dispGKMeans = (elm, platform) => {
    // Number of candidate solutions kept by the genetic search.
    const populationSize = 10
    let model = null

    elm.append('span').text('k')
    const inputAttributes = [
        ['name', 'k'],
        ['type', 'number'],
        ['min', 1],
        ['max', 100],
        ['value', 3],
    ]
    inputAttributes.reduce((selection, [key, val]) => selection.attr(key, val), elm.append('input'))

    // Reset the platform and create a fresh model from the current "k" value.
    const onInit = () => {
        platform.init()
        const clusterCount = Number(elm.select('[name=k]').property('value'))
        model = new GeneticKMeans(clusterCount, populationSize)
        platform.fit((tx, _ty) => {
            model.init(tx)
        })
    }

    // Run one generation, publish 1-based labels and draw the centroids.
    const onStep = cb => {
        platform.fit((tx, _ty, pred_cb) => {
            model.fit()
            const labels = model.predict(tx).map(category => category + 1)
            pred_cb(labels)
        })
        const centers = model.centroids
        const centerLabels = model.centroids.map((_, index) => index + 1)
        platform.centroids(centers, centerLabels, { line: true, duration: 1000 })
        if (cb) {
            // Give the centroid animation (1000 ms) time to finish before continuing.
            setTimeout(cb, 1000)
        }
    }

    platform.setting.ml.controller.stepLoopButtons().init(onInit).step(onStep)
}

/**
 * Entry point of the Genetic k-means view.
 *
 * Sets the usage text on the platform's ML setting and builds the controls
 * inside the ML configuration element.
 *
 * @param {*} platform Platform object exposing `setting.ml`
 */
export default function (platform) {
    const usage = 'Click and add data point. Then, click "Step" button repeatedly.'
    platform.setting.ml.usage = usage

    const configElement = platform.setting.ml.configElement
    dispGKMeans(configElement, platform)
}
216 changes: 216 additions & 0 deletions lib/model/genetic_kmeans.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/**
 * One candidate k-means solution (an individual of the genetic population).
 *
 * Holds a reference to the training data and its own set of centroids.
 */
class GeneticKMeansModel {
    /**
     * @param {number} k Number of clusters
     */
    constructor(k) {
        this._k = k
    }

    /**
     * Current centroids.
     *
     * @type {Array<Array<number>>}
     */
    get centroids() {
        return this._c
    }

    /**
     * Euclidean distance between two vectors of the same length.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number}
     */
    _distance(a, b) {
        return Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0))
    }

    /**
     * Stores the data and picks distinct random data points as initial centroids.
     *
     * When there are fewer data points than `k`, only `data.length` centroids are created.
     *
     * @param {Array<Array<number>>} data
     */
    init(data) {
        this._data = data

        const n = data.length
        const size = Math.min(this._k, n)
        // Draw the i-th index from the (n - i) positions that are still free ...
        const idx = []
        for (let i = 0; i < size; i++) {
            idx.push(Math.floor(Math.random() * (n - i)))
        }
        // ... then shift later draws past earlier ones so that all indices are distinct.
        // Only the `size` sampled entries need to be visited.
        for (let i = size - 1; i >= 0; i--) {
            for (let j = size - 1; j > i; j--) {
                if (idx[i] <= idx[j]) {
                    idx[j]++
                }
            }
        }

        this._c = idx.map(i => this._data[i].concat())
    }

    /**
     * Returns an independent copy that shares the data but owns its centroids.
     *
     * @returns {GeneticKMeansModel}
     */
    copy() {
        const cp = new GeneticKMeansModel(this._k)
        cp._data = this._data
        cp._c = this._c.map(c => c.concat())
        return cp
    }

    /**
     * Sum of squared distances between each data point and its nearest centroid.
     *
     * @returns {number}
     */
    cost() {
        const pred = this.predict(this._data)
        let c = 0
        for (let i = 0; i < this._data.length; i++) {
            for (let j = 0; j < this._data[i].length; j++) {
                c += (this._data[i][j] - this._c[pred[i]][j]) ** 2
            }
        }
        return c
    }

    /**
     * Moves every centroid to the mean of the data points assigned to it.
     *
     * A centroid without any assigned point keeps its previous position;
     * dividing by zero would turn it into NaN and it could never be selected again.
     *
     * @param {number[]} pred Cluster index of each data point
     */
    _updateCentroids(pred) {
        for (let k = 0; k < this._c.length; k++) {
            const m = Array(this._c[k].length).fill(0)
            let s = 0
            for (let i = 0; i < this._data.length; i++) {
                if (pred[i] !== k) {
                    continue
                }
                for (let j = 0; j < m.length; j++) {
                    m[j] += this._data[i][j]
                }
                s++
            }
            if (s > 0) {
                this._c[k] = m.map(v => v / s)
            }
        }
    }

    /**
     * Performs one k-means step: assign points to the nearest centroid, then update centroids.
     */
    fit() {
        this._updateCentroids(this.predict(this._data))
    }

    /**
     * Returns the index of the nearest centroid for each data point.
     *
     * @param {Array<Array<number>>} datas
     * @returns {number[]}
     */
    predict(datas) {
        return datas.map(value => {
            let min_d = Infinity
            let min_k = -1
            for (let i = 0; i < this._c.length; i++) {
                const d = this._distance(value, this._c[i])
                if (d < min_d) {
                    min_d = d
                    min_k = i
                }
            }
            return min_k
        })
    }

    /**
     * Randomly reassigns data points and recomputes the centroids.
     *
     * Each point is picked with probability `rate`; a picked point that does not
     * coincide with its centroid is moved to cluster `k` with a weight of
     * `cm * dmax - d[k]`, so nearer centroids are more likely.
     *
     * @param {number} rate Probability of mutating each data point
     * @param {number} cm Multiplier of the maximum distance used in the weights
     */
    mutation(rate, cm) {
        const pred = this.predict(this._data)
        for (let i = 0; i < this._data.length; i++) {
            if (Math.random() >= rate) {
                continue
            }
            const d = this._c.map(c => this._distance(c, this._data[i]))
            if (d[pred[i]] === 0) {
                continue
            }
            const dmax = Math.max(...d)
            const p = d.map(v => cm * dmax - v)
            let r = Math.random() * p.reduce((s, v) => s + v, 0)
            for (let k = 0; k < p.length; k++) {
                r -= p[k]
                if (r <= 0) {
                    pred[i] = k
                    break
                }
            }
        }

        this._updateCentroids(pred)
    }
}

/**
 * Genetic k-means model
 *
 * Keeps a population of k-means solutions and evolves it by
 * fitness-proportional selection, mutation and a k-means step.
 */
export default class GeneticKMeans {
    // https://deepblue-ts.co.jp/machine-learning/genetic-k-means-alogorithm/
    // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.520.6737&rep=rep1&type=pdf
    /**
     * @param {number} k Number of clusters
     * @param {number} size Number of models in the population
     */
    constructor(k, size) {
        this._k = k
        this._size = size
        // Multiplier of the fitness standard deviation used when truncating fitness values.
        this._c = 1
        // Mutation rate handed to each model.
        this._pm = 0.1
        // Distance multiplier handed to each model's mutation.
        this._cm = 1

        this._models = []
        for (let i = 0; i < this._size; i++) {
            this._models[i] = new GeneticKMeansModel(this._k)
        }
    }

    /**
     * Centroids
     *
     * @type {Array<Array<number>>}
     */
    get centroids() {
        return this.bestModel.centroids
    }

    /**
     * The best model.
     *
     * @type {GeneticKMeansModel}
     */
    get bestModel() {
        return this._models[0]
    }

    /**
     * Initialize model.
     *
     * @param {Array<Array<number>>} datas
     */
    init(datas) {
        this._models.forEach(m => m.init(datas))
    }

    /**
     * Returns predicted categories.
     *
     * @param {Array<Array<number>>} datas
     * @returns {number[]}
     */
    predict(datas) {
        return this.bestModel.predict(datas)
    }

    /**
     * Picks one index by roulette-wheel selection.
     *
     * @param {number[]} weights Non-negative selection weights
     * @param {number} total Sum of the weights
     * @returns {number} Selected index
     */
    _select(weights, total) {
        if (!(total > 0)) {
            // No weight carries information (e.g. all costs are equal): choose uniformly.
            return Math.floor(Math.random() * weights.length)
        }
        let r = Math.random() * total
        let lastPositive = weights.length - 1
        for (let k = 0; k < weights.length; k++) {
            if (weights[k] > 0) {
                lastPositive = k
            }
            r -= weights[k]
            if (r <= 0) {
                // Stop at the first hit; continuing would always end at the last index.
                return k
            }
        }
        // Floating-point rounding may leave a tiny positive remainder.
        return lastPositive
    }

    /**
     * Fit model.
     *
     * Runs one generation: selection, then mutation and a k-means step for
     * every model, and finally sorts the models by ascending cost so that
     * the best one comes first.
     */
    fit() {
        // Fitness is the negated cost, shifted by (mean - c * std) and clipped at zero.
        const fitness = this._models.map(m => -m.cost())
        const mean = fitness.reduce((s, v) => s + v, 0) / fitness.length
        const std = Math.sqrt(fitness.reduce((s, v) => s + (v - mean) ** 2, 0) / fitness.length)
        const population = fitness.map(v => Math.max(0, v - (mean - this._c * std)))
        const sum = population.reduce((s, v) => s + v, 0)

        const newModels = []
        for (let i = 0; i < this._size; i++) {
            newModels[i] = this._models[this._select(population, sum)].copy()
        }
        this._models = newModels

        for (let k = 0; k < this._size; k++) {
            this._models[k].mutation(this._pm, this._cm)
            this._models[k].fit()
        }

        const costs = this._models.map((m, i) => [m.cost(), i])
        costs.sort((a, b) => a[0] - b[0])
        this._models = costs.map(v => this._models[v[1]])
    }
}
24 changes: 24 additions & 0 deletions tests/lib/model/genetic_kmeans.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import Matrix from '../../../lib/util/matrix.js'
import GeneticKMeans from '../../../lib/model/genetic_kmeans.js'

import { randIndex } from '../../../lib/evaluate/clustering.js'

// Two well-separated Gaussian blobs must be recovered as two clusters.
test('predict', () => {
    const clusterSize = 50
    const generations = 20
    const model = new GeneticKMeans(2, 10)

    // First blob around 0, second blob around 5, both with a small spread.
    const lowBlob = Matrix.randn(clusterSize, 2, 0, 0.1)
    const highBlob = Matrix.randn(clusterSize, 2, 5, 0.1)
    const x = lowBlob.concat(highBlob).toArray()

    model.init(x)
    let remaining = generations
    while (remaining-- > 0) {
        model.fit()
    }

    const y = model.predict(x)
    expect(y).toHaveLength(x.length)

    // Ground truth: the first `clusterSize` rows are cluster 0, the rest cluster 1.
    const t = Array.from(x, (_, index) => Math.floor(index / clusterSize))
    expect(randIndex(y, t)).toBeGreaterThan(0.9)
})