2 changes: 1 addition & 1 deletion README.md
@@ -124,7 +124,7 @@ for (let i = 0; i < n; i++) {
| clustering | (Soft / Kernel / Genetic / Weighted) k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, DBCLASD, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder |
| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, Ordinal regression, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, LMNN |
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
| interpolation | Nearest neighbor, IDW, (Spherical) Linear, Brahmagupta, Logarithmic, Cosine, (Inverse) Smoothstep, Cubic, (Centripetal) Catmull-Rom, Hermite, Polynomial, Lagrange, Trigonometric, Spline, RBF Network, Akima, Natural neighbor, Delaunay |
| anomaly detection | Percentile, MAD, Tukey's fences, Grubbs's test, Thompson test, Tietjen Moore test, Generalized ESD, Hotelling, MT, MCD, k-nearest neighbor, LOF, COF, ODIN, LDOF, INFLO, LOCI, LoOP, RDF, LDF, KDEOS, RDOS, RKOF, ABOD, PCA, OCSVM, KDE, GMM, Isolation forest, Autoencoder, GAN |
| dimensionality reduction | Random projection, (Dual / Kernel / Incremental / Probabilistic) PCA, GPLVM, LSA, MDS, Linear discriminant analysis, NCA, ICA, Principal curve, Sammon, FastMap, Sliced inverse regression, LLE, HLLE, MLLE, Laplacian eigenmaps, Isomap, LTSA, Diffusion map, SNE, t-SNE, UMAP, SOM, GTM, NMF, MOD, K-SVD, Autoencoder, VAE |
1 change: 1 addition & 0 deletions js/model_selector.js
@@ -277,6 +277,7 @@ const AIMethods = [
{ value: 'lowess', title: 'LOWESS' },
{ value: 'loess', title: 'LOESS' },
{ value: 'spline', title: 'Spline' },
{ value: 'naive_bayes_regression', title: 'Naive Bayes' },
{ value: 'gaussian_process', title: 'Gaussian Process' },
{ value: 'pcr', title: 'Principal Components' },
{ value: 'pls', title: 'Partial Least Squares' },
23 changes: 23 additions & 0 deletions js/view/naive_bayes_regression.js
@@ -0,0 +1,23 @@
import NaiveBayesRegression from '../../lib/model/naive_bayes_regression.js'
import Controller from '../controller.js'

export default function (platform) {
    platform.setting.ml.usage = 'Click and add data point. Then, click "Fit".'
    platform.setting.ml.reference = {
        author: 'E. Frank, L. Trigg, G. Holmes, I. H. Witten',
        title: 'Naive Bayes for Regression',
        year: 1999,
    }
    const controller = new Controller(platform)

    controller.input.button('Fit').on('click', () => {
        const input = platform.trainInput
        const model = new NaiveBayesRegression(Array(input[0].length).fill(false))
        model.fit(
            input,
            platform.trainOutput.map(v => v[0])
        )
        const pred = model.predict(platform.testInput(20))
        platform.testResult(pred)
    })
}
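Note: the view marks every column as non-categorical via `Array(input[0].length).fill(false)`, since the GUI supplies purely numeric data points. A minimal sketch of the equivalent direct call on numeric-only data (the data values here are made up for illustration, not from the platform):

import NaiveBayesRegression from '../../lib/model/naive_bayes_regression.js'

// Hypothetical numeric-only training data, shaped like platform.trainInput.
const input = [[0.0], [0.5], [1.0], [1.5]]
const output = [0.1, 0.4, 1.1, 1.4]
// All columns flagged as non-categorical, as the view does.
const model = new NaiveBayesRegression(Array(input[0].length).fill(false))
model.fit(input, output)
const pred = model.predict([[0.25], [1.25]]) // kernel-smoothed estimates between the neighboring targets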
146 changes: 146 additions & 0 deletions lib/model/naive_bayes_regression.js
@@ -0,0 +1,146 @@
/**
 * Naive Bayes regression
 */
export default class NaiveBayesRegression {
    // E. Frank, L. Trigg, G. Holmes, I. H. Witten, Technical Note: Naive Bayes for Regression (1999)
    // https://www.cs.waikato.ac.nz/~eibe/pubs/nbr.pdf
    /**
     * @param {boolean[]} categoryPositions Whether each column is categorical
     */
    constructor(categoryPositions) {
        this._iscat = categoryPositions
        this._categories = []
        this._hx = []
        this._hy = []
        this._hk = []

        // Candidate scale factors for cross-validated kernel bandwidths of the form c / sqrt(sample count).
        this._c_cand = [0.4, 0.5, 0.6, 0.7, 0.8]
        // Number of grid points used to discretize the target range in predict.
        this._d = 50
        this._h = null
    }

    // Standard normal (Gaussian) kernel.
    _gaussian(x) {
        return Math.exp(-(x ** 2) / 2) / Math.sqrt(2 * Math.PI)
    }

    /**
     * Fit model.
     *
     * @param {Array<Array<*>>} x Training data
     * @param {Array<number>} y Target values
     */
    fit(x, y) {
        this._x = x
        this._y = y

        const n = x.length
        for (let k = 0; k < this._iscat.length; k++) {
            const xk = x.map(v => v[k])
            if (this._iscat[k]) {
                // Count how often each category value occurs in column k.
                this._categories[k] = {}
                for (let j = 0; j < n; j++) {
                    if (!this._categories[k][xk[j]]) {
                        this._categories[k][xk[j]] = 0
                    }
                    this._categories[k][xk[j]]++
                }
                // For each category value, pick the target-kernel bandwidth by
                // leave-one-out log-likelihood cross-validation.
                this._hk[k] = {}
                for (const vk of Object.keys(this._categories[k])) {
                    let min_cv = Infinity
                    this._hk[k][vk] = 1
                    for (const ck of this._c_cand) {
                        const hk = ck / Math.sqrt(this._categories[k][vk])
                        let cv = 0
                        for (let i = 0; i < n; i++) {
                            if (xk[i] !== vk) continue
                            let v = 0
                            for (let j = 0; j < n; j++) {
                                if (i === j || xk[j] !== vk) continue
                                v += this._gaussian((y[j] - y[i]) / hk)
                            }
                            cv += Math.log(v / ((n - 1) * hk))
                        }
                        if (-cv / n < min_cv) {
                            min_cv = -cv / n
                            this._hk[k][vk] = hk
                        }
                    }
                }
            } else {
                // For numeric columns, jointly select the feature and target bandwidths
                // by leave-one-out log-likelihood cross-validation.
                let min_cv = Infinity
                this._hx[k] = 0
                this._hy[k] = 0
                for (const cx of this._c_cand) {
                    const hx = cx / Math.sqrt(n)
                    for (const cy of this._c_cand) {
                        const hy = cy / Math.sqrt(n)
                        let cv = 0
                        for (let i = 0; i < n; i++) {
                            let v = 0
                            for (let j = 0; j < n; j++) {
                                if (i === j) continue
                                v += this._gaussian((xk[j] - xk[i]) / hx) * this._gaussian((y[j] - y[i]) / hy)
                            }
                            cv += Math.log(v / ((n - 1) * hx * hy))
                        }
                        if (-cv / n < min_cv) {
                            min_cv = -cv / n
                            this._hx[k] = hx
                            this._hy[k] = hy
                        }
                    }
                }
            }
        }

        // Range of the targets and the grid step used in predict.
        this._ymax = -Infinity
        this._ymin = Infinity
        for (let i = 0; i < n; i++) {
            this._ymax = Math.max(this._ymax, y[i])
            this._ymin = Math.min(this._ymin, y[i])
        }
        this._h = (this._ymax - this._ymin) / (this._d - 1)
    }

    /**
     * Returns predicted values.
     *
     * @param {Array<Array<*>>} x Sample data
     * @returns {Array<number>} Predicted values
     */
    predict(x) {
        const pred = []
        const n = this._x.length
        for (let i = 0; i < x.length; i++) {
            const pi = []
            const g = []
            // Evaluate the product of per-feature kernel density estimates on a
            // grid of candidate targets extending past the observed range.
            for (let t = -Math.floor(this._d / 2); t <= Math.ceil(this._d * 1.5); t++) {
                const y = this._ymin + this._h * t
                let p = 1
                for (let k = 0; k < this._iscat.length; k++) {
                    if (this._iscat[k]) {
                        let pt = 0
                        for (let j = 0; j < n; j++) {
                            if (x[i][k] !== this._x[j][k]) continue
                            pt += this._gaussian((y - this._y[j]) / this._hk[k][x[i][k]])
                        }
                        p *= pt / (n * this._hk[k][x[i][k]])
                    } else {
                        let pt = 0
                        for (let j = 0; j < n; j++) {
                            pt +=
                                this._gaussian((x[i][k] - this._x[j][k]) / this._hx[k]) *
                                this._gaussian((y - this._y[j]) / this._hy[k])
                        }
                        p *= pt / (n * this._hx[k] * this._hy[k])
                    }
                }
                pi.push(p)
                g.push(y)
            }
            // The prediction is the density-weighted mean of the grid values.
            const s = pi.reduce((s, v) => s + v, 0)
            pred[i] = pi.reduce((s, v, k) => s + v * g[k], 0) / s
        }
        return pred
    }
}
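For each feature the model fits a kernel density estimate over (feature, target) pairs — for categorical columns, a per-category density over the target — with bandwidths chosen from the `_c_cand` candidates by leave-one-out cross-validation, and it predicts the density-weighted mean over a grid of candidate target values. A minimal usage sketch on mixed categorical and numeric columns, mirroring the unit test below (the data is illustrative; the import path assumes this repository's layout):

import NaiveBayesRegression from './lib/model/naive_bayes_regression.js'

// Column 0 is categorical, column 1 is numeric.
const x = [
    ['a', 0.1], ['a', 0.3], ['a', 0.2],
    ['b', 1.8], ['b', 2.1], ['b', 1.9],
]
const y = [0, 0, 0, 1, 1, 1]

const model = new NaiveBayesRegression([true, false])
model.fit(x, y)
// Expect values near 0 for the 'a' point and near 1 for the 'b' point.
console.log(model.predict([['a', 0.2], ['b', 2.0]]))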
42 changes: 42 additions & 0 deletions tests/gui/view/naive_bayes_regression.test.js
@@ -0,0 +1,42 @@
import { getPage } from '../helper/browser'

describe('regression', () => {
    /** @type {Awaited<ReturnType<getPage>>} */
    let page
    beforeEach(async () => {
        page = await getPage()
    })

    afterEach(async () => {
        await page?.close()
    })

    test('initialize', async () => {
        const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
        await taskSelectBox.selectOption('RG')
        const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
        await modelSelectBox.selectOption('naive_bayes_regression')
        const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
        const buttons = await methodMenu.waitForSelector('.buttons')

        const fit = await buttons.waitForSelector('input:nth-of-type(1)')
        await expect((await fit.getProperty('value')).jsonValue()).resolves.toBe('Fit')
    })

    test('learn', async () => {
        const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
        await taskSelectBox.selectOption('RG')
        const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
        await modelSelectBox.selectOption('naive_bayes_regression')
        const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
        const buttons = await methodMenu.waitForSelector('.buttons')

        const methodFooter = await page.waitForSelector('#method_footer', { state: 'attached' })
        await expect(methodFooter.evaluate(el => el.textContent)).resolves.toBe('')

        const initButton = await buttons.waitForSelector('input[value=Fit]')
        await initButton.evaluate(el => el.click())

        await expect(methodFooter.evaluate(el => el.textContent)).resolves.toMatch(/^RMSE:[0-9.]+$/)
    })
})
46 changes: 46 additions & 0 deletions tests/lib/model/naive_bayes_regression.test.js
@@ -0,0 +1,46 @@
import NaiveBayesRegression from '../../../lib/model/naive_bayes_regression.js'

import { rmse } from '../../../lib/evaluate/regression.js'

describe('predict', () => {
    test('fit', () => {
        const iscat = [true, false, true, false, true]
        const model = new NaiveBayesRegression(iscat)
        const n = 50
        const x = []
        const t = []
        // First cluster: category values 'a'..'j', numeric features in [0, 2), target 0.
        for (let i = 0; i < n; i++) {
            const xi = []
            for (let k = 0; k < 5; k++) {
                if (iscat[k]) {
                    const r = Math.floor(Math.random() * 10)
                    xi[k] = String.fromCharCode('a'.charCodeAt(0) + r)
                } else {
                    xi[k] = Math.random() * 2
                }
            }
            x.push(xi)
            t.push(0)
        }
        // Second cluster: category values 'j'..'s', numeric features in [2, 4), target 1.
        for (let i = 0; i < n; i++) {
            const xi = []
            for (let k = 0; k < 5; k++) {
                if (iscat[k]) {
                    const r = Math.floor(Math.random() * 10 + 9)
                    xi[k] = String.fromCharCode('a'.charCodeAt(0) + r)
                } else {
                    xi[k] = Math.random() * 2 + 2
                }
            }
            x.push(xi)
            t.push(1)
        }

        model.fit(x, t)

        const y = model.predict(x)
        expect(y).toHaveLength(x.length)
        const err = rmse(y, t)
        expect(err).toBeLessThan(0.5)
    })
})