diff --git a/README.md b/README.md index 32e8d6445..9f09ee9a3 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ for (let i = 0; i < n; i++) { | clustering | (Soft / Kernel / Genetic / Weighted) k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, DBCLASD, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder | | classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, Ordinal regression, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, LMNN | | semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network 
| -| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median | +| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median | | interpolation | Nearest neighbor, IDW, (Spherical) Linear, Brahmagupta, Logarithmic, Cosine, (Inverse) Smoothstep, Cubic, (Centripetal) Catmull-Rom, Hermite, Polynomial, Lagrange, Trigonometric, Spline, RBF Network, Akima, Natural neighbor, Delaunay | | anomaly detection | Percentile, MAD, Tukey's fences, Grubbs's test, Thompson test, Tietjen Moore test, Generalized ESD, Hotelling, MT, MCD, k-nearest neighbor, LOF, COF, ODIN, LDOF, INFLO, LOCI, LoOP, RDF, LDF, KDEOS, RDOS, RKOF, ABOD, PCA, OCSVM, KDE, GMM, Isolation forest, Autoencoder, GAN | | dimensionality reduction | Random projection, (Dual / Kernel / Incremental / Probabilistic) PCA, GPLVM, LSA, MDS, Linear discriminant analysis, NCA, ICA, Principal curve, Sammon, FastMap, Sliced inverse regression, LLE, HLLE, 
MLLE, Laplacian eigenmaps, Isomap, LTSA, Diffusion map, SNE, t-SNE, UMAP, SOM, GTM, NMF, MOD, K-SVD, Autoencoder, VAE | diff --git a/js/model_selector.js b/js/model_selector.js index 96acad244..5d933b5c2 100644 --- a/js/model_selector.js +++ b/js/model_selector.js @@ -277,6 +277,7 @@ const AIMethods = [ { value: 'lowess', title: 'LOWESS' }, { value: 'loess', title: 'LOESS' }, { value: 'spline', title: 'Spline' }, + { value: 'naive_bayes_regression', title: 'Naive Bayes' }, { value: 'gaussian_process', title: 'Gaussian Process' }, { value: 'pcr', title: 'Principal Components' }, { value: 'pls', title: 'Partial Least Squares' }, diff --git a/js/view/naive_bayes_regression.js b/js/view/naive_bayes_regression.js new file mode 100644 index 000000000..8baf21dfb --- /dev/null +++ b/js/view/naive_bayes_regression.js @@ -0,0 +1,23 @@ +import NaiveBayesRegression from '../../lib/model/naive_bayes_regression.js' +import Controller from '../controller.js' + +export default function (platform) { + platform.setting.ml.usage = 'Click and add data point. Then, click "Fit".' + platform.setting.ml.reference = { + author: 'E. Frank, L. Trigg, G. Holmes, I. H. Witten', + title: 'Naive Bayes for Regression', + year: 1999, + } + const controller = new Controller(platform) + + controller.input.button('Fit').on('click', () => { + const input = platform.trainInput + const model = new NaiveBayesRegression(Array(input[0].length).fill(false)) + model.fit( + input, + platform.trainOutput.map(v => v[0]) + ) + const pred = model.predict(platform.testInput(20)) + platform.testResult(pred) + }) +} diff --git a/lib/model/naive_bayes_regression.js b/lib/model/naive_bayes_regression.js new file mode 100644 index 000000000..fe44f0cfc --- /dev/null +++ b/lib/model/naive_bayes_regression.js @@ -0,0 +1,146 @@ +/** + * Naive bayes regression + */ +export default class NaiveBayesRegression { + // E. Frank, L. Trigg, G. Holmes, I. H. 
Witten, Technical Note Naive Bayes for Regression (1999) + // https://www.cs.waikato.ac.nz/~eibe/pubs/nbr.pdf + /** + * @param {boolean[]} categoryPositions Category column position + */ + constructor(categoryPositions) { + this._iscat = categoryPositions + this._categories = [] + this._hx = [] + this._hy = [] + this._hk = [] + + this._c_cand = [0.4, 0.5, 0.6, 0.7, 0.8] + this._d = 50 + this._h = null + } + + _gaussian(x) { + return Math.exp(-(x ** 2) / 2) / Math.sqrt(2 * Math.PI) + } + + /** + * Fit model. + * + * @param {Array>} x Training data + * @param {Array} y Target values + */ + fit(x, y) { + this._x = x + this._y = y + + const n = x.length + for (let k = 0; k < this._iscat.length; k++) { + const xk = x.map(v => v[k]) + if (this._iscat[k]) { + this._categories[k] = {} + for (let j = 0; j < n; j++) { + if (!this._categories[k][xk[j]]) { + this._categories[k][xk[j]] = 0 + } + this._categories[k][xk[j]]++ + } + this._hk[k] = {} + for (const vk of Object.keys(this._categories[k])) { + let min_cv = Infinity + this._hk[k][vk] = 1 + for (const ck of this._c_cand) { + const hk = ck / Math.sqrt(this._categories[k][vk]) + let cv = 0 + for (let i = 0; i < n; i++) { + if (xk[i] !== vk) continue + let v = 0 + for (let j = 0; j < n; j++) { + if (i === j || xk[j] !== vk) continue + v += this._gaussian((y[j] - y[i]) / hk) + } + cv += Math.log(v / ((n - 1) * hk)) + } + if (-cv / n < min_cv) { + min_cv = -cv / n + this._hk[k][vk] = hk + } + } + } + } else { + let min_cv = Infinity + this._hx[k] = 0 + this._hy[k] = 0 + for (const cx of this._c_cand) { + const hx = cx / Math.sqrt(n) + for (const cy of this._c_cand) { + const hy = cy / Math.sqrt(n) + let cv = 0 + for (let i = 0; i < n; i++) { + let v = 0 + for (let j = 0; j < n; j++) { + if (i === j) continue + v += this._gaussian((xk[j] - xk[i]) / hx) * this._gaussian((y[j] - y[i]) / hy) + } + cv += Math.log(v / ((n - 1) * hx * hy)) + } + if (-cv / n < min_cv) { + min_cv = -cv / n + this._hx[k] = hx + this._hy[k] = hy + } + } 
+ } + } + } + + this._ymax = -Infinity + this._ymin = Infinity + for (let i = 0; i < n; i++) { + this._ymax = Math.max(this._ymax, y[i]) + this._ymin = Math.min(this._ymin, y[i]) + } + this._h = (this._ymax - this._ymin) / (this._d - 1) + } + + /** + * Returns predicted values. + * + * @param {Array>} x Sample data + * @returns {Array} Predicted values + */ + predict(x) { + const pred = [] + const n = this._x.length + for (let i = 0; i < x.length; i++) { + const pi = [] + const g = [] + for (let t = -Math.floor(this._d / 2); t <= Math.ceil(this._d * 1.5); t++) { + const y = this._ymin + this._h * t + let p = 1 + for (let k = 0; k < this._iscat.length; k++) { + if (this._iscat[k]) { + let pt = 0 + for (let j = 0; j < n; j++) { + if (x[i][k] !== this._x[j][k]) continue + pt += this._gaussian((y - this._y[j]) / this._hk[k][x[i][k]]) + } + p *= pt / (n * this._hk[k][x[i][k]]) + } else { + let pt = 0 + for (let j = 0; j < n; j++) { + pt += + this._gaussian((x[i][k] - this._x[j][k]) / this._hx[k]) * + this._gaussian((y - this._y[j]) / this._hy[k]) + } + p *= pt / (n * this._hx[k] * this._hy[k]) + } + } + pi.push(p) + g.push(y) + } + const s = pi.reduce((s, v) => s + v, 0) + pred[i] = pi.reduce((s, v, k) => s + v * g[k], 0) / s + } + return pred + } +} diff --git a/tests/gui/view/naive_bayes_regression.test.js b/tests/gui/view/naive_bayes_regression.test.js new file mode 100644 index 000000000..693f8884b --- /dev/null +++ b/tests/gui/view/naive_bayes_regression.test.js @@ -0,0 +1,42 @@ +import { getPage } from '../helper/browser' + +describe('regression', () => { + /** @type {Awaited>} */ + let page + beforeEach(async () => { + page = await getPage() + }) + + afterEach(async () => { + await page?.close() + }) + + test('initialize', async () => { + const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select') + await taskSelectBox.selectOption('RG') + const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection 
#mlDisp') + await modelSelectBox.selectOption('naive_bayes_regression') + const methodMenu = await page.waitForSelector('#ml_selector #method_menu') + const buttons = await methodMenu.waitForSelector('.buttons') + + const fit = await buttons.waitForSelector('input:nth-of-type(1)') + await expect((await fit.getProperty('value')).jsonValue()).resolves.toBe('Fit') + }) + + test('learn', async () => { + const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select') + await taskSelectBox.selectOption('RG') + const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp') + await modelSelectBox.selectOption('naive_bayes_regression') + const methodMenu = await page.waitForSelector('#ml_selector #method_menu') + const buttons = await methodMenu.waitForSelector('.buttons') + + const methodFooter = await page.waitForSelector('#method_footer', { state: 'attached' }) + await expect(methodFooter.evaluate(el => el.textContent)).resolves.toBe('') + + const initButton = await buttons.waitForSelector('input[value=Fit]') + await initButton.evaluate(el => el.click()) + + await expect(methodFooter.evaluate(el => el.textContent)).resolves.toMatch(/^RMSE:[0-9.]+$/) + }) +}) diff --git a/tests/lib/model/naive_bayes_regression.test.js b/tests/lib/model/naive_bayes_regression.test.js new file mode 100644 index 000000000..3a6d9fb75 --- /dev/null +++ b/tests/lib/model/naive_bayes_regression.test.js @@ -0,0 +1,46 @@ +import NaiveBayesRegression from '../../../lib/model/naive_bayes_regression.js' + +import { rmse } from '../../../lib/evaluate/regression.js' + +describe('predict', () => { + test('fit', () => { + const iscat = [true, false, true, false, true] + const model = new NaiveBayesRegression(iscat) + const n = 50 + const x = [] + const t = [] + for (let i = 0; i < n; i++) { + const xi = [] + for (let k = 0; k < 5; k++) { + if (iscat[k]) { + const r = Math.floor(Math.random() * 10) + xi[k] = 
String.fromCharCode('a'.charCodeAt(0) + r) + } else { + xi[k] = Math.random() * 2 + } + } + x.push(xi) + t.push(0) + } + for (let i = 0; i < n; i++) { + const xi = [] + for (let k = 0; k < 5; k++) { + if (iscat[k]) { + const r = Math.floor(Math.random() * 10 + 9) + xi[k] = String.fromCharCode('a'.charCodeAt(0) + r) + } else { + xi[k] = Math.random() * 2 + 2 + } + } + x.push(xi) + t.push(1) + } + + model.fit(x, t) + + const y = model.predict(x) + expect(y).toHaveLength(x.length) + const err = rmse(y, t) + expect(err).toBeLessThan(0.5) + }) +})