import Matrix from '../util/matrix.js'
import Tensor from '../util/tensor.js'
import NeuralNetwork from './neuralnetwork.js'

/**
 * Diffusion model network
 */
export default class DiffusionModel {
	// https://qiita.com/pocokhc/items/5a015ee5b527a357dd67
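	// Follows the DDPM formulation (Ho et al., 2020): the forward process adds
	// Gaussian noise to the data,
	//   q(x_t | x_0) = N(sqrt(alphaCumprod_t) * x_0, (1 - alphaCumprod_t) * I),
	// and the network is trained to predict that noise from (x_t, t). Sampling
	// then runs the learned reverse process from pure noise back to a sample.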
	/**
	 * @param {number} timesteps Number of timesteps
	 * @param {LayerObject[]} [layers] Layers
	 */
	constructor(timesteps, layers) {
		this._timesteps = timesteps
		this._ulayers = layers
		// Dimensionality of the timestep positional encoding fed to the network.
		this._peDims = 32

		this._model = null
		this._epoch = 0

		// Linear beta schedule from betaStart to betaEnd over `timesteps` steps.
		const betaStart = 0.0001
		const betaEnd = 0.02
		const betaStep = this._timesteps > 1 ? (betaEnd - betaStart) / (this._timesteps - 1) : 0
		this._beta = []
		for (let t = 0; t < this._timesteps; t++) {
			this._beta[t] = betaStart + betaStep * t
		}
		// alpha_t = 1 - beta_t and its cumulative product (alphaCumprod) drive
		// the forward (noising) and reverse (denoising) processes.
		this._alpha = [1 - this._beta[0]]
		this._alphaCumprod = [this._alpha[0]]
		for (let t = 1; t < this._beta.length; t++) {
			this._alpha[t] = 1 - this._beta[t]
			this._alphaCumprod[t] = this._alphaCumprod[t - 1] * this._alpha[t]
		}
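		// e.g. with timesteps = 100, beta rises linearly from 0.0001 to 0.02 and
		// alphaCumprod decays from 0.9999 down to roughly 0.36 at the last step.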
	}

	/**
	 * Epoch
	 * @type {number}
	 */
	get epoch() {
		return this._epoch
	}

	_addNoise(x, t) {
		// Forward process q(x_t | x_0): sample Gaussian noise and mix it in as
		// x_t = sqrt(alphaCumprod_t) * x_0 + sqrt(1 - alphaCumprod_t) * noise.
		const at = this._alphaCumprod[t]
		const sqrtat = Math.sqrt(at)
		const sqrt1at = Math.sqrt(1 - at)
		const noise = Tensor.randn(x.sizes)
		const xNoised = x.copy()
		xNoised.broadcastOperate(noise, (a, b) => sqrtat * a + sqrt1at * b)
		return [xNoised, noise]
	}

	_build() {
		// Builds a small U-Net-like noise-prediction network: the timestep
		// positional encoding is embedded and concatenated onto the input, and
		// a skip connection (concat of 'u1' and 'c1') bridges the down/up path.
		if (this._dataShape.length === 1) {
			this._layers = [
				{ type: 'input', name: 'x' },
				{ type: 'input', name: 'position_encoding' },
				{ type: 'full', out_size: this._peDims, l2_decay: 0.001, activation: 'gelu', name: 'pe' },
				{ type: 'concat', input: ['x', 'pe'], axis: 1 },
			]
			if (this._ulayers) {
				this._layers.push(...this._ulayers)
			} else {
				this._layers.push(
					{ type: 'full', out_size: 32, l2_decay: 0.001, name: 'c1', activation: 'tanh' },
					{ type: 'full', out_size: 16, l2_decay: 0.001, activation: 'tanh' },
					{ type: 'full', out_size: 32, l2_decay: 0.001, name: 'u1', activation: 'tanh' },
					{ type: 'concat', input: ['u1', 'c1'], axis: 1 },
					{ type: 'full', out_size: 32, l2_decay: 0.001, activation: 'tanh' }
				)
			}
			this._layers.push({ type: 'full', out_size: this._dataShape[0], l2_decay: 0.001 }, { type: 'output' })
		} else {
			// Multi-dimensional data (e.g. images): broadcast the positional
			// encoding across the spatial axes and use convolutional layers.
			const dim = this._dataShape.length
			this._layers = [
				{ type: 'input', name: 'x' },
				{ type: 'input', name: 'position_encoding' },
				{ type: 'full', out_size: this._peDims, l2_decay: 0.001, activation: 'gelu' },
				{ type: 'reshape', size: [...Array(dim - 1).fill(1), this._peDims] },
				{ type: 'up_sampling', size: this._dataShape.slice(0, dim - 1), name: 'pe' },
				{ type: 'concat', input: ['x', 'pe'], axis: dim },
			]
			if (this._ulayers) {
				this._layers.push(...this._ulayers)
			} else {
				this._layers.push(
					{ type: 'conv', kernel: 3, channel: 16, padding: 1, l2_decay: 0.001, name: 'c1', activation: 'relu' },
					{ type: 'max_pool', kernel: 2 },
					{ type: 'conv', kernel: 3, channel: 32, padding: 1, l2_decay: 0.001, activation: 'relu' },
					{ type: 'up_sampling', size: 2, name: 'u1' },
					{ type: 'concat', input: ['u1', 'c1'], axis: dim },
					{ type: 'conv', kernel: 3, channel: 16, padding: 1, l2_decay: 0.001, activation: 'relu' }
				)
			}
			this._layers.push(
				{ type: 'conv', kernel: 1, channel: this._dataShape[dim - 1], l2_decay: 0.001 },
				{ type: 'output' }
			)
		}

		return NeuralNetwork.fromObject(this._layers, 'mse', 'adam')
	}

	_positionEncoding(t, embdims) {
		// Sinusoidal positional encoding of the timestep, as in Transformer
		// models: rate_i = t / 10000 ** (2 * floor(i / 2) / embdims). Note the
		// whole exponent is divided by embdims, not the resulting quotient.
		const rates = Array.from({ length: embdims }, (_, i) => t / 10000 ** ((2 * Math.floor(i / 2)) / embdims))
		const pe = rates.map((v, i) => (i % 2 === 0 ? Math.sin(v) : Math.cos(v)))
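		// e.g. embdims = 4, t = 5 gives rates = [5, 5, 0.05, 0.05] and
		// pe = [sin(5), cos(5), sin(0.05), cos(0.05)].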
		return new Matrix(1, embdims, pe)
	}

	/**
	 * Fit model.
	 * @param {Array<Array<number>>} train_x Training data
	 * @param {number} iteration Iteration count
	 * @param {number} rate Learning rate
	 * @param {number} batch Batch size
	 * @returns {number} Loss value
	 */
	fit(train_x, iteration, rate, batch) {
		const x = Tensor.fromArray(train_x)
		this._dataShape = x.sizes.slice(1)
		if (!this._model) {
			this._model = this._build()
		}
		let loss = null
		for (let i = 0; i < iteration; i++) {
			// Pick a random timestep, noise the data to that step, and train
			// the network to recover the injected noise from the noised data.
			const t = Math.floor(Math.random() * this._timesteps)
			const pe = this._positionEncoding(t, this._peDims)
			pe.repeat(x.sizes[0], 0)
			const [noised_x, noise] = this._addNoise(x, t)

			loss = this._model.fit({ x: noised_x, position_encoding: pe }, noise, 1, rate, batch)
		}
		this._epoch += iteration
		return loss
	}

	/**
	 * Returns generated data from the model.
	 * @param {number} n Number of data to generate
	 * @returns {Array<Array<number>>} Generated values
	 */
	generate(n) {
		const ds = this._dataShape.concat()
		// Start from pure Gaussian noise and denoise it step by step.
		const samples = Tensor.randn([n, ...ds])
		for (let t = this._timesteps - 1; t >= 0; t--) {
			const pe = this._positionEncoding(t, this._peDims)
			pe.repeat(n, 0)

			// Predict the noise component at step t.
			const pred = this._model.calc({ x: samples, position_encoding: pe })

			// Reverse-step mean:
			//   x_{t-1} = (x_t - beta_t / sqrt(1 - alphaCumprod_t) * pred) / sqrt(alpha_t)
			samples.broadcastOperate(
				pred,
				(a, b) =>
					(1 / Math.sqrt(this._alpha[t])) * (a - (b * this._beta[t]) / Math.sqrt(1 - this._alphaCumprod[t]))
			)
			if (t > 0) {
				// Add posterior noise with variance
				// s2 = (1 - alphaCumprod_{t-1}) / (1 - alphaCumprod_t) * beta_t,
				// except at the final step.
				const s2 = ((1 - this._alphaCumprod[t - 1]) / (1 - this._alphaCumprod[t])) * this._beta[t]
				const noise = Tensor.randn(samples.sizes, 0, s2)
				samples.broadcastOperate(noise, (a, b) => a + b)
			}
		}

		return samples.toArray()
	}
}
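
// Usage sketch (assumes 1D training data `train_x` of shape [n, d] and the
// default layers above; the hyperparameters here are illustrative, not tuned):
//   const model = new DiffusionModel(100)
//   for (let i = 0; i < 50; i++) {
//       model.fit(train_x, 10, 0.001, 10)
//   }
//   const samples = model.generate(5)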