diff --git a/lib/model/nns/layer/conv.js b/lib/model/nns/layer/conv.js
index 0692a6c99..45bbf124e 100644
--- a/lib/model/nns/layer/conv.js
+++ b/lib/model/nns/layer/conv.js
@@ -12,10 +12,9 @@ export default class ConvLayer extends Layer {
 	 * @param {number | number[]} [config.stride] Step of stride
 	 * @param {number | number[]} [config.padding] size of padding
 	 * @param {number[][] | Tensor | string} [config.w] Weight of kernel
-	 * @param {string} [config.activation] Name of activation
+	 * @param {string | object} [config.activation] Name of activation or activation layer object
 	 * @param {number} [config.l2_decay=0] L2 decay
 	 * @param {number} [config.l1_decay=0] L1 decay
-	 * @param {object} [config.activation_params] Parameters of activation
 	 * @param {number} [config.channel_dim=-1] Dimension of the channel
 	 */
 	constructor({
@@ -27,7 +26,6 @@ export default class ConvLayer extends Layer {
 		activation = null,
 		l2_decay = 0,
 		l1_decay = 0,
-		activation_params = {},
 		channel_dim = -1,
 		...rest
 	}) {
@@ -52,9 +50,10 @@ export default class ConvLayer extends Layer {
 				this._out_channel = this._w.sizes[0]
 			}
 		}
-		this._activation = activation
-		if (activation) {
-			this._activation_func = Layer.fromObject({ type: activation, ...activation_params })
+		if (typeof activation === 'string') {
+			this._activation = Layer.fromObject({ type: activation })
+		} else if (activation) {
+			this._activation = Layer.fromObject(activation)
 		}
 		this._l2_decay = l2_decay
 		this._l1_decay = l1_decay
@@ -153,16 +152,16 @@ export default class ConvLayer extends Layer {
 				} while (idx.some(v => v > 0))
 			}
 		}
-		if (this._activation_func) {
-			return this._activation_func.calc(this._o)
+		if (this._activation) {
+			return this._activation.calc(this._o)
 		}
 		return this._o
 	}
 
 	grad(bo) {
 		this._bo = bo
-		if (this._activation_func) {
-			this._bo = this._activation_func.grad(bo)
+		if (this._activation) {
+			this._bo = this._activation.grad(bo)
 		}
 		this._bi = new Tensor(this._i.sizes)
 		this._dw = new Tensor(this._w.sizes)
@@ -225,10 +224,9 @@ export default class ConvLayer extends Layer {
 			kernel: this._kernel,
 			stride: this._stride,
 			padding: this._padding,
-			activation: this._activation,
+			activation: this._activation?.toObject(),
 			l2_decay: this._l2_decay,
 			l1_decay: this._l1_decay,
-			activation_params: this._activation_func?.toObject(),
 			channel_dim: this._channel_dim,
 		}
 	}
diff --git a/lib/model/nns/layer/full.js b/lib/model/nns/layer/full.js
index 51433d1b1..008ace137 100644
--- a/lib/model/nns/layer/full.js
+++ b/lib/model/nns/layer/full.js
@@ -10,10 +10,9 @@ export default class FullyConnected extends Layer {
 	 * @param {number | string} config.out_size Size of output
 	 * @param {number[][] | Matrix | string} [config.w] Weight of kernel
 	 * @param {number[][] | Matrix | string} [config.b] Weight of kernel
-	 * @param {string} [config.activation] Name of activation
+	 * @param {string | object} [config.activation] Name of activation or activation layer object
 	 * @param {number} [config.l2_decay=0] L2 decay
 	 * @param {number} [config.l1_decay=0] L1 decay
-	 * @param {object} [config.activation_params] Parameters of activation
 	 */
 	constructor({
 		out_size,
@@ -22,7 +21,6 @@ export default class FullyConnected extends Layer {
 		activation = null,
 		l2_decay = 0,
 		l1_decay = 0,
-		activation_params = {},
 		...rest
 	}) {
 		super(rest)
@@ -39,9 +37,10 @@ export default class FullyConnected extends Layer {
 		} else if (b) {
 			this._b = Matrix.fromArray(b)
 		}
-		this._activation = activation
-		if (activation) {
-			this._activation_func = Layer.fromObject({ type: activation, ...activation_params })
+		if (typeof activation === 'string') {
+			this._activation = Layer.fromObject({ type: activation })
+		} else if (activation) {
+			this._activation = Layer.fromObject(activation)
 		}
 		this._l2_decay = l2_decay
 		this._l1_decay = l1_decay
@@ -72,15 +71,15 @@ export default class FullyConnected extends Layer {
 		this._i = x
 		this._o = x.dot(this._w)
 		this._o.broadcastOperate(this._b, (a, b) => a + b)
-		if (this._activation_func) {
-			return this._activation_func.calc(this._o)
+		if (this._activation) {
+			return this._activation.calc(this._o)
 		}
 		return this._o
 	}
 
 	grad(bo) {
-		if (this._activation_func) {
-			bo = this._activation_func.grad(bo)
+		if (this._activation) {
+			bo = this._activation.grad(bo)
 		}
 
 		let i = this._i
@@ -137,10 +136,9 @@ export default class FullyConnected extends Layer {
 			out_size: this._out_size,
 			w: this._wname || this._w?.toArray(),
 			b: this._bname || this._b?.toArray(),
-			activation: this._activation,
+			activation: this._activation?.toObject(),
 			l2_decay: this._l2_decay,
 			l1_decay: this._l1_decay,
-			activation_params: this._activation_func?.toObject(),
 		}
 	}
 }
diff --git a/lib/model/nns/layer/index.js b/lib/model/nns/layer/index.js
index 5c5505803..b0900706d 100644
--- a/lib/model/nns/layer/index.js
+++ b/lib/model/nns/layer/index.js
@@ -135,7 +135,7 @@ import Tensor from '../../../util/tensor.js'
  * { type: 'concat', axis?: number } |
  * { type: 'cond' } |
  * { type: 'const', value: number } |
- * { type: 'conv', kernel: number | number[], channel?: number, stride?: number | number[], padding?: number | number[], w?: number[][] | Tensor | string, activation?: string, l2_decay?: number, l1_decay?: number, activation_params?: object, channel_dim?: number } |
+ * { type: 'conv', kernel: number | number[], channel?: number, stride?: number | number[], padding?: number | number[], w?: number[][] | Tensor | string, activation?: string | object, l2_decay?: number, l1_decay?: number, channel_dim?: number } |
  * { type: 'cos' } |
  * { type: 'cosh' } |
  * { type: 'crelu' } |
@@ -156,7 +156,7 @@ import Tensor from '../../../util/tensor.js'
  * { type: 'flatten' } |
  * { type: 'floor' } |
  * { type: 'frelu', b?: number } |
- * { type: 'full', out_size: number | string, w?: number[][] | Matrix | string, b?: number[][] | Matrix | string, activation?: string, l2_decay?: number, l1_decay?: number, activation_params?: object } |
+ * { type: 'full', out_size: number | string, w?: number[][] | Matrix | string, b?: number[][] | Matrix | string, activation?: string | object, l2_decay?: number, l1_decay?: number } |
  * { type: 'gaussian' } |
  * { type: 'gelu' } |
  * { type: 'global_average_pool', channel_dim?: number } |
@@ -230,7 +230,7 @@ import Tensor from '../../../util/tensor.js'
  * { type: 'reu' } |
  * { type: 'reverse', axis?: number } |
  * { type: 'right_bitshift' } |
- * { type: 'rnn', size: number, out_size?: number, activation?: string, recurrent_activation?: string, return_sequences?: boolean, w_xh?: number[][] | Matrix, w_hh?: number[][] | Matrix, w_hy?: number[][] | Matrix, b_xh?: number[][] | Matrix, b_hh?: number[][] | Matrix, b_hy?: number[][] | Matrix, activation_params?: object, recurrent_activation_params?: object } |
+ * { type: 'rnn', size: number, out_size?: number, activation?: string | object, recurrent_activation?: string | object, return_sequences?: boolean, w_xh?: number[][] | Matrix, w_hh?: number[][] | Matrix, w_hy?: number[][] | Matrix, b_xh?: number[][] | Matrix, b_hh?: number[][] | Matrix, b_hy?: number[][] | Matrix } |
  * { type: 'rootsig' } |
  * { type: 'round' } |
 * { type: 'rrelu', l?: number, u?: number } |
diff --git a/lib/model/nns/layer/rnn.js b/lib/model/nns/layer/rnn.js
index c9c83965b..b64e8fd08 100644
--- a/lib/model/nns/layer/rnn.js
+++ b/lib/model/nns/layer/rnn.js
@@ -10,8 +10,8 @@ export default class RNNLayer extends Layer {
 	 * @param {object} config object
 	 * @param {number} config.size Size of recurrent
 	 * @param {number} [config.out_size] Size of output
-	 * @param {string} [config.activation=tanh] Activation function of output
-	 * @param {string} [config.recurrent_activation=sigmoid] Activation function of recurrent
+	 * @param {string | object} [config.activation=tanh] Name of activation or activation layer object
+	 * @param {string | object} [config.recurrent_activation=sigmoid] Name of activation or activation layer object of recurrent
 	 * @param {boolean} [config.return_sequences=false] Return sequences or not
 	 * @param {number[][] | Matrix} [config.w_xh] Weight from input to sequence
 	 * @param {number[][] | Matrix} [config.w_hh] Weight from sequence to sequence
@@ -19,8 +19,6 @@ export default class RNNLayer extends Layer {
 	 * @param {number[][] | Matrix} [config.b_xh] Bias from input to sequence
 	 * @param {number[][] | Matrix} [config.b_hh] Bias from sequence to sequence
 	 * @param {number[][] | Matrix} [config.b_hy] Bias from sequence to output
-	 * @param {object} [config.activation_params] Parameters of activation
-	 * @param {object} [config.recurrent_activation_params] Parameters of recurrent activation
 	 */
 	constructor({
 		size,
@@ -34,8 +32,6 @@ export default class RNNLayer extends Layer {
 		b_xh = null,
 		b_hh = null,
 		b_hy = null,
-		activation_params = {},
-		recurrent_activation_params = {},
 		...rest
 	}) {
 		super(rest)
@@ -51,12 +47,12 @@ export default class RNNLayer extends Layer {
 			w_hh,
 			b_xh,
 			b_hh,
-			recurrent_activation_params,
 		})
 
-		this._activation = activation
-		if (activation) {
-			this._activation_func = Layer.fromObject({ type: activation, ...activation_params })
+		if (typeof activation === 'string') {
+			this._activation = Layer.fromObject({ type: activation })
+		} else if (activation) {
+			this._activation = Layer.fromObject(activation)
 		}
 
 		this._return_sequences = return_sequences
@@ -75,9 +71,9 @@ export default class RNNLayer extends Layer {
 			this._z[k] = this._unit.calc(this._i[k], k)
 			this._o[k] = this._z[k].dot(this._w_hy)
 			this._o[k].add(this._b_hy)
-			if (this._activation_func) {
+			if (this._activation) {
 				this._v[k] = this._o[k]
-				this._o[k] = this._activation_func.calc(this._o[k])
+				this._o[k] = this._activation.calc(this._o[k])
 			}
 		}
 		if (this._return_sequences) {
@@ -102,11 +98,11 @@ export default class RNNLayer extends Layer {
 		} else {
 			this._bo[s - 1] = bo
 		}
-		if (this._activation_func) {
+		if (this._activation) {
 			this._bo = this._bo.map((bo, i) => {
 				if (bo) {
-					this._activation_func.calc(this._v[i])
-					return this._activation_func.grad(bo)
+					this._activation.calc(this._v[i])
+					return this._activation.grad(bo)
 				}
 				return bo
 			})
@@ -152,8 +148,7 @@ export default class RNNLayer extends Layer {
 			return_sequences: this._return_sequences,
 			w_hy: this._w_hy.toArray(),
 			b_hy: this._b_hy.toArray(),
-			activation: this._activation,
-			activation_params: this._activation_func?.toObject(),
+			activation: this._activation?.toObject(),
 			...this._unit.toObject(),
 		}
 	}
@@ -167,7 +162,6 @@ class RNNUnitLayer extends Layer {
 		w_hh = null,
 		b_xh = null,
 		b_hh = null,
-		recurrent_activation_params = {},
 		...rest
 	}) {
 		super(rest)
@@ -184,12 +178,10 @@ class RNNUnitLayer extends Layer {
 		this._bo = []
 		this._bh = []
 
-		this._recurrent_activation = recurrent_activation
-		if (recurrent_activation) {
-			this._recurrent_activation_func = Layer.fromObject({
-				type: recurrent_activation,
-				...recurrent_activation_params,
-			})
+		if (typeof recurrent_activation === 'string') {
+			this._recurrent_activation = Layer.fromObject({ type: recurrent_activation })
+		} else if (recurrent_activation) {
+			this._recurrent_activation = Layer.fromObject(recurrent_activation)
 		}
 	}
 
@@ -207,9 +199,9 @@ class RNNUnitLayer extends Layer {
 		this._z[k].add(pre_z.dot(this._w_hh))
 		this._z[k].add(this._b_xh)
 		this._z[k].add(this._b_hh)
-		if (this._recurrent_activation_func) {
+		if (this._recurrent_activation) {
 			this._u[k] = this._z[k]
-			this._z[k] = this._recurrent_activation_func.calc(this._z[k])
+			this._z[k] = this._recurrent_activation.calc(this._z[k])
 		}
 		return this._z[k]
 	}
@@ -225,9 +217,9 @@ class RNNUnitLayer extends Layer {
 		if (k < s - 1) {
 			this._bh[k].add(this._bh[k + 1].dot(this._w_hh.t))
 		}
-		if (this._recurrent_activation_func) {
-			this._recurrent_activation_func.calc(this._u[k])
-			this._bh[k] = this._recurrent_activation_func.grad(this._bh[k])
+		if (this._recurrent_activation) {
+			this._recurrent_activation.calc(this._u[k])
+			this._bh[k] = this._recurrent_activation.grad(this._bh[k])
 		}
 		return this._bh[k].dot(this._w_xh.t)
 	}
@@ -271,8 +263,7 @@ class RNNUnitLayer extends Layer {
 			w_hh: this._w_hh.toArray(),
 			b_xh: this._b_xh.toArray(),
 			b_hh: this._b_hh.toArray(),
-			recurrent_activation: this._recurrent_activation,
-			recurrent_activation_params: this._recurrent_activation_func?.toObject(),
+			recurrent_activation: this._recurrent_activation?.toObject(),
 		}
 	}
 }
diff --git a/tests/lib/model/nns/layer/conv.test.js b/tests/lib/model/nns/layer/conv.test.js
index cb7c63290..bbcfb2f98 100644
--- a/tests/lib/model/nns/layer/conv.test.js
+++ b/tests/lib/model/nns/layer/conv.test.js
@@ -27,6 +27,22 @@ describe('layer', () => {
 			expect(() => layer.calc(x)).toThrow()
 		})
 
+		test('string activation', () => {
+			const layer = new ConvLayer({ kernel: 3, padding: 1, activation: 'sigmoid' })
+
+			const x = Tensor.randn([10, 3, 1])
+			const y = layer.calc(x)
+			expect(y.sizes).toEqual([10, 3, 2])
+		})
+
+		test('object activation', () => {
+			const layer = new ConvLayer({ kernel: 3, padding: 1, activation: { type: 'sigmoid' } })
+
+			const x = Tensor.randn([10, 3, 1])
+			const y = layer.calc(x)
+			expect(y.sizes).toEqual([10, 3, 2])
+		})
+
 		describe('1d', () => {
 			test('kernel:1-2-4 stride:1 padding:0', () => {
 				const layer = new ConvLayer({ kernel: 2, stride: 1, w: Tensor.ones([4, 1, 2]) })
@@ -363,6 +379,17 @@ describe('layer', () => {
 			expect(bi.sizes).toEqual([10, 3, 3, 2])
 		})
 
+		test('with object activation', () => {
+			const layer = new ConvLayer({ kernel: 3, padding: 1, activation: { type: 'tanh' } })
+
+			const x = Tensor.randn([10, 3, 3, 2])
+			layer.calc(x)
+
+			const bo = Tensor.randn([10, 3, 3, 4])
+			const bi = layer.grad(bo)
+			expect(bi.sizes).toEqual([10, 3, 3, 2])
+		})
+
 		test('channel 1', () => {
 			const layer = new ConvLayer({ kernel: 3, padding: 1, channel_dim: 1 })
 
@@ -383,7 +410,6 @@ describe('layer', () => {
 			type: 'conv',
 			kernel: 3,
 			padding: 1,
-			activation: null,
 			channel: null,
 			l1_decay: 0,
 			l2_decay: 0,
diff --git a/tests/lib/model/nns/layer/full.test.js b/tests/lib/model/nns/layer/full.test.js
index 79f25b7ae..89d8ae521 100644
--- a/tests/lib/model/nns/layer/full.test.js
+++ b/tests/lib/model/nns/layer/full.test.js
@@ -19,6 +19,22 @@ describe('layer', () => {
 			expect(y.sizes).toEqual([100, 4])
 		})
 
+		test('string activation', () => {
+			const layer = new FullLayer({ out_size: 4, activation: 'sigmoid' })
+
+			const x = Matrix.randn(100, 10)
+			const y = layer.calc(x)
+			expect(y.sizes).toEqual([100, 4])
+		})
+
+		test('object activation', () => {
+			const layer = new FullLayer({ out_size: 4, activation: { type: 'sigmoid' } })
+
+			const x = Matrix.randn(100, 10)
+			const y = layer.calc(x)
+			expect(y.sizes).toEqual([100, 4])
+		})
+
 		test('tensor', () => {
 			const layer = new FullLayer({ out_size: 4 })
 
@@ -40,6 +56,28 @@ describe('layer', () => {
 			expect(bi.sizes).toEqual([100, 10])
 		})
 
+		test('string activation', () => {
+			const layer = new FullLayer({ out_size: 4, activation: 'sigmoid' })
+
+			const x = Matrix.randn(100, 10)
+			layer.calc(x)
+
+			const bo = Matrix.ones(100, 4)
+			const bi = layer.grad(bo)
+			expect(bi.sizes).toEqual([100, 10])
+		})
+
+		test('object activation', () => {
+			const layer = new FullLayer({ out_size: 4, activation: { type: 'sigmoid' } })
+
+			const x = Matrix.randn(100, 10)
+			layer.calc(x)
+
+			const bo = Matrix.ones(100, 4)
+			const bi = layer.grad(bo)
+			expect(bi.sizes).toEqual([100, 10])
+		})
+
 		test('tensor', () => {
 			const layer = new FullLayer({ out_size: 4 })
 
@@ -57,7 +95,7 @@ describe('layer', () => {
 		const obj = layer.toObject()
 
 		expect(obj.type).toBe('full')
-		expect(obj.activation).toBeNull()
+		expect(obj.activation).toBeUndefined()
 		expect(obj.l1_decay).toBe(0)
 		expect(obj.l2_decay).toBe(0)
 		expect(obj.out_size).toBe(4)
diff --git a/tests/lib/model/nns/layer/rnn.test.js b/tests/lib/model/nns/layer/rnn.test.js
index 5edf078dc..a87859985 100644
--- a/tests/lib/model/nns/layer/rnn.test.js
+++ b/tests/lib/model/nns/layer/rnn.test.js
@@ -41,6 +41,14 @@ describe('layer', () => {
 			const y = layer.calc(x)
 			expect(y.sizes).toEqual([10, 4])
 		})
+
+		test('tensor object activation', () => {
+			const layer = new RNNLayer({ size: 4, activation: { type: 'sigmoid' } })
+
+			const x = Tensor.randn([10, 7, 5])
+			const y = layer.calc(x)
+			expect(y.sizes).toEqual([10, 4])
+		})
 	})
 
 	describe('grad', () => {
@@ -76,6 +84,17 @@ describe('layer', () => {
 			const bi = layer.grad(bo)
 			expect(bi.sizes).toEqual([10, 7, 5])
 		})
+
+		test('object activation', () => {
+			const layer = new RNNLayer({ size: 4, activation: { type: 'sigmoid' } })
+
+			const x = Tensor.randn([10, 7, 5])
+			layer.calc(x)
+
+			const bo = Matrix.ones(10, 4)
+			const bi = layer.grad(bo)
+			expect(bi.sizes).toEqual([10, 7, 5])
+		})
 	})
 
 	test('toObject', () => {
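
A minimal usage sketch of the option this patch changes (not part of the patch itself): activation now accepts either an activation name or an activation layer object, the object form replacing the removed activation_params, and toObject() serializes the activation back to object form. The import paths and the FullLayer binding below are assumptions mirroring the test files above.

	// Hypothetical import paths, following the layout of the tests in this patch.
	import FullLayer from './lib/model/nns/layer/full.js'
	import Matrix from './lib/util/matrix.js'

	// Name form: shorthand for { type: 'sigmoid' }.
	const byName = new FullLayer({ out_size: 4, activation: 'sigmoid' })
	// Object form: replaces the old activation + activation_params pair.
	const byObject = new FullLayer({ out_size: 4, activation: { type: 'sigmoid' } })

	const x = Matrix.randn(100, 10)
	console.log(byName.calc(x).sizes) // [100, 4]
	console.log(byObject.calc(x).sizes) // [100, 4]

	// toObject() now serializes the activation as an object,
	// or undefined when no activation was set.
	console.log(byObject.toObject().activation) // e.g. { type: 'sigmoid' }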