198 changes: 198 additions & 0 deletions lib/model/nns/layer/attention.js
@@ -0,0 +1,198 @@
import Matrix from '../../../util/matrix.js'
import Tensor from '../../../util/tensor.js'
import Layer from './base.js'

/**
* Attention layer (scaled dot-product attention: softmax(QK^T / sqrt(dk)) V)
*/
export default class AttentionLayer extends Layer {
/**
* @param {object} config object
* @param {number} [config.dk] Inner depth size
* @param {number} [config.dv] Output depth size
* @param {number[][] | Matrix | string} [config.wq] Weight of q
* @param {number[][] | Matrix | string} [config.wk] Weight of k
* @param {number[][] | Matrix | string} [config.wv] Weight of v
*/
constructor({ dk = null, dv = null, wq = null, wk = null, wv = null, ...rest }) {
super(rest)
this._dk = dk
this._dv = dv
this._wq = null
if (typeof wq === 'string') {
this._wqname = wq
} else if (wq) {
this._wq = Matrix.fromArray(wq)
}
this._wk = null
if (typeof wk === 'string') {
this._wkname = wk
} else if (wk) {
this._wk = Matrix.fromArray(wk)
}
this._wv = null
if (typeof wv === 'string') {
this._wvname = wv
} else if (wv) {
this._wv = Matrix.fromArray(wv)
}
}

calc(x, memory) {
this._selfattention = !memory
if (!memory) {
memory = x
}
this._dk ??= x.sizes.at(-1)
if (this._wqname) {
this._wq = this.graph.getNode(this._wqname).outputValue
}
if (!this._wq) {
this._wq = Matrix.randn(x.sizes[2], this._dk)
}
if (this._wkname) {
this._wk = this.graph.getNode(this._wkname).outputValue
}
if (!this._wk) {
this._wk = Matrix.randn(memory.sizes[2], this._dk)
}
this._dv ??= x.sizes.at(-1)
if (this._wvname) {
this._wv = this.graph.getNode(this._wvname).outputValue
}
if (!this._wv) {
this._wv = Matrix.randn(memory.sizes[2], this._dv)
}
this._i = x
this._m = memory
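// project input and memory into query, key and value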
this._q = x.dot(this._wq)
this._k = memory.dot(this._wk)
this._v = memory.dot(this._wv)

const qkt = this._matmul(this._q, this._k, false, true)
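// attention weights: softmax of qk^T / sqrt(dk) along the last axis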
this._atn = qkt.copy()
for (let i = 0; i < qkt.sizes[0]; i++) {
for (let j = 0; j < qkt.sizes[1]; j++) {
let tmp = []
for (let k = 0; k < qkt.sizes[2]; k++) {
tmp[k] = qkt.at(i, j, k) / Math.sqrt(this._dk)
}
const m = tmp.reduce((s, v) => Math.max(s, v), -Infinity)
let s = 0
for (let k = 0; k < qkt.sizes[2]; k++) {
tmp[k] = Math.exp(tmp[k] - m)
s += tmp[k]
}
for (let k = 0; k < qkt.sizes[2]; k++) {
this._atn.set([i, j, k], tmp[k] / s)
}
}
}

const o = this._matmul(this._atn, this._v)
return o
}

_matmul(a, b, transpose_a = false, transpose_b = false) {
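// batched matrix product over the leading axis; the transpose flags apply to the trailing two dimensions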
const sizes = [a.sizes[0], transpose_a ? a.sizes[2] : a.sizes[1], transpose_b ? b.sizes[1] : b.sizes[2]]
const d = transpose_a ? a.sizes[1] : a.sizes[2]
const t = new Tensor(sizes)
for (let i = 0; i < sizes[0]; i++) {
for (let j = 0; j < sizes[1]; j++) {
for (let k = 0; k < sizes[2]; k++) {
let v = 0
for (let s = 0; s < d; s++) {
v +=
(transpose_a ? a.at(i, s, j) : a.at(i, j, s)) *
(transpose_b ? b.at(i, k, s) : b.at(i, s, k))
}
t.set([i, j, k], v)
}
}
}
return t
}

grad(bo) {
const n = bo.sizes[0]
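// o = atn · v, so dv = atn^T · bo and datn = bo · v^T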
const bv = this._matmul(this._atn, bo, true)
const dwv = this._matmul(this._m, bv, true)
this._dwv = dwv.reduce((a, b) => a + b, 0, 0).toMatrix()
this._dwv.map(v => v / n)

const batn = this._matmul(bo, this._v, false, true)
const blog = batn.copy()
for (let t = 0; t < batn.sizes[0]; t++) {
for (let i = 0; i < batn.sizes[1]; i++) {
for (let j = 0; j < batn.sizes[2]; j++) {
// softmax backward: datn_k / dlogit_j = atn_k * ((j === k ? 1 : 0) - atn_j),
// so the Jacobian uses the softmax output at j, not the incoming gradient
const atnij = this._atn.at(t, i, j)
let b = 0
for (let k = 0; k < batn.sizes[2]; k++) {
const v = j === k ? 1 - atnij : -atnij
b += this._atn.at(t, i, k) * v * batn.at(t, i, k)
}
blog.set([t, i, j], b / Math.sqrt(this._dk))
}
}
}

const bq = this._matmul(blog, this._k)
const dwq = this._matmul(this._i, bq, true)
this._dwq = dwq.reduce((a, b) => a + b, 0, 0).toMatrix()
this._dwq.map(v => v / n)
const bi = bq.dot(this._wq.t)

const bk = this._matmul(blog, this._q, true)
const dwk = this._matmul(this._m, bk, true)
this._dwk = dwk.reduce((a, b) => a + b, 0, 0).toMatrix()
this._dwk.map(v => v / n)

const bm = bk.dot(this._wk.t)
bm.broadcastOperate(bv.dot(this._wv.t), (a, b) => a + b)

if (this._selfattention) {
bi.broadcastOperate(bm, (a, b) => a + b)
}

if (this._wqname || this._wkname || this._wvname) {
const gp = {}
if (this._wqname) {
gp[this._wqname] = this._dwq
}
if (this._wkname) {
gp[this._wkname] = this._dwk
}
if (this._wvname) {
gp[this._wvname] = this._dwv
}
return this._selfattention ? [bi, gp] : [bi, bm, gp]
}

return this._selfattention ? bi : [bi, bm]
}

update(optimizer) {
if (!this._wqname) {
this._wq.sub(optimizer.delta('wq', this._dwq))
}
if (!this._wkname) {
this._wk.sub(optimizer.delta('wk', this._dwk))
}
if (!this._wvname) {
this._wv.sub(optimizer.delta('wv', this._dwv))
}
}

toObject() {
return {
type: 'attention',
dk: this._dk,
dv: this._dv,
wq: this._wqname || this._wq?.toArray(),
wk: this._wkname || this._wk?.toArray(),
wv: this._wvname || this._wv?.toArray(),
}
}
}

AttentionLayer.registLayer()
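For reference, a minimal usage sketch (not part of this diff): it calls the layer's calc directly on a random batch. Tensor.randn is assumed to exist analogously to the Matrix.randn used above, and the import paths assume the snippet sits next to these files; adjust both as needed.

import Tensor from '../../../util/tensor.js'
import AttentionLayer from './attention.js'

// batch of 2 sequences, length 5, feature size 8
const layer = new AttentionLayer({ dk: 8, dv: 8 })
const x = Tensor.randn([2, 5, 8])
// self-attention (no memory argument): softmax(q k^T / sqrt(dk)) v
const y = layer.calc(x)
console.log(y.sizes) // [2, 5, 8]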
67 changes: 67 additions & 0 deletions lib/model/nns/layer/embedding.js
@@ -0,0 +1,67 @@
import Matrix from '../../../util/matrix.js'
import Tensor from '../../../util/tensor.js'
import Layer from './base.js'

/**
* Embedding layer that maps integer ids to learned vectors of the configured size
*/
export default class EmbeddingLayer extends Layer {
/**
* @param {object} config object
* @param {number} [config.size=512] size
* @param {object} [config.embeddings] embedding vectors
*/
constructor({ size = 512, embeddings = {}, ...rest }) {
super(rest)
this._size = size
this._v = embeddings
}

calc(x) {
this._i = x
const size = [...x.sizes, this._size]
const o = size.length === 2 ? new Matrix(...size) : new Tensor(size)
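// look up (and lazily initialize) a vector of length this._size for each id in x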
for (let i = 0; i < x.length; i++) {
if (!this._v[x.value[i]]) {
this._v[x.value[i]] = Matrix.randn(1, this._size)
}
for (let k = 0; k < this._size; k++) {
o.value[i * this._size + k] = this._v[x.value[i]].at(0, k)
}
}
return o
}

grad(bo) {
this._dw = {}
for (let i = 0; i < this._i.length; i++) {
if (!this._dw[this._i.value[i]]) {
this._dw[this._i.value[i]] = Matrix.zeros(1, this._size)
}
// accumulate the output gradient row once per occurrence of the id
this._dw[this._i.value[i]].add(
new Matrix(1, this._size, bo.value.slice(i * this._size, (i + 1) * this._size))
)
}
const bi = this._i.copy()
bi.fill(0)
return bi
}

update(optimizer) {
for (const w of Object.keys(this._dw)) {
this._v[w].sub(optimizer.delta(w, this._dw[w]))
}
}

toObject() {
return {
type: 'embedding',
size: this._size,
embeddings: this._v,
}
}
}

EmbeddingLayer.registLayer()
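A similar standalone sketch (not part of this diff): looking up vectors for a column of integer ids. Unseen ids get a random vector on first use, so the example only checks shapes; the import paths again assume the snippet sits next to these files. In a network definition, the same layers are addressed through the config objects added to index.js below, e.g. { type: 'embedding', size: 64 } followed by { type: 'attention', dk: 64 }.

import Matrix from '../../../util/matrix.js'
import EmbeddingLayer from './embedding.js'

const layer = new EmbeddingLayer({ size: 4 })
// three tokens given as a 3x1 matrix of ids; the two occurrences of id 1 share one embedding vector
const ids = Matrix.fromArray([[1], [3], [1]])
const out = layer.calc(ids)
console.log(out.sizes) // [3, 1, 4]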
6 changes: 6 additions & 0 deletions lib/model/nns/layer/index.js
@@ -4,6 +4,7 @@ export { default as AdaptivePiecewiseLinearLayer } from './apl.js'
export { default as ArandaLayer } from './aranda.js'
export { default as ArgmaxLayer } from './argmax.js'
export { default as ArgminLayer } from './argmin.js'
export { default as AttentionLayer } from './attention.js'
export { default as AveragePoolLayer } from './averagepool.js'
export { default as BatchNormalizationLayer } from './batch_normalization.js'
export { default as BimodalDerivativeAdaptiveActivationLayer } from './bdaa.js'
@@ -20,6 +21,7 @@ export { default as DetachLayer } from './detach.js'
export { default as DropoutLayer } from './dropout.js'
export { default as ElasticELULayer } from './eelu.js'
export { default as ELULayer } from './elu.js'
export { default as EmbeddingLayer } from './embedding.js'
export { default as ElasticReLULayer } from './erelu.js'
export { default as ESwishLayer } from './eswish.js'
export { default as FastELULayer } from './felu.js'
@@ -40,6 +42,7 @@ export { default as IdentityLayer } from './identity.js'
export { default as IncludeLayer } from './include.js'
export { default as InputLayer } from './input.js'
export { default as ImprovedSigmoidLayer } from './isigmoid.js'
export { default as LayerNormalizationLayer } from './layer_normalization.js'
export { default as LeakyReLULayer } from './leaky_relu.js'
export { default as LogSoftmaxLayer } from './logsoftmax.js'
export { default as LpPoolLayer } from './lppool.js'
@@ -115,6 +118,7 @@ import Tensor from '../../../util/tensor.js'
* { type: 'asinh' } |
* { type: 'atan' } |
* { type: 'atanh' } |
* { type: 'attention', dk?: number, dv?: number, wq?: number[][] | Matrix | string, wk?: number[][] | Matrix | string, wv?: number[][] | Matrix | string } |
* { type: 'average_pool', kernel: number | number[], stride?: number | number[], padding?: number | number[], channel_dim?: number } |
* { type: 'batch_normalization', scale?: number | number[] | string, offset?: number | number[] | string, epsilon?: number, channel_dim?: number, input_mean?: number[] | string, input_var?: number[] | string } |
* { type: 'bdaa', alpha?: number } |
@@ -144,6 +148,7 @@ import Tensor from '../../../util/tensor.js'
* { type: 'elish' } |
* { type: 'elliott' } |
* { type: 'elu', a?: number } |
* { type: 'embedding', size?: number, embeddings?: object } |
* { type: 'equal' } |
* { type: 'erelu' } |
* { type: 'erf' } |
@@ -175,6 +180,7 @@ import Tensor from '../../../util/tensor.js'
* { type: 'is_inf' } |
* { type: 'is_nan' } |
* { type: 'isigmoid', a?: number, alpha?: number } |
* { type: 'layer_normalization', axis?: number, epsilon?: number, scale?: number | number[] | string, offset?: number | number[] | string } |
* { type: 'leaky_relu', a?: number } |
* { type: 'left_bitshift' } |
* { type: 'less' } |