From b5ba3e1ad5172e8b4a51af5d732e89121b12c535 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Wed, 27 Sep 2023 21:01:07 +0900 Subject: [PATCH 1/3] Fix and improve RL environments, and add tests --- js/renderer/rl/draughts.js | 2 +- lib/rl/draughts.js | 108 +++++-- lib/rl/gomoku.js | 20 ++ lib/rl/inhypercube.js | 2 +- lib/rl/reversi.js | 39 ++- tests/lib/rl/acrobot.test.js | 35 +++ tests/lib/rl/base.test.js | 7 + tests/lib/rl/blackjack.test.js | 12 + tests/lib/rl/breaker.test.js | 192 ++++++++++++- tests/lib/rl/draughts.test.js | 470 +++++++++++++++++++++++-------- tests/lib/rl/gomoku.test.js | 167 +++++++---- tests/lib/rl/grid.test.js | 54 +++- tests/lib/rl/inhypercube.test.js | 79 ++++++ tests/lib/rl/mountaincar.test.js | 36 +++ tests/lib/rl/pendulum.test.js | 41 +++ tests/lib/rl/reversi.test.js | 321 ++++++++++++++++----- 16 files changed, 1304 insertions(+), 281 deletions(-) create mode 100644 tests/lib/rl/inhypercube.test.js diff --git a/js/renderer/rl/draughts.js b/js/renderer/rl/draughts.js index 04f4abb0f..b216c0f9f 100644 --- a/js/renderer/rl/draughts.js +++ b/js/renderer/rl/draughts.js @@ -160,7 +160,7 @@ class ManualPlayer { for (let i = 0; i < board.size[0]; i++) { this._check[i] = [] for (let j = 0; j < board.size[1]; j++) { - if ((i + j) % 2 > 0) continue + if ((i + j) % 2 === 0) continue this._check[i][j] = document.createElementNS('http://www.w3.org/2000/svg', 'rect') this._check[i][j].setAttribute('x', dw * j) this._check[i][j].setAttribute('y', dh * i) diff --git a/lib/rl/draughts.js b/lib/rl/draughts.js index 5d9a5dc9e..0ebb56361 100644 --- a/lib/rl/draughts.js +++ b/lib/rl/draughts.js @@ -47,7 +47,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { ] const checkBound = (x, y) => 0 <= x && x < this._size[0] && 0 <= y && y < this._size[1] for (let i = 0; i < this._size[0]; i++) { - for (let j = i % 2 === 0 ? 0 : 1; j < this._size[1]; j += 2) { + for (let j = i % 2 === 1 ? 0 : 1; j < this._size[1]; j += 2) { let midpath = [] for (const [di, dj] of d) { const i1 = i + di @@ -93,18 +93,14 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { get states() { const s = [[RED, WHITE]] for (let i = 0; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++) { - if (j % 2 === i % 2) { - s.push([ - EMPTY, - DraughtsRLEnvironment.OWN, - DraughtsRLEnvironment.OWN | KING, - DraughtsRLEnvironment.OTHER, - DraughtsRLEnvironment.OTHER | KING, - ]) - } else { - s.push([EMPTY]) - } + for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) { + s.push([ + EMPTY, + DraughtsRLEnvironment.OWN, + DraughtsRLEnvironment.OWN | KING, + DraughtsRLEnvironment.OTHER, + DraughtsRLEnvironment.OTHER | KING, + ]) } } return s @@ -123,7 +119,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { _makeState(board, agentturn, gameturn) { const s = [gameturn] for (let i = 0; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++) { + for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) { const p = board.at([i, j]) if (p === EMPTY) { s.push(EMPTY) @@ -144,7 +140,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { const board = new DraughtsBoard(this._size, this._evaluation) const opturn = turn === RED ? WHITE : RED for (let i = 0, p = 1; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++, p++) { + for (let j = i % 2 === 0 ? 
1 : 0; j < this._size[1]; j += 2, p++) { if (state[p] === EMPTY) { board._board[i][j] = EMPTY } else { @@ -241,6 +237,7 @@ class DraughtsBoard { constructor(size, evaluator) { this._evaluator = evaluator this._size = size + this._lines = 3 this.reset() } @@ -280,6 +277,26 @@ class DraughtsBoard { return null } + toString() { + let buf = '' + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + if (j > 0) { + buf += ' ' + } + if (this._board[i][j] === RED) { + buf += 'x' + } else if (this._board[i][j] === WHITE) { + buf += 'o' + } else { + buf += '-' + } + } + buf += '\n' + } + return buf + } + nextTurn(turn) { if (turn === WHITE) { return RED @@ -310,20 +327,44 @@ class DraughtsBoard { } } + _num_to_pos(n) { + if (typeof n !== 'number') { + return n + } + const r = Math.floor((n - 1) / this._size[1]) + const c = (n - 1) % this._size[1] + if (c < (this._size[1] - 1) / 2) { + return [r * 2, c * 2 + 1] + } else { + return [r * 2 + 1, (c - Math.floor(this._size[1] / 2)) * 2] + } + } + at(p) { + if (typeof p === 'number') { + p = this._num_to_pos(p) + } return this._board[p[0]][p[1]] } set(p, turn) { - let piece = this._board[p.from[0]][p.from[1]] + p = { + from: this._num_to_pos(p.from), + path: p.path.map(v => this._num_to_pos(v)), + jump: p.jump.map(v => this._num_to_pos(v)), + } + let piece = this.at(p.from) if (!(turn & piece)) { return false } + if ((p.jump.length !== 0 || p.path.length !== 1) && p.jump.length !== p.path.length) { + return false + } const nturn = this.nextTurn(turn) - if (p.jump.some(([i, j]) => !(this._board[i][j] & nturn))) { + if (p.jump.some(j => !(this.at(j) & nturn))) { return false } - if (p.path.some(([i, j]) => this._board[i][j] !== EMPTY)) { + if (p.path.some(j => this.at(j) !== EMPTY)) { return false } @@ -334,6 +375,27 @@ class DraughtsBoard { } } + if (p.jump.length === 0) { + for (let i = 0; i < 2; i++) { + if (Math.abs(p.from[i] - p.path[0][i]) !== 1) { + return false + } + } + } else { + let pos = p.from + for (let k = 0; k < p.path.length; k++) { + for (let i = 0; i < 2; i++) { + if (Math.abs(pos[i] - p.jump[k][i]) !== 1) { + return false + } + if (Math.abs(p.jump[k][i] - p.path[k][i]) !== 1) { + return false + } + } + pos = p.path[k] + } + } + this._board[p.from[0]][p.from[1]] = EMPTY for (const [i, j] of p.jump) { this._board[i][j] = EMPTY @@ -354,10 +416,10 @@ class DraughtsBoard { this._board[i] = Array(this._size[1]).fill(EMPTY) } for (let i = 0; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++) { - if (i < 3 && (i + j) % 2 === 0) { + for (let j = i % 2 === 0 ? 
1 : 0; j < this._size[1]; j += 2) { + if (i < this._lines) { this._board[i][j] = RED - } else if (this._size[0] - 3 <= i && (i + j) % 2 === 0) { + } else if (this._size[0] - this._lines <= i) { this._board[i][j] = WHITE } } @@ -418,9 +480,9 @@ class DraughtsBoard { cp._board[x + dx * 2][y + dy * 2] = this._board[x][y] cp._board[x][y] = EMPTY cp._board[x + dx][y + dy] = EMPTY - if (turn === RED && x * dx * 2 === this._size[0] - 1) { + if (turn === RED && x + dx * 2 === this._size[0] - 1) { cp._board[x + dx * 2][y + dy * 2] |= KING - } else if (turn === WHITE && x * dx * 2 === 0) { + } else if (turn === WHITE && x + dx * 2 === 0) { cp._board[x + dx * 2][y + dy * 2] |= KING } const npath = cp.allPath(x + dx * 2, y + dy * 2, turn, false) diff --git a/lib/rl/gomoku.js b/lib/rl/gomoku.js index 570663c9e..cf6350b3e 100644 --- a/lib/rl/gomoku.js +++ b/lib/rl/gomoku.js @@ -190,6 +190,26 @@ class GomokuBoard { return null } + toString() { + let buf = '' + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + if (j > 0) { + buf += ' ' + } + if (this._board[i][j] === BLACK) { + buf += 'x' + } else if (this._board[i][j] === WHITE) { + buf += 'o' + } else { + buf += '-' + } + } + buf += '\n' + } + return buf + } + nextTurn(turn) { return turn === BLACK ? WHITE : BLACK } diff --git a/lib/rl/inhypercube.js b/lib/rl/inhypercube.js index 1cb9c726d..9875e356f 100644 --- a/lib/rl/inhypercube.js +++ b/lib/rl/inhypercube.js @@ -68,7 +68,7 @@ export default class InHypercubeRLEnvironment extends RLEnvironmentBase { } const success = p[this._success_dim] <= -this._fail_position - const fail = !success && p.every(v => Math.abs(v) >= this._fail_position) + const fail = !success && p.some(v => Math.abs(v) >= this._fail_position) const done = this.epoch >= this._max_step || success || fail const reward = fail ? this._reward.fail : success ? 
this._reward.goal : this._reward.step
return {
diff --git a/lib/rl/reversi.js b/lib/rl/reversi.js
index 2606dd0e8..700b95ae7 100644
--- a/lib/rl/reversi.js
+++ b/lib/rl/reversi.js
@@ -46,7 +46,7 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
const a = [EMPTY]
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
- a.push(`${i}_${j}`)
+ a.push(`${String.fromCharCode('a'.charCodeAt(0) + j)}${i + 1}`)
}
}
return [a]
@@ -167,8 +167,7 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
invalid,
}
}
- const choice = action[0].split('_').map(v => +v)
- const changed = board.set(choice, agent)
+ const changed = board.set(action[0], agent)
const done = board.finish
if (!changed) {
return {
@@ -233,6 +232,26 @@ class ReversiBoard {
return null
}
+ toString() {
+ let buf = ''
+ for (let i = 0; i < this._size[0]; i++) {
+ for (let j = 0; j < this._size[1]; j++) {
+ if (j > 0) {
+ buf += ' '
+ }
+ if (this._board[i][j] === BLACK) {
+ buf += 'x'
+ } else if (this._board[i][j] === WHITE) {
+ buf += 'o'
+ } else {
+ buf += '-'
+ }
+ }
+ buf += '\n'
+ }
+ return buf
+ }
+
nextTurn(turn) {
return flipPiece(turn)
}
@@ -260,10 +279,16 @@ class ReversiBoard {
}
at(p) {
+ if (typeof p === 'string') {
+ p = [p[1] - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
+ }
return this._board[p[0]][p[1]]
}
set(p, turn) {
+ if (typeof p === 'string') {
+ p = [p[1] - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
+ }
const flips = this.flipPositions(p[0], p[1], turn)
if (flips.length === 0) {
return false
@@ -282,10 +307,10 @@ class ReversiBoard {
}
const cx = Math.floor(this._size[0] / 2)
const cy = Math.floor(this._size[1] / 2)
- this._board[cx - 1][cy - 1] = BLACK
- this._board[cx - 1][cy] = WHITE
- this._board[cx][cy - 1] = WHITE
- this._board[cx][cy] = BLACK
+ this._board[cx - 1][cy - 1] = WHITE
+ this._board[cx - 1][cy] = BLACK
+ this._board[cx][cy - 1] = BLACK
+ this._board[cx][cy] = WHITE
}
choices(turn) {
diff --git a/tests/lib/rl/acrobot.test.js b/tests/lib/rl/acrobot.test.js
index 7ff59ddef..6c1f0ee99 100644
--- a/tests/lib/rl/acrobot.test.js
+++ b/tests/lib/rl/acrobot.test.js
@@ -80,6 +80,41 @@ describe('test', () => {
expect(info.state[3]).toBeGreaterThan(0)
})
+ test('small t1, t2', () => {
+ const env = new AcrobotRLEnvironment()
+ const info = env.test([-4, -13, 0, 0], [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(-1)
+ expect(info.state[0]).toBeCloseTo(-4 + 2 * Math.PI)
+ expect(info.state[1]).toBeCloseTo(-13 + 4 * Math.PI)
+ expect(info.state[2]).toBeLessThan(0)
+ expect(info.state[3]).toBeGreaterThan(0)
+ })
+
+ test('big t1, t2', () => {
+ const env = new AcrobotRLEnvironment()
+ const info = env.test([26, 4, 0, 0], [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(-1)
+ expect(info.state[0]).toBeCloseTo(26 - 8 * Math.PI)
+ expect(info.state[1]).toBeCloseTo(4 - 2 * Math.PI)
+ expect(info.state[2]).toBeLessThan(0)
+ expect(info.state[3]).toBeGreaterThan(0)
+ })
+
+ test('clip dt1, dt2', () => {
+ const env = new AcrobotRLEnvironment()
+ const info = env.test([0, 0, -100, 100], [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(-1)
+ for (let i = 0; i < 2; i++) {
+ expect(info.state[i]).toBeLessThanOrEqual(Math.PI)
+ expect(info.state[i]).toBeGreaterThanOrEqual(-Math.PI)
+ }
+ expect(info.state[2]).toBeCloseTo(-4 * Math.PI)
+ expect(info.state[3]).toBeCloseTo(9 * Math.PI)
+ })
+
test('goal', () => {
const env = new AcrobotRLEnvironment()
const info = env.test([Math.PI, Math.PI / 2, 0, 0], [0])
diff
--git a/tests/lib/rl/base.test.js b/tests/lib/rl/base.test.js index 28609e8f2..12ae99020 100644 --- a/tests/lib/rl/base.test.js +++ b/tests/lib/rl/base.test.js @@ -99,6 +99,13 @@ describe('EmptyRLEnvironment', () => { expect(env.states).toEqual([]) }) + test('clone', () => { + const env = new EmptyRLEnvironment() + const clone = env.clone() + expect(clone.actions).toEqual([]) + expect(clone.states).toEqual([]) + }) + test('reset', () => { const env = new EmptyRLEnvironment() const init_state = env.reset() diff --git a/tests/lib/rl/blackjack.test.js b/tests/lib/rl/blackjack.test.js index dd932c175..f05d47577 100644 --- a/tests/lib/rl/blackjack.test.js +++ b/tests/lib/rl/blackjack.test.js @@ -79,4 +79,16 @@ describe('step', () => { expect(info.reward).toBeLessThan(0) expect(info.state).toBeInstanceOf(Array) }) + + test('usableace', () => { + const env = new BlackjackRLEnvironment() + env._player_hands = [ + { suit: 0, value: 1 }, + { suit: 0, value: 2 }, + ] + const info = env.step([0]) + expect(info.done).toBeTruthy() + expect(info.reward).toBeDefined() + expect(info.state).toBeInstanceOf(Array) + }) }) diff --git a/tests/lib/rl/breaker.test.js b/tests/lib/rl/breaker.test.js index 163420b9f..62a8bcc1a 100644 --- a/tests/lib/rl/breaker.test.js +++ b/tests/lib/rl/breaker.test.js @@ -57,4 +57,194 @@ test('step', () => { expect(info.state).toHaveLength(85) }) -test.todo('test') +describe('test', () => { + test('default', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0.1) + expect(info.state).toHaveLength(85) + expect(env.epoch).toBe(0) + }) + + test.each([0, 1000])('bar position: %p', p => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[4] = p + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0.1) + expect(info.state).toHaveLength(85) + expect(env.epoch).toBe(0) + }) + + test('hit paddle top', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[0] = 100 + state[1] = env._paddle_baseline + 1 + state[2] = 1 + state[3] = -1 + state[4] = 100 + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(100) + expect(info.state).toHaveLength(85) + expect(env.epoch).toBe(0) + }) + + test('hit paddle side', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[0] = 100 - env._paddle_size[0] / 2 + state[1] = env._paddle_baseline + state[2] = 1 + state[3] = -1 + state[4] = 100 + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(100) + expect(info.state).toHaveLength(85) + expect(info.state[0]).toBe(state[0] + 1) + expect(info.state[1]).toBe(state[1] - 1) + expect(info.state[2]).toBe(-1) + expect(info.state[3]).toBe(-1) + expect(info.state[4]).toBe(100) + expect(env.epoch).toBe(0) + }) + + test('hit side left', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[0] = 0 + state[1] = 100 + state[2] = -1 + state[3] = 1 + state[4] = 100 + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0.1) + expect(info.state).toHaveLength(85) + expect(info.state[0]).toBe(state[0] - 1) + expect(info.state[1]).toBe(state[1] + 1) + expect(info.state[2]).toBe(1) + expect(info.state[3]).toBe(1) + expect(info.state[4]).toBe(100) + expect(env.epoch).toBe(0) + }) + + test('hit side right', () => { + const env 
= new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = env._size[0]
+ state[1] = 100
+ state[2] = 1
+ state[3] = 1
+ state[4] = 100
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(0.1)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] + 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('hit top', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = 100
+ state[1] = env._size[1]
+ state[2] = -1
+ state[3] = 1
+ state[4] = 100
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(0.1)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] - 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(-1)
+ expect(info.state[4]).toBe(100)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('hit bottom', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = 100
+ state[1] = 0
+ state[2] = -1
+ state[3] = 1
+ state[4] = 100
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeTruthy()
+ expect(info.reward).toBe(-1000)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] - 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('hit block', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = env._padding[0][0]
+ state[1] = env._padding[1][0]
+ state[2] = 1
+ state[3] = 1
+ state[4] = 100
+ expect(state[5]).toBe(1)
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(100)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] + 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(info.state[5]).toBe(0)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('already broken block', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = env._padding[0][0]
+ state[1] = env._padding[1][0]
+ state[2] = 1
+ state[3] = 1
+ state[4] = 100
+
+ const info0 = env.test(state, [0])
+ const info = env.test(info0.state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(0.1)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0])
+ expect(info.state[1]).toBe(state[1] + 2)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(info.state[5]).toBe(0)
+ expect(env.epoch).toBe(0)
+ })
+})
diff --git a/tests/lib/rl/draughts.test.js b/tests/lib/rl/draughts.test.js
index 9d5d93f84..fe939348c 100644
--- a/tests/lib/rl/draughts.test.js
+++ b/tests/lib/rl/draughts.test.js
@@ -15,7 +15,28 @@ describe('env', () => {
const env = new DraughtsRLEnvironment()
expect(env.actions[0]).toHaveLength(1 + 1426)
- expect(env.states).toHaveLength(1 + 8 * 8)
+ expect(env.states).toHaveLength(1 + 8 * 4)
+ })
+
+ describe('evaluation', () => {
+ test('set', () => {
+ const env = new DraughtsRLEnvironment()
+ env.evaluation = state => {
+ expect(state).toHaveLength(1 + 8 * 4)
+ return 1
+ }
+
+ const score = env._board.score()
+ expect(score).toBe(1)
+ })
+
+ test('clear', () => {
+
const env = new DraughtsRLEnvironment() + env.evaluation = null + + const score = env._board.score() + expect(score).toBe(0) + }) }) describe('reset', () => { @@ -23,17 +44,15 @@ describe('env', () => { const env = new DraughtsRLEnvironment() const state = env.reset() - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.RED) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 3 - ? DraughtsRLEnvironment.OWN - : i >= 5 - ? DraughtsRLEnvironment.OTHER - : DraughtsRLEnvironment.EMPTY + i < 3 + ? DraughtsRLEnvironment.OWN + : i >= 5 + ? DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.EMPTY ) } @@ -42,31 +61,48 @@ describe('env', () => { }) describe('state', () => { - test.each([DraughtsRLEnvironment.RED, DraughtsRLEnvironment.WHITE])('success %i', agent => { + test.each([undefined, DraughtsRLEnvironment.RED, DraughtsRLEnvironment.WHITE])('success %i', agent => { const env = new DraughtsRLEnvironment() env.reset(0, 1) - const red = agent === DraughtsRLEnvironment.RED ? DraughtsRLEnvironment.OWN : DraughtsRLEnvironment.OTHER - const white = agent === DraughtsRLEnvironment.RED ? DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.OWN + const red = + agent === undefined || agent === DraughtsRLEnvironment.RED + ? DraughtsRLEnvironment.OWN + : DraughtsRLEnvironment.OTHER + const white = + agent === undefined || agent === DraughtsRLEnvironment.RED + ? DraughtsRLEnvironment.OTHER + : DraughtsRLEnvironment.OWN const state = env.state(agent) - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.RED) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { - expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 3 - ? red - : i >= 5 - ? white - : DraughtsRLEnvironment.EMPTY - : DraughtsRLEnvironment.EMPTY - ) + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { + expect(state[p]).toBe(i < 3 ? red : i >= 5 ? 
white : DraughtsRLEnvironment.EMPTY) } } }) + test('with king', () => { + const env = new DraughtsRLEnvironment() + env.reset() + + env.step([{ from: 11, path: [16], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 22, path: [17], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 8, path: [11], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 26, path: [22], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 16, path: [20], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 22, path: [18], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 9, path: [13], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 31, path: [26], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 13, path: [22, 31], jump: [17, 26] }], DraughtsRLEnvironment.RED) + + const state = env.state(DraughtsRLEnvironment.RED) + expect(state).toHaveLength(1 + 8 * 4) + expect(state[31]).toBe(DraughtsRLEnvironment.OWN | DraughtsRLEnvironment.KING) + }) + test('failed before reset', () => { const env = new DraughtsRLEnvironment() expect(() => env.state(DraughtsRLEnvironment.RED)).toThrow( @@ -82,27 +118,25 @@ describe('env', () => { }) describe('step', () => { - test('success', () => { + test.each([undefined, DraughtsRLEnvironment.RED])('success agent: %p', agent => { const env = new DraughtsRLEnvironment() env.reset() - const info = env.step([{ from: [2, 0], path: [[3, 1]], jump: [] }], DraughtsRLEnvironment.RED) + const info = env.step([{ from: [2, 1], path: [[3, 2]], jump: [] }], agent) expect(info.invalid).toBeFalsy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) const state = info.state - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.WHITE) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 2 || (i === 2 && j !== 0) || (i === 3 && j === 1) - ? DraughtsRLEnvironment.OWN - : i >= 5 - ? DraughtsRLEnvironment.OTHER - : DraughtsRLEnvironment.EMPTY + i < 2 || (i === 2 && j !== 1) || (i === 3 && j === 2) + ? DraughtsRLEnvironment.OWN + : i >= 5 + ? 
DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.EMPTY ) } @@ -151,14 +185,14 @@ describe('env', () => { const env = new DraughtsRLEnvironment() env.reset() - const info1 = env.step([{ from: [2, 0], path: [[3, 1]], jump: [] }], DraughtsRLEnvironment.RED) + const info1 = env.step([{ from: [2, 1], path: [[3, 2]], jump: [] }], DraughtsRLEnvironment.RED) expect(info1.invalid).toBeFalsy() expect(env.epoch).toBe(1) - const info2 = env.step([{ from: [5, 3], path: [[4, 2]], jump: [] }], DraughtsRLEnvironment.WHITE) + const info2 = env.step([{ from: [5, 4], path: [[4, 3]], jump: [] }], DraughtsRLEnvironment.WHITE) expect(info2.invalid).toBeFalsy() expect(env.epoch).toBe(2) - const info = env.step([{ from: [2, 2], path: [[3, 3]], jump: [] }], DraughtsRLEnvironment.RED) + const info = env.step([{ from: [2, 3], path: [[3, 4]], jump: [] }], DraughtsRLEnvironment.RED) expect(info.invalid).toBeTruthy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -181,25 +215,23 @@ describe('env', () => { }) describe('test', () => { - test('step', () => { + test.each([undefined, DraughtsRLEnvironment.RED])('step agent: %p', agent => { const env = new DraughtsRLEnvironment() const orgstate = env.reset() - const info = env.test(orgstate, [{ from: [2, 0], path: [[3, 1]], jump: [] }], DraughtsRLEnvironment.RED) + const info = env.test(orgstate, [{ from: [2, 1], path: [[3, 2]], jump: [] }], agent) expect(info.invalid).toBeFalsy() const state = info.state - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.WHITE) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 2 || (i === 2 && j !== 0) || (i === 3 && j === 1) - ? DraughtsRLEnvironment.OWN - : i >= 5 - ? DraughtsRLEnvironment.OTHER - : DraughtsRLEnvironment.EMPTY + i < 2 || (i === 2 && j !== 1) || (i === 3 && j === 2) + ? DraughtsRLEnvironment.OWN + : i >= 5 + ? 
DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.EMPTY ) } @@ -207,6 +239,65 @@ describe('env', () => { expect(orgstate).toEqual(env.state(DraughtsRLEnvironment.RED)) expect(env.epoch).toBe(0) }) + + test('win', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.RED + state[1] = DraughtsRLEnvironment.OWN + state[6] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [{ from: [0, 1], path: [[2, 3]], jump: [[1, 2]] }], DraughtsRLEnvironment.RED) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + }) + + test('lose', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.RED + state[1] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [DraughtsRLEnvironment.EMPTY], DraughtsRLEnvironment.RED) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + }) + + test('empty gameturn red', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.RED + state[4] = DraughtsRLEnvironment.OWN + state[8] = DraughtsRLEnvironment.OTHER + state[11] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [DraughtsRLEnvironment.EMPTY], DraughtsRLEnvironment.RED) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(DraughtsRLEnvironment.WHITE) + }) + + test('empty gameturn white', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.WHITE + state[4] = DraughtsRLEnvironment.OWN + state[8] = DraughtsRLEnvironment.OTHER + state[11] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [DraughtsRLEnvironment.EMPTY], DraughtsRLEnvironment.WHITE) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(DraughtsRLEnvironment.RED) + }) }) }) @@ -217,13 +308,13 @@ describe('board', () => { expect(board.size).toEqual([8, 8]) for (let i = 0; i < 8; i += 2) { - expect(board.at([0, i])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([1, i + 1])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([2, i])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([0, i + 1])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([1, i])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([2, i + 1])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([5, i + 1])).toBe(DraughtsRLEnvironment.WHITE) - expect(board.at([6, i])).toBe(DraughtsRLEnvironment.WHITE) - expect(board.at([7, i + 1])).toBe(DraughtsRLEnvironment.WHITE) + expect(board.at([5, i])).toBe(DraughtsRLEnvironment.WHITE) + expect(board.at([6, i + 1])).toBe(DraughtsRLEnvironment.WHITE) + expect(board.at([7, i])).toBe(DraughtsRLEnvironment.WHITE) } expect(board.finish).toBeFalsy() expect(board.count.red).toBe(12) @@ -233,51 +324,154 @@ describe('board', () => { expect(board.score(DraughtsRLEnvironment.WHITE)).toBe(0) }) - test('choices', () => { + describe('winner', () => { + test('random', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + let turn = DraughtsRLEnvironment.RED + + let maxIter = 1.0e4 + while (maxIter-- > 0) { + const choices = board.choices(turn) + if 
(choices.length === 0) { + turn = board.nextTurn(turn) + continue + } + + board.set(choices[0], turn) + turn = board.nextTurn(turn) + } + + expect(board.winner).not.toBeNull() + }) + }) + + test('toString', () => { const env = new DraughtsRLEnvironment() const board = env._board - const choiceRed = board.choices(DraughtsRLEnvironment.RED) - expect(choiceRed).toEqual([ - { from: [2, 0], path: [[3, 1]], jump: [] }, - { from: [2, 2], path: [[3, 3]], jump: [] }, - { from: [2, 2], path: [[3, 1]], jump: [] }, - { from: [2, 4], path: [[3, 5]], jump: [] }, - { from: [2, 4], path: [[3, 3]], jump: [] }, - { from: [2, 6], path: [[3, 7]], jump: [] }, - { from: [2, 6], path: [[3, 5]], jump: [] }, - ]) - const choiceWhite = board.choices(DraughtsRLEnvironment.WHITE) - expect(choiceWhite).toEqual([ - { from: [5, 1], path: [[4, 2]], jump: [] }, - { from: [5, 1], path: [[4, 0]], jump: [] }, - { from: [5, 3], path: [[4, 4]], jump: [] }, - { from: [5, 3], path: [[4, 2]], jump: [] }, - { from: [5, 5], path: [[4, 6]], jump: [] }, - { from: [5, 5], path: [[4, 4]], jump: [] }, - { from: [5, 7], path: [[4, 6]], jump: [] }, - ]) + expect(board.toString()).toBe(`- x - x - x - x +x - x - x - x - +- x - x - x - x +- - - - - - - - +- - - - - - - - +o - o - o - o - +- o - o - o - o +o - o - o - o - +`) + }) + + test('nextTurn', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.nextTurn(DraughtsRLEnvironment.RED)).toBe(DraughtsRLEnvironment.WHITE) + expect(board.nextTurn(DraughtsRLEnvironment.WHITE)).toBe(DraughtsRLEnvironment.RED) + }) + + test('copy', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const cp = board.copy() + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + expect(cp.at([i, j])).toBe(board.at([i, j])) + } + } + }) + + describe('at', () => { + test.each([[0, 1], 1])('%p', p => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.at(p)).toBe(DraughtsRLEnvironment.RED) + }) + + test.each([[7, 0], 29])('%p', p => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.at(p)).toBe(DraughtsRLEnvironment.WHITE) + }) }) describe('set', () => { - test('success', () => { + test.each([ + { from: [2, 1], path: [[3, 2]], jump: [] }, + { from: 9, path: [14], jump: [] }, + ])('success %p', p => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.at([2, 1])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([3, 2])).toBe(DraughtsRLEnvironment.EMPTY) + + const success = board.set(p, DraughtsRLEnvironment.RED) + expect(success).toBeTruthy() + expect(board.at([2, 1])).toBe(DraughtsRLEnvironment.EMPTY) + expect(board.at([3, 2])).toBe(DraughtsRLEnvironment.RED) + }) + + test('to king', () => { const env = new DraughtsRLEnvironment() const board = env._board - expect(board.at([2, 0])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([3, 1])).toBe(DraughtsRLEnvironment.EMPTY) + board.set({ from: 11, path: [16], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [17], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 8, path: [11], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 26, path: [22], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 16, path: [20], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [18], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 9, path: [13], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ 
from: 31, path: [26], jump: [] }, DraughtsRLEnvironment.WHITE) - const success = board.set({ from: [2, 0], path: [[3, 1]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: 13, path: [22, 31], jump: [17, 26] }, DraughtsRLEnvironment.RED) expect(success).toBeTruthy() - expect(board.at([2, 0])).toBe(DraughtsRLEnvironment.EMPTY) - expect(board.at([3, 1])).toBe(DraughtsRLEnvironment.RED) }) test('fail invalid piece', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success = board.set({ from: [5, 1], path: [[4, 2]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [5, 2], path: [[4, 3]], jump: [] }, DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) + + test('fail invalid path length', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const success = board.set( + { + from: [2, 1], + path: [ + [3, 2], + [4, 3], + ], + jump: [], + }, + DraughtsRLEnvironment.RED + ) + expect(success).toBeFalsy() + }) + + test('fail invalid path, jump length', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const success = board.set( + { + from: [2, 1], + path: [ + [4, 3], + [5, 4], + ], + jump: [[3, 2]], + }, + DraughtsRLEnvironment.RED + ) expect(success).toBeFalsy() }) @@ -285,11 +479,11 @@ describe('board', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success1 = board.set({ from: [2, 0], path: [[3, 1]], jump: [] }, DraughtsRLEnvironment.RED) + const success1 = board.set({ from: [2, 1], path: [[3, 2]], jump: [] }, DraughtsRLEnvironment.RED) expect(success1).toBeTruthy() - const success2 = board.set({ from: [5, 1], path: [[4, 0]], jump: [] }, DraughtsRLEnvironment.WHITE) + const success2 = board.set({ from: [5, 0], path: [[4, 1]], jump: [] }, DraughtsRLEnvironment.WHITE) expect(success2).toBeTruthy() - const success = board.set({ from: [3, 1], path: [[2, 0]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [3, 2], path: [[2, 1]], jump: [] }, DraughtsRLEnvironment.RED) expect(success).toBeFalsy() }) @@ -297,7 +491,7 @@ describe('board', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success = board.set({ from: [1, 1], path: [[3, 3]], jump: [[2, 2]] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [1, 0], path: [[3, 2]], jump: [[2, 1]] }, DraughtsRLEnvironment.RED) expect(success).toBeFalsy() }) @@ -305,47 +499,99 @@ describe('board', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success = board.set({ from: [1, 1], path: [[2, 2]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [1, 0], path: [[2, 1]], jump: [] }, DraughtsRLEnvironment.RED) expect(success).toBeFalsy() }) - }) - test('nextTurn', () => { - const env = new DraughtsRLEnvironment() - const board = env._board + test('fail invalid move path only', () => { + const env = new DraughtsRLEnvironment() + const board = env._board - expect(board.nextTurn(DraughtsRLEnvironment.RED)).toBe(DraughtsRLEnvironment.WHITE) - expect(board.nextTurn(DraughtsRLEnvironment.WHITE)).toBe(DraughtsRLEnvironment.RED) + const success = board.set({ from: [2, 1], path: [[3, 1]], jump: [] }, DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) + + test('fail invalid move jump', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const success = board.set({ from: [2, 1], path: [[4, 3]], jump: [[6, 3]] }, 
DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) + + test('fail invalid move path', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + board.set({ from: [2, 1], path: [[3, 2]], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: [5, 4], path: [[4, 3]], jump: [] }, DraughtsRLEnvironment.WHITE) + const success = board.set({ from: [3, 2], path: [[4, 4]], jump: [[4, 3]] }, DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) }) - test('copy', () => { + test('choices', () => { const env = new DraughtsRLEnvironment() const board = env._board - const cp = board.copy() - for (let i = 0; i < board.size[0]; i++) { - for (let j = 0; j < board.size[1]; j++) { - expect(cp.at([i, j])).toBe(board.at([i, j])) - } - } + const choiceRed = board.choices(DraughtsRLEnvironment.RED) + expect(choiceRed).toEqual([ + { from: [2, 1], path: [[3, 2]], jump: [] }, + { from: [2, 1], path: [[3, 0]], jump: [] }, + { from: [2, 3], path: [[3, 4]], jump: [] }, + { from: [2, 3], path: [[3, 2]], jump: [] }, + { from: [2, 5], path: [[3, 6]], jump: [] }, + { from: [2, 5], path: [[3, 4]], jump: [] }, + { from: [2, 7], path: [[3, 6]], jump: [] }, + ]) + const choiceWhite = board.choices(DraughtsRLEnvironment.WHITE) + expect(choiceWhite).toEqual([ + { from: [5, 0], path: [[4, 1]], jump: [] }, + { from: [5, 2], path: [[4, 3]], jump: [] }, + { from: [5, 2], path: [[4, 1]], jump: [] }, + { from: [5, 4], path: [[4, 5]], jump: [] }, + { from: [5, 4], path: [[4, 3]], jump: [] }, + { from: [5, 6], path: [[4, 7]], jump: [] }, + { from: [5, 6], path: [[4, 5]], jump: [] }, + ]) }) - test('winner', () => { - const env = new DraughtsRLEnvironment() - const board = env._board - let turn = DraughtsRLEnvironment.RED + describe('allPah', () => { + test('not start own piece', () => { + const env = new DraughtsRLEnvironment() + const board = env._board - while (!board.finish) { - const choices = board.choices(turn) - if (choices.length === 0) { - turn = board.nextTurn(turn) - continue - } + const path = board.allPath(0, 1, DraughtsRLEnvironment.WHITE) + expect(path).toHaveLength(0) + }) - board.set(choices[0], turn) - turn = board.nextTurn(turn) - } + test('will be king', () => { + const env = new DraughtsRLEnvironment() + const board = env._board - expect(board.winner).not.toBeNull() + board.set({ from: 11, path: [16], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [17], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 8, path: [11], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 26, path: [22], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 16, path: [20], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [18], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 9, path: [13], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 31, path: [26], jump: [] }, DraughtsRLEnvironment.WHITE) + + const path = board.allPath(3, 0, DraughtsRLEnvironment.RED) + expect(path).toEqual([ + { + from: [3, 0], + path: [ + [5, 2], + [7, 4], + ], + jump: [ + [4, 1], + [6, 3], + ], + }, + ]) + }) }) }) diff --git a/tests/lib/rl/gomoku.test.js b/tests/lib/rl/gomoku.test.js index 02d5743bb..60b2db198 100644 --- a/tests/lib/rl/gomoku.test.js +++ b/tests/lib/rl/gomoku.test.js @@ -18,6 +18,27 @@ describe('env', () => { expect(env.states).toHaveLength(1 + 8 * 8) }) + describe('evaluation', () => { + test('set', () => { + const env = new GomokuRLEnvironment() + env.evaluation = state => { + expect(state).toHaveLength(1 + 8 * 
8) + return 1 + } + + const score = env._board.score() + expect(score).toBe(1) + }) + + test('clear', () => { + const env = new GomokuRLEnvironment() + env.evaluation = null + + const score = env._board.score() + expect(score).toBe(0) + }) + }) + describe('reset', () => { test('success', () => { const env = new GomokuRLEnvironment() @@ -34,14 +55,20 @@ describe('env', () => { }) describe('state', () => { - test.each([GomokuRLEnvironment.BLACK, GomokuRLEnvironment.WHITE])('success %i', agent => { + test.each([undefined, GomokuRLEnvironment.BLACK, GomokuRLEnvironment.WHITE])('success %i', agent => { const env = new GomokuRLEnvironment() env.reset(0, 1) env.step(['1_1'], GomokuRLEnvironment.BLACK) env.step(['2_2'], GomokuRLEnvironment.WHITE) - const black = agent === GomokuRLEnvironment.BLACK ? GomokuRLEnvironment.OWN : GomokuRLEnvironment.OTHER - const white = agent === GomokuRLEnvironment.BLACK ? GomokuRLEnvironment.OTHER : GomokuRLEnvironment.OWN + const black = + agent === undefined || agent === GomokuRLEnvironment.BLACK + ? GomokuRLEnvironment.OWN + : GomokuRLEnvironment.OTHER + const white = + agent === undefined || agent === GomokuRLEnvironment.BLACK + ? GomokuRLEnvironment.OTHER + : GomokuRLEnvironment.OWN const state = env.state(agent) expect(state).toHaveLength(1 + 8 * 8) @@ -70,11 +97,11 @@ describe('env', () => { }) describe('step', () => { - test('success', () => { + test.each([undefined, GomokuRLEnvironment.BLACK])('success agent: %p', agent => { const env = new GomokuRLEnvironment() env.reset() - const info = env.step(['3_5'], GomokuRLEnvironment.BLACK) + const info = env.step(['3_5'], agent) expect(info.invalid).toBeFalsy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -131,11 +158,11 @@ describe('env', () => { }) describe('test', () => { - test('step', () => { + test.each([undefined, GomokuRLEnvironment.BLACK])('step agent: %p', agent => { const env = new GomokuRLEnvironment() const orgstate = env.reset() - const info = env.test(orgstate, ['3_5'], GomokuRLEnvironment.BLACK) + const info = env.test(orgstate, ['3_5'], agent) expect(info.invalid).toBeFalsy() const state = info.state @@ -162,6 +189,78 @@ describe('board', () => { expect(board.winner).toBeNull() }) + describe('winner', () => { + test.each(['black', 'white'])('%s', winner => { + const env = new GomokuRLEnvironment() + const board = env._board + const turn = winner === 'black' ? 
GomokuRLEnvironment.BLACK : GomokuRLEnvironment.WHITE + board.set([0, 0], turn) + board.set([0, 1], turn) + board.set([0, 2], turn) + board.set([0, 3], turn) + board.set([0, 4], turn) + + expect(board.winner).toBe(turn) + }) + + test('game', () => { + const env = new GomokuRLEnvironment() + const board = env._board + let turn = GomokuRLEnvironment.BLACK + + while (!board.finish) { + const choices = board.choices(turn) + if (choices.length === 0) { + turn = board.nextTurn(turn) + continue + } + + board.set(choices[0], turn) + turn = board.nextTurn(turn) + } + + expect(board.winner).not.toBeNull() + }) + }) + + test('toString', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + board.set([2, 4], GomokuRLEnvironment.BLACK) + board.set([3, 6], GomokuRLEnvironment.WHITE) + + expect(board.toString()).toBe(`- - - - - - - - +- - - - - - - - +- - - - x - - - +- - - - - - o - +- - - - - - - - +- - - - - - - - +- - - - - - - - +- - - - - - - - +`) + }) + + test('nextTurn', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + expect(board.nextTurn(GomokuRLEnvironment.BLACK)).toBe(GomokuRLEnvironment.WHITE) + expect(board.nextTurn(GomokuRLEnvironment.WHITE)).toBe(GomokuRLEnvironment.BLACK) + }) + + test('copy', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + const cp = board.copy() + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + expect(cp.at([i, j])).toBe(board.at([i, j])) + } + } + }) + describe('choices', () => { test('all', () => { const env = new GomokuRLEnvironment() @@ -218,60 +317,6 @@ describe('board', () => { }) }) - test('nextTurn', () => { - const env = new GomokuRLEnvironment() - const board = env._board - - expect(board.nextTurn(GomokuRLEnvironment.BLACK)).toBe(GomokuRLEnvironment.WHITE) - expect(board.nextTurn(GomokuRLEnvironment.WHITE)).toBe(GomokuRLEnvironment.BLACK) - }) - - test('copy', () => { - const env = new GomokuRLEnvironment() - const board = env._board - - const cp = board.copy() - for (let i = 0; i < board.size[0]; i++) { - for (let j = 0; j < board.size[1]; j++) { - expect(cp.at([i, j])).toBe(board.at([i, j])) - } - } - }) - - describe('winner', () => { - test.each(['black', 'white'])('%s', winner => { - const env = new GomokuRLEnvironment() - const board = env._board - const turn = winner === 'black' ? 
GomokuRLEnvironment.BLACK : GomokuRLEnvironment.WHITE - board.set([0, 0], turn) - board.set([0, 1], turn) - board.set([0, 2], turn) - board.set([0, 3], turn) - board.set([0, 4], turn) - - expect(board.winner).toBe(turn) - }) - - test('game', () => { - const env = new GomokuRLEnvironment() - const board = env._board - let turn = GomokuRLEnvironment.BLACK - - while (!board.finish) { - const choices = board.choices(turn) - if (choices.length === 0) { - turn = board.nextTurn(turn) - continue - } - - board.set(choices[0], turn) - turn = board.nextTurn(turn) - } - - expect(board.winner).not.toBeNull() - }) - }) - describe('row', () => { test('empty', () => { const env = new GomokuRLEnvironment() diff --git a/tests/lib/rl/grid.test.js b/tests/lib/rl/grid.test.js index 3c472dbe8..d2b2ea747 100644 --- a/tests/lib/rl/grid.test.js +++ b/tests/lib/rl/grid.test.js @@ -10,9 +10,17 @@ test('size', () => { expect(env.size).toEqual([20, 10]) }) -test('actions', () => { - const env = new GridRLEnvironment() - expect(env.actions).toEqual([[0, 1, 2, 3]]) +describe('actions', () => { + test('2d', () => { + const env = new GridRLEnvironment() + expect(env.actions).toEqual([[0, 1, 2, 3]]) + }) + + test('1d', () => { + const env = new GridRLEnvironment() + env._dim = 1 + expect(env.actions).toEqual([[0, 1]]) + }) }) test('states', () => { @@ -69,6 +77,16 @@ test('reset', () => { expect(env.state()).toEqual([0, 0]) }) +test('resetMap', () => { + const env = new GridRLEnvironment() + env._points.push([0, 1]) + env.step([0]) + env.resetMap() + expect(env._points).toHaveLength(0) + const state = env.state() + expect(state).toEqual([1, 0]) +}) + test('resetMapAsMaze', () => { const env = new GridRLEnvironment() env.resetMapAsMaze() @@ -104,14 +122,28 @@ describe('state', () => { }) }) -test('step', () => { - const env = new GridRLEnvironment() - expect(env.epoch).toBe(0) - const info = env.step([0]) - expect(env.epoch).toBe(1) - expect(info.done).toBeFalsy() - expect(info.reward).toBe(-1) - expect(info.state).toHaveLength(2) +describe('step', () => { + test('2d', () => { + const env = new GridRLEnvironment() + expect(env.epoch).toBe(0) + const info = env.step([0]) + expect(env.epoch).toBe(1) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state).toEqual([1, 0]) + }) + + test('1d', () => { + const env = new GridRLEnvironment() + env._dim = 1 + env.reset() + expect(env.epoch).toBe(0) + const info = env.step([0]) + expect(env.epoch).toBe(1) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state).toEqual([1]) + }) }) describe('test', () => { diff --git a/tests/lib/rl/inhypercube.test.js b/tests/lib/rl/inhypercube.test.js new file mode 100644 index 000000000..8831cbe7f --- /dev/null +++ b/tests/lib/rl/inhypercube.test.js @@ -0,0 +1,79 @@ +import InHypercubeRLEnvironment from '../../../lib/rl/inhypercube.js' + +test('constructor', () => { + const env = new InHypercubeRLEnvironment() + expect(env).toBeDefined() +}) + +describe('actions', () => { + test('2d', () => { + const env = new InHypercubeRLEnvironment(2) + expect(env.actions).toEqual([[0, 1, 2, 3]]) + }) + + test('3d', () => { + const env = new InHypercubeRLEnvironment(3) + env._dim = 1 + expect(env.actions).toEqual([[0, 1, 2, 3, 4, 5]]) + }) +}) + +test.each([1, 2, 3])('states %dd', (d) => { + const env = new InHypercubeRLEnvironment(d) + expect(env.states).toHaveLength(d * 2) +}) + +test('reset', () => { + const env = new InHypercubeRLEnvironment() + for (let i = 0; i < 10; i++) { + 
env.step(env.sample_action()) + } + const init_state = env.reset() + expect(init_state).toEqual([0, 0, 0, 0]) + expect(env.state()).toEqual([0, 0, 0, 0]) +}) + +describe('state', () => { + test('init', () => { + const env = new InHypercubeRLEnvironment() + expect(env.state()).toEqual([0, 0, 0, 0]) + }) +}) + +describe('step', () => { + test('2d', () => { + const env = new InHypercubeRLEnvironment() + expect(env.epoch).toBe(0) + const info = env.step([0]) + expect(env.epoch).toBe(1) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0) + expect(info.state).toEqual([0.1, 0, 0.1, 0]) + }) +}) + +describe('test', () => { + test('step', () => { + const env = new InHypercubeRLEnvironment() + const info = env.test([0, 0, 0, 0], [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0) + expect(info.state).toEqual([0.1, 0, 0.1, 0]) + }) + + test('goal', () => { + const env = new InHypercubeRLEnvironment() + const info = env.test([-1, 0, 0, 0], [1]) + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + expect(info.state).toEqual([-1.1, 0, -0.1, 0]) + }) + + test('fail', () => { + const env = new InHypercubeRLEnvironment() + const info = env.test([1, 0, 0, 0], [0]) + expect(info.done).toBeTruthy() + expect(info.reward).toBe(0) + expect(info.state).toEqual([1.1, 0, 0.1, 0]) + }) +}) diff --git a/tests/lib/rl/mountaincar.test.js b/tests/lib/rl/mountaincar.test.js index 8caab866f..7da4cbad7 100644 --- a/tests/lib/rl/mountaincar.test.js +++ b/tests/lib/rl/mountaincar.test.js @@ -73,6 +73,42 @@ describe('test', () => { expect(info.state[1]).toBe(-0.0115) }) + test('big v', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([0, 1], [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(0.07) + expect(info.state[1]).toBe(0.07) + }) + + test('small p', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([-Math.PI, 0], [1]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(-1.2) + expect(info.state[1]).toBe(0.0025) + }) + + test('big p', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([Math.PI, 0], [1]) + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(0.6) + expect(info.state[1]).toBe(0.0025) + }) + + test('small p, v', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([-Math.PI, -1], [1]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(-1.2) + expect(info.state[1]).toBe(0) + }) + test('goal', () => { const env = new MountainCarRLEnvironment() const info = env.test([0.5, 0.01], [2]) diff --git a/tests/lib/rl/pendulum.test.js b/tests/lib/rl/pendulum.test.js index c60d672d6..2f10682b1 100644 --- a/tests/lib/rl/pendulum.test.js +++ b/tests/lib/rl/pendulum.test.js @@ -47,3 +47,44 @@ test('step', () => { expect(info.reward).toBeCloseTo(0) expect(info.state).toHaveLength(3) }) + +describe('test', () => { + test('big t', () => { + const env = new PendulumRLEnvironment() + const info = env.test([-1, 0, 0], [0]) + + expect(info.done).toBeFalsy() + expect(info.state[0]).toBeGreaterThanOrEqual(-1) + expect(info.state[0]).toBeLessThanOrEqual(1) + expect(info.state[1]).toBeGreaterThanOrEqual(-1) + expect(info.state[1]).toBeLessThanOrEqual(1) + expect(info.state[2]).toBeGreaterThanOrEqual(-0.5) + expect(info.state[2]).toBeLessThanOrEqual(0.5) + }) + + test('small action[0]', () => { + const env = new 
PendulumRLEnvironment() + const info = env.test([1, 0, 0], [-10]) + + expect(info.done).toBeFalsy() + expect(info.state[0]).toBeGreaterThanOrEqual(-1) + expect(info.state[0]).toBeLessThanOrEqual(1) + expect(info.state[1]).toBeGreaterThanOrEqual(-1) + expect(info.state[1]).toBeLessThanOrEqual(1) + expect(info.state[2]).toBeGreaterThanOrEqual(-0.5) + expect(info.state[2]).toBeLessThanOrEqual(0.5) + }) + + test('big action[0]', () => { + const env = new PendulumRLEnvironment() + const info = env.test([1, 0, 0], [10]) + + expect(info.done).toBeFalsy() + expect(info.state[0]).toBeGreaterThanOrEqual(-1) + expect(info.state[0]).toBeLessThanOrEqual(1) + expect(info.state[1]).toBeGreaterThanOrEqual(-1) + expect(info.state[1]).toBeLessThanOrEqual(1) + expect(info.state[2]).toBeGreaterThanOrEqual(-0.5) + expect(info.state[2]).toBeLessThanOrEqual(0.5) + }) +}) diff --git a/tests/lib/rl/reversi.test.js b/tests/lib/rl/reversi.test.js index 5350e72e3..6e90b939d 100644 --- a/tests/lib/rl/reversi.test.js +++ b/tests/lib/rl/reversi.test.js @@ -18,6 +18,27 @@ describe('env', () => { expect(env.states).toHaveLength(1 + 8 * 8) }) + describe('evaluation', () => { + test('set', () => { + const env = new ReversiRLEnvironment() + env.evaluation = state => { + expect(state).toHaveLength(1 + 8 * 8) + return 1 + } + + const score = env._board.score() + expect(score).toBe(1) + }) + + test('clear', () => { + const env = new ReversiRLEnvironment() + env.evaluation = null + + const score = env._board.score() + expect(score).toBe(0) + }) + }) + describe('reset', () => { test('success', () => { const env = new ReversiRLEnvironment() @@ -28,9 +49,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) + (i === 3 && j === 4) || (i === 4 && j === 3) ? ReversiRLEnvironment.OWN - : (i === 3 && j === 4) || (i === 4 && j === 3) + : (i === 3 && j === 3) || (i === 4 && j === 4) ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.EMPTY ) @@ -40,12 +61,18 @@ describe('env', () => { }) describe('state', () => { - test.each([ReversiRLEnvironment.BLACK, ReversiRLEnvironment.WHITE])('success %i', agent => { + test.each([undefined, ReversiRLEnvironment.BLACK, ReversiRLEnvironment.WHITE])('success %p', agent => { const env = new ReversiRLEnvironment() env.reset(0, 1) - const black = agent === ReversiRLEnvironment.BLACK ? ReversiRLEnvironment.OWN : ReversiRLEnvironment.OTHER - const white = agent === ReversiRLEnvironment.BLACK ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.OWN + const black = + agent === undefined || agent === ReversiRLEnvironment.BLACK + ? ReversiRLEnvironment.OWN + : ReversiRLEnvironment.OTHER + const white = + agent === undefined || agent === ReversiRLEnvironment.BLACK + ? ReversiRLEnvironment.OTHER + : ReversiRLEnvironment.OWN const state = env.state(agent) expect(state).toHaveLength(1 + 8 * 8) @@ -53,9 +80,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) + (i === 3 && j === 4) || (i === 4 && j === 3) ? black - : (i === 3 && j === 4) || (i === 4 && j === 3) + : (i === 3 && j === 3) || (i === 4 && j === 4) ? 
white : ReversiRLEnvironment.EMPTY ) @@ -78,11 +105,11 @@ describe('env', () => { }) describe('step', () => { - test('success', () => { + test.each([undefined, ReversiRLEnvironment.BLACK])('success agent: %p', agent => { const env = new ReversiRLEnvironment() env.reset() - const info = env.step(['3_5'], ReversiRLEnvironment.BLACK) + const info = env.step(['f5'], agent) expect(info.invalid).toBeFalsy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -93,9 +120,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) || (i === 3 && j === 4) || (i === 3 && j === 5) + (i === 3 && j === 4) || (i === 4 && j === 3) || (i === 4 && j === 4) || (i === 4 && j === 5) ? ReversiRLEnvironment.OWN - : i === 4 && j === 3 + : i === 3 && j === 3 ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.EMPTY ) @@ -105,11 +132,56 @@ describe('env', () => { expect(env.epoch).toBe(1) }) + test('no action', () => { + const env = new ReversiRLEnvironment() + env.reset() + + env.step(['f5']) + env.step(['f6']) + env.step(['d3']) + env.step(['g5']) + env.step(['h5']) + env.step(['h4']) + env.step(['f7']) + env.step(['h6']) + + const info = env.step([ReversiRLEnvironment.EMPTY]) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0) + expect(env.epoch).toBe(9) + }) + + test('win black', () => { + const env = new ReversiRLEnvironment() + env.reset() + + env.step(['f5']) + env.step(['d6']) + env.step(['c5']) + env.step(['f4']) + env.step(['e3']) + env.step(['f6']) + env.step(['g5']) + env.step(['e6']) + + const info = env.step(['e7']) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + expect(env.epoch).toBe(9) + + const info2 = env.step([ReversiRLEnvironment.EMPTY]) + expect(info2.invalid).toBeFalsy() + expect(info2.done).toBeTruthy() + expect(info2.reward).toBe(-1) + }) + test('invalid position', () => { const env = new ReversiRLEnvironment() const state = env.reset() - const info = env.step(['0_0'], ReversiRLEnvironment.BLACK) + const info = env.step(['a1'], ReversiRLEnvironment.BLACK) expect(info.invalid).toBeTruthy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -133,7 +205,7 @@ describe('env', () => { const env = new ReversiRLEnvironment() env.reset() - const info = env.step(['0_0'], ReversiRLEnvironment.WHITE) + const info = env.step(['a1'], ReversiRLEnvironment.WHITE) expect(info.invalid).toBeTruthy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -143,7 +215,7 @@ describe('env', () => { test('failed before reset', () => { const env = new ReversiRLEnvironment() - expect(() => env.step(['3_5'], ReversiRLEnvironment.BLACK)).toThrow( + expect(() => env.step(['f4'], ReversiRLEnvironment.BLACK)).toThrow( 'Agent does not exist. Call reset to set agents.' 
) }) @@ -151,16 +223,16 @@ describe('env', () => { test.each([1, 4])('failed %p', agent => { const env = new ReversiRLEnvironment() env.reset() - expect(() => env.step(['3_5'], agent)).toThrow('Unknown agent.') + expect(() => env.step(['f4'], agent)).toThrow('Unknown agent.') }) }) describe('test', () => { - test('step', () => { + test.each([undefined, ReversiRLEnvironment.BLACK])('step agent: %p', agent => { const env = new ReversiRLEnvironment() const orgstate = env.reset() - const info = env.test(orgstate, ['3_5'], ReversiRLEnvironment.BLACK) + const info = env.test(orgstate, ['f5'], agent) expect(info.invalid).toBeFalsy() const state = info.state @@ -169,9 +241,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) || (i === 3 && j === 4) || (i === 3 && j === 5) + (i === 3 && j === 4) || (i === 4 && j === 3) || (i === 4 && j === 4) || (i === 4 && j === 5) ? ReversiRLEnvironment.OWN - : i === 4 && j === 3 + : i === 3 && j === 3 ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.EMPTY ) @@ -189,10 +261,10 @@ describe('board', () => { const board = env._board expect(board.size).toEqual([8, 8]) - expect(board.at([3, 3])).toBe(ReversiRLEnvironment.BLACK) - expect(board.at([4, 4])).toBe(ReversiRLEnvironment.BLACK) - expect(board.at([3, 4])).toBe(ReversiRLEnvironment.WHITE) - expect(board.at([4, 3])).toBe(ReversiRLEnvironment.WHITE) + expect(board.at([3, 3])).toBe(ReversiRLEnvironment.WHITE) + expect(board.at([4, 4])).toBe(ReversiRLEnvironment.WHITE) + expect(board.at([3, 4])).toBe(ReversiRLEnvironment.BLACK) + expect(board.at([4, 3])).toBe(ReversiRLEnvironment.BLACK) expect(board.finish).toBeFalsy() expect(board.count.black).toBe(2) expect(board.count.white).toBe(2) @@ -201,49 +273,131 @@ describe('board', () => { expect(board.score(ReversiRLEnvironment.WHITE)).toBe(0) }) - test('choices', () => { - const env = new ReversiRLEnvironment() - const board = env._board + describe('winner', () => { + test('random', () => { + const env = new ReversiRLEnvironment() + const board = env._board + let turn = ReversiRLEnvironment.BLACK - const choiceBlack = board.choices(ReversiRLEnvironment.BLACK) - expect(choiceBlack).toEqual([ - [2, 4], - [3, 5], - [4, 2], - [5, 3], - ]) - const choiceWhite = board.choices(ReversiRLEnvironment.WHITE) - expect(choiceWhite).toEqual([ - [2, 3], - [3, 2], - [4, 5], - [5, 4], - ]) - }) + while (!board.finish) { + const choices = board.choices(turn) + if (choices.length === 0) { + turn = board.nextTurn(turn) + continue + } - describe('set', () => { - test('success', () => { + board.set(choices[0], turn) + turn = board.nextTurn(turn) + } + + expect(board.winner).not.toBeNull() + }) + + test('black', () => { const env = new ReversiRLEnvironment() const board = env._board - expect(board.at([2, 4])).toBe(ReversiRLEnvironment.EMPTY) - expect(board.at([3, 4])).toBe(ReversiRLEnvironment.WHITE) + const ps = ['f5', 'd6', 'c5', 'f4', 'e3', 'f6', 'g5', 'e6', 'e7'] + for (let i = 0; i < ps.length; i++) { + board.set(ps[i], i % 2 === 0 ? 
ReversiRLEnvironment.BLACK : ReversiRLEnvironment.WHITE) + } + expect(board.winner).toBe(ReversiRLEnvironment.BLACK) + }) + + test('white', () => { + const env = new ReversiRLEnvironment() + const board = env._board - const success = board.set([2, 4], ReversiRLEnvironment.BLACK) - expect(success).toBeTruthy() - expect(board.at([2, 4])).toBe(ReversiRLEnvironment.BLACK) - expect(board.at([3, 4])).toBe(ReversiRLEnvironment.BLACK) + const ps = ['f5', 'f6', 'c4', 'f4', 'e6', 'b4', 'g6', 'f7', 'e8', 'g8', 'g5', 'h5'] + for (let i = 0; i < ps.length; i++) { + board.set(ps[i], i % 2 === 0 ? ReversiRLEnvironment.BLACK : ReversiRLEnvironment.WHITE) + } + expect(board.winner).toBe(ReversiRLEnvironment.WHITE) }) - test('fail', () => { + test('draw', () => { const env = new ReversiRLEnvironment() const board = env._board - const success = board.set([2, 4], ReversiRLEnvironment.WHITE) - expect(success).toBeFalsy() + board.set('f5', ReversiRLEnvironment.BLACK) + board.set('d6', ReversiRLEnvironment.WHITE) + board.set('c7', ReversiRLEnvironment.BLACK) + board.set('f3', ReversiRLEnvironment.WHITE) + board.set('e3', ReversiRLEnvironment.BLACK) + board.set('d3', ReversiRLEnvironment.WHITE) + board.set('g2', ReversiRLEnvironment.BLACK) + board.set('f4', ReversiRLEnvironment.WHITE) + board.set('c6', ReversiRLEnvironment.BLACK) + board.set('d7', ReversiRLEnvironment.WHITE) + board.set('g4', ReversiRLEnvironment.BLACK) + board.set('b7', ReversiRLEnvironment.WHITE) + board.set('a8', ReversiRLEnvironment.BLACK) + board.set('g3', ReversiRLEnvironment.WHITE) + board.set('c8', ReversiRLEnvironment.BLACK) + board.set('h1', ReversiRLEnvironment.WHITE) + board.set('c4', ReversiRLEnvironment.BLACK) + board.set('b8', ReversiRLEnvironment.WHITE) + board.set('f2', ReversiRLEnvironment.BLACK) + board.set('e1', ReversiRLEnvironment.WHITE) + board.set('f1', ReversiRLEnvironment.BLACK) + board.set('d8', ReversiRLEnvironment.WHITE) + board.set('e8', ReversiRLEnvironment.BLACK) + board.set('a7', ReversiRLEnvironment.WHITE) + board.set('a6', ReversiRLEnvironment.BLACK) + board.set('b6', ReversiRLEnvironment.WHITE) + board.set('a5', ReversiRLEnvironment.BLACK) + board.set('g1', ReversiRLEnvironment.WHITE) + board.set('b5', ReversiRLEnvironment.BLACK) + board.set('e2', ReversiRLEnvironment.WHITE) + board.set('h2', ReversiRLEnvironment.BLACK) + board.set('c3', ReversiRLEnvironment.WHITE) + board.set('e6', ReversiRLEnvironment.BLACK) + board.set('c5', ReversiRLEnvironment.WHITE) + board.set('b4', ReversiRLEnvironment.BLACK) + board.set('e7', ReversiRLEnvironment.WHITE) + board.set('b3', ReversiRLEnvironment.BLACK) + board.set('d2', ReversiRLEnvironment.WHITE) + board.set('c1', ReversiRLEnvironment.BLACK) + board.set('d1', ReversiRLEnvironment.WHITE) + board.set('f8', ReversiRLEnvironment.BLACK) + board.set('b1', ReversiRLEnvironment.WHITE) + board.set('f7', ReversiRLEnvironment.BLACK) + board.set('g6', ReversiRLEnvironment.WHITE) + board.set('f6', ReversiRLEnvironment.BLACK) + board.set('h3', ReversiRLEnvironment.WHITE) + board.set('h6', ReversiRLEnvironment.BLACK) + board.set('a3', ReversiRLEnvironment.WHITE) + board.set('c2', ReversiRLEnvironment.BLACK) + board.set('h7', ReversiRLEnvironment.WHITE) + board.set('a2', ReversiRLEnvironment.BLACK) + board.set('a4', ReversiRLEnvironment.WHITE) + board.set('h8', ReversiRLEnvironment.BLACK) + board.set('g7', ReversiRLEnvironment.WHITE) + board.set('h5', ReversiRLEnvironment.BLACK) + board.set('a1', ReversiRLEnvironment.WHITE) + board.set('g8', ReversiRLEnvironment.BLACK) + board.set('b2', 
ReversiRLEnvironment.WHITE) + board.set('g5', ReversiRLEnvironment.BLACK) + board.set('h4', ReversiRLEnvironment.WHITE) + expect(board.winner).toBeNull() }) }) + test('toString', () => { + const env = new ReversiRLEnvironment() + const board = env._board + + expect(board.toString()).toBe(`- - - - - - - - +- - - - - - - - +- - - - - - - - +- - - o x - - - +- - - x o - - - +- - - - - - - - +- - - - - - - - +- - - - - - - - +`) + }) + test('nextTurn', () => { const env = new ReversiRLEnvironment() const board = env._board @@ -265,22 +419,61 @@ describe('board', () => { } }) - test('winner', () => { + test.each([[3, 3], 'd4'])('at %p', p => { const env = new ReversiRLEnvironment() const board = env._board - let turn = ReversiRLEnvironment.BLACK - while (!board.finish) { - const choices = board.choices(turn) - if (choices.length === 0) { - turn = board.nextTurn(turn) - continue - } + expect(board.at(p)).toBe(ReversiRLEnvironment.WHITE) + }) - board.set(choices[0], turn) - turn = board.nextTurn(turn) - } + describe('set', () => { + test.each([[2, 3], 'd3'])('success %p', p => { + const env = new ReversiRLEnvironment() + const board = env._board + + expect(board.at([2, 3])).toBe(ReversiRLEnvironment.EMPTY) + expect(board.at([3, 3])).toBe(ReversiRLEnvironment.WHITE) + + const success = board.set(p, ReversiRLEnvironment.BLACK) + expect(success).toBeTruthy() + expect(board.at([2, 3])).toBe(ReversiRLEnvironment.BLACK) + expect(board.at([3, 3])).toBe(ReversiRLEnvironment.BLACK) + }) + + test('fail', () => { + const env = new ReversiRLEnvironment() + const board = env._board - expect(board.winner).not.toBeNull() + const success = board.set([2, 3], ReversiRLEnvironment.WHITE) + expect(success).toBeFalsy() + }) + + test('out of bounds', () => { + const env = new ReversiRLEnvironment() + const board = env._board + + const success = board.set([-1, -1], ReversiRLEnvironment.WHITE) + expect(success).toBeFalsy() + }) + }) + + test('choices', () => { + const env = new ReversiRLEnvironment() + const board = env._board + + const choiceBlack = board.choices(ReversiRLEnvironment.BLACK) + expect(choiceBlack).toEqual([ + [2, 3], + [3, 2], + [4, 5], + [5, 4], + ]) + const choiceWhite = board.choices(ReversiRLEnvironment.WHITE) + expect(choiceWhite).toEqual([ + [2, 4], + [3, 5], + [4, 2], + [5, 3], + ]) }) }) From af11b3b535dcbb1caeeeb48548289b98951b8f4f Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Wed, 27 Sep 2023 22:41:34 +0900 Subject: [PATCH 2/3] Forgot to commit and format --- lib/rl/acrobot.js | 23 ++++------------------- tests/lib/rl/inhypercube.test.js | 2 +- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/lib/rl/acrobot.js b/lib/rl/acrobot.js index 3780ab35c..f6bb0b75e 100644 --- a/lib/rl/acrobot.js +++ b/lib/rl/acrobot.js @@ -46,21 +46,6 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase { ] } - set reward(value) { - this._reward = { - goal: 0, - step: -1, - fail: 0, - } - if (value === 'achieve') { - this._reward = { - goal: 0, - step: -1, - fail: 0, - } - } - } - reset() { super.reset() this._theta1 = Math.random() * 0.2 - 0.1 @@ -111,11 +96,11 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase { const clip = (x, min, max) => (x < min ? min : x > max ? 
max : x) t1 += this._dt * dt1 - if (t1 < -Math.PI) t1 = t1 + 2 * Math.PI - if (t1 > Math.PI) t1 = t1 - 2 * Math.PI + while (t1 < -Math.PI) t1 += 2 * Math.PI + while (t1 > Math.PI) t1 -= 2 * Math.PI t2 += this._dt * dt2 - if (t2 < -Math.PI) t2 = t2 + 2 * Math.PI - if (t2 > Math.PI) t2 = t2 - 2 * Math.PI + while (t2 < -Math.PI) t2 += 2 * Math.PI + while (t2 > Math.PI) t2 -= 2 * Math.PI dt1 = clip(dt1 + this._dt * ddt1, -this._max_vel1, this._max_vel1) dt2 = clip(dt2 + this._dt * ddt2, -this._max_vel2, this._max_vel2) diff --git a/tests/lib/rl/inhypercube.test.js b/tests/lib/rl/inhypercube.test.js index 8831cbe7f..c7e308c85 100644 --- a/tests/lib/rl/inhypercube.test.js +++ b/tests/lib/rl/inhypercube.test.js @@ -18,7 +18,7 @@ describe('actions', () => { }) }) -test.each([1, 2, 3])('states %dd', (d) => { +test.each([1, 2, 3])('states %dd', d => { const env = new InHypercubeRLEnvironment(d) expect(env.states).toHaveLength(d * 2) }) From cfd641a9f1bc40f9fba0259957f89011bf600ddc Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Thu, 28 Sep 2023 21:20:11 +0900 Subject: [PATCH 3/3] Add tests --- tests/lib/rl/gomoku.test.js | 90 ++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/tests/lib/rl/gomoku.test.js b/tests/lib/rl/gomoku.test.js index 60b2db198..9e0c84b5c 100644 --- a/tests/lib/rl/gomoku.test.js +++ b/tests/lib/rl/gomoku.test.js @@ -118,6 +118,32 @@ describe('env', () => { expect(env.epoch).toBe(1) }) + test('win black', () => { + const env = new GomokuRLEnvironment() + env.reset() + + env.step(['0_0'], GomokuRLEnvironment.BLACK) + env.step(['1_0'], GomokuRLEnvironment.WHITE) + env.step(['0_1'], GomokuRLEnvironment.BLACK) + env.step(['1_1'], GomokuRLEnvironment.WHITE) + env.step(['0_2'], GomokuRLEnvironment.BLACK) + env.step(['1_2'], GomokuRLEnvironment.WHITE) + env.step(['0_3'], GomokuRLEnvironment.BLACK) + env.step(['1_3'], GomokuRLEnvironment.WHITE) + + const info = env.step(['0_4'], GomokuRLEnvironment.BLACK) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + expect(env.epoch).toBe(9) + + const info2 = env.step(['1_5'], GomokuRLEnvironment.WHITE) + expect(info2.invalid).toBeFalsy() + expect(info2.done).toBeTruthy() + expect(info2.reward).toBe(-1) + expect(env.epoch).toBe(10) + }) + test('invalid position', () => { const env = new GomokuRLEnvironment() env.reset() @@ -261,37 +287,23 @@ describe('board', () => { } }) - describe('choices', () => { - test('all', () => { + describe('score', () => { + test('win', () => { const env = new GomokuRLEnvironment() const board = env._board - const c = [] - for (let i = 0; i < board.size[0]; i++) { - for (let j = 0; j < board.size[1]; j++) { - c.push([i, j]) - } - } - - const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) - expect(choiceBlack).toEqual(c) - const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) - expect(choiceWhite).toEqual(c) - }) - - test('finish', () => { - const env = new GomokuRLEnvironment() - const board = env._board board.set([0, 0], GomokuRLEnvironment.BLACK) + board.set([1, 0], GomokuRLEnvironment.WHITE) board.set([0, 1], GomokuRLEnvironment.BLACK) + board.set([1, 1], GomokuRLEnvironment.WHITE) board.set([0, 2], GomokuRLEnvironment.BLACK) + board.set([1, 2], GomokuRLEnvironment.WHITE) board.set([0, 3], GomokuRLEnvironment.BLACK) + board.set([1, 3], GomokuRLEnvironment.WHITE) board.set([0, 4], GomokuRLEnvironment.BLACK) - const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) - 
expect(choiceBlack).toHaveLength(0) - const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) - expect(choiceWhite).toHaveLength(0) + expect(board.score(GomokuRLEnvironment.BLACK)).toBe(6391) + expect(board.score(GomokuRLEnvironment.WHITE)).toBe(-6391) }) }) @@ -317,6 +329,40 @@ describe('board', () => { }) }) + describe('choices', () => { + test('all', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + const c = [] + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + c.push([i, j]) + } + } + + const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) + expect(choiceBlack).toEqual(c) + const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) + expect(choiceWhite).toEqual(c) + }) + + test('finish', () => { + const env = new GomokuRLEnvironment() + const board = env._board + board.set([0, 0], GomokuRLEnvironment.BLACK) + board.set([0, 1], GomokuRLEnvironment.BLACK) + board.set([0, 2], GomokuRLEnvironment.BLACK) + board.set([0, 3], GomokuRLEnvironment.BLACK) + board.set([0, 4], GomokuRLEnvironment.BLACK) + + const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) + expect(choiceBlack).toHaveLength(0) + const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) + expect(choiceWhite).toHaveLength(0) + }) + }) + describe('row', () => { test('empty', () => { const env = new GomokuRLEnvironment()