From b5ba3e1ad5172e8b4a51af5d732e89121b12c535 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Wed, 27 Sep 2023 21:01:07 +0900 Subject: [PATCH 1/3] Fix and improve RL environments, and add tests --- js/renderer/rl/draughts.js | 2 +- lib/rl/draughts.js | 108 +++++-- lib/rl/gomoku.js | 20 ++ lib/rl/inhypercube.js | 2 +- lib/rl/reversi.js | 39 ++- tests/lib/rl/acrobot.test.js | 35 +++ tests/lib/rl/base.test.js | 7 + tests/lib/rl/blackjack.test.js | 12 + tests/lib/rl/breaker.test.js | 192 ++++++++++++- tests/lib/rl/draughts.test.js | 470 +++++++++++++++++++++++-------- tests/lib/rl/gomoku.test.js | 167 +++++++---- tests/lib/rl/grid.test.js | 54 +++- tests/lib/rl/inhypercube.test.js | 79 ++++++ tests/lib/rl/mountaincar.test.js | 36 +++ tests/lib/rl/pendulum.test.js | 41 +++ tests/lib/rl/reversi.test.js | 321 ++++++++++++++++----- 16 files changed, 1304 insertions(+), 281 deletions(-) create mode 100644 tests/lib/rl/inhypercube.test.js diff --git a/js/renderer/rl/draughts.js b/js/renderer/rl/draughts.js index 04f4abb0f..b216c0f9f 100644 --- a/js/renderer/rl/draughts.js +++ b/js/renderer/rl/draughts.js @@ -160,7 +160,7 @@ class ManualPlayer { for (let i = 0; i < board.size[0]; i++) { this._check[i] = [] for (let j = 0; j < board.size[1]; j++) { - if ((i + j) % 2 > 0) continue + if ((i + j) % 2 === 0) continue this._check[i][j] = document.createElementNS('http://www.w3.org/2000/svg', 'rect') this._check[i][j].setAttribute('x', dw * j) this._check[i][j].setAttribute('y', dh * i) diff --git a/lib/rl/draughts.js b/lib/rl/draughts.js index 5d9a5dc9e..0ebb56361 100644 --- a/lib/rl/draughts.js +++ b/lib/rl/draughts.js @@ -47,7 +47,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { ] const checkBound = (x, y) => 0 <= x && x < this._size[0] && 0 <= y && y < this._size[1] for (let i = 0; i < this._size[0]; i++) { - for (let j = i % 2 === 0 ? 0 : 1; j < this._size[1]; j += 2) { + for (let j = i % 2 === 1 ? 0 : 1; j < this._size[1]; j += 2) { let midpath = [] for (const [di, dj] of d) { const i1 = i + di @@ -93,18 +93,14 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { get states() { const s = [[RED, WHITE]] for (let i = 0; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++) { - if (j % 2 === i % 2) { - s.push([ - EMPTY, - DraughtsRLEnvironment.OWN, - DraughtsRLEnvironment.OWN | KING, - DraughtsRLEnvironment.OTHER, - DraughtsRLEnvironment.OTHER | KING, - ]) - } else { - s.push([EMPTY]) - } + for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) { + s.push([ + EMPTY, + DraughtsRLEnvironment.OWN, + DraughtsRLEnvironment.OWN | KING, + DraughtsRLEnvironment.OTHER, + DraughtsRLEnvironment.OTHER | KING, + ]) } } return s @@ -123,7 +119,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { _makeState(board, agentturn, gameturn) { const s = [gameturn] for (let i = 0; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++) { + for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) { const p = board.at([i, j]) if (p === EMPTY) { s.push(EMPTY) @@ -144,7 +140,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase { const board = new DraughtsBoard(this._size, this._evaluation) const opturn = turn === RED ? WHITE : RED for (let i = 0, p = 1; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++, p++) { + for (let j = i % 2 === 0 ? 
1 : 0; j < this._size[1]; j += 2, p++) { if (state[p] === EMPTY) { board._board[i][j] = EMPTY } else { @@ -241,6 +237,7 @@ class DraughtsBoard { constructor(size, evaluator) { this._evaluator = evaluator this._size = size + this._lines = 3 this.reset() } @@ -280,6 +277,26 @@ class DraughtsBoard { return null } + toString() { + let buf = '' + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + if (j > 0) { + buf += ' ' + } + if (this._board[i][j] === RED) { + buf += 'x' + } else if (this._board[i][j] === WHITE) { + buf += 'o' + } else { + buf += '-' + } + } + buf += '\n' + } + return buf + } + nextTurn(turn) { if (turn === WHITE) { return RED @@ -310,20 +327,44 @@ class DraughtsBoard { } } + _num_to_pos(n) { + if (typeof n !== 'number') { + return n + } + const r = Math.floor((n - 1) / this._size[1]) + const c = (n - 1) % this._size[1] + if (c < (this._size[1] - 1) / 2) { + return [r * 2, c * 2 + 1] + } else { + return [r * 2 + 1, (c - Math.floor(this._size[1] / 2)) * 2] + } + } + at(p) { + if (typeof p === 'number') { + p = this._num_to_pos(p) + } return this._board[p[0]][p[1]] } set(p, turn) { - let piece = this._board[p.from[0]][p.from[1]] + p = { + from: this._num_to_pos(p.from), + path: p.path.map(v => this._num_to_pos(v)), + jump: p.jump.map(v => this._num_to_pos(v)), + } + let piece = this.at(p.from) if (!(turn & piece)) { return false } + if ((p.jump.length !== 0 || p.path.length !== 1) && p.jump.length !== p.path.length) { + return false + } const nturn = this.nextTurn(turn) - if (p.jump.some(([i, j]) => !(this._board[i][j] & nturn))) { + if (p.jump.some(j => !(this.at(j) & nturn))) { return false } - if (p.path.some(([i, j]) => this._board[i][j] !== EMPTY)) { + if (p.path.some(j => this.at(j) !== EMPTY)) { return false } @@ -334,6 +375,27 @@ class DraughtsBoard { } } + if (p.jump.length === 0) { + for (let i = 0; i < 2; i++) { + if (Math.abs(p.from[i] - p.path[0][i]) !== 1) { + return false + } + } + } else { + let pos = p.from + for (let k = 0; k < p.path.length; k++) { + for (let i = 0; i < 2; i++) { + if (Math.abs(pos[i] - p.jump[k][i]) !== 1) { + return false + } + if (Math.abs(p.jump[k][i] - p.path[k][i]) !== 1) { + return false + } + } + pos = p.path[k] + } + } + this._board[p.from[0]][p.from[1]] = EMPTY for (const [i, j] of p.jump) { this._board[i][j] = EMPTY @@ -354,10 +416,10 @@ class DraughtsBoard { this._board[i] = Array(this._size[1]).fill(EMPTY) } for (let i = 0; i < this._size[0]; i++) { - for (let j = 0; j < this._size[1]; j++) { - if (i < 3 && (i + j) % 2 === 0) { + for (let j = i % 2 === 0 ? 
1 : 0; j < this._size[1]; j += 2) { + if (i < this._lines) { this._board[i][j] = RED - } else if (this._size[0] - 3 <= i && (i + j) % 2 === 0) { + } else if (this._size[0] - this._lines <= i) { this._board[i][j] = WHITE } } @@ -418,9 +480,9 @@ class DraughtsBoard { cp._board[x + dx * 2][y + dy * 2] = this._board[x][y] cp._board[x][y] = EMPTY cp._board[x + dx][y + dy] = EMPTY - if (turn === RED && x * dx * 2 === this._size[0] - 1) { + if (turn === RED && x + dx * 2 === this._size[0] - 1) { cp._board[x + dx * 2][y + dy * 2] |= KING - } else if (turn === WHITE && x * dx * 2 === 0) { + } else if (turn === WHITE && x + dx * 2 === 0) { cp._board[x + dx * 2][y + dy * 2] |= KING } const npath = cp.allPath(x + dx * 2, y + dy * 2, turn, false) diff --git a/lib/rl/gomoku.js b/lib/rl/gomoku.js index 570663c9e..cf6350b3e 100644 --- a/lib/rl/gomoku.js +++ b/lib/rl/gomoku.js @@ -190,6 +190,26 @@ class GomokuBoard { return null } + toString() { + let buf = '' + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + if (j > 0) { + buf += ' ' + } + if (this._board[i][j] === BLACK) { + buf += 'x' + } else if (this._board[i][j] === WHITE) { + buf += 'o' + } else { + buf += '-' + } + } + buf += '\n' + } + return buf + } + nextTurn(turn) { return turn === BLACK ? WHITE : BLACK } diff --git a/lib/rl/inhypercube.js b/lib/rl/inhypercube.js index 1cb9c726d..9875e356f 100644 --- a/lib/rl/inhypercube.js +++ b/lib/rl/inhypercube.js @@ -68,7 +68,7 @@ export default class InHypercubeRLEnvironment extends RLEnvironmentBase { } const success = p[this._success_dim] <= -this._fail_position - const fail = !success && p.every(v => Math.abs(v) >= this._fail_position) + const fail = !success && p.some(v => Math.abs(v) >= this._fail_position) const done = this.epoch >= this._max_step || success || fail const reward = fail ? this._reward.fail : success ? 
this._reward.goal : this._reward.step
return {
diff --git a/lib/rl/reversi.js b/lib/rl/reversi.js
index 2606dd0e8..700b95ae7 100644
--- a/lib/rl/reversi.js
+++ b/lib/rl/reversi.js
@@ -46,7 +46,7 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
const a = [EMPTY]
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
- a.push(`${i}_${j}`)
+ a.push(`${String.fromCharCode('a'.charCodeAt(0) + j)}${i + 1}`)
}
}
return [a]
@@ -167,8 +167,7 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
invalid,
}
}
- const choice = action[0].split('_').map(v => +v)
- const changed = board.set(choice, agent)
+ const changed = board.set(action[0], agent)
const done = board.finish
if (!changed) {
return {
@@ -233,6 +232,26 @@ class ReversiBoard {
return null
}
+ toString() {
+ let buf = ''
+ for (let i = 0; i < this._size[0]; i++) {
+ for (let j = 0; j < this._size[1]; j++) {
+ if (j > 0) {
+ buf += ' '
+ }
+ if (this._board[i][j] === BLACK) {
+ buf += 'x'
+ } else if (this._board[i][j] === WHITE) {
+ buf += 'o'
+ } else {
+ buf += '-'
+ }
+ }
+ buf += '\n'
+ }
+ return buf
+ }
+
nextTurn(turn) {
return flipPiece(turn)
}
@@ -260,10 +279,16 @@ class ReversiBoard {
}
at(p) {
+ if (typeof p === 'string') {
+ p = [p[1] - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
+ }
return this._board[p[0]][p[1]]
}
set(p, turn) {
+ if (typeof p === 'string') {
+ p = [p[1] - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
+ }
const flips = this.flipPositions(p[0], p[1], turn)
if (flips.length === 0) {
return false
@@ -282,10 +307,10 @@ class ReversiBoard {
}
const cx = Math.floor(this._size[0] / 2)
const cy = Math.floor(this._size[1] / 2)
- this._board[cx - 1][cy - 1] = BLACK
- this._board[cx - 1][cy] = WHITE
- this._board[cx][cy - 1] = WHITE
- this._board[cx][cy] = BLACK
+ this._board[cx - 1][cy - 1] = WHITE
+ this._board[cx - 1][cy] = BLACK
+ this._board[cx][cy - 1] = BLACK
+ this._board[cx][cy] = WHITE
}
choices(turn) {
diff --git a/tests/lib/rl/acrobot.test.js b/tests/lib/rl/acrobot.test.js
index 7ff59ddef..6c1f0ee99 100644
--- a/tests/lib/rl/acrobot.test.js
+++ b/tests/lib/rl/acrobot.test.js
@@ -80,6 +80,41 @@ describe('test', () => {
expect(info.state[3]).toBeGreaterThan(0)
})
+ test('small t1, t2', () => {
+ const env = new AcrobotRLEnvironment()
+ const info = env.test([-4, -13, 0, 0], [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(-1)
+ expect(info.state[0]).toBeCloseTo(-4 + 2 * Math.PI)
+ expect(info.state[1]).toBeCloseTo(-13 + 4 * Math.PI)
+ expect(info.state[2]).toBeLessThan(0)
+ expect(info.state[3]).toBeGreaterThan(0)
+ })
+
+ test('big t1, t2', () => {
+ const env = new AcrobotRLEnvironment()
+ const info = env.test([26, 4, 0, 0], [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(-1)
+ expect(info.state[0]).toBeCloseTo(26 - 8 * Math.PI)
+ expect(info.state[1]).toBeCloseTo(4 - 2 * Math.PI)
+ expect(info.state[2]).toBeLessThan(0)
+ expect(info.state[3]).toBeGreaterThan(0)
+ })
+
+ test('clip dt1, dt2', () => {
+ const env = new AcrobotRLEnvironment()
+ const info = env.test([0, 0, -100, 100], [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(-1)
+ for (let i = 0; i < 2; i++) {
+ expect(info.state[i]).toBeLessThanOrEqual(Math.PI)
+ expect(info.state[i]).toBeGreaterThanOrEqual(-Math.PI)
+ }
+ expect(info.state[2]).toBeCloseTo(-4 * Math.PI)
+ expect(info.state[3]).toBeCloseTo(9 * Math.PI)
+ })
+
test('goal', () => {
const env = new AcrobotRLEnvironment()
const info = env.test([Math.PI, Math.PI / 2, 0, 0], [0])
diff
--git a/tests/lib/rl/base.test.js b/tests/lib/rl/base.test.js index 28609e8f2..12ae99020 100644 --- a/tests/lib/rl/base.test.js +++ b/tests/lib/rl/base.test.js @@ -99,6 +99,13 @@ describe('EmptyRLEnvironment', () => { expect(env.states).toEqual([]) }) + test('clone', () => { + const env = new EmptyRLEnvironment() + const clone = env.clone() + expect(clone.actions).toEqual([]) + expect(clone.states).toEqual([]) + }) + test('reset', () => { const env = new EmptyRLEnvironment() const init_state = env.reset() diff --git a/tests/lib/rl/blackjack.test.js b/tests/lib/rl/blackjack.test.js index dd932c175..f05d47577 100644 --- a/tests/lib/rl/blackjack.test.js +++ b/tests/lib/rl/blackjack.test.js @@ -79,4 +79,16 @@ describe('step', () => { expect(info.reward).toBeLessThan(0) expect(info.state).toBeInstanceOf(Array) }) + + test('usableace', () => { + const env = new BlackjackRLEnvironment() + env._player_hands = [ + { suit: 0, value: 1 }, + { suit: 0, value: 2 }, + ] + const info = env.step([0]) + expect(info.done).toBeTruthy() + expect(info.reward).toBeDefined() + expect(info.state).toBeInstanceOf(Array) + }) }) diff --git a/tests/lib/rl/breaker.test.js b/tests/lib/rl/breaker.test.js index 163420b9f..62a8bcc1a 100644 --- a/tests/lib/rl/breaker.test.js +++ b/tests/lib/rl/breaker.test.js @@ -57,4 +57,194 @@ test('step', () => { expect(info.state).toHaveLength(85) }) -test.todo('test') +describe('test', () => { + test('default', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0.1) + expect(info.state).toHaveLength(85) + expect(env.epoch).toBe(0) + }) + + test.each([0, 1000])('bar position: %p', p => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[4] = p + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0.1) + expect(info.state).toHaveLength(85) + expect(env.epoch).toBe(0) + }) + + test('hit paddle top', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[0] = 100 + state[1] = env._paddle_baseline + 1 + state[2] = 1 + state[3] = -1 + state[4] = 100 + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(100) + expect(info.state).toHaveLength(85) + expect(env.epoch).toBe(0) + }) + + test('hit paddle side', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[0] = 100 - env._paddle_size[0] / 2 + state[1] = env._paddle_baseline + state[2] = 1 + state[3] = -1 + state[4] = 100 + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(100) + expect(info.state).toHaveLength(85) + expect(info.state[0]).toBe(state[0] + 1) + expect(info.state[1]).toBe(state[1] - 1) + expect(info.state[2]).toBe(-1) + expect(info.state[3]).toBe(-1) + expect(info.state[4]).toBe(100) + expect(env.epoch).toBe(0) + }) + + test('hit side left', () => { + const env = new BreakerRLEnvironment() + const state = env.reset() + state[0] = 0 + state[1] = 100 + state[2] = -1 + state[3] = 1 + state[4] = 100 + + const info = env.test(state, [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0.1) + expect(info.state).toHaveLength(85) + expect(info.state[0]).toBe(state[0] - 1) + expect(info.state[1]).toBe(state[1] + 1) + expect(info.state[2]).toBe(1) + expect(info.state[3]).toBe(1) + expect(info.state[4]).toBe(100) + expect(env.epoch).toBe(0) + }) + + test('hit side right', () => { + const env 
= new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = env._size[0]
+ state[1] = 100
+ state[2] = 1
+ state[3] = 1
+ state[4] = 100
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(0.1)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] + 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('hit top', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = 100
+ state[1] = env._size[1]
+ state[2] = -1
+ state[3] = 1
+ state[4] = 100
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(0.1)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] - 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(-1)
+ expect(info.state[4]).toBe(100)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('hit bottom', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = 100
+ state[1] = 0
+ state[2] = -1
+ state[3] = 1
+ state[4] = 100
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeTruthy()
+ expect(info.reward).toBe(-1000)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] - 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('hit block', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = env._padding[0][0]
+ state[1] = env._padding[1][0]
+ state[2] = 1
+ state[3] = 1
+ state[4] = 100
+ expect(state[5]).toBe(1)
+
+ const info = env.test(state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(100)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0] + 1)
+ expect(info.state[1]).toBe(state[1] + 1)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(info.state[5]).toBe(0)
+ expect(env.epoch).toBe(0)
+ })
+
+ test('already broken block', () => {
+ const env = new BreakerRLEnvironment()
+ const state = env.reset()
+ state[0] = env._padding[0][0]
+ state[1] = env._padding[1][0]
+ state[2] = 1
+ state[3] = 1
+ state[4] = 100
+
+ const info0 = env.test(state, [0])
+ const info = env.test(info0.state, [0])
+ expect(info.done).toBeFalsy()
+ expect(info.reward).toBe(0.1)
+ expect(info.state).toHaveLength(85)
+ expect(info.state[0]).toBe(state[0])
+ expect(info.state[1]).toBe(state[1] + 2)
+ expect(info.state[2]).toBe(-1)
+ expect(info.state[3]).toBe(1)
+ expect(info.state[4]).toBe(100)
+ expect(info.state[5]).toBe(0)
+ expect(env.epoch).toBe(0)
+ })
+})
diff --git a/tests/lib/rl/draughts.test.js b/tests/lib/rl/draughts.test.js
index 9d5d93f84..fe939348c 100644
--- a/tests/lib/rl/draughts.test.js
+++ b/tests/lib/rl/draughts.test.js
@@ -15,7 +15,28 @@ describe('env', () => {
const env = new DraughtsRLEnvironment()
expect(env.actions[0]).toHaveLength(1 + 1426)
- expect(env.states).toHaveLength(1 + 8 * 8)
+ expect(env.states).toHaveLength(1 + 8 * 4)
+ })
+
+ describe('evaluation', () => {
+ test('set', () => {
+ const env = new DraughtsRLEnvironment()
+ env.evaluation = state => {
+ expect(state).toHaveLength(1 + 8 * 4)
+ return 1
+ }
+
+ const score = env._board.score()
+ expect(score).toBe(1)
+ })
+
+ test('clear', () => {
+
const env = new DraughtsRLEnvironment() + env.evaluation = null + + const score = env._board.score() + expect(score).toBe(0) + }) }) describe('reset', () => { @@ -23,17 +44,15 @@ describe('env', () => { const env = new DraughtsRLEnvironment() const state = env.reset() - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.RED) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 3 - ? DraughtsRLEnvironment.OWN - : i >= 5 - ? DraughtsRLEnvironment.OTHER - : DraughtsRLEnvironment.EMPTY + i < 3 + ? DraughtsRLEnvironment.OWN + : i >= 5 + ? DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.EMPTY ) } @@ -42,31 +61,48 @@ describe('env', () => { }) describe('state', () => { - test.each([DraughtsRLEnvironment.RED, DraughtsRLEnvironment.WHITE])('success %i', agent => { + test.each([undefined, DraughtsRLEnvironment.RED, DraughtsRLEnvironment.WHITE])('success %i', agent => { const env = new DraughtsRLEnvironment() env.reset(0, 1) - const red = agent === DraughtsRLEnvironment.RED ? DraughtsRLEnvironment.OWN : DraughtsRLEnvironment.OTHER - const white = agent === DraughtsRLEnvironment.RED ? DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.OWN + const red = + agent === undefined || agent === DraughtsRLEnvironment.RED + ? DraughtsRLEnvironment.OWN + : DraughtsRLEnvironment.OTHER + const white = + agent === undefined || agent === DraughtsRLEnvironment.RED + ? DraughtsRLEnvironment.OTHER + : DraughtsRLEnvironment.OWN const state = env.state(agent) - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.RED) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { - expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 3 - ? red - : i >= 5 - ? white - : DraughtsRLEnvironment.EMPTY - : DraughtsRLEnvironment.EMPTY - ) + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { + expect(state[p]).toBe(i < 3 ? red : i >= 5 ? 
white : DraughtsRLEnvironment.EMPTY) } } }) + test('with king', () => { + const env = new DraughtsRLEnvironment() + env.reset() + + env.step([{ from: 11, path: [16], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 22, path: [17], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 8, path: [11], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 26, path: [22], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 16, path: [20], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 22, path: [18], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 9, path: [13], jump: [] }], DraughtsRLEnvironment.RED) + env.step([{ from: 31, path: [26], jump: [] }], DraughtsRLEnvironment.WHITE) + env.step([{ from: 13, path: [22, 31], jump: [17, 26] }], DraughtsRLEnvironment.RED) + + const state = env.state(DraughtsRLEnvironment.RED) + expect(state).toHaveLength(1 + 8 * 4) + expect(state[31]).toBe(DraughtsRLEnvironment.OWN | DraughtsRLEnvironment.KING) + }) + test('failed before reset', () => { const env = new DraughtsRLEnvironment() expect(() => env.state(DraughtsRLEnvironment.RED)).toThrow( @@ -82,27 +118,25 @@ describe('env', () => { }) describe('step', () => { - test('success', () => { + test.each([undefined, DraughtsRLEnvironment.RED])('success agent: %p', agent => { const env = new DraughtsRLEnvironment() env.reset() - const info = env.step([{ from: [2, 0], path: [[3, 1]], jump: [] }], DraughtsRLEnvironment.RED) + const info = env.step([{ from: [2, 1], path: [[3, 2]], jump: [] }], agent) expect(info.invalid).toBeFalsy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) const state = info.state - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.WHITE) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 2 || (i === 2 && j !== 0) || (i === 3 && j === 1) - ? DraughtsRLEnvironment.OWN - : i >= 5 - ? DraughtsRLEnvironment.OTHER - : DraughtsRLEnvironment.EMPTY + i < 2 || (i === 2 && j !== 1) || (i === 3 && j === 2) + ? DraughtsRLEnvironment.OWN + : i >= 5 + ? 
DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.EMPTY ) } @@ -151,14 +185,14 @@ describe('env', () => { const env = new DraughtsRLEnvironment() env.reset() - const info1 = env.step([{ from: [2, 0], path: [[3, 1]], jump: [] }], DraughtsRLEnvironment.RED) + const info1 = env.step([{ from: [2, 1], path: [[3, 2]], jump: [] }], DraughtsRLEnvironment.RED) expect(info1.invalid).toBeFalsy() expect(env.epoch).toBe(1) - const info2 = env.step([{ from: [5, 3], path: [[4, 2]], jump: [] }], DraughtsRLEnvironment.WHITE) + const info2 = env.step([{ from: [5, 4], path: [[4, 3]], jump: [] }], DraughtsRLEnvironment.WHITE) expect(info2.invalid).toBeFalsy() expect(env.epoch).toBe(2) - const info = env.step([{ from: [2, 2], path: [[3, 3]], jump: [] }], DraughtsRLEnvironment.RED) + const info = env.step([{ from: [2, 3], path: [[3, 4]], jump: [] }], DraughtsRLEnvironment.RED) expect(info.invalid).toBeTruthy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -181,25 +215,23 @@ describe('env', () => { }) describe('test', () => { - test('step', () => { + test.each([undefined, DraughtsRLEnvironment.RED])('step agent: %p', agent => { const env = new DraughtsRLEnvironment() const orgstate = env.reset() - const info = env.test(orgstate, [{ from: [2, 0], path: [[3, 1]], jump: [] }], DraughtsRLEnvironment.RED) + const info = env.test(orgstate, [{ from: [2, 1], path: [[3, 2]], jump: [] }], agent) expect(info.invalid).toBeFalsy() const state = info.state - expect(state).toHaveLength(1 + 8 * 8) + expect(state).toHaveLength(1 + 8 * 4) expect(state[0]).toBe(DraughtsRLEnvironment.WHITE) for (let i = 0, p = 1; i < 8; i++) { - for (let j = 0; j < 8; j++, p++) { + for (let j = i % 2 === 0 ? 1 : 0; j < 8; j += 2, p++) { expect(state[p]).toBe( - i % 2 === j % 2 - ? i < 2 || (i === 2 && j !== 0) || (i === 3 && j === 1) - ? DraughtsRLEnvironment.OWN - : i >= 5 - ? DraughtsRLEnvironment.OTHER - : DraughtsRLEnvironment.EMPTY + i < 2 || (i === 2 && j !== 1) || (i === 3 && j === 2) + ? DraughtsRLEnvironment.OWN + : i >= 5 + ? 
DraughtsRLEnvironment.OTHER : DraughtsRLEnvironment.EMPTY ) } @@ -207,6 +239,65 @@ describe('env', () => { expect(orgstate).toEqual(env.state(DraughtsRLEnvironment.RED)) expect(env.epoch).toBe(0) }) + + test('win', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.RED + state[1] = DraughtsRLEnvironment.OWN + state[6] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [{ from: [0, 1], path: [[2, 3]], jump: [[1, 2]] }], DraughtsRLEnvironment.RED) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + }) + + test('lose', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.RED + state[1] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [DraughtsRLEnvironment.EMPTY], DraughtsRLEnvironment.RED) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + }) + + test('empty gameturn red', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.RED + state[4] = DraughtsRLEnvironment.OWN + state[8] = DraughtsRLEnvironment.OTHER + state[11] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [DraughtsRLEnvironment.EMPTY], DraughtsRLEnvironment.RED) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(DraughtsRLEnvironment.WHITE) + }) + + test('empty gameturn white', () => { + const env = new DraughtsRLEnvironment() + env.reset() + const state = Array(33).fill(DraughtsRLEnvironment.EMPTY) + state[0] = DraughtsRLEnvironment.WHITE + state[4] = DraughtsRLEnvironment.OWN + state[8] = DraughtsRLEnvironment.OTHER + state[11] = DraughtsRLEnvironment.OTHER + + const info = env.test(state, [DraughtsRLEnvironment.EMPTY], DraughtsRLEnvironment.WHITE) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(DraughtsRLEnvironment.RED) + }) }) }) @@ -217,13 +308,13 @@ describe('board', () => { expect(board.size).toEqual([8, 8]) for (let i = 0; i < 8; i += 2) { - expect(board.at([0, i])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([1, i + 1])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([2, i])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([0, i + 1])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([1, i])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([2, i + 1])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([5, i + 1])).toBe(DraughtsRLEnvironment.WHITE) - expect(board.at([6, i])).toBe(DraughtsRLEnvironment.WHITE) - expect(board.at([7, i + 1])).toBe(DraughtsRLEnvironment.WHITE) + expect(board.at([5, i])).toBe(DraughtsRLEnvironment.WHITE) + expect(board.at([6, i + 1])).toBe(DraughtsRLEnvironment.WHITE) + expect(board.at([7, i])).toBe(DraughtsRLEnvironment.WHITE) } expect(board.finish).toBeFalsy() expect(board.count.red).toBe(12) @@ -233,51 +324,154 @@ describe('board', () => { expect(board.score(DraughtsRLEnvironment.WHITE)).toBe(0) }) - test('choices', () => { + describe('winner', () => { + test('random', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + let turn = DraughtsRLEnvironment.RED + + let maxIter = 1.0e4 + while (maxIter-- > 0) { + const choices = board.choices(turn) + if 
(choices.length === 0) { + turn = board.nextTurn(turn) + continue + } + + board.set(choices[0], turn) + turn = board.nextTurn(turn) + } + + expect(board.winner).not.toBeNull() + }) + }) + + test('toString', () => { const env = new DraughtsRLEnvironment() const board = env._board - const choiceRed = board.choices(DraughtsRLEnvironment.RED) - expect(choiceRed).toEqual([ - { from: [2, 0], path: [[3, 1]], jump: [] }, - { from: [2, 2], path: [[3, 3]], jump: [] }, - { from: [2, 2], path: [[3, 1]], jump: [] }, - { from: [2, 4], path: [[3, 5]], jump: [] }, - { from: [2, 4], path: [[3, 3]], jump: [] }, - { from: [2, 6], path: [[3, 7]], jump: [] }, - { from: [2, 6], path: [[3, 5]], jump: [] }, - ]) - const choiceWhite = board.choices(DraughtsRLEnvironment.WHITE) - expect(choiceWhite).toEqual([ - { from: [5, 1], path: [[4, 2]], jump: [] }, - { from: [5, 1], path: [[4, 0]], jump: [] }, - { from: [5, 3], path: [[4, 4]], jump: [] }, - { from: [5, 3], path: [[4, 2]], jump: [] }, - { from: [5, 5], path: [[4, 6]], jump: [] }, - { from: [5, 5], path: [[4, 4]], jump: [] }, - { from: [5, 7], path: [[4, 6]], jump: [] }, - ]) + expect(board.toString()).toBe(`- x - x - x - x +x - x - x - x - +- x - x - x - x +- - - - - - - - +- - - - - - - - +o - o - o - o - +- o - o - o - o +o - o - o - o - +`) + }) + + test('nextTurn', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.nextTurn(DraughtsRLEnvironment.RED)).toBe(DraughtsRLEnvironment.WHITE) + expect(board.nextTurn(DraughtsRLEnvironment.WHITE)).toBe(DraughtsRLEnvironment.RED) + }) + + test('copy', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const cp = board.copy() + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + expect(cp.at([i, j])).toBe(board.at([i, j])) + } + } + }) + + describe('at', () => { + test.each([[0, 1], 1])('%p', p => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.at(p)).toBe(DraughtsRLEnvironment.RED) + }) + + test.each([[7, 0], 29])('%p', p => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.at(p)).toBe(DraughtsRLEnvironment.WHITE) + }) }) describe('set', () => { - test('success', () => { + test.each([ + { from: [2, 1], path: [[3, 2]], jump: [] }, + { from: 9, path: [14], jump: [] }, + ])('success %p', p => { + const env = new DraughtsRLEnvironment() + const board = env._board + + expect(board.at([2, 1])).toBe(DraughtsRLEnvironment.RED) + expect(board.at([3, 2])).toBe(DraughtsRLEnvironment.EMPTY) + + const success = board.set(p, DraughtsRLEnvironment.RED) + expect(success).toBeTruthy() + expect(board.at([2, 1])).toBe(DraughtsRLEnvironment.EMPTY) + expect(board.at([3, 2])).toBe(DraughtsRLEnvironment.RED) + }) + + test('to king', () => { const env = new DraughtsRLEnvironment() const board = env._board - expect(board.at([2, 0])).toBe(DraughtsRLEnvironment.RED) - expect(board.at([3, 1])).toBe(DraughtsRLEnvironment.EMPTY) + board.set({ from: 11, path: [16], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [17], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 8, path: [11], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 26, path: [22], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 16, path: [20], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [18], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 9, path: [13], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ 
from: 31, path: [26], jump: [] }, DraughtsRLEnvironment.WHITE) - const success = board.set({ from: [2, 0], path: [[3, 1]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: 13, path: [22, 31], jump: [17, 26] }, DraughtsRLEnvironment.RED) expect(success).toBeTruthy() - expect(board.at([2, 0])).toBe(DraughtsRLEnvironment.EMPTY) - expect(board.at([3, 1])).toBe(DraughtsRLEnvironment.RED) }) test('fail invalid piece', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success = board.set({ from: [5, 1], path: [[4, 2]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [5, 2], path: [[4, 3]], jump: [] }, DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) + + test('fail invalid path length', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const success = board.set( + { + from: [2, 1], + path: [ + [3, 2], + [4, 3], + ], + jump: [], + }, + DraughtsRLEnvironment.RED + ) + expect(success).toBeFalsy() + }) + + test('fail invalid path, jump length', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const success = board.set( + { + from: [2, 1], + path: [ + [4, 3], + [5, 4], + ], + jump: [[3, 2]], + }, + DraughtsRLEnvironment.RED + ) expect(success).toBeFalsy() }) @@ -285,11 +479,11 @@ describe('board', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success1 = board.set({ from: [2, 0], path: [[3, 1]], jump: [] }, DraughtsRLEnvironment.RED) + const success1 = board.set({ from: [2, 1], path: [[3, 2]], jump: [] }, DraughtsRLEnvironment.RED) expect(success1).toBeTruthy() - const success2 = board.set({ from: [5, 1], path: [[4, 0]], jump: [] }, DraughtsRLEnvironment.WHITE) + const success2 = board.set({ from: [5, 0], path: [[4, 1]], jump: [] }, DraughtsRLEnvironment.WHITE) expect(success2).toBeTruthy() - const success = board.set({ from: [3, 1], path: [[2, 0]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [3, 2], path: [[2, 1]], jump: [] }, DraughtsRLEnvironment.RED) expect(success).toBeFalsy() }) @@ -297,7 +491,7 @@ describe('board', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success = board.set({ from: [1, 1], path: [[3, 3]], jump: [[2, 2]] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [1, 0], path: [[3, 2]], jump: [[2, 1]] }, DraughtsRLEnvironment.RED) expect(success).toBeFalsy() }) @@ -305,47 +499,99 @@ describe('board', () => { const env = new DraughtsRLEnvironment() const board = env._board - const success = board.set({ from: [1, 1], path: [[2, 2]], jump: [] }, DraughtsRLEnvironment.RED) + const success = board.set({ from: [1, 0], path: [[2, 1]], jump: [] }, DraughtsRLEnvironment.RED) expect(success).toBeFalsy() }) - }) - test('nextTurn', () => { - const env = new DraughtsRLEnvironment() - const board = env._board + test('fail invalid move path only', () => { + const env = new DraughtsRLEnvironment() + const board = env._board - expect(board.nextTurn(DraughtsRLEnvironment.RED)).toBe(DraughtsRLEnvironment.WHITE) - expect(board.nextTurn(DraughtsRLEnvironment.WHITE)).toBe(DraughtsRLEnvironment.RED) + const success = board.set({ from: [2, 1], path: [[3, 1]], jump: [] }, DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) + + test('fail invalid move jump', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + const success = board.set({ from: [2, 1], path: [[4, 3]], jump: [[6, 3]] }, 
DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) + + test('fail invalid move path', () => { + const env = new DraughtsRLEnvironment() + const board = env._board + + board.set({ from: [2, 1], path: [[3, 2]], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: [5, 4], path: [[4, 3]], jump: [] }, DraughtsRLEnvironment.WHITE) + const success = board.set({ from: [3, 2], path: [[4, 4]], jump: [[4, 3]] }, DraughtsRLEnvironment.RED) + expect(success).toBeFalsy() + }) }) - test('copy', () => { + test('choices', () => { const env = new DraughtsRLEnvironment() const board = env._board - const cp = board.copy() - for (let i = 0; i < board.size[0]; i++) { - for (let j = 0; j < board.size[1]; j++) { - expect(cp.at([i, j])).toBe(board.at([i, j])) - } - } + const choiceRed = board.choices(DraughtsRLEnvironment.RED) + expect(choiceRed).toEqual([ + { from: [2, 1], path: [[3, 2]], jump: [] }, + { from: [2, 1], path: [[3, 0]], jump: [] }, + { from: [2, 3], path: [[3, 4]], jump: [] }, + { from: [2, 3], path: [[3, 2]], jump: [] }, + { from: [2, 5], path: [[3, 6]], jump: [] }, + { from: [2, 5], path: [[3, 4]], jump: [] }, + { from: [2, 7], path: [[3, 6]], jump: [] }, + ]) + const choiceWhite = board.choices(DraughtsRLEnvironment.WHITE) + expect(choiceWhite).toEqual([ + { from: [5, 0], path: [[4, 1]], jump: [] }, + { from: [5, 2], path: [[4, 3]], jump: [] }, + { from: [5, 2], path: [[4, 1]], jump: [] }, + { from: [5, 4], path: [[4, 5]], jump: [] }, + { from: [5, 4], path: [[4, 3]], jump: [] }, + { from: [5, 6], path: [[4, 7]], jump: [] }, + { from: [5, 6], path: [[4, 5]], jump: [] }, + ]) }) - test('winner', () => { - const env = new DraughtsRLEnvironment() - const board = env._board - let turn = DraughtsRLEnvironment.RED + describe('allPah', () => { + test('not start own piece', () => { + const env = new DraughtsRLEnvironment() + const board = env._board - while (!board.finish) { - const choices = board.choices(turn) - if (choices.length === 0) { - turn = board.nextTurn(turn) - continue - } + const path = board.allPath(0, 1, DraughtsRLEnvironment.WHITE) + expect(path).toHaveLength(0) + }) - board.set(choices[0], turn) - turn = board.nextTurn(turn) - } + test('will be king', () => { + const env = new DraughtsRLEnvironment() + const board = env._board - expect(board.winner).not.toBeNull() + board.set({ from: 11, path: [16], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [17], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 8, path: [11], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 26, path: [22], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 16, path: [20], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 22, path: [18], jump: [] }, DraughtsRLEnvironment.WHITE) + board.set({ from: 9, path: [13], jump: [] }, DraughtsRLEnvironment.RED) + board.set({ from: 31, path: [26], jump: [] }, DraughtsRLEnvironment.WHITE) + + const path = board.allPath(3, 0, DraughtsRLEnvironment.RED) + expect(path).toEqual([ + { + from: [3, 0], + path: [ + [5, 2], + [7, 4], + ], + jump: [ + [4, 1], + [6, 3], + ], + }, + ]) + }) }) }) diff --git a/tests/lib/rl/gomoku.test.js b/tests/lib/rl/gomoku.test.js index 02d5743bb..60b2db198 100644 --- a/tests/lib/rl/gomoku.test.js +++ b/tests/lib/rl/gomoku.test.js @@ -18,6 +18,27 @@ describe('env', () => { expect(env.states).toHaveLength(1 + 8 * 8) }) + describe('evaluation', () => { + test('set', () => { + const env = new GomokuRLEnvironment() + env.evaluation = state => { + expect(state).toHaveLength(1 + 8 * 
8) + return 1 + } + + const score = env._board.score() + expect(score).toBe(1) + }) + + test('clear', () => { + const env = new GomokuRLEnvironment() + env.evaluation = null + + const score = env._board.score() + expect(score).toBe(0) + }) + }) + describe('reset', () => { test('success', () => { const env = new GomokuRLEnvironment() @@ -34,14 +55,20 @@ describe('env', () => { }) describe('state', () => { - test.each([GomokuRLEnvironment.BLACK, GomokuRLEnvironment.WHITE])('success %i', agent => { + test.each([undefined, GomokuRLEnvironment.BLACK, GomokuRLEnvironment.WHITE])('success %i', agent => { const env = new GomokuRLEnvironment() env.reset(0, 1) env.step(['1_1'], GomokuRLEnvironment.BLACK) env.step(['2_2'], GomokuRLEnvironment.WHITE) - const black = agent === GomokuRLEnvironment.BLACK ? GomokuRLEnvironment.OWN : GomokuRLEnvironment.OTHER - const white = agent === GomokuRLEnvironment.BLACK ? GomokuRLEnvironment.OTHER : GomokuRLEnvironment.OWN + const black = + agent === undefined || agent === GomokuRLEnvironment.BLACK + ? GomokuRLEnvironment.OWN + : GomokuRLEnvironment.OTHER + const white = + agent === undefined || agent === GomokuRLEnvironment.BLACK + ? GomokuRLEnvironment.OTHER + : GomokuRLEnvironment.OWN const state = env.state(agent) expect(state).toHaveLength(1 + 8 * 8) @@ -70,11 +97,11 @@ describe('env', () => { }) describe('step', () => { - test('success', () => { + test.each([undefined, GomokuRLEnvironment.BLACK])('success agent: %p', agent => { const env = new GomokuRLEnvironment() env.reset() - const info = env.step(['3_5'], GomokuRLEnvironment.BLACK) + const info = env.step(['3_5'], agent) expect(info.invalid).toBeFalsy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -131,11 +158,11 @@ describe('env', () => { }) describe('test', () => { - test('step', () => { + test.each([undefined, GomokuRLEnvironment.BLACK])('step agent: %p', agent => { const env = new GomokuRLEnvironment() const orgstate = env.reset() - const info = env.test(orgstate, ['3_5'], GomokuRLEnvironment.BLACK) + const info = env.test(orgstate, ['3_5'], agent) expect(info.invalid).toBeFalsy() const state = info.state @@ -162,6 +189,78 @@ describe('board', () => { expect(board.winner).toBeNull() }) + describe('winner', () => { + test.each(['black', 'white'])('%s', winner => { + const env = new GomokuRLEnvironment() + const board = env._board + const turn = winner === 'black' ? 
GomokuRLEnvironment.BLACK : GomokuRLEnvironment.WHITE + board.set([0, 0], turn) + board.set([0, 1], turn) + board.set([0, 2], turn) + board.set([0, 3], turn) + board.set([0, 4], turn) + + expect(board.winner).toBe(turn) + }) + + test('game', () => { + const env = new GomokuRLEnvironment() + const board = env._board + let turn = GomokuRLEnvironment.BLACK + + while (!board.finish) { + const choices = board.choices(turn) + if (choices.length === 0) { + turn = board.nextTurn(turn) + continue + } + + board.set(choices[0], turn) + turn = board.nextTurn(turn) + } + + expect(board.winner).not.toBeNull() + }) + }) + + test('toString', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + board.set([2, 4], GomokuRLEnvironment.BLACK) + board.set([3, 6], GomokuRLEnvironment.WHITE) + + expect(board.toString()).toBe(`- - - - - - - - +- - - - - - - - +- - - - x - - - +- - - - - - o - +- - - - - - - - +- - - - - - - - +- - - - - - - - +- - - - - - - - +`) + }) + + test('nextTurn', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + expect(board.nextTurn(GomokuRLEnvironment.BLACK)).toBe(GomokuRLEnvironment.WHITE) + expect(board.nextTurn(GomokuRLEnvironment.WHITE)).toBe(GomokuRLEnvironment.BLACK) + }) + + test('copy', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + const cp = board.copy() + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + expect(cp.at([i, j])).toBe(board.at([i, j])) + } + } + }) + describe('choices', () => { test('all', () => { const env = new GomokuRLEnvironment() @@ -218,60 +317,6 @@ describe('board', () => { }) }) - test('nextTurn', () => { - const env = new GomokuRLEnvironment() - const board = env._board - - expect(board.nextTurn(GomokuRLEnvironment.BLACK)).toBe(GomokuRLEnvironment.WHITE) - expect(board.nextTurn(GomokuRLEnvironment.WHITE)).toBe(GomokuRLEnvironment.BLACK) - }) - - test('copy', () => { - const env = new GomokuRLEnvironment() - const board = env._board - - const cp = board.copy() - for (let i = 0; i < board.size[0]; i++) { - for (let j = 0; j < board.size[1]; j++) { - expect(cp.at([i, j])).toBe(board.at([i, j])) - } - } - }) - - describe('winner', () => { - test.each(['black', 'white'])('%s', winner => { - const env = new GomokuRLEnvironment() - const board = env._board - const turn = winner === 'black' ? 
GomokuRLEnvironment.BLACK : GomokuRLEnvironment.WHITE - board.set([0, 0], turn) - board.set([0, 1], turn) - board.set([0, 2], turn) - board.set([0, 3], turn) - board.set([0, 4], turn) - - expect(board.winner).toBe(turn) - }) - - test('game', () => { - const env = new GomokuRLEnvironment() - const board = env._board - let turn = GomokuRLEnvironment.BLACK - - while (!board.finish) { - const choices = board.choices(turn) - if (choices.length === 0) { - turn = board.nextTurn(turn) - continue - } - - board.set(choices[0], turn) - turn = board.nextTurn(turn) - } - - expect(board.winner).not.toBeNull() - }) - }) - describe('row', () => { test('empty', () => { const env = new GomokuRLEnvironment() diff --git a/tests/lib/rl/grid.test.js b/tests/lib/rl/grid.test.js index 3c472dbe8..d2b2ea747 100644 --- a/tests/lib/rl/grid.test.js +++ b/tests/lib/rl/grid.test.js @@ -10,9 +10,17 @@ test('size', () => { expect(env.size).toEqual([20, 10]) }) -test('actions', () => { - const env = new GridRLEnvironment() - expect(env.actions).toEqual([[0, 1, 2, 3]]) +describe('actions', () => { + test('2d', () => { + const env = new GridRLEnvironment() + expect(env.actions).toEqual([[0, 1, 2, 3]]) + }) + + test('1d', () => { + const env = new GridRLEnvironment() + env._dim = 1 + expect(env.actions).toEqual([[0, 1]]) + }) }) test('states', () => { @@ -69,6 +77,16 @@ test('reset', () => { expect(env.state()).toEqual([0, 0]) }) +test('resetMap', () => { + const env = new GridRLEnvironment() + env._points.push([0, 1]) + env.step([0]) + env.resetMap() + expect(env._points).toHaveLength(0) + const state = env.state() + expect(state).toEqual([1, 0]) +}) + test('resetMapAsMaze', () => { const env = new GridRLEnvironment() env.resetMapAsMaze() @@ -104,14 +122,28 @@ describe('state', () => { }) }) -test('step', () => { - const env = new GridRLEnvironment() - expect(env.epoch).toBe(0) - const info = env.step([0]) - expect(env.epoch).toBe(1) - expect(info.done).toBeFalsy() - expect(info.reward).toBe(-1) - expect(info.state).toHaveLength(2) +describe('step', () => { + test('2d', () => { + const env = new GridRLEnvironment() + expect(env.epoch).toBe(0) + const info = env.step([0]) + expect(env.epoch).toBe(1) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state).toEqual([1, 0]) + }) + + test('1d', () => { + const env = new GridRLEnvironment() + env._dim = 1 + env.reset() + expect(env.epoch).toBe(0) + const info = env.step([0]) + expect(env.epoch).toBe(1) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state).toEqual([1]) + }) }) describe('test', () => { diff --git a/tests/lib/rl/inhypercube.test.js b/tests/lib/rl/inhypercube.test.js new file mode 100644 index 000000000..8831cbe7f --- /dev/null +++ b/tests/lib/rl/inhypercube.test.js @@ -0,0 +1,79 @@ +import InHypercubeRLEnvironment from '../../../lib/rl/inhypercube.js' + +test('constructor', () => { + const env = new InHypercubeRLEnvironment() + expect(env).toBeDefined() +}) + +describe('actions', () => { + test('2d', () => { + const env = new InHypercubeRLEnvironment(2) + expect(env.actions).toEqual([[0, 1, 2, 3]]) + }) + + test('3d', () => { + const env = new InHypercubeRLEnvironment(3) + env._dim = 1 + expect(env.actions).toEqual([[0, 1, 2, 3, 4, 5]]) + }) +}) + +test.each([1, 2, 3])('states %dd', (d) => { + const env = new InHypercubeRLEnvironment(d) + expect(env.states).toHaveLength(d * 2) +}) + +test('reset', () => { + const env = new InHypercubeRLEnvironment() + for (let i = 0; i < 10; i++) { + 
env.step(env.sample_action()) + } + const init_state = env.reset() + expect(init_state).toEqual([0, 0, 0, 0]) + expect(env.state()).toEqual([0, 0, 0, 0]) +}) + +describe('state', () => { + test('init', () => { + const env = new InHypercubeRLEnvironment() + expect(env.state()).toEqual([0, 0, 0, 0]) + }) +}) + +describe('step', () => { + test('2d', () => { + const env = new InHypercubeRLEnvironment() + expect(env.epoch).toBe(0) + const info = env.step([0]) + expect(env.epoch).toBe(1) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0) + expect(info.state).toEqual([0.1, 0, 0.1, 0]) + }) +}) + +describe('test', () => { + test('step', () => { + const env = new InHypercubeRLEnvironment() + const info = env.test([0, 0, 0, 0], [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0) + expect(info.state).toEqual([0.1, 0, 0.1, 0]) + }) + + test('goal', () => { + const env = new InHypercubeRLEnvironment() + const info = env.test([-1, 0, 0, 0], [1]) + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + expect(info.state).toEqual([-1.1, 0, -0.1, 0]) + }) + + test('fail', () => { + const env = new InHypercubeRLEnvironment() + const info = env.test([1, 0, 0, 0], [0]) + expect(info.done).toBeTruthy() + expect(info.reward).toBe(0) + expect(info.state).toEqual([1.1, 0, 0.1, 0]) + }) +}) diff --git a/tests/lib/rl/mountaincar.test.js b/tests/lib/rl/mountaincar.test.js index 8caab866f..7da4cbad7 100644 --- a/tests/lib/rl/mountaincar.test.js +++ b/tests/lib/rl/mountaincar.test.js @@ -73,6 +73,42 @@ describe('test', () => { expect(info.state[1]).toBe(-0.0115) }) + test('big v', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([0, 1], [0]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(0.07) + expect(info.state[1]).toBe(0.07) + }) + + test('small p', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([-Math.PI, 0], [1]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(-1.2) + expect(info.state[1]).toBe(0.0025) + }) + + test('big p', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([Math.PI, 0], [1]) + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(0.6) + expect(info.state[1]).toBe(0.0025) + }) + + test('small p, v', () => { + const env = new MountainCarRLEnvironment() + const info = env.test([-Math.PI, -1], [1]) + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + expect(info.state[0]).toBe(-1.2) + expect(info.state[1]).toBe(0) + }) + test('goal', () => { const env = new MountainCarRLEnvironment() const info = env.test([0.5, 0.01], [2]) diff --git a/tests/lib/rl/pendulum.test.js b/tests/lib/rl/pendulum.test.js index c60d672d6..2f10682b1 100644 --- a/tests/lib/rl/pendulum.test.js +++ b/tests/lib/rl/pendulum.test.js @@ -47,3 +47,44 @@ test('step', () => { expect(info.reward).toBeCloseTo(0) expect(info.state).toHaveLength(3) }) + +describe('test', () => { + test('big t', () => { + const env = new PendulumRLEnvironment() + const info = env.test([-1, 0, 0], [0]) + + expect(info.done).toBeFalsy() + expect(info.state[0]).toBeGreaterThanOrEqual(-1) + expect(info.state[0]).toBeLessThanOrEqual(1) + expect(info.state[1]).toBeGreaterThanOrEqual(-1) + expect(info.state[1]).toBeLessThanOrEqual(1) + expect(info.state[2]).toBeGreaterThanOrEqual(-0.5) + expect(info.state[2]).toBeLessThanOrEqual(0.5) + }) + + test('small action[0]', () => { + const env = new 
PendulumRLEnvironment() + const info = env.test([1, 0, 0], [-10]) + + expect(info.done).toBeFalsy() + expect(info.state[0]).toBeGreaterThanOrEqual(-1) + expect(info.state[0]).toBeLessThanOrEqual(1) + expect(info.state[1]).toBeGreaterThanOrEqual(-1) + expect(info.state[1]).toBeLessThanOrEqual(1) + expect(info.state[2]).toBeGreaterThanOrEqual(-0.5) + expect(info.state[2]).toBeLessThanOrEqual(0.5) + }) + + test('big action[0]', () => { + const env = new PendulumRLEnvironment() + const info = env.test([1, 0, 0], [10]) + + expect(info.done).toBeFalsy() + expect(info.state[0]).toBeGreaterThanOrEqual(-1) + expect(info.state[0]).toBeLessThanOrEqual(1) + expect(info.state[1]).toBeGreaterThanOrEqual(-1) + expect(info.state[1]).toBeLessThanOrEqual(1) + expect(info.state[2]).toBeGreaterThanOrEqual(-0.5) + expect(info.state[2]).toBeLessThanOrEqual(0.5) + }) +}) diff --git a/tests/lib/rl/reversi.test.js b/tests/lib/rl/reversi.test.js index 5350e72e3..6e90b939d 100644 --- a/tests/lib/rl/reversi.test.js +++ b/tests/lib/rl/reversi.test.js @@ -18,6 +18,27 @@ describe('env', () => { expect(env.states).toHaveLength(1 + 8 * 8) }) + describe('evaluation', () => { + test('set', () => { + const env = new ReversiRLEnvironment() + env.evaluation = state => { + expect(state).toHaveLength(1 + 8 * 8) + return 1 + } + + const score = env._board.score() + expect(score).toBe(1) + }) + + test('clear', () => { + const env = new ReversiRLEnvironment() + env.evaluation = null + + const score = env._board.score() + expect(score).toBe(0) + }) + }) + describe('reset', () => { test('success', () => { const env = new ReversiRLEnvironment() @@ -28,9 +49,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) + (i === 3 && j === 4) || (i === 4 && j === 3) ? ReversiRLEnvironment.OWN - : (i === 3 && j === 4) || (i === 4 && j === 3) + : (i === 3 && j === 3) || (i === 4 && j === 4) ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.EMPTY ) @@ -40,12 +61,18 @@ describe('env', () => { }) describe('state', () => { - test.each([ReversiRLEnvironment.BLACK, ReversiRLEnvironment.WHITE])('success %i', agent => { + test.each([undefined, ReversiRLEnvironment.BLACK, ReversiRLEnvironment.WHITE])('success %p', agent => { const env = new ReversiRLEnvironment() env.reset(0, 1) - const black = agent === ReversiRLEnvironment.BLACK ? ReversiRLEnvironment.OWN : ReversiRLEnvironment.OTHER - const white = agent === ReversiRLEnvironment.BLACK ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.OWN + const black = + agent === undefined || agent === ReversiRLEnvironment.BLACK + ? ReversiRLEnvironment.OWN + : ReversiRLEnvironment.OTHER + const white = + agent === undefined || agent === ReversiRLEnvironment.BLACK + ? ReversiRLEnvironment.OTHER + : ReversiRLEnvironment.OWN const state = env.state(agent) expect(state).toHaveLength(1 + 8 * 8) @@ -53,9 +80,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) + (i === 3 && j === 4) || (i === 4 && j === 3) ? black - : (i === 3 && j === 4) || (i === 4 && j === 3) + : (i === 3 && j === 3) || (i === 4 && j === 4) ? 
white : ReversiRLEnvironment.EMPTY ) @@ -78,11 +105,11 @@ describe('env', () => { }) describe('step', () => { - test('success', () => { + test.each([undefined, ReversiRLEnvironment.BLACK])('success agent: %p', agent => { const env = new ReversiRLEnvironment() env.reset() - const info = env.step(['3_5'], ReversiRLEnvironment.BLACK) + const info = env.step(['f5'], agent) expect(info.invalid).toBeFalsy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -93,9 +120,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) || (i === 3 && j === 4) || (i === 3 && j === 5) + (i === 3 && j === 4) || (i === 4 && j === 3) || (i === 4 && j === 4) || (i === 4 && j === 5) ? ReversiRLEnvironment.OWN - : i === 4 && j === 3 + : i === 3 && j === 3 ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.EMPTY ) @@ -105,11 +132,56 @@ describe('env', () => { expect(env.epoch).toBe(1) }) + test('no action', () => { + const env = new ReversiRLEnvironment() + env.reset() + + env.step(['f5']) + env.step(['f6']) + env.step(['d3']) + env.step(['g5']) + env.step(['h5']) + env.step(['h4']) + env.step(['f7']) + env.step(['h6']) + + const info = env.step([ReversiRLEnvironment.EMPTY]) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeFalsy() + expect(info.reward).toBe(0) + expect(env.epoch).toBe(9) + }) + + test('win black', () => { + const env = new ReversiRLEnvironment() + env.reset() + + env.step(['f5']) + env.step(['d6']) + env.step(['c5']) + env.step(['f4']) + env.step(['e3']) + env.step(['f6']) + env.step(['g5']) + env.step(['e6']) + + const info = env.step(['e7']) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + expect(env.epoch).toBe(9) + + const info2 = env.step([ReversiRLEnvironment.EMPTY]) + expect(info2.invalid).toBeFalsy() + expect(info2.done).toBeTruthy() + expect(info2.reward).toBe(-1) + }) + test('invalid position', () => { const env = new ReversiRLEnvironment() const state = env.reset() - const info = env.step(['0_0'], ReversiRLEnvironment.BLACK) + const info = env.step(['a1'], ReversiRLEnvironment.BLACK) expect(info.invalid).toBeTruthy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -133,7 +205,7 @@ describe('env', () => { const env = new ReversiRLEnvironment() env.reset() - const info = env.step(['0_0'], ReversiRLEnvironment.WHITE) + const info = env.step(['a1'], ReversiRLEnvironment.WHITE) expect(info.invalid).toBeTruthy() expect(info.done).toBeFalsy() expect(info.reward).toBe(0) @@ -143,7 +215,7 @@ describe('env', () => { test('failed before reset', () => { const env = new ReversiRLEnvironment() - expect(() => env.step(['3_5'], ReversiRLEnvironment.BLACK)).toThrow( + expect(() => env.step(['f4'], ReversiRLEnvironment.BLACK)).toThrow( 'Agent does not exist. Call reset to set agents.' 
) }) @@ -151,16 +223,16 @@ describe('env', () => { test.each([1, 4])('failed %p', agent => { const env = new ReversiRLEnvironment() env.reset() - expect(() => env.step(['3_5'], agent)).toThrow('Unknown agent.') + expect(() => env.step(['f4'], agent)).toThrow('Unknown agent.') }) }) describe('test', () => { - test('step', () => { + test.each([undefined, ReversiRLEnvironment.BLACK])('step agent: %p', agent => { const env = new ReversiRLEnvironment() const orgstate = env.reset() - const info = env.test(orgstate, ['3_5'], ReversiRLEnvironment.BLACK) + const info = env.test(orgstate, ['f5'], agent) expect(info.invalid).toBeFalsy() const state = info.state @@ -169,9 +241,9 @@ describe('env', () => { for (let i = 0, p = 1; i < 8; i++) { for (let j = 0; j < 8; j++, p++) { expect(state[p]).toBe( - (i === 3 && j === 3) || (i === 4 && j === 4) || (i === 3 && j === 4) || (i === 3 && j === 5) + (i === 3 && j === 4) || (i === 4 && j === 3) || (i === 4 && j === 4) || (i === 4 && j === 5) ? ReversiRLEnvironment.OWN - : i === 4 && j === 3 + : i === 3 && j === 3 ? ReversiRLEnvironment.OTHER : ReversiRLEnvironment.EMPTY ) @@ -189,10 +261,10 @@ describe('board', () => { const board = env._board expect(board.size).toEqual([8, 8]) - expect(board.at([3, 3])).toBe(ReversiRLEnvironment.BLACK) - expect(board.at([4, 4])).toBe(ReversiRLEnvironment.BLACK) - expect(board.at([3, 4])).toBe(ReversiRLEnvironment.WHITE) - expect(board.at([4, 3])).toBe(ReversiRLEnvironment.WHITE) + expect(board.at([3, 3])).toBe(ReversiRLEnvironment.WHITE) + expect(board.at([4, 4])).toBe(ReversiRLEnvironment.WHITE) + expect(board.at([3, 4])).toBe(ReversiRLEnvironment.BLACK) + expect(board.at([4, 3])).toBe(ReversiRLEnvironment.BLACK) expect(board.finish).toBeFalsy() expect(board.count.black).toBe(2) expect(board.count.white).toBe(2) @@ -201,49 +273,131 @@ describe('board', () => { expect(board.score(ReversiRLEnvironment.WHITE)).toBe(0) }) - test('choices', () => { - const env = new ReversiRLEnvironment() - const board = env._board + describe('winner', () => { + test('random', () => { + const env = new ReversiRLEnvironment() + const board = env._board + let turn = ReversiRLEnvironment.BLACK - const choiceBlack = board.choices(ReversiRLEnvironment.BLACK) - expect(choiceBlack).toEqual([ - [2, 4], - [3, 5], - [4, 2], - [5, 3], - ]) - const choiceWhite = board.choices(ReversiRLEnvironment.WHITE) - expect(choiceWhite).toEqual([ - [2, 3], - [3, 2], - [4, 5], - [5, 4], - ]) - }) + while (!board.finish) { + const choices = board.choices(turn) + if (choices.length === 0) { + turn = board.nextTurn(turn) + continue + } - describe('set', () => { - test('success', () => { + board.set(choices[0], turn) + turn = board.nextTurn(turn) + } + + expect(board.winner).not.toBeNull() + }) + + test('black', () => { const env = new ReversiRLEnvironment() const board = env._board - expect(board.at([2, 4])).toBe(ReversiRLEnvironment.EMPTY) - expect(board.at([3, 4])).toBe(ReversiRLEnvironment.WHITE) + const ps = ['f5', 'd6', 'c5', 'f4', 'e3', 'f6', 'g5', 'e6', 'e7'] + for (let i = 0; i < ps.length; i++) { + board.set(ps[i], i % 2 === 0 ? 
ReversiRLEnvironment.BLACK : ReversiRLEnvironment.WHITE) + } + expect(board.winner).toBe(ReversiRLEnvironment.BLACK) + }) + + test('white', () => { + const env = new ReversiRLEnvironment() + const board = env._board - const success = board.set([2, 4], ReversiRLEnvironment.BLACK) - expect(success).toBeTruthy() - expect(board.at([2, 4])).toBe(ReversiRLEnvironment.BLACK) - expect(board.at([3, 4])).toBe(ReversiRLEnvironment.BLACK) + const ps = ['f5', 'f6', 'c4', 'f4', 'e6', 'b4', 'g6', 'f7', 'e8', 'g8', 'g5', 'h5'] + for (let i = 0; i < ps.length; i++) { + board.set(ps[i], i % 2 === 0 ? ReversiRLEnvironment.BLACK : ReversiRLEnvironment.WHITE) + } + expect(board.winner).toBe(ReversiRLEnvironment.WHITE) }) - test('fail', () => { + test('draw', () => { const env = new ReversiRLEnvironment() const board = env._board - const success = board.set([2, 4], ReversiRLEnvironment.WHITE) - expect(success).toBeFalsy() + board.set('f5', ReversiRLEnvironment.BLACK) + board.set('d6', ReversiRLEnvironment.WHITE) + board.set('c7', ReversiRLEnvironment.BLACK) + board.set('f3', ReversiRLEnvironment.WHITE) + board.set('e3', ReversiRLEnvironment.BLACK) + board.set('d3', ReversiRLEnvironment.WHITE) + board.set('g2', ReversiRLEnvironment.BLACK) + board.set('f4', ReversiRLEnvironment.WHITE) + board.set('c6', ReversiRLEnvironment.BLACK) + board.set('d7', ReversiRLEnvironment.WHITE) + board.set('g4', ReversiRLEnvironment.BLACK) + board.set('b7', ReversiRLEnvironment.WHITE) + board.set('a8', ReversiRLEnvironment.BLACK) + board.set('g3', ReversiRLEnvironment.WHITE) + board.set('c8', ReversiRLEnvironment.BLACK) + board.set('h1', ReversiRLEnvironment.WHITE) + board.set('c4', ReversiRLEnvironment.BLACK) + board.set('b8', ReversiRLEnvironment.WHITE) + board.set('f2', ReversiRLEnvironment.BLACK) + board.set('e1', ReversiRLEnvironment.WHITE) + board.set('f1', ReversiRLEnvironment.BLACK) + board.set('d8', ReversiRLEnvironment.WHITE) + board.set('e8', ReversiRLEnvironment.BLACK) + board.set('a7', ReversiRLEnvironment.WHITE) + board.set('a6', ReversiRLEnvironment.BLACK) + board.set('b6', ReversiRLEnvironment.WHITE) + board.set('a5', ReversiRLEnvironment.BLACK) + board.set('g1', ReversiRLEnvironment.WHITE) + board.set('b5', ReversiRLEnvironment.BLACK) + board.set('e2', ReversiRLEnvironment.WHITE) + board.set('h2', ReversiRLEnvironment.BLACK) + board.set('c3', ReversiRLEnvironment.WHITE) + board.set('e6', ReversiRLEnvironment.BLACK) + board.set('c5', ReversiRLEnvironment.WHITE) + board.set('b4', ReversiRLEnvironment.BLACK) + board.set('e7', ReversiRLEnvironment.WHITE) + board.set('b3', ReversiRLEnvironment.BLACK) + board.set('d2', ReversiRLEnvironment.WHITE) + board.set('c1', ReversiRLEnvironment.BLACK) + board.set('d1', ReversiRLEnvironment.WHITE) + board.set('f8', ReversiRLEnvironment.BLACK) + board.set('b1', ReversiRLEnvironment.WHITE) + board.set('f7', ReversiRLEnvironment.BLACK) + board.set('g6', ReversiRLEnvironment.WHITE) + board.set('f6', ReversiRLEnvironment.BLACK) + board.set('h3', ReversiRLEnvironment.WHITE) + board.set('h6', ReversiRLEnvironment.BLACK) + board.set('a3', ReversiRLEnvironment.WHITE) + board.set('c2', ReversiRLEnvironment.BLACK) + board.set('h7', ReversiRLEnvironment.WHITE) + board.set('a2', ReversiRLEnvironment.BLACK) + board.set('a4', ReversiRLEnvironment.WHITE) + board.set('h8', ReversiRLEnvironment.BLACK) + board.set('g7', ReversiRLEnvironment.WHITE) + board.set('h5', ReversiRLEnvironment.BLACK) + board.set('a1', ReversiRLEnvironment.WHITE) + board.set('g8', ReversiRLEnvironment.BLACK) + board.set('b2', 
ReversiRLEnvironment.WHITE) + board.set('g5', ReversiRLEnvironment.BLACK) + board.set('h4', ReversiRLEnvironment.WHITE) + expect(board.winner).toBeNull() }) }) + test('toString', () => { + const env = new ReversiRLEnvironment() + const board = env._board + + expect(board.toString()).toBe(`- - - - - - - - +- - - - - - - - +- - - - - - - - +- - - o x - - - +- - - x o - - - +- - - - - - - - +- - - - - - - - +- - - - - - - - +`) + }) + test('nextTurn', () => { const env = new ReversiRLEnvironment() const board = env._board @@ -265,22 +419,61 @@ describe('board', () => { } }) - test('winner', () => { + test.each([[3, 3], 'd4'])('at %p', p => { const env = new ReversiRLEnvironment() const board = env._board - let turn = ReversiRLEnvironment.BLACK - while (!board.finish) { - const choices = board.choices(turn) - if (choices.length === 0) { - turn = board.nextTurn(turn) - continue - } + expect(board.at(p)).toBe(ReversiRLEnvironment.WHITE) + }) - board.set(choices[0], turn) - turn = board.nextTurn(turn) - } + describe('set', () => { + test.each([[2, 3], 'd3'])('success %p', p => { + const env = new ReversiRLEnvironment() + const board = env._board + + expect(board.at([2, 3])).toBe(ReversiRLEnvironment.EMPTY) + expect(board.at([3, 3])).toBe(ReversiRLEnvironment.WHITE) + + const success = board.set(p, ReversiRLEnvironment.BLACK) + expect(success).toBeTruthy() + expect(board.at([2, 3])).toBe(ReversiRLEnvironment.BLACK) + expect(board.at([3, 3])).toBe(ReversiRLEnvironment.BLACK) + }) + + test('fail', () => { + const env = new ReversiRLEnvironment() + const board = env._board - expect(board.winner).not.toBeNull() + const success = board.set([2, 3], ReversiRLEnvironment.WHITE) + expect(success).toBeFalsy() + }) + + test('out of bounds', () => { + const env = new ReversiRLEnvironment() + const board = env._board + + const success = board.set([-1, -1], ReversiRLEnvironment.WHITE) + expect(success).toBeFalsy() + }) + }) + + test('choices', () => { + const env = new ReversiRLEnvironment() + const board = env._board + + const choiceBlack = board.choices(ReversiRLEnvironment.BLACK) + expect(choiceBlack).toEqual([ + [2, 3], + [3, 2], + [4, 5], + [5, 4], + ]) + const choiceWhite = board.choices(ReversiRLEnvironment.WHITE) + expect(choiceWhite).toEqual([ + [2, 4], + [3, 5], + [4, 2], + [5, 3], + ]) }) }) From af11b3b535dcbb1caeeeb48548289b98951b8f4f Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Wed, 27 Sep 2023 22:41:34 +0900 Subject: [PATCH 2/3] Forgot to commit and format --- lib/rl/acrobot.js | 23 ++++------------------- tests/lib/rl/inhypercube.test.js | 2 +- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/lib/rl/acrobot.js b/lib/rl/acrobot.js index 3780ab35c..f6bb0b75e 100644 --- a/lib/rl/acrobot.js +++ b/lib/rl/acrobot.js @@ -46,21 +46,6 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase { ] } - set reward(value) { - this._reward = { - goal: 0, - step: -1, - fail: 0, - } - if (value === 'achieve') { - this._reward = { - goal: 0, - step: -1, - fail: 0, - } - } - } - reset() { super.reset() this._theta1 = Math.random() * 0.2 - 0.1 @@ -111,11 +96,11 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase { const clip = (x, min, max) => (x < min ? min : x > max ? 
max : x) t1 += this._dt * dt1 - if (t1 < -Math.PI) t1 = t1 + 2 * Math.PI - if (t1 > Math.PI) t1 = t1 - 2 * Math.PI + while (t1 < -Math.PI) t1 += 2 * Math.PI + while (t1 > Math.PI) t1 -= 2 * Math.PI t2 += this._dt * dt2 - if (t2 < -Math.PI) t2 = t2 + 2 * Math.PI - if (t2 > Math.PI) t2 = t2 - 2 * Math.PI + while (t2 < -Math.PI) t2 += 2 * Math.PI + while (t2 > Math.PI) t2 -= 2 * Math.PI dt1 = clip(dt1 + this._dt * ddt1, -this._max_vel1, this._max_vel1) dt2 = clip(dt2 + this._dt * ddt2, -this._max_vel2, this._max_vel2) diff --git a/tests/lib/rl/inhypercube.test.js b/tests/lib/rl/inhypercube.test.js index 8831cbe7f..c7e308c85 100644 --- a/tests/lib/rl/inhypercube.test.js +++ b/tests/lib/rl/inhypercube.test.js @@ -18,7 +18,7 @@ describe('actions', () => { }) }) -test.each([1, 2, 3])('states %dd', (d) => { +test.each([1, 2, 3])('states %dd', d => { const env = new InHypercubeRLEnvironment(d) expect(env.states).toHaveLength(d * 2) }) From cfd641a9f1bc40f9fba0259957f89011bf600ddc Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Thu, 28 Sep 2023 21:20:11 +0900 Subject: [PATCH 3/3] Add tests --- tests/lib/rl/gomoku.test.js | 90 ++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/tests/lib/rl/gomoku.test.js b/tests/lib/rl/gomoku.test.js index 60b2db198..9e0c84b5c 100644 --- a/tests/lib/rl/gomoku.test.js +++ b/tests/lib/rl/gomoku.test.js @@ -118,6 +118,32 @@ describe('env', () => { expect(env.epoch).toBe(1) }) + test('win black', () => { + const env = new GomokuRLEnvironment() + env.reset() + + env.step(['0_0'], GomokuRLEnvironment.BLACK) + env.step(['1_0'], GomokuRLEnvironment.WHITE) + env.step(['0_1'], GomokuRLEnvironment.BLACK) + env.step(['1_1'], GomokuRLEnvironment.WHITE) + env.step(['0_2'], GomokuRLEnvironment.BLACK) + env.step(['1_2'], GomokuRLEnvironment.WHITE) + env.step(['0_3'], GomokuRLEnvironment.BLACK) + env.step(['1_3'], GomokuRLEnvironment.WHITE) + + const info = env.step(['0_4'], GomokuRLEnvironment.BLACK) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(1) + expect(env.epoch).toBe(9) + + const info2 = env.step(['1_5'], GomokuRLEnvironment.WHITE) + expect(info2.invalid).toBeFalsy() + expect(info2.done).toBeTruthy() + expect(info2.reward).toBe(-1) + expect(env.epoch).toBe(10) + }) + test('invalid position', () => { const env = new GomokuRLEnvironment() env.reset() @@ -261,37 +287,23 @@ describe('board', () => { } }) - describe('choices', () => { - test('all', () => { + describe('score', () => { + test('win', () => { const env = new GomokuRLEnvironment() const board = env._board - const c = [] - for (let i = 0; i < board.size[0]; i++) { - for (let j = 0; j < board.size[1]; j++) { - c.push([i, j]) - } - } - - const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) - expect(choiceBlack).toEqual(c) - const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) - expect(choiceWhite).toEqual(c) - }) - - test('finish', () => { - const env = new GomokuRLEnvironment() - const board = env._board board.set([0, 0], GomokuRLEnvironment.BLACK) + board.set([1, 0], GomokuRLEnvironment.WHITE) board.set([0, 1], GomokuRLEnvironment.BLACK) + board.set([1, 1], GomokuRLEnvironment.WHITE) board.set([0, 2], GomokuRLEnvironment.BLACK) + board.set([1, 2], GomokuRLEnvironment.WHITE) board.set([0, 3], GomokuRLEnvironment.BLACK) + board.set([1, 3], GomokuRLEnvironment.WHITE) board.set([0, 4], GomokuRLEnvironment.BLACK) - const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) - 
expect(choiceBlack).toHaveLength(0) - const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) - expect(choiceWhite).toHaveLength(0) + expect(board.score(GomokuRLEnvironment.BLACK)).toBe(6391) + expect(board.score(GomokuRLEnvironment.WHITE)).toBe(-6391) }) }) @@ -317,6 +329,40 @@ describe('board', () => { }) }) + describe('choices', () => { + test('all', () => { + const env = new GomokuRLEnvironment() + const board = env._board + + const c = [] + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + c.push([i, j]) + } + } + + const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) + expect(choiceBlack).toEqual(c) + const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) + expect(choiceWhite).toEqual(c) + }) + + test('finish', () => { + const env = new GomokuRLEnvironment() + const board = env._board + board.set([0, 0], GomokuRLEnvironment.BLACK) + board.set([0, 1], GomokuRLEnvironment.BLACK) + board.set([0, 2], GomokuRLEnvironment.BLACK) + board.set([0, 3], GomokuRLEnvironment.BLACK) + board.set([0, 4], GomokuRLEnvironment.BLACK) + + const choiceBlack = board.choices(GomokuRLEnvironment.BLACK) + expect(choiceBlack).toHaveLength(0) + const choiceWhite = board.choices(GomokuRLEnvironment.WHITE) + expect(choiceWhite).toHaveLength(0) + }) + }) + describe('row', () => { test('empty', () => { const env = new GomokuRLEnvironment()