ai-on-browser · ishii-norimi · Sep 29, 2023 · Sep 27, 2023 · Sep 27, 2023 · Sep 28, 2023
diff --git a/js/renderer/rl/draughts.js b/js/renderer/rl/draughts.js
@@ -160,7 +160,7 @@ class ManualPlayer {
 		for (let i = 0; i < board.size[0]; i++) {
 			this._check[i] = []
 			for (let j = 0; j < board.size[1]; j++) {
-				if ((i + j) % 2 > 0) continue
+				if ((i + j) % 2 === 0) continue
 				this._check[i][j] = document.createElementNS('http://www.w3.org/2000/svg', 'rect')
 				this._check[i][j].setAttribute('x', dw * j)
 				this._check[i][j].setAttribute('y', dh * i)

diff --git a/lib/rl/acrobot.js b/lib/rl/acrobot.js
@@ -46,21 +46,6 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase {
 		]
 	}
 
-	set reward(value) {
-		this._reward = {
-			goal: 0,
-			step: -1,
-			fail: 0,
-		}
-		if (value === 'achieve') {
-			this._reward = {
-				goal: 0,
-				step: -1,
-				fail: 0,
-			}
-		}
-	}
-
 	reset() {
 		super.reset()
 		this._theta1 = Math.random() * 0.2 - 0.1
@@ -111,11 +96,11 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase {
 
 		const clip = (x, min, max) => (x < min ? min : x > max ? max : x)
 		t1 += this._dt * dt1
-		if (t1 < -Math.PI) t1 = t1 + 2 * Math.PI
-		if (t1 > Math.PI) t1 = t1 - 2 * Math.PI
+		while (t1 < -Math.PI) t1 += 2 * Math.PI
+		while (t1 > Math.PI) t1 -= 2 * Math.PI
 		t2 += this._dt * dt2
-		if (t2 < -Math.PI) t2 = t2 + 2 * Math.PI
-		if (t2 > Math.PI) t2 = t2 - 2 * Math.PI
+		while (t2 < -Math.PI) t2 += 2 * Math.PI
+		while (t2 > Math.PI) t2 -= 2 * Math.PI
 		dt1 = clip(dt1 + this._dt * ddt1, -this._max_vel1, this._max_vel1)
 		dt2 = clip(dt2 + this._dt * ddt2, -this._max_vel2, this._max_vel2)
 

diff --git a/lib/rl/draughts.js b/lib/rl/draughts.js
@@ -47,7 +47,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
 		]
 		const checkBound = (x, y) => 0 <= x && x < this._size[0] && 0 <= y && y < this._size[1]
 		for (let i = 0; i < this._size[0]; i++) {
-			for (let j = i % 2 === 0 ? 0 : 1; j < this._size[1]; j += 2) {
+			for (let j = i % 2 === 1 ? 0 : 1; j < this._size[1]; j += 2) {
 				let midpath = []
 				for (const [di, dj] of d) {
 					const i1 = i + di
@@ -93,18 +93,14 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
 	get states() {
 		const s = [[RED, WHITE]]
 		for (let i = 0; i < this._size[0]; i++) {
-			for (let j = 0; j < this._size[1]; j++) {
-				if (j % 2 === i % 2) {
-					s.push([
-						EMPTY,
-						DraughtsRLEnvironment.OWN,
-						DraughtsRLEnvironment.OWN | KING,
-						DraughtsRLEnvironment.OTHER,
-						DraughtsRLEnvironment.OTHER | KING,
-					])
-				} else {
-					s.push([EMPTY])
-				}
+			for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) {
+				s.push([
+					EMPTY,
+					DraughtsRLEnvironment.OWN,
+					DraughtsRLEnvironment.OWN | KING,
+					DraughtsRLEnvironment.OTHER,
+					DraughtsRLEnvironment.OTHER | KING,
+				])
 			}
 		}
 		return s
@@ -123,7 +119,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
 	_makeState(board, agentturn, gameturn) {
 		const s = [gameturn]
 		for (let i = 0; i < this._size[0]; i++) {
-			for (let j = 0; j < this._size[1]; j++) {
+			for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) {
 				const p = board.at([i, j])
 				if (p === EMPTY) {
 					s.push(EMPTY)
@@ -144,7 +140,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
 		const board = new DraughtsBoard(this._size, this._evaluation)
 		const opturn = turn === RED ? WHITE : RED
 		for (let i = 0, p = 1; i < this._size[0]; i++) {
-			for (let j = 0; j < this._size[1]; j++, p++) {
+			for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2, p++) {
 				if (state[p] === EMPTY) {
 					board._board[i][j] = EMPTY
 				} else {
@@ -241,6 +237,7 @@ class DraughtsBoard {
 	constructor(size, evaluator) {
 		this._evaluator = evaluator
 		this._size = size
+		this._lines = 3 // rows at each end of the board that start filled with pieces
 
 		this.reset()
 	}
@@ -280,6 +277,36 @@ class DraughtsBoard {
 		return null
 	}
 
+	/**
+	 * Render the board as text, one row per line.
+	 *
+	 * Cells are separated by a single space: `x` marks a red piece, `o` a white
+	 * piece and `-` any other cell. Every row, including the last, ends with `\n`.
+	 *
+	 * @returns {string} Text picture of the board
+	 */
+	toString() {
+		let buf = ''
+		for (let i = 0; i < this._size[0]; i++) {
+			for (let j = 0; j < this._size[1]; j++) {
+				if (j > 0) {
+					buf += ' '
+				}
+				// Test the colour bit instead of comparing for equality: a crowned
+				// piece is stored as `colour | KING` and would otherwise print as `-`.
+				if (this._board[i][j] & RED) {
+					buf += 'x'
+				} else if (this._board[i][j] & WHITE) {
+					buf += 'o'
+				} else {
+					buf += '-'
+				}
+			}
+			buf += '\n'
+		}
+		return buf
+	}
+
 	nextTurn(turn) {
 		if (turn === WHITE) {
 			return RED
@@ -310,20 +327,44 @@ class DraughtsBoard {
 		}
 	}
 
+	_num_to_pos(n) { // map a 1-based square number to a [row, column] pair; non-numbers pass through unchanged
+		if (typeof n !== 'number') {
+			return n
+		}
+		const r = Math.floor((n - 1) / this._size[1]) // index of the two-row band that contains square n
+		const c = (n - 1) % this._size[1] // offset of the square inside that band
+		if (c < (this._size[1] - 1) / 2) { // first part of the band: even row, odd columns
+			return [r * 2, c * 2 + 1]
+		} else { // rest of the band: odd row, even columns
+			return [r * 2 + 1, (c - Math.floor(this._size[1] / 2)) * 2]
+		}
+	}
+
 	at(p) {
+		if (typeof p === 'number') { // a bare number is a square index; convert it to [row, column] first
+			p = this._num_to_pos(p)
+		}
 		return this._board[p[0]][p[1]]
 	}
 
 	set(p, turn) {
-		let piece = this._board[p.from[0]][p.from[1]]
+		p = {
+			from: this._num_to_pos(p.from),
+			path: p.path.map(v => this._num_to_pos(v)),
+			jump: p.jump.map(v => this._num_to_pos(v)),
+		}
+		let piece = this.at(p.from)
 		if (!(turn & piece)) {
 			return false
 		}
+		if ((p.jump.length !== 0 || p.path.length !== 1) && p.jump.length !== p.path.length) {
+			return false
+		}
 		const nturn = this.nextTurn(turn)
-		if (p.jump.some(([i, j]) => !(this._board[i][j] & nturn))) {
+		if (p.jump.some(j => !(this.at(j) & nturn))) {
 			return false
 		}
-		if (p.path.some(([i, j]) => this._board[i][j] !== EMPTY)) {
+		if (p.path.some(j => this.at(j) !== EMPTY)) {
 			return false
 		}
 
@@ -334,6 +375,27 @@ class DraughtsBoard {
 			}
 		}
 
+		if (p.jump.length === 0) {
+			for (let i = 0; i < 2; i++) {
+				if (Math.abs(p.from[i] - p.path[0][i]) !== 1) {
+					return false
+				}
+			}
+		} else {
+			let pos = p.from
+			for (let k = 0; k < p.path.length; k++) {
+				for (let i = 0; i < 2; i++) {
+					if (Math.abs(pos[i] - p.jump[k][i]) !== 1) {
+						return false
+					}
+					if (Math.abs(p.jump[k][i] - p.path[k][i]) !== 1) {
+						return false
+					}
+				}
+				pos = p.path[k]
+			}
+		}
+
 		this._board[p.from[0]][p.from[1]] = EMPTY
 		for (const [i, j] of p.jump) {
 			this._board[i][j] = EMPTY
@@ -354,10 +416,10 @@ class DraughtsBoard {
 			this._board[i] = Array(this._size[1]).fill(EMPTY)
 		}
 		for (let i = 0; i < this._size[0]; i++) {
-			for (let j = 0; j < this._size[1]; j++) {
-				if (i < 3 && (i + j) % 2 === 0) {
+			for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) {
+				if (i < this._lines) {
 					this._board[i][j] = RED
-				} else if (this._size[0] - 3 <= i && (i + j) % 2 === 0) {
+				} else if (this._size[0] - this._lines <= i) {
 					this._board[i][j] = WHITE
 				}
 			}
@@ -418,9 +480,9 @@ class DraughtsBoard {
 				cp._board[x + dx * 2][y + dy * 2] = this._board[x][y]
 				cp._board[x][y] = EMPTY
 				cp._board[x + dx][y + dy] = EMPTY
-				if (turn === RED && x * dx * 2 === this._size[0] - 1) {
+				if (turn === RED && x + dx * 2 === this._size[0] - 1) {
 					cp._board[x + dx * 2][y + dy * 2] |= KING
-				} else if (turn === WHITE && x * dx * 2 === 0) {
+				} else if (turn === WHITE && x + dx * 2 === 0) {
 					cp._board[x + dx * 2][y + dy * 2] |= KING
 				}
 				const npath = cp.allPath(x + dx * 2, y + dy * 2, turn, false)

diff --git a/lib/rl/gomoku.js b/lib/rl/gomoku.js
@@ -190,6 +190,24 @@ class GomokuBoard {
 		return null
 	}
 
+	/**
+	 * Build a printable picture of the board.
+	 *
+	 * @returns {string} One line per row; cells are `x` (black), `o` (white) or `-`, separated by spaces
+	 */
+	toString() {
+		const glyph = piece => (piece === BLACK ? 'x' : piece === WHITE ? 'o' : '-')
+		let text = ''
+		for (let r = 0; r < this._size[0]; r++) {
+			const cells = []
+			for (let c = 0; c < this._size[1]; c++) {
+				cells.push(glyph(this._board[r][c]))
+			}
+			text += `${cells.join(' ')}\n`
+		}
+		return text
+	}
+
 	nextTurn(turn) {
 		return turn === BLACK ? WHITE : BLACK
 	}

diff --git a/lib/rl/inhypercube.js b/lib/rl/inhypercube.js
@@ -68,7 +68,7 @@ export default class InHypercubeRLEnvironment extends RLEnvironmentBase {
 		}
 
 		const success = p[this._success_dim] <= -this._fail_position
-		const fail = !success && p.every(v => Math.abs(v) >= this._fail_position)
+		const fail = !success && p.some(v => Math.abs(v) >= this._fail_position)
 		const done = this.epoch >= this._max_step || success || fail
 		const reward = fail ? this._reward.fail : success ? this._reward.goal : this._reward.step
 		return {

diff --git a/lib/rl/reversi.js b/lib/rl/reversi.js
@@ -46,7 +46,9 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
 		const a = [EMPTY]
 		for (let i = 0; i < this._size[0]; i++) {
 			for (let j = 0; j < this._size[1]; j++) {
-				a.push(`${i}_${j}`)
+				// Label each square as column letter + 1-based row number (e.g. `c4`);
+				// the letter must follow the column index `j`, not the row index `i`.
+				a.push(`${String.fromCharCode('a'.charCodeAt(0) + j)}${i + 1}`)
 			}
 		}
 		return [a]
@@ -167,8 +167,7 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
 				invalid,
 			}
 		}
-		const choice = action[0].split('_').map(v => +v)
-		const changed = board.set(choice, agent)
+		const changed = board.set(action[0], agent)
 		const done = board.finish
 		if (!changed) {
 			return {
@@ -233,6 +232,24 @@ class ReversiBoard {
 		return null
 	}
 
+	/**
+	 * Produce a text rendering of the board.
+	 *
+	 * @returns {string} Newline-terminated rows of space-separated cells: `x` black, `o` white, `-` otherwise
+	 */
+	toString() {
+		let out = ''
+		for (let row = 0; row < this._size[0]; row++) {
+			for (let col = 0; col < this._size[1]; col++) {
+				const piece = this._board[row][col]
+				const mark = piece === BLACK ? 'x' : piece === WHITE ? 'o' : '-'
+				out += col > 0 ? ` ${mark}` : mark
+			}
+			out += '\n'
+		}
+		return out
+	}
+
 	nextTurn(turn) {
 		return flipPiece(turn)
 	}
@@ -260,10 +279,19 @@ class ReversiBoard {
 	}
 
 	at(p) {
+		if (typeof p === 'string') {
+			// Square label: column letter followed by the 1-based row number.
+			// Parse every digit after the letter so rows past 9 (`a10`) resolve correctly.
+			p = [Number.parseInt(p.slice(1), 10) - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
+		}
 		return this._board[p[0]][p[1]]
 	}
 
 	set(p, turn) {
+		if (typeof p === 'string') {
+			// Label format: letter = column, all remaining digits = 1-based row.
+			p = [Number.parseInt(p.slice(1), 10) - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
+		}
 		const flips = this.flipPositions(p[0], p[1], turn)
 		if (flips.length === 0) {
 			return false
@@ -282,10 +307,10 @@ class ReversiBoard {
 		}
 		const cx = Math.floor(this._size[0] / 2)
 		const cy = Math.floor(this._size[1] / 2)
-		this._board[cx - 1][cy - 1] = BLACK
-		this._board[cx - 1][cy] = WHITE
-		this._board[cx][cy - 1] = WHITE
-		this._board[cx][cy] = BLACK
+		this._board[cx - 1][cy - 1] = WHITE
+		this._board[cx - 1][cy] = BLACK
+		this._board[cx][cy - 1] = BLACK
+		this._board[cx][cy] = WHITE
 	}
 
 	choices(turn) {

diff --git a/tests/lib/rl/acrobot.test.js b/tests/lib/rl/acrobot.test.js
@@ -80,6 +80,41 @@ describe('test', () => {
 		expect(info.state[3]).toBeGreaterThan(0)
 	})
 
+	test('small t1, t2', () => { // angles that start below -PI must be wrapped back into range
+		const env = new AcrobotRLEnvironment()
+		const info = env.test([-4, -13, 0, 0], [0])
+		expect(info.done).toBeFalsy()
+		expect(info.reward).toBe(-1)
+		expect(info.state[0]).toBeCloseTo(-4 + 2 * Math.PI) // one full turn is enough for -4
+		expect(info.state[1]).toBeCloseTo(-13 + 4 * Math.PI) // -13 needs two full turns, so a single `if` would not suffice
+		expect(info.state[2]).toBeLessThan(0)
+		expect(info.state[3]).toBeGreaterThan(0)
+	})
+
+	test('big t1, t2', () => { // angles that start above PI must be wrapped back into range
+		const env = new AcrobotRLEnvironment()
+		const info = env.test([26, 4, 0, 0], [0])
+		expect(info.done).toBeFalsy()
+		expect(info.reward).toBe(-1)
+		expect(info.state[0]).toBeCloseTo(26 - 8 * Math.PI) // 26 needs four full turns
+		expect(info.state[1]).toBeCloseTo(4 - 2 * Math.PI) // one full turn is enough for 4
+		expect(info.state[2]).toBeLessThan(0)
+		expect(info.state[3]).toBeGreaterThan(0)
+	})
+
+	test('clip dt1, dt2', () => { // velocities that start far outside the allowed range
+		const env = new AcrobotRLEnvironment()
+		const info = env.test([0, 0, -100, 100], [0])
+		expect(info.done).toBeFalsy()
+		expect(info.reward).toBe(-1)
+		for (let i = 0; i < 2; i++) { // both angles must still end up inside [-PI, PI]
+			expect(info.state[i]).toBeLessThanOrEqual(Math.PI)
+			expect(info.state[i]).toBeGreaterThanOrEqual(-Math.PI)
+		}
+		expect(info.state[2]).toBeCloseTo(-4 * Math.PI) // presumably the lower velocity limit of the first joint — confirm against the environment
+		expect(info.state[3]).toBeCloseTo(9 * Math.PI) // presumably the upper velocity limit of the second joint — confirm against the environment
+	})
+
 	test('goal', () => {
 		const env = new AcrobotRLEnvironment()
 		const info = env.test([Math.PI, Math.PI / 2, 0, 0], [0])