From 44ff1c6ed1e8137e39b3a404f3459ecdb9b7a7c7 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Sat, 20 Sep 2025 12:28:47 +0900 Subject: [PATCH 1/2] Add Gem Puzzle Environment and Renderer --- README.md | 1 + js/platform/rl.js | 13 +- js/renderer/rl/gem_puzzle.js | 205 +++++++++++ lib/rl/gem_puzzle.js | 588 ++++++++++++++++++++++++++++++++ tests/lib/rl/gem_puzzle.test.js | 453 ++++++++++++++++++++++++ 5 files changed, 1259 insertions(+), 1 deletion(-) create mode 100644 js/renderer/rl/gem_puzzle.js create mode 100644 lib/rl/gem_puzzle.js create mode 100644 tests/lib/rl/gem_puzzle.test.js diff --git a/README.md b/README.md index 6600cd186..419e9a41e 100644 --- a/README.md +++ b/README.md @@ -200,6 +200,7 @@ for (let i = 0; i < n; i++) { | maze | A maze on a fine grid plane. | | waterball | Moving amidst the drift of bait and poison. | | blackjack | Blackjack game. | +| gem puzzle | 15 puzzle. | | draughts | Draughts game. | | reversi | Reversi game. | | gomoku | Gomoku game. | diff --git a/js/platform/rl.js b/js/platform/rl.js index b419cec56..657781eed 100644 --- a/js/platform/rl.js +++ b/js/platform/rl.js @@ -8,7 +8,18 @@ import RLRenderer from '../renderer/rl.js' const LoadedRLEnvironmentClass = {} const AIEnv = { - MD: ['grid', 'cartpole', 'mountaincar', 'acrobot', 'pendulum', 'maze', 'blackjack', 'waterball', 'breaker'], + MD: [ + 'grid', + 'cartpole', + 'mountaincar', + 'acrobot', + 'pendulum', + 'maze', + 'blackjack', + 'waterball', + 'breaker', + 'gem_puzzle', + ], GM: ['reversi', 'draughts', 'gomoku'], } diff --git a/js/renderer/rl/gem_puzzle.js b/js/renderer/rl/gem_puzzle.js new file mode 100644 index 000000000..5cde73e27 --- /dev/null +++ b/js/renderer/rl/gem_puzzle.js @@ -0,0 +1,205 @@ +import GemPuzzleRLEnvironment from '../../../lib/rl/gem_puzzle.js' + +export default class GemPuzzleRenderer { + constructor(renderer) { + this.renderer = renderer + this._init_menu() + } + + _init_menu() { + const r = this.renderer.setting.rl.configElement + 
r.replaceChildren() + r.appendChild(document.createTextNode('Size ')) + const size = document.createElement('input') + size.type = 'number' + size.min = 2 + size.max = 10 + size.value = this.renderer.env._size[0] + size.onchange = () => { + this.renderer.env._size = [+size.value, +size.value] + this.renderer.env._board._size = [+size.value, +size.value] + this.renderer.platform.init() + this.renderer.setting.ml.refresh() + } + r.appendChild(size) + } + + init(r) { + const width = 500 + const height = 500 + const base = r.appendChild(document.createElement('div')) + base.style.position = 'relative' + this._envrenderer = new Renderer(this.renderer.env, { width, height, g: base }) + this._envrenderer.init() + + this._manualButton = document.createElement('button') + this._manualButton.innerText = 'Manual' + this._manualButton.onclick = async () => { + this._game = new GemPuzzleGame(this.renderer.platform) + this._autoButton.disabled = true + this._manualButton.disabled = true + this._cancelButton.style.display = 'inline' + await this._game.start() + this._autoButton.disabled = false + this._manualButton.disabled = false + this._game = null + } + r.appendChild(this._manualButton) + + this._cancelButton = document.createElement('button') + this._cancelButton.innerText = 'Cancel' + this._cancelButton.onclick = async () => { + this._game.cancel() + this._cancelButton.style.display = 'none' + } + this._cancelButton.style.display = 'none' + r.appendChild(this._cancelButton) + + this._autoButton = document.createElement('button') + this._autoButton.innerText = 'Auto' + this._autoButton.onclick = async () => { + this._game = new GemPuzzleGame(this.renderer.platform) + this._autoButton.disabled = true + this._manualButton.disabled = true + await this._game.start(true) + this._autoButton.disabled = false + this._manualButton.disabled = false + this._game = null + } + r.appendChild(this._autoButton) + } + + render() { + const displayButton = this._game || 
this.renderer.platform._manager._modelname ? 'none' : null + this._manualButton.style.display = displayButton + this._autoButton.style.display = displayButton + this._envrenderer.render() + } +} + +class Renderer { + constructor(env, config = {}) { + this.env = env + + this._size = [config.width || 200, config.height || 200] + + this._points = [] + + this._q = null + + this._render_blocks = [] + + this.svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg') + this.svg.setAttribute('width', this._size[0]) + this.svg.setAttribute('height', this._size[1]) + this.svg.setAttribute('viewbox', '0 0 200 200') + if (config.g) { + config.g.replaceChildren(this.svg) + } + } + + init() { + const height = this._size[1] + const width = this._size[0] + const dy = height / this.env._size[0] + const dx = width / this.env._size[1] + this._render_blocks = [] + for (let i = 0; i < this.env._size[0]; i++) { + this._render_blocks[i] = [] + for (let j = 0; j < this.env._size[1]; j++) { + const g = (this._render_blocks[i][j] = document.createElementNS('http://www.w3.org/2000/svg', 'g')) + g.classList.add('grid') + g.setAttribute('stroke-width', 1) + g.setAttribute('stroke', 'black') + g.setAttribute('stroke-opacity', 0.2) + this.svg.appendChild(g) + + const rect = document.createElementNS('http://www.w3.org/2000/svg', 'rect') + rect.setAttribute('x', dx * j) + rect.setAttribute('y', dy * i) + rect.setAttribute('width', dx) + rect.setAttribute('height', dy) + rect.setAttribute('fill', 'white') + g.appendChild(rect) + const text = document.createElementNS('http://www.w3.org/2000/svg', 'text') + text.classList.add('value') + text.setAttribute('x', dx * (j + 0.5)) + text.setAttribute('y', dy * (i + 0.5)) + text.setAttribute('font-size', 14) + text.setAttribute('user-select', 'none') + g.appendChild(text) + } + } + } + + render() { + const board = this.env._board + + for (let i = 0; i < this.env._size[0]; i++) { + for (let j = 0; j < this.env._size[1]; j++) { +
this._render_blocks[i][j].querySelector('text.value').replaceChildren(board.at([i, j]) ?? '') + if (board.at([i, j]) === null) { + this._render_blocks[i][j].querySelector('rect').setAttribute('fill', 'rgba(0, 0, 0, 0.5)') + } else { + this._render_blocks[i][j].querySelector('rect').setAttribute('fill', 'white') + } + } + } + } +} + +class GemPuzzleGame { + constructor(platform) { + this._platform = platform + this._env = platform.env + } + + async start(auto = false) { + this._env.reset() + this._platform.render() + if (auto) { + const path = this._env._board.solve() + for (const m of path) { + await new Promise(resolve => setTimeout(resolve, 50)) + this._env.step([m]) + this._platform.render() + } + return + } + const { promise, resolve: cancelResolver } = Promise.withResolvers() + this._cancelResolver = cancelResolver + while (true) { + const move = await Promise.race([ + promise, + new Promise(resolve => { + const keyDown = e => { + if (e.code === 'ArrowUp') { + resolve(GemPuzzleRLEnvironment.UP) + } else if (e.code === 'ArrowDown') { + resolve(GemPuzzleRLEnvironment.DOWN) + } else if (e.code === 'ArrowLeft') { + resolve(GemPuzzleRLEnvironment.LEFT) + } else if (e.code === 'ArrowRight') { + resolve(GemPuzzleRLEnvironment.RIGHT) + } + document.removeEventListener('keydown', keyDown) + } + document.addEventListener('keydown', keyDown) + }), + ]) + if (move === null) { + break + } + const { done } = this._env.step([move]) + this._platform.render() + if (done) { + break + } + await new Promise(resolve => setTimeout(resolve, 10)) + } + } + + cancel() { + this._cancelResolver?.(null) + } +} diff --git a/lib/rl/gem_puzzle.js b/lib/rl/gem_puzzle.js new file mode 100644 index 000000000..3e672bbd5 --- /dev/null +++ b/lib/rl/gem_puzzle.js @@ -0,0 +1,588 @@ +import { RLEnvironmentBase } from './base.js' + +class GemPuzzleBoard { + constructor(size, evaluator) { + this._evaluator = evaluator + this._size = size + + this.reset() + this.random() + } + + static UP = 0 + static 
RIGHT = 1 + static DOWN = 2 + static LEFT = 3 + + get size() { + return this._size + } + + get finish() { + const lastv = this._size[0] * this._size[1] - 1 + for (let i = 0, v = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++, v++) { + if (v !== lastv && this._board[i][j] !== v) { + return false + } + } + } + return this._board[this._size[0] - 1][this._size[1] - 1] == null + } + + get emptyPosition() { + return this.find(null) + } + + toString() { + let buf = '' + const maxlen = ('' + (this._size[0] * this._size[1] - 1)).length + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + if (j > 0) { + buf += ' ' + } + if (this._board[i][j] === null) { + buf += ' '.repeat(maxlen) + } else { + const txt = '' + this._board[i][j] + const pad = maxlen - txt.length + buf += txt.padStart(Math.floor(pad / 2), ' ').padEnd(maxlen, ' ') + } + } + buf = buf.trimEnd() + buf += '\n' + } + return buf + } + + copy() { + const cp = new GemPuzzleBoard(this._size, this._evaluator) + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + cp._board[i][j] = this._board[i][j] + } + } + return cp + } + + score() { + if (this._evaluator) { + return this._evaluator(this) + } + let s = 0 + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + const v = this._board[i][j] + if (v === null) { + continue + } + s -= Math.abs(i - Math.floor(v / this._size[1])) + s -= Math.abs(j - (v % this._size[1])) + } + } + return s + } + + at(p) { + return this._board[p[0]][p[1]] + } + + find(v) { + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + if (this._board[i][j] === v) { + return [i, j] + } + } + } + return null + } + + move(m) { + const emptyPos = this.emptyPosition + if (m === GemPuzzleBoard.UP) { + if (emptyPos[0] <= 0) { + return false + } + this._board[emptyPos[0]][emptyPos[1]] = this._board[emptyPos[0] - 1][emptyPos[1]] + 
this._board[emptyPos[0] - 1][emptyPos[1]] = null + } else if (m === GemPuzzleBoard.RIGHT) { + if (emptyPos[1] >= this._size[1] - 1) { + return false + } + this._board[emptyPos[0]][emptyPos[1]] = this._board[emptyPos[0]][emptyPos[1] + 1] + this._board[emptyPos[0]][emptyPos[1] + 1] = null + } else if (m === GemPuzzleBoard.DOWN) { + if (emptyPos[0] >= this._size[0] - 1) { + return false + } + this._board[emptyPos[0]][emptyPos[1]] = this._board[emptyPos[0] + 1][emptyPos[1]] + this._board[emptyPos[0] + 1][emptyPos[1]] = null + } else { + if (emptyPos[1] <= 0) { + return false + } + this._board[emptyPos[0]][emptyPos[1]] = this._board[emptyPos[0]][emptyPos[1] - 1] + this._board[emptyPos[0]][emptyPos[1] - 1] = null + } + return true + } + + reset() { + this._board = [] + for (let i = 0; i < this._size[0]; i++) { + this._board[i] = [] + } + for (let i = 0, v = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++, v++) { + this._board[i][j] = v + } + } + this._board[this._size[0] - 1][this._size[1] - 1] = null + } + + random() { + this.reset() + const n = this._size[0] * this._size[1] + const c = this._size[1] + const k = 4 + + if (n - 1 >= k) { + for (let i = 0; i < n; i++) { + const idx = [] + for (let i = 0; i < k; i++) { + idx.push(Math.floor(Math.random() * (n - i - 1))) + } + for (let i = k - 1; i >= 0; i--) { + for (let j = k - 1; j > i; j--) { + if (idx[i] <= idx[j]) { + idx[j]++ + } + } + } + + const p = idx.map(i => [Math.floor(i / c), i % c]) + ;[this._board[p[0][0]][p[0][1]], this._board[p[1][0]][p[1][1]]] = [ + this._board[p[1][0]][p[1][1]], + this._board[p[0][0]][p[0][1]], + ] + ;[this._board[p[2][0]][p[2][1]], this._board[p[3][0]][p[3][1]]] = [ + this._board[p[3][0]][p[3][1]], + this._board[p[2][0]][p[2][1]], + ] + } + } + + let prev = null + for (let i = 0; i < this._size[0] * this._size[1] * 2; i++) { + const choices = this.choices() + let m = choices[Math.floor(Math.random() * choices.length)] + if ( + (prev === GemPuzzleBoard.DOWN && m === 
GemPuzzleBoard.UP) || + (prev === GemPuzzleBoard.UP && m === GemPuzzleBoard.DOWN) || + (prev === GemPuzzleBoard.LEFT && m === GemPuzzleBoard.RIGHT) || + (prev === GemPuzzleBoard.RIGHT && m === GemPuzzleBoard.LEFT) + ) { + m = choices[Math.floor(Math.random() * choices.length)] + } + this.move(m) + prev = m + } + } + + choices() { + const emptyPos = this.emptyPosition + const c = [] + if (emptyPos[0] > 0) { + c.push(GemPuzzleBoard.UP) + } + if (emptyPos[1] < this._size[1] - 1) { + c.push(GemPuzzleBoard.RIGHT) + } + if (emptyPos[0] < this._size[0] - 1) { + c.push(GemPuzzleBoard.DOWN) + } + if (emptyPos[1] > 0) { + c.push(GemPuzzleBoard.LEFT) + } + return c + } + + solve() { + const solver = new GemPuzzleSolver(this.copy()) + solver.solve() + return solver.path + } +} + +class GemPuzzleSolver { + constructor(board) { + this._board = board + + this._path = [] + } + + get path() { + return this._path + } + + solve() { + let emptyPos = this._board.emptyPosition + const r = this._board.size[0] + const c = this._board.size[1] + for (let i = 0; i < r - 2; i++) { + for (let j = 0; j < c - 2; j++) { + emptyPos = this._move(i * c + j, [i, j]) + } + if (this._board.at([i, c - 2]) === i * c + c - 2 && this._board.at([i, c - 1]) === i * c + c - 1) { + continue + } + emptyPos = this._move(i * c + c - 1, [i, c - 2]) + while (emptyPos[1] < c - 1) { + this._step(GemPuzzleBoard.RIGHT) + emptyPos[1]++ + } + while (emptyPos[0] > i + 1) { + this._step(GemPuzzleBoard.UP) + emptyPos[0]-- + } + if (emptyPos[0] < i + 1) { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + if (this._board.at([i, c - 1]) === i * c + c - 2) { + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.LEFT) 
+ this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.DOWN) + emptyPos[1]-- + continue + } + emptyPos = this._move(i * c + c - 2, [i + 1, c - 2]) + + if (this._board.at([i, c - 1]) === i * c + c - 1) { + continue + } + while (emptyPos[0] <= i + 1) { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + while (emptyPos[1] < c - 1) { + this._step(GemPuzzleBoard.RIGHT) + emptyPos[1]++ + } + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]-- + emptyPos[1]-- + } + + for (let j = 0; j < c - 2; j++) { + const lv = (r - 2) * c + j + const hv = (r - 1) * c + j + + if (this._board.at([r - 2, j]) === lv && this._board.at([r - 1, j]) === hv) { + continue + } + emptyPos = this._move(hv, [r - 2, j]) + while (emptyPos[0] < r - 1) { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + while (emptyPos[1] > j + 1) { + this._step(GemPuzzleBoard.LEFT) + emptyPos[1]-- + } + if (emptyPos[1] < j + 1) { + this._step(GemPuzzleBoard.RIGHT) + emptyPos[1]++ + } + if (this._board.at([r - 1, j]) === lv) { + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.RIGHT) + emptyPos[0]-- + continue + } + emptyPos = this._move(lv, [r - 2, j + 1]) + while (emptyPos[0] < r - 1) { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + while (emptyPos[1] > j) { + 
this._step(GemPuzzleBoard.LEFT) + emptyPos[1]-- + } + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.RIGHT) + } + emptyPos = this._move((r - 2) * c + c - 2, [r - 2, c - 2]) + if (emptyPos[0] < r - 1) { + this._step(GemPuzzleBoard.DOWN) + } + if (emptyPos[1] < c - 1) { + this._step(GemPuzzleBoard.RIGHT) + } + } + + _step(m) { + this._board.move(m) + if (this._path.length > 0) { + const lm = this._path.at(-1) + if ( + (m === GemPuzzleBoard.UP && lm === GemPuzzleBoard.DOWN) || + (m === GemPuzzleBoard.DOWN && lm === GemPuzzleBoard.UP) || + (m === GemPuzzleBoard.LEFT && lm === GemPuzzleBoard.RIGHT) || + (m === GemPuzzleBoard.RIGHT && lm === GemPuzzleBoard.LEFT) + ) { + this._path.pop() + return + } + } + this._path.push(m) + } + + _move(value, to) { + const emptyPos = this._board.emptyPosition + const r = this._board.size[0] + while (emptyPos[0] <= to[0]) { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + if (this._board.at(to) === value) { + return emptyPos + } + + const pos = this._board.find(value) + if (pos[1] !== to[1]) { + if (pos[0] === emptyPos[0]) { + if (pos[0] === r - 1) { + this._step(GemPuzzleBoard.UP) + emptyPos[0]-- + } else { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + } + while (emptyPos[1] < pos[1]) { + this._step(GemPuzzleBoard.RIGHT) + emptyPos[1]++ + } + while (emptyPos[1] > pos[1]) { + this._step(GemPuzzleBoard.LEFT) + emptyPos[1]-- + } + if (emptyPos[0] < pos[0]) { + while (emptyPos[0] < pos[0]) { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + pos[0]-- + } else { + while (emptyPos[0] > pos[0] + 1) { + this._step(GemPuzzleBoard.UP) + emptyPos[0]-- + } + } + + if (pos[1] < to[1]) { + for (let t = 0; t < to[1] - pos[1]; t++) { + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.RIGHT) + emptyPos[1]++ + } + } else { + for (let t = 0; t < pos[1] - to[1]; t++) { + this._step(GemPuzzleBoard.LEFT) +
this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.LEFT) + emptyPos[1]-- + } + } + pos[1] = to[1] + } + + if (pos[0] !== to[0]) { + if (pos[0] === emptyPos[0]) { + if (pos[0] === r - 1) { + this._step(GemPuzzleBoard.UP) + emptyPos[0]-- + } else { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + } + if (emptyPos[1] <= pos[1]) { + while (emptyPos[1] <= pos[1]) { + this._step(GemPuzzleBoard.RIGHT) + emptyPos[1]++ + } + } else { + while (emptyPos[1] > pos[1] + 1) { + this._step(GemPuzzleBoard.LEFT) + emptyPos[1]-- + } + } + while (emptyPos[0] < pos[0]) { + this._step(GemPuzzleBoard.DOWN) + emptyPos[0]++ + } + while (emptyPos[0] > pos[0]) { + this._step(GemPuzzleBoard.UP) + emptyPos[0]-- + } + for (let t = 0; t < pos[0] - to[0]; t++) { + this._step(GemPuzzleBoard.UP) + this._step(GemPuzzleBoard.LEFT) + this._step(GemPuzzleBoard.DOWN) + this._step(GemPuzzleBoard.RIGHT) + this._step(GemPuzzleBoard.UP) + emptyPos[0]-- + } + pos[0] = to[0] + } + return emptyPos + } +} + +/** + * Gem puzzle environment + */ +export default class GemPuzzleRLEnvironment extends RLEnvironmentBase { + constructor() { + super() + + this._size = [4, 4] + + this._board = new GemPuzzleBoard(this._size, this._evaluation) + + this._reward = { + win: 10, + step: 0, + invalid: -100, + } + } + + static UP = GemPuzzleBoard.UP + static RIGHT = GemPuzzleBoard.RIGHT + static DOWN = GemPuzzleBoard.DOWN + static LEFT = GemPuzzleBoard.LEFT + + get actions() { + return [ + [ + GemPuzzleRLEnvironment.UP, + GemPuzzleRLEnvironment.RIGHT, + GemPuzzleRLEnvironment.DOWN, + GemPuzzleRLEnvironment.LEFT, + ], + ] + } + + get states() { + const s = [] + const n = this._size[0] * this._size[1] + const si = Array.from({ length: n }, (_, i) => i - 1) + for (let i = 0; i < n; i++) { + s.push(si) + } + return s + } + + set evaluation(func) { + if (func) { + this._board._evaluator = this._evaluation = board => { + return 
func(this._makeState(board)) + } + } else { + this._board._evaluator = this._evaluation = null + } + } + + _makeState(board) { + const s = [] + for (let i = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++) { + const p = board.at([i, j]) + s.push(p === null ? -1 : p) + } + } + return s + } + + _state2board(state) { + const board = new GemPuzzleBoard(this._size, this._evaluation) + for (let i = 0, p = 0; i < this._size[0]; i++) { + for (let j = 0; j < this._size[1]; j++, p++) { + board._board[i][j] = state[p] === -1 ? null : state[p] + } + } + return board + } + + reset() { + super.reset() + this._board.reset() + this._board.random() + + return this.state() + } + + state() { + return this._makeState(this._board) + } + + setState(state) { + this._board = this._state2board(state) + } + + step(action) { + return super.step(action) + } + + test(state, action) { + const board = this._state2board(state) + const changed = board.move(action[0]) + const done = board.finish + const reward = (done ? 
this._reward.win : this._reward.step) + board.score() + if (!changed) { + return { state, reward: reward + this._reward.invalid, done, invalid: true } + } + return { state: this._makeState(board), reward, done } + } +} diff --git a/tests/lib/rl/gem_puzzle.test.js b/tests/lib/rl/gem_puzzle.test.js new file mode 100644 index 000000000..da51cb395 --- /dev/null +++ b/tests/lib/rl/gem_puzzle.test.js @@ -0,0 +1,453 @@ +import GemPuzzleRLEnvironment from '../../../lib/rl/gem_puzzle.js' + +describe('env', () => { + test('actions', () => { + const env = new GemPuzzleRLEnvironment() + expect(env.actions).toEqual([[0, 1, 2, 3]]) + }) + + test('states', () => { + const env = new GemPuzzleRLEnvironment() + const states = env.states + const n = env._size[0] * env._size[1] + expect(states).toHaveLength(n) + for (let i = 0; i < n; i++) { + expect(states[i]).toHaveLength(n) + } + }) + + describe('evaluation', () => { + test('set', () => { + const env = new GemPuzzleRLEnvironment() + const n = env._size[0] * env._size[1] + env.evaluation = state => { + expect(state).toHaveLength(n) + return 1 + } + + const score = env._board.score() + expect(score).toBe(1) + }) + + test('clear', () => { + const env = new GemPuzzleRLEnvironment() + const orgScore = env._board.score() + env.evaluation = null + + const score = env._board.score() + expect(score).toBe(orgScore) + }) + }) + + test('reset', () => { + const env = new GemPuzzleRLEnvironment() + const n = env._size[0] * env._size[1] + + const state = env.reset() + expect(state).toHaveLength(n) + }) + + test('state', () => { + const env = new GemPuzzleRLEnvironment() + const n = env._size[0] * env._size[1] + + const state = env.state() + expect(state).toHaveLength(n) + }) + + test('setState', () => { + const env = new GemPuzzleRLEnvironment() + + const n = env._size[0] * env._size[1] + const newState = Array.from({ length: n }, (_, i) => i - 1) + env.setState(newState) + + const state = env.state() + expect(state).toEqual(newState) + }) + +
describe('step', () => { + test('valid not finish', () => { + const env = new GemPuzzleRLEnvironment() + env._board.reset() + + const info = env.step([GemPuzzleRLEnvironment.UP]) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-1) + + const n = env._size[0] * env._size[1] + const state = info.state + expect(state).toHaveLength(n) + }) + + test('valid finish', () => { + const env = new GemPuzzleRLEnvironment() + env._board.reset() + + env.step([GemPuzzleRLEnvironment.UP]) + const info = env.step([GemPuzzleRLEnvironment.DOWN]) + expect(info.invalid).toBeFalsy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(10) + + const n = env._size[0] * env._size[1] + const state = info.state + expect(state).toHaveLength(n) + }) + + test('invalid not finish', () => { + const env = new GemPuzzleRLEnvironment() + env._board.reset() + + env.step([GemPuzzleRLEnvironment.UP]) + const info = env.step([GemPuzzleRLEnvironment.RIGHT]) + expect(info.invalid).toBeTruthy() + expect(info.done).toBeFalsy() + expect(info.reward).toBe(-101) + + const n = env._size[0] * env._size[1] + const state = info.state + expect(state).toHaveLength(n) + }) + + test('invalid finish', () => { + const env = new GemPuzzleRLEnvironment() + env._board.reset() + + const info = env.step([GemPuzzleRLEnvironment.DOWN]) + expect(info.invalid).toBeTruthy() + expect(info.done).toBeTruthy() + expect(info.reward).toBe(-90) + + const n = env._size[0] * env._size[1] + const state = info.state + expect(state).toHaveLength(n) + }) + }) +}) + +describe('board', () => { + test('constructor', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + + expect(board.size).toEqual([4, 4]) + }) + + test('size', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + + expect(board.size).toEqual([4, 4]) + }) + + describe('finish', () => { + test('init', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + 
+ expect(board.finish).toBeFalsy() + }) + + test('reset', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board.reset() + + expect(board.finish).toBeTruthy() + }) + }) + + test('emptyPosition', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + const ep = board.emptyPosition + + let emptyPos = [-1, -1] + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + if (board.at([i, j]) === null) { + emptyPos = [i, j] + } + } + } + + expect(ep).toEqual(emptyPos) + }) + + test('toString', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board.reset() + + expect(board.toString()).toBe(`0 1 2 3 +4 5 6 7 +8 9 10 11 +12 13 14 +`) + }) + + test('copy', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + const cp = board.copy() + + expect(cp.size).toEqual(board.size) + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + expect(cp.at([i, j])).toBe(board.at([i, j])) + } + } + }) + + describe('score', () => { + test('init', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + const score = board.score() + + let s = 0 + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + if (board.at([i, j]) === null) { + continue + } + const v = board.at([i, j]) + s -= Math.abs(i - Math.floor(v / board.size[1])) + Math.abs(j - (v % board.size[1])) + } + } + expect(score).toBe(s) + }) + + test('reset', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board.reset() + const score = board.score() + + expect(score).toBe(0) + }) + + test('evaluator', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board._evaluator = () => { + return 1 + } + const score = board.score() + + expect(score).toBe(1) + }) + }) + + test.todo('at') + + describe('find', () => { + test('number', () => { + const env = new 
GemPuzzleRLEnvironment() + const board = env._board + for (let k = 0; k < 15; k++) { + const ep = board.find(k) + + let emptyPos = [-1, -1] + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + if (board.at([i, j]) === k) { + emptyPos = [i, j] + } + } + } + + expect(ep).toEqual(emptyPos) + } + }) + + test('null', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + const ep = board.find(null) + + let emptyPos = [-1, -1] + for (let i = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++) { + if (board.at([i, j]) === null) { + emptyPos = [i, j] + } + } + } + + expect(ep).toEqual(emptyPos) + }) + + test('not found', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + const ep = board.find(999) + + expect(ep).toBeNull() + }) + }) + + describe('move', () => { + test('success', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board.reset() + + expect(board.emptyPosition).toEqual([3, 3]) + const upres = board.move(GemPuzzleRLEnvironment.UP) + expect(upres).toBeTruthy() + expect(board.emptyPosition).toEqual([2, 3]) + const ltres = board.move(GemPuzzleRLEnvironment.LEFT) + expect(ltres).toBeTruthy() + expect(board.emptyPosition).toEqual([2, 2]) + const dnres = board.move(GemPuzzleRLEnvironment.DOWN) + expect(dnres).toBeTruthy() + expect(board.emptyPosition).toEqual([3, 2]) + const rtres = board.move(GemPuzzleRLEnvironment.RIGHT) + expect(rtres).toBeTruthy() + expect(board.emptyPosition).toEqual([3, 3]) + }) + + test('fail', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board.reset() + + expect(board.emptyPosition).toEqual([3, 3]) + const dnres = board.move(GemPuzzleRLEnvironment.DOWN) + expect(dnres).toBeFalsy() + expect(board.emptyPosition).toEqual([3, 3]) + const rtres = board.move(GemPuzzleRLEnvironment.RIGHT) + expect(rtres).toBeFalsy() + expect(board.emptyPosition).toEqual([3, 3]) + + for (let i = 
0; i < 3; i++) { + board.move(GemPuzzleRLEnvironment.UP) + board.move(GemPuzzleRLEnvironment.LEFT) + } + expect(board.emptyPosition).toEqual([0, 0]) + const upres = board.move(GemPuzzleRLEnvironment.UP) + expect(upres).toBeFalsy() + expect(board.emptyPosition).toEqual([0, 0]) + const ltres = board.move(GemPuzzleRLEnvironment.LEFT) + expect(ltres).toBeFalsy() + expect(board.emptyPosition).toEqual([0, 0]) + }) + }) + + test('reset', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board.reset() + + for (let i = 0, p = 0; i < board.size[0]; i++) { + for (let j = 0; j < board.size[1]; j++, p++) { + const v = p === board.size[0] * board.size[1] - 1 ? null : p + expect(board.at([i, j])).toBe(v) + } + } + }) + + test('random', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + for (let i = 0; i < 100; i++) { + board.random() + + const path = board.solve() + + path.forEach(m => board.move(m)) + expect(board.finish).toBeTruthy() + } + }) + + describe('solve', () => { + test('solved', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board.reset() + + const path = board.solve() + + expect(path).toHaveLength(0) + }) + + test('move bit before check 1', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board._size = [3, 3] + board._board = [ + [3, 1, 5], + [4, 0, null], + [6, 2, 7], + ] + + const path = board.solve() + path.forEach(m => board.move(m)) + expect(board.finish).toBeTruthy() + }) + + test('move bit before check 2', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board._size = [3, 3] + board._board = [ + [3, 5, 2], + [1, 7, null], + [0, 6, 4], + ] + + const path = board.solve() + path.forEach(m => board.move(m)) + expect(board.finish).toBeTruthy() + }) + + test('move bit before check 3', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board._size = [4, 4] + board._board = [ + [4, 0, 3, 
2], + [8, 6, 1, 7], + [5, 10, null, 14], + [12, 11, 13, 9], + ] + + const path = board.solve() + path.forEach(m => board.move(m)) + expect(board.finish).toBeTruthy() + }) + + test('need swap horizontal', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board._size = [3, 3] + board._board = [ + [0, 2, 1], + [3, 5, 4], + [6, 7, null], + ] + + const path = board.solve() + path.forEach(m => board.move(m)) + expect(board.finish).toBeTruthy() + }) + + test('need swap vertical', () => { + const env = new GemPuzzleRLEnvironment() + const board = env._board + board._size = [3, 3] + board._board = [ + [0, 1, 2], + [6, 5, 4], + [3, 7, null], + ] + + const path = board.solve() + path.forEach(m => board.move(m)) + expect(board.finish).toBeTruthy() + }) + }) +}) From b0314b74e34646739706edb2fbc0cad26cbe940d Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Sat, 20 Sep 2025 15:17:49 +0900 Subject: [PATCH 2/2] Add reset functionality and improve button management in GemPuzzleRenderer --- js/renderer/rl/gem_puzzle.js | 46 +++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/js/renderer/rl/gem_puzzle.js b/js/renderer/rl/gem_puzzle.js index 5cde73e27..6f9fe55c9 100644 --- a/js/renderer/rl/gem_puzzle.js +++ b/js/renderer/rl/gem_puzzle.js @@ -19,6 +19,7 @@ export default class GemPuzzleRenderer { this.renderer.env._size = [+size.value, +size.value] this.renderer.env._board._size = [+size.value, +size.value] this.renderer.platform.init() + this.renderer.env.reset() this.renderer.setting.ml.refresh() } r.appendChild(size) @@ -32,45 +33,61 @@ export default class GemPuzzleRenderer { this._envrenderer = new Renderer(this.renderer.env, { width, height, g: base }) this._envrenderer.init() + this._resetButton = document.createElement('button') + this._resetButton.innerText = 'Reset' + this._resetButton.onclick = async () => { + this.renderer.env.reset() + this._envrenderer.render() + } + 
r.appendChild(this._resetButton) + this._manualButton = document.createElement('button') this._manualButton.innerText = 'Manual' this._manualButton.onclick = async () => { this._game = new GemPuzzleGame(this.renderer.platform) this._autoButton.disabled = true this._manualButton.disabled = true + this._resetButton.disabled = true this._cancelButton.style.display = 'inline' await this._game.start() this._autoButton.disabled = false this._manualButton.disabled = false + this._resetButton.disabled = false + this._cancelButton.style.display = 'none' this._game = null } r.appendChild(this._manualButton) - this._cancelButton = document.createElement('button') - this._cancelButton.innerText = 'Cancel' - this._cancelButton.onclick = async () => { - this._game.cancel() - this._cancelButton.style.display = 'none' - } - this._cancelButton.style.display = 'none' - r.appendChild(this._cancelButton) - this._autoButton = document.createElement('button') this._autoButton.innerText = 'Auto' this._autoButton.onclick = async () => { this._game = new GemPuzzleGame(this.renderer.platform) this._autoButton.disabled = true this._manualButton.disabled = true + this._resetButton.disabled = true + this._cancelButton.style.display = 'inline' await this._game.start(true) this._autoButton.disabled = false this._manualButton.disabled = false + this._resetButton.disabled = false + this._cancelButton.style.display = 'none' this._game = null } r.appendChild(this._autoButton) + + this._cancelButton = document.createElement('button') + this._cancelButton.innerText = 'Cancel' + this._cancelButton.onclick = async () => { + this._game.cancel() + this._cancelButton.style.display = 'none' + } + this._cancelButton.style.display = 'none' + r.appendChild(this._cancelButton) } render() { - const displayButton = this._game || this.renderer.platform._manager._modelname ? 'none' : null + const displayButton = this.renderer.platform._manager._modelname ? 
'none' : null + this._resetButton.style.display = displayButton this._manualButton.style.display = displayButton this._autoButton.style.display = displayButton this._envrenderer.render() @@ -127,9 +144,12 @@ class Renderer { text.setAttribute('y', dy * (i + 0.5)) text.setAttribute('font-size', 14) text.setAttribute('user-select', 'none') + text.setAttribute('dominant-baseline', 'middle') + text.setAttribute('text-anchor', 'middle') g.appendChild(text) } } + this.render() } render() { @@ -152,10 +172,10 @@ class GemPuzzleGame { constructor(platform) { this._platform = platform this._env = platform.env + this._cancel = false } async start(auto = false) { - this._env.reset() this._platform.render() if (auto) { const path = this._env._board.solve() @@ -163,6 +183,9 @@ class GemPuzzleGame { await new Promise(resolve => setTimeout(resolve, 50)) this._env.step([m]) this._platform.render() + if (this._cancel) { + break + } } return } @@ -201,5 +224,6 @@ class GemPuzzleGame { cancel() { this._cancelResolver?.(null) + this._cancel = true } }