Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion js/renderer/rl/draughts.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class ManualPlayer {
for (let i = 0; i < board.size[0]; i++) {
this._check[i] = []
for (let j = 0; j < board.size[1]; j++) {
if ((i + j) % 2 > 0) continue
if ((i + j) % 2 === 0) continue
this._check[i][j] = document.createElementNS('http://www.w3.org/2000/svg', 'rect')
this._check[i][j].setAttribute('x', dw * j)
this._check[i][j].setAttribute('y', dh * i)
Expand Down
23 changes: 4 additions & 19 deletions lib/rl/acrobot.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,6 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase {
]
}

set reward(value) {
this._reward = {
goal: 0,
step: -1,
fail: 0,
}
if (value === 'achieve') {
this._reward = {
goal: 0,
step: -1,
fail: 0,
}
}
}

reset() {
super.reset()
this._theta1 = Math.random() * 0.2 - 0.1
Expand Down Expand Up @@ -111,11 +96,11 @@ export default class AcrobotRLEnvironment extends RLEnvironmentBase {

const clip = (x, min, max) => (x < min ? min : x > max ? max : x)
t1 += this._dt * dt1
if (t1 < -Math.PI) t1 = t1 + 2 * Math.PI
if (t1 > Math.PI) t1 = t1 - 2 * Math.PI
while (t1 < -Math.PI) t1 += 2 * Math.PI
while (t1 > Math.PI) t1 -= 2 * Math.PI
t2 += this._dt * dt2
if (t2 < -Math.PI) t2 = t2 + 2 * Math.PI
if (t2 > Math.PI) t2 = t2 - 2 * Math.PI
while (t2 < -Math.PI) t2 += 2 * Math.PI
while (t2 > Math.PI) t2 -= 2 * Math.PI
dt1 = clip(dt1 + this._dt * ddt1, -this._max_vel1, this._max_vel1)
dt2 = clip(dt2 + this._dt * ddt2, -this._max_vel2, this._max_vel2)

Expand Down
108 changes: 85 additions & 23 deletions lib/rl/draughts.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
]
const checkBound = (x, y) => 0 <= x && x < this._size[0] && 0 <= y && y < this._size[1]
for (let i = 0; i < this._size[0]; i++) {
for (let j = i % 2 === 0 ? 0 : 1; j < this._size[1]; j += 2) {
for (let j = i % 2 === 1 ? 0 : 1; j < this._size[1]; j += 2) {
let midpath = []
for (const [di, dj] of d) {
const i1 = i + di
Expand Down Expand Up @@ -93,18 +93,14 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
get states() {
const s = [[RED, WHITE]]
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
if (j % 2 === i % 2) {
s.push([
EMPTY,
DraughtsRLEnvironment.OWN,
DraughtsRLEnvironment.OWN | KING,
DraughtsRLEnvironment.OTHER,
DraughtsRLEnvironment.OTHER | KING,
])
} else {
s.push([EMPTY])
}
for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) {
s.push([
EMPTY,
DraughtsRLEnvironment.OWN,
DraughtsRLEnvironment.OWN | KING,
DraughtsRLEnvironment.OTHER,
DraughtsRLEnvironment.OTHER | KING,
])
}
}
return s
Expand All @@ -123,7 +119,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
_makeState(board, agentturn, gameturn) {
const s = [gameturn]
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) {
const p = board.at([i, j])
if (p === EMPTY) {
s.push(EMPTY)
Expand All @@ -144,7 +140,7 @@ export default class DraughtsRLEnvironment extends RLEnvironmentBase {
const board = new DraughtsBoard(this._size, this._evaluation)
const opturn = turn === RED ? WHITE : RED
for (let i = 0, p = 1; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++, p++) {
for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2, p++) {
if (state[p] === EMPTY) {
board._board[i][j] = EMPTY
} else {
Expand Down Expand Up @@ -241,6 +237,7 @@ class DraughtsBoard {
constructor(size, evaluator) {
this._evaluator = evaluator
this._size = size
this._lines = 3

this.reset()
}
Expand Down Expand Up @@ -280,6 +277,26 @@ class DraughtsBoard {
return null
}

toString() {
let buf = ''
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
if (j > 0) {
buf += ' '
}
if (this._board[i][j] === RED) {
buf += 'x'
} else if (this._board[i][j] === WHITE) {
buf += 'o'
} else {
buf += '-'
}
}
buf += '\n'
}
return buf
}

nextTurn(turn) {
if (turn === WHITE) {
return RED
Expand Down Expand Up @@ -310,20 +327,44 @@ class DraughtsBoard {
}
}

_num_to_pos(n) {
if (typeof n !== 'number') {
return n
}
const r = Math.floor((n - 1) / this._size[1])
const c = (n - 1) % this._size[1]
if (c < (this._size[1] - 1) / 2) {
return [r * 2, c * 2 + 1]
} else {
return [r * 2 + 1, (c - Math.floor(this._size[1] / 2)) * 2]
}
}

at(p) {
if (typeof p === 'number') {
p = this._num_to_pos(p)
}
return this._board[p[0]][p[1]]
}

set(p, turn) {
let piece = this._board[p.from[0]][p.from[1]]
p = {
from: this._num_to_pos(p.from),
path: p.path.map(v => this._num_to_pos(v)),
jump: p.jump.map(v => this._num_to_pos(v)),
}
let piece = this.at(p.from)
if (!(turn & piece)) {
return false
}
if ((p.jump.length !== 0 || p.path.length !== 1) && p.jump.length !== p.path.length) {
return false
}
const nturn = this.nextTurn(turn)
if (p.jump.some(([i, j]) => !(this._board[i][j] & nturn))) {
if (p.jump.some(j => !(this.at(j) & nturn))) {
return false
}
if (p.path.some(([i, j]) => this._board[i][j] !== EMPTY)) {
if (p.path.some(j => this.at(j) !== EMPTY)) {
return false
}

Expand All @@ -334,6 +375,27 @@ class DraughtsBoard {
}
}

if (p.jump.length === 0) {
for (let i = 0; i < 2; i++) {
if (Math.abs(p.from[i] - p.path[0][i]) !== 1) {
return false
}
}
} else {
let pos = p.from
for (let k = 0; k < p.path.length; k++) {
for (let i = 0; i < 2; i++) {
if (Math.abs(pos[i] - p.jump[k][i]) !== 1) {
return false
}
if (Math.abs(p.jump[k][i] - p.path[k][i]) !== 1) {
return false
}
}
pos = p.path[k]
}
}

this._board[p.from[0]][p.from[1]] = EMPTY
for (const [i, j] of p.jump) {
this._board[i][j] = EMPTY
Expand All @@ -354,10 +416,10 @@ class DraughtsBoard {
this._board[i] = Array(this._size[1]).fill(EMPTY)
}
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
if (i < 3 && (i + j) % 2 === 0) {
for (let j = i % 2 === 0 ? 1 : 0; j < this._size[1]; j += 2) {
if (i < this._lines) {
this._board[i][j] = RED
} else if (this._size[0] - 3 <= i && (i + j) % 2 === 0) {
} else if (this._size[0] - this._lines <= i) {
this._board[i][j] = WHITE
}
}
Expand Down Expand Up @@ -418,9 +480,9 @@ class DraughtsBoard {
cp._board[x + dx * 2][y + dy * 2] = this._board[x][y]
cp._board[x][y] = EMPTY
cp._board[x + dx][y + dy] = EMPTY
if (turn === RED && x * dx * 2 === this._size[0] - 1) {
if (turn === RED && x + dx * 2 === this._size[0] - 1) {
cp._board[x + dx * 2][y + dy * 2] |= KING
} else if (turn === WHITE && x * dx * 2 === 0) {
} else if (turn === WHITE && x + dx * 2 === 0) {
cp._board[x + dx * 2][y + dy * 2] |= KING
}
const npath = cp.allPath(x + dx * 2, y + dy * 2, turn, false)
Expand Down
20 changes: 20 additions & 0 deletions lib/rl/gomoku.js
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,26 @@ class GomokuBoard {
return null
}

toString() {
let buf = ''
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
if (j > 0) {
buf += ' '
}
if (this._board[i][j] === BLACK) {
buf += 'x'
} else if (this._board[i][j] === WHITE) {
buf += 'o'
} else {
buf += '-'
}
}
buf += '\n'
}
return buf
}

nextTurn(turn) {
return turn === BLACK ? WHITE : BLACK
}
Expand Down
2 changes: 1 addition & 1 deletion lib/rl/inhypercube.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export default class InHypercubeRLEnvironment extends RLEnvironmentBase {
}

const success = p[this._success_dim] <= -this._fail_position
const fail = !success && p.every(v => Math.abs(v) >= this._fail_position)
const fail = !success && p.some(v => Math.abs(v) >= this._fail_position)
const done = this.epoch >= this._max_step || success || fail
const reward = fail ? this._reward.fail : success ? this._reward.goal : this._reward.step
return {
Expand Down
39 changes: 32 additions & 7 deletions lib/rl/reversi.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
const a = [EMPTY]
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
a.push(`${i}_${j}`)
a.push(`${String.fromCharCode('a'.charCodeAt(0) + i)}${i + 1}`)
}
}
return [a]
Expand Down Expand Up @@ -167,8 +167,7 @@ export default class ReversiRLEnvironment extends RLEnvironmentBase {
invalid,
}
}
const choice = action[0].split('_').map(v => +v)
const changed = board.set(choice, agent)
const changed = board.set(action[0], agent)
const done = board.finish
if (!changed) {
return {
Expand Down Expand Up @@ -233,6 +232,26 @@ class ReversiBoard {
return null
}

toString() {
let buf = ''
for (let i = 0; i < this._size[0]; i++) {
for (let j = 0; j < this._size[1]; j++) {
if (j > 0) {
buf += ' '
}
if (this._board[i][j] === BLACK) {
buf += 'x'
} else if (this._board[i][j] === WHITE) {
buf += 'o'
} else {
buf += '-'
}
}
buf += '\n'
}
return buf
}

nextTurn(turn) {
return flipPiece(turn)
}
Expand Down Expand Up @@ -260,10 +279,16 @@ class ReversiBoard {
}

at(p) {
if (typeof p === 'string') {
p = [p[1] - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
}
return this._board[p[0]][p[1]]
}

set(p, turn) {
if (typeof p === 'string') {
p = [p[1] - 1, p.charCodeAt(0) - 'a'.charCodeAt(0)]
}
const flips = this.flipPositions(p[0], p[1], turn)
if (flips.length === 0) {
return false
Expand All @@ -282,10 +307,10 @@ class ReversiBoard {
}
const cx = Math.floor(this._size[0] / 2)
const cy = Math.floor(this._size[1] / 2)
this._board[cx - 1][cy - 1] = BLACK
this._board[cx - 1][cy] = WHITE
this._board[cx][cy - 1] = WHITE
this._board[cx][cy] = BLACK
this._board[cx - 1][cy - 1] = WHITE
this._board[cx - 1][cy] = BLACK
this._board[cx][cy - 1] = BLACK
this._board[cx][cy] = WHITE
}

choices(turn) {
Expand Down
35 changes: 35 additions & 0 deletions tests/lib/rl/acrobot.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,41 @@ describe('test', () => {
expect(info.state[3]).toBeGreaterThan(0)
})

// Angles that start below -PI are expected back shifted up by whole turns
// (one turn for t1, two turns for t2).
test('small t1, t2', () => {
	const env = new AcrobotRLEnvironment()
	const { done, reward, state } = env.test([-4, -13, 0, 0], [0])

	expect(done).toBeFalsy()
	expect(reward).toBe(-1)

	const [theta1, theta2, omega1, omega2] = [state[0], state[1], state[2], state[3]]
	expect(theta1).toBeCloseTo(2 * Math.PI - 4)
	expect(theta2).toBeCloseTo(4 * Math.PI - 13)
	expect(omega1).toBeLessThan(0)
	expect(omega2).toBeGreaterThan(0)
})

// Angles that start above PI are expected back shifted down by whole turns
// (four turns for t1, one turn for t2).
test('big t1, t2', () => {
	const env = new AcrobotRLEnvironment()
	const { done, reward, state } = env.test([26, 4, 0, 0], [0])

	expect(done).toBeFalsy()
	expect(reward).toBe(-1)

	const [theta1, theta2, omega1, omega2] = [state[0], state[1], state[2], state[3]]
	expect(theta1).toBeCloseTo(26 - 8 * Math.PI)
	expect(theta2).toBeCloseTo(4 - 2 * Math.PI)
	expect(omega1).toBeLessThan(0)
	expect(omega2).toBeGreaterThan(0)
})

// Starts with velocities far outside the limits: both angles must stay inside
// [-PI, PI] and the velocities are expected at -4*PI and 9*PI.
test('clip dt1, dt2', () => {
	const env = new AcrobotRLEnvironment()
	const { done, reward, state } = env.test([0, 0, -100, 100], [0])

	expect(done).toBeFalsy()
	expect(reward).toBe(-1)

	for (const idx of [0, 1]) {
		expect(state[idx]).toBeLessThanOrEqual(Math.PI)
		expect(state[idx]).toBeGreaterThanOrEqual(-Math.PI)
	}
	expect(state[2]).toBeCloseTo(-4 * Math.PI)
	expect(state[3]).toBeCloseTo(9 * Math.PI)
})

test('goal', () => {
const env = new AcrobotRLEnvironment()
const info = env.test([Math.PI, Math.PI / 2, 0, 0], [0])
Expand Down
Loading