Skip to content
This repository was archived by the owner on May 6, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/policies/agents/agent.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ functor(x::Agent) = (policy = x.policy,), y -> @set x.policy = y.policy
# Calling an `Agent` on `env` simply forwards the call to its wrapped policy.
function (agent::Agent)(env)
    return agent.policy(env)
end

function check(agent::Agent, env::AbstractEnv)
if ActionStyle(env) === FULL_ACTION_SET && !haskey(agent.trajectory, :legal_actions_mask)
if ActionStyle(env) === FULL_ACTION_SET &&
!haskey(agent.trajectory, :legal_actions_mask)
@warn "The env[$(nameof(env))] is of FULL_ACTION_SET, but I can not find a trace named :legal_actions_mask in the trajectory"
end
check(agent.policy, env)
Expand Down
8 changes: 4 additions & 4 deletions src/policies/q_based_policies/q_based_policy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ Flux.functor(x::QBasedPolicy) = (learner = x.learner,), y -> @set x.learner = y.

# Entry point: look up the action style of `env` and dispatch on it.
function (π::QBasedPolicy)(env)
    style = ActionStyle(env)
    return π(env, style)
end
# Minimal action set: the learner's output for `env` is handed directly to the explorer.
function (π::QBasedPolicy)(env, ::MinimalActionSet)
    out = π.learner(env)
    return π.explorer(out)
end
# Full action set: the explorer receives the learner's output for `env`
# together with `legal_action_space_mask(env)`.
function (π::QBasedPolicy)(env, ::FullActionSet)
    out = π.learner(env)
    mask = legal_action_space_mask(env)
    return π.explorer(out, mask)
end

# Entry point for `prob` on a `QBasedPolicy`: dispatch on the action style of `env`.
function RLBase.prob(p::QBasedPolicy, env)
    style = ActionStyle(env)
    return prob(p, env, style)
end
# Minimal action set: ask the explorer for `prob` given the learner's output for `env`.
function RLBase.prob(p::QBasedPolicy, env, ::MinimalActionSet)
    out = p.learner(env)
    return prob(p.explorer, out)
end
# Full action set: ask the explorer for `prob` given the learner's output for `env`
# and `legal_action_space_mask(env)`.
function RLBase.prob(p::QBasedPolicy, env, ::FullActionSet)
    out = p.learner(env)
    mask = legal_action_space_mask(env)
    return prob(p.explorer, out, mask)
end

Expand All @@ -36,7 +36,7 @@ RLBase.update!(p::QBasedPolicy, trajectory::AbstractTrajectory) =
function check(p::QBasedPolicy, env::AbstractEnv)
A = action_space(env)
if (A isa AbstractVector && A == 1:length(A)) ||
(A isa Tuple && A == Tuple(1:length(A)))
(A isa Tuple && A == Tuple(1:length(A)))
# this is expected
else
@warn "Applying a QBasedPolicy to an environment with a unknown action space. Maybe convert the environment with `discrete2standard_discrete` in ReinforcementLearningEnvironments.jl first or redesign the environment."
Expand Down
16 changes: 12 additions & 4 deletions src/policies/random_policy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,35 @@ end

"""
    Random.seed!(p::RandomPolicy, seed)

Reseed the random number generator stored in `p.rng` with `seed`.
"""
function Random.seed!(p::RandomPolicy, seed)
    return Random.seed!(p.rng, seed)
end

# Convenience constructor: the first positional argument defaults to `nothing`
# and the generator defaults to `Random.GLOBAL_RNG`; both are forwarded to the
# two-argument positional constructor.
function RandomPolicy(s = nothing; rng = Random.GLOBAL_RNG)
    return RandomPolicy(s, rng)
end

# `RandomPolicy{Nothing}`: draw with `p.rng` from `legal_action_space(env)`.
function (p::RandomPolicy{Nothing})(env)
    candidates = legal_action_space(env)
    return rand(p.rng, candidates)
end
# Generic case: draw with `p.rng` from the policy's own `action_space`; `env` is not consulted.
function (p::RandomPolicy)(env)
    return rand(p.rng, p.action_space)
end

"""
    RLBase.prob(p::RandomPolicy{<:Union{AbstractVector,Tuple}}, env::AbstractEnv)

Return a `Categorical` that puts equal weight `1 / n` on each of the `n`
entries of `p.action_space`. Argument checking is disabled because the
weights are built here.
"""
function RLBase.prob(p::RandomPolicy{<:Union{AbstractVector,Tuple}}, env::AbstractEnv)
    count = length(p.action_space)
    weights = fill(1 / count, count)
    return Categorical(weights; check_args = false)
end

# Entry point for `RandomPolicy{Nothing}`: dispatch on the chance style of `env`.
function RLBase.prob(p::RandomPolicy{Nothing}, env::AbstractEnv)
    style = ChanceStyle(env)
    return prob(p, env, style)
end

"""
    RLBase.prob(p::RandomPolicy{Nothing}, env::AbstractEnv, ::RLBase.AbstractChanceStyle)

Return a vector with one entry per element of `legal_action_space_mask(env)`:
entries selected by the mask hold `1 / sum(mask)`, all others are zero.
"""
function RLBase.prob(
    p::RandomPolicy{Nothing},
    env::AbstractEnv,
    ::RLBase.AbstractChanceStyle,
)
    legal = legal_action_space_mask(env)
    weights = zeros(length(legal))
    # Only positions selected by the mask are written; the rest stay at zero.
    weights[legal] .= 1 / sum(legal)
    return weights
end

function RLBase.prob(p::RandomPolicy{Nothing}, env::AbstractEnv, ::RLBase.ExplicitStochastic)
function RLBase.prob(
p::RandomPolicy{Nothing},
env::AbstractEnv,
::RLBase.ExplicitStochastic,
)
if current_player(env) == chance_player(env)
prob(env, chance_player(env))
else
Expand Down
2 changes: 1 addition & 1 deletion src/policies/random_start_policy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ for f in (:prob, :priority)
$f(p.random_policy, args...)
end
end
end
end
3 changes: 1 addition & 2 deletions test/components/agents.jl
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# Placeholder test set for `Agent`: the body is empty, so no assertions run.
@testset "Agent" begin
end
# Same empty test set, written on a single line.
@testset "Agent" begin end