From 443ac757367ea265fd9479959006411e8bdcdf4e Mon Sep 17 00:00:00 2001 From: norci Date: Tue, 22 Dec 2020 14:42:21 +0800 Subject: [PATCH 1/3] added doc in agent.jl --- src/policies/agents/agent.jl | 37 ++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/src/policies/agents/agent.jl b/src/policies/agents/agent.jl index 4d2cfde..c138087 100644 --- a/src/policies/agents/agent.jl +++ b/src/policies/agents/agent.jl @@ -31,31 +31,44 @@ function check(agent::Agent, env::AbstractEnv) check(agent.policy, env) end -##### -# update! -##### - +""" +abstract update agent and trajectory +""" function (agent::Agent)(stage::AbstractStage, env::AbstractEnv) update!(agent.trajectory, agent.policy, env, stage) update!(agent.policy, agent.trajectory, env, stage) end +""" +update agent and trajectory before an action +""" function (agent::Agent)(stage::PreActStage, env::AbstractEnv) action = update!(agent.trajectory, agent.policy, env, stage) update!(agent.policy, agent.trajectory, env, stage) action end +""" +abstract update policy +""" RLBase.update!(::AbstractPolicy, ::AbstractTrajectory, ::AbstractEnv, ::AbstractStage) = nothing + +""" +update policy for before an action +""" RLBase.update!(p::AbstractPolicy, t::AbstractTrajectory, ::AbstractEnv, ::PreActStage) = update!(p, t) -## update trajectory - +""" +abstract update trajectory +""" RLBase.update!(::AbstractTrajectory, ::AbstractPolicy, ::AbstractEnv, ::AbstractStage) = nothing +""" +update SART trajectory before an episode +""" function RLBase.update!( trajectory::Union{ CircularArraySARTTrajectory, @@ -71,6 +84,9 @@ function RLBase.update!( end end +""" +update SLART trajectory before an episode +""" function RLBase.update!( trajectory::Union{ CircularArraySLARTTrajectory, @@ -87,6 +103,9 @@ function RLBase.update!( end end +""" +update SART trajectory after an episode, or before an action. 
+""" function RLBase.update!( trajectory::Union{ CircularArraySARTTrajectory, @@ -102,6 +121,9 @@ function RLBase.update!( action end +""" +update SLART trajectory after an episode, or before an action. +""" function RLBase.update!( trajectory::Union{ CircularArraySLARTTrajectory, @@ -118,6 +140,9 @@ function RLBase.update!( action end +""" +update trajectory after an action. +""" function RLBase.update!( trajectory::AbstractTrajectory, ::AbstractPolicy, From 19da26c815fcd282de5e054289a37b5d85fc3b78 Mon Sep 17 00:00:00 2001 From: norci Date: Tue, 22 Dec 2020 16:54:55 +0800 Subject: [PATCH 2/3] Update src/policies/agents/agent.jl Co-authored-by: Jun Tian --- src/policies/agents/agent.jl | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/policies/agents/agent.jl b/src/policies/agents/agent.jl index c138087..1efb98a 100644 --- a/src/policies/agents/agent.jl +++ b/src/policies/agents/agent.jl @@ -32,7 +32,26 @@ function check(agent::Agent, env::AbstractEnv) end """ -abstract update agent and trajectory +Here we extend the definition of `(p::AbstractPolicy)(::AbstractEnv)` in +`RLBase` to accept an `AbstractStage` as the first argument. Algorithm designers +may customize these behaviors respectively. The default behaviors are: +1. Update the inner `trajectory` given the context of `policy`, `env`, and + `stage`. + 1. By default we do nothing. + 2. In `PreActStage`, we `push!` the current **state** of the `env` and the + **action** generated by `policy(env)` into the `trajectory`. And the + **action** is returned. + 3. In `PostActStage`, we query the `reward` and `is_terminated` info from + `env` and push them into `trajectory`. + 4. For `CircularSARTTrajectory`: + 1. In the `PostEpisodeStage`, we push the `state` at the end of an episode + and a dummy action into the `trajectory`. + 1. In the `PreEpisodeStage`, we pop out the latest `state` and `action` + pair (which are dummy ones) from `trajectory`. +2. 
Update the inner `policy` given the context of `trajectory`, `env`, and + `stage`. + 1. By default, we only `update!` the `policy` in the `PreActStage`. And it's + dispatched to `update!(policy, trajectory)`. """ function (agent::Agent)(stage::AbstractStage, env::AbstractEnv) update!(agent.trajectory, agent.policy, env, stage) From 05e5c21f71f41a59228613f3d513eaaf80ea77af Mon Sep 17 00:00:00 2001 From: norci Date: Tue, 22 Dec 2020 17:01:50 +0800 Subject: [PATCH 3/3] removed dup doc --- src/policies/agents/agent.jl | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/src/policies/agents/agent.jl b/src/policies/agents/agent.jl index 1efb98a..817c6c9 100644 --- a/src/policies/agents/agent.jl +++ b/src/policies/agents/agent.jl @@ -43,7 +43,7 @@ may customize these behaviors respectively. The default behaviors are: **action** is returned. 3. In `PostActStage`, we query the `reward` and `is_terminated` info from `env` and push them into `trajectory`. - 4. For `CircularSARTTrajectory`: + 4. For `CircularSARTTrajectory`: 1. In the `PostEpisodeStage`, we push the `state` at the end of an episode and a dummy action into the `trajectory`. 1. 
In the `PreEpisodeStage`, we pop out the latest `state` and `action` @@ -58,36 +58,21 @@ function (agent::Agent)(stage::AbstractStage, env::AbstractEnv) update!(agent.policy, agent.trajectory, env, stage) end -""" -update agent and trajectory before an action -""" function (agent::Agent)(stage::PreActStage, env::AbstractEnv) action = update!(agent.trajectory, agent.policy, env, stage) update!(agent.policy, agent.trajectory, env, stage) action end -""" -abstract update policy -""" RLBase.update!(::AbstractPolicy, ::AbstractTrajectory, ::AbstractEnv, ::AbstractStage) = nothing -""" -update policy for before an action -""" RLBase.update!(p::AbstractPolicy, t::AbstractTrajectory, ::AbstractEnv, ::PreActStage) = update!(p, t) -""" -abstract update trajectory -""" RLBase.update!(::AbstractTrajectory, ::AbstractPolicy, ::AbstractEnv, ::AbstractStage) = nothing -""" -update SART trajectory before an episode -""" function RLBase.update!( trajectory::Union{ CircularArraySARTTrajectory, @@ -103,9 +88,6 @@ function RLBase.update!( end end -""" -update SLART trajectory before an episode -""" function RLBase.update!( trajectory::Union{ CircularArraySLARTTrajectory, @@ -122,9 +104,6 @@ function RLBase.update!( end end -""" -update SART trajectory after an episode, or before an action. -""" function RLBase.update!( trajectory::Union{ CircularArraySARTTrajectory, @@ -140,9 +119,6 @@ function RLBase.update!( action end -""" -update SLART trajectory after an episode, or before an action. -""" function RLBase.update!( trajectory::Union{ CircularArraySLARTTrajectory, @@ -159,9 +135,6 @@ function RLBase.update!( action end -""" -update trajectory after an action. -""" function RLBase.update!( trajectory::AbstractTrajectory, ::AbstractPolicy,