From 443ac757367ea265fd9479959006411e8bdcdf4e Mon Sep 17 00:00:00 2001
From: norci <norci@users.noreply.github.com>
Date: Tue, 22 Dec 2020 14:42:21 +0800
Subject: [PATCH 1/3] added doc in agent.jl

---
 src/policies/agents/agent.jl | 37 ++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/policies/agents/agent.jl b/src/policies/agents/agent.jl
index 4d2cfde..c138087 100644
--- a/src/policies/agents/agent.jl
+++ b/src/policies/agents/agent.jl
@@ -31,31 +31,44 @@ function check(agent::Agent, env::AbstractEnv)
     check(agent.policy, env)
 end
 
-#####
-# update!
-#####
-
+"""
+abstract update agent and trajectory
+"""
 function (agent::Agent)(stage::AbstractStage, env::AbstractEnv)
     update!(agent.trajectory, agent.policy, env, stage)
     update!(agent.policy, agent.trajectory, env, stage)
 end
 
+"""
+update agent and trajectory before an action
+"""
 function (agent::Agent)(stage::PreActStage, env::AbstractEnv)
     action = update!(agent.trajectory, agent.policy, env, stage)
     update!(agent.policy, agent.trajectory, env, stage)
     action
 end
 
+"""
+abstract update policy
+"""
 RLBase.update!(::AbstractPolicy, ::AbstractTrajectory, ::AbstractEnv, ::AbstractStage) =
     nothing
+
+"""
+update policy for before an action
+"""
 RLBase.update!(p::AbstractPolicy, t::AbstractTrajectory, ::AbstractEnv, ::PreActStage) =
     update!(p, t)
 
-## update trajectory
-
+"""
+abstract update trajectory
+"""
 RLBase.update!(::AbstractTrajectory, ::AbstractPolicy, ::AbstractEnv, ::AbstractStage) =
     nothing
 
+"""
+update SART trajectory before an episode
+"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySARTTrajectory,
@@ -71,6 +84,9 @@ function RLBase.update!(
     end
 end
 
+"""
+update SLART trajectory before an episode
+"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySLARTTrajectory,
@@ -87,6 +103,9 @@ function RLBase.update!(
     end
 end
 
+"""
+update SART trajectory after an episode, or before an action.
+"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySARTTrajectory,
@@ -102,6 +121,9 @@ function RLBase.update!(
     action
 end
 
+"""
+update SLART trajectory after an episode, or before an action.
+"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySLARTTrajectory,
@@ -118,6 +140,9 @@ function RLBase.update!(
     action
 end
 
+"""
+update trajectory after an action.
+"""
 function RLBase.update!(
     trajectory::AbstractTrajectory,
     ::AbstractPolicy,

From 19da26c815fcd282de5e054289a37b5d85fc3b78 Mon Sep 17 00:00:00 2001
From: norci <norci@users.noreply.github.com>
Date: Tue, 22 Dec 2020 16:54:55 +0800
Subject: [PATCH 2/3] Update src/policies/agents/agent.jl

Co-authored-by: Jun Tian <find_my_way@foxmail.com>
---
 src/policies/agents/agent.jl | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/policies/agents/agent.jl b/src/policies/agents/agent.jl
index c138087..1efb98a 100644
--- a/src/policies/agents/agent.jl
+++ b/src/policies/agents/agent.jl
@@ -32,7 +32,26 @@ function check(agent::Agent, env::AbstractEnv)
 end
 
 """
-abstract update agent and trajectory
+Here we extend the definition of `(p::AbstractPolicy)(::AbstractEnv)` in
+`RLBase` to accept an `AbstractStage` as the first argument. Algorithm designers
+may customize these behaviors respectively. The default behaviors are:
+1. Update the inner `trajectory` given the context of `policy`, `env`, and
+   `stage`.
+  1. By default we do nothing.
+  2. In `PreActStage`, we `push!` the current **state** of the `env` and the
+     **action** generated by `policy(env)` into the `trajectory`. And the
+     **action** is returned.
+  3. In `PostActStage`, we query the `reward` and `is_terminated` info from
+     `env` and push them into `trajectory`.
+  4. For `CircularSARTTrajectory`:  
+     1. In the `PosEpisodeStage`, we push the `state` at the end of an episode
+        and a dummy action into the `trajectory`.
+     1. In the `PreEpisodeStage`, we pop out the lastest `state` and `action`
+        pair (which are dummy ones) from `trajectory`.
+2. Update the inner `policy` given the context of `trajectory`, `env`, and
+   `stage`.
+  1. By default, we only `update!` the `policy` in the `PreActStage`, and the
+     call is dispatched to `update!(policy, trajectory)`.
 """
 function (agent::Agent)(stage::AbstractStage, env::AbstractEnv)
     update!(agent.trajectory, agent.policy, env, stage)

From 05e5c21f71f41a59228613f3d513eaaf80ea77af Mon Sep 17 00:00:00 2001
From: norci <norci@users.noreply.github.com>
Date: Tue, 22 Dec 2020 17:01:50 +0800
Subject: [PATCH 3/3] removed dup doc

---
 src/policies/agents/agent.jl | 29 +----------------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/src/policies/agents/agent.jl b/src/policies/agents/agent.jl
index 1efb98a..817c6c9 100644
--- a/src/policies/agents/agent.jl
+++ b/src/policies/agents/agent.jl
@@ -43,7 +43,7 @@ may customize these behaviors respectively. The default behaviors are:
      **action** is returned.
   3. In `PostActStage`, we query the `reward` and `is_terminated` info from
      `env` and push them into `trajectory`.
-  4. For `CircularSARTTrajectory`:  
+  4. For `CircularSARTTrajectory`:
      1. In the `PosEpisodeStage`, we push the `state` at the end of an episode
         and a dummy action into the `trajectory`.
      1. In the `PreEpisodeStage`, we pop out the lastest `state` and `action`
@@ -58,36 +58,21 @@ function (agent::Agent)(stage::AbstractStage, env::AbstractEnv)
     update!(agent.policy, agent.trajectory, env, stage)
 end
 
-"""
-update agent and trajectory before an action
-"""
 function (agent::Agent)(stage::PreActStage, env::AbstractEnv)
     action = update!(agent.trajectory, agent.policy, env, stage)
     update!(agent.policy, agent.trajectory, env, stage)
     action
 end
 
-"""
-abstract update policy
-"""
 RLBase.update!(::AbstractPolicy, ::AbstractTrajectory, ::AbstractEnv, ::AbstractStage) =
     nothing
 
-"""
-update policy for before an action
-"""
 RLBase.update!(p::AbstractPolicy, t::AbstractTrajectory, ::AbstractEnv, ::PreActStage) =
     update!(p, t)
 
-"""
-abstract update trajectory
-"""
 RLBase.update!(::AbstractTrajectory, ::AbstractPolicy, ::AbstractEnv, ::AbstractStage) =
     nothing
 
-"""
-update SART trajectory before an episode
-"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySARTTrajectory,
@@ -103,9 +88,6 @@ function RLBase.update!(
     end
 end
 
-"""
-update SLART trajectory before an episode
-"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySLARTTrajectory,
@@ -122,9 +104,6 @@ function RLBase.update!(
     end
 end
 
-"""
-update SART trajectory after an episode, or before an action.
-"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySARTTrajectory,
@@ -140,9 +119,6 @@ function RLBase.update!(
     action
 end
 
-"""
-update SLART trajectory after an episode, or before an action.
-"""
 function RLBase.update!(
     trajectory::Union{
         CircularArraySLARTTrajectory,
@@ -159,9 +135,6 @@ function RLBase.update!(
     action
 end
 
-"""
-update trajectory after an action.
-"""
 function RLBase.update!(
     trajectory::AbstractTrajectory,
     ::AbstractPolicy,