JuliaReinforcementLearning · findmyway · Jan 26, 2021 · Jan 26, 2021
diff --git a/src/policies/q_based_policies/explorers/UCB_explorer.jl b/src/policies/q_based_policies/explorers/UCB_explorer.jl
@@ -24,20 +24,7 @@ end
 UCBExplorer(na; c = 2.0, ϵ = 1e-10, step = 1, rng = Random.GLOBAL_RNG, is_training = true) =
     UCBExplorer(c, fill(ϵ, na), 1, rng, is_training)
 
-@doc raw"""
-    (ucb::UCBExplorer)(values::AbstractArray)
-Unlike [`EpsilonGreedyExplorer`](@ref), uncertaintyies are considered in UCB.
-
-!!! note
-    If multiple values with the same maximum value are found.
-    Then a random one will be returned!
-
-```math
-A_t = \underset{a}{\arg \max} \left[ Q_t(a) + c \sqrt{\frac{\ln t}{N_t(a)}} \right]
-```
-
-See more details at Section (2.7) on Page 35 of the book *Sutton, Richard S., and Andrew G. Barto. Reinforcement learning: An introduction. MIT press, 2018.*
-""" function (p::UCBExplorer)(values::AbstractArray)
+function (p::UCBExplorer)(values::AbstractArray)
     v, inds = find_all_max(@. values + p.c * sqrt(log(p.step + 1) / p.actioncounts))
     action = sample(p.rng, inds)
     if p.is_training

diff --git a/src/policies/tabular_random_policy.jl b/src/policies/tabular_random_policy.jl
@@ -64,7 +64,7 @@ end
 
 (p::TabularRandomPolicy)(env::AbstractEnv) = sample(p.rng, action_space(env), Weights(prob(p, env), 1.0))
 
-"!!! Assumeing table is already initialized"
+# !!! Assuming the table is already initialized
 (p::TabularRandomPolicy{S})(state::S) where S = sample(p.rng, Weights(p.table[state], 1.0))
 
 """