-
-
Notifications
You must be signed in to change notification settings - Fork 5.7k
Closed
Closed
Copy link
Description
Julia 0.5.0-rc2 allocates about 6x more than Julia 0.4.6 per inner loop iteration in cheapest_insertion in the following (should be copy-paste runnable):
function cheapest_insertion{T<:Real}(distmat::Matrix{T}, initpath::Vector{Int})
check_square(distmat, "Distance matrix passed to cheapest_insertion must be square.")
n = size(distmat, 1)
path = copy(initpath)
# collect cities to visited
visitus = setdiff(collect(1:n), initpath)
# helper for insertion cost
# tour cost change for inserting node k after the node at index after in the path
function inscost(k, after)
return distmat[path[after], k] +
distmat[k, path[after + 1]] -
distmat[path[after], path[after + 1]]
end
counter = 0
while !isempty(visitus)
bestCost = Inf
bestInsertion = (-1, -1)
for k in visitus
for after in 1:(length(path) - 1) # can't insert after end of path
counter += 1
c = inscost(k, after)
if c < bestCost
bestCost = c
bestInsertion = (k, after)
end
end
end
# bestInsertion now holds (k, after)
# insert into path, remove from to-do list
k, after = bestInsertion
insert!(path, after + 1, k)
visitus = setdiff(visitus, k)
end
println("inner loop $(counter) times")
return (path, pathcost(distmat, path))
end
###
# helpers
###
# make sure a passed distance matrix is a square
function check_square(m, msg)
if size(m, 1) != size(m, 2)
error(msg)
end
end
# helper for readable one-line path costs
# optionally specify the bounds for the subpath we want the cost of
# defaults to the whole path
# but when calculating reversed path costs can help to have subpath costs
function pathcost{T<:Real}(distmat::Matrix{T}, path::Vector{Int}, lb::Int = 1, ub::Int = length(path))
cost = zero(T)
for i in lb:(ub - 1)
@inbounds cost += distmat[path[i], path[i+1]]
end
return cost
end
srand(47)
dm = rand(300, 300)
@timev cheapest_insertion(dm, [1, 1])
@timev cheapest_insertion(dm, [1, 1])
Typical 0.5.0-rc2 output:
julia> include("alloctest.jl")
inner loop 4499950 times
2.530267 seconds (27.11 M allocations: 488.301 MB, 9.38% gc time)
elapsed time (ns): 2530266833
gc time (ns): 237287626
bytes allocated: 512020485
pool allocs: 27113047
non-pool GC allocs:624
GC pauses: 22
full collections: 1
inner loop 4499950 times
2.284020 seconds (27.06 M allocations: 486.182 MB, 1.07% gc time)
elapsed time (ns): 2284019665
gc time (ns): 24505169
bytes allocated: 509798272
pool allocs: 27061869
non-pool GC allocs:622
GC pauses: 22
([1,135,129,204,64,258,45,159,291,65 … 119,78,75,43,270,55,225,97,18,1],9.631546736690806)
Typical 0.4.6 output:
julia> include("alloctest.jl")
inner loop 4499950 times
0.460978 seconds (4.55 M allocations: 74.267 MB, 7.44% gc time)
elapsed time (ns): 460978489
gc time (ns): 34315232
bytes allocated: 77874451
pool allocs: 4554092
non-pool GC allocs:622
GC pauses: 3
full collections: 1
inner loop 4499950 times
0.314474 seconds (4.51 M allocations: 72.265 MB, 1.44% gc time)
elapsed time (ns): 314473520
gc time (ns): 4528180
bytes allocated: 75775184
pool allocs: 4510816
non-pool GC allocs:622
GC pauses: 3
([1,135,129,204,64,258,45,159,291,65 … 119,78,75,43,270,55,225,97,18,1],9.631546736690806)
Pinging @JeffBezanson who seemed interested in the julia-users discussion. Note as well that this performance regression is "fragile:" little changes to the code seem to make the regression disappear - this is why I couldn't find a more minimal example. E.g. changing the return of cheapest_insertion to return 47 has Julia 0.4.6 allocating the same number of bytes and running the same amount of time, but with that change Julia 0.5.0-rc2 runs 10x faster than 0.4.6.
Metadata
Metadata
Assignees
Labels
performanceMust go fasterMust go faster