-
-
Notifications
You must be signed in to change notification settings - Fork 5.7k
Closed
Labels
compiler:codegen (Generation of LLVM IR and native code), performance (Must go faster)
Description
See below. Somehow, if the signature of a function is (::ANY) and the return type is jl_value_t*, a jlcall is generated, even though a C call is generated both when the return type is different and when there are additional parameters to be specialized on.
I don't really know how it is decided whether a jl signature or a c signature will be generated but it seems that this is the wrong decision in this case.
This issue affects #11284, although I guess I can let the helper function specialize on the first argument to work around this without causing more code to be generated (since the first argument should always be a DataType anyway).
Test code
# Three variants under comparison; they differ only in return value and arity.
# `g` takes a single `::ANY` argument and returns the integer literal 1.
@noinline g(::ANY) = 1
# `f` with a single `::ANY` argument returns `Int`.
@noinline f(::ANY) = Int
# `f` with an `::ANY` argument plus a second, unannotated argument also returns `Int`.
@noinline f(::ANY, b) = Int
# Show the LLVM IR generated for each definition.
@code_llvm g(1)
@code_llvm f(1)
@code_llvm f(1, 2)
# Zero-argument callers, one per variant, each passing literal arguments.
k1() = g(1)
k2() = f(1)
k3() = f(1, 2)
# Show the LLVM IR generated for each caller.
@code_llvm k1()
@code_llvm k2()
@code_llvm k3()
# Print `f`, call it once with `args` outside the measurement, then report via
# `@time` how long 100000000 further calls take. `gc()` is invoked right before
# and right after the timed loop.
function time_func(f::Function, args...)
    println(f)
    f(args...)  # single untimed call ahead of the measured loop
    gc()
    iterations = 100000000
    @time for _ in 1:iterations
        f(args...)
    end
    gc()
end
# Time the zero-argument callers.
time_func(k1)
time_func(k2)
time_func(k3)
Output
define i64 @julia_g_44382(%jl_value_t*) {
top:
ret i64 1
}
define %jl_value_t* @julia_f_44385(%jl_value_t*, %jl_value_t**, i32) {
top:
%3 = load %jl_value_t** inttoptr (i64 139983779531464 to %jl_value_t**), align 8
ret %jl_value_t* %3
}
define %jl_value_t* @julia_f_44387(%jl_value_t*, i64) {
top:
%2 = load %jl_value_t** inttoptr (i64 139983779531464 to %jl_value_t**), align 8
ret %jl_value_t* %2
}
define i64 @julia_k1_44391() {
top:
%0 = call i64 @julia_g_44382(%jl_value_t* inttoptr (i64 139983779569792 to %jl_value_t*))
ret i64 %0
}
define %jl_value_t* @julia_k2_44392() {
top:
%0 = alloca [3 x %jl_value_t*], align 8
%.sub = getelementptr inbounds [3 x %jl_value_t*]* %0, i64 0, i64 0
%1 = getelementptr [3 x %jl_value_t*]* %0, i64 0, i64 2
%2 = bitcast [3 x %jl_value_t*]* %0 to i64*
store i64 2, i64* %2, align 8
%3 = getelementptr [3 x %jl_value_t*]* %0, i64 0, i64 1
%4 = bitcast %jl_value_t** %3 to %jl_value_t***
%5 = load %jl_value_t*** @jl_pgcstack, align 8
store %jl_value_t** %5, %jl_value_t*** %4, align 8
store %jl_value_t** %.sub, %jl_value_t*** @jl_pgcstack, align 8
store %jl_value_t* inttoptr (i64 139983779569792 to %jl_value_t*), %jl_value_t** %1, align 8
%6 = call %jl_value_t* @julia_f_44385(%jl_value_t* inttoptr (i64 139983812208976 to %jl_value_t*), %jl_value_t** %1, i32 1)
%7 = load %jl_value_t*** %4, align 8
store %jl_value_t** %7, %jl_value_t*** @jl_pgcstack, align 8
ret %jl_value_t* %6
}
define %jl_value_t* @julia_k3_44393() {
top:
%0 = call %jl_value_t* @julia_f_44387(%jl_value_t* inttoptr (i64 139983779569792 to %jl_value_t*), i64 2)
ret %jl_value_t* %0
}
k1
1.267 seconds
k2
1.422 seconds
k3
1.280 seconds
Metadata
Assignees
Labels
compiler:codegen (Generation of LLVM IR and native code), performance (Must go faster)