From f1586adecf1b608299328de1b4c1dcd81988d8e5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 19 May 2017 13:40:27 +0900 Subject: [PATCH] refactor allocation to take an alignment argument [CI skip] * TODO: select bucket that fits multiple of alignment * TODO: allow alignment up to 64 * TODO: make arrays follow alignment as well * TODO: cleanup the mess that is jl_alignment, julia_alignment, datatype_alignment, and jl_gc_alignment * TODO: teach jl_gc_alloc_ to do something with the alignment request --- base/boot.jl | 1 + src/array.c | 38 +++++++++++++++------------- src/builtins.c | 10 +++----- src/ccall.cpp | 17 +++++++------ src/cgutils.cpp | 17 ++++++++----- src/codegen.cpp | 4 +-- src/datatype.c | 38 ++++++++++++++-------------- src/dump.c | 38 ++++++++++++++-------------- src/gc.c | 25 +++++++++--------- src/gf.c | 6 ++--- src/intrinsics.cpp | 5 ++-- src/ircode.c | 13 +++++++--- src/julia_internal.h | 46 +++++++++++++++++++++++----------- src/llvm-alloc-opt.cpp | 36 +++++++++++++------------- src/llvm-final-gc-lowering.cpp | 5 ++-- src/llvm-late-gc-lowering.cpp | 10 +++++--- src/llvm-pass-helpers.cpp | 2 +- src/method.c | 9 +++---- src/module.c | 3 +-- src/runtime_intrinsics.c | 14 +++++------ src/simplevector.c | 6 ++--- src/staticdata.c | 11 +++++--- src/task.c | 4 +-- src/typemap.c | 10 +++----- 24 files changed, 201 insertions(+), 167 deletions(-) diff --git a/base/boot.jl b/base/boot.jl index 54d852ca96416..bba8d88c41392 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -475,6 +475,7 @@ Array{T}(A::AbstractArray{S,N}) where {T,N,S} = Array{T,N}(A) AbstractArray{T}(A::AbstractArray{S,N}) where {T,S,N} = AbstractArray{T,N}(A) # primitive Symbol constructors +# XXX: these use unrooted, invalid GC pointers eval(Core, :(function Symbol(s::String) $(Expr(:meta, :pure)) return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), diff --git a/src/array.c b/src/array.c index b6231441f80bd..7416891fc3427 100644 --- a/src/array.c +++ b/src/array.c @@ -75,14 
+75,15 @@ size_t jl_arr_xtralloc_limit = 0; #define MAXINTVAL (((size_t)-1)>>1) static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int hasptr, int isunion, int elsz) + int isunboxed, int hasptr, int isunion, int elsz, int elalign) { jl_ptls_t ptls = jl_get_ptls_states(); - size_t i, tot, nel=1; + size_t i, tot, nel = 1; void *data; jl_array_t *a; + assert(elalign); - for(i=0; i < ndims; i++) { + for (i = 0; i < ndims; i++) { size_t di = dims[i]; wideint_t prod = (wideint_t)nel * (wideint_t)di; if (prod > (wideint_t) MAXINTVAL || di > MAXINTVAL) @@ -115,11 +116,11 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); if (tot <= ARRAY_INLINE_NBYTES) { if (isunboxed && elsz >= 4) - tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align data area + tsz = JL_ARRAY_ALIGN(tsz, elalign); // align data area size_t doffs = tsz; tsz += tot; - tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align whole object - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ptls, tsz, elalign, atype); + tsz = JL_ARRAY_ALIGN(tsz + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT) - sizeof(void*); // XXX: predict possible gc behavior // No allocation or safepoint allowed after this a->flags.how = 0; data = (char*)a + doffs; @@ -127,11 +128,11 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, memset(data, 0, tot); } else { - tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object data = jl_gc_managed_malloc(tot); // Allocate the Array **after** allocating the data // to make sure the array is still young - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ptls, tsz, JL_SMALL_BYTE_ALIGNMENT, atype); + tsz = JL_ARRAY_ALIGN(tsz + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT) - sizeof(void*); // XXX: predict possible gc 
behavior // No allocation or safepoint allowed after this a->flags.how = 2; jl_gc_track_malloced_array(ptls, a); @@ -187,13 +188,13 @@ static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t * elsz = LLT_ALIGN(elsz, al); } - return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz); + return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz, al); } jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int hasptr, int isunion, int elsz) + int isunboxed, int hasptr, int isunion, int elsz, int elalign) { - return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz); + return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz, elalign); } #ifndef JL_NDEBUG @@ -224,7 +225,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, int ndimwords = jl_array_ndimwords(ndims); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT); - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ptls, tsz, JL_SMALL_BYTE_ALIGNMENT, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->flags.ndims = ndims; @@ -305,7 +306,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) int ndimwords = jl_array_ndimwords(1); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT); - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, jl_array_uint8_type); + a = (jl_array_t*)jl_gc_alloc(ptls, tsz, JL_SMALL_BYTE_ALIGNMENT, jl_array_uint8_type); a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->flags.ndims = 1; a->offset = 0; @@ -352,7 +353,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, int ndimwords = jl_array_ndimwords(1); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); - a = 
(jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ptls, tsz, align, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->data = data; @@ -419,7 +420,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, int ndimwords = jl_array_ndimwords(ndims); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); - a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype); + a = (jl_array_t*)jl_gc_alloc(ptls, tsz, align, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->data = data; @@ -515,7 +516,7 @@ JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) jl_throw(jl_memory_exception); if (len == 0) return jl_an_empty_string; - jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining + jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, /*align*/ 0, jl_string_type); // force inlining *(size_t*)s = len; memcpy((char*)s + sizeof(size_t), str, len); ((char*)s + sizeof(size_t))[len] = 0; @@ -529,7 +530,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) jl_throw(jl_memory_exception); if (len == 0) return jl_an_empty_string; - jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining + jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, /*align*/ 0, jl_string_type); // force inlining *(size_t*)s = len; ((char*)s + sizeof(size_t))[len] = 0; return s; @@ -1197,11 +1198,12 @@ JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz) JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary) { size_t elsz = ary->elsize; + size_t elalign = ary->flags.ptrarray ? 
sizeof(void*) : jl_datatype_align(jl_tparam0(jl_typeof(ary))); size_t len = jl_array_len(ary); int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ary))); jl_array_t *new_ary = _new_array_(jl_typeof(ary), jl_array_ndims(ary), &ary->nrows, !ary->flags.ptrarray, - ary->flags.hasptr, isunion, elsz); + ary->flags.hasptr, isunion, elsz, elalign); memcpy(new_ary->data, ary->data, len * elsz); // ensure isbits union arrays copy their selector bytes correctly if (jl_array_isbitsunion(ary)) diff --git a/src/builtins.c b/src/builtins.c index 8cbce6c5b6188..276d36446eb4f 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -754,7 +754,7 @@ JL_CALLABLE(jl_f_tuple) if (tt->instance != NULL) return tt->instance; jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(tt), tt); + jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(tt), jl_datatype_align(tt), tt); for (i = 0; i < nargs; i++) set_nth_field(tt, (void*)jv, i, args[i]); return jv; @@ -1091,8 +1091,7 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n) jl_ptls_t ptls = jl_get_ptls_states(); jl_array_t *ar = jl_alloc_vec_any(n); JL_GC_PUSH1(&ar); - jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), - jl_expr_type); + jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), /*align*/ 0, jl_expr_type); ex->head = head; ex->args = ar; JL_GC_POP(); @@ -1108,8 +1107,7 @@ JL_CALLABLE(jl_f__expr) JL_GC_PUSH1(&ar); for(size_t i=0; i < nargs-1; i++) jl_array_ptr_set(ar, i, args[i+1]); - jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), - jl_expr_type); + jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), /*align*/ 0, jl_expr_type); ex->head = (jl_sym_t*)args[0]; ex->args = ar; JL_GC_POP(); @@ -1124,7 +1122,7 @@ JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_ if ((ub != (jl_value_t *)jl_any_type && !jl_is_type(ub) && !jl_is_typevar(ub)) || jl_is_vararg_type(ub)) jl_type_error_rt("TypeVar", "upper bound", (jl_value_t 
*)jl_type_type, ub); jl_ptls_t ptls = jl_get_ptls_states(); - jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ptls, sizeof(jl_tvar_t), jl_tvar_type); + jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ptls, sizeof(jl_tvar_t), 0, jl_tvar_type); tv->name = name; tv->lb = lb; tv->ub = ub; diff --git a/src/ccall.cpp b/src/ccall.cpp index 002ede019a6be..5a75b1039c499 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -924,8 +924,9 @@ static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_ // XXX: need to handle parameterized zero-byte types (singleton) const DataLayout &DL = jl_data_layout; unsigned nb = DL.getTypeStoreSize(result->getType()); + unsigned alignment = DL.getPrefTypeAlignment(result->getType()); MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut; - Value *strct = emit_allocobj(ctx, nb, runtime_dt); + Value *strct = emit_allocobj(ctx, nb, alignment, runtime_dt); init_bits_value(ctx, strct, result, tbaa); return strct; } @@ -1802,7 +1803,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( // and has incorrect write barriers. // instead this code path should behave like `unsafe_load` assert(jl_datatype_size(rt) > 0 && "sret shouldn't be a singleton instance"); - result = emit_allocobj(ctx, jl_datatype_size(rt), + result = emit_allocobj(ctx, jl_datatype_size(rt), jl_datatype_align(rt), literal_pointer_val(ctx, (jl_value_t*)rt)); sretboxed = true; gc_uses.push_back(result); @@ -1933,10 +1934,10 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (static_rt) { Value *runtime_bt = literal_pointer_val(ctx, rt); size_t rtsz = jl_datatype_size(rt); + size_t rtal = julia_alignment(rt); assert(rtsz > 0); - Value *strct = emit_allocobj(ctx, rtsz, runtime_bt); + Value *strct = emit_allocobj(ctx, rtsz, rtal, runtime_bt); MDNode *tbaa = jl_is_mutable(rt) ? 
tbaa_mutab : tbaa_immut; - int boxalign = julia_alignment(rt); // copy the data from the return value to the new struct const DataLayout &DL = jl_data_layout; auto resultTy = result->getType(); @@ -1944,12 +1945,12 @@ jl_cgval_t function_sig_t::emit_a_ccall( // ARM and AArch64 can use a LLVM type larger than the julia type. // When this happens, cast through memory. auto slot = emit_static_alloca(ctx, resultTy); - slot->setAlignment(Align(boxalign)); - ctx.builder.CreateAlignedStore(result, slot, Align(boxalign)); - emit_memcpy(ctx, strct, tbaa, slot, tbaa, rtsz, boxalign); + slot->setAlignment(Align(rtal)); + ctx.builder.CreateAlignedStore(result, slot, Align(rtal)); + emit_memcpy(ctx, strct, tbaa, slot, tbaa, rtsz, rtal); } else { - init_bits_value(ctx, strct, result, tbaa, boxalign); + init_bits_value(ctx, strct, result, tbaa, rtal); } return mark_julia_type(ctx, strct, true, rt); } diff --git a/src/cgutils.cpp b/src/cgutils.cpp index d981971b92018..d4a8c02460102 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2271,7 +2271,7 @@ static Value *emit_array_nd_index( // --- boxing --- -static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt); +static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, size_t alignment, Value *jt); static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa, unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned @@ -2573,7 +2573,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL); box = _boxed_special(ctx, vinfo_r, t); if (!box) { - box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); + box = emit_allocobj(ctx, jl_datatype_size(jt), jl_datatype_align(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? 
tbaa_mutab : tbaa_immut); } } @@ -2637,7 +2637,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo) assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above! box = _boxed_special(ctx, vinfo, t); if (!box) { - box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); + box = emit_allocobj(ctx, jl_datatype_size(jt), jl_datatype_align(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut); } } @@ -2759,11 +2759,16 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std } // allocation for known size object -static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt) +static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, size_t alignment, Value *jt) { Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8); Function *F = prepare_call(jl_alloc_obj_func); - auto call = ctx.builder.CreateCall(F, {ptls_ptr, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)}); + auto call = ctx.builder.CreateCall(F, { + ptls_ptr, + ConstantInt::get(T_size, static_size), + ConstantInt::get(T_size, alignment), + maybe_decay_untracked(ctx, jt) + }); call->setAttributes(F->getAttributes()); return call; } @@ -3033,7 +3038,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg else return mark_julia_slot(strct, ty, NULL, tbaa_stack); } - Value *strct = emit_allocobj(ctx, jl_datatype_size(sty), + Value *strct = emit_allocobj(ctx, jl_datatype_size(sty), jl_datatype_align(sty), literal_pointer_val(ctx, (jl_value_t*)ty)); jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty); strct = decay_derived(ctx, strct); diff --git a/src/codegen.cpp b/src/codegen.cpp index f8492078492c4..79f735f0a7251 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -611,7 +611,7 @@ static const auto jlegal_func = new JuliaFunction{ static const 
auto jl_alloc_obj_func = new JuliaFunction{ "julia.gc_alloc_obj", [](LLVMContext &C) { return FunctionType::get(T_prjlvalue, - {T_pint8, T_size, T_prjlvalue}, false); }, + {T_pint8, T_size, T_size, T_prjlvalue}, false); }, [](LLVMContext &C) { return AttributeList::get(C, AttributeSet::get(C, makeArrayRef({Attribute::getWithAllocSizeArgs(C, 1, None)})), // returns %1 bytes Attributes(C, {Attribute::NoAlias, Attribute::NonNull}), @@ -5368,7 +5368,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con outboxed = (output_type != (jl_value_t*)jl_voidpointer_type); if (outboxed) { assert(jl_datatype_size(output_type) == sizeof(void*) * 4); - Value *strct = emit_allocobj(ctx, jl_datatype_size(output_type), + Value *strct = emit_allocobj(ctx, jl_datatype_size(output_type), jl_datatype_align(output_type), literal_pointer_val(ctx, (jl_value_t*)output_type)); Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), T_psize); MDNode *tbaa = best_tbaa(output_type); diff --git a/src/datatype.c b/src/datatype.c index 3428e7479e9ed..c459fdc6eebc7 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -43,9 +43,8 @@ static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_methtable_t *mt = - (jl_methtable_t*)jl_gc_alloc(ptls, sizeof(jl_methtable_t), - jl_methtable_type); + jl_methtable_t *mt = (jl_methtable_t*)jl_gc_alloc(ptls, + sizeof(jl_methtable_t), /*align*/ 0, jl_methtable_type); mt->name = jl_demangle_typename(name); mt->module = module; mt->defs = jl_nothing; @@ -63,9 +62,8 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *module) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_typename_t *tn = - (jl_typename_t*)jl_gc_alloc(ptls, sizeof(jl_typename_t), - jl_typename_type); + jl_typename_t 
*tn = (jl_typename_t*)jl_gc_alloc(ptls, + sizeof(jl_typename_t), /*align*/ 0, jl_typename_type); tn->name = name; tn->module = module; tn->wrapper = NULL; @@ -88,7 +86,8 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_dat jl_datatype_t *jl_new_uninitialized_datatype(void) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ptls, sizeof(jl_datatype_t), jl_datatype_type); + jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ptls, + sizeof(jl_datatype_t), /*align*/ 0, jl_datatype_type); t->hash = 0; t->hasfreetypevars = 0; t->isdispatchtuple = 0; @@ -229,7 +228,7 @@ STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st) if (jl_is_datatype_make_singleton(st)) { // It's possible for st to already have an ->instance if it was redefined if (!st->instance) { - st->instance = jl_gc_alloc(jl_get_ptls_states(), 0, st); + st->instance = jl_gc_alloc(jl_get_ptls_states(), 0, 0, st); jl_gc_wb(st, st->instance); } } @@ -707,7 +706,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data) if (bt == jl_uint16_type) return jl_box_uint16(*(uint16_t*)data); if (bt == jl_char_type) return jl_box_char(*(uint32_t*)data); - jl_value_t *v = jl_gc_alloc(ptls, nb, bt); + jl_value_t *v = jl_gc_alloc(ptls, nb, jl_datatype_align(bt), bt); switch (nb) { case 1: *(uint8_t*) v = *(uint8_t*)data; break; case 2: *(uint16_t*)v = jl_load_unaligned_i16(data); break; @@ -725,7 +724,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data) JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt) { uint64_t data = 0xffffffffffffffffULL; - jl_value_t *v = jl_gc_alloc(jl_get_ptls_states(), sizeof(size_t), bt); + jl_value_t *v = jl_gc_alloc(jl_get_ptls_states(), sizeof(size_t), 0, bt); memcpy(v, &data, sizeof(size_t)); return v; } @@ -789,7 +788,7 @@ UNBOX_FUNC(uint8pointer, uint8_t*) JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x) \ { \ jl_ptls_t ptls = jl_get_ptls_states(); \ - jl_value_t *v = 
jl_gc_alloc(ptls, nw * sizeof(void*), \ + jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), 0,\ jl_##typ##_type); \ *(c_type*)jl_data_ptr(v) = x; \ return v; \ @@ -813,7 +812,7 @@ BOX_FUNC(float64, double, jl_box, 2) c_type idx = x+NBOX_C/2; \ if ((u##c_type)idx < (u##c_type)NBOX_C) \ return boxed_##typ##_cache[idx]; \ - jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), \ + jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), 0,\ jl_##typ##_type); \ *(c_type*)jl_data_ptr(v) = x; \ return v; \ @@ -825,7 +824,7 @@ BOX_FUNC(float64, double, jl_box, 2) jl_ptls_t ptls = jl_get_ptls_states(); \ if (x < NBOX_C) \ return boxed_##typ##_cache[x]; \ - jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), \ + jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*), 0,\ jl_##typ##_type); \ *(c_type*)jl_data_ptr(v) = x; \ return v; \ @@ -851,7 +850,7 @@ JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x) uint32_t u = bswap_32(x); if (u < 128) return boxed_char_cache[(uint8_t)u]; - jl_value_t *v = jl_gc_alloc(ptls, sizeof(void*), jl_char_type); + jl_value_t *v = jl_gc_alloc(ptls, sizeof(void*), /*align*/ 0, jl_char_type); *(uint32_t*)jl_data_ptr(v) = x; return v; } @@ -919,7 +918,7 @@ JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...) 
va_list args; size_t nf = jl_datatype_nfields(type); va_start(args, type); - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type); + jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), jl_datatype_align(type), type); for (size_t i = 0; i < nf; i++) { set_nth_field(type, (void*)jv, i, va_arg(args, jl_value_t*)); } @@ -950,7 +949,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, } if (type->instance != NULL) return type->instance; - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type); + jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), jl_datatype_align(type), type); JL_GC_PUSH1(&jv); for (size_t i = 0; i < na; i++) { set_nth_field(type, (void*)jv, i, args[i]); @@ -981,7 +980,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) } return type->instance; } - jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type); + jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), jl_datatype_align(type), type); jl_value_t *fi = NULL; JL_GC_PUSH2(&jv, &fi); if (type->layout->npointers > 0) { @@ -1005,8 +1004,9 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type) { jl_ptls_t ptls = jl_get_ptls_states(); if (type->instance != NULL) return type->instance; - size_t size = jl_datatype_size(type); - jl_value_t *jv = jl_gc_alloc(ptls, size, type); + size_t size = jl_datatype_size(type); + size_t align = jl_datatype_align(type); + jl_value_t *jv = jl_gc_alloc(ptls, size, align, type); if (size > 0) memset(jl_data_ptr(jv), 0, size); return jv; diff --git a/src/dump.c b/src/dump.c index d6a96ac3a7f01..513fc30292a3d 100644 --- a/src/dump.c +++ b/src/dump.c @@ -484,14 +484,17 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li jl_array_t *ar = (jl_array_t*)v; jl_value_t *et = jl_tparam0(jl_typeof(ar)); int isunion = jl_is_uniontype(et); - if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { + size_t elalign = ar->flags.ptrarray ? 
sizeof(void*) : jl_datatype_align(jl_tparam0(jl_typeof(ar))); + if (ar->flags.ndims == 1 && ar->elsize <= 0x1f && elalign <= 0xff) { write_uint8(s->s, TAG_ARRAY1D); write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); + write_uint8(s->s, elalign); } else { write_uint8(s->s, TAG_ARRAY); write_uint16(s->s, ar->flags.ndims); write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); + write_uint16(s->s, elalign); } for (i = 0; i < ar->flags.ndims; i++) jl_serialize_value(s, jl_box_long(jl_array_dim(ar,i))); @@ -1332,7 +1335,7 @@ static jl_value_t *jl_deserialize_value_symbol(jl_serializer_state *s, uint8_t t static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED { int16_t i, ndims; - int isptr, isunion, hasptr, elsize; + int isptr, isunion, hasptr, elsize, elalign; if (tag == TAG_ARRAY1D) { ndims = 1; elsize = read_uint8(s->s); @@ -1340,6 +1343,7 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta hasptr = (elsize >> 6) & 1; isunion = (elsize >> 5) & 1; elsize = elsize & 0x1f; + elalign = read_uint8(s->s); } else { ndims = read_uint16(s->s); @@ -1348,6 +1352,7 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta hasptr = (elsize >> 14) & 1; isunion = (elsize >> 13) & 1; elsize = elsize & 0x3fff; + elalign = read_uint16(s->s); } uintptr_t pos = backref_list.len; arraylist_push(&backref_list, NULL); @@ -1356,7 +1361,7 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta dims[i] = jl_unbox_long(jl_deserialize_value(s, NULL)); } jl_array_t *a = jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); + (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize, elalign); backref_list.items[pos] = a; jl_value_t *aty = jl_deserialize_value(s, &jl_astaggedvalue(a)->type); 
jl_set_typeof(a, aty); @@ -1404,9 +1409,7 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED { - jl_method_t *m = - (jl_method_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_t), - jl_method_type); + jl_method_t *m = (jl_method_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_t), /*align*/0, jl_method_type); memset(m, 0, sizeof(jl_method_t)); uintptr_t pos = backref_list.len; arraylist_push(&backref_list, m); @@ -1464,9 +1467,8 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_ static jl_value_t *jl_deserialize_value_method_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED { - jl_method_instance_t *mi = - (jl_method_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_instance_t), - jl_method_instance_type); + jl_method_instance_t *mi = (jl_method_instance_t*)jl_gc_alloc(s->ptls, + sizeof(jl_method_instance_t), 0, jl_method_instance_type); memset(mi, 0, sizeof(jl_method_instance_t)); uintptr_t pos = backref_list.len; arraylist_push(&backref_list, mi); @@ -1500,8 +1502,8 @@ static jl_value_t *jl_deserialize_value_method_instance(jl_serializer_state *s, static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED { - jl_code_instance_t *codeinst = - (jl_code_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); + jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(s->ptls, + sizeof(jl_code_instance_t), 0, jl_code_instance_type); memset(codeinst, 0, sizeof(jl_code_instance_t)); arraylist_push(&backref_list, codeinst); int flags = read_uint8(s->s); @@ -1588,7 +1590,7 @@ static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DIS static jl_value_t *jl_deserialize_value_singleton(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED { - jl_value_t *v = (jl_value_t*)jl_gc_alloc(s->ptls, 0, 
NULL); + jl_value_t *v = (jl_value_t*)jl_gc_alloc(s->ptls, 0, 0, NULL); uintptr_t pos = backref_list.len; arraylist_push(&backref_list, (void*)v); // TODO: optimize the case where the value can easily be obtained @@ -1646,7 +1648,7 @@ static void jl_deserialize_struct(jl_serializer_state *s, jl_value_t *v) JL_GC_D static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED { int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); + jl_value_t *v = jl_gc_alloc(s->ptls, sz, /*align*/0, NULL); // TODO: this alignment seems wrong jl_set_typeof(v, (void*)(intptr_t)0x50); uintptr_t pos = backref_list.len; arraylist_push(&backref_list, v); @@ -1656,8 +1658,7 @@ static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag, int internal = read_uint8(s->s); jl_typename_t *tn; if (internal) { - tn = (jl_typename_t*)jl_gc_alloc( - s->ptls, sizeof(jl_typename_t), jl_typename_type); + tn = (jl_typename_t*)jl_gc_alloc(s->ptls, sizeof(jl_typename_t), /*align*/ 0, jl_typename_type); memset(tn, 0, sizeof(jl_typename_t)); tn->cache = jl_emptysvec; // the cache is refilled later (tag 5) tn->linearcache = jl_emptysvec; // the cache is refilled later (tag 5) @@ -1756,15 +1757,14 @@ static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc backref_list.items[pos] = v; return v; } - v = jl_gc_alloc(s->ptls, sizeof(jl_unionall_t), jl_unionall_type); - backref_list.items[pos] = v; + v = jl_gc_alloc(s->ptls, sizeof(jl_unionall_t), /*align*/0, jl_unionall_type); ((jl_unionall_t*)v)->var = (jl_tvar_t*)jl_deserialize_value(s, (jl_value_t**)&((jl_unionall_t*)v)->var); jl_gc_wb(v, ((jl_unionall_t*)v)->var); ((jl_unionall_t*)v)->body = jl_deserialize_value(s, &((jl_unionall_t*)v)->body); jl_gc_wb(v, ((jl_unionall_t*)v)->body); return v; case TAG_TVAR: - v = jl_gc_alloc(s->ptls, sizeof(jl_tvar_t), jl_tvar_type); + v = 
jl_gc_alloc(s->ptls, sizeof(jl_tvar_t), /*align*/ 0, jl_tvar_type); jl_tvar_t *tv = (jl_tvar_t*)v; arraylist_push(&backref_list, tv); tv->name = (jl_sym_t*)jl_deserialize_value(s, NULL); @@ -1811,7 +1811,7 @@ static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc case TAG_BASE: return (jl_value_t*)jl_base_module; case TAG_CNULL: - v = jl_gc_alloc(s->ptls, sizeof(void*), NULL); + v = jl_gc_alloc(s->ptls, sizeof(void*), /*align*/ 0, NULL); jl_set_typeof(v, (void*)(intptr_t)0x50); *(void**)v = NULL; uintptr_t pos = backref_list.len; diff --git a/src/gc.c b/src/gc.c index ec253076e9688..b4a95e77109ec 100644 --- a/src/gc.c +++ b/src/gc.c @@ -837,8 +837,7 @@ static inline void maybe_collect(jl_ptls_t ptls) JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) { - jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), - jl_weakref_type); + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), 0, jl_weakref_type); wr->value = value; // NOTE: wb not needed here arraylist_push(&ptls->heap.weak_refs, wr); return wr; @@ -1183,12 +1182,14 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, return jl_valueof(v); } -int jl_gc_classify_pools(size_t sz, int *osize) +int jl_gc_classify_pools(size_t sz, size_t alignment, int *osize) { if (sz > GC_MAX_SZCLASS) return -1; size_t allocsz = sz + sizeof(jl_taggedvalue_t); - int klass = jl_gc_szclass(allocsz); + size_t alignsz = jl_gc_alignsz(allocsz, alignment); + int klass = jl_gc_szclass(alignsz, alignment); + assert(klass != -1); *osize = jl_gc_sizeclasses[klass]; return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); } @@ -3216,9 +3217,9 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) // allocator entry points -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, size_t alignment, void *ty) { - return 
jl_gc_alloc_(ptls, sz, ty); + return jl_gc_alloc_(ptls, sz, alignment, ty); } // Per-thread initialization @@ -3610,31 +3611,31 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) { jl_ptls_t ptls = jl_get_ptls_states(); - return jl_gc_alloc(ptls, sz, NULL); + return jl_gc_alloc(ptls, sz, /*align*/ 0, NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void) { jl_ptls_t ptls = jl_get_ptls_states(); - return jl_gc_alloc(ptls, 0, NULL); + return jl_gc_alloc(ptls, 0, 0, NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void) { jl_ptls_t ptls = jl_get_ptls_states(); - return jl_gc_alloc(ptls, sizeof(void*), NULL); + return jl_gc_alloc(ptls, sizeof(void*), 0, NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void) { jl_ptls_t ptls = jl_get_ptls_states(); - return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL); + return jl_gc_alloc(ptls, sizeof(void*) * 2, 0, NULL); } JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) { jl_ptls_t ptls = jl_get_ptls_states(); - return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL); + return jl_gc_alloc(ptls, sizeof(void*) * 3, 0, NULL); } JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) @@ -3769,7 +3770,7 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) { - return jl_gc_alloc(ptls, sz, ty); + return jl_gc_alloc(ptls, sz, jl_datatype_align(ty), ty); } JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) diff --git a/src/gf.c b/src/gf.c index 102b799723145..f32a4cf6fa420 100644 --- a/src/gf.c +++ b/src/gf.c @@ -375,8 +375,8 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( { jl_ptls_t ptls = jl_get_ptls_states(); assert(min_world <= max_world && "attempting to set invalid world constraints"); - jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ptls, sizeof(jl_code_instance_t), - jl_code_instance_type); + jl_code_instance_t *codeinst = 
(jl_code_instance_t*)jl_gc_alloc(ptls, + sizeof(jl_code_instance_t), 0, jl_code_instance_type); codeinst->def = mi; codeinst->min_world = min_world; codeinst->max_world = max_world; @@ -2558,7 +2558,7 @@ enum SIGNATURE_FULLY_COVERS { static jl_method_match_t *make_method_match(jl_tupletype_t *spec_types, jl_svec_t *sparams, jl_method_t *method, enum SIGNATURE_FULLY_COVERS fully_covers) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ptls, sizeof(jl_method_match_t), jl_method_match_type); + jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ptls, sizeof(jl_method_match_t), 0, jl_method_match_type); match->spec_types = spec_types; match->sparams = sparams; match->method = method; diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index ada6166c1ceb8..ebc4a35960137 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -497,7 +497,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) return mark_julia_type(ctx, vx, false, bt); } else { - Value *box = emit_allocobj(ctx, nb, boxed(ctx, bt_value)); + Value *box = emit_allocobj(ctx, nb, jl_datatype_align(bt), boxed(ctx, bt_value)); init_bits_value(ctx, box, vx, tbaa_immut); return mark_julia_type(ctx, box, true, bt); } @@ -589,7 +589,8 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) } assert(jl_is_datatype(ety)); uint64_t size = jl_datatype_size(ety); - Value *strct = emit_allocobj(ctx, size, + size_t alignment = jl_datatype_align(ety); + Value *strct = emit_allocobj(ctx, size, alignment, literal_pointer_val(ctx, ety)); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); diff --git a/src/ircode.c b/src/ircode.c index 62e38e72697f6..2abcb3fa38a2a 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -276,14 +276,17 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) jl_array_t *ar = (jl_array_t*)v; jl_value_t *et = 
jl_tparam0(jl_typeof(ar)); int isunion = jl_is_uniontype(et); - if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { + size_t elalign = ar->flags.ptrarray ? sizeof(void*) : jl_datatype_align(jl_tparam0(jl_typeof(ar))); + if (ar->flags.ndims == 1 && ar->elsize <= 0x1f && elalign <= 0xff) { write_uint8(s->s, TAG_ARRAY1D); write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); + write_uint8(s->s, elalign); } else { write_uint8(s->s, TAG_ARRAY); write_uint16(s->s, ar->flags.ndims); write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); + write_uint16(s->s, elalign); } for (i = 0; i < ar->flags.ndims; i++) jl_encode_value(s, jl_box_long(jl_array_dim(ar,i))); @@ -403,7 +406,7 @@ static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) JL_GC_D static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED { int16_t i, ndims; - int isptr, isunion, hasptr, elsize; + int isptr, isunion, hasptr, elsize, elalign; if (tag == TAG_ARRAY1D) { ndims = 1; elsize = read_uint8(s->s); @@ -411,6 +414,7 @@ static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_ hasptr = (elsize >> 6) & 1; isunion = (elsize >> 5) & 1; elsize = elsize & 0x1f; + elalign = read_uint8(s->s); } else { ndims = read_uint16(s->s); @@ -419,13 +423,14 @@ static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_ hasptr = (elsize >> 14) & 1; isunion = (elsize >> 13) & 1; - elsize = elsize & 0x3fff; + elsize = elsize & 0x1fff; + elalign = read_uint16(s->s); } size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); for (i = 0; i < ndims; i++) { dims[i] = jl_unbox_long(jl_decode_value(s)); } jl_array_t *a = jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); + (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize, elalign); jl_value_t *aty = jl_decode_value(s); jl_set_typeof(a, aty); if
(a->flags.ptrarray) { @@ -544,7 +549,7 @@ static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) JL_GC_DISABLED static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED { int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); + jl_value_t *v = jl_gc_alloc(s->ptls, sz, /*align*/0, NULL); // TODO: this alignment seems wrong jl_set_typeof(v, (void*)(intptr_t)0x50); jl_datatype_t *dt = (jl_datatype_t*)jl_decode_value(s); jl_set_typeof(v, dt); diff --git a/src/julia_internal.h b/src/julia_internal.h index 369781b79bed3..e8c1c327ebd68 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -166,7 +166,7 @@ JL_DLLEXPORT extern const char *jl_filename; JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, int osize); JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t allocsz); -int jl_gc_classify_pools(size_t sz, int *osize); +int jl_gc_classify_pools(size_t sz, size_t alignment, int *osize); extern jl_mutex_t gc_perm_lock; void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; @@ -176,6 +176,12 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v); void gc_sweep_sysimg(void); +#define JL_SMALL_BYTE_ALIGNMENT 16 +#define JL_CACHE_BYTE_ALIGNMENT 64 +// JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide +#define JL_HEAP_ALIGNMENT JL_CACHE_BYTE_ALIGNMENT +#define GC_MAX_SZCLASS (2032-sizeof(void*)) + // pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET) static const int jl_gc_sizeclasses[] = { #ifdef _P64 @@ -234,12 +240,22 @@ STATIC_INLINE int jl_gc_alignment(size_t sz) } JL_DLLEXPORT int jl_alignment(size_t sz); +STATIC_INLINE size_t JL_CONST_FUNC jl_gc_alignsz(size_t sz, size_t alignment) +{ + // The pools are aligned with JL_HEAP_ALIGNMENT and no bigger alignment is possible. 
+ assert(alignment <= JL_HEAP_ALIGNMENT); + // Alignment needs to be a power of two (zero means no alignment requirement) + assert((alignment & (alignment - 1)) == 0); + size_t alsz = LLT_ALIGN(sz, alignment); + return alignment ? alsz : sz; +} + // the following table is computed from jl_gc_sizeclasses via the formula: // [searchsortedfirst(TABLE, i) for i = 0:16:table[end]] static const uint8_t szclass_table[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40}; static_assert(sizeof(szclass_table) == 128, ""); -STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) +STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz, unsigned alignment) { assert(sz <= 2032); uint8_t klass = szclass_table[(sz + 15) / 16]; @@ -262,17 +278,19 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) #define JL_SMALL_BYTE_ALIGNMENT 16 #define JL_CACHE_BYTE_ALIGNMENT 64 // JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide -#define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT +#define JL_HEAP_ALIGNMENT JL_CACHE_BYTE_ALIGNMENT #define GC_MAX_SZCLASS (2032-sizeof(void*)) -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, size_t alignment, void *ty) { jl_value_t *v; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); + int pool_id = jl_gc_szclass(allocsz, alignment); jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; int osize = jl_gc_sizeclasses[pool_id]; + assert((size_t)osize >=
alignment && + (alignment == 0 || (osize & (alignment - 1)) == 0)); v = jl_gc_pool_alloc(ptls, (char*)p - (char*)ptls, osize); } else { @@ -283,15 +301,15 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) jl_set_typeof(v, ty); return v; } -JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); -// On GCC, only inline when sz is constant +JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, size_t alignment, void *ty); +// On GCC, only inline when sz and align are constant #ifdef __GNUC__ -# define jl_gc_alloc(ptls, sz, ty) \ - (__builtin_constant_p(sz) ? \ - jl_gc_alloc_(ptls, sz, ty) : \ - (jl_gc_alloc)(ptls, sz, ty)) +# define jl_gc_alloc(ptls, sz, align, ty) \ + (__builtin_constant_p(sz) && __builtin_constant_p(align) ? \ + jl_gc_alloc_(ptls, sz, align, ty) : \ + (jl_gc_alloc)(ptls, sz, align, ty)) #else -# define jl_gc_alloc(ptls, sz, ty) jl_gc_alloc_(ptls, sz, ty) +# define jl_gc_alloc(ptls, sz, align, ty) jl_gc_alloc_(ptls, sz, align, ty) #endif // jl_buff_tag must be a multiple of GC_PAGE_SZ so that it can't be @@ -300,7 +318,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) { - return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag); + return jl_gc_alloc(ptls, sz, 0, (void*)jl_buff_tag); } STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) @@ -508,7 +526,7 @@ int jl_pointer_egal(jl_value_t *t); jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; void jl_compute_field_offsets(jl_datatype_t *st); jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int hasptr, int isunion, int elsz); + int isunboxed, int hasptr, int isunion, int elsz, int elalign); void jl_module_run_initializer(jl_module_t *m); jl_binding_t
*jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT; void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b); diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 4bf6038361541..37fed98e87521 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -145,7 +145,7 @@ struct Optimizer { private: bool isSafepoint(Instruction *inst); Instruction *getFirstSafepoint(BasicBlock *bb); - ssize_t getGCAllocSize(Instruction *I); + std::pair getGCAllocSize(Instruction *I); void pushInstruction(Instruction *I); void insertLifetimeEnd(Value *ptr, Constant *sz, Instruction *insert); @@ -157,7 +157,7 @@ struct Optimizer { void replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, Instruction *orig_i, Instruction *new_i); void removeAlloc(CallInst *orig_inst); - void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref); + void moveToStack(CallInst *orig_inst, size_t sz, size_t al, bool has_ref); void splitOnStack(CallInst *orig_inst); void optimizeTag(CallInst *orig_inst); @@ -297,7 +297,7 @@ struct Optimizer { } }; - SetVector> worklist; + SetVector>> worklist; SmallVector removed; AllocUseInfo use_info; CheckInst::Stack check_stack; @@ -308,8 +308,8 @@ struct Optimizer { void Optimizer::pushInstruction(Instruction *I) { - ssize_t sz = getGCAllocSize(I); - if (sz != -1) { + auto sz = getGCAllocSize(I); + if (sz.first != -1) { worklist.insert(std::make_pair(cast(I), sz)); } } @@ -328,7 +328,8 @@ void Optimizer::optimizeAll() while (!worklist.empty()) { auto item = worklist.pop_back_val(); auto orig = item.first; - size_t sz = item.second; + size_t sz, al; + std::tie(sz, al) = item.second; checkInst(orig); if (use_info.escaped) { if (use_info.hastypeof) @@ -370,7 +371,7 @@ void Optimizer::optimizeAll() } // The object only has a single field that's a reference with only one kind of access. 
} - moveToStack(orig, sz, has_ref); + moveToStack(orig, sz, al, has_ref); } } @@ -418,18 +419,19 @@ Instruction *Optimizer::getFirstSafepoint(BasicBlock *bb) return first; } -ssize_t Optimizer::getGCAllocSize(Instruction *I) +std::pair Optimizer::getGCAllocSize(Instruction *I) { auto call = dyn_cast(I); if (!call) - return -1; + return std::make_pair(-1, -1); if (call->getCalledOperand() != pass.alloc_obj_func) - return -1; - assert(call->getNumArgOperands() == 3); + return std::make_pair(-1, -1); + assert(call->getNumArgOperands() == 4); size_t sz = (size_t)cast(call->getArgOperand(1))->getZExtValue(); + size_t al = (size_t)cast(call->getArgOperand(2))->getZExtValue(); if (sz < IntegerType::MAX_INT_BITS / 8 && sz < INT32_MAX) - return sz; - return -1; + return std::make_pair(sz, al); + return std::make_pair(-1, -1); } std::pair& @@ -906,18 +908,14 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, // This function should not erase any safepoint so that the lifetime marker can find and cache // all the original safepoints. -void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) +void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, size_t al, bool has_ref) { - auto tag = orig_inst->getArgOperand(2); + auto tag = orig_inst->getArgOperand(3); removed.push_back(orig_inst); // The allocation does not escape or get used in a phi node so none of the derived // SSA from it are live when we run the allocation again. // It is now safe to promote the allocation to an entry block alloca. - size_t align = 1; - // TODO: This is overly conservative. May want to instead pass this as a - // parameter to the allocation function directly.
- if (sz > 1) - align = MinAlign(JL_SMALL_BYTE_ALIGNMENT, NextPowerOf2(sz)); + size_t align = MinAlign(JL_SMALL_BYTE_ALIGNMENT, al); // No debug info for prolog instructions IRBuilder<> prolog_builder(&F.getEntryBlock().front()); AllocaInst *buff; diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index e11df11dcc976..d74867ab3c8ea 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -183,11 +183,12 @@ Instruction *FinalLowerGC::getPgcstack(Instruction *ptlsStates) Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { - assert(target->getNumArgOperands() == 2); + assert(target->getNumArgOperands() == 3); auto sz = (size_t)cast(target->getArgOperand(1))->getZExtValue(); + auto al = (size_t)cast(target->getArgOperand(2))->getZExtValue(); // This is strongly architecture and OS dependent int osize; - int offset = jl_gc_classify_pools(sz, &osize); + int offset = jl_gc_classify_pools(sz, al, &osize); IRBuilder<> builder(target); builder.SetCurrentDebugLocation(target->getDebugLoc()); auto ptls = target->getArgOperand(0); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index ecf93e50eefde..89c4554c2b572 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2250,7 +2250,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { CI->replaceAllUsesWith(ASCI); UpdatePtrNumbering(CI, ASCI, S); } else if (alloc_obj_func && callee == alloc_obj_func) { - assert(CI->getNumArgOperands() == 3); + assert(CI->getNumArgOperands() == 4); // Initialize an IR builder. 
IRBuilder<> builder(CI); @@ -2266,6 +2266,10 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { builder.CreateIntCast( CI->getArgOperand(1), allocBytesIntrinsic->getFunctionType()->getParamType(1), + false), + builder.CreateIntCast( + CI->getArgOperand(2), + allocBytesIntrinsic->getFunctionType()->getParamType(2), false) }); newI->takeName(CI); @@ -2275,9 +2279,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) { // We pretty much only load using `T_size` so try our best to strip // as many cast as possible. #if JL_LLVM_VERSION >= 100000 - auto tag = CI->getArgOperand(2)->stripPointerCastsAndAliases(); + auto tag = CI->getArgOperand(3)->stripPointerCastsAndAliases(); #else - auto tag = CI->getArgOperand(2)->stripPointerCasts(); + auto tag = CI->getArgOperand(3)->stripPointerCasts(); #endif if (auto C = dyn_cast(tag)) { if (C->getOpcode() == Instruction::IntToPtr) { diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index d594408a20992..426d30c86b98a 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -147,7 +147,7 @@ namespace jl_intrinsics { auto intrinsic = Function::Create( FunctionType::get( context.T_prjlvalue, - { context.T_pint8, context.T_size }, + { context.T_pint8, context.T_size, context.T_size }, false), Function::ExternalLinkage, GC_ALLOC_BYTES_NAME); diff --git a/src/method.c b/src/method.c index 5f4a954f882b8..0fd8145a8b617 100644 --- a/src/method.c +++ b/src/method.c @@ -307,7 +307,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) { jl_ptls_t ptls = jl_get_ptls_states(); jl_method_instance_t *li = - (jl_method_instance_t*)jl_gc_alloc(ptls, sizeof(jl_method_instance_t), + (jl_method_instance_t*)jl_gc_alloc(ptls, sizeof(jl_method_instance_t), 0, jl_method_instance_type); li->def.value = NULL; li->specTypes = NULL; @@ -323,7 +323,7 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void) { jl_ptls_t ptls = jl_get_ptls_states(); jl_code_info_t *src = - 
(jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t), + (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t), 0, jl_code_info_type); src->code = NULL; src->codelocs = NULL; @@ -474,7 +474,7 @@ JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src) { jl_ptls_t ptls = jl_get_ptls_states(); jl_code_info_t *newsrc = - (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t), + (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t), 0, jl_code_info_type); *newsrc = *src; return newsrc; @@ -601,8 +601,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_method_t *m = - (jl_method_t*)jl_gc_alloc(ptls, sizeof(jl_method_t), jl_method_type); + jl_method_t *m = (jl_method_t*)jl_gc_alloc(ptls, sizeof(jl_method_t), 0, jl_method_type); m->specializations = jl_emptysvec; m->speckeyset = (jl_array_t*)jl_an_empty_vec_any; m->sig = NULL; diff --git a/src/module.c b/src/module.c index 88e9105cb9c7a..2a2dafb869bbb 100644 --- a/src/module.c +++ b/src/module.c @@ -20,8 +20,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name) { jl_ptls_t ptls = jl_get_ptls_states(); const jl_uuid_t uuid_zero = {0, 0}; - jl_module_t *m = (jl_module_t*)jl_gc_alloc(ptls, sizeof(jl_module_t), - jl_module_type); + jl_module_t *m = (jl_module_t*)jl_gc_alloc(ptls, sizeof(jl_module_t), 0, jl_module_type); JL_GC_PUSH1(&m); assert(jl_is_symbol(name)); m->name = name; diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 2337abe7d5704..5f8f1ec61bc2b 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -422,7 +422,7 @@ static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsign if (osize <= sizeof(cnt)) { return jl_new_bits(ty, &cnt); } - jl_value_t *newv = jl_gc_alloc(ptls, osize, ty); + jl_value_t *newv = jl_gc_alloc(ptls, osize, 0, ty); // perform zext, if needed 
memset((char*)jl_data_ptr(newv) + sizeof(cnt), 0, osize - sizeof(cnt)); memcpy(jl_data_ptr(newv), &cnt, sizeof(cnt)); @@ -484,7 +484,7 @@ static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const c if (!jl_is_primitivetype(ty)) jl_errorf("%s: type is not a primitive type", name); unsigned sz2 = jl_datatype_size(ty); - jl_value_t *newv = jl_gc_alloc(ptls, sz2, ty); + jl_value_t *newv = jl_gc_alloc(ptls, sz2, 0, ty); void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv); unsigned sz = jl_datatype_size(jl_typeof(a)); switch (sz) { @@ -646,7 +646,7 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2); JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALAWYS_LEAFTYPE) jl_ptls_t ptls = jl_get_ptls_states(); - jl_value_t *newv = jl_gc_alloc(ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp); + jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(tuptyp), 0, tuptyp); intrinsic_checked_t op = select_intrinsic_checked(sz2, (const intrinsic_checked_t*)voidlist); int ovflw = op(sz * host_char_bit, pa, pb, jl_data_ptr(newv)); @@ -679,8 +679,8 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ jl_error(#name ": types of a and b must match"); \ if (!jl_is_primitivetype(ty)) \ jl_error(#name ": values are not primitive types"); \ - int sz = jl_datatype_size(ty); \ - jl_value_t *newv = jl_gc_alloc(ptls, sz, ty); \ + size_t sz = jl_datatype_size(ty); \ + jl_value_t *newv = jl_gc_alloc(ptls, sz, 0, ty); \ void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pr = jl_data_ptr(newv); \ switch (sz) { \ /* choose the right size c-type operation */ \ @@ -742,8 +742,8 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) jl_error(#name ": types of a, b, and c must match"); \ if (!jl_is_primitivetype(ty)) \ jl_error(#name ": values are not primitive types"); \ - int sz = jl_datatype_size(ty); \ - jl_value_t *newv = jl_gc_alloc(ptls, sz, ty); \ + size_t sz = 
jl_datatype_size(ty); \ + jl_value_t *newv = jl_gc_alloc(ptls, sz, 0, ty); \ void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pc = jl_data_ptr(c), *pr = jl_data_ptr(newv); \ switch (sz) { \ /* choose the right size c-type operation */ \ diff --git a/src/simplevector.c b/src/simplevector.c index 41b1be14da7f4..c873ea57dc568 100644 --- a/src/simplevector.c +++ b/src/simplevector.c @@ -35,7 +35,7 @@ jl_svec_t *(jl_perm_symsvec)(size_t n, ...) JL_DLLEXPORT jl_svec_t *jl_svec1(void *a) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 2, + jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 2, 0, jl_simplevector_type); jl_svec_set_len_unsafe(v, 1); jl_svecset(v, 0, a); @@ -45,7 +45,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec1(void *a) JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 3, + jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 3, 0, jl_simplevector_type); jl_svec_set_len_unsafe(v, 2); jl_svecset(v, 0, a); @@ -57,7 +57,7 @@ JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n) { jl_ptls_t ptls = jl_get_ptls_states(); if (n == 0) return jl_emptysvec; - jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ptls, (n + 1) * sizeof(void*), + jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ptls, (n + 1) * sizeof(void*), 0, jl_simplevector_type); jl_svec_set_len_unsafe(jv, n); return jv; diff --git a/src/staticdata.c b/src/staticdata.c index 9726b676a529e..1c78d969ccc64 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -617,8 +617,11 @@ static void jl_write_values(jl_serializer_state *s) uintptr_t item = (uintptr_t)objects_list.items[i + 1]; jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption"); - // realign stream to expected gc alignment (16 bytes) + // realign stream to expected gc alignment uintptr_t skip_header_pos 
= ios_pos(s->s) + sizeof(jl_taggedvalue_t); + unsigned align = jl_datatype_align(t); + if (align < sizeof(void*)) + align = sizeof(void*); write_padding(s->s, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos); // write header write_gctaggedfield(s, backref_id(s, t)); @@ -1410,7 +1413,7 @@ static void jl_save_system_image_to_stream(ios_t *f) write_uint32(f, const_data.size); // realign stream to max-alignment for data - write_padding(f, LLT_ALIGN(ios_pos(f), 16) - ios_pos(f)); + write_padding(f, LLT_ALIGN(ios_pos(f), 64) - ios_pos(f)); ios_seek(&const_data, 0); ios_copyall(f, &const_data); ios_close(&const_data); @@ -1534,7 +1537,7 @@ static void jl_restore_system_image_from_stream(ios_t *f) size_t sizeof_constdata = read_uint32(f); // realign stream to max-alignment for data - ios_seek(f, LLT_ALIGN(ios_pos(f), 16)); + ios_seek(f, LLT_ALIGN(ios_pos(f), 64)); ios_static_buffer(&const_data, f->buf + f->bpos, sizeof_constdata); ios_skip(f, sizeof_constdata); @@ -1564,7 +1567,7 @@ static void jl_restore_system_image_from_stream(ios_t *f) jl_value_t **tag = tags[i]; *tag = jl_read_value(&s); } - s.ptls->root_task = (jl_task_t*)jl_gc_alloc(s.ptls, sizeof(jl_task_t), jl_task_type); + s.ptls->root_task = (jl_task_t*)jl_gc_alloc(s.ptls, sizeof(jl_task_t), /*align*/ 0, jl_task_type); memset(s.ptls->root_task, 0, sizeof(jl_task_t)); s.ptls->root_task->tls = jl_read_value(&s); jl_init_int32_int64_cache(); diff --git a/src/task.c b/src/task.c index 3d7b40191c3d6..2d483f3db0bb1 100644 --- a/src/task.c +++ b/src/task.c @@ -651,7 +651,7 @@ JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED) JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_task_t *t = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); + jl_task_t *t = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), 0, jl_task_type); t->copy_stack = 0; if (ssize == 0) { // stack size unspecified; use 
default @@ -1220,7 +1220,7 @@ void jl_init_root_task(void *stack_lo, void *stack_hi) { jl_ptls_t ptls = jl_get_ptls_states(); if (ptls->root_task == NULL) { - ptls->root_task = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); + ptls->root_task = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), 0, jl_task_type); memset(ptls->root_task, 0, sizeof(jl_task_t)); ptls->root_task->tls = jl_nothing; } diff --git a/src/typemap.c b/src/typemap.c index 347b2147d9aea..8839e1ff791df 100644 --- a/src/typemap.c +++ b/src/typemap.c @@ -1090,9 +1090,8 @@ static void jl_typemap_list_insert_sorted( static jl_typemap_level_t *jl_new_typemap_level(void) { jl_ptls_t ptls = jl_get_ptls_states(); - jl_typemap_level_t *cache = - (jl_typemap_level_t*)jl_gc_alloc(ptls, sizeof(jl_typemap_level_t), - jl_typemap_level_type); + jl_typemap_level_t *cache = (jl_typemap_level_t*)jl_gc_alloc(ptls, + sizeof(jl_typemap_level_t), 0, jl_typemap_level_type); cache->arg1 = (jl_array_t*)jl_an_empty_vec_any; cache->targ = (jl_array_t*)jl_an_empty_vec_any; cache->name1 = (jl_array_t*)jl_an_empty_vec_any; @@ -1267,9 +1266,8 @@ jl_typemap_entry_t *jl_typemap_alloc( isleafsig = issimplesig = 0; } - jl_typemap_entry_t *newrec = - (jl_typemap_entry_t*)jl_gc_alloc(ptls, sizeof(jl_typemap_entry_t), - jl_typemap_entry_type); + jl_typemap_entry_t *newrec = (jl_typemap_entry_t*)jl_gc_alloc(ptls, + sizeof(jl_typemap_entry_t), 0, jl_typemap_entry_type); newrec->sig = type; newrec->simplesig = simpletype; newrec->func.value = newvalue;