diff --git a/embed.fnc b/embed.fnc index f0d1dedb1485..e64a78efd9c2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3433,7 +3433,7 @@ Adp |SV * |sv_strftime_tm |NN SV *fmt \ Adp |SV * |sv_string_from_errnum \ |int errnum \ |NULLOK SV *tgtsv -CMbdp |void |sv_taint |NN SV *sv +ACdp |SV * |sv_taint |NN SV *sv CRdp |bool |sv_tainted |NN SV * const sv Adip |bool |SvTRUE |NULLOK SV *sv Cdp |I32 |sv_true |NULLOK SV * const sv diff --git a/embed.h b/embed.h index 8f890fba4df4..82e7327ea4fa 100644 --- a/embed.h +++ b/embed.h @@ -752,6 +752,7 @@ # define sv_strftime_ints(a,b,c,d,e,f,g,h) Perl_sv_strftime_ints(aTHX_ a,b,c,d,e,f,g,h) # define sv_strftime_tm(a,b) Perl_sv_strftime_tm(aTHX_ a,b) # define sv_string_from_errnum(a,b) Perl_sv_string_from_errnum(aTHX_ a,b) +# define sv_taint(a) Perl_sv_taint(aTHX_ a) # define sv_tainted(a) Perl_sv_tainted(aTHX_ a) # define sv_true(a) Perl_sv_true(aTHX_ a) # define sv_uni_display(a,b,c,d) Perl_sv_uni_display(aTHX_ a,b,c,d) diff --git a/embedvar.h b/embedvar.h index 05f9ed6ef11d..e2e9c4681aa4 100644 --- a/embedvar.h +++ b/embedvar.h @@ -319,9 +319,8 @@ # define PL_sv_yes (vTHX->Isv_yes) # define PL_sv_zero (vTHX->Isv_zero) # define PL_sys_intern (vTHX->Isys_intern) +# define PL_taint (vTHX->Itaint) # define PL_taint_warn (vTHX->Itaint_warn) -# define PL_tainted (vTHX->Itainted) -# define PL_tainting (vTHX->Itainting) # define PL_threadhook (vTHX->Ithreadhook) # define PL_tmps_floor (vTHX->Itmps_floor) # define PL_tmps_ix (vTHX->Itmps_ix) diff --git a/hv.c b/hv.c index 7eaf6fc97063..63e17c0a0e38 100644 --- a/hv.c +++ b/hv.c @@ -1130,12 +1130,12 @@ Perl_hv_scalar(pTHX_ HV *hv) if (u <= (UV)IV_MAX) { SvIV_set(sv, (IV)u); (void)SvIOK_only(sv); - SvTAINT(sv); + sv = SvTAINTTC(sv); } else { SvIV_set(sv, 0); SvUV_set(sv, u); (void)SvIOK_only_UV(sv); - SvTAINT(sv); + sv = SvTAINTTC(sv); } return sv; diff --git a/intrpvar.h b/intrpvar.h index 482f46d71885..d6a14d477f7c 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -74,8 +74,9 @@ PERLVAR(I, multideref_pc, UNOP_AUX_item *) PERLVAR(I, curpm, PMOP *) /* what to do \ interps in REs from */ PERLVAR(I, curpm_under, PMOP *) /* what to do \ interps in REs from */ -PERLVAR(I, tainting, bool) /* ? doing taint checks */ -PERLVARI(I, tainted, bool, FALSE) /* using variables controlled by $< */ +/* bool PL_tainting --- ? doing taint checks */ +/* bool PL_tainted --- using variables controlled by $< */ +PERLVAR(I, taint, TAINT_U) /* PL_delaymagic is currently used for two purposes: to assure simultaneous * updates in ($<,$>) = ..., and to assure atomic update in push/unshift diff --git a/mathoms.c b/mathoms.c index 27fa2969d1b1..6057dacaa494 100644 --- a/mathoms.c +++ b/mathoms.c @@ -90,23 +90,6 @@ Perl_sv_unref(pTHX_ SV *sv) sv_unref_flags(sv, 0); } -/* -=for apidoc_section $tainting -=for apidoc sv_taint - -Taint an SV. Use C instead. - -=cut -*/ - -void -Perl_sv_taint(pTHX_ SV *sv) -{ - PERL_ARGS_ASSERT_SV_TAINT; - - sv_magic((sv), NULL, PERL_MAGIC_taint, NULL, 0); -} - /* sv_2iv() is now a macro using Perl_sv_2iv_flags(); * this function provided for binary compatibility only */ diff --git a/perl.c b/perl.c index 15127adc50fe..cdb23079b740 100644 --- a/perl.c +++ b/perl.c @@ -249,6 +249,17 @@ perl_construct(pTHXx) SvREADONLY_on(&PL_sv_placeholder); SvREFCNT(&PL_sv_placeholder) = SvREFCNT_IMMORTAL; + STATIC_ASSERT_STMT( + sizeof(((TAINT_U *)0)->both) + == (sizeof(((TAINT_U *)0)->u.tainting) + sizeof(((TAINT_U *)0)->u.tainted)) + ); + STATIC_ASSERT_STMT( + sizeof(((TAINT_U *)0)->both) + == (STRUCT_OFFSET(TAINT_U, u.tainted) + sizeof(((TAINT_U *)0)->u.tainted)) + ); + STATIC_ASSERT_STMT(STRUCT_OFFSET(TAINT_U, both) == STRUCT_OFFSET(TAINT_U, u.tainting)); + /* PL_taint.u.both = 0; */ + PL_sighandlerp = Perl_sighandler; PL_sighandler1p = Perl_sighandler1; PL_sighandler3p = Perl_sighandler3; diff --git a/perl.h b/perl.h index 7641425fcc12..a60a6c0e4895 100644 --- a/perl.h +++ b/perl.h @@ -940,6 +940,8 @@ symbol would not be defined on C> platforms. * know what you're doing: tests and CPAN modules' tests are bound to fail. */ #ifdef NO_TAINT_SUPPORT +# define PL_tainting PL_taint.u.tainting +# define PL_tainted PL_taint.u.tainted # define TAINT NOOP # define TAINT_NOT NOOP # define TAINT_IF(c) NOOP @@ -948,6 +950,7 @@ symbol would not be defined on C> platforms. # define TAINT_set(s) NOOP # define TAINT_get 0 # define TAINTING_get 0 +# define TAINT_AND_TAINTING_get 0 # define TAINTING_set(s) NOOP # define TAINT_WARN_get 0 # define TAINT_WARN_set(s) NOOP @@ -1014,6 +1017,10 @@ violations are fatal. =cut */ + +#define PL_tainting PL_taint.u.tainting +#define PL_tainted PL_taint.u.tainted + /* Set to tainted if we are running under tainting mode */ # define TAINT (PL_tainted = PL_tainting) @@ -1027,6 +1034,8 @@ violations are fatal. # define TAINT_set(s) (PL_tainted = cBOOL(s)) # define TAINT_get (cBOOL(UNLIKELY(PL_tainted))) /* Is something tainted? */ # define TAINTING_get (cBOOL(UNLIKELY(PL_tainting))) +/* Efficient version of (PL_tainted && PL_tainting) */ +# define TAINT_AND_TAINTING_get (UNLIKELY(PL_taint.both == (TRUE | (TRUE << 8)))) # define TAINTING_set(s) (PL_tainting = cBOOL(s)) # define TAINT_WARN_get (PL_taint_warn) # define TAINT_WARN_set(s) (PL_taint_warn = cBOOL(s)) @@ -3309,6 +3318,14 @@ typedef struct padname PADNAME; #include "handy.h" #include "charclass_invlists.h" +typedef union { + U16 both; + struct { + bool tainting; + bool tainted; + } u; +} TAINT_U; + #if defined(USE_LARGE_FILES) && !defined(NO_64_BIT_RAWIO) # if LSEEKSIZE == 8 && !defined(USE_64_BIT_RAWIO) # define USE_64_BIT_RAWIO /* implicit */ diff --git a/proto.h b/proto.h index 65fe5c5bd68c..e2f595e1e64c 100644 --- a/proto.h +++ b/proto.h @@ -4916,6 +4916,11 @@ PERL_CALLCONV SV * Perl_sv_string_from_errnum(pTHX_ int errnum, SV *tgtsv); #define PERL_ARGS_ASSERT_SV_STRING_FROM_ERRNUM +PERL_CALLCONV SV * +Perl_sv_taint(pTHX_ SV *sv); +#define PERL_ARGS_ASSERT_SV_TAINT \ + assert(sv) + PERL_CALLCONV bool Perl_sv_tainted(pTHX_ SV * const sv) __attribute__warn_unused_result__; @@ -5986,11 +5991,6 @@ Perl_sv_setsv(pTHX_ SV *dsv, SV *ssv); # define PERL_ARGS_ASSERT_SV_SETSV \ assert(dsv) -PERL_CALLCONV void -Perl_sv_taint(pTHX_ SV *sv); -# define PERL_ARGS_ASSERT_SV_TAINT \ - assert(sv) - PERL_CALLCONV void Perl_sv_unref(pTHX_ SV *sv); # define PERL_ARGS_ASSERT_SV_UNREF \ diff --git a/sv.c b/sv.c index 2de62af3b819..e19a4e7f7d1f 100644 --- a/sv.c +++ b/sv.c @@ -9994,12 +9994,14 @@ SV * Perl_vnewSVpvf(pTHX_ const char *const pat, va_list *const args) { SV *sv; + STRLEN pat_len = strlen(pat); PERL_ARGS_ASSERT_VNEWSVPVF; - sv = newSV(1); + /* Unlikely output len < input pat len. ("%c",'A')("%s","") is rare. */ + sv = newSV(pat_len+STRLENs("\0")); SvPVCLEAR_FRESH(sv); - sv_vcatpvfn_flags(sv, pat, strlen(pat), args, NULL, 0, NULL, 0); + sv_vcatpvfn_flags(sv, pat, pat_len, args, NULL, 0, NULL, 0); return sv; } @@ -10015,11 +10017,27 @@ The reference count for the SV is set to 1. SV * Perl_newSVnv(pTHX_ const NV n) { - SV *sv = newSV_type(SVt_NV); + SV *sv; +#if NVSIZE <= IVSIZE + /* This bodyless code has been agressively strip for speed. + Do not revise it unless you use disassembler and look at machine code.*/ + new_SV(sv); +#if PTRSIZE == 8 && (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) + *(Size_t *)(&SvREFCNT(sv)) = + ((Size_t)1) + | ((Size_t)(((Size_t)(SVt_NV | SVf_NOK | SVp_NOK)) << 32)); +#else + SvFLAGS(sv) = SVt_NV | SVf_NOK | SVp_NOK; +#endif + SET_SVANY_FOR_BODYLESS_NV(sv); + sv->sv_u.svu_nv = n; +#else + sv = newSV_type(SVt_NV); (void)SvNOK_on(sv); - SvNV_set(sv, n); - SvTAINT(sv); +#endif + + sv = SvTAINTTC(sv); return sv; } @@ -10036,11 +10054,26 @@ SV is set to 1. SV * Perl_newSViv(pTHX_ const IV i) { - SV *sv = newSV_type(SVt_IV); - (void)SvIOK_on(sv); + SV *sv; + new_SV(sv); - SvIV_set(sv, i); - SvTAINT(sv); + /* We're starting from SVt_FIRST, so provided that's + * actual 0, we don't have to unset any SV type flags + * to promote to SVt_IV. */ + STATIC_ASSERT_STMT(SVt_FIRST == 0); + + /* This bodyless code has been agressively striped for speed. + Do not revise it unless you use disassembler and look at machine code.*/ +#if PTRSIZE == 8 && (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) + *(Size_t *)(&SvREFCNT(sv)) = ((Size_t)1) | + ((Size_t)((((Size_t)(SVt_IV|SVf_IOK|SVp_IOK)) << 32))); +#else + SvFLAGS(sv) = SVt_IV | SVf_IOK | SVp_IOK; +#endif + SET_SVANY_FOR_BODYLESS_IV(sv); + sv->sv_u.svu_iv = i; + + sv = SvTAINTTC(sv); return sv; } @@ -10058,15 +10091,6 @@ SV * Perl_newSVuv(pTHX_ const UV u) { SV *sv; - - /* Inlining ONLY the small relevant subset of sv_setuv here - * for performance. Makes a significant difference. */ - - /* Using ivs is more efficient than using uvs - see sv_setuv */ - if (u <= (UV)IV_MAX) { - return newSViv((IV)u); - } - new_SV(sv); /* We're starting from SVt_FIRST, so provided that's @@ -10074,13 +10098,84 @@ Perl_newSVuv(pTHX_ const UV u) * to promote to SVt_IV. */ STATIC_ASSERT_STMT(SVt_FIRST == 0); + /* This bodyless code has been agressively striped for speed. + Do not revise it unless you use disassembler and look at machine code.*/ + + /* Verify the &~ and |, or &~ >> | or >> |, trick works. Portability. */ + STATIC_ASSERT_STMT( + cBOOL(((UV)SVf_IVisUV) == (((UV)IV_MAX)+1)) + || cBOOL((((UV)SVf_IVisUV)<<32) == (((UV)IV_MAX)+1))); + STATIC_ASSERT_STMT( + STRUCT_OFFSET(SV, sv_u.svu_uv) == STRUCT_OFFSET(SV, sv_u.svu_iv) + && sizeof(sv->sv_u.svu_uv) == sizeof(sv->sv_u.svu_iv)); + + /* branchless SvIsUV_on() replaces former code with former comments: + + * Inlining ONLY the small relevant subset of sv_setuv here + * for performance. Makes a significant difference. + + * Using ivs is more efficient than using uvs - see sv_setuv + if (u <= (UV)IV_MAX) { + return newSViv((IV)u); + } + */ + + /* If 64b CPU, and little endian (x86, x64, modern ARM), + set sv->sv_refcnt and sv->sv_flags, with exactly 1 CPU op. + 'sv->sv_refcnt = 0;' assignment in new_SV() will optimize away. + Assert sv->sv_any is 64b, and sv->sv_refcnt is directly afterwards + in memory layout, and that sv->sv_refcnt and sv->sv_flags are adjacent, + therefore proving this U32* ptr, casted to U64*, is aligned. + Note majority of modern Perl users use LE CPUs, with hardware unaligned + support. But there is no Configure/perlapi macro defines currently, + that config YES/NO for hardware unaligned. Still, because the SV head + struct currently is aligned on all 64b builds, make sure SV head struct + stays aligned unless intentional future refactoring of SV head struct. + + This optimization can't be done on i386, since 64b ints are always + emulated with 32b CPU ops by all CCs AFAIK. And the X32 Linux OS/Kernel + has already been grandfathered. Other than X32 I can't think of any + OS or CPU with native 64b CPU ops, but 32b pointers. + + Doing this trick on 64b big endian OS, is possible, but a BE core dev + must port the code, fix all bit operators, and test it. */ + STATIC_ASSERT_STMT( + (STRUCT_OFFSET(SV,sv_refcnt) == sizeof(sv->sv_any)) + && STRUCT_OFFSET(SV,sv_flags) == STRUCT_OFFSET(SV,sv_refcnt)+U32SIZE); + +#if PTRSIZE == 8 && (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) + STATIC_ASSERT_STMT( + sizeof(sv->sv_refcnt) + sizeof(sv->sv_flags) == PTRSIZE + && STRUCT_OFFSET(SV,sv_refcnt) == PTRSIZE); + + *(Size_t *)(&SvREFCNT(sv)) = + ((Size_t)1) + | ((Size_t) ( + ((Size_t) ( + ((U32)((UV)((u&(((UV)IV_MAX)+1))>>32))) + | (SVt_IV|SVf_IOK|SVp_IOK) + )) << 32 + )); +#else + /* Flags unrolled, since MSVC -O1 optimizer refused to combine + 3 SvFLAGS(s); statements if "SvFLAGS() |= dynamic_var;". + Unroll to guarentee any CC flags, any CCs, do exactly 1 write to + SvFLAGS(). */ + if(((UV)SVf_IVisUV) == (((UV)IV_MAX)+1)) /* UV is 32 */ + SvFLAGS(sv) = + ((U32)(u&(((UV)IV_MAX)+1))) + | (SVt_IV|SVf_IOK|SVp_IOK); + else /* UV is 64 */ + SvFLAGS(sv) = + ((U32)((UV)((u&(((UV)IV_MAX)+1))>>32))) + | (SVt_IV|SVf_IOK|SVp_IOK); +#endif + /* Explictly optimize out reading SvANY ptr. Some CCs might optimize + next 2 statements, MSVC did, but some may not. */ SET_SVANY_FOR_BODYLESS_IV(sv); - SvFLAGS(sv) |= SVt_IV; - (void)SvIOK_on(sv); - (void)SvIsUV_on(sv); + sv->sv_u.svu_uv = u; - SvUV_set(sv, u); - SvTAINT(sv); + sv = SvTAINTTC(sv); return sv; } @@ -11052,6 +11147,25 @@ Perl_sv_unref_flags(pTHX_ SV *const ref, const U32 flags) sv_2mortal(target); /* Schedule for freeing later */ } +/* +=for apidoc_section $tainting +=for apidoc sv_taint + +Taint an SV. Use C instead. Return value is input SV *. +Useful for chaining calls and more efficient C code (tail calling). + +=cut +*/ + +SV * +Perl_sv_taint(pTHX_ SV *sv) +{ + PERL_ARGS_ASSERT_SV_TAINT; + + sv_magic(sv, NULL, PERL_MAGIC_taint, NULL, 0); + return sv; +} + /* =for apidoc sv_untaint @@ -15798,7 +15912,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifndef NO_TAINT_SUPPORT /* Set tainting stuff before PerlIO_debug can possibly get called */ - PL_tainting = proto_perl->Itainting; + PL_tainting = proto_perl->Itaint.u.tainting; PL_taint_warn = proto_perl->Itaint_warn; #else PL_tainting = FALSE; @@ -15943,7 +16057,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_statcache = proto_perl->Istatcache; #ifndef NO_TAINT_SUPPORT - PL_tainted = proto_perl->Itainted; + PL_tainted = proto_perl->Itaint.u.tainted; #else PL_tainted = FALSE; #endif diff --git a/sv.h b/sv.h index 47434ad6c0fd..7297632ac72c 100644 --- a/sv.h +++ b/sv.h @@ -1722,9 +1722,22 @@ the outputs of an expression in a pessimistic fashion; i.e., without paying attention to precisely which outputs are influenced by which inputs. =cut -*/ -#define sv_taint(sv) sv_magic((sv), NULL, PERL_MAGIC_taint, NULL, 0) +=for apidoc Cm|SV* sv|SvTAINTTC|SV* sv +Identical to C, except optimized for for C compilers to do tail calls. +Incoming arg I will be returned as the retval of I. +The return value I pointer will be identical to the incoming +argument I pointer. Ex. I). This way if I is on, and +the slow path branch executes, which has an internal helper function, that +helper function returns the argument passed in, and C compilers can optimize +the slowpath branch to a tail call, or use less registers. This macro is mostly +intended to be used if C is the last or almost last statement +in the caller function, and the caller has a I return type, and will +return C's arg I, to its caller as a return value. Similar idea +to C. + +=cut +*/ #ifdef NO_TAINT_SUPPORT # define SvTAINTED(sv) 0 @@ -1737,10 +1750,13 @@ attention to precisely which outputs are influenced by which inputs. #define SvTAINT(sv) \ STMT_START { \ assert(TAINTING_get || !TAINT_get); \ - if (UNLIKELY(TAINT_get)) \ - SvTAINTED_on(sv); \ + if (TAINT_AND_TAINTING_get) \ + sv_taint(sv); \ } STMT_END +#define SvTAINTTC(sv) ((assert(TAINTING_get || !TAINT_get), \ + TAINT_AND_TAINTING_get) ? sv_taint((sv)) : (sv)) + /* =for apidoc_section $SV =for apidoc Am|char*|SvPV_force |SV* sv|STRLEN len diff --git a/sv_inline.h b/sv_inline.h index a0fe8ec870c2..d65c784ca677 100644 --- a/sv_inline.h +++ b/sv_inline.h @@ -985,7 +985,7 @@ Perl_sv_setpv_freshbuf(pTHX_ SV *const sv) (void)SvPOK_only_UTF8(sv); /* UTF-8 flag will be 0; This is used instead of 'SvPOK_only' because the other sv_setpv functions use it */ - SvTAINT(sv); + SvTAINTTC(sv); return SvPVX(sv); }