From f130e7b43981f4ac2dec9273ec17abeefe175799 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 10:57:34 -0600 Subject: [PATCH 01/14] perl.h: Swap #ifdef's for simplicity None of the affected code is in EBCDIC platforms; by making that the outermost #ifdef, we can eliminate some nesting --- perl.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/perl.h b/perl.h index 877b0d61cc7b..e36f99e4ba6a 100644 --- a/perl.h +++ b/perl.h @@ -5552,8 +5552,8 @@ EXTCONST int PL_sig_num[]; * fold, so that 'a' maps to 'A' and 'A' maps to 'a', ignoring more complicated * folds such as outside the range or to multiple characters. */ -#ifdef DOINIT -# ifndef EBCDIC +#ifndef EBCDIC +# ifdef DOINIT /* The EBCDIC fold table depends on the code page, and hence is found in * ebcdic_tables.h */ @@ -5717,15 +5717,13 @@ EXTCONST unsigned char PL_mod_latin1_uc[] = { 240-32, 241-32, 242-32, 243-32, 244-32, 245-32, 246-32, 247, 248-32, 249-32, 250-32, 251-32, 252-32, 253-32, 254-32, 255 }; -# endif /* !EBCDIC, but still in DOINIT */ -#else /* ! DOINIT */ -# ifndef EBCDIC +# else /* ! DOINIT */ EXTCONST unsigned char PL_fold[]; EXTCONST unsigned char PL_fold_latin1[]; EXTCONST unsigned char PL_mod_latin1_uc[]; EXTCONST unsigned char PL_latin1_lc[]; -# endif -#endif +# endif +#endif /* ! EBCDIC */ /* Although only used for debugging, these constants must be available in * non-debugging builds too, since they're used in ext/re/re_exec.c, From 1043e6b589dcb28fd4cae254218c2f650bdf0812 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 11:04:44 -0600 Subject: [PATCH 02/14] perl.h: Eliminate some #ifdef DOINITs Most of the tables defined here can use INIT() instead of the more clumsy #ifdef DOINIT..#else..#endif sequences. The one that can't has #ifdef's in its initialization, so can't be initialized with INIT(). 
--- perl.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/perl.h b/perl.h index e36f99e4ba6a..8b31fbb3536a 100644 --- a/perl.h +++ b/perl.h @@ -5553,12 +5553,11 @@ EXTCONST int PL_sig_num[]; * folds such as outside the range or to multiple characters. */ #ifndef EBCDIC -# ifdef DOINIT /* The EBCDIC fold table depends on the code page, and hence is found in * ebcdic_tables.h */ -EXTCONST unsigned char PL_fold[] = { +EXTCONST unsigned char PL_fold[] INIT({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -5591,9 +5590,9 @@ EXTCONST unsigned char PL_fold[] = { 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 -}; +}); -EXTCONST unsigned char PL_fold_latin1[] = { +EXTCONST unsigned char PL_fold_latin1[] INIT ({ /* Full latin1 complement folding, except for three problematic code points: * Micro sign (181 = 0xB5) and y with diaeresis (255 = 0xFF) have their * fold complements outside the Latin1 range, so can't match something @@ -5635,11 +5634,11 @@ EXTCONST unsigned char PL_fold_latin1[] = { 240-32, 241-32, 242-32, 243-32, 244-32, 245-32, 246-32, 247, 248-32, 249-32, 250-32, 251-32, 252-32, 253-32, 254-32, 255 /* y with diaeresis */ -}; +}); /* If these tables are accessed through ebcdic, the access will be converted to * latin1 first */ -EXTCONST unsigned char PL_latin1_lc[] = { /* lowercasing */ +EXTCONST unsigned char PL_latin1_lc[] INIT({ /* lowercasing */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, @@ -5672,7 +5671,9 @@ EXTCONST unsigned char PL_latin1_lc[] = { /* lowercasing */ 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 -}; +}); + +# ifdef DOINIT /* upper and title case of latin1 characters, modified so that the three tricky * ones are mapped to 255 (which is one of the three) */ @@ -5718,10 +5719,7 @@ 
EXTCONST unsigned char PL_mod_latin1_uc[] = { 248-32, 249-32, 250-32, 251-32, 252-32, 253-32, 254-32, 255 }; # else /* ! DOINIT */ -EXTCONST unsigned char PL_fold[]; -EXTCONST unsigned char PL_fold_latin1[]; EXTCONST unsigned char PL_mod_latin1_uc[]; -EXTCONST unsigned char PL_latin1_lc[]; # endif #endif /* ! EBCDIC */ From a7f2c58ee6054ef22a00e5379b2bf82ae49a773a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 09:15:52 -0600 Subject: [PATCH 03/14] Use INIT() for PL_sig_name, _num --- perl.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/perl.h b/perl.h index 8b31fbb3536a..3ecc19d4141e 100644 --- a/perl.h +++ b/perl.h @@ -5539,15 +5539,14 @@ EXTCONST char PL_uudmap[256] = EXTCONST char PL_bitcount[256] = # include "bitcount.h" ; -EXTCONST char* const PL_sig_name[] = { SIG_NAME }; -EXTCONST int PL_sig_num[] = { SIG_NUM }; #else EXTCONST char PL_uudmap[256]; EXTCONST char PL_bitcount[256]; -EXTCONST char* const PL_sig_name[]; -EXTCONST int PL_sig_num[]; #endif +EXTCONST char* const PL_sig_name[] INIT( { SIG_NAME } ); +EXTCONST int PL_sig_num[] INIT( { SIG_NUM } ); + /* fast conversion and case folding tables. The folding tables complement the * fold, so that 'a' maps to 'A' and 'A' maps to 'a', ignoring more complicated * folds such as outside the range or to multiple characters. */ From 516b65cdf3f5b6f41555422a83c8ccb27ea9f251 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 09:21:58 -0600 Subject: [PATCH 04/14] regen/ebcdic.pl: Use INIT() for ebcdic_tables.h This removes some DOINITs --- ebcdic_tables.h | 192 ++++++++++++------------------------------------ regen/ebcdic.pl | 10 +-- 2 files changed, 50 insertions(+), 152 deletions(-) diff --git a/ebcdic_tables.h b/ebcdic_tables.h index fe0075920bf8..aa86941486a1 100644 --- a/ebcdic_tables.h +++ b/ebcdic_tables.h @@ -43,10 +43,7 @@ SOFTWARE. 
&& '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21 /* Index is ASCII platform code point; value is EBCDIC 1047 equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_a2e[256]; -# else - EXTCONST U8 PL_a2e[256] = { +EXTCONST U8 PL_a2e[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, @@ -65,14 +62,10 @@ SOFTWARE. /*E_*/0x44,0x45,0x42,0x46,0x43,0x47,0x9C,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, /*F_*/0x8C,0x49,0xCD,0xCE,0xCB,0xCF,0xCC,0xE1,0x70,0xDD,0xDE,0xDB,0xDC,0x8D,0x8E,0xDF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 1047 code point; value is ASCII platform equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_e2a[256]; -# else - EXTCONST U8 PL_e2a[256] = { +EXTCONST U8 PL_e2a[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, @@ -91,15 +84,11 @@ SOFTWARE. /*E_*/0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* (Confusingly named) Index is EBCDIC 1047 I8 byte; value is * EBCDIC 1047 UTF-EBCDIC equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_utf2e[256]; -# else - EXTCONST U8 PL_utf2e[256] = { +EXTCONST U8 PL_utf2e[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, @@ -118,15 +107,11 @@ SOFTWARE. 
/*E_*/0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, /*F_*/0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* (Confusingly named) Index is EBCDIC 1047 UTF-EBCDIC byte; value is * EBCDIC 1047 I8 equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_e2utf[256]; -# else - EXTCONST U8 PL_e2utf[256] = { +EXTCONST U8 PL_e2utf[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, @@ -145,17 +130,13 @@ SOFTWARE. /*E_*/0x5C,0xF4,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA, /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xFB,0xFC,0xFD,0xFE,0xFF,0x9F /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 1047 UTF-EBCDIC byte; value is UTF8SKIP for start bytes * (including for overlongs); 1 for continuation. Adapted from the shadow * flags table in tr16. The entries marked 9 in tr16 are continuation bytes * and are marked as length 1 here so that we can recover. */ -# ifndef DOINIT - EXTCONST U8 PL_utf8skip[256]; -# else - EXTCONST U8 PL_utf8skip[256] = { +EXTCONST U8 PL_utf8skip[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*1_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -174,14 +155,10 @@ SOFTWARE. 
/*E_*/ 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5, /*F_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 14, 1 /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 1047 code point; value is its lowercase equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_latin1_lc[256]; -# else - EXTCONST U8 PL_latin1_lc[256] = { +EXTCONST U8 PL_latin1_lc[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -200,16 +177,12 @@ SOFTWARE. /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 1047 code point; value is its uppercase equivalent. * The 'mod' in the name means that codepoints whose uppercase is above 255 or * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */ -# ifndef DOINIT - EXTCONST U8 PL_mod_latin1_uc[256]; -# else - EXTCONST U8 PL_mod_latin1_uc[256] = { +EXTCONST U8 PL_mod_latin1_uc[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -228,15 +201,11 @@ SOFTWARE. 
/*E_*/0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 1047 code point; For A-Z, value is a-z; for a-z, value * is A-Z; all other code points map to themselves */ -# ifndef DOINIT - EXTCONST U8 PL_fold[256]; -# else - EXTCONST U8 PL_fold[256] = { +EXTCONST U8 PL_fold[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -255,16 +224,12 @@ SOFTWARE. /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 1047 code point; value is its other fold-pair equivalent * (A => a; a => A, etc) in the 0-255 range. If no such equivalent, value is * the code point itself */ -# ifndef DOINIT - EXTCONST U8 PL_fold_latin1[256]; -# else - EXTCONST U8 PL_fold_latin1[256] = { +EXTCONST U8 PL_fold_latin1[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -283,8 +248,7 @@ SOFTWARE. /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* The table below is adapted from @@ -292,10 +256,7 @@ SOFTWARE. * See copyright notice at the beginning of this file. 
*/ -# ifndef DOINIT - EXTCONST U8 PL_extended_utf8_dfa_tab[416]; -# else - EXTCONST U8 PL_extended_utf8_dfa_tab[416] = { +EXTCONST U8 PL_extended_utf8_dfa_tab[416] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -324,8 +285,7 @@ SOFTWARE. /*N8=128*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 64, 64, 1, 1, 1, 1, /*N9=144*/ 1, 1, 1, 1, 1, 1, 1, 1, 80, 80, 80, 80, 1, 1, 1, 1 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15*/ -}; -# endif +}); /* The table below is adapted from @@ -333,10 +293,7 @@ SOFTWARE. * See copyright notice at the beginning of this file. */ -# ifndef DOINIT - EXTCONST U16 PL_strict_utf8_dfa_tab[624]; -# else - EXTCONST U16 PL_strict_utf8_dfa_tab[624] = { +EXTCONST U16 PL_strict_utf8_dfa_tab[624] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -371,8 +328,7 @@ SOFTWARE. /*N14=322*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,345, /*N15=345*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,299 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22*/ -}; -# endif +}); /* The table below is adapted from @@ -380,10 +336,7 @@ SOFTWARE. * See copyright notice at the beginning of this file. */ -# ifndef DOINIT - EXTCONST U8 PL_c9_utf8_dfa_tab[368]; -# else - EXTCONST U8 PL_c9_utf8_dfa_tab[368] = { +EXTCONST U8 PL_c9_utf8_dfa_tab[368] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -410,8 +363,7 @@ SOFTWARE. /*N6=84*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 42, 42, /*N7=98*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 1, 1, 1, 1 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13*/ -}; -# endif +}); #endif /* EBCDIC 1047 */ @@ -421,10 +373,7 @@ SOFTWARE. 
&& '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 37 /* Index is ASCII platform code point; value is EBCDIC 037 equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_a2e[256]; -# else - EXTCONST U8 PL_a2e[256] = { +EXTCONST U8 PL_a2e[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, @@ -443,14 +392,10 @@ SOFTWARE. /*E_*/0x44,0x45,0x42,0x46,0x43,0x47,0x9C,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, /*F_*/0x8C,0x49,0xCD,0xCE,0xCB,0xCF,0xCC,0xE1,0x70,0xDD,0xDE,0xDB,0xDC,0x8D,0x8E,0xDF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 037 code point; value is ASCII platform equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_e2a[256]; -# else - EXTCONST U8 PL_e2a[256] = { +EXTCONST U8 PL_e2a[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, @@ -469,15 +414,11 @@ SOFTWARE. /*E_*/0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* (Confusingly named) Index is EBCDIC 037 I8 byte; value is * EBCDIC 037 UTF-EBCDIC equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_utf2e[256]; -# else - EXTCONST U8 PL_utf2e[256] = { +EXTCONST U8 PL_utf2e[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, @@ -496,15 +437,11 @@ SOFTWARE. 
/*E_*/0xB6,0xB7,0xB8,0xB9,0xBC,0xBD,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, /*F_*/0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* (Confusingly named) Index is EBCDIC 037 UTF-EBCDIC byte; value is * EBCDIC 037 I8 equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_e2utf[256]; -# else - EXTCONST U8 PL_e2utf[256] = { +EXTCONST U8 PL_e2utf[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, @@ -523,17 +460,13 @@ SOFTWARE. /*E_*/0x5C,0xF4,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA, /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xFB,0xFC,0xFD,0xFE,0xFF,0x9F /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 037 UTF-EBCDIC byte; value is UTF8SKIP for start bytes * (including for overlongs); 1 for continuation. Adapted from the shadow * flags table in tr16. The entries marked 9 in tr16 are continuation bytes * and are marked as length 1 here so that we can recover. */ -# ifndef DOINIT - EXTCONST U8 PL_utf8skip[256]; -# else - EXTCONST U8 PL_utf8skip[256] = { +EXTCONST U8 PL_utf8skip[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*1_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -552,14 +485,10 @@ SOFTWARE. 
/*E_*/ 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5, /*F_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 14, 1 /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 037 code point; value is its lowercase equivalent */ -# ifndef DOINIT - EXTCONST U8 PL_latin1_lc[256]; -# else - EXTCONST U8 PL_latin1_lc[256] = { +EXTCONST U8 PL_latin1_lc[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -578,16 +507,12 @@ SOFTWARE. /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 037 code point; value is its uppercase equivalent. * The 'mod' in the name means that codepoints whose uppercase is above 255 or * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */ -# ifndef DOINIT - EXTCONST U8 PL_mod_latin1_uc[256]; -# else - EXTCONST U8 PL_mod_latin1_uc[256] = { +EXTCONST U8 PL_mod_latin1_uc[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -606,15 +531,11 @@ SOFTWARE. 
/*E_*/0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 037 code point; For A-Z, value is a-z; for a-z, value * is A-Z; all other code points map to themselves */ -# ifndef DOINIT - EXTCONST U8 PL_fold[256]; -# else - EXTCONST U8 PL_fold[256] = { +EXTCONST U8 PL_fold[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -633,16 +554,12 @@ SOFTWARE. /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* Index is EBCDIC 037 code point; value is its other fold-pair equivalent * (A => a; a => A, etc) in the 0-255 range. If no such equivalent, value is * the code point itself */ -# ifndef DOINIT - EXTCONST U8 PL_fold_latin1[256]; -# else - EXTCONST U8 PL_fold_latin1[256] = { +EXTCONST U8 PL_fold_latin1[256] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, @@ -661,8 +578,7 @@ SOFTWARE. /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ -}; -# endif +}); /* The table below is adapted from @@ -670,10 +586,7 @@ SOFTWARE. * See copyright notice at the beginning of this file. 
*/ -# ifndef DOINIT - EXTCONST U8 PL_extended_utf8_dfa_tab[416]; -# else - EXTCONST U8 PL_extended_utf8_dfa_tab[416] = { +EXTCONST U8 PL_extended_utf8_dfa_tab[416] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -702,8 +615,7 @@ SOFTWARE. /*N8=128*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 64, 64, 1, 1, 1, 1, /*N9=144*/ 1, 1, 1, 1, 1, 1, 1, 1, 80, 80, 80, 80, 1, 1, 1, 1 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15*/ -}; -# endif +}); /* The table below is adapted from @@ -711,10 +623,7 @@ SOFTWARE. * See copyright notice at the beginning of this file. */ -# ifndef DOINIT - EXTCONST U16 PL_strict_utf8_dfa_tab[624]; -# else - EXTCONST U16 PL_strict_utf8_dfa_tab[624] = { +EXTCONST U16 PL_strict_utf8_dfa_tab[624] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -749,8 +658,7 @@ SOFTWARE. /*N14=322*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,345, /*N15=345*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,299 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22*/ -}; -# endif +}); /* The table below is adapted from @@ -758,10 +666,7 @@ SOFTWARE. * See copyright notice at the beginning of this file. */ -# ifndef DOINIT - EXTCONST U8 PL_c9_utf8_dfa_tab[368]; -# else - EXTCONST U8 PL_c9_utf8_dfa_tab[368] = { +EXTCONST U8 PL_c9_utf8_dfa_tab[368] INIT({ /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -788,8 +693,7 @@ SOFTWARE. 
/*N6=84*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 42, 42, /*N7=98*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 1, 1, 1, 1 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13*/ -}; -# endif +}); #endif /* EBCDIC 037 */ diff --git a/regen/ebcdic.pl b/regen/ebcdic.pl index bb5c3abe99b8..735a4d06cc50 100644 --- a/regen/ebcdic.pl +++ b/regen/ebcdic.pl @@ -59,17 +59,11 @@ ($$$;$) # Anything locale related will be written on my $const = ($name !~ /locale/i) ? 'CONST' : ""; - my $declaration = "EXT$const $TYPE $name\[$size\]"; - print $out_fh < Date: Sat, 20 Sep 2025 09:27:47 -0600 Subject: [PATCH 05/14] regnodes.h: Use INIT(), not DOINIT By changing its regenerating program regcomp.pl --- regen/regcomp.pl | 60 ++++++++-------------------------- regnodes.h | 83 ++++++++++++------------------------------------ 2 files changed, 34 insertions(+), 109 deletions(-) diff --git a/regen/regcomp.pl b/regen/regcomp.pl index 889aadf42ca8..e51df5387500 100644 --- a/regen/regcomp.pl +++ b/regen/regcomp.pl @@ -256,21 +256,10 @@ sub process_flags { return $comment . <<"EOP"; #define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7))) -#ifndef DOINIT -EXTCONST U8 PL_${varname}\[] __attribute__deprecated__; -#else -EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = { - $out_string -}; -#endif /* DOINIT */ - -#ifndef DOINIT -EXTCONST U8 PL_${varname}_bitmask[]; -#else -EXTCONST U8 PL_${varname}_bitmask[] = { - $out_mask -}; -#endif /* DOINIT */ +EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ +INIT({ $out_string }); + +EXTCONST U8 PL_${varname}_bitmask[] INIT({ $out_mask }); EOP } @@ -307,13 +296,8 @@ sub print_process_EXACTish { /* Do only UTF-8 target strings match 'op', known to be of type EXACT? 
*/ #define isEXACT_REQ8(op) (assert(REGNODE_TYPE(op) == EXACT), (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT)))) -#ifndef DOINIT -EXTCONST U32 PL_EXACTFish_bitmask; -EXTCONST U32 PL_EXACT_REQ8_bitmask; -#else -EXTCONST U32 PL_EXACTFish_bitmask = 0x$exactf; -EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x$req8; -#endif /* DOINIT */ +EXTCONST U32 PL_EXACTFish_bitmask INIT(0x$exactf); +EXTCONST U32 PL_EXACT_REQ8_bitmask INIT(0x$req8); EOP } @@ -528,10 +512,7 @@ sub print_regnode_info { /* PL_regnode_info[] - Opcode/state names in string form, for debugging */ -#ifndef DOINIT -EXTCONST struct regnode_meta PL_regnode_info[]; -#else -EXTCONST struct regnode_meta PL_regnode_info[] = { +EXTCONST struct regnode_meta PL_regnode_info[] INIT( { EOP my @fields= qw(type arg_len arg_len_varies off_by_arg); foreach my $node_idx (0..$#all) { @@ -560,8 +541,7 @@ sub print_regnode_info { } print $out <> 3] & (1 << ((node) & 7))) -#ifndef DOINIT -EXTCONST U8 PL_varies[] __attribute__deprecated__; -#else -EXTCONST U8 PL_varies[] __attribute__deprecated__ = { - CLUMP, BRANCH, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF, +EXTCONST U8 PL_varies[] __attribute__deprecated__ +INIT({ CLUMP, BRANCH, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF, REFF, REFFL, REFFU, REFFA, REFN, REFFN, REFFLN, REFFUN, REFFAN, BRANCHJ, SUSPEND, IFTHEN, - 0 -}; -#endif /* DOINIT */ - -#ifndef DOINIT -EXTCONST U8 PL_varies_bitmask[]; -#else -EXTCONST U8 PL_varies_bitmask[] = { - 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0xC0, 0x1F, 0xFE, 0x97, 0x01, 0x00, 0x00 -}; -#endif /* DOINIT */ + 0 }); + +EXTCONST U8 PL_varies_bitmask[] INIT({ 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0xC0, 0x1F, 0xFE, 0x97, 0x01, 0x00, 0x00 }); /* The following always have a length of 1. U8 we can do strchr() on it. */ /* (Note that length 1 means "one character" under UTF8, not "one octet".) 
*/ #define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7))) -#ifndef DOINIT -EXTCONST U8 PL_simple[] __attribute__deprecated__; -#else -EXTCONST U8 PL_simple[] __attribute__deprecated__ = { - REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFPOSIXL, ANYOFH, ANYOFHb, +EXTCONST U8 PL_simple[] __attribute__deprecated__ +INIT({ REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFPOSIXL, ANYOFH, ANYOFHb, ANYOFHr, ANYOFHs, ANYOFR, ANYOFRb, ANYOFHbbm, ANYOFM, NANYOFM, POSIXD, POSIXL, POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA, REGEX_SET, - 0 -}; -#endif /* DOINIT */ - -#ifndef DOINIT -EXTCONST U8 PL_simple_bitmask[]; -#else -EXTCONST U8 PL_simple_bitmask[] = { - 0x00, 0x00, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 -}; -#endif /* DOINIT */ + 0 }); + +EXTCONST U8 PL_simple_bitmask[] INIT({ 0x00, 0x00, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }); /* Is 'op', known to be of type EXACT, folding? */ #define isEXACTFish(op) (assert(REGNODE_TYPE(op) == EXACT), (PL_EXACTFish_bitmask & (1U << (op - EXACT)))) @@ -2967,13 +2929,8 @@ EXTCONST U8 PL_simple_bitmask[] = { /* Do only UTF-8 target strings match 'op', known to be of type EXACT? 
*/ #define isEXACT_REQ8(op) (assert(REGNODE_TYPE(op) == EXACT), (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT)))) -#ifndef DOINIT -EXTCONST U32 PL_EXACTFish_bitmask; -EXTCONST U32 PL_EXACT_REQ8_bitmask; -#else -EXTCONST U32 PL_EXACTFish_bitmask = 0x33F8; -EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x1E00; -#endif /* DOINIT */ +EXTCONST U32 PL_EXACTFish_bitmask INIT(0x33F8); +EXTCONST U32 PL_EXACT_REQ8_bitmask INIT(0x1E00); #endif /* defined(PERL_CORE) || defined(PERL_EXT_RE_BUILD) */ From 612950d2bdbaeaee684a4c5647c1f596301b45f2 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 10:16:19 -0600 Subject: [PATCH 06/14] PL_phase_names: Use INIT(), not DOINIT --- perl.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/perl.h b/perl.h index 3ecc19d4141e..5ee217a8f577 100644 --- a/perl.h +++ b/perl.h @@ -5879,19 +5879,16 @@ enum perl_phase { PERL_PHASE_DESTRUCT = 6 }; -#ifdef DOINIT -EXTCONST char *const PL_phase_names[] = { - "CONSTRUCT", - "START", - "CHECK", - "INIT", - "RUN", - "END", - "DESTRUCT" -}; -#else -EXTCONST char *const PL_phase_names[]; -#endif +EXTCONST char *const PL_phase_names[] INIT( { + "CONSTRUCT", + "START", + "CHECK", + "INIT", + "RUN", + "END", + "DESTRUCT" + } + ); /* =for apidoc_section $utility From 16fc6120557ea5348d94d1b6579f8a686dc3f32d Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 10:19:42 -0600 Subject: [PATCH 07/14] Initialize PL_block_type with INIT(), not DOINIT --- perl.h | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/perl.h b/perl.h index 5ee217a8f577..5b029b5763d6 100644 --- a/perl.h +++ b/perl.h @@ -5725,26 +5725,23 @@ EXTCONST unsigned char PL_mod_latin1_uc[]; /* Although only used for debugging, these constants must be available in * non-debugging builds too, since they're used in ext/re/re_exec.c, * which has DEBUGGING enabled always */ -#ifdef DOINIT -EXTCONST char* const PL_block_type[] = { - 
"NULL", - "WHEN", - "BLOCK", - "GIVEN", - "LOOP_ARY", - "LOOP_LAZYSV", - "LOOP_LAZYIV", - "LOOP_LIST", - "LOOP_PLAIN", - "SUB", - "FORMAT", - "EVAL", - "SUBST", - "DEFER" -}; -#else -EXTCONST char* PL_block_type[]; -#endif +EXTCONST char* const PL_block_type[] INIT({ + "NULL", + "WHEN", + "BLOCK", + "GIVEN", + "LOOP_ARY", + "LOOP_LAZYSV", + "LOOP_LAZYIV", + "LOOP_LIST", + "LOOP_PLAIN", + "SUB", + "FORMAT", + "EVAL", + "SUBST", + "DEFER" + } + ); /* These are all the compile time options that affect binary compatibility. Other compile time options that are binary compatible are in perl.c From 6f7463285f77d91c6177d8a15d5946444693d78b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 10:26:56 -0600 Subject: [PATCH 08/14] valid_types, deBruijn tables: Initialize with INIT() And avoid the more clumsy DOINITs --- perl.h | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/perl.h b/perl.h index 5b029b5763d6..91ba26295cfd 100644 --- a/perl.h +++ b/perl.h @@ -6315,49 +6315,39 @@ EXTCONST U8 PL_magic_data[256] = EXTCONST U8 PL_magic_data[256]; #endif -#ifdef DOINIT /* NL IV NV PV INV PI PN MG RX GV LV AV HV CV FM IO OBJ */ EXTCONST bool -PL_valid_types_IVX[] = { 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0 }; +PL_valid_types_IVX[] INIT({ 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0 }); EXTCONST bool -PL_valid_types_NVX[] = { 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0 }; +PL_valid_types_NVX[] INIT({ 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0 }); EXTCONST bool -PL_valid_types_PVX[] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }; +PL_valid_types_PVX[] INIT({ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }); EXTCONST bool -PL_valid_types_RV[] = { 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 }; +PL_valid_types_RV[] INIT({ 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 }); EXTCONST bool -PL_valid_types_IV_set[] = { 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0 
}; +PL_valid_types_IV_set[] + INIT({ 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0 }); EXTCONST bool -PL_valid_types_NV_set[] = { 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 }; +PL_valid_types_NV_set[] + INIT({ 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 }); EXTCONST U8 -PL_deBruijn_bitpos_tab32[] = { +PL_deBruijn_bitpos_tab32[] +INIT({ /* https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn */ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 -}; +}); EXTCONST U8 -PL_deBruijn_bitpos_tab64[] = { +PL_deBruijn_bitpos_tab64[] +INIT({ /* https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers */ 63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5 -}; - -#else - -EXTCONST bool PL_valid_types_IVX[]; -EXTCONST bool PL_valid_types_NVX[]; -EXTCONST bool PL_valid_types_PVX[]; -EXTCONST bool PL_valid_types_RV[]; -EXTCONST bool PL_valid_types_IV_set[]; -EXTCONST bool PL_valid_types_NV_set[]; -EXTCONST U8 PL_deBruijn_bitpos_tab32[]; -EXTCONST U8 PL_deBruijn_bitpos_tab64[]; - -#endif +}); /* The constants for using PL_deBruijn_bitpos_tab */ #define PERL_deBruijnMagic32_ 0x077CB531 From 35ac45f306d1525c1a03ed538f868d88757f6fdd Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 10:30:39 -0600 Subject: [PATCH 09/14] PL_magic_data: Initialize with INIT(), not DOINIT --- perl.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/perl.h b/perl.h index 91ba26295cfd..f8d51366b5e2 100644 --- a/perl.h +++ b/perl.h @@ -6307,13 +6307,12 @@ EXTCONST runops_proc_t PL_runops_dbg #include "mg_vtable.h" +EXTCONST U8 PL_magic_data[256] #ifdef DOINIT -EXTCONST U8 PL_magic_data[256] = += # include "mg_data.h" -; -#else -EXTCONST U8 
PL_magic_data[256]; #endif +; /* NL IV NV PV INV PI PN MG RX GV LV AV HV CV FM IO OBJ */ EXTCONST bool From 6b416f8db946634ddcbb4e0d5d3a042791283ac2 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 10:31:13 -0600 Subject: [PATCH 10/14] perl.h: Change commented-out-code to use INIT() and not DOINIT --- perl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/perl.h b/perl.h index f8d51366b5e2..b0da26c43eaf 100644 --- a/perl.h +++ b/perl.h @@ -6554,10 +6554,9 @@ SOFTWARE. */ -# ifdef DOINIT -# if 0 /* This is the original table given in +# if 0 /* This is the original table given in https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ */ -static U8 utf8d_C9[] = { +static U8 utf8d_C9[] INIT( { /* The first part of the table maps bytes to character classes that * to reduce the size of the transition table and create bitmasks. */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /*-1F*/ @@ -6576,9 +6575,10 @@ static U8 utf8d_C9[] = { 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,12,12,12,12,12 -}; +}); -# endif +# endif +# ifdef DOINIT /* This is a version of the above table customized for Perl that doesn't * exclude surrogates and accepts start bytes up through FD (FE on 64-bit From f6f870a55e25f0f01165790b0dde2921f3b5aaae Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 11:26:51 -0600 Subject: [PATCH 11/14] opcode.h: Initialize with INIT(), not DOINIT by changing regen/opcode.pl --- opcode.h | 35 ++++++++++------------------------- regen/opcode.pl | 35 ++++++++++------------------------- 2 files changed, 20 insertions(+), 50 deletions(-) diff --git a/opcode.h b/opcode.h index e28266519ed7..ec329481d5a2 100644 --- a/opcode.h +++ b/opcode.h @@ -2473,25 +2473,12 @@ END_EXTERN_C #define OPpCONST_TOKEN_PACKAGE 0xc0 START_EXTERN_C -#ifndef DOINIT - -/* data about the flags in 
op_private */ - -EXTCONST I16 PL_op_private_bitdef_ix[]; -EXTCONST U16 PL_op_private_bitdefs[]; -EXTCONST char PL_op_private_labels[]; -EXTCONST I16 PL_op_private_bitfields[]; -EXTCONST U8 PL_op_private_valid[]; - -#else - - /* PL_op_private_labels[]: the short descriptions of private flags. * All labels are concatenated into a single char array * (separated by \0's) for compactness. */ -EXTCONST char PL_op_private_labels[] = { +EXTCONST char PL_op_private_labels[] INIT( { '$','M','O','D','\0', '+','1','\0', '-','\0', @@ -2606,7 +2593,7 @@ EXTCONST char PL_op_private_labels[] = { 'o','f','f','s','e','t','\0', 'r','a','n','g','e','\0', -}; +}); @@ -2620,7 +2607,7 @@ EXTCONST char PL_op_private_labels[] = { * -1 */ -EXTCONST I16 PL_op_private_bitfields[] = { +EXTCONST I16 PL_op_private_bitfields[] INIT( { 0, 8, -1, 0, 8, -1, 0, 727, 1, 554, 2, 71, 3, 298, -1, @@ -2634,13 +2621,13 @@ EXTCONST I16 PL_op_private_bitfields[] = { 4, -1, 0, 706, 1, 39, 2, 324, 3, 131, -1, 6, 721, 1, 463, 2, 246, 3, 596, -1, -}; +}); /* PL_op_private_bitdef_ix[]: map an op number to a starting position * in PL_op_private_bitdefs. 
If -1, the op has no bits defined */ -EXTCONST I16 PL_op_private_bitdef_ix[] = { +EXTCONST I16 PL_op_private_bitdef_ix[] INIT( { -1, /* null */ -1, /* stub */ 0, /* scalar */ @@ -3071,7 +3058,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 194, /* paramtest */ 0, /* paramstore */ -}; +}); @@ -3087,7 +3074,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { * into PL_op_private_bitfields[] (for a bit field) */ -EXTCONST U16 PL_op_private_bitdefs[] = { +EXTCONST U16 PL_op_private_bitdefs[] INIT( { 0x0003, /* scalar, prototype, refgen, srefgen, readline, regcmaybe, regcreset, regcomp, substcont, chop, schop, defined, study, preinc, i_preinc, predec, i_predec, postinc, i_postinc, postdec, i_postdec, not, ucfirst, lcfirst, uc, lc, quotemeta, aeach, avalues, each, pop, shift, grepstart, anywhile, mapstart, mapwhile, range, and, or, dor, andassign, orassign, dorassign, argcheck, entergiven, leavegiven, enterwhen, leavewhen, untie, tied, dbmclose, getsockname, getpeername, lstat, stat, readlink, readdir, telldir, rewinddir, closedir, localtime, alarm, require, dofile, entertry, ghbyname, gnbyname, gpbyname, shostent, snetent, sprotoent, sservent, gpwnam, gpwuid, ggrnam, ggrgid, lock, once, fc, anonconst, cmpchain_and, cmpchain_dup, entertrycatch, catch, is_bool, is_weak, weaken, unweaken, is_tainted, multiparam, paramstore */ 0x3cfc, 0x54f9, /* pushmark */ 0x00bd, /* wantarray, runcv */ @@ -3172,13 +3159,13 @@ EXTCONST U16 PL_op_private_bitdefs[] = { 0x301c, 0x4fd8, 0x0003, /* methstart */ 0x3308, 0x3164, 0x0003, /* initfield */ -}; +}); /* PL_op_private_valid: for each op, indexed by op_type, indicate which * flags bits in op_private are legal */ -EXTCONST U8 PL_op_private_valid[] = { +EXTCONST U8 PL_op_private_valid[] INIT( { /* NULL */ (0xff), /* STUB */ (0), /* SCALAR */ (OPpARG1_MASK), @@ -3609,9 +3596,7 @@ EXTCONST U8 PL_op_private_valid[] = { /* PARAMTEST */ (OPpARG1_MASK|OPpPARAM_IF_FALSE|OPpPARAM_IF_UNDEF), /* PARAMSTORE */ (OPpARG1_MASK), -}; - -#endif /* !DOINIT */ 
+}); END_EXTERN_C diff --git a/regen/opcode.pl b/regen/opcode.pl index 9d1df94410e8..9b6b0698eefc 100755 --- a/regen/opcode.pl +++ b/regen/opcode.pl @@ -823,27 +823,14 @@ sub print_PL_op_private_tables { print $fh < Date: Sat, 20 Sep 2025 12:37:14 -0600 Subject: [PATCH 12/14] utf8.h: Initialize PL_utf8skip[] with INIT(), not DOINIT --- utf8.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/utf8.h b/utf8.h index 0fe5036ae148..9dc21c922fc6 100644 --- a/utf8.h +++ b/utf8.h @@ -235,10 +235,7 @@ For details, see the description for L. START_EXTERN_C -# ifndef DOINIT -EXTCONST unsigned char PL_utf8skip[]; -# else -EXTCONST unsigned char PL_utf8skip[] = { +EXTCONST unsigned char PL_utf8skip[] INIT( { /* 0x00 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */ /* 0x10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */ /* 0x20 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */ @@ -260,8 +257,7 @@ EXTCONST unsigned char PL_utf8skip[] = { /* 0xFE */ 7, /* More extended, Up to 72 bits (64-bit + reserved) */ /* 0xFF */ ASCII_PLATFORM_UTF8_MAXBYTES -}; -# endif +}); END_EXTERN_C From 2b0963c25afcfcf2e3053cdca237c5057b6fca1b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Sep 2025 10:18:39 -0600 Subject: [PATCH 13/14] regen/mk_invlists.pl: Use INIT(), not DOINIT There are no tables currently generated by this file that are affected, however.
--- charclass_invlists.inc | 2 +- lib/unicore/uni_keywords.pl | 2 +- regen/mk_invlists.pl | 13 ++++--------- regexp_constants.h | 2 +- uni_keywords.h | 2 +- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/charclass_invlists.inc b/charclass_invlists.inc index 71cf9a36f2dd..855ae3d196f9 100644 --- a/charclass_invlists.inc +++ b/charclass_invlists.inc @@ -492662,5 +492662,5 @@ static const U8 WB_dfa_table[] = { * 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl - * 33a5e583d836b8bb97b6b1d1c6d1766defe4cbdeb8bcbe865d76f71206762be9 regen/mk_invlists.pl + * 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl * ex: set ro ft=c: */ diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 839891fd208b..320995ba1d3d 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1428,5 +1428,5 @@ # 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version # 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl # c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl -# 33a5e583d836b8bb97b6b1d1c6d1766defe4cbdeb8bcbe865d76f71206762be9 regen/mk_invlists.pl +# 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl # ex: set ro ft=perl: diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 6578c2d1afde..2d4a8ebbf35a 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -348,13 +348,7 @@ ($charset, $indent_level=undef) $in_doinit = 1; print $fh < Date: Sat, 20 Sep 2025 11:19:25 -0600 Subject: [PATCH 14/14] mg_vtable, Use INIT() for PL_magic_vtable_names I'm uncertain about the other DOINIT use in this file. 
--- mg_vtable.h | 8 ++------ regen/mg_vtable.pl | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/mg_vtable.h b/mg_vtable.h index 46a56f50f405..609037deb458 100644 --- a/mg_vtable.h +++ b/mg_vtable.h @@ -103,8 +103,7 @@ enum { /* pass one of these to get_vtbl */ magic_vtable_max }; -#ifdef DOINIT -EXTCONST char * const PL_magic_vtable_names[magic_vtable_max] = { +EXTCONST char * const PL_magic_vtable_names[magic_vtable_max] INIT( { "arylen", "arylen_p", "backref", @@ -141,10 +140,7 @@ EXTCONST char * const PL_magic_vtable_names[magic_vtable_max] = { "utf8", "uvar", "vec" -}; -#else -EXTCONST char * const PL_magic_vtable_names[magic_vtable_max]; -#endif +}); /* These all need to be 0, not NULL, as NULL can be (void*)0, which is a * pointer to data, whereas we're assigning pointers to functions, which are diff --git a/regen/mg_vtable.pl b/regen/mg_vtable.pl index e113464eace7..4e0434141721 100644 --- a/regen/mg_vtable.pl +++ b/regen/mg_vtable.pl @@ -480,13 +480,9 @@ BEGIN $want }; -#ifdef DOINIT -EXTCONST char * const PL_magic_vtable_names[magic_vtable_max] = { +EXTCONST char * const PL_magic_vtable_names[magic_vtable_max] INIT( { "$names" -}; -#else -EXTCONST char * const PL_magic_vtable_names[magic_vtable_max]; -#endif +}); EOH }