Skip to content

Commit 1250ae4

Browse files
Replaced the per-paren loops in regcppush/regcppop with memcpy
Profiled with Intel VTune: this improves the performance of a Perl regex-matching program that uses multiple capture groups and recursive patterns
1 parent fd4fc0f commit 1250ae4

File tree

2 files changed

+54
-41
lines changed

2 files changed

+54
-41
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ Alexander D'Archangel <[email protected]>
6767
Alexander Foken
6868
Alexander Gernler <[email protected]>
6969
Alexander Gough <[email protected]>
70+
Alexander Nikolov <[email protected]>
7071
Alexander Hartmaier <[email protected]>
7172
Alexander Klimov <[email protected]>
7273
Alexander Smishlajev <[email protected]>

regexec.c

Lines changed: 53 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ static void S_setup_eval_state(pTHX_ regmatch_info *const reginfo);
218218
static void S_cleanup_regmatch_info_aux(pTHX_ void *arg);
219219
static regmatch_state * S_push_slab(pTHX);
220220

221-
#define REGCP_PAREN_ELEMS 3
222221
#define REGCP_OTHER_ELEMS 3
223222
#define REGCP_FRAME_ELEMS 1
224223
/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
@@ -228,8 +227,9 @@ STATIC CHECKPOINT
228227
S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
229228
{
230229
const int retval = PL_savestack_ix;
231-
const int paren_elems_to_push =
232-
(maxopenparen - parenfloor) * REGCP_PAREN_ELEMS;
230+
const SSize_t paren_bytes_to_push = sizeof(*rex->offs) * (maxopenparen - parenfloor);
231+
/* Number of savestack[] entries to be filled by the paren data */
232+
const int paren_elems_to_push = (paren_bytes_to_push + sizeof(*PL_savestack) - 1) / sizeof(*PL_savestack);
233233
const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
234234
const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
235235
I32 p;
@@ -238,9 +238,9 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
238238
PERL_ARGS_ASSERT_REGCPPUSH;
239239

240240
if (paren_elems_to_push < 0)
241-
Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0, maxopenparen: %i parenfloor: %i REGCP_PAREN_ELEMS: %u",
241+
Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0, maxopenparen: %i parenfloor: %i",
242242
(int)paren_elems_to_push, (int)maxopenparen,
243-
(int)parenfloor, (unsigned)REGCP_PAREN_ELEMS);
243+
(int)parenfloor);
244244

245245
if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
246246
Perl_croak(aTHX_ "panic: paren_elems_to_push offset %" UVuf
@@ -249,31 +249,35 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
249249
(unsigned long)maxopenparen,
250250
(long)parenfloor);
251251

252-
SSGROW(total_elems + REGCP_FRAME_ELEMS);
253-
254252
DEBUG_BUFFERS_r(
255253
if ((int)maxopenparen > (int)parenfloor)
256-
Perl_re_exec_indentf( aTHX_
254+
Perl_re_exec_indentf(aTHX_
257255
"rex=0x%" UVxf " offs=0x%" UVxf ": saving capture indices:\n",
258256
depth,
259257
PTR2UV(rex),
260258
PTR2UV(rex->offs)
261259
);
262260
);
263-
for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
264-
/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
265-
SSPUSHIV(rex->offs[p].end);
266-
SSPUSHIV(rex->offs[p].start);
267-
SSPUSHINT(rex->offs[p].start_tmp);
268-
DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
269-
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "\n",
270-
depth,
271-
(UV)p,
272-
(IV)rex->offs[p].start,
273-
(IV)rex->offs[p].start_tmp,
274-
(IV)rex->offs[p].end
275-
));
276-
}
261+
262+
SSGROW(total_elems + REGCP_FRAME_ELEMS);
263+
264+
/* memcpy the offs onto the savestack - it's faster than a for loop */
265+
memcpy(&PL_savestack[PL_savestack_ix], rex->offs + parenfloor + 1, paren_bytes_to_push);
266+
PL_savestack_ix += paren_elems_to_push;
267+
268+
DEBUG_BUFFERS_r(
269+
for (p = parenfloor + 1; p <= (I32)maxopenparen; p++) {
270+
Perl_re_exec_indentf(aTHX_
271+
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "\n",
272+
depth,
273+
(UV)p,
274+
(IV)rex->offs[p].start,
275+
(IV)rex->offs[p].start_tmp,
276+
(IV)rex->offs[p].end
277+
);
278+
}
279+
);
280+
277281
/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
278282
SSPUSHINT(maxopenparen);
279283
SSPUSHINT(rex->lastparen);
@@ -366,25 +370,33 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p _pDEPTH)
366370
PTR2UV(rex->offs)
367371
);
368372
);
369-
paren = *maxopenparen_p;
370-
for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
371-
SSize_t tmps;
372-
rex->offs[paren].start_tmp = SSPOPINT;
373-
rex->offs[paren].start = SSPOPIV;
374-
tmps = SSPOPIV;
375-
if (paren <= rex->lastparen)
376-
rex->offs[paren].end = tmps;
377-
DEBUG_BUFFERS_r( Perl_re_exec_indentf( aTHX_
378-
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "%s\n",
379-
depth,
380-
(UV)paren,
381-
(IV)rex->offs[paren].start,
382-
(IV)rex->offs[paren].start_tmp,
383-
(IV)rex->offs[paren].end,
384-
(paren > rex->lastparen ? "(skipped)" : ""));
385-
);
386-
paren--;
387-
}
373+
/* subtract remaining elements from the stack */
374+
PL_savestack_ix -= i;
375+
376+
/* static assert that offs struct size is not less than stack elem size */
377+
STATIC_ASSERT_STMT(sizeof(*rex->offs) >= sizeof(*PL_savestack));
378+
379+
/* calculate number of offs/capture groups stored */
380+
i = (i * sizeof(*PL_savestack)) / sizeof(*rex->offs);
381+
382+
/* calculate paren starting point */
383+
paren = *maxopenparen_p - i + 1;
384+
385+
/* restore them */
386+
memcpy(rex->offs + paren, &PL_savestack[PL_savestack_ix], i * sizeof(*rex->offs));
387+
388+
DEBUG_BUFFERS_r(
389+
for (; paren <= *maxopenparen_p; ++paren) {
390+
Perl_re_exec_indentf(aTHX_
391+
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "%s\n",
392+
depth,
393+
(UV)paren,
394+
(IV)rex->offs[paren].start,
395+
(IV)rex->offs[paren].start_tmp,
396+
(IV)rex->offs[paren].end,
397+
(paren > rex->lastparen ? "(skipped)" : ""));
398+
}
399+
);
388400
#if 1
389401
/* It would seem that the similar code in regtry()
390402
* already takes care of this, and in fact it is in

0 commit comments

Comments
 (0)