Skip to content

Commit 895be15

Browse files
nikai3d authored and herbertx committed
crypto: cast5 - simplify if-statements
I noticed that by factoring out common rounds from the branches of the if-statements in the encryption and decryption functions, the executable file size goes down significantly, for crypto/cast5.ko from 26688 bytes to 24336 bytes (amd64).

On my test system, I saw a slight speedup. This is the first time I'm doing such a benchmark - I found a similar one on the crypto mailing list, and I hope I did it right?

Before:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben:
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,43484 s, 21,5 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,4089 s, 21,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,41091 s, 21,7 MB/s

After:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben:
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,38128 s, 22,0 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,29486 s, 22,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,37162 s, 22,1 MB/s

Signed-off-by: Nicolas Kaiser <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent 90246e7 commit 895be15

File tree

1 file changed

+24
-50
lines changed

1 file changed

+24
-50
lines changed

crypto/cast5.c

Lines changed: 24 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
604604
* Rounds 3, 6, 9, 12, and 15 use f function Type 3.
605605
*/
606606

607+
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
608+
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
609+
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
610+
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
611+
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
612+
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
613+
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
614+
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
615+
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
616+
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
617+
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
618+
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
607619
if (!(c->rr)) {
608-
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
609-
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
610-
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
611-
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
612-
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
613-
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
614-
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
615-
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
616-
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
617-
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
618-
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
619-
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
620620
t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
621621
t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
622622
t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
623623
t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
624-
} else {
625-
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
626-
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
627-
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
628-
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
629-
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
630-
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
631-
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
632-
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
633-
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
634-
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
635-
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
636-
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
637624
}
638625

639626
/* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and
@@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
663650
t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
664651
t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
665652
t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
666-
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
667-
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
668-
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
669-
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
670-
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
671-
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
672-
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
673-
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
674-
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
675-
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
676-
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
677-
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
678-
} else {
679-
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
680-
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
681-
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
682-
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
683-
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
684-
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
685-
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
686-
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
687-
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
688-
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
689-
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
690-
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
691653
}
654+
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
655+
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
656+
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
657+
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
658+
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
659+
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
660+
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
661+
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
662+
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
663+
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
664+
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
665+
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
692666

693667
dst[0] = cpu_to_be32(r);
694668
dst[1] = cpu_to_be32(l);

0 commit comments

Comments
 (0)