2020#include <crypto/sha3.h>
2121#include <asm/unaligned.h>
2222
/*
 * On some 32-bit architectures (mn10300 and h8300), GCC ends up using
 * over 1 KB of stack if we inline the round calculation into the loop
 * in keccakf(). On the other hand, on 64-bit architectures with plenty
 * of [64-bit wide] general purpose registers, not inlining it severely
 * hurts performance. So let's use 64-bitness as a heuristic to decide
 * whether to inline or not.
 */
#ifdef CONFIG_64BIT
#define SHA3_INLINE	inline
#else
#define SHA3_INLINE	noinline
#endif
36+
2337#define KECCAK_ROUNDS 24
2438
2539static const u64 keccakf_rndc [24 ] = {
@@ -35,111 +49,115 @@ static const u64 keccakf_rndc[24] = {
3549
/* update the state with the given number of rounds */

38- static void __attribute__(( __optimize__ ( "O3" ))) keccakf (u64 st [25 ])
/*
 * One round of the KECCAK-f[1600] permutation, minus the final Iota
 * step: the caller XORs the per-round constant into st[0] itself, since
 * that is the only part of a round that differs between iterations.
 *
 * st holds the 5x5 lane state as 25 64-bit lanes, indexed st[x + 5*y].
 * The steps below are Theta, then Rho and Pi merged into one pass, then
 * Chi, applied in that order.
 */
static SHA3_INLINE void keccakf_round(u64 st[25])
{
	u64 t[5], tt, bc[5];

	/* Theta: bc[x] is the XOR parity of column x ... */
	bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
	bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
	bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
	bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
	bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];

	/* ... and t[x] = bc[x-1] ^ rol64(bc[x+1], 1), indices mod 5 */
	t[0] = bc[4] ^ rol64(bc[1], 1);
	t[1] = bc[0] ^ rol64(bc[2], 1);
	t[2] = bc[1] ^ rol64(bc[3], 1);
	t[3] = bc[2] ^ rol64(bc[4], 1);
	t[4] = bc[3] ^ rol64(bc[0], 1);

	/* st[0] takes no rotation, so apply its Theta correction here */
	st[0] ^= t[0];

	/*
	 * Rho Pi: permute the remaining 24 lanes along their single cycle
	 * while rotating each by its fixed offset; the Theta correction
	 * t[x] for each lane's source column is folded into the same
	 * expression.  tt saves st[1], the start of the cycle, which is
	 * overwritten first and consumed last.
	 */
	tt = st[1];
	st[ 1] = rol64(st[ 6] ^ t[1], 44);
	st[ 6] = rol64(st[ 9] ^ t[4], 20);
	st[ 9] = rol64(st[22] ^ t[2], 61);
	st[22] = rol64(st[14] ^ t[4], 39);
	st[14] = rol64(st[20] ^ t[0], 18);
	st[20] = rol64(st[ 2] ^ t[2], 62);
	st[ 2] = rol64(st[12] ^ t[2], 43);
	st[12] = rol64(st[13] ^ t[3], 25);
	st[13] = rol64(st[19] ^ t[4],  8);
	st[19] = rol64(st[23] ^ t[3], 56);
	st[23] = rol64(st[15] ^ t[0], 41);
	st[15] = rol64(st[ 4] ^ t[4], 27);
	st[ 4] = rol64(st[24] ^ t[4], 14);
	st[24] = rol64(st[21] ^ t[1],  2);
	st[21] = rol64(st[ 8] ^ t[3], 55);
	st[ 8] = rol64(st[16] ^ t[1], 45);
	st[16] = rol64(st[ 5] ^ t[0], 36);
	st[ 5] = rol64(st[ 3] ^ t[3], 28);
	st[ 3] = rol64(st[18] ^ t[3], 21);
	st[18] = rol64(st[17] ^ t[2], 15);
	st[17] = rol64(st[11] ^ t[1], 10);
	st[11] = rol64(st[ 7] ^ t[2],  6);
	st[ 7] = rol64(st[10] ^ t[0],  3);
	st[10] = rol64(    tt ^ t[1],  1);

	/*
	 * Chi: within each row of five lanes, st[x] ^= ~st[x+1] & st[x+2]
	 * (x+1, x+2 wrapping inside the row).  Each row's values are
	 * computed into bc[] first so every lane reads its neighbours'
	 * pre-Chi values before any of them is overwritten.
	 */
	bc[ 0] = ~st[ 1] & st[ 2];
	bc[ 1] = ~st[ 2] & st[ 3];
	bc[ 2] = ~st[ 3] & st[ 4];
	bc[ 3] = ~st[ 4] & st[ 0];
	bc[ 4] = ~st[ 0] & st[ 1];
	st[ 0] ^= bc[ 0];
	st[ 1] ^= bc[ 1];
	st[ 2] ^= bc[ 2];
	st[ 3] ^= bc[ 3];
	st[ 4] ^= bc[ 4];

	bc[ 0] = ~st[ 6] & st[ 7];
	bc[ 1] = ~st[ 7] & st[ 8];
	bc[ 2] = ~st[ 8] & st[ 9];
	bc[ 3] = ~st[ 9] & st[ 5];
	bc[ 4] = ~st[ 5] & st[ 6];
	st[ 5] ^= bc[ 0];
	st[ 6] ^= bc[ 1];
	st[ 7] ^= bc[ 2];
	st[ 8] ^= bc[ 3];
	st[ 9] ^= bc[ 4];

	bc[ 0] = ~st[11] & st[12];
	bc[ 1] = ~st[12] & st[13];
	bc[ 2] = ~st[13] & st[14];
	bc[ 3] = ~st[14] & st[10];
	bc[ 4] = ~st[10] & st[11];
	st[10] ^= bc[ 0];
	st[11] ^= bc[ 1];
	st[12] ^= bc[ 2];
	st[13] ^= bc[ 3];
	st[14] ^= bc[ 4];

	bc[ 0] = ~st[16] & st[17];
	bc[ 1] = ~st[17] & st[18];
	bc[ 2] = ~st[18] & st[19];
	bc[ 3] = ~st[19] & st[15];
	bc[ 4] = ~st[15] & st[16];
	st[15] ^= bc[ 0];
	st[16] ^= bc[ 1];
	st[17] ^= bc[ 2];
	st[18] ^= bc[ 3];
	st[19] ^= bc[ 4];

	bc[ 0] = ~st[21] & st[22];
	bc[ 1] = ~st[22] & st[23];
	bc[ 2] = ~st[23] & st[24];
	bc[ 3] = ~st[24] & st[20];
	bc[ 4] = ~st[20] & st[21];
	st[20] ^= bc[ 0];
	st[21] ^= bc[ 1];
	st[22] ^= bc[ 2];
	st[23] ^= bc[ 3];
	st[24] ^= bc[ 4];
}
44154
45- /* Theta */
46- bc [0 ] = st [0 ] ^ st [5 ] ^ st [10 ] ^ st [15 ] ^ st [20 ];
47- bc [1 ] = st [1 ] ^ st [6 ] ^ st [11 ] ^ st [16 ] ^ st [21 ];
48- bc [2 ] = st [2 ] ^ st [7 ] ^ st [12 ] ^ st [17 ] ^ st [22 ];
49- bc [3 ] = st [3 ] ^ st [8 ] ^ st [13 ] ^ st [18 ] ^ st [23 ];
50- bc [4 ] = st [4 ] ^ st [9 ] ^ st [14 ] ^ st [19 ] ^ st [24 ];
51-
52- t [0 ] = bc [4 ] ^ rol64 (bc [1 ], 1 );
53- t [1 ] = bc [0 ] ^ rol64 (bc [2 ], 1 );
54- t [2 ] = bc [1 ] ^ rol64 (bc [3 ], 1 );
55- t [3 ] = bc [2 ] ^ rol64 (bc [4 ], 1 );
56- t [4 ] = bc [3 ] ^ rol64 (bc [0 ], 1 );
57-
58- st [0 ] ^= t [0 ];
59-
60- /* Rho Pi */
61- tt = st [1 ];
62- st [ 1 ] = rol64 (st [ 6 ] ^ t [1 ], 44 );
63- st [ 6 ] = rol64 (st [ 9 ] ^ t [4 ], 20 );
64- st [ 9 ] = rol64 (st [22 ] ^ t [2 ], 61 );
65- st [22 ] = rol64 (st [14 ] ^ t [4 ], 39 );
66- st [14 ] = rol64 (st [20 ] ^ t [0 ], 18 );
67- st [20 ] = rol64 (st [ 2 ] ^ t [2 ], 62 );
68- st [ 2 ] = rol64 (st [12 ] ^ t [2 ], 43 );
69- st [12 ] = rol64 (st [13 ] ^ t [3 ], 25 );
70- st [13 ] = rol64 (st [19 ] ^ t [4 ], 8 );
71- st [19 ] = rol64 (st [23 ] ^ t [3 ], 56 );
72- st [23 ] = rol64 (st [15 ] ^ t [0 ], 41 );
73- st [15 ] = rol64 (st [ 4 ] ^ t [4 ], 27 );
74- st [ 4 ] = rol64 (st [24 ] ^ t [4 ], 14 );
75- st [24 ] = rol64 (st [21 ] ^ t [1 ], 2 );
76- st [21 ] = rol64 (st [ 8 ] ^ t [3 ], 55 );
77- st [ 8 ] = rol64 (st [16 ] ^ t [1 ], 45 );
78- st [16 ] = rol64 (st [ 5 ] ^ t [0 ], 36 );
79- st [ 5 ] = rol64 (st [ 3 ] ^ t [3 ], 28 );
80- st [ 3 ] = rol64 (st [18 ] ^ t [3 ], 21 );
81- st [18 ] = rol64 (st [17 ] ^ t [2 ], 15 );
82- st [17 ] = rol64 (st [11 ] ^ t [1 ], 10 );
83- st [11 ] = rol64 (st [ 7 ] ^ t [2 ], 6 );
84- st [ 7 ] = rol64 (st [10 ] ^ t [0 ], 3 );
85- st [10 ] = rol64 ( tt ^ t [1 ], 1 );
86-
87- /* Chi */
88- bc [ 0 ] = ~st [ 1 ] & st [ 2 ];
89- bc [ 1 ] = ~st [ 2 ] & st [ 3 ];
90- bc [ 2 ] = ~st [ 3 ] & st [ 4 ];
91- bc [ 3 ] = ~st [ 4 ] & st [ 0 ];
92- bc [ 4 ] = ~st [ 0 ] & st [ 1 ];
93- st [ 0 ] ^= bc [ 0 ];
94- st [ 1 ] ^= bc [ 1 ];
95- st [ 2 ] ^= bc [ 2 ];
96- st [ 3 ] ^= bc [ 3 ];
97- st [ 4 ] ^= bc [ 4 ];
98-
99- bc [ 0 ] = ~st [ 6 ] & st [ 7 ];
100- bc [ 1 ] = ~st [ 7 ] & st [ 8 ];
101- bc [ 2 ] = ~st [ 8 ] & st [ 9 ];
102- bc [ 3 ] = ~st [ 9 ] & st [ 5 ];
103- bc [ 4 ] = ~st [ 5 ] & st [ 6 ];
104- st [ 5 ] ^= bc [ 0 ];
105- st [ 6 ] ^= bc [ 1 ];
106- st [ 7 ] ^= bc [ 2 ];
107- st [ 8 ] ^= bc [ 3 ];
108- st [ 9 ] ^= bc [ 4 ];
109-
110- bc [ 0 ] = ~st [11 ] & st [12 ];
111- bc [ 1 ] = ~st [12 ] & st [13 ];
112- bc [ 2 ] = ~st [13 ] & st [14 ];
113- bc [ 3 ] = ~st [14 ] & st [10 ];
114- bc [ 4 ] = ~st [10 ] & st [11 ];
115- st [10 ] ^= bc [ 0 ];
116- st [11 ] ^= bc [ 1 ];
117- st [12 ] ^= bc [ 2 ];
118- st [13 ] ^= bc [ 3 ];
119- st [14 ] ^= bc [ 4 ];
120-
121- bc [ 0 ] = ~st [16 ] & st [17 ];
122- bc [ 1 ] = ~st [17 ] & st [18 ];
123- bc [ 2 ] = ~st [18 ] & st [19 ];
124- bc [ 3 ] = ~st [19 ] & st [15 ];
125- bc [ 4 ] = ~st [15 ] & st [16 ];
126- st [15 ] ^= bc [ 0 ];
127- st [16 ] ^= bc [ 1 ];
128- st [17 ] ^= bc [ 2 ];
129- st [18 ] ^= bc [ 3 ];
130- st [19 ] ^= bc [ 4 ];
131-
132- bc [ 0 ] = ~st [21 ] & st [22 ];
133- bc [ 1 ] = ~st [22 ] & st [23 ];
134- bc [ 2 ] = ~st [23 ] & st [24 ];
135- bc [ 3 ] = ~st [24 ] & st [20 ];
136- bc [ 4 ] = ~st [20 ] & st [21 ];
137- st [20 ] ^= bc [ 0 ];
138- st [21 ] ^= bc [ 1 ];
139- st [22 ] ^= bc [ 2 ];
140- st [23 ] ^= bc [ 3 ];
141- st [24 ] ^= bc [ 4 ];
155+ static void __optimize ("O3" ) keccakf (u64 st [25 ])
156+ {
157+ int round ;
142158
159+ for (round = 0 ; round < KECCAK_ROUNDS ; round ++ ) {
160+ keccakf_round (st );
143161 /* Iota */
144162 st [0 ] ^= keccakf_rndc [round ];
145163 }
0 commit comments