-
Notifications
You must be signed in to change notification settings - Fork 172
Description
The newlib library included as part of the Zephyr SDK ARM toolchain contains unoptimised function implementations (e.g. memset, memcpy) that are normally used in the nano variant.
Since this newlib library is intended to be the full version (i.e. not nano), it should include the optimised function implementations, as the GNU ARM Embedded toolchain does.
This is especially important because the unoptimised implementations (in particular, memcpy) have been reported to be over 10 times slower than the optimised implementations, and this is simply an unacceptable level of performance.
Unoptimised memcpy implementation (gnuarmemb newlib nano / current Zephyr SDK newlib)
00000270 <memcpy>:
270: 440a add r2, r1
272: 4291 cmp r1, r2
274: f100 33ff add.w r3, r0, #4294967295 ; 0xffffffff
278: d100 bne.n 27c <memcpy+0xc>
27a: 4770 bx lr
27c: b510 push {r4, lr}
27e: f811 4b01 ldrb.w r4, [r1], #1
282: 4291 cmp r1, r2
284: f803 4f01 strb.w r4, [r3, #1]!
288: d1f9 bne.n 27e <memcpy+0xe>
28a: bd10 pop {r4, pc}
Optimised memcpy implementation (gnuarmemb newlib full / should be Zephyr SDK newlib)
000000ec <memcpy>:
ec: 4684 mov ip, r0
ee: ea41 0300 orr.w r3, r1, r0
f2: f013 0303 ands.w r3, r3, #3
f6: d149 bne.n 18c <_STRUCT_KERNEL_SIZE+0x64>
f8: 3a40 subs r2, #64 ; 0x40
fa: d323 bcc.n 144 <_STRUCT_KERNEL_SIZE+0x1c>
fc: 680b ldr r3, [r1, #0]
fe: 6003 str r3, [r0, #0]
100: 684b ldr r3, [r1, #4]
102: 6043 str r3, [r0, #4]
104: 688b ldr r3, [r1, #8]
106: 6083 str r3, [r0, #8]
108: 68cb ldr r3, [r1, #12]
10a: 60c3 str r3, [r0, #12]
10c: 690b ldr r3, [r1, #16]
10e: 6103 str r3, [r0, #16]
110: 694b ldr r3, [r1, #20]
112: 6143 str r3, [r0, #20]
114: 698b ldr r3, [r1, #24]
116: 6183 str r3, [r0, #24]
118: 69cb ldr r3, [r1, #28]
11a: 61c3 str r3, [r0, #28]
11c: 6a0b ldr r3, [r1, #32]
11e: 6203 str r3, [r0, #32]
120: 6a4b ldr r3, [r1, #36] ; 0x24
122: 6243 str r3, [r0, #36] ; 0x24
124: 6a8b ldr r3, [r1, #40] ; 0x28
126: 6283 str r3, [r0, #40] ; 0x28
128: 6acb ldr r3, [r1, #44] ; 0x2c
12a: 62c3 str r3, [r0, #44] ; 0x2c
12c: 6b0b ldr r3, [r1, #48] ; 0x30
12e: 6303 str r3, [r0, #48] ; 0x30
130: 6b4b ldr r3, [r1, #52] ; 0x34
132: 6343 str r3, [r0, #52] ; 0x34
134: 6b8b ldr r3, [r1, #56] ; 0x38
136: 6383 str r3, [r0, #56] ; 0x38
138: 6bcb ldr r3, [r1, #60] ; 0x3c
13a: 63c3 str r3, [r0, #60] ; 0x3c
13c: 3040 adds r0, #64 ; 0x40
13e: 3140 adds r1, #64 ; 0x40
140: 3a40 subs r2, #64 ; 0x40
142: d2db bcs.n fc <memcpy+0x10>
144: 3230 adds r2, #48 ; 0x30
146: d30b bcc.n 160 <_STRUCT_KERNEL_SIZE+0x38>
148: 680b ldr r3, [r1, #0]
14a: 6003 str r3, [r0, #0]
14c: 684b ldr r3, [r1, #4]
14e: 6043 str r3, [r0, #4]
150: 688b ldr r3, [r1, #8]
152: 6083 str r3, [r0, #8]
154: 68cb ldr r3, [r1, #12]
156: 60c3 str r3, [r0, #12]
158: 3010 adds r0, #16
15a: 3110 adds r1, #16
15c: 3a10 subs r2, #16
15e: d2f3 bcs.n 148 <_STRUCT_KERNEL_SIZE+0x20>
160: 320c adds r2, #12
162: d305 bcc.n 170 <_STRUCT_KERNEL_SIZE+0x48>
164: f851 3b04 ldr.w r3, [r1], #4
168: f840 3b04 str.w r3, [r0], #4
16c: 3a04 subs r2, #4
16e: d2f9 bcs.n 164 <_STRUCT_KERNEL_SIZE+0x3c>
170: 3204 adds r2, #4
172: d008 beq.n 186 <_STRUCT_KERNEL_SIZE+0x5e>
174: 07d2 lsls r2, r2, #31
176: bf1c itt ne
178: f811 3b01 ldrbne.w r3, [r1], #1
17c: f800 3b01 strbne.w r3, [r0], #1
180: d301 bcc.n 186 <_STRUCT_KERNEL_SIZE+0x5e>
182: 880b ldrh r3, [r1, #0]
184: 8003 strh r3, [r0, #0]
186: 4660 mov r0, ip
188: 4770 bx lr
18a: bf00 nop
18c: 2a08 cmp r2, #8
18e: d313 bcc.n 1b8 <_STRUCT_KERNEL_SIZE+0x90>
190: 078b lsls r3, r1, #30
192: d0b1 beq.n f8 <memcpy+0xc>
194: f010 0303 ands.w r3, r0, #3
198: d0ae beq.n f8 <memcpy+0xc>
19a: f1c3 0304 rsb r3, r3, #4
19e: 1ad2 subs r2, r2, r3
1a0: 07db lsls r3, r3, #31
1a2: bf1c itt ne
1a4: f811 3b01 ldrbne.w r3, [r1], #1
1a8: f800 3b01 strbne.w r3, [r0], #1
1ac: d3a4 bcc.n f8 <memcpy+0xc>
1ae: f831 3b02 ldrh.w r3, [r1], #2
1b2: f820 3b02 strh.w r3, [r0], #2
1b6: e79f b.n f8 <memcpy+0xc>
1b8: 3a04 subs r2, #4
1ba: d3d9 bcc.n 170 <_STRUCT_KERNEL_SIZE+0x48>
1bc: 3a01 subs r2, #1
1be: f811 3b01 ldrb.w r3, [r1], #1
1c2: f800 3b01 strb.w r3, [r0], #1
1c6: d2f9 bcs.n 1bc <_STRUCT_KERNEL_SIZE+0x94>
1c8: 780b ldrb r3, [r1, #0]
1ca: 7003 strb r3, [r0, #0]
1cc: 784b ldrb r3, [r1, #1]
1ce: 7043 strb r3, [r0, #1]
1d0: 788b ldrb r3, [r1, #2]
1d2: 7083 strb r3, [r0, #2]
1d4: 4660 mov r0, ip
1d6: 4770 bx lr
A further investigation into this issue is required.