Commit 554bfec
parisc: Fix access fault handling in pa_memcpy()
pa_memcpy() is the major memcpy implementation in the parisc kernel, used
for all kinds of userspace/kernel memory copies. Al Viro noticed various
bugs in the implementation of pa_memcpy(), most notably that in case of
faults it may report back to have copied more bytes than it actually did.

Fixing those bugs is quite hard in the C implementation, because the
compiler is free to move variables between processor registers, and we are
not guaranteed that specific variables are always held in the same
registers. This makes proper fault handling complicated.

This patch implements pa_memcpy() in assembler. That way we have correct
fault handling, and adding a 64-bit copy routine was quite easy.

Runtime tested with 32- and 64-bit kernels.

Reported-by: Al Viro <[email protected]>
Cc: <[email protected]> # v4.9+
Signed-off-by: John David Anglin <[email protected]>
Signed-off-by: Helge Deller <[email protected]>
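The contract at stake here: pa_memcpy() must return the number of bytes that could not be copied, and zero on success. A minimal C model of that contract (a sketch for illustration; `model_memcpy` is a hypothetical name, not the kernel routine):

	/*
	 * Hypothetical model of the pa_memcpy() contract: copy len bytes
	 * and return how many bytes were NOT copied (0 on full success).
	 * The real routine is the assembler version added by this patch.
	 */
	static unsigned long model_memcpy(void *dst, const void *src,
					  unsigned long len)
	{
		unsigned char *d = dst;
		const unsigned char *s = src;
		unsigned long i;

		for (i = 0; i < len; i++) {
			/* In the kernel a load or store here may fault; the
			 * fixup must then return exactly len - i, never less. */
			d[i] = s[i];
		}
		return len - i;	/* 0 when the loop completed */
	}

The bug class being fixed is a violation of this contract: if a fault fixup returns a count that is too small, callers built on top of pa_memcpy() (the userspace/kernel copy routines) conclude that bytes were copied when they were not.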
1 parent fe82203 commit 554bfec

2 files changed: 321 additions, 458 deletions

arch/parisc/lib/lusercopy.S

Lines changed: 318 additions & 0 deletions
@@ -5,6 +5,8 @@
  * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
  * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
  * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
+ * Copyright (C) 2017 Helge Deller <[email protected]>
+ * Copyright (C) 2017 John David Anglin <[email protected]>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user)
 
 	.procend
 
+
+
+/*
+ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+ *
+ * Inputs:
+ * - sr1 already contains space of source region
+ * - sr2 already contains space of destination region
+ *
+ * Returns:
+ * - number of bytes that could not be copied.
+ *   On success, this will be zero.
+ *
+ * This code is based on a C implementation of a copy routine written by
+ * Randolph Chung, which in turn was derived from glibc.
+ *
+ * Several strategies are tried in order to get the best performance for
+ * various conditions. In the optimal case, we copy in loops that move 32 or
+ * 16 bytes at a time using general registers. Unaligned copies are handled
+ * either by aligning the destination and then using a shift-and-write
+ * method, or in a few cases by falling back to a byte-at-a-time copy.
+ *
+ * Testing with various alignments and buffer sizes shows that this code is
+ * often >10x faster than a simple byte-at-a-time copy, even for strangely
+ * aligned operands. It is interesting to note that the glibc version of
+ * memcpy (written in C) is actually quite fast already. This routine is able
+ * to beat it by 30-40% for aligned copies because of the loop unrolling, but
+ * in some cases the glibc version is still slightly faster. This lends
+ * credibility to the claim that gcc can generate very good code as long as
+ * we are careful.
+ *
+ * Possible optimizations:
+ * - add cache prefetching
+ * - try not to use the post-increment address modifiers; they may create
+ *   additional interlocks. The assumption is that those were only efficient
+ *   on old machines (pre-PA8000 processors).
+ */
+
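In C terms, the strategy selection described in the comment above amounts to testing the XOR of the two addresses: address bits in which src and dst differ can never be aligned away by advancing both pointers together. A hedged sketch of the dispatch (the enum and `pick_strategy` are illustrative names only, not kernel symbols):

	#include <stdint.h>

	/* Illustrative model of the dispatch done at the top of pa_memcpy(). */
	enum copy_strategy { BYTE_COPY, WORD_COPY, DWORD_COPY, SHIFT_COPY };

	static enum copy_strategy pick_strategy(const void *src, void *dst,
						unsigned long len)
	{
		uintptr_t diff = (uintptr_t)src ^ (uintptr_t)dst;

		if (len < 16)
			return BYTE_COPY;   /* too short to bother aligning */
		if (diff & 3)
			return SHIFT_COPY;  /* incompatible alignment: shift-and-write */
	#ifdef CONFIG_64BIT                 /* mirrors the asm's #ifdef below */
		if (!(diff & 7))
			return DWORD_COPY;  /* mutual 8-byte alignment reachable */
	#endif
		return WORD_COPY;           /* mutual 4-byte alignment reachable */
	}

Advancing src and dst by the same byte count preserves src ^ dst, which is why a single up-front test is enough to commit to a strategy.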
+	dst = arg0
+	src = arg1
+	len = arg2
+	end = arg3
+	t1  = r19
+	t2  = r20
+	t3  = r21
+	t4  = r22
+	srcspc = sr1
+	dstspc = sr2
+
+	t0 = r1
+	a1 = t1
+	a2 = t2
+	a3 = t3
+	a0 = t4
+
+	save_src = ret0
+	save_dst = ret1
+	save_len = r31
+
+ENTRY_CFI(pa_memcpy)
+	.proc
+	.callinfo NO_CALLS
+	.entry
+
+	/* Last destination address */
+	add	dst,len,end
+
+	/* short copy with less than 16 bytes? */
+	cmpib,>>=,n 15,len,.Lbyte_loop
+
+	/* same alignment? */
+	xor	src,dst,t0
+	extru	t0,31,2,t1
+	cmpib,<>,n  0,t1,.Lunaligned_copy
+
+#ifdef CONFIG_64BIT
+	/* only do 64-bit copies if we can get aligned. */
+	extru	t0,31,3,t1
+	cmpib,<>,n  0,t1,.Lalign_loop32
+
+	/* loop until we are 64-bit aligned */
+.Lalign_loop64:
+	extru	dst,31,3,t1
+	cmpib,=,n	0,t1,.Lcopy_loop_16
+20:	ldb,ma	1(srcspc,src),t1
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lalign_loop64
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+	ldi	31,t0
+.Lcopy_loop_16:
+	cmpb,COND(>>=),n t0,len,.Lword_loop
+
+10:	ldd	0(srcspc,src),t1
+11:	ldd	8(srcspc,src),t2
+	ldo	16(src),src
+12:	std,ma	t1,8(dstspc,dst)
+13:	std,ma	t2,8(dstspc,dst)
+14:	ldd	0(srcspc,src),t1
+15:	ldd	8(srcspc,src),t2
+	ldo	16(src),src
+16:	std,ma	t1,8(dstspc,dst)
+17:	std,ma	t2,8(dstspc,dst)
+
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+	b	.Lcopy_loop_16
+	ldo	-32(len),len
+
+.Lword_loop:
+	cmpib,COND(>>=),n 3,len,.Lbyte_loop
+20:	ldw,ma	4(srcspc,src),t1
+21:	stw,ma	t1,4(dstspc,dst)
+	b	.Lword_loop
+	ldo	-4(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+#endif /* CONFIG_64BIT */
+
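Every potentially faulting access above is registered with ASM_EXCEPTIONTABLE_ENTRY(insn, fixup), which records the pair in the kernel's __ex_table section; on an access fault, the trap handler looks up the faulting address and resumes execution at the fixup label instead of oopsing. Conceptually it behaves like the sketch below (a simplification: the real parisc entries store relative offsets, and the sorted table is searched via search_exception_tables(), not linearly):

	/* Conceptual model of kernel exception-table fixup (simplified). */
	struct extable_entry_model {
		unsigned long insn;	/* address of the faulting instruction */
		unsigned long fixup;	/* address to resume execution at */
	};

	/* Called from the page-fault handler on a kernel-mode access fault. */
	static int fixup_exception_model(const struct extable_entry_model *table,
					 unsigned long nentries,
					 unsigned long faulting_pc,
					 unsigned long *resume_pc)
	{
		unsigned long i;

		for (i = 0; i < nentries; i++) {
			if (table[i].insn == faulting_pc) {
				*resume_pc = table[i].fixup; /* e.g. .Lcopy_done */
				return 1;
			}
		}
		return 0;	/* no fixup registered: a genuine kernel bug */
	}

This is what makes the assembler version tractable: each fixup label knows exactly which registers are live at its fault sites, something the C compiler never guaranteed.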
+	/* loop until we are 32-bit aligned */
+.Lalign_loop32:
+	extru	dst,31,2,t1
+	cmpib,=,n	0,t1,.Lcopy_loop_4
+20:	ldb,ma	1(srcspc,src),t1
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lalign_loop32
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+
+.Lcopy_loop_4:
+	cmpib,COND(>>=),n 15,len,.Lbyte_loop
+
+10:	ldw	0(srcspc,src),t1
+11:	ldw	4(srcspc,src),t2
+12:	stw,ma	t1,4(dstspc,dst)
+13:	stw,ma	t2,4(dstspc,dst)
+14:	ldw	8(srcspc,src),t1
+15:	ldw	12(srcspc,src),t2
+	ldo	16(src),src
+16:	stw,ma	t1,4(dstspc,dst)
+17:	stw,ma	t2,4(dstspc,dst)
+
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
+	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+	b	.Lcopy_loop_4
+	ldo	-16(len),len
+
+.Lbyte_loop:
+	cmpclr,COND(<>) len,%r0,%r0
+	b,n	.Lcopy_done
+20:	ldb	0(srcspc,src),t1
+	ldo	1(src),src
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lbyte_loop
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_done:
+	bv	%r0(%r2)
+	sub	end,dst,ret0
+
+
+	/* src and dst are not aligned the same way. */
+	/* need to go the hard way */
+.Lunaligned_copy:
+	/* align until dst is 32bit-word-aligned */
+	extru	dst,31,2,t1
+	cmpib,COND(=),n	0,t1,.Lcopy_dstaligned
+20:	ldb	0(srcspc,src),t1
+	ldo	1(src),src
+21:	stb,ma	t1,1(dstspc,dst)
+	b	.Lunaligned_copy
+	ldo	-1(len),len
+
+	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_dstaligned:
+
+	/* store src, dst and len in safe place */
+	copy	src,save_src
+	copy	dst,save_dst
+	copy	len,save_len
+
+	/* len now needs to give the number of words to copy */
+	SHRREG	len,2,len
+
+	/*
+	 * Copy from a not-aligned src to an aligned dst using shifts.
+	 * Handles 4 words per loop.
+	 */
+
+	depw,z src,28,2,t0
+	subi 32,t0,t0
+	mtsar t0
+	extru len,31,2,t0
+	cmpib,= 2,t0,.Lcase2
+	/* Make src aligned by rounding it down. */
+	depi 0,31,2,src
+
+	cmpiclr,<> 3,t0,%r0
+	b,n .Lcase3
+	cmpiclr,<> 1,t0,%r0
+	b,n .Lcase1
+.Lcase0:
+	cmpb,= %r0,len,.Lcda_finish
+	nop
+
+1:	ldw,ma 4(srcspc,src), a3
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a0
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	b,n .Ldo3
+.Lcase1:
+1:	ldw,ma 4(srcspc,src), a2
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a3
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	ldo -1(len),len
+	cmpb,=,n %r0,len,.Ldo0
+.Ldo4:
+1:	ldw,ma 4(srcspc,src), a0
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a2, a3, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo3:
+1:	ldw,ma 4(srcspc,src), a1
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a3, a0, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo2:
+1:	ldw,ma 4(srcspc,src), a2
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a0, a1, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo1:
+1:	ldw,ma 4(srcspc,src), a3
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	shrpw a1, a2, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+	ldo -4(len),len
+	cmpb,<> %r0,len,.Ldo4
+	nop
+.Ldo0:
+	shrpw a2, a3, %sar, t0
+1:	stw,ma t0, 4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+
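The loop above is the classic glibc-style shift-and-merge: src is rounded down to a word boundary, %sar is preloaded with 32 minus the misalignment in bits, and each shrpw funnels two adjacent source words into one aligned destination word. Roughly, in C (big-endian, as on parisc; the careful boundary and fault handling of the real code is omitted, and `shift_copy_words` is an illustrative name):

	#include <stdint.h>

	/*
	 * Sketch of the shift-and-merge step. dst is word-aligned; src is not
	 * (sh is 8, 16 or 24 here, never 0, since the mutually-aligned case
	 * was dispatched earlier), so no shift amount reaches 32.
	 */
	static void shift_copy_words(uint32_t *dst, const unsigned char *src,
				     unsigned long nwords)
	{
		unsigned int sh = ((uintptr_t)src & 3) * 8;
		const uint32_t *s = (const uint32_t *)((uintptr_t)src & ~3UL);
		uint32_t prev = *s++;	/* first, partial source word */
		unsigned long i;

		for (i = 0; i < nwords; i++) {
			uint32_t next = *s++;
			/* big-endian merge, as shrpw does via %sar:
			 * high bits from prev, low bits from next */
			dst[i] = (prev << sh) | (next >> (32 - sh));
			prev = next;
		}
	}

The four .Lcase/.Ldo labels exist only to rotate which of a0..a3 plays "prev" and "next", so the loop body needs no register moves.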
+.Lcda_rdfault:
+.Lcda_finish:
+	/* calculate new src, dst and len and jump to byte-copy loop */
+	sub	dst,save_dst,t0
+	add	save_src,t0,src
+	b	.Lbyte_loop
+	sub	save_len,t0,len
+
+.Lcase3:
+1:	ldw,ma 4(srcspc,src), a0
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a1
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	b .Ldo2
+	ldo 1(len),len
+.Lcase2:
+1:	ldw,ma 4(srcspc,src), a1
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:	ldw,ma 4(srcspc,src), a2
+	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+	b .Ldo1
+	ldo 2(len),len
+
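The .Lcda_rdfault / .Lcda_finish path is the central trick for accurate accounting: after a read fault mid-loop, the only trustworthy measure of progress is how far dst has advanced, since every issued store completed. src and len are re-derived from that and the copy is retried bytewise, which then faults at the exact offending byte. As a C sketch (struct and function names are illustrative, not kernel symbols):

	/*
	 * Model of the .Lcda_rdfault / .Lcda_finish recovery: rebuild a
	 * byte-accurate src/len from destination progress, then retry
	 * via the byte-at-a-time loop (.Lbyte_loop in the asm).
	 */
	struct copy_state {
		const unsigned char *src;
		unsigned char *dst;
		unsigned long len;
	};

	static void cda_recover(struct copy_state *st,
				const unsigned char *save_src,
				unsigned char *save_dst,
				unsigned long save_len)
	{
		unsigned long done = st->dst - save_dst; /* bytes stored */

		st->src = save_src + done;	/* byte-granular source */
		st->len = save_len - done;	/* bytes still outstanding */
	}

This is why src, dst and len were stashed in save_src/save_dst/save_len before the shift-and-merge loop began.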
+
+	/* fault exception fixup handlers: */
+#ifdef CONFIG_64BIT
+.Lcopy16_fault:
+10:	b	.Lcopy_done
+	std,ma	t1,8(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+#endif
+
+.Lcopy8_fault:
+10:	b	.Lcopy_done
+	stw,ma	t1,4(dstspc,dst)
+	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+
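These two handlers cover a subtle case in the unrolled loops: when the second load of a pair (labels 11: and 15:) faults, the first word is already sitting in t1 but has not been stored yet, so the handler flushes it before falling through to the common exit; the flushing store is itself covered by another exception entry. A rough C model (hypothetical names; the 64-bit .Lcopy16_fault variant is identical with 8-byte words):

	#include <stdint.h>

	/*
	 * Model of .Lcopy8_fault: flush the already-loaded word t1, then
	 * report the residual exactly as .Lcopy_done does (end - dst).
	 * dst is word-aligned at this point in the asm.
	 */
	static unsigned long copy8_fault_model(unsigned char **dst, uint32_t t1,
					       unsigned char *end)
	{
		*(uint32_t *)*dst = t1;	/* word loaded at label 10: or 14: */
		*dst += 4;		/* stw,ma post-increments dst */
		return end - *dst;	/* bytes that could not be copied */
	}

Without this flush, the returned residual would be four (or eight) bytes too large and the caller would needlessly discard data that was in fact read.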
+	.exit
+ENDPROC_CFI(pa_memcpy)
+	.procend
+
 	.end
