
Commit 7cceca8
1 parent 7c9d65c

Ard Biesheuvel authored and herbertx committed

crypto: arm64/aes - implement support for XTS ciphertext stealing

Add the missing support for ciphertext stealing in the implementation
of AES-XTS, which is part of the XTS specification but was omitted up
until now due to lack of a need for it.

The asm helpers are updated so they can deal with any input size, as
long as the last full block and the final partial block are presented
at the same time. The glue code is updated so that the common case of
operating on a sector or page is mostly as before. When CTS is needed,
the walk is split up into two pieces, unless the entire input is
covered by a single step.

Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
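In outline, the splitting described in the message works as sketched below. This is a minimal C sketch under stated assumptions, not the kernel code: process_blocks() and cts_final() are hypothetical stand-ins for the skcipher walk over full blocks and for the asm CTS path, and the single-step and sub-16-byte error cases are elided.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical helpers standing in for the skcipher walk and the
 * asm routines; AES_BLOCK_SIZE is 16 bytes. */
void process_blocks(uint8_t *dst, const uint8_t *src, size_t len);
void cts_final(uint8_t *dst, const uint8_t *src, size_t len);

void xts_request_sketch(uint8_t *dst, const uint8_t *src, size_t len)
{
	size_t tail = len % 16;
	size_t bulk;

	/* Common case (sector or page): block multiple, no CTS. */
	if (tail == 0) {
		process_blocks(dst, src, len);
		return;
	}

	/* Hold back the last full block and the partial block ... */
	bulk = len - tail - 16;
	process_blocks(dst, src, bulk);

	/* ... and present both to the CTS path in a single step. */
	cts_final(dst + bulk, src + bulk, 16 + tail);
}

The kernel code reaches the equivalent of cts_final() via a trimmed subrequest followed by scatterwalk_ffwd(), but the byte accounting is the same.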

File tree: 2 files changed, 195 insertions(+), 30 deletions(-)

arch/arm64/crypto/aes-glue.c
Lines changed: 114 additions & 12 deletions

@@ -90,10 +90,10 @@ asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 ctr[]);
 
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
-				int rounds, int blocks, u32 const rk2[], u8 iv[],
+				int rounds, int bytes, u32 const rk2[], u8 iv[],
 				int first);
 asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
-				int rounds, int blocks, u32 const rk2[], u8 iv[],
+				int rounds, int bytes, u32 const rk2[], u8 iv[],
 				int first);
 
 asmlinkage void aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
@@ -527,43 +527,144 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
 	struct skcipher_walk walk;
-	unsigned int blocks;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
 
 	err = skcipher_walk_virt(&walk, req, false);
 
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+					      AES_BLOCK_SIZE) - 2;
+
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   xts_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+		err = skcipher_walk_virt(&walk, req, false);
+	} else {
+		tail = 0;
+	}
+
+	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+		int nbytes = walk.nbytes;
+
+		if (walk.nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
 		kernel_neon_begin();
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key1.key_enc, rounds, blocks,
+				ctx->key1.key_enc, rounds, nbytes,
 				ctx->key2.key_enc, walk.iv, first);
 		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
-	return err;
+	if (err || likely(!tail))
+		return err;
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			ctx->key1.key_enc, rounds, walk.nbytes,
+			ctx->key2.key_enc, walk.iv, first);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
 }
 
 static int __maybe_unused xts_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
 	struct skcipher_walk walk;
-	unsigned int blocks;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
 
 	err = skcipher_walk_virt(&walk, req, false);
 
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+					      AES_BLOCK_SIZE) - 2;
+
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   xts_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+		err = skcipher_walk_virt(&walk, req, false);
+	} else {
+		tail = 0;
+	}
+
+	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+		int nbytes = walk.nbytes;
+
+		if (walk.nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
 		kernel_neon_begin();
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key1.key_dec, rounds, blocks,
+				ctx->key1.key_dec, rounds, nbytes,
 				ctx->key2.key_enc, walk.iv, first);
 		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
-	return err;
+	if (err || likely(!tail))
+		return err;
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
+
+
+	kernel_neon_begin();
+	aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			ctx->key1.key_dec, rounds, walk.nbytes,
+			ctx->key2.key_enc, walk.iv, first);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
 }
 
 static struct skcipher_alg aes_algs[] = { {
@@ -644,6 +745,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
 	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
 	.ivsize		= AES_BLOCK_SIZE,
+	.walksize	= 2 * AES_BLOCK_SIZE,
 	.setkey		= xts_set_key,
 	.encrypt	= xts_encrypt,
 	.decrypt	= xts_decrypt,
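One detail worth calling out in the last hunk: the new .walksize = 2 * AES_BLOCK_SIZE entry is what asks the skcipher walk layer to keep the last two blocks together, so the final full block and the partial tail reach the asm helpers in a single step, as the commit message requires. (This reading of walksize is inferred from how the glue code uses the walk; the diff itself does not spell it out.)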

arch/arm64/crypto/aes-modes.S
Lines changed: 81 additions & 18 deletions

@@ -413,10 +413,10 @@ AES_ENDPROC(aes_ctr_encrypt)
 
 
 	/*
+	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
 	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
-	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
+	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
 	 */
 
 	.macro		next_tweak, out, in, tmp
@@ -451,7 +451,7 @@ AES_ENTRY(aes_xts_encrypt)
 .LxtsencloopNx:
 	next_tweak	v4, v4, v8
 .LxtsencNx:
-	subs		w4, w4, #4
+	subs		w4, w4, #64
 	bmi		.Lxtsenc1x
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	next_tweak	v5, v4, v8
@@ -468,33 +468,66 @@ AES_ENTRY(aes_xts_encrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v7.16b
-	cbz		w4, .Lxtsencout
+	cbz		w4, .Lxtsencret
 	xts_reload_mask	v8
 	b		.LxtsencloopNx
 .Lxtsenc1x:
-	adds		w4, w4, #4
+	adds		w4, w4, #64
 	beq		.Lxtsencout
+	subs		w4, w4, #16
+	bmi		.LxtsencctsNx
 .Lxtsencloop:
-	ld1		{v1.16b}, [x1], #16
-	eor		v0.16b, v1.16b, v4.16b
+	ld1		{v0.16b}, [x1], #16
+.Lxtsencctsout:
+	eor		v0.16b, v0.16b, v4.16b
 	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
-	beq		.Lxtsencout
+	cbz		w4, .Lxtsencout
+	subs		w4, w4, #16
 	next_tweak	v4, v4, v8
+	bmi		.Lxtsenccts
+	st1		{v0.16b}, [x0], #16
 	b		.Lxtsencloop
 .Lxtsencout:
+	st1		{v0.16b}, [x0]
+.Lxtsencret:
 	st1		{v4.16b}, [x6]
 	ldp		x29, x30, [sp], #16
 	ret
-AES_ENDPROC(aes_xts_encrypt)
 
+.LxtsencctsNx:
+	mov		v0.16b, v3.16b
+	sub		x0, x0, #16
+.Lxtsenccts:
+	adr_l		x8, .Lcts_permute_table
+
+	add		x1, x1, w4, sxtw	/* rewind input pointer */
+	add		w4, w4, #16		/* # bytes in final block */
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	add		x4, x0, x4		/* output address of final block */
+
+	ld1		{v1.16b}, [x1]		/* load final block */
+	ld1		{v2.16b}, [x8]
+	ld1		{v3.16b}, [x9]
+
+	tbl		v2.16b, {v0.16b}, v2.16b
+	tbx		v0.16b, {v1.16b}, v3.16b
+	st1		{v2.16b}, [x4]		/* overlapping stores */
+	mov		w4, wzr
+	b		.Lxtsencctsout
+AES_ENDPROC(aes_xts_encrypt)
 
 AES_ENTRY(aes_xts_decrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
+	/* subtract 16 bytes if we are doing CTS */
+	sub		w8, w4, #0x10
+	tst		w4, #0xf
+	csel		w4, w4, w8, eq
+
 	ld1		{v4.16b}, [x6]
 	xts_load_mask	v8
 	cbz		w7, .Lxtsdecnotfirst
@@ -509,7 +542,7 @@ AES_ENTRY(aes_xts_decrypt)
 .LxtsdecloopNx:
 	next_tweak	v4, v4, v8
 .LxtsdecNx:
-	subs		w4, w4, #4
+	subs		w4, w4, #64
 	bmi		.Lxtsdec1x
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	next_tweak	v5, v4, v8
@@ -530,22 +563,52 @@ AES_ENTRY(aes_xts_decrypt)
 	xts_reload_mask	v8
 	b		.LxtsdecloopNx
 .Lxtsdec1x:
-	adds		w4, w4, #4
+	adds		w4, w4, #64
 	beq		.Lxtsdecout
+	subs		w4, w4, #16
 .Lxtsdecloop:
-	ld1		{v1.16b}, [x1], #16
-	eor		v0.16b, v1.16b, v4.16b
+	ld1		{v0.16b}, [x1], #16
+	bmi		.Lxtsdeccts
+.Lxtsdecctsout:
+	eor		v0.16b, v0.16b, v4.16b
 	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
-	beq		.Lxtsdecout
+	cbz		w4, .Lxtsdecout
+	subs		w4, w4, #16
 	next_tweak	v4, v4, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
 	st1		{v4.16b}, [x6]
 	ldp		x29, x30, [sp], #16
 	ret
+
+.Lxtsdeccts:
+	adr_l		x8, .Lcts_permute_table
+
+	add		x1, x1, w4, sxtw	/* rewind input pointer */
+	add		w4, w4, #16		/* # bytes in final block */
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	add		x4, x0, x4		/* output address of final block */
+
+	next_tweak	v5, v4, v8
+
+	ld1		{v1.16b}, [x1]		/* load final block */
+	ld1		{v2.16b}, [x8]
+	ld1		{v3.16b}, [x9]
+
+	eor		v0.16b, v0.16b, v5.16b
+	decrypt_block	v0, w3, x2, x8, w7
+	eor		v0.16b, v0.16b, v5.16b
+
+	tbl		v2.16b, {v0.16b}, v2.16b
+	tbx		v0.16b, {v1.16b}, v3.16b
+
+	st1		{v2.16b}, [x4]		/* overlapping stores */
+	mov		w4, wzr
+	b		.Lxtsdecctsout
AES_ENDPROC(aes_xts_decrypt)
 
 	/*
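For readers decoding the tbl/tbx sequences above: the permute-table loads and the overlapping stores implement the standard XTS ciphertext-stealing swap over the final 16 + tail bytes. Below is a schematic C model of the encrypt side, covering the math only, not the kernel code; xts_encblock() is a hypothetical stand-in for the tweak-XOR/AES/tweak-XOR block step.

#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in for one XTS block step: E_K1(x ^ T) ^ T. */
void xts_encblock(uint8_t out[16], const uint8_t in[16],
		  const uint8_t tweak[16]);

/*
 * Final 16 + tail bytes on the encrypt side (1 <= tail <= 15): "in"
 * holds the last full plaintext block followed by the partial block,
 * "out" receives the last full ciphertext block followed by the
 * partial ciphertext block.
 */
void xts_cts_final_enc(uint8_t *out, const uint8_t *in, int tail,
		       const uint8_t tweak[16],
		       const uint8_t next_tweak[16])
{
	uint8_t cc[16], pp[16];

	/* Encrypt the last full block with the current tweak. */
	xts_encblock(cc, in, tweak);

	/* Steal its trailing bytes to pad the partial plaintext block. */
	memcpy(pp, in + 16, tail);
	memcpy(pp + tail, cc + tail, 16 - tail);

	/* The head of cc becomes the final, partial ciphertext block. */
	memcpy(out + 16, cc, tail);

	/* The padded block, under the next tweak, fills the full slot. */
	xts_encblock(out, pp, next_tweak);
}

Decryption mirrors this with the roles of the last two tweaks swapped, which is why the decrypt path above computes next_tweak into v5, decrypts the last full ciphertext block with it, and only then runs the merged block through the regular path under the previous tweak.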
