11//! SIMD (Single Instruction; Multiple Data) convenience functions.
22//!
33//! May offer a potential boost in performance on some targets by performing
4- //! the same operations on multiple elements at once.
4+ //! the same operation on multiple elements at once.
55//!
66//! Some functions are known to not work on MIPS.
77
@@ -10,7 +10,6 @@ const builtin = @import("builtin");
1010
1111pub fn suggestVectorLengthForCpu (comptime T : type , comptime cpu : std.Target.Cpu ) ? comptime_int {
1212 // This is guesswork; if you have better suggestions, add them or edit the current values here.
13- // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it
1413 const element_bit_size = @max (8 , std .math .ceilPowerOfTwo (u16 , @bitSizeOf (T )) catch unreachable );
1514 const vector_bit_size : u16 = blk : {
1615 if (cpu .arch .isX86 ()) {
@@ -37,8 +36,37 @@ pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu)
3736 // the 2048 bits or using just 64 per vector or something in between
3837 if (std .Target .mips .featureSetHas (cpu .features , std .Target .mips .Feature .mips3d )) break :blk 64 ;
3938 } else if (cpu .arch .isRISCV ()) {
40- // in risc-v the Vector Extension allows configurable vector sizes, but a standard size of 128 is a safe estimate
41- if (std .Target .riscv .featureSetHas (cpu .features , .v )) break :blk 128 ;
39+ // In RISC-V, vector registers are length-agnostic, so there's no good way to determine the best size.
40+ // The usual vector length in most RISC-V CPUs is 256 bits; however, it can reach multiple kB.
41+ if (std .Target .riscv .featureSetHas (cpu .features , .v )) {
42+ var vec_bit_length : u32 = 256 ;
43+ if (std .Target .riscv .featureSetHas (cpu .features , .zvl32b )) {
44+ vec_bit_length = 32 ;
45+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl64b )) {
46+ vec_bit_length = 64 ;
47+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl128b )) {
48+ vec_bit_length = 128 ;
49+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl256b )) {
50+ vec_bit_length = 256 ;
51+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl512b )) {
52+ vec_bit_length = 512 ;
53+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl1024b )) {
54+ vec_bit_length = 1024 ;
55+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl2048b )) {
56+ vec_bit_length = 2048 ;
57+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl4096b )) {
58+ vec_bit_length = 4096 ;
59+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl8192b )) {
60+ vec_bit_length = 8192 ;
61+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl16384b )) {
62+ vec_bit_length = 16384 ;
63+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl32768b )) {
64+ vec_bit_length = 32768 ;
65+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl65536b )) {
66+ vec_bit_length = 65536 ;
67+ }
68+ break :blk vec_bit_length ;
69+ }
4270 } else if (cpu .arch .isSPARC ()) {
4371 // TODO: Test Sparc capability to handle bigger vectors
4472 // In theory Sparc have 32 registers of 64 bits which can use in parallel
0 commit comments