@@ -290,3 +290,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
290
290
%gep = getelementptr inbounds i8 , ptr %base , i64 %mul
291
291
ret ptr %gep
292
292
}
293
+
294
+ ; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr.
+ ; The store address is a kernel-argument base plus a zero-extended
+ ; per-workitem byte offset plus the constant 16. In the LEGACY checks the
+ ; store takes the SGPR base s[0:1] together with a 32-bit VGPR offset; in the
+ ; PTRADD checks the 64-bit address is first formed with v_lshl_add_u64 and the
+ ; store uses "off". Both keep 16 as the instruction's immediate offset.
295
+ define amdgpu_kernel void @uniform_base_varying_offset_imm (ptr addrspace (1 ) %p ) {
296
+ ; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
297
+ ; GFX942_PTRADD: ; %bb.0: ; %entry
298
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
299
+ ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
300
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
301
+ ; GFX942_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
302
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1
303
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
304
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
305
+ ; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off offset:16
306
+ ; GFX942_PTRADD-NEXT: s_endpgm
307
+ ;
308
+ ; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
309
+ ; GFX942_LEGACY: ; %bb.0: ; %entry
310
+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
311
+ ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
312
+ ; GFX942_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
313
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 1
314
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
315
+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[0:1] offset:16
316
+ ; GFX942_LEGACY-NEXT: s_endpgm
317
+ entry:
+ ; Byte offset = workitem id x * 4, zero-extended from 32 to 64 bits.
318
+ %tid = call i32 @llvm.amdgcn.workitem.id.x ()
319
+ %shift = shl i32 %tid , 2
320
+ %voffset = zext i32 %shift to i64
321
+ %gep1 = getelementptr inbounds i8 , ptr addrspace (1 ) %p , i64 %voffset
+ ; The constant 16 is applied by a second GEP on top of the variable offset.
322
+ %gep2 = getelementptr inbounds i8 , ptr addrspace (1 ) %gep1 , i64 16
323
+ store i32 1 , ptr addrspace (1 ) %gep2
324
+ ret void
325
+ }
326
+
327
+ ; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
328
+ ; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset.
+ ; The base pointer and the 32-bit offset are both kernel arguments. The LEGACY
+ ; checks pass the offset register (s6) directly to s_load_dword as an extra
+ ; operand; the PTRADD checks add it to the base with s_add_u32/s_addc_u32
+ ; before a plain s_load_dword.
329
+ define amdgpu_kernel void @global_load_saddr_i32_uniform_offset (ptr addrspace (1 ) %sbase , i32 %soffset , ptr addrspace (1 ) %r ) {
330
+ ; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
331
+ ; GFX942_PTRADD: ; %bb.0:
332
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
333
+ ; GFX942_PTRADD-NEXT: s_load_dword s6, s[4:5], 0x8
334
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
335
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, 0
336
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
337
+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, s6
338
+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, 0
339
+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
340
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
341
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, s0
342
+ ; GFX942_PTRADD-NEXT: global_store_dword v0, v1, s[2:3]
343
+ ; GFX942_PTRADD-NEXT: s_endpgm
344
+ ;
345
+ ; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
346
+ ; GFX942_LEGACY: ; %bb.0:
347
+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
348
+ ; GFX942_LEGACY-NEXT: s_load_dword s6, s[4:5], 0x8
349
+ ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10
350
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, 0
351
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
352
+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], s6 offset:0x0
353
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
354
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, s0
355
+ ; GFX942_LEGACY-NEXT: global_store_dword v0, v1, s[2:3]
356
+ ; GFX942_LEGACY-NEXT: s_endpgm
+ ; The unsigned 32-bit offset is widened to 64 bits before the byte-wise GEP.
357
+ %zext.offset = zext i32 %soffset to i64
358
+ %gep0 = getelementptr inbounds i8 , ptr addrspace (1 ) %sbase , i64 %zext.offset
359
+ %load = load i32 , ptr addrspace (1 ) %gep0
+ ; NOTE(review): the bitcast to float is presumably carried over from
+ ; global-saddr-load.ll to steer the loaded value (see the name %to.vgpr) -
+ ; confirm against that file.
360
+ %to.vgpr = bitcast i32 %load to float
361
+ store float %to.vgpr , ptr addrspace (1 ) %r
362
+ ret void
363
+ }
364
+
365
+ ; Adjusted from llvm.amdgcn.global.load.lds.ll, tests the offset lowering for
366
+ ; Intrinsic::amdgcn_global_load_lds.
+ ; The global pointer is an inreg argument (s[0:1] in the checks) and the
+ ; 32-bit offset is zero-extended before the GEP. The LEGACY checks feed both
+ ; to global_load_lds_dword as a VGPR offset plus SGPR base (v1, s[0:1]); the
+ ; PTRADD checks add them into v[2:3] with v_lshl_add_u64 and use "off".
367
+ define void @global_load_lds_dword_saddr_and_vaddr (ptr addrspace (1 ) nocapture inreg %gptr , ptr addrspace (3 ) nocapture %lptr , i32 %voffset ) {
368
+ ; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
369
+ ; GFX942_PTRADD: ; %bb.0: ; %main_body
370
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, v1
372
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v3, 0
373
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
374
+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
375
+ ; GFX942_PTRADD-NEXT: s_mov_b32 m0, s0
376
+ ; GFX942_PTRADD-NEXT: s_nop 0
377
+ ; GFX942_PTRADD-NEXT: global_load_lds_dword v[2:3], off offset:48 sc1
378
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
379
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
380
+ ;
381
+ ; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
382
+ ; GFX942_LEGACY: ; %bb.0: ; %main_body
383
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384
+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s2, v0
385
+ ; GFX942_LEGACY-NEXT: s_mov_b32 m0, s2
386
+ ; GFX942_LEGACY-NEXT: s_nop 0
387
+ ; GFX942_LEGACY-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1
388
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
389
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
390
+ main_body:
391
+ %voffset.64 = zext i32 %voffset to i64
392
+ %gep = getelementptr i8 , ptr addrspace (1 ) %gptr , i64 %voffset.64
+ ; The i32 48 operand shows up as "offset:48" in the checks.
+ ; NOTE(review): i32 4 is presumably the access size in bytes (dword) and
+ ; i32 16 the aux bits that print as sc1 - confirm against the intrinsic
+ ; definition.
393
+ call void @llvm.amdgcn.global.load.lds (ptr addrspace (1 ) %gep , ptr addrspace (3 ) %lptr , i32 4 , i32 48 , i32 16 )
394
+ ret void
395
+ }
396
+
397
+ ; Taken from shl_add_ptr_global.ll, tests PTRADD handling in
398
+ ; SITargetLowering::performSHLPtrCombine.
+ ; The pointer is advanced by 128 bytes (element 32 of an i32 array), converted
+ ; to an integer, shifted left by 2 and used as the atomic's address, while the
+ ; unshifted value has a second use in the volatile store. In the LEGACY checks
+ ; the shift is applied to the original pointer and the constant appears as
+ ; "offset:512" (128 << 2) on the atomic; in the PTRADD checks the 0x80 add
+ ; happens before the shift and the atomic has no immediate offset.
+ ; %out is not referenced in the body.
399
+ define void @shl_base_global_ptr_global_atomic_fadd (ptr addrspace (1 ) %out , ptr addrspace (1 ) %extra.use , ptr addrspace (1 ) %ptr ) {
400
+ ; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
401
+ ; GFX942_PTRADD: ; %bb.0:
402
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403
+ ; GFX942_PTRADD-NEXT: s_mov_b64 s[0:1], 0x80
404
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
405
+ ; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[4:5], 2, v[0:1]
406
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v6, 0x42c80000
407
+ ; GFX942_PTRADD-NEXT: global_atomic_add_f32 v[4:5], v6, off
408
+ ; GFX942_PTRADD-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
409
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0)
410
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
411
+ ;
412
+ ; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
413
+ ; GFX942_LEGACY: ; %bb.0:
414
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415
+ ; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5]
416
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v6, 0x42c80000
417
+ ; GFX942_LEGACY-NEXT: global_atomic_add_f32 v[0:1], v6, off offset:512
418
+ ; GFX942_LEGACY-NEXT: s_mov_b64 s[0:1], 0x80
419
+ ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
420
+ ; GFX942_LEGACY-NEXT: global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
421
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0)
422
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
+ ; Index 32 into an array of i32 is a byte offset of 128 (0x80 in the checks).
423
+ %arrayidx0 = getelementptr inbounds [512 x i32 ], ptr addrspace (1 ) %ptr , i64 0 , i64 32
424
+ %cast = ptrtoint ptr addrspace (1 ) %arrayidx0 to i64
425
+ %shl = shl i64 %cast , 2
426
+ %castback = inttoptr i64 %shl to ptr addrspace (1 )
+ ; float 100.0 is the 0x42c80000 immediate in the checks; the atomic's result
+ ; is not used.
427
+ %unused = atomicrmw fadd ptr addrspace (1 ) %castback , float 100 .0 syncscope("agent" ) monotonic , align 4 , !amdgpu.no.fine.grained.memory !0 , !amdgpu.ignore.denormal.mode !0
+ ; Second use of the unshifted pointer value.
428
+ store volatile i64 %cast , ptr addrspace (1 ) %extra.use , align 4
429
+ ret void
430
+ }
431
+
432
+ ; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
433
+ ; TargetLowering::ShrinkDemandedOp.
+ ; The 64-bit GEP result is cast from addrspace(4) to addrspace(6) before the
+ ; load, and both sets of checks read only v0 of the sum (v_readfirstlane) and
+ ; set s1 to 0. The LEGACY checks shrink the add to a 32-bit v_add_u32; the
+ ; PTRADD checks keep the 64-bit v_lshl_add_u64.
434
+ define i32 @gep_in_const_as_cast_to_const32_as (ptr addrspace (4 ) %src , i64 %offset ) {
435
+ ; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
436
+ ; GFX942_PTRADD: ; %bb.0: ; %entry
437
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438
+ ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
439
+ ; GFX942_PTRADD-NEXT: s_mov_b32 s1, 0
440
+ ; GFX942_PTRADD-NEXT: v_readfirstlane_b32 s0, v0
441
+ ; GFX942_PTRADD-NEXT: s_load_dword s0, s[0:1], 0x0
442
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
443
+ ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v0, s0
444
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
445
+ ;
446
+ ; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
447
+ ; GFX942_LEGACY: ; %bb.0: ; %entry
448
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449
+ ; GFX942_LEGACY-NEXT: v_add_u32_e32 v0, v0, v2
450
+ ; GFX942_LEGACY-NEXT: s_mov_b32 s1, 0
451
+ ; GFX942_LEGACY-NEXT: v_readfirstlane_b32 s0, v0
452
+ ; GFX942_LEGACY-NEXT: s_load_dword s0, s[0:1], 0x0
453
+ ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
454
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v0, s0
455
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
456
+ entry:
457
+ %gep = getelementptr i8 , ptr addrspace (4 ) %src , i64 %offset
+ ; The cast to addrspace(6) is what limits the demanded bits of the GEP.
+ ; NOTE(review): assumes addrspace(6) pointers are 32 bits wide, as the
+ ; function name and the "s_mov_b32 s1, 0" in the checks suggest - confirm.
458
+ %gep.cast = addrspacecast ptr addrspace (4 ) %gep to ptr addrspace (6 )
459
+ %l = load i32 , ptr addrspace (6 ) %gep.cast
460
+ ret i32 %l
461
+ }
462
+
463
+ ; All-zero constant array in addrspace(4); it is the memcpy source in
+ ; @replace_const0_memcpy_by_memset.
+ @CG = addrspace (4 ) constant [16 x i32 ] zeroinitializer , align 4
464
+
465
+ ; Test PTRADD handling in isMemSrcFromConstant.
+ ; The memcpy reads 8 bytes starting 4 bytes into the zero-initialized constant
+ ; @CG. The LEGACY checks store zeros directly (v2 = 0, v3 = v2) without reading
+ ; @CG; the PTRADD checks load @CG's address through the GOT, load the 8 bytes
+ ; at offset 0x4 and store those.
466
+ define void @replace_const0_memcpy_by_memset (ptr align 4 %dst ) {
467
+ ; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
468
+ ; GFX942_PTRADD: ; %bb.0: ; %entry
469
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
470
+ ; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
471
+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, CG@gotpcrel32@lo+4
472
+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
473
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
474
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
475
+ ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4
476
+ ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
477
+ ; GFX942_PTRADD-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
478
+ ; GFX942_PTRADD-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
479
+ ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
480
+ ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
481
+ ;
482
+ ; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
483
+ ; GFX942_LEGACY: ; %bb.0: ; %entry
484
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 0
486
+ ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v3, v2
487
+ ; GFX942_LEGACY-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
488
+ ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
489
+ ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
490
+ entry:
+ ; The source pointer is a constant-offset GEP on the global rather than the
+ ; global itself.
491
+ %gep = getelementptr i8 , ptr addrspace (4 ) @CG , i64 4
492
+ tail call void @llvm.memcpy.p0.p4.i64 (ptr noundef nonnull align 4 %dst , ptr addrspace (4 ) noundef nonnull align 4 %gep , i64 8 , i1 false )
493
+ ret void
494
+ }
495
+
496
+ ; memcpy overload with a default-address-space destination, an addrspace(4)
+ ; source and an i64 length; called by @replace_const0_memcpy_by_memset.
+ declare void @llvm.memcpy.p0.p4.i64 (ptr noalias nocapture writeonly , ptr addrspace (4 ) noalias nocapture readonly , i64 , i1 immarg)
497
+
498
+ ; Empty metadata node; the atomicrmw in
+ ; @shl_base_global_ptr_global_atomic_fadd attaches it as
+ ; !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode.
+ !0 = !{}
0 commit comments