Skip to content

Commit fdcee2d

Browse files
[SLP]Reorder tree, if the reorder indices are non empty
Need to consider the ordering for all nodes with the specified ordering, not only loads/store/extracts. Reviewers: hiraditya, RKSimon Reviewed By: hiraditya Pull Request: #136185
1 parent e298f16 commit fdcee2d

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -6711,6 +6711,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
67116711
"BinaryOperator and CastInst.");
67126712
return TE.ReorderIndices;
67136713
}
6714+
if (!TE.ReorderIndices.empty())
6715+
return TE.ReorderIndices;
67146716
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
67156717
if (!TE.ReorderIndices.empty())
67166718
return TE.ReorderIndices;

llvm/test/Transforms/SLPVectorizer/revec.ll

+6-8
Original file line numberDiff line numberDiff line change
@@ -416,16 +416,15 @@ define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) {
416416
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0)
417417
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
418418
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
419-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
420419
; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
421420
; CHECK: for.end.loopexit:
422-
; CHECK-NEXT: [[TMP5:%.*]] = phi <16 x i32> [ [[TMP4]], [[ENTRY:%.*]] ]
423-
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP5]], i64 12)
421+
; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ]
422+
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 4)
424423
; CHECK-NEXT: [[OR0:%.*]] = or <4 x i32> [[TMP6]], zeroinitializer
425424
; CHECK-NEXT: store <4 x i32> [[OR0]], ptr [[OUT0:%.*]], align 4
426-
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 0)
425+
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP3]], i64 0)
427426
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[OUT1:%.*]], align 4
428-
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 8)
427+
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP3]], i64 12)
429428
; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[OUT2:%.*]], align 4
430429
; CHECK-NEXT: ret void
431430
;
@@ -456,11 +455,10 @@ define void @test14(<8 x i1> %0) {
456455
; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
457456
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
458457
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
459-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7>
460458
; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
461459
; CHECK: for.end.loopexit:
462-
; CHECK-NEXT: [[TMP7:%.*]] = phi <16 x i16> [ [[TMP6]], [[ENTRY:%.*]] ]
463-
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> [[TMP7]], i64 12)
460+
; CHECK-NEXT: [[TMP6:%.*]] = phi <16 x i16> [ [[TMP5]], [[ENTRY:%.*]] ]
461+
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> [[TMP6]], i64 4)
464462
; CHECK-NEXT: [[OR0:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
465463
; CHECK-NEXT: ret void
466464
;

0 commit comments

Comments
 (0)