1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
| //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Move instructions between statements.
//
//===----------------------------------------------------------------------===//
#include "polly/ForwardOpTree.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopInfo.h"
#include "polly/ScopPass.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/VirtualInstruction.h"
#include "polly/ZoneAlgo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/ctx.h"
#include "isl/isl-noexceptions.h"
#include <cassert>
#include <memory>
#define DEBUG_TYPE "polly-optree"
using namespace llvm;
using namespace polly;
static cl::opt<bool>
AnalyzeKnown("polly-optree-analyze-known",
cl::desc("Analyze array contents for load forwarding"),
cl::cat(PollyCategory), cl::init(true), cl::Hidden);
static cl::opt<bool>
NormalizePHIs("polly-optree-normalize-phi",
cl::desc("Replace PHIs by their incoming values"),
cl::cat(PollyCategory), cl::init(false), cl::Hidden);
static cl::opt<unsigned>
MaxOps("polly-optree-max-ops",
cl::desc("Maximum number of ISL operations to invest for known "
"analysis; 0=no limit"),
cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
STATISTIC(KnownOutOfQuota,
"Analyses aborted because max_operations was reached");
STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
STATISTIC(TotalKnownLoadsForwarded,
"Number of forwarded loads because their value was known");
STATISTIC(TotalReloads, "Number of reloaded values");
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
STATISTIC(TotalModifiedStmts,
"Number of statements with at least one forwarded tree");
STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
STATISTIC(NumValueWritesInLoops,
"Number of scalar value writes nested in affine loops after OpTree");
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
STATISTIC(NumPHIWritesInLoops,
"Number of scalar phi writes nested in affine loops after OpTree");
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
STATISTIC(NumSingletonWritesInLoops,
"Number of singleton writes nested in affine loops after OpTree");
namespace {
/// The state of whether an operand tree was/can be forwarded.
///
/// The items apply to an instructions and its operand tree with the instruction
/// as the root element. If the value in question is not an instruction in the
/// SCoP, it can be a leaf of an instruction's operand tree.
enum ForwardingDecision {
/// The root instruction or value cannot be forwarded at all.
FD_CannotForward,
/// The root instruction or value can be forwarded as a leaf of a larger
/// operand tree.
/// It does not make sense to move the value itself, it would just replace it
/// by a use of itself. For instance, a constant "5" used in a statement can
/// be forwarded, but it would just replace it by the same constant "5".
/// However, it makes sense to move as an operand of
///
/// %add = add 5, 5
///
/// where "5" is moved as part of a larger operand tree. "5" would be placed
/// (disregarding for a moment that literal constants don't have a location
/// and can be used anywhere) into the same statement as %add would.
FD_CanForwardLeaf,
/// The root instruction can be forwarded and doing so avoids a scalar
/// dependency.
///
/// This can be either because the operand tree can be moved to the target
/// statement, or a memory access is redirected to read from a different
/// location.
FD_CanForwardProfitably,
/// Used to indicate that a forwarding has be carried out successfully, and
/// the forwarded memory access can be deleted.
FD_DidForwardTree,
/// Used to indicate that a forwarding has be carried out successfully, and
/// the forwarded memory access is being reused.
FD_DidForwardLeaf,
/// A forwarding method cannot be applied to the operand tree.
/// The difference to FD_CannotForward is that there might be other methods
/// that can handle it.
/// The conditions that make an operand tree applicable must be checked even
/// with DoIt==true because a method following the one that returned
/// FD_NotApplicable might have returned FD_CanForwardTree.
FD_NotApplicable
};
/// Implementation of operand tree forwarding for a specific SCoP.
///
/// For a statement that requires a scalar value (through a value read
/// MemoryAccess), see if its operand can be moved into the statement. If so,
/// the MemoryAccess is removed and the all the operand tree instructions are
/// moved into the statement. All original instructions are left in the source
/// statements. The simplification pass can clean these up.
class ForwardOpTreeImpl : ZoneAlgorithm {
private:
/// Scope guard to limit the number of isl operations for this pass.
IslMaxOperationsGuard &MaxOpGuard;
/// How many instructions have been copied to other statements.
int NumInstructionsCopied = 0;
/// Number of loads forwarded because their value was known.
int NumKnownLoadsForwarded = 0;
/// Number of values reloaded from known array elements.
int NumReloads = 0;
/// How many read-only accesses have been copied.
int NumReadOnlyCopied = 0;
/// How many operand trees have been forwarded.
int NumForwardedTrees = 0;
/// Number of statements with at least one forwarded operand tree.
int NumModifiedStmts = 0;
/// Whether we carried out at least one change to the SCoP.
bool Modified = false;
/// Contains the zones where array elements are known to contain a specific
/// value.
/// { [Element[] -> Zone[]] -> ValInst[] }
/// @see computeKnown()
isl::union_map Known;
/// Translator for newly introduced ValInsts to already existing ValInsts such
/// that new introduced load instructions can reuse the Known analysis of its
/// original load. { ValInst[] -> ValInst[] }
isl::union_map Translator;
/// Get list of array elements that do contain the same ValInst[] at Domain[].
///
/// @param ValInst { Domain[] -> ValInst[] }
/// The values for which we search for alternative locations,
/// per statement instance.
///
/// @return { Domain[] -> Element[] }
/// For each statement instance, the array elements that contain the
/// same ValInst.
isl::union_map findSameContentElements(isl::union_map ValInst) {
assert(!ValInst.is_single_valued().is_false());
// { Domain[] }
isl::union_set Domain = ValInst.domain();
// { Domain[] -> Scatter[] }
isl::union_map Schedule = getScatterFor(Domain);
// { Element[] -> [Scatter[] -> ValInst[]] }
isl::union_map MustKnownCurried =
convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
// { [Domain[] -> ValInst[]] -> Scatter[] }
isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
// { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
isl::union_map SchedValDomVal =
DomValSched.range_product(ValInst.range_map()).reverse();
// { Element[] -> [Domain[] -> ValInst[]] }
isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
// { Domain[] -> Element[] }
isl::union_map MustKnownMap =
MustKnownInst.uncurry().domain().unwrap().reverse();
simplify(MustKnownMap);
return MustKnownMap;
}
/// Find a single array element for each statement instance, within a single
/// array.
///
/// @param MustKnown { Domain[] -> Element[] }
/// Set of candidate array elements.
/// @param Domain { Domain[] }
/// The statement instance for which we need elements for.
///
/// @return { Domain[] -> Element[] }
/// For each statement instance, an array element out of @p MustKnown.
/// All array elements must be in the same array (Polly does not yet
/// support reading from different accesses using the same
/// MemoryAccess). If no mapping for all of @p Domain exists, returns
/// null.
isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
// { Domain[] -> Element[] }
isl::map Result;
// MemoryAccesses can read only elements from a single array
// (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
// Look through all spaces until we find one that contains at least the
// wanted statement instance.s
for (isl::map Map : MustKnown.get_map_list()) {
// Get the array this is accessing.
isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
// No support for generation of indirect array accesses.
if (SAI->getBasePtrOriginSAI())
continue;
// Determine whether this map contains all wanted values.
isl::set MapDom = Map.domain();
if (!Domain.is_subset(MapDom).is_true())
continue;
// There might be multiple array elements that contain the same value, but
// choose only one of them. lexmin is used because it returns a one-value
// mapping, we do not care about which one.
// TODO: Get the simplest access function.
Result = Map.lexmin();
break;
}
return Result;
}
public:
ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
: ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
/// Compute the zones of known array element contents.
///
/// @return True if the computed #Known is usable.
bool computeKnownValues() {
isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
// Check that nothing strange occurs.
collectCompatibleElts();
{
IslQuotaScope QuotaScope = MaxOpGuard.enter();
computeCommon();
if (NormalizePHIs)
computeNormalizedPHIs();
Known = computeKnown(true, true);
// Preexisting ValInsts use the known content analysis of themselves.
Translator = makeIdentityMap(Known.range(), false);
}
if (!Known || !Translator || !NormalizeMap) {
assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
Known = nullptr;
Translator = nullptr;
NormalizeMap = nullptr;
LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
return false;
}
KnownAnalyzed++;
LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
return true;
}
void printStatistics(raw_ostream &OS, int Indent = 0) {
OS.indent(Indent) << "Statistics {\n";
OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
<< '\n';
OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
<< '\n';
OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
<< '\n';
OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
<< '\n';
OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
<< NumModifiedStmts << '\n';
OS.indent(Indent) << "}\n";
}
void printStatements(raw_ostream &OS, int Indent = 0) const {
OS.indent(Indent) << "After statements {\n";
for (auto &Stmt : *S) {
OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
for (auto *MA : Stmt)
MA->print(OS);
OS.indent(Indent + 12);
Stmt.printInstructions(OS);
}
OS.indent(Indent) << "}\n";
}
/// Create a new MemoryAccess of type read and MemoryKind::Array.
///
/// @param Stmt The statement in which the access occurs.
/// @param LI The instruction that does the access.
/// @param AccessRelation The array element that each statement instance
/// accesses.
///
/// @param The newly created access.
MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
isl::map AccessRelation) {
isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
// Create a dummy SCEV access, to be replaced anyway.
SmallVector<const SCEV *, 4> Sizes;
Sizes.reserve(SAI->getNumberOfDimensions());
SmallVector<const SCEV *, 4> Subscripts;
Subscripts.reserve(SAI->getNumberOfDimensions());
for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
Sizes.push_back(SAI->getDimensionSize(i));
Subscripts.push_back(nullptr);
}
MemoryAccess *Access =
new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
S->addAccessFunction(Access);
Stmt->addAccess(Access, true);
Access->setNewAccessRelation(AccessRelation);
return Access;
}
/// Forward a load by reading from an array element that contains the same
/// value. Typically the location it was loaded from.
///
/// @param TargetStmt The statement the operand tree will be copied to.
/// @param Inst The (possibly speculatable) instruction to forward.
/// @param UseStmt The statement that uses @p Inst.
/// @param UseLoop The loop @p Inst is used in.
/// @param DefStmt The statement @p Inst is defined in.
/// @param DefLoop The loop which contains @p Inst.
/// @param DoIt If false, only determine whether an operand tree can be
/// forwarded. If true, carry out the forwarding. Do not
/// use DoIt==true if an operand tree is not known to be
/// forwardable.
///
/// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new
/// load.
/// FD_CannotForward if the pointer operand cannot be forwarded.
/// FD_CanForwardProfitably if @p Inst is forwardable.
/// FD_DidForwardTree if @p DoIt was true.
ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
ScopStmt *UseStmt, Loop *UseLoop,
ScopStmt *DefStmt, Loop *DefLoop,
bool DoIt) {
// Cannot do anything without successful known analysis.
if (Known.is_null() || Translator.is_null() ||
MaxOpGuard.hasQuotaExceeded())
return FD_NotApplicable;
LoadInst *LI = dyn_cast<LoadInst>(Inst);
if (!LI)
return FD_NotApplicable;
// If the load is already in the statement, no forwarding is necessary.
// However, it might happen that the LoadInst is already present in the
// statement's instruction list. In that case we do as follows:
// - For the evaluation (DoIt==false), we can trivially forward it as it is
// benefit of forwarding an already present instruction.
// - For the execution (DoIt==true), prepend the instruction (to make it
// available to all instructions following in the instruction list), but
// do not add another MemoryAccess.
MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
if (Access && !DoIt)
return FD_CanForwardProfitably;
ForwardingDecision OpDecision = forwardTree(
TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
switch (OpDecision) {
case FD_CannotForward:
assert(!DoIt);
return OpDecision;
case FD_CanForwardLeaf:
case FD_CanForwardProfitably:
assert(!DoIt);
break;
case FD_DidForwardLeaf:
case FD_DidForwardTree:
assert(DoIt);
break;
default:
llvm_unreachable("Shouldn't return this");
}
IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
// { DomainDef[] -> ValInst[] }
isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
assert(!isNormalized(ExpectedVal).is_false() &&
"LoadInsts are always normalized");
// { DomainUse[] -> DomainTarget[] }
isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
// { DomainTarget[] -> ValInst[] }
isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
isl::union_map TranslatedExpectedVal =
isl::union_map(TargetExpectedVal).apply_range(Translator);
// { DomainTarget[] -> Element[] }
isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
if (!SameVal)
return FD_NotApplicable;
if (DoIt)
TargetStmt->prependInstruction(LI);
if (!DoIt)
return FD_CanForwardProfitably;
if (Access) {
LLVM_DEBUG(
dbgs() << " forwarded known load with preexisting MemoryAccess"
<< Access << "\n");
} else {
Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess"
<< Access << "\n");
// { ValInst[] }
isl::space ValInstSpace = ExpectedVal.get_space().range();
// After adding a new load to the SCoP, also update the Known content
// about it. The new load will have a known ValInst of
// { [DomainTarget[] -> Value[]] }
// but which -- because it is a copy of it -- has same value as the
// { [DomainDef[] -> Value[]] }
// that it replicates. Instead of cloning the known content of
// [DomainDef[] -> Value[]]
// for DomainTarget[], we add a 'translator' that maps
// [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
// before comparing to the known content.
// TODO: 'Translator' could also be used to map PHINodes to their incoming
// ValInsts.
if (ValInstSpace.is_wrapping()) {
// { DefDomain[] -> Value[] }
isl::map ValInsts = ExpectedVal.range().unwrap();
// { DefDomain[] }
isl::set DefDomain = ValInsts.domain();
// { Value[] }
isl::space ValSpace = ValInstSpace.unwrap().range();
// { Value[] -> Value[] }
isl::map ValToVal =
isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
// { DomainDef[] -> DomainTarget[] }
isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
// { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
Translator = Translator.add_map(LocalTranslator);
LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator
<< "\n");
}
}
LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal
<< "\n");
LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates
<< "\n");
assert(Access);
NumKnownLoadsForwarded++;
TotalKnownLoadsForwarded++;
return FD_DidForwardTree;
}
/// Forward a scalar by redirecting the access to an array element that stores
/// the same value.
///
/// @param TargetStmt The statement the operand tree will be copied to.
/// @param Inst The scalar to forward.
/// @param UseStmt The statement that uses @p Inst.
/// @param UseLoop The loop @p Inst is used in.
/// @param DefStmt The statement @p Inst is defined in.
/// @param DefLoop The loop which contains @p Inst.
/// @param DoIt If false, only determine whether an operand tree can be
/// forwarded. If true, carry out the forwarding. Do not
/// use DoIt==true if an operand tree is not known to be
/// forwardable.
///
/// @return FD_NotApplicable if @p Inst cannot be reloaded.
/// FD_CanForwardLeaf if @p Inst can be reloaded.
/// FD_CanForwardProfitably if @p Inst has been reloaded.
/// FD_DidForwardLeaf if @p DoIt was true.
ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
ScopStmt *UseStmt, Loop *UseLoop,
ScopStmt *DefStmt, Loop *DefLoop,
bool DoIt) {
// Cannot do anything without successful known analysis.
if (Known.is_null() || Translator.is_null() ||
MaxOpGuard.hasQuotaExceeded())
return FD_NotApplicable;
MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
if (Access && Access->isLatestArrayKind()) {
if (DoIt)
return FD_DidForwardLeaf;
return FD_CanForwardLeaf;
}
// Don't spend too much time analyzing whether it can be reloaded. When
// carrying-out the forwarding, we cannot bail-out in the middle of the
// transformation. It also shouldn't take as long because some results are
// cached.
IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
// { DomainDef[] -> ValInst[] }
isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
// { DomainUse[] -> DomainTarget[] }
isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
// { DomainTarget[] -> ValInst[] }
isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
isl::union_map TranslatedExpectedVal =
TargetExpectedVal.apply_range(Translator);
// { DomainTarget[] -> Element[] }
isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
if (!SameVal)
return FD_NotApplicable;
if (!DoIt)
return FD_CanForwardProfitably;
if (!Access)
Access = TargetStmt->ensureValueRead(Inst);
simplify(SameVal);
Access->setNewAccessRelation(SameVal);
TotalReloads++;
NumReloads++;
return FD_DidForwardLeaf;
}
/// Forwards a speculatively executable instruction.
///
/// @param TargetStmt The statement the operand tree will be copied to.
/// @param UseInst The (possibly speculatable) instruction to forward.
/// @param DefStmt The statement @p UseInst is defined in.
/// @param DefLoop The loop which contains @p UseInst.
/// @param DoIt If false, only determine whether an operand tree can be
/// forwarded. If true, carry out the forwarding. Do not
/// use DoIt==true if an operand tree is not known to be
/// forwardable.
///
/// @return FD_NotApplicable if @p UseInst is not speculatable.
/// FD_CannotForward if one of @p UseInst's operands is not
/// forwardable.
/// FD_CanForwardTree if @p UseInst is forwardable.
/// FD_DidForward if @p DoIt was true.
ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
Instruction *UseInst,
ScopStmt *DefStmt, Loop *DefLoop,
bool DoIt) {
// PHIs, unless synthesizable, are not yet supported.
if (isa<PHINode>(UseInst))
return FD_NotApplicable;
// Compatible instructions must satisfy the following conditions:
// 1. Idempotent (instruction will be copied, not moved; although its
// original instance might be removed by simplification)
// 2. Not access memory (There might be memory writes between)
// 3. Not cause undefined behaviour (we might copy to a location when the
// original instruction was no executed; this is currently not possible
// because we do not forward PHINodes)
// 4. Not leak memory if executed multiple times (i.e. malloc)
//
// Instruction::mayHaveSideEffects is not sufficient because it considers
// malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
// not sufficient because it allows memory accesses.
if (mayBeMemoryDependent(*UseInst))
return FD_NotApplicable;
if (DoIt) {
// To ensure the right order, prepend this instruction before its
// operands. This ensures that its operands are inserted before the
// instruction using them.
// TODO: The operand tree is not really a tree, but a DAG. We should be
// able to handle DAGs without duplication.
TargetStmt->prependInstruction(UseInst);
NumInstructionsCopied++;
TotalInstructionsCopied++;
}
for (Value *OpVal : UseInst->operand_values()) {
ForwardingDecision OpDecision =
forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
switch (OpDecision) {
case FD_CannotForward:
assert(!DoIt);
return FD_CannotForward;
case FD_CanForwardLeaf:
case FD_CanForwardProfitably:
assert(!DoIt);
break;
case FD_DidForwardLeaf:
case FD_DidForwardTree:
assert(DoIt);
break;
case FD_NotApplicable:
llvm_unreachable("forwardTree should never return FD_NotApplicable");
}
}
if (DoIt)
return FD_DidForwardTree;
return FD_CanForwardProfitably;
}
/// Determines whether an operand tree can be forwarded or carries out a
/// forwarding, depending on the @p DoIt flag.
///
/// @param TargetStmt The statement the operand tree will be copied to.
/// @param UseVal The value (usually an instruction) which is root of an
/// operand tree.
/// @param UseStmt The statement that uses @p UseVal.
/// @param UseLoop The loop @p UseVal is used in.
/// @param DoIt If false, only determine whether an operand tree can be
/// forwarded. If true, carry out the forwarding. Do not
/// use DoIt==true if an operand tree is not known to be
/// forwardable.
///
/// @return If DoIt==false, return whether the operand tree can be forwarded.
/// If DoIt==true, return FD_DidForward.
ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
ScopStmt *DefStmt = nullptr;
Loop *DefLoop = nullptr;
// { DefDomain[] -> TargetDomain[] }
isl::map DefToTarget;
VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
switch (VUse.getKind()) {
case VirtualUse::Constant:
case VirtualUse::Block:
case VirtualUse::Hoisted:
// These can be used anywhere without special considerations.
if (DoIt)
return FD_DidForwardTree;
return FD_CanForwardLeaf;
case VirtualUse::Synthesizable: {
// ScopExpander will take care for of generating the code at the new
// location.
if (DoIt)
return FD_DidForwardTree;
// Check if the value is synthesizable at the new location as well. This
// might be possible when leaving a loop for which ScalarEvolution is
// unable to derive the exit value for.
// TODO: If there is a LCSSA PHI at the loop exit, use that one.
// If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
// do not forward past its loop header. This would require us to use a
// previous loop induction variable instead the current one. We currently
// do not allow forwarding PHI nodes, thus this should never occur (the
// only exception where no phi is necessary being an unreachable loop
// without edge from the outside).
VirtualUse TargetUse = VirtualUse::create(
S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
if (TargetUse.getKind() == VirtualUse::Synthesizable)
return FD_CanForwardLeaf;
LLVM_DEBUG(
dbgs() << " Synthesizable would not be synthesizable anymore: "
<< *UseVal << "\n");
return FD_CannotForward;
}
case VirtualUse::ReadOnly:
// Note that we cannot return FD_CanForwardTree here. With a operand tree
// depth of 0, UseVal is the use in TargetStmt that we try to replace.
// With -polly-analyze-read-only-scalars=true we would ensure the
// existence of a MemoryAccess (which already exists for a leaf) and be
// removed again by tryForwardTree because it's goal is to remove this
// scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
// to do so.
if (!DoIt)
return FD_CanForwardLeaf;
// If we model read-only scalars, we need to create a MemoryAccess for it.
if (ModelReadOnlyScalars)
TargetStmt->ensureValueRead(UseVal);
NumReadOnlyCopied++;
TotalReadOnlyCopied++;
return FD_DidForwardLeaf;
case VirtualUse::Intra:
// Knowing that UseStmt and DefStmt are the same statement instance, just
// reuse the information about UseStmt for DefStmt
DefStmt = UseStmt;
LLVM_FALLTHROUGH;
case VirtualUse::Inter:
Instruction *Inst = cast<Instruction>(UseVal);
if (!DefStmt) {
DefStmt = S->getStmtFor(Inst);
if (!DefStmt)
return FD_CannotForward;
}
DefLoop = LI->getLoopFor(Inst->getParent());
ForwardingDecision SpeculativeResult =
forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
if (SpeculativeResult != FD_NotApplicable)
return SpeculativeResult;
ForwardingDecision KnownResult = forwardKnownLoad(
TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
if (KnownResult != FD_NotApplicable)
return KnownResult;
ForwardingDecision ReloadResult = reloadKnownContent(
TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
if (ReloadResult != FD_NotApplicable)
return ReloadResult;
// When no method is found to forward the operand tree, we effectively
// cannot handle it.
LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n");
return FD_CannotForward;
}
llvm_unreachable("Case unhandled");
}
/// Try to forward an operand tree rooted in @p RA.
bool tryForwardTree(MemoryAccess *RA) {
assert(RA->isLatestScalarKind());
LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
ScopStmt *Stmt = RA->getStatement();
Loop *InLoop = Stmt->getSurroundingLoop();
isl::map TargetToUse;
if (!Known.is_null()) {
isl::space DomSpace = Stmt->getDomainSpace();
TargetToUse =
isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
}
ForwardingDecision Assessment =
forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
if (Assessment != FD_CanForwardProfitably)
return false;
ForwardingDecision Execution =
forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
assert(((Execution == FD_DidForwardTree) ||
(Execution == FD_DidForwardLeaf)) &&
"A previous positive assessment must also be executable");
if (Execution == FD_DidForwardTree)
Stmt->removeSingleMemoryAccess(RA);
return true;
}
/// Return which SCoP this instance is processing.
Scop *getScop() const { return S; }
/// Run the algorithm: Use value read accesses as operand tree roots and try
/// to forward them into the statement.
bool forwardOperandTrees() {
for (ScopStmt &Stmt : *S) {
bool StmtModified = false;
// Because we are modifying the MemoryAccess list, collect them first to
// avoid iterator invalidation.
SmallVector<MemoryAccess *, 16> Accs;
for (MemoryAccess *RA : Stmt) {
if (!RA->isRead())
continue;
if (!RA->isLatestScalarKind())
continue;
Accs.push_back(RA);
}
for (MemoryAccess *RA : Accs) {
if (tryForwardTree(RA)) {
Modified = true;
StmtModified = true;
NumForwardedTrees++;
TotalForwardedTrees++;
}
}
if (StmtModified) {
NumModifiedStmts++;
TotalModifiedStmts++;
}
}
if (Modified)
ScopsModified++;
return Modified;
}
/// Print the pass result, performed transformations and the SCoP after the
/// transformation.
void print(raw_ostream &OS, int Indent = 0) {
printStatistics(OS, Indent);
if (!Modified) {
// This line can easily be checked in regression tests.
OS << "ForwardOpTree executed, but did not modify anything\n";
return;
}
printStatements(OS, Indent);
}
};
/// Pass that redirects scalar reads to array elements that are known to contain
/// the same value.
///
/// This reduces the number of scalar accesses and therefore potentially
/// increases the freedom of the scheduler. In the ideal case, all reads of a
/// scalar definition are redirected (We currently do not care about removing
/// the write in this case). This is also useful for the main DeLICM pass as
/// there are less scalars to be mapped.
class ForwardOpTree : public ScopPass {
private:
/// The pass implementation, also holding per-scop data.
std::unique_ptr<ForwardOpTreeImpl> Impl;
public:
static char ID;
explicit ForwardOpTree() : ScopPass(ID) {}
ForwardOpTree(const ForwardOpTree &) = delete;
ForwardOpTree &operator=(const ForwardOpTree &) = delete;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredTransitive<ScopInfoRegionPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
bool runOnScop(Scop &S) override {
// Free resources for previous SCoP's computation, if not yet done.
releaseMemory();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
{
IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
if (AnalyzeKnown) {
LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
Impl->computeKnownValues();
}
LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
Impl->forwardOperandTrees();
if (MaxOpGuard.hasQuotaExceeded()) {
LLVM_DEBUG(dbgs() << "Not all operations completed because of "
"max_operations exceeded\n");
KnownOutOfQuota++;
}
}
LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
LLVM_DEBUG(dbgs() << S);
// Update statistics
auto ScopStats = S.getStatistics();
NumValueWrites += ScopStats.NumValueWrites;
NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
NumPHIWrites += ScopStats.NumPHIWrites;
NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
NumSingletonWrites += ScopStats.NumSingletonWrites;
NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
return false;
}
void printScop(raw_ostream &OS, Scop &S) const override {
if (!Impl)
return;
assert(Impl->getScop() == &S);
Impl->print(OS);
}
void releaseMemory() override { Impl.reset(); }
}; // class ForwardOpTree
char ForwardOpTree::ID;
} // namespace
ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
"Polly - Forward operand tree", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
"Polly - Forward operand tree", false, false)
|