reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced

References

lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
  427     Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {});
  435     B.SetInsertPoint(&I);
  440   Type *const VecTy = VectorType::get(B.getInt32Ty(), 2);
  448   Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize());
  449   CallInst *const Ballot = B.CreateIntrinsic(
  450       Intrinsic::amdgcn_icmp, {WaveTy, B.getInt32Ty()},
  451       {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)});
  451       {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)});
  451       {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)});
  459     Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
  460                               {Ballot, B.getInt32(0)});
  462     Value *const BitCast = B.CreateBitCast(Ballot, VecTy);
  463     Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
  463     Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
  464     Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
  464     Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
  465     Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
  466                               {ExtractLo, B.getInt32(0)});
  468         B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt});
  470   Mbcnt = B.CreateIntCast(Mbcnt, Ty, false);
  472   Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth));
  482     NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
  486     NewV = buildScan(B, ScanOp, NewV, Identity);
  487     ExclScan = buildShiftRight(B, NewV, Identity);
  492     Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
  494       Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
  494       Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
  496           B.CreateTrunc(B.CreateLShr(NewV, 32), B.getInt32Ty());
  496           B.CreateTrunc(B.CreateLShr(NewV, 32), B.getInt32Ty());
  496           B.CreateTrunc(B.CreateLShr(NewV, 32), B.getInt32Ty());
  497       CallInst *const ReadLaneLo = B.CreateIntrinsic(
  499       CallInst *const ReadLaneHi = B.CreateIntrinsic(
  501       Value *const PartialInsert = B.CreateInsertElement(
  502           UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
  504           B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
  504           B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
  505       NewV = B.CreateBitCast(Insert, Ty);
  507       NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
  514     NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
  524       Value *const Ctpop = B.CreateIntCast(
  525           B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
  526       NewV = B.CreateMul(V, Ctpop);
  544       Value *const Ctpop = B.CreateIntCast(
  545           B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
  546       NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
  546       NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
  554   Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0));
  554   Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0));
  568   B.SetInsertPoint(SingleLaneTerminator);
  573   B.Insert(NewI);
  578   B.SetInsertPoint(&I);
  583     PHINode *const PHI = B.CreatePHI(Ty, 2);
  593       Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
  593       Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
  595           B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty());
  595           B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty());
  595           B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty());
  597           B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
  599           B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
  600       Value *const PartialInsert = B.CreateInsertElement(
  601           UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
  603           B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
  603           B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
  604       BroadcastI = B.CreateBitCast(Insert, Ty);
  607       BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
  618       LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan);
  625         LaneOffset = B.CreateMul(V, Mbcnt);
  633         LaneOffset = B.CreateSelect(Cond, Identity, V);
  636         LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
  636         LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
  640     Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
  644       B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
  646       PHINode *const PHI = B.CreatePHI(Ty, 2);