/* Autogenerated by mlir-tblgen; don't manually edit */

#ifdef GEN_PASS_DECL
// Generate declarations for all passes.
#define GEN_PASS_DECL_TRITONAMDFOLDTRUECMPI
#define GEN_PASS_DECL_TRITONAMDGPUACCELERATEMATMUL
#define GEN_PASS_DECL_TRITONAMDGPUBLOCKPINGPONG
#define GEN_PASS_DECL_TRITONAMDGPUCANONICALIZEPOINTERS
#define GEN_PASS_DECL_TRITONAMDGPUCOALESCEASYNCCOPY
#define GEN_PASS_DECL_TRITONAMDGPUCONVERTTOBUFFEROPS
#define GEN_PASS_DECL_TRITONAMDGPUHOISTLAYOUTCONVERSIONS
#define GEN_PASS_DECL_TRITONAMDGPUINTHREADTRANSPOSE
#define GEN_PASS_DECL_TRITONAMDGPUOPTIMIZEEPILOGUE
#define GEN_PASS_DECL_TRITONAMDGPUREORDERINSTRUCTIONS
#define GEN_PASS_DECL_TRITONAMDGPUSTREAMPIPELINE
#define GEN_PASS_DECL_TRITONAMDGPUUPDATEASYNCWAITCOUNT
#undef GEN_PASS_DECL
#endif // GEN_PASS_DECL

//===----------------------------------------------------------------------===//
// TritonAMDFoldTrueCmpI
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDFOLDTRUECMPI
std::unique_ptr<::mlir::Pass> createTritonAMDFoldTrueCmpI();
#undef GEN_PASS_DECL_TRITONAMDFOLDTRUECMPI
#endif // GEN_PASS_DECL_TRITONAMDFOLDTRUECMPI
#ifdef GEN_PASS_DEF_TRITONAMDFOLDTRUECMPI

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDFoldTrueCmpI();
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDFoldTrueCmpI` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings (name, command-line argument, description),
/// TypeID-based `classof`, cloning, and the definition of the
/// `createTritonAMDFoldTrueCmpI()` factory. The pass operates on
/// `mlir::ModuleOp`.
template <typename DerivedT>
class TritonAMDFoldTrueCmpIBase : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDFoldTrueCmpIBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDFoldTrueCmpIBase() : ::mlir::OperationPass<mlir::ModuleOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDFoldTrueCmpIBase(const TritonAMDFoldTrueCmpIBase &other) : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDFoldTrueCmpIBase& operator=(const TritonAMDFoldTrueCmpIBase &) = delete;
  TritonAMDFoldTrueCmpIBase(TritonAMDFoldTrueCmpIBase &&) = delete;
  TritonAMDFoldTrueCmpIBase& operator=(TritonAMDFoldTrueCmpIBase &&) = delete;
  ~TritonAMDFoldTrueCmpIBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-fold-true-cmpi`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-fold-true-cmpi");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-fold-true-cmpi"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "Fold true arith.cmpi to %true"; }

  /// Returns the derived pass name (`TritonAMDFoldTrueCmpI`) as a compile-time
  /// string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDFoldTrueCmpI");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDFoldTrueCmpI"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    // Intentionally empty: this pass inserts no dialects into the registry.
    
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDFoldTrueCmpIBase<DerivedT>)

protected:
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDFoldTrueCmpI() {
    return std::make_unique<DerivedT>();
  }
};
} // namespace impl

/// Public factory for the TritonAMDFoldTrueCmpI pass.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDFoldTrueCmpI() {
  std::unique_ptr<::mlir::Pass> pass = impl::createTritonAMDFoldTrueCmpI();
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDFOLDTRUECMPI
#endif // GEN_PASS_DEF_TRITONAMDFOLDTRUECMPI

//===----------------------------------------------------------------------===//
// TritonAMDGPUAccelerateMatmul
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUACCELERATEMATMUL
/// Options accepted by `createTritonAMDGPUAccelerateMatmul(options)`.
struct TritonAMDGPUAccelerateMatmulOptions {
  /// GFX generation name of the target device; empty by default.
  std::string archGenerationName{};
  /// Matrix instruction MN size to enforce; defaults to 0.
  int32_t matrixInstructionSize{0};
  /// KWidth / kBase; defaults to 1.
  int32_t kPack{1};
};
std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul();
std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul(TritonAMDGPUAccelerateMatmulOptions options);
#undef GEN_PASS_DECL_TRITONAMDGPUACCELERATEMATMUL
#endif // GEN_PASS_DECL_TRITONAMDGPUACCELERATEMATMUL
#ifdef GEN_PASS_DEF_TRITONAMDGPUACCELERATEMATMUL

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul();
} // namespace impl

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul(TritonAMDGPUAccelerateMatmulOptions options);
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDGPUAccelerateMatmul` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings, TypeID-based `classof`, cloning, dependent
/// dialect registration, the three pass options, and the definitions of both
/// `createTritonAMDGPUAccelerateMatmul` factory overloads. The pass operates
/// on `mlir::ModuleOp`.
template <typename DerivedT>
class TritonAMDGPUAccelerateMatmulBase : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDGPUAccelerateMatmulBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDGPUAccelerateMatmulBase() : ::mlir::OperationPass<mlir::ModuleOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDGPUAccelerateMatmulBase(const TritonAMDGPUAccelerateMatmulBase &other) : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDGPUAccelerateMatmulBase& operator=(const TritonAMDGPUAccelerateMatmulBase &) = delete;
  TritonAMDGPUAccelerateMatmulBase(TritonAMDGPUAccelerateMatmulBase &&) = delete;
  TritonAMDGPUAccelerateMatmulBase& operator=(TritonAMDGPUAccelerateMatmulBase &&) = delete;
  ~TritonAMDGPUAccelerateMatmulBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-accelerate-matmul`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-accelerate-matmul");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-accelerate-matmul"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "accelerate matmul"; }

  /// Returns the derived pass name (`TritonAMDGPUAccelerateMatmul`) as a
  /// compile-time string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUAccelerateMatmul");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDGPUAccelerateMatmul"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs: the TritonAMDGPU dialect.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUAccelerateMatmulBase<DerivedT>)

  /// Constructs the pass from an options struct: delegates to the default
  /// constructor, then assigns every field of `options` to the pass option of
  /// the same name.
  TritonAMDGPUAccelerateMatmulBase(TritonAMDGPUAccelerateMatmulOptions options) : TritonAMDGPUAccelerateMatmulBase() {
    archGenerationName = std::move(options.archGenerationName);
    // The two int32_t fields are scalars, so std::move here is just a copy.
    matrixInstructionSize = std::move(options.matrixInstructionSize);
    kPack = std::move(options.kPack);
  }
protected:
  // Pass options, each attached to this pass instance (`*this`) with its
  // command-line name, help text, and initial value.
  /// `arch-generation-name`: GFX generation name of the target device; initially empty.
  ::mlir::Pass::Option<std::string> archGenerationName{*this, "arch-generation-name", ::llvm::cl::desc("GFX generation name of target device."), ::llvm::cl::init(std::string{})};
  /// `matrix-instruction-size`: matrix instruction MN size to enforce; initially 0.
  ::mlir::Pass::Option<int32_t> matrixInstructionSize{*this, "matrix-instruction-size", ::llvm::cl::desc("enforce matrix instruction MN size"), ::llvm::cl::init(0)};
  /// `kPack`: KWidth / kBase; initially 1.
  ::mlir::Pass::Option<int32_t> kPack{*this, "kPack", ::llvm::cl::desc("KWidth / kBase"), ::llvm::cl::init(1)};
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul() {
    return std::make_unique<DerivedT>();
  }

  /// Factory overload that constructs `DerivedT` from `options`; `DerivedT`
  /// must therefore be constructible from the options struct.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul(TritonAMDGPUAccelerateMatmulOptions options) {
    return std::make_unique<DerivedT>(std::move(options));
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUAccelerateMatmul pass with default
/// options.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul() {
  std::unique_ptr<::mlir::Pass> pass = impl::createTritonAMDGPUAccelerateMatmul();
  return pass;
}

/// Public factory for the TritonAMDGPUAccelerateMatmul pass configured from
/// `options`.
///
/// The options struct is taken by value and moved into the `impl::` factory
/// overload, whose result is handed back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUAccelerateMatmul(TritonAMDGPUAccelerateMatmulOptions options) {
  std::unique_ptr<::mlir::Pass> pass =
      impl::createTritonAMDGPUAccelerateMatmul(std::move(options));
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUACCELERATEMATMUL
#endif // GEN_PASS_DEF_TRITONAMDGPUACCELERATEMATMUL

//===----------------------------------------------------------------------===//
// TritonAMDGPUBlockPingpong
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUBLOCKPINGPONG
/// Options accepted by `createTritonAMDGPUBlockPingpong(options)`.
struct TritonAMDGPUBlockPingpongOptions {
  /// Number of pipeline stages; defaults to 2.
  int32_t numStages{2};
};
std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong();
std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong(TritonAMDGPUBlockPingpongOptions options);
#undef GEN_PASS_DECL_TRITONAMDGPUBLOCKPINGPONG
#endif // GEN_PASS_DECL_TRITONAMDGPUBLOCKPINGPONG
#ifdef GEN_PASS_DEF_TRITONAMDGPUBLOCKPINGPONG

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong();
} // namespace impl

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong(TritonAMDGPUBlockPingpongOptions options);
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDGPUBlockPingpong` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings, TypeID-based `classof`, cloning, dependent
/// dialect registration, the `num-stages` option, and the definitions of both
/// `createTritonAMDGPUBlockPingpong` factory overloads. The pass operates on
/// `mlir::ModuleOp`.
template <typename DerivedT>
class TritonAMDGPUBlockPingpongBase : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDGPUBlockPingpongBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDGPUBlockPingpongBase() : ::mlir::OperationPass<mlir::ModuleOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDGPUBlockPingpongBase(const TritonAMDGPUBlockPingpongBase &other) : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDGPUBlockPingpongBase& operator=(const TritonAMDGPUBlockPingpongBase &) = delete;
  TritonAMDGPUBlockPingpongBase(TritonAMDGPUBlockPingpongBase &&) = delete;
  TritonAMDGPUBlockPingpongBase& operator=(TritonAMDGPUBlockPingpongBase &&) = delete;
  ~TritonAMDGPUBlockPingpongBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-block-pingpong`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-block-pingpong");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-block-pingpong"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "Interleaving instructions from two warps on the same SIMD to better utilize matrix core"; }

  /// Returns the derived pass name (`TritonAMDGPUBlockPingpong`) as a
  /// compile-time string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUBlockPingpong");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDGPUBlockPingpong"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs: the ROCDL dialect and the TritonAMDGPU dialect.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::ROCDL::ROCDLDialect, mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUBlockPingpongBase<DerivedT>)

  /// Constructs the pass from an options struct: delegates to the default
  /// constructor, then assigns `options.numStages` to the `numStages` option.
  TritonAMDGPUBlockPingpongBase(TritonAMDGPUBlockPingpongOptions options) : TritonAMDGPUBlockPingpongBase() {
    // numStages is an int32_t, so std::move here is just a copy.
    numStages = std::move(options.numStages);
  }
protected:
  /// `num-stages`: number of pipeline stages; initially 2. The option is
  /// attached to this pass instance (`*this`).
  ::mlir::Pass::Option<int32_t> numStages{*this, "num-stages", ::llvm::cl::desc("Number of Pipeline stages"), ::llvm::cl::init(2)};
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong() {
    return std::make_unique<DerivedT>();
  }

  /// Factory overload that constructs `DerivedT` from `options`; `DerivedT`
  /// must therefore be constructible from the options struct.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong(TritonAMDGPUBlockPingpongOptions options) {
    return std::make_unique<DerivedT>(std::move(options));
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUBlockPingpong pass with default options.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong() {
  std::unique_ptr<::mlir::Pass> pass = impl::createTritonAMDGPUBlockPingpong();
  return pass;
}

/// Public factory for the TritonAMDGPUBlockPingpong pass configured from
/// `options`.
///
/// The options struct is taken by value and moved into the `impl::` factory
/// overload, whose result is handed back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUBlockPingpong(TritonAMDGPUBlockPingpongOptions options) {
  std::unique_ptr<::mlir::Pass> pass =
      impl::createTritonAMDGPUBlockPingpong(std::move(options));
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUBLOCKPINGPONG
#endif // GEN_PASS_DEF_TRITONAMDGPUBLOCKPINGPONG

//===----------------------------------------------------------------------===//
// TritonAMDGPUCanonicalizePointers
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUCANONICALIZEPOINTERS
std::unique_ptr<::mlir::Pass> createTritonAMDGPUCanonicalizePointers();
#undef GEN_PASS_DECL_TRITONAMDGPUCANONICALIZEPOINTERS
#endif // GEN_PASS_DECL_TRITONAMDGPUCANONICALIZEPOINTERS
#ifdef GEN_PASS_DEF_TRITONAMDGPUCANONICALIZEPOINTERS

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUCanonicalizePointers();
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDGPUCanonicalizePointers` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings, TypeID-based `classof`, cloning, and the
/// definition of the `createTritonAMDGPUCanonicalizePointers()` factory. The
/// pass operates on `mlir::triton::FuncOp`.
template <typename DerivedT>
class TritonAMDGPUCanonicalizePointersBase : public ::mlir::OperationPass<mlir::triton::FuncOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDGPUCanonicalizePointersBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDGPUCanonicalizePointersBase() : ::mlir::OperationPass<mlir::triton::FuncOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDGPUCanonicalizePointersBase(const TritonAMDGPUCanonicalizePointersBase &other) : ::mlir::OperationPass<mlir::triton::FuncOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDGPUCanonicalizePointersBase& operator=(const TritonAMDGPUCanonicalizePointersBase &) = delete;
  TritonAMDGPUCanonicalizePointersBase(TritonAMDGPUCanonicalizePointersBase &&) = delete;
  TritonAMDGPUCanonicalizePointersBase& operator=(TritonAMDGPUCanonicalizePointersBase &&) = delete;
  ~TritonAMDGPUCanonicalizePointersBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-canonicalize-pointers`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-canonicalize-pointers");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-canonicalize-pointers"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "Canonicalize pointers: rewrite pointers passed to load/store operation as a `<basePtr, offset>` pair."; }

  /// Returns the derived pass name (`TritonAMDGPUCanonicalizePointers`) as a
  /// compile-time string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUCanonicalizePointers");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDGPUCanonicalizePointers"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    // Intentionally empty: this pass inserts no dialects into the registry.
    
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUCanonicalizePointersBase<DerivedT>)

protected:
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUCanonicalizePointers() {
    return std::make_unique<DerivedT>();
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUCanonicalizePointers pass.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUCanonicalizePointers() {
  std::unique_ptr<::mlir::Pass> pass =
      impl::createTritonAMDGPUCanonicalizePointers();
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUCANONICALIZEPOINTERS
#endif // GEN_PASS_DEF_TRITONAMDGPUCANONICALIZEPOINTERS

//===----------------------------------------------------------------------===//
// TritonAMDGPUCoalesceAsyncCopy
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUCOALESCEASYNCCOPY
/// Options accepted by `createTritonAMDGPUCoalesceAsyncCopy(options)`.
struct TritonAMDGPUCoalesceAsyncCopyOptions {
  /// GFX generation name of the target device; empty by default.
  std::string archGenerationName{};
};
std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy();
std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy(TritonAMDGPUCoalesceAsyncCopyOptions options);
#undef GEN_PASS_DECL_TRITONAMDGPUCOALESCEASYNCCOPY
#endif // GEN_PASS_DECL_TRITONAMDGPUCOALESCEASYNCCOPY
#ifdef GEN_PASS_DEF_TRITONAMDGPUCOALESCEASYNCCOPY

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy();
} // namespace impl

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy(TritonAMDGPUCoalesceAsyncCopyOptions options);
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDGPUCoalesceAsyncCopy` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings, TypeID-based `classof`, cloning, the
/// `arch-generation-name` option, and the definitions of both
/// `createTritonAMDGPUCoalesceAsyncCopy` factory overloads. The pass operates
/// on `mlir::ModuleOp`.
template <typename DerivedT>
class TritonAMDGPUCoalesceAsyncCopyBase : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDGPUCoalesceAsyncCopyBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDGPUCoalesceAsyncCopyBase() : ::mlir::OperationPass<mlir::ModuleOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDGPUCoalesceAsyncCopyBase(const TritonAMDGPUCoalesceAsyncCopyBase &other) : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDGPUCoalesceAsyncCopyBase& operator=(const TritonAMDGPUCoalesceAsyncCopyBase &) = delete;
  TritonAMDGPUCoalesceAsyncCopyBase(TritonAMDGPUCoalesceAsyncCopyBase &&) = delete;
  TritonAMDGPUCoalesceAsyncCopyBase& operator=(TritonAMDGPUCoalesceAsyncCopyBase &&) = delete;
  ~TritonAMDGPUCoalesceAsyncCopyBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-coalesce-async-copy`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-coalesce-async-copy");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-coalesce-async-copy"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "Improve coalescing for async global to local copies"; }

  /// Returns the derived pass name (`TritonAMDGPUCoalesceAsyncCopy`) as a
  /// compile-time string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUCoalesceAsyncCopy");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDGPUCoalesceAsyncCopy"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    // Intentionally empty: this pass inserts no dialects into the registry.
    
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUCoalesceAsyncCopyBase<DerivedT>)

  /// Constructs the pass from an options struct: delegates to the default
  /// constructor, then moves `options.archGenerationName` into the
  /// `archGenerationName` option.
  TritonAMDGPUCoalesceAsyncCopyBase(TritonAMDGPUCoalesceAsyncCopyOptions options) : TritonAMDGPUCoalesceAsyncCopyBase() {
    archGenerationName = std::move(options.archGenerationName);
  }
protected:
  /// `arch-generation-name`: GFX generation name of the target device;
  /// initially empty. The option is attached to this pass instance (`*this`).
  ::mlir::Pass::Option<std::string> archGenerationName{*this, "arch-generation-name", ::llvm::cl::desc("GFX generation name of target device."), ::llvm::cl::init(std::string{})};
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy() {
    return std::make_unique<DerivedT>();
  }

  /// Factory overload that constructs `DerivedT` from `options`; `DerivedT`
  /// must therefore be constructible from the options struct.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy(TritonAMDGPUCoalesceAsyncCopyOptions options) {
    return std::make_unique<DerivedT>(std::move(options));
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUCoalesceAsyncCopy pass with default
/// options.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy() {
  std::unique_ptr<::mlir::Pass> pass = impl::createTritonAMDGPUCoalesceAsyncCopy();
  return pass;
}

/// Public factory for the TritonAMDGPUCoalesceAsyncCopy pass configured from
/// `options`.
///
/// The options struct is taken by value and moved into the `impl::` factory
/// overload, whose result is handed back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUCoalesceAsyncCopy(TritonAMDGPUCoalesceAsyncCopyOptions options) {
  std::unique_ptr<::mlir::Pass> pass =
      impl::createTritonAMDGPUCoalesceAsyncCopy(std::move(options));
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUCOALESCEASYNCCOPY
#endif // GEN_PASS_DEF_TRITONAMDGPUCOALESCEASYNCCOPY

//===----------------------------------------------------------------------===//
// TritonAMDGPUConvertToBufferOps
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUCONVERTTOBUFFEROPS
/// Options accepted by `createTritonAMDGPUConvertToBufferOps(options)`.
struct TritonAMDGPUConvertToBufferOpsOptions {
  /// GFX generation name of the target device; empty by default.
  std::string archGenerationName{};
  /// Allow buffer atomic operations when the hardware supports it; defaults
  /// to true.
  bool allowBufferAtomics{true};
};
std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps();
std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps(TritonAMDGPUConvertToBufferOpsOptions options);
#undef GEN_PASS_DECL_TRITONAMDGPUCONVERTTOBUFFEROPS
#endif // GEN_PASS_DECL_TRITONAMDGPUCONVERTTOBUFFEROPS
#ifdef GEN_PASS_DEF_TRITONAMDGPUCONVERTTOBUFFEROPS

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps();
} // namespace impl

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps(TritonAMDGPUConvertToBufferOpsOptions options);
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDGPUConvertToBufferOps` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings, TypeID-based `classof`, cloning, dependent
/// dialect registration, the two pass options, and the definitions of both
/// `createTritonAMDGPUConvertToBufferOps` factory overloads. The pass operates
/// on `mlir::ModuleOp`.
template <typename DerivedT>
class TritonAMDGPUConvertToBufferOpsBase : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDGPUConvertToBufferOpsBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDGPUConvertToBufferOpsBase() : ::mlir::OperationPass<mlir::ModuleOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDGPUConvertToBufferOpsBase(const TritonAMDGPUConvertToBufferOpsBase &other) : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDGPUConvertToBufferOpsBase& operator=(const TritonAMDGPUConvertToBufferOpsBase &) = delete;
  TritonAMDGPUConvertToBufferOpsBase(TritonAMDGPUConvertToBufferOpsBase &&) = delete;
  TritonAMDGPUConvertToBufferOpsBase& operator=(TritonAMDGPUConvertToBufferOpsBase &&) = delete;
  ~TritonAMDGPUConvertToBufferOpsBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-convert-buffer-ops`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-convert-buffer-ops");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-convert-buffer-ops"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "Convert memory operations to buffer operations"; }

  /// Returns the derived pass name (`TritonAMDGPUConvertToBufferOps`) as a
  /// compile-time string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUConvertToBufferOps");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDGPUConvertToBufferOps"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs: the TritonAMDGPU dialect.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUConvertToBufferOpsBase<DerivedT>)

  /// Constructs the pass from an options struct: delegates to the default
  /// constructor, then assigns every field of `options` to the pass option of
  /// the same name.
  TritonAMDGPUConvertToBufferOpsBase(TritonAMDGPUConvertToBufferOpsOptions options) : TritonAMDGPUConvertToBufferOpsBase() {
    archGenerationName = std::move(options.archGenerationName);
    // allowBufferAtomics is a bool, so std::move here is just a copy.
    allowBufferAtomics = std::move(options.allowBufferAtomics);
  }
protected:
  // Pass options, each attached to this pass instance (`*this`) with its
  // command-line name, help text, and initial value.
  /// `arch-generation-name`: GFX generation name of the target device; initially empty.
  ::mlir::Pass::Option<std::string> archGenerationName{*this, "arch-generation-name", ::llvm::cl::desc("GFX generation name of target device."), ::llvm::cl::init(std::string{})};
  /// `allow-buffer-atomics`: allow buffer atomic operations when the hardware
  /// supports it; initially true.
  ::mlir::Pass::Option<bool> allowBufferAtomics{*this, "allow-buffer-atomics", ::llvm::cl::desc("Allow buffer atomic operations when the hardware supports it."), ::llvm::cl::init(true)};
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps() {
    return std::make_unique<DerivedT>();
  }

  /// Factory overload that constructs `DerivedT` from `options`; `DerivedT`
  /// must therefore be constructible from the options struct.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps(TritonAMDGPUConvertToBufferOpsOptions options) {
    return std::make_unique<DerivedT>(std::move(options));
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUConvertToBufferOps pass with default
/// options.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps() {
  std::unique_ptr<::mlir::Pass> pass = impl::createTritonAMDGPUConvertToBufferOps();
  return pass;
}

/// Public factory for the TritonAMDGPUConvertToBufferOps pass configured from
/// `options`.
///
/// The options struct is taken by value and moved into the `impl::` factory
/// overload, whose result is handed back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUConvertToBufferOps(TritonAMDGPUConvertToBufferOpsOptions options) {
  std::unique_ptr<::mlir::Pass> pass =
      impl::createTritonAMDGPUConvertToBufferOps(std::move(options));
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUCONVERTTOBUFFEROPS
#endif // GEN_PASS_DEF_TRITONAMDGPUCONVERTTOBUFFEROPS

//===----------------------------------------------------------------------===//
// TritonAMDGPUHoistLayoutConversions
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUHOISTLAYOUTCONVERSIONS
std::unique_ptr<::mlir::Pass> createTritonAMDGPUHoistLayoutConversions();
#undef GEN_PASS_DECL_TRITONAMDGPUHOISTLAYOUTCONVERSIONS
#endif // GEN_PASS_DECL_TRITONAMDGPUHOISTLAYOUTCONVERSIONS
#ifdef GEN_PASS_DEF_TRITONAMDGPUHOISTLAYOUTCONVERSIONS

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUHoistLayoutConversions();
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDGPUHoistLayoutConversions` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings, TypeID-based `classof`, cloning, and the
/// definition of the `createTritonAMDGPUHoistLayoutConversions()` factory. The
/// pass operates on `mlir::triton::FuncOp`.
template <typename DerivedT>
class TritonAMDGPUHoistLayoutConversionsBase : public ::mlir::OperationPass<mlir::triton::FuncOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDGPUHoistLayoutConversionsBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDGPUHoistLayoutConversionsBase() : ::mlir::OperationPass<mlir::triton::FuncOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDGPUHoistLayoutConversionsBase(const TritonAMDGPUHoistLayoutConversionsBase &other) : ::mlir::OperationPass<mlir::triton::FuncOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDGPUHoistLayoutConversionsBase& operator=(const TritonAMDGPUHoistLayoutConversionsBase &) = delete;
  TritonAMDGPUHoistLayoutConversionsBase(TritonAMDGPUHoistLayoutConversionsBase &&) = delete;
  TritonAMDGPUHoistLayoutConversionsBase& operator=(TritonAMDGPUHoistLayoutConversionsBase &&) = delete;
  ~TritonAMDGPUHoistLayoutConversionsBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-hoist-layout-conversions`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-hoist-layout-conversions");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-hoist-layout-conversions"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "Hoist layout conversions out of the loop"; }

  /// Returns the derived pass name (`TritonAMDGPUHoistLayoutConversions`) as a
  /// compile-time string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUHoistLayoutConversions");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDGPUHoistLayoutConversions"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    // Intentionally empty: this pass inserts no dialects into the registry.
    
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUHoistLayoutConversionsBase<DerivedT>)

protected:
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUHoistLayoutConversions() {
    return std::make_unique<DerivedT>();
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUHoistLayoutConversions pass.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUHoistLayoutConversions() {
  std::unique_ptr<::mlir::Pass> pass =
      impl::createTritonAMDGPUHoistLayoutConversions();
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUHOISTLAYOUTCONVERSIONS
#endif // GEN_PASS_DEF_TRITONAMDGPUHOISTLAYOUTCONVERSIONS

//===----------------------------------------------------------------------===//
// TritonAMDGPUInThreadTranspose
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUINTHREADTRANSPOSE
std::unique_ptr<::mlir::Pass> createTritonAMDGPUInThreadTranspose();
#undef GEN_PASS_DECL_TRITONAMDGPUINTHREADTRANSPOSE
#endif // GEN_PASS_DECL_TRITONAMDGPUINTHREADTRANSPOSE
#ifdef GEN_PASS_DEF_TRITONAMDGPUINTHREADTRANSPOSE

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUInThreadTranspose();
} // namespace impl
namespace impl {

/// CRTP base class for the `TritonAMDGPUInThreadTranspose` pass.
///
/// The concrete pass supplies itself as `DerivedT`. This base provides the
/// pass's identifying strings, TypeID-based `classof`, cloning, dependent
/// dialect registration, and the definition of the
/// `createTritonAMDGPUInThreadTranspose()` factory. The pass operates on
/// `mlir::triton::FuncOp`.
template <typename DerivedT>
class TritonAMDGPUInThreadTransposeBase : public ::mlir::OperationPass<mlir::triton::FuncOp> {
public:
  /// Short alias for this base class.
  using Base = TritonAMDGPUInThreadTransposeBase;

  /// Constructs the pass using the TypeID of `DerivedT` (not of this base).
  TritonAMDGPUInThreadTransposeBase() : ::mlir::OperationPass<mlir::triton::FuncOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy construction forwards to the `OperationPass` copy constructor; it is
  /// what `clonePass()` relies on through `DerivedT`'s copy constructor.
  TritonAMDGPUInThreadTransposeBase(const TritonAMDGPUInThreadTransposeBase &other) : ::mlir::OperationPass<mlir::triton::FuncOp>(other) {}
  // Copy assignment and both move operations are explicitly disabled.
  TritonAMDGPUInThreadTransposeBase& operator=(const TritonAMDGPUInThreadTransposeBase &) = delete;
  TritonAMDGPUInThreadTransposeBase(TritonAMDGPUInThreadTransposeBase &&) = delete;
  TritonAMDGPUInThreadTransposeBase& operator=(TritonAMDGPUInThreadTransposeBase &&) = delete;
  ~TritonAMDGPUInThreadTransposeBase() = default;

  /// Returns the command-line argument attached to this pass
  /// (`tritonamdgpu-in-thread-transpose`) as a compile-time string literal.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-in-thread-transpose");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "tritonamdgpu-in-thread-transpose"; }

  /// Returns the one-line human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "Extend global load sizePerThread to 2D shape and perform transpose within registers per thread before writing to shared memory"; }

  /// Returns the derived pass name (`TritonAMDGPUInThreadTranspose`) as a
  /// compile-time string literal.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUInThreadTranspose");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "TritonAMDGPUInThreadTranspose"; }

  /// Support isa/dyn_cast functionality for the derived pass class: a pass
  /// matches when its TypeID equals the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// Creates a copy of this pass by copy-constructing a new `DerivedT` from
  /// `*this` (downcast to `DerivedT`).
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Registers the dialects that must be loaded in the context before this
  /// pass runs: the TritonAMDGPU dialect and the TritonGPU dialect, inserted
  /// with two separate calls.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
    registry.insert<mlir::triton::gpu::TritonGPUDialect>();
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUInThreadTransposeBase<DerivedT>)

protected:
private:

  /// Factory defined as a friend inside the template: it default-constructs
  /// the concrete `DerivedT` pass and returns it as a generic `mlir::Pass`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUInThreadTranspose() {
    return std::make_unique<DerivedT>();
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUInThreadTranspose pass.
///
/// Delegates to the `impl::` factory of the same name and hands the resulting
/// pass back to the caller.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUInThreadTranspose() {
  std::unique_ptr<::mlir::Pass> pass = impl::createTritonAMDGPUInThreadTranspose();
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUINTHREADTRANSPOSE
#endif // GEN_PASS_DEF_TRITONAMDGPUINTHREADTRANSPOSE

//===----------------------------------------------------------------------===//
// TritonAMDGPUOptimizeEpilogue
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUOPTIMIZEEPILOGUE
std::unique_ptr<::mlir::Pass> createTritonAMDGPUOptimizeEpilogue();
#undef GEN_PASS_DECL_TRITONAMDGPUOPTIMIZEEPILOGUE
#endif // GEN_PASS_DECL_TRITONAMDGPUOPTIMIZEEPILOGUE
#ifdef GEN_PASS_DEF_TRITONAMDGPUOPTIMIZEEPILOGUE

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUOptimizeEpilogue();
} // namespace impl
namespace impl {

/// CRTP base class for the TritonAMDGPUOptimizeEpilogue pass, which runs on
/// `mlir::ModuleOp`. `DerivedT` is the concrete pass type: its TypeID
/// identifies the pass, and it is the type instantiated by `clonePass` and by
/// the friend factory at the bottom of the class.
template <typename DerivedT>
class TritonAMDGPUOptimizeEpilogueBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUOptimizeEpilogueBase;

  TritonAMDGPUOptimizeEpilogueBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}
  TritonAMDGPUOptimizeEpilogueBase(
      const TritonAMDGPUOptimizeEpilogueBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy construction is allowed (clonePass relies on it); assignment and
  // moves are disabled.
  TritonAMDGPUOptimizeEpilogueBase &
  operator=(const TritonAMDGPUOptimizeEpilogueBase &) = delete;
  TritonAMDGPUOptimizeEpilogueBase(TritonAMDGPUOptimizeEpilogueBase &&) =
      delete;
  TritonAMDGPUOptimizeEpilogueBase &
  operator=(TritonAMDGPUOptimizeEpilogueBase &&) = delete;
  ~TritonAMDGPUOptimizeEpilogueBase() = default;

  /// Command-line argument for this pass, available at compile time.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-optimize-epilogue");
  }
  /// Command-line argument for this pass (virtual accessor).
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// One-line summary of what the pass does.
  ::llvm::StringRef getDescription() const override {
    return "Optimize epilogue: (1) Store accumulators directly without going thorough SMEM in epilogue.";
  }

  /// Name of the derived pass, available at compile time.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUOptimizeEpilogue");
  }
  /// Name of the derived pass (virtual accessor).
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast support: a pass matches when its TypeID is DerivedT's.
  static bool classof(const ::mlir::Pass *pass) {
    const ::mlir::TypeID derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing the derived object.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = *static_cast<const DerivedT *>(this);
    return std::make_unique<DerivedT>(self);
  }

  /// This pass registers no dependent dialects.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {}

  /// Explicit, library-internal TypeID for this base class; pass classes are
  /// meant to be visible only inside the current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUOptimizeEpilogueBase<DerivedT>)

protected:
private:

  /// Factory defined inline as a friend, so it is a namespace-scope function
  /// of the enclosing namespace rather than a member. Default-constructs the
  /// derived pass.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUOptimizeEpilogue() {
    std::unique_ptr<::mlir::Pass> pass = std::make_unique<DerivedT>();
    return pass;
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUOptimizeEpilogue pass. Simply hands back
/// whatever the `impl::` factory of the same name produces.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUOptimizeEpilogue() {
  auto pass = impl::createTritonAMDGPUOptimizeEpilogue();
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUOPTIMIZEEPILOGUE
#endif // GEN_PASS_DEF_TRITONAMDGPUOPTIMIZEEPILOGUE

//===----------------------------------------------------------------------===//
// TritonAMDGPUReorderInstructions
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUREORDERINSTRUCTIONS
std::unique_ptr<::mlir::Pass> createTritonAMDGPUReorderInstructions();
#undef GEN_PASS_DECL_TRITONAMDGPUREORDERINSTRUCTIONS
#endif // GEN_PASS_DECL_TRITONAMDGPUREORDERINSTRUCTIONS
#ifdef GEN_PASS_DEF_TRITONAMDGPUREORDERINSTRUCTIONS

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUReorderInstructions();
} // namespace impl
namespace impl {

/// CRTP base class for the TritonAMDGPUReorderInstructions pass, which runs on
/// `mlir::ModuleOp`. `DerivedT` is the concrete pass type: its TypeID
/// identifies the pass, and it is the type instantiated by `clonePass` and by
/// the friend factory at the bottom of the class.
template <typename DerivedT>
class TritonAMDGPUReorderInstructionsBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUReorderInstructionsBase;

  TritonAMDGPUReorderInstructionsBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}
  TritonAMDGPUReorderInstructionsBase(
      const TritonAMDGPUReorderInstructionsBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy construction is allowed (clonePass relies on it); assignment and
  // moves are disabled.
  TritonAMDGPUReorderInstructionsBase &
  operator=(const TritonAMDGPUReorderInstructionsBase &) = delete;
  TritonAMDGPUReorderInstructionsBase(TritonAMDGPUReorderInstructionsBase &&) =
      delete;
  TritonAMDGPUReorderInstructionsBase &
  operator=(TritonAMDGPUReorderInstructionsBase &&) = delete;
  ~TritonAMDGPUReorderInstructionsBase() = default;

  /// Command-line argument for this pass, available at compile time.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-reorder-instructions");
  }
  /// Command-line argument for this pass (virtual accessor).
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// One-line summary of what the pass does.
  ::llvm::StringRef getDescription() const override {
    return "Reorder instructions";
  }

  /// Name of the derived pass, available at compile time.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUReorderInstructions");
  }
  /// Name of the derived pass (virtual accessor).
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast support: a pass matches when its TypeID is DerivedT's.
  static bool classof(const ::mlir::Pass *pass) {
    const ::mlir::TypeID derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing the derived object.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = *static_cast<const DerivedT *>(this);
    return std::make_unique<DerivedT>(self);
  }

  /// This pass registers no dependent dialects.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {}

  /// Explicit, library-internal TypeID for this base class; pass classes are
  /// meant to be visible only inside the current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUReorderInstructionsBase<DerivedT>)

protected:
private:

  /// Factory defined inline as a friend, so it is a namespace-scope function
  /// of the enclosing namespace rather than a member. Default-constructs the
  /// derived pass.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUReorderInstructions() {
    std::unique_ptr<::mlir::Pass> pass = std::make_unique<DerivedT>();
    return pass;
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUReorderInstructions pass. Simply hands
/// back whatever the `impl::` factory of the same name produces.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUReorderInstructions() {
  auto pass = impl::createTritonAMDGPUReorderInstructions();
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUREORDERINSTRUCTIONS
#endif // GEN_PASS_DEF_TRITONAMDGPUREORDERINSTRUCTIONS

//===----------------------------------------------------------------------===//
// TritonAMDGPUStreamPipeline
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUSTREAMPIPELINE
/// Plain aggregate of the options accepted by the TritonAMDGPUStreamPipeline
/// pass factory. Every field carries its default value.
struct TritonAMDGPUStreamPipelineOptions {
  /// Defaults to 2.
  int32_t numStages{2};
  /// Defaults to 0.
  int32_t globalPrefetch{0};
  /// Defaults to 0.
  int32_t localPrefetch{0};
  /// Defaults to false.
  bool useAsyncCopy{false};
  /// Defaults to false.
  bool usePingpong{false};
};
std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline();
std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline(TritonAMDGPUStreamPipelineOptions options);
#undef GEN_PASS_DECL_TRITONAMDGPUSTREAMPIPELINE
#endif // GEN_PASS_DECL_TRITONAMDGPUSTREAMPIPELINE
#ifdef GEN_PASS_DEF_TRITONAMDGPUSTREAMPIPELINE

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline();
} // namespace impl

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline(TritonAMDGPUStreamPipelineOptions options);
} // namespace impl
namespace impl {

/// CRTP base class for the TritonAMDGPUStreamPipeline pass, which runs on
/// `mlir::ModuleOp`. `DerivedT` is the concrete pass type: its TypeID
/// identifies the pass, and it is the type instantiated by `clonePass` and by
/// the friend factories at the bottom of the class. The pass exposes five
/// options, which can also be seeded from a
/// `TritonAMDGPUStreamPipelineOptions` value.
template <typename DerivedT>
class TritonAMDGPUStreamPipelineBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUStreamPipelineBase;

  TritonAMDGPUStreamPipelineBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}
  TritonAMDGPUStreamPipelineBase(const TritonAMDGPUStreamPipelineBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy construction is allowed (clonePass relies on it); assignment and
  // moves are disabled.
  TritonAMDGPUStreamPipelineBase &
  operator=(const TritonAMDGPUStreamPipelineBase &) = delete;
  TritonAMDGPUStreamPipelineBase(TritonAMDGPUStreamPipelineBase &&) = delete;
  TritonAMDGPUStreamPipelineBase &
  operator=(TritonAMDGPUStreamPipelineBase &&) = delete;
  ~TritonAMDGPUStreamPipelineBase() = default;

  /// Command-line argument for this pass, available at compile time.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-stream-pipeline");
  }
  /// Command-line argument for this pass (virtual accessor).
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// One-line summary of what the pass does.
  ::llvm::StringRef getDescription() const override {
    return "pipeline";
  }

  /// Name of the derived pass, available at compile time.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUStreamPipeline");
  }
  /// Name of the derived pass (virtual accessor).
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast support: a pass matches when its TypeID is DerivedT's.
  static bool classof(const ::mlir::Pass *pass) {
    const ::mlir::TypeID derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing the derived object.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = *static_cast<const DerivedT *>(this);
    return std::make_unique<DerivedT>(self);
  }

  /// Adds the TritonAMDGPU dialect to `registry` as a dependency of this pass.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicit, library-internal TypeID for this base class; pass classes are
  /// meant to be visible only inside the current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUStreamPipelineBase<DerivedT>)

  /// Builds the pass with defaults first (via the default constructor), then
  /// overwrites each pass option with the matching field of `options`.
  TritonAMDGPUStreamPipelineBase(TritonAMDGPUStreamPipelineOptions options)
      : TritonAMDGPUStreamPipelineBase() {
    numStages = std::move(options.numStages);
    globalPrefetch = std::move(options.globalPrefetch);
    localPrefetch = std::move(options.localPrefetch);
    useAsyncCopy = std::move(options.useAsyncCopy);
    usePingpong = std::move(options.usePingpong);
  }

protected:
  // Pass options, declared in the same order as the fields of the options
  // struct. Each is attached to this pass instance.
  ::mlir::Pass::Option<int32_t> numStages{
      *this, "num_stages", ::llvm::cl::desc("Number of Pipeline stages"),
      ::llvm::cl::init(2)};
  ::mlir::Pass::Option<int32_t> globalPrefetch{
      *this, "global_prefetch",
      ::llvm::cl::desc("Set global prefetch stage count"),
      ::llvm::cl::init(0)};
  ::mlir::Pass::Option<int32_t> localPrefetch{
      *this, "local_prefetch",
      ::llvm::cl::desc("Set local prefetch stage count"),
      ::llvm::cl::init(0)};
  ::mlir::Pass::Option<bool> useAsyncCopy{
      *this, "use_async_copy",
      ::llvm::cl::desc("Use AsyncCopyGlobalToLocal to directly load to shared memory"),
      ::llvm::cl::init(false)};
  ::mlir::Pass::Option<bool> usePingpong{
      *this, "use_pingpong",
      ::llvm::cl::desc("Use schedules to enable block ping-pong"),
      ::llvm::cl::init(false)};

private:

  /// Factory defined inline as a friend, so it is a namespace-scope function
  /// of the enclosing namespace rather than a member. Default-constructs the
  /// derived pass.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline() {
    std::unique_ptr<::mlir::Pass> pass = std::make_unique<DerivedT>();
    return pass;
  }

  /// Factory overload that constructs the derived pass from `options`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline(TritonAMDGPUStreamPipelineOptions options) {
    std::unique_ptr<::mlir::Pass> pass =
        std::make_unique<DerivedT>(std::move(options));
    return pass;
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUStreamPipeline pass with default
/// options. Simply hands back whatever the nullary `impl::` factory produces.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline() {
  auto pass = impl::createTritonAMDGPUStreamPipeline();
  return pass;
}

/// Public factory for the TritonAMDGPUStreamPipeline pass configured from
/// `options`, which is moved into the `impl::` factory overload.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUStreamPipeline(TritonAMDGPUStreamPipelineOptions options) {
  auto pass = impl::createTritonAMDGPUStreamPipeline(std::move(options));
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUSTREAMPIPELINE
#endif // GEN_PASS_DEF_TRITONAMDGPUSTREAMPIPELINE

//===----------------------------------------------------------------------===//
// TritonAMDGPUUpdateAsyncWaitCount
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_TRITONAMDGPUUPDATEASYNCWAITCOUNT
/// Plain aggregate of the options accepted by the
/// TritonAMDGPUUpdateAsyncWaitCount pass factory.
struct TritonAMDGPUUpdateAsyncWaitCountOptions {
  /// Defaults to the empty string.
  std::string archGenerationName{};
};
std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount();
std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount(TritonAMDGPUUpdateAsyncWaitCountOptions options);
#undef GEN_PASS_DECL_TRITONAMDGPUUPDATEASYNCWAITCOUNT
#endif // GEN_PASS_DECL_TRITONAMDGPUUPDATEASYNCWAITCOUNT
#ifdef GEN_PASS_DEF_TRITONAMDGPUUPDATEASYNCWAITCOUNT

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount();
} // namespace impl

namespace impl {
  std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount(TritonAMDGPUUpdateAsyncWaitCountOptions options);
} // namespace impl
namespace impl {

/// CRTP base class for the TritonAMDGPUUpdateAsyncWaitCount pass, which runs
/// on `mlir::ModuleOp`. `DerivedT` is the concrete pass type: its TypeID
/// identifies the pass, and it is the type instantiated by `clonePass` and by
/// the friend factories at the bottom of the class. The pass exposes one
/// option, which can also be seeded from a
/// `TritonAMDGPUUpdateAsyncWaitCountOptions` value.
template <typename DerivedT>
class TritonAMDGPUUpdateAsyncWaitCountBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUUpdateAsyncWaitCountBase;

  TritonAMDGPUUpdateAsyncWaitCountBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}
  TritonAMDGPUUpdateAsyncWaitCountBase(
      const TritonAMDGPUUpdateAsyncWaitCountBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy construction is allowed (clonePass relies on it); assignment and
  // moves are disabled.
  TritonAMDGPUUpdateAsyncWaitCountBase &
  operator=(const TritonAMDGPUUpdateAsyncWaitCountBase &) = delete;
  TritonAMDGPUUpdateAsyncWaitCountBase(
      TritonAMDGPUUpdateAsyncWaitCountBase &&) = delete;
  TritonAMDGPUUpdateAsyncWaitCountBase &
  operator=(TritonAMDGPUUpdateAsyncWaitCountBase &&) = delete;
  ~TritonAMDGPUUpdateAsyncWaitCountBase() = default;

  /// Command-line argument for this pass, available at compile time.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-update-async-wait-count");
  }
  /// Command-line argument for this pass (virtual accessor).
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// One-line summary of what the pass does.
  ::llvm::StringRef getDescription() const override {
    return "Adjust async wait count to allow prefetching over multiple loop iterations";
  }

  /// Name of the derived pass, available at compile time.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUUpdateAsyncWaitCount");
  }
  /// Name of the derived pass (virtual accessor).
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast support: a pass matches when its TypeID is DerivedT's.
  static bool classof(const ::mlir::Pass *pass) {
    const ::mlir::TypeID derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing the derived object.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = *static_cast<const DerivedT *>(this);
    return std::make_unique<DerivedT>(self);
  }

  /// This pass registers no dependent dialects.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {}

  /// Explicit, library-internal TypeID for this base class; pass classes are
  /// meant to be visible only inside the current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUUpdateAsyncWaitCountBase<DerivedT>)

  /// Builds the pass with defaults first (via the default constructor), then
  /// overwrites the pass option with the matching field of `options`.
  TritonAMDGPUUpdateAsyncWaitCountBase(
      TritonAMDGPUUpdateAsyncWaitCountOptions options)
      : TritonAMDGPUUpdateAsyncWaitCountBase() {
    archGenerationName = std::move(options.archGenerationName);
  }

protected:
  // Pass option attached to this pass instance.
  ::mlir::Pass::Option<std::string> archGenerationName{
      *this, "arch-generation-name",
      ::llvm::cl::desc("GFX generation name of target device."),
      ::llvm::cl::init(std::string{})};

private:

  /// Factory defined inline as a friend, so it is a namespace-scope function
  /// of the enclosing namespace rather than a member. Default-constructs the
  /// derived pass.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount() {
    std::unique_ptr<::mlir::Pass> pass = std::make_unique<DerivedT>();
    return pass;
  }

  /// Factory overload that constructs the derived pass from `options`.
  friend std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount(TritonAMDGPUUpdateAsyncWaitCountOptions options) {
    std::unique_ptr<::mlir::Pass> pass =
        std::make_unique<DerivedT>(std::move(options));
    return pass;
  }
};
} // namespace impl

/// Public factory for the TritonAMDGPUUpdateAsyncWaitCount pass with default
/// options. Simply hands back whatever the nullary `impl::` factory produces.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount() {
  auto pass = impl::createTritonAMDGPUUpdateAsyncWaitCount();
  return pass;
}

/// Public factory for the TritonAMDGPUUpdateAsyncWaitCount pass configured
/// from `options`, which is moved into the `impl::` factory overload.
std::unique_ptr<::mlir::Pass> createTritonAMDGPUUpdateAsyncWaitCount(TritonAMDGPUUpdateAsyncWaitCountOptions options) {
  auto pass = impl::createTritonAMDGPUUpdateAsyncWaitCount(std::move(options));
  return pass;
}
#undef GEN_PASS_DEF_TRITONAMDGPUUPDATEASYNCWAITCOUNT
#endif // GEN_PASS_DEF_TRITONAMDGPUUPDATEASYNCWAITCOUNT
#ifdef GEN_PASS_REGISTRATION

//===----------------------------------------------------------------------===//
// TritonAMDFoldTrueCmpI Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDFoldTrueCmpI pass through `::mlir::registerPass`,
/// using an allocator that calls the default pass factory.
inline void registerTritonAMDFoldTrueCmpI() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDFoldTrueCmpI();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDFoldTrueCmpIPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDFoldTrueCmpI();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUAccelerateMatmul Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUAccelerateMatmul pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUAccelerateMatmul() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUAccelerateMatmul();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUAccelerateMatmulPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUAccelerateMatmul();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUBlockPingpong Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUBlockPingpong pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUBlockPingpong() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUBlockPingpong();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUBlockPingpongPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUBlockPingpong();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUCanonicalizePointers Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUCanonicalizePointers pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUCanonicalizePointers() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUCanonicalizePointers();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUCanonicalizePointersPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUCanonicalizePointers();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUCoalesceAsyncCopy Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUCoalesceAsyncCopy pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUCoalesceAsyncCopy() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUCoalesceAsyncCopy();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUCoalesceAsyncCopyPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUCoalesceAsyncCopy();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUConvertToBufferOps Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUConvertToBufferOps pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUConvertToBufferOps() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUConvertToBufferOps();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUConvertToBufferOpsPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUConvertToBufferOps();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUHoistLayoutConversions Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUHoistLayoutConversions pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUHoistLayoutConversions() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUHoistLayoutConversions();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUHoistLayoutConversionsPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUHoistLayoutConversions();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUInThreadTranspose Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUInThreadTranspose pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUInThreadTranspose() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUInThreadTranspose();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUInThreadTransposePass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUInThreadTranspose();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUOptimizeEpilogue Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUOptimizeEpilogue pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUOptimizeEpilogue() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUOptimizeEpilogue();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUOptimizeEpiloguePass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUOptimizeEpilogue();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUReorderInstructions Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUReorderInstructions pass through
/// `::mlir::registerPass`, using an allocator that calls the default pass
/// factory.
inline void registerTritonAMDGPUReorderInstructions() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUReorderInstructions();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUReorderInstructionsPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUReorderInstructions();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUStreamPipeline Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUStreamPipeline pass through
/// `::mlir::registerPass`, using an allocator that calls the nullary
/// (default-options) pass factory.
inline void registerTritonAMDGPUStreamPipeline() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUStreamPipeline();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUStreamPipelinePass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUStreamPipeline();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPUUpdateAsyncWaitCount Registration
//===----------------------------------------------------------------------===//

/// Registers the TritonAMDGPUUpdateAsyncWaitCount pass through
/// `::mlir::registerPass`, using an allocator that calls the nullary
/// (default-options) pass factory.
inline void registerTritonAMDGPUUpdateAsyncWaitCount() {
  const auto allocatePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUUpdateAsyncWaitCount();
  };
  ::mlir::registerPass(allocatePass);
}

// Legacy registration entry point, retained temporarily for backwards
// compatibility. Performs the same registration as the non-`Pass` variant.
inline void registerTritonAMDGPUUpdateAsyncWaitCountPass() {
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createTritonAMDGPUUpdateAsyncWaitCount();
  };
  ::mlir::registerPass(makePass);
}

//===----------------------------------------------------------------------===//
// TritonAMDGPU Registration
//===----------------------------------------------------------------------===//

/// Registers every TritonAMDGPU pass by invoking each per-pass registration
/// function once, in the order listed in the table below.
inline void registerTritonAMDGPUPasses() {
  using RegisterFn = void (*)();
  constexpr RegisterFn registrars[] = {
      &registerTritonAMDFoldTrueCmpI,
      &registerTritonAMDGPUAccelerateMatmul,
      &registerTritonAMDGPUBlockPingpong,
      &registerTritonAMDGPUCanonicalizePointers,
      &registerTritonAMDGPUCoalesceAsyncCopy,
      &registerTritonAMDGPUConvertToBufferOps,
      &registerTritonAMDGPUHoistLayoutConversions,
      &registerTritonAMDGPUInThreadTranspose,
      &registerTritonAMDGPUOptimizeEpilogue,
      &registerTritonAMDGPUReorderInstructions,
      &registerTritonAMDGPUStreamPipeline,
      &registerTritonAMDGPUUpdateAsyncWaitCount,
  };
  for (RegisterFn registerOne : registrars)
    registerOne();
}
#undef GEN_PASS_REGISTRATION
#endif // GEN_PASS_REGISTRATION
// Deprecated. Please use the new per-pass macros.
#ifdef GEN_PASS_CLASSES

/// CRTP base class for the TritonAMDFoldTrueCmpI pass, which runs on
/// `mlir::ModuleOp`. `DerivedT` is the concrete pass type: its TypeID
/// identifies the pass, and it is the type instantiated by `clonePass`.
template <typename DerivedT>
class TritonAMDFoldTrueCmpIBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDFoldTrueCmpIBase;

  TritonAMDFoldTrueCmpIBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}
  TritonAMDFoldTrueCmpIBase(const TritonAMDFoldTrueCmpIBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy construction is allowed (clonePass relies on it); assignment and
  // moves are disabled.
  TritonAMDFoldTrueCmpIBase &
  operator=(const TritonAMDFoldTrueCmpIBase &) = delete;
  TritonAMDFoldTrueCmpIBase(TritonAMDFoldTrueCmpIBase &&) = delete;
  TritonAMDFoldTrueCmpIBase &operator=(TritonAMDFoldTrueCmpIBase &&) = delete;
  ~TritonAMDFoldTrueCmpIBase() = default;

  /// Command-line argument for this pass, available at compile time.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-fold-true-cmpi");
  }
  /// Command-line argument for this pass (virtual accessor).
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// One-line summary of what the pass does.
  ::llvm::StringRef getDescription() const override {
    return "Fold true arith.cmpi to %true";
  }

  /// Name of the derived pass, available at compile time.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDFoldTrueCmpI");
  }
  /// Name of the derived pass (virtual accessor).
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast support: a pass matches when its TypeID is DerivedT's.
  static bool classof(const ::mlir::Pass *pass) {
    const ::mlir::TypeID derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing the derived object.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = *static_cast<const DerivedT *>(this);
    return std::make_unique<DerivedT>(self);
  }

  /// This pass registers no dependent dialects.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {}

  /// Explicit, library-internal TypeID for this base class; pass classes are
  /// meant to be visible only inside the current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDFoldTrueCmpIBase<DerivedT>)

protected:
};

/// CRTP base class for the TritonAMDGPUAccelerateMatmul pass, which runs on
/// `mlir::ModuleOp`. `DerivedT` is the concrete pass type: its TypeID
/// identifies the pass, and it is the type instantiated by `clonePass`. The
/// pass exposes three options to derived classes.
template <typename DerivedT>
class TritonAMDGPUAccelerateMatmulBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUAccelerateMatmulBase;

  TritonAMDGPUAccelerateMatmulBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}
  TritonAMDGPUAccelerateMatmulBase(
      const TritonAMDGPUAccelerateMatmulBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy construction is allowed (clonePass relies on it); assignment and
  // moves are disabled.
  TritonAMDGPUAccelerateMatmulBase &
  operator=(const TritonAMDGPUAccelerateMatmulBase &) = delete;
  TritonAMDGPUAccelerateMatmulBase(TritonAMDGPUAccelerateMatmulBase &&) =
      delete;
  TritonAMDGPUAccelerateMatmulBase &
  operator=(TritonAMDGPUAccelerateMatmulBase &&) = delete;
  ~TritonAMDGPUAccelerateMatmulBase() = default;

  /// Command-line argument for this pass, available at compile time.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-accelerate-matmul");
  }
  /// Command-line argument for this pass (virtual accessor).
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// One-line summary of what the pass does.
  ::llvm::StringRef getDescription() const override {
    return "accelerate matmul";
  }

  /// Name of the derived pass, available at compile time.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUAccelerateMatmul");
  }
  /// Name of the derived pass (virtual accessor).
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast support: a pass matches when its TypeID is DerivedT's.
  static bool classof(const ::mlir::Pass *pass) {
    const ::mlir::TypeID derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing the derived object.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = *static_cast<const DerivedT *>(this);
    return std::make_unique<DerivedT>(self);
  }

  /// Adds the TritonAMDGPU dialect to `registry` as a dependency of this pass.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicit, library-internal TypeID for this base class; pass classes are
  /// meant to be visible only inside the current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUAccelerateMatmulBase<DerivedT>)

protected:
  // Pass options attached to this pass instance.
  ::mlir::Pass::Option<std::string> archGenerationName{
      *this, "arch-generation-name",
      ::llvm::cl::desc("GFX generation name of target device."),
      ::llvm::cl::init(std::string{})};
  ::mlir::Pass::Option<int32_t> matrixInstructionSize{
      *this, "matrix-instruction-size",
      ::llvm::cl::desc("enforce matrix instruction MN size"),
      ::llvm::cl::init(0)};
  ::mlir::Pass::Option<int32_t> kPack{
      *this, "kPack", ::llvm::cl::desc("KWidth / kBase"),
      ::llvm::cl::init(1)};
};

/// CRTP base class for the TritonAMDGPUBlockPingpong pass, which runs on
/// `mlir::ModuleOp`. `DerivedT` is the concrete pass type: its TypeID
/// identifies the pass, and it is the type instantiated by `clonePass`. The
/// pass exposes one option to derived classes.
template <typename DerivedT>
class TritonAMDGPUBlockPingpongBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUBlockPingpongBase;

  TritonAMDGPUBlockPingpongBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}
  TritonAMDGPUBlockPingpongBase(const TritonAMDGPUBlockPingpongBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  // Copy construction is allowed (clonePass relies on it); assignment and
  // moves are disabled.
  TritonAMDGPUBlockPingpongBase &
  operator=(const TritonAMDGPUBlockPingpongBase &) = delete;
  TritonAMDGPUBlockPingpongBase(TritonAMDGPUBlockPingpongBase &&) = delete;
  TritonAMDGPUBlockPingpongBase &
  operator=(TritonAMDGPUBlockPingpongBase &&) = delete;
  ~TritonAMDGPUBlockPingpongBase() = default;

  /// Command-line argument for this pass, available at compile time.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-block-pingpong");
  }
  /// Command-line argument for this pass (virtual accessor).
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// One-line summary of what the pass does.
  ::llvm::StringRef getDescription() const override {
    return "Interleaving instructions from two warps on the same SIMD to better utilize matrix core";
  }

  /// Name of the derived pass, available at compile time.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUBlockPingpong");
  }
  /// Name of the derived pass (virtual accessor).
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast support: a pass matches when its TypeID is DerivedT's.
  static bool classof(const ::mlir::Pass *pass) {
    const ::mlir::TypeID derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing the derived object.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = *static_cast<const DerivedT *>(this);
    return std::make_unique<DerivedT>(self);
  }

  /// Adds the ROCDL and TritonAMDGPU dialects, in that order, to `registry`
  /// as dependencies of this pass.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::ROCDL::ROCDLDialect>();
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicit, library-internal TypeID for this base class; pass classes are
  /// meant to be visible only inside the current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUBlockPingpongBase<DerivedT>)

protected:
  // Pass option attached to this pass instance.
  ::mlir::Pass::Option<int32_t> numStages{
      *this, "num-stages", ::llvm::cl::desc("Number of Pipeline stages"),
      ::llvm::cl::init(2)};
};

/// Base class template for the `tritonamdgpu-canonicalize-pointers` pass.
/// The pass is anchored on `mlir::triton::FuncOp`; `DerivedT` names the
/// concrete pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUCanonicalizePointersBase
    : public ::mlir::OperationPass<mlir::triton::FuncOp> {
public:
  using Base = TritonAMDGPUCanonicalizePointersBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUCanonicalizePointersBase()
      : ::mlir::OperationPass<mlir::triton::FuncOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUCanonicalizePointersBase(
      const TritonAMDGPUCanonicalizePointersBase &other)
      : ::mlir::OperationPass<mlir::triton::FuncOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUCanonicalizePointersBase &
  operator=(const TritonAMDGPUCanonicalizePointersBase &) = delete;
  TritonAMDGPUCanonicalizePointersBase(
      TritonAMDGPUCanonicalizePointersBase &&) = delete;
  TritonAMDGPUCanonicalizePointersBase &
  operator=(TritonAMDGPUCanonicalizePointersBase &&) = delete;
  ~TritonAMDGPUCanonicalizePointersBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-canonicalize-pointers");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override {
    return "Canonicalize pointers: rewrite pointers passed to load/store operation as a `<basePtr, offset>` pair.";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUCanonicalizePointers");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook; this pass adds nothing to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUCanonicalizePointersBase<DerivedT>)

protected:
  // This pass declares no options.
};

/// Base class template for the `tritonamdgpu-coalesce-async-copy` pass.
/// The pass is anchored on `mlir::ModuleOp`; `DerivedT` names the concrete
/// pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUCoalesceAsyncCopyBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUCoalesceAsyncCopyBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUCoalesceAsyncCopyBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUCoalesceAsyncCopyBase(
      const TritonAMDGPUCoalesceAsyncCopyBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUCoalesceAsyncCopyBase &
  operator=(const TritonAMDGPUCoalesceAsyncCopyBase &) = delete;
  TritonAMDGPUCoalesceAsyncCopyBase(TritonAMDGPUCoalesceAsyncCopyBase &&) =
      delete;
  TritonAMDGPUCoalesceAsyncCopyBase &
  operator=(TritonAMDGPUCoalesceAsyncCopyBase &&) = delete;
  ~TritonAMDGPUCoalesceAsyncCopyBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-coalesce-async-copy");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override {
    return "Improve coalescing for async global to local copies";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUCoalesceAsyncCopy");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook; this pass adds nothing to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUCoalesceAsyncCopyBase<DerivedT>)

protected:
  /// `arch-generation-name`: string option, initialised to an empty string.
  ::mlir::Pass::Option<std::string> archGenerationName{
      *this, "arch-generation-name",
      ::llvm::cl::desc("GFX generation name of target device."),
      ::llvm::cl::init(std::string{})};
};

/// Base class template for the `tritonamdgpu-convert-buffer-ops` pass.
/// The pass is anchored on `mlir::ModuleOp`; `DerivedT` names the concrete
/// pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUConvertToBufferOpsBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUConvertToBufferOpsBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUConvertToBufferOpsBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUConvertToBufferOpsBase(
      const TritonAMDGPUConvertToBufferOpsBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUConvertToBufferOpsBase &
  operator=(const TritonAMDGPUConvertToBufferOpsBase &) = delete;
  TritonAMDGPUConvertToBufferOpsBase(TritonAMDGPUConvertToBufferOpsBase &&) =
      delete;
  TritonAMDGPUConvertToBufferOpsBase &
  operator=(TritonAMDGPUConvertToBufferOpsBase &&) = delete;
  ~TritonAMDGPUConvertToBufferOpsBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-convert-buffer-ops");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override {
    return "Convert memory operations to buffer operations";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUConvertToBufferOps");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook: adds the TritonAMDGPU dialect to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUConvertToBufferOpsBase<DerivedT>)

protected:
  /// `arch-generation-name`: string option, initialised to an empty string.
  ::mlir::Pass::Option<std::string> archGenerationName{
      *this, "arch-generation-name",
      ::llvm::cl::desc("GFX generation name of target device."),
      ::llvm::cl::init(std::string{})};
  /// `allow-buffer-atomics`: boolean option, initialised to true.
  ::mlir::Pass::Option<bool> allowBufferAtomics{
      *this, "allow-buffer-atomics",
      ::llvm::cl::desc("Allow buffer atomic operations when the hardware supports it."),
      ::llvm::cl::init(true)};
};

/// Base class template for the `tritonamdgpu-hoist-layout-conversions` pass.
/// The pass is anchored on `mlir::triton::FuncOp`; `DerivedT` names the
/// concrete pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUHoistLayoutConversionsBase
    : public ::mlir::OperationPass<mlir::triton::FuncOp> {
public:
  using Base = TritonAMDGPUHoistLayoutConversionsBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUHoistLayoutConversionsBase()
      : ::mlir::OperationPass<mlir::triton::FuncOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUHoistLayoutConversionsBase(
      const TritonAMDGPUHoistLayoutConversionsBase &other)
      : ::mlir::OperationPass<mlir::triton::FuncOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUHoistLayoutConversionsBase &
  operator=(const TritonAMDGPUHoistLayoutConversionsBase &) = delete;
  TritonAMDGPUHoistLayoutConversionsBase(
      TritonAMDGPUHoistLayoutConversionsBase &&) = delete;
  TritonAMDGPUHoistLayoutConversionsBase &
  operator=(TritonAMDGPUHoistLayoutConversionsBase &&) = delete;
  ~TritonAMDGPUHoistLayoutConversionsBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-hoist-layout-conversions");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override {
    return "Hoist layout conversions out of the loop";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUHoistLayoutConversions");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook; this pass adds nothing to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUHoistLayoutConversionsBase<DerivedT>)

protected:
  // This pass declares no options.
};

/// Base class template for the `tritonamdgpu-in-thread-transpose` pass.
/// The pass is anchored on `mlir::triton::FuncOp`; `DerivedT` names the
/// concrete pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUInThreadTransposeBase
    : public ::mlir::OperationPass<mlir::triton::FuncOp> {
public:
  using Base = TritonAMDGPUInThreadTransposeBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUInThreadTransposeBase()
      : ::mlir::OperationPass<mlir::triton::FuncOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUInThreadTransposeBase(
      const TritonAMDGPUInThreadTransposeBase &other)
      : ::mlir::OperationPass<mlir::triton::FuncOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUInThreadTransposeBase &
  operator=(const TritonAMDGPUInThreadTransposeBase &) = delete;
  TritonAMDGPUInThreadTransposeBase(TritonAMDGPUInThreadTransposeBase &&) =
      delete;
  TritonAMDGPUInThreadTransposeBase &
  operator=(TritonAMDGPUInThreadTransposeBase &&) = delete;
  ~TritonAMDGPUInThreadTransposeBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-in-thread-transpose");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override {
    return "Extend global load sizePerThread to 2D shape and perform transpose within registers per thread before writing to shared memory";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUInThreadTranspose");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook: adds the TritonAMDGPU dialect followed by the
  /// TritonGPU dialect to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect,
                    mlir::triton::gpu::TritonGPUDialect>();
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUInThreadTransposeBase<DerivedT>)

protected:
  // This pass declares no options.
};

/// Base class template for the `tritonamdgpu-optimize-epilogue` pass.
/// The pass is anchored on `mlir::ModuleOp`; `DerivedT` names the concrete
/// pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUOptimizeEpilogueBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUOptimizeEpilogueBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUOptimizeEpilogueBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUOptimizeEpilogueBase(
      const TritonAMDGPUOptimizeEpilogueBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUOptimizeEpilogueBase &
  operator=(const TritonAMDGPUOptimizeEpilogueBase &) = delete;
  TritonAMDGPUOptimizeEpilogueBase(TritonAMDGPUOptimizeEpilogueBase &&) =
      delete;
  TritonAMDGPUOptimizeEpilogueBase &
  operator=(TritonAMDGPUOptimizeEpilogueBase &&) = delete;
  ~TritonAMDGPUOptimizeEpilogueBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-optimize-epilogue");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  // NOTE(review): the text previously read "going thorough SMEM"; the typo is
  // corrected here. This file is generated by mlir-tblgen, so the same fix
  // presumably needs to land in the pass definition it is generated from.
  ::llvm::StringRef getDescription() const override {
    return "Optimize epilogue: (1) Store accumulators directly without going through SMEM in epilogue.";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUOptimizeEpilogue");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook; this pass adds nothing to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUOptimizeEpilogueBase<DerivedT>)

protected:
  // This pass declares no options.
};

/// Base class template for the `tritonamdgpu-reorder-instructions` pass.
/// The pass is anchored on `mlir::ModuleOp`; `DerivedT` names the concrete
/// pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUReorderInstructionsBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUReorderInstructionsBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUReorderInstructionsBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUReorderInstructionsBase(
      const TritonAMDGPUReorderInstructionsBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUReorderInstructionsBase &
  operator=(const TritonAMDGPUReorderInstructionsBase &) = delete;
  TritonAMDGPUReorderInstructionsBase(TritonAMDGPUReorderInstructionsBase &&) =
      delete;
  TritonAMDGPUReorderInstructionsBase &
  operator=(TritonAMDGPUReorderInstructionsBase &&) = delete;
  ~TritonAMDGPUReorderInstructionsBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-reorder-instructions");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override {
    return "Reorder instructions";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUReorderInstructions");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook; this pass adds nothing to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUReorderInstructionsBase<DerivedT>)

protected:
  // This pass declares no options.
};

/// Base class template for the `tritonamdgpu-stream-pipeline` pass.
/// The pass is anchored on `mlir::ModuleOp`; `DerivedT` names the concrete
/// pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUStreamPipelineBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUStreamPipelineBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUStreamPipelineBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUStreamPipelineBase(const TritonAMDGPUStreamPipelineBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUStreamPipelineBase &
  operator=(const TritonAMDGPUStreamPipelineBase &) = delete;
  TritonAMDGPUStreamPipelineBase(TritonAMDGPUStreamPipelineBase &&) = delete;
  TritonAMDGPUStreamPipelineBase &
  operator=(TritonAMDGPUStreamPipelineBase &&) = delete;
  ~TritonAMDGPUStreamPipelineBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-stream-pipeline");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override { return "pipeline"; }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUStreamPipeline");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook: adds the TritonAMDGPU dialect to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<mlir::triton::amdgpu::TritonAMDGPUDialect>();
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUStreamPipelineBase<DerivedT>)

protected:
  /// `num_stages`: 32-bit integer option, initialised to 2.
  ::mlir::Pass::Option<int32_t> numStages{
      *this, "num_stages", ::llvm::cl::desc("Number of Pipeline stages"),
      ::llvm::cl::init(2)};
  /// `global_prefetch`: 32-bit integer option, initialised to 0.
  ::mlir::Pass::Option<int32_t> globalPrefetch{
      *this, "global_prefetch",
      ::llvm::cl::desc("Set global prefetch stage count"),
      ::llvm::cl::init(0)};
  /// `local_prefetch`: 32-bit integer option, initialised to 0.
  ::mlir::Pass::Option<int32_t> localPrefetch{
      *this, "local_prefetch",
      ::llvm::cl::desc("Set local prefetch stage count"),
      ::llvm::cl::init(0)};
  /// `use_async_copy`: boolean option, initialised to false.
  ::mlir::Pass::Option<bool> useAsyncCopy{
      *this, "use_async_copy",
      ::llvm::cl::desc("Use AsyncCopyGlobalToLocal to directly load to shared memory"),
      ::llvm::cl::init(false)};
  /// `use_pingpong`: boolean option, initialised to false.
  ::mlir::Pass::Option<bool> usePingpong{
      *this, "use_pingpong",
      ::llvm::cl::desc("Use schedules to enable block ping-pong"),
      ::llvm::cl::init(false)};
};

/// Base class template for the `tritonamdgpu-update-async-wait-count` pass.
/// The pass is anchored on `mlir::ModuleOp`; `DerivedT` names the concrete
/// pass class that derives from this base.
template <typename DerivedT>
class TritonAMDGPUUpdateAsyncWaitCountBase
    : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  using Base = TritonAMDGPUUpdateAsyncWaitCountBase;

  /// Default construction identifies the pass by the TypeID of `DerivedT`.
  TritonAMDGPUUpdateAsyncWaitCountBase()
      : ::mlir::OperationPass<mlir::ModuleOp>(
            ::mlir::TypeID::get<DerivedT>()) {}

  /// Copy construction hands `other` to the OperationPass copy constructor.
  TritonAMDGPUUpdateAsyncWaitCountBase(
      const TritonAMDGPUUpdateAsyncWaitCountBase &other)
      : ::mlir::OperationPass<mlir::ModuleOp>(other) {}

  // Copy-assignment and both move operations are disabled.
  TritonAMDGPUUpdateAsyncWaitCountBase &
  operator=(const TritonAMDGPUUpdateAsyncWaitCountBase &) = delete;
  TritonAMDGPUUpdateAsyncWaitCountBase(
      TritonAMDGPUUpdateAsyncWaitCountBase &&) = delete;
  TritonAMDGPUUpdateAsyncWaitCountBase &
  operator=(TritonAMDGPUUpdateAsyncWaitCountBase &&) = delete;
  ~TritonAMDGPUUpdateAsyncWaitCountBase() = default;

  /// Command-line argument of this pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("tritonamdgpu-update-async-wait-count");
  }
  /// Virtual accessor yielding the same argument string.
  ::llvm::StringRef getArgument() const override { return getArgumentName(); }

  /// Short human-readable summary of the pass.
  ::llvm::StringRef getDescription() const override {
    return "Adjust async wait count to allow prefetching over multiple loop iterations";
  }

  /// Name of the derived pass, usable in constant expressions.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("TritonAMDGPUUpdateAsyncWaitCount");
  }
  /// Virtual accessor yielding the same pass name.
  ::llvm::StringRef getName() const override { return getPassName(); }

  /// isa/dyn_cast hook: true when `pass` carries the TypeID of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    const auto derivedID = ::mlir::TypeID::get<DerivedT>();
    return pass->getTypeID() == derivedID;
  }

  /// Produces a copy of this pass by copy-constructing a new `DerivedT`.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    const auto &self = static_cast<const DerivedT &>(*this);
    return std::make_unique<DerivedT>(self);
  }

  /// Dialect registration hook; this pass adds nothing to `registry`.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
  }

  /// Explicit TypeID definition for this base. It is an internal inline
  /// instantiation because pass classes should only be visible inside the
  /// current library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TritonAMDGPUUpdateAsyncWaitCountBase<DerivedT>)

protected:
  /// `arch-generation-name`: string option, initialised to an empty string.
  ::mlir::Pass::Option<std::string> archGenerationName{
      *this, "arch-generation-name",
      ::llvm::cl::desc("GFX generation name of target device."),
      ::llvm::cl::init(std::string{})};
};
#undef GEN_PASS_CLASSES
#endif // GEN_PASS_CLASSES
