/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "bf16bf16bf16_grouped_wgrad_common.cuh"

namespace fbgemm_gpu {

// Fixed-configuration entry point for the BF16 grouped wgrad kernel.
//
// Forwards all tensor arguments and `sm_count` unchanged to
// bf16bf16bf16_grouped_wgrad_impl, instantiated with the template arguments
// <128, 128, 128, 1, 2, 1, ACCUM, true>, where ACCUM mirrors the runtime
// `output_accum` flag. Returns whatever the selected instantiation returns.
//
// NOTE(review): the numeric template arguments presumably encode tile and
// cluster shapes, and the trailing `9_t` in the function name presumably
// identifies further tuning parameters — confirm against
// bf16bf16bf16_grouped_wgrad_common.cuh.
at::Tensor bf16bf16bf16_grouped_wgrad_128_128_128_1_2_1_9_t(
    at::Tensor X, // BF16
    at::Tensor W, // BF16
    at::Tensor M_sizes,
    at::Tensor output,
    bool output_accum,
    int sm_count) {
  // The accumulate flag is a template parameter of the implementation, so the
  // runtime value has to pick between two distinct instantiations.
  if (!output_accum) {
    return bf16bf16bf16_grouped_wgrad_impl<128, 128, 128, 1, 2, 1, false, true>(
        X, W, M_sizes, output, sm_count);
  }

  return bf16bf16bf16_grouped_wgrad_impl<128, 128, 128, 1, 2, 1, true, true>(
      X, W, M_sizes, output, sm_count);
}

} // namespace fbgemm_gpu
