Skip to content

Batchnorm training mode support in a minimal build #17103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits merged on Aug 11, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions onnxruntime/core/providers/cpu/nn/batch_norm.h
Original file line number Diff line number Diff line change
@@ -29,10 +29,6 @@

namespace onnxruntime {

#if !defined(ORT_MINIMAL_BUILD)
#define BATCHNORM_INCLUDE_TRAINING_SUPPORT
#endif

template <typename T>
class BatchNorm : public OpKernel {
public:
@@ -51,7 +47,7 @@ class BatchNorm : public OpKernel {
}

if (is_train_) {
#if defined(BATCHNORM_INCLUDE_TRAINING_SUPPORT)
#ifdef ENABLE_TRAINING_OPS
momentum_ = op_kernel_info.GetAttrOrDefault<float>("momentum", 0.9f);
ORT_ENFORCE(is_spatial_, "Training mode only supports spatial BN");
#else
@@ -88,7 +84,7 @@ class BatchNorm : public OpKernel {
// calculate sample_size (including all channels)
size_t sample_size_incl_all_channels = sample_size * C;

#if defined(BATCHNORM_INCLUDE_TRAINING_SUPPORT)
#ifdef ENABLE_TRAINING_OPS
AllocatorPtr alloc;
ORT_RETURN_IF_ERROR(p_op_kernel_context->GetTempSpaceAllocator(&alloc));

@@ -115,7 +111,7 @@ class BatchNorm : public OpKernel {
ConstEigenVectorArrayMap<T> scale_arr(scale->Data<T>(), is_spatial_ ? C : sample_size_incl_all_channels);
ConstEigenVectorArrayMap<T> bias_arr(B->Data<T>(), is_spatial_ ? C : sample_size_incl_all_channels);

#if defined(BATCHNORM_INCLUDE_TRAINING_SUPPORT)
#ifdef ENABLE_TRAINING_OPS
// Note that we only support spatial BN for training
if (is_train_) {
EigenVectorArrayMap<T> saved_mean_arr(saved_mean->MutableData<T>(), C);
@@ -166,7 +162,7 @@ class BatchNorm : public OpKernel {
ConstEigenVectorArrayMap<T> var_arr(var->Data<T>(), is_spatial_ ? C : sample_size_incl_all_channels);
inv_std = (var_arr + epsilon_).sqrt().inverse();
} else {
#if defined(BATCHNORM_INCLUDE_TRAINING_SUPPORT)
#ifdef ENABLE_TRAINING_OPS
EigenVectorArrayMap<T> saved_inv_std_arr(saved_inv_std->MutableData<T>(), C);
saved_inv_std_arr = (saved_inv_std_arr + epsilon_).inverse().sqrt();
inv_std = saved_inv_std_arr;
@@ -175,7 +171,7 @@ class BatchNorm : public OpKernel {

// If we're training, do batch normalization based on computation from this batch
ConstEigenVectorArrayMap<T> mean_arr(
#if defined(BATCHNORM_INCLUDE_TRAINING_SUPPORT)
#ifdef ENABLE_TRAINING_OPS
!is_train_ ? mean->Data<T>() : saved_mean->Data<T>(),
#else
mean->Data<T>(),
5 changes: 2 additions & 3 deletions onnxruntime/test/providers/cpu/nn/batch_norm_op_test.cc
Original file line number Diff line number Diff line change
@@ -2,7 +2,6 @@
// Licensed under the MIT License.

#include "core/framework/tensor.h"
#include "core/providers/cpu/nn/batch_norm.h" // for BATCHNORM_INCLUDE_TRAINING_SUPPORT
#include "core/session/inference_session.h"
#include "test/common/dnnl_op_test_utils.h"
#include "test/providers/provider_test_utils.h"
@@ -847,7 +846,7 @@ TEST(BatchNormTest, BatchNorm2d_bfloat16) {
#endif // USE_DNNL

// TODO fix flaky test for CUDA
#ifdef BATCHNORM_INCLUDE_TRAINING_SUPPORT
#ifdef ENABLE_TRAINING_OPS
TEST(BatchNormTest, ForwardTrainingTestWithSavedOutputsOpset9) {
// TODO: Unskip when fixed #41968513
if (DefaultDmlExecutionProvider().get() != nullptr) {
@@ -937,7 +936,7 @@ TEST(BatchNormTest, ForwardTrainingTestOpset15) {
{kCudaExecutionProvider, kRocmExecutionProvider,
kTensorrtExecutionProvider, kOpenVINOExecutionProvider, kDnnlExecutionProvider});
}
#endif // BATCHNORM_INCLUDE_TRAINING_SUPPORT
#endif // ENABLE_TRAINING_OPS

} // namespace test
} // namespace onnxruntime