Skip to content

Commit ce75ea8

Browse files
Committed Apr 15, 2025
[QNN EP] Enable QnnGpu backend in QNN EP.
Description: Enable the GPU backend for the onnxruntime QNN EP as well. Motivation and Context: Why is this change required? What problem does it solve? It allows the QNN EP to also run on the GPU backend. With this change, many models can now run fully on the QNN EP GPU backend, such as resnet_50, google_vit_base_fp32, and squeezenet1.0-7. Also, the onnxruntime node test and versioned operator test pass rates for the GPU backend are now comparable to those of the HTP backend. Note: Currently, QNN_LOG_LEVEL_DEBUG needs to be enabled to run correctly.
1 parent cda0d14 commit ce75ea8

File tree

9 files changed

+50
-11
lines changed

9 files changed

+50
-11
lines changed
 

‎onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
8787
}
8888

8989
ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
90-
bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
91-
ORT_RETURN_IF(!is_npu_backend && input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
90+
bool is_cpu_backend = IsCpuBackend(qnn_model_wrapper.GetQnnBackendType());
91+
ORT_RETURN_IF(is_cpu_backend && input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
9292
"QNN EP: Data type ", input_data_type->c_str(),
9393
" is not supported for Conv operator in CPU backend.");
9494

@@ -112,6 +112,7 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
112112
}
113113

114114
// Validate that weight is signed type for per-channel quantization (required by QNN docs).
115+
bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
115116
if (is_npu_backend) {
116117
const auto& input_1 = inputs[1]; // weight
117118
bool is_per_axis_quant = false;

‎onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,8 @@ Status ResizeOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
222222
ORT_RETURN_IF_NOT(input_shape[0] == output_shape[0] && input_shape[1] == output_shape[1],
223223
"QNN EP: Resize may only change the spatial dimensions.");
224224

225-
if (!is_npu_backend) {
225+
const bool is_cpu_backend = IsCpuBackend(qnn_model_wrapper.GetQnnBackendType());
226+
if (is_cpu_backend) {
226227
ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
227228
ORT_RETURN_IF(input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
228229
"QNN EP: Data type ", input_data_type->c_str(),

‎onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <string>
99
#include "QnnOpDef.h"
1010
#include "CPU/QnnCpuCommon.h"
11+
#include "GPU/QnnGpuCommon.h"
1112
// TODO: not exist for Windows yet
1213
// #include "GPU/QnnGpuCommon.h"
1314
#include "DSP/QnnDspCommon.h"
@@ -171,10 +172,9 @@ void QnnBackendManager::SetQnnBackendType(uint32_t backend_id) {
171172
case QNN_BACKEND_ID_CPU:
172173
qnn_backend_type_ = QnnBackendType::CPU;
173174
break;
174-
// TODO: update once it's ready for Widows
175-
// case QNN_BACKEND_ID_GPU:
176-
// qnn_backend_type_ = QnnBackendType::GPU;
177-
// break;
175+
case QNN_BACKEND_ID_GPU:
176+
qnn_backend_type_ = QnnBackendType::GPU;
177+
break;
178178
case QNN_BACKEND_ID_DSP:
179179
qnn_backend_type_ = QnnBackendType::DSP;
180180
break;
@@ -617,16 +617,31 @@ Status QnnBackendManager::CreateContext(bool enable_htp_weight_sharing) {
617617

618618
QnnContext_Config_t context_priority_config = QNN_CONTEXT_CONFIG_INIT;
619619
ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, context_priority_config));
620+
620621
const QnnContext_Config_t* npu_context_configs[] = {&context_priority_config,
621622
&context_config_weight_sharing,
622623
nullptr};
623624
const QnnContext_Config_t* empty_context_configs[] = {nullptr};
624-
bool is_npu_backend = IsNpuBackend(GetQnnBackendType());
625+
626+
const QnnContext_Config_t** configs = nullptr;
627+
switch (GetQnnBackendType()) {
628+
case QnnBackendType::HTP:
629+
case QnnBackendType::DSP:
630+
configs = npu_context_configs;
631+
break;
632+
case QnnBackendType::GPU:
633+
// Currently only this works with QnnGpu.
634+
configs = nullptr;
635+
break;
636+
default:
637+
configs = empty_context_configs;
638+
break;
639+
}
625640

626641
Qnn_ContextHandle_t context = nullptr;
627642
Qnn_ErrorHandle_t result = qnn_interface_.contextCreate(backend_handle_,
628643
device_handle_,
629-
is_npu_backend ? npu_context_configs : empty_context_configs,
644+
configs,
630645
&context);
631646

632647
ORT_RETURN_IF(QNN_CONTEXT_NO_ERROR != result, "Failed to create context. Error: ", QnnErrorHandleToString(result));

‎onnxruntime/core/providers/qnn/builder/qnn_def.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,5 +598,18 @@ bool IsNpuBackend(QnnBackendType backend_type) {
598598
return backend_type == QnnBackendType::HTP || backend_type == QnnBackendType::DSP;
599599
}
600600

601+
bool IsGpuBackend(QnnBackendType backend_type) {
602+
return backend_type == QnnBackendType::GPU;
603+
}
604+
605+
bool IsCpuBackend(QnnBackendType backend_type) {
606+
return backend_type == QnnBackendType::CPU;
607+
}
608+
609+
// Is it Qualcomm hardware ?
610+
bool IsQpuBackend(QnnBackendType backend_type) {
611+
return IsNpuBackend(backend_type) || IsGpuBackend(backend_type);
612+
}
613+
601614
} // namespace qnn
602615
} // namespace onnxruntime

‎onnxruntime/core/providers/qnn/builder/qnn_def.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,14 @@ enum class QnnBackendType : uint8_t {
7373
HTP_FP16
7474
};
7575

76+
bool IsCpuBackend(QnnBackendType backend_type);
77+
7678
bool IsNpuBackend(QnnBackendType backend_type);
7779

80+
bool IsGpuBackend(QnnBackendType backend_type);
81+
82+
bool IsQpuBackend(QnnBackendType backend_type);
83+
7884
// constexpr config values
7985
constexpr const int kSleepMinLatency = 40;
8086
constexpr const int kSleepLowLatency = 100;

‎onnxruntime/core/providers/qnn/qnn_execution_provider.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -789,8 +789,8 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer
789789
return result;
790790
}
791791

792-
if ((context_cache_enabled_ || is_qnn_ctx_model) && !IsNpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
793-
LOGS(logger, ERROR) << "Qnn context cache only works for HTP or DSP backend.";
792+
if ((context_cache_enabled_ || is_qnn_ctx_model) && !IsQpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
793+
LOGS(logger, ERROR) << "Qnn context cache only works for HTP/DSP/GPU backend.";
794794
return result;
795795
}
796796

‎setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ def finalize_options(self):
400400
# QNN V68/V73 dependencies
401401
qnn_deps = [
402402
"QnnCpu.dll",
403+
"QnnGpu.dll",
403404
"QnnHtp.dll",
404405
"QnnSaver.dll",
405406
"QnnSystem.dll",

‎tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ steps:
6666
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\libQnnHtp*.so $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib /Y
6767
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\libqnnhtp*.cat $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib /Y
6868
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnCpu.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
69+
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnGpu.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
6970
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtp.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
7071
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpPrepare.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
7172
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV68Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib

‎tools/nuget/generate_nuspec_for_native_nuget.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,7 @@ def generate_files(line_list, args):
552552

553553
if is_qnn_package:
554554
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnCpu.dll") + runtimes + " />")
555+
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnGpu.dll") + runtimes + " />")
555556
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnHtp.dll") + runtimes + " />")
556557
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnSaver.dll") + runtimes + " />")
557558
if args.target_architecture != "x64":

0 commit comments

Comments (0)
Please sign in to comment.