Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Allow embed mode disabled with all models in buffers
  • Loading branch information
adrianlizarraga committed Apr 18, 2025
commit 6224f727debd173c144be62febd5d0b3f0887d12
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,13 @@ Status CreateEPContextNodes(Model* model,
} else {
context_bin_path = context_model_path;
}

if (context_bin_path.empty()) {
// Context bin path is empty, so just use the graph name (e.g., "QNNExecutionProvider_QNN_13728744673520368385_2_0").
// This happens if both the input model and output model are stored in buffers (i.e., there are no paths).
context_bin_path = ToPathString(graph_name);
}

context_bin_path = context_bin_path + ToPathString("_qnn.bin");
context_cache_name = std::filesystem::path(context_bin_path).filename().string();

Expand Down
30 changes: 16 additions & 14 deletions onnxruntime/core/session/model_compilation_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,7 @@ Status ModelCompilationOptions::ResetOutputModelSettings() {
return session_options_.value.config_options.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "");
}

Status ModelCompilationOptions::Check() const {
ORT_ENFORCE(session_options_.value.ep_context_gen_options.enable);
const EpContextModelGenerationOptions& ep_context_gen_options = session_options_.value.ep_context_gen_options;
const bool explicit_writes_to_file = !ep_context_gen_options.output_model_file_path.empty();
const bool writes_to_buffer = ep_context_gen_options.output_model_buffer_ptr != nullptr;
Status ModelCompilationOptions::CheckInputModelSettings() const {
const bool comes_from_file = !input_model_path_.empty();
const bool comes_from_memory = input_model_data_ != nullptr;

Expand All @@ -164,15 +160,14 @@ Status ModelCompilationOptions::Check() const {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Buffer for input model data has size 0");
}

if (comes_from_memory && writes_to_buffer && !ep_context_gen_options.embed_ep_context_in_model) {
// TODO(adrianlizarraga): We may want to support this in the future. That is, both input/output models
// are in buffers but the context cache binary is dumped to a file. Would need to allow user to specify
// a custom path for the context cache binary.
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"EPContext embed mode must be true (enabled) when both the "
"input and output models are stored in buffers. "
"Please call ModelCompilationOptions_SetEpContextEmbedMode(true).");
}
return Status::OK();
}

Status ModelCompilationOptions::CheckOutputModelSettings() const {
const EpContextModelGenerationOptions& ep_context_gen_options = session_options_.value.ep_context_gen_options;

const bool explicit_writes_to_file = !ep_context_gen_options.output_model_file_path.empty();
const bool writes_to_buffer = ep_context_gen_options.output_model_buffer_ptr != nullptr;

if (!explicit_writes_to_file && !writes_to_buffer) {
// User did not specify an output file or an output buffer. We default to generating an output file
Expand All @@ -197,5 +192,12 @@ Status ModelCompilationOptions::Check() const {

return Status::OK();
}

// Validates these compilation options as a whole.
//
// Requires (via ORT_ENFORCE) that `ep_context_gen_options.enable` is set in the session options,
// then runs the input-model checks followed by the output-model checks.
//
// Returns Status::OK() when both checks succeed. Otherwise the failing status from
// CheckInputModelSettings() or CheckOutputModelSettings() is expected to be propagated by
// ORT_RETURN_IF_ERROR (NOTE(review): macro expansion is not visible here -- confirm).
Status ModelCompilationOptions::Check() const {
// EPContext generation being enabled is treated as a precondition, not as a reportable Status.
ORT_ENFORCE(session_options_.value.ep_context_gen_options.enable);
// Input settings are validated before output settings.
ORT_RETURN_IF_ERROR(CheckInputModelSettings());
ORT_RETURN_IF_ERROR(CheckOutputModelSettings());
return Status::OK();
}
} // namespace onnxruntime
#endif // !defined(ORT_MINIMAL_BUILD)
2 changes: 2 additions & 0 deletions onnxruntime/core/session/model_compilation_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ class ModelCompilationOptions {
private:
void ResetInputModelSettings();
Status ResetOutputModelSettings();
Status CheckInputModelSettings() const;
Status CheckOutputModelSettings() const;

const OrtEnv& env_;
OrtSessionOptions session_options_;
Expand Down
65 changes: 46 additions & 19 deletions onnxruntime/test/providers/qnn/qnn_ep_context_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -385,40 +385,67 @@ TEST_F(QnnHTPBackendTests, CompileApi_FromSessionOptions_OutputModelBuffer) {
// Test using the CompileModel() API with settings:
// - input model from buffer
// - save output model to buffer
// - EPContext nodes in output model use embedded binary blobs.
TEST_F(QnnHTPBackendTests, CompileApi_FromSessionOptions_InputAndOutputModelsInBuffers_Embedded) {
// - test enabling AND disabling embed mode for context binary in EPContext node attributes
TEST_F(QnnHTPBackendTests, CompileApi_FromSessionOptions_InputAndOutputModelsInBuffers) {
// Create a test model and serialize it to a buffer.
TestModel test_model;
CreateTestModel(BuildGraphWithQAndNonQ(false), 21, logging::Severity::kERROR, test_model);
std::string model_data = test_model.Serialize();

// Initialize session options with QNN EP
Ort::SessionOptions so;
Ort::SessionOptions session_options;
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
provider_options["offload_graph_io_quantization"] = "0";
so.AppendExecutionProvider("QNN", provider_options);
session_options.AppendExecutionProvider("QNN", provider_options);

Ort::AllocatorWithDefaultOptions allocator;
void* output_model_buffer = nullptr;
size_t output_model_buffer_size = 0;

// Create model compilation options from the session options.
Ort::ModelCompilationOptions compile_options(*ort_env, so);
compile_options.SetInputModelFromBuffer(reinterpret_cast<const void*>(model_data.data()), model_data.size());
compile_options.SetOutputModelBuffer(allocator, &output_model_buffer, &output_model_buffer_size);
compile_options.SetEpContextEmbedMode(true);
// Test embed mode enabled.
{
void* output_model_buffer = nullptr;
size_t output_model_buffer_size = 0;

// Compile the model.
Ort::Status status = Ort::CompileModel(*ort_env, compile_options);
ASSERT_TRUE(status.IsOK()) << status.GetErrorMessage();
// Create model compilation options from the session options.
Ort::ModelCompilationOptions compile_options(*ort_env, session_options);
compile_options.SetInputModelFromBuffer(reinterpret_cast<const void*>(model_data.data()), model_data.size());
compile_options.SetOutputModelBuffer(allocator, &output_model_buffer, &output_model_buffer_size);
compile_options.SetEpContextEmbedMode(true);

// Make sure the compiled model was saved to the buffer.
ASSERT_TRUE(output_model_buffer != nullptr);
ASSERT_TRUE(output_model_buffer_size > 0);
// Compile the model.
Ort::Status status = Ort::CompileModel(*ort_env, compile_options);
ASSERT_TRUE(status.IsOK()) << status.GetErrorMessage();

// Check that the compiled model has the expected number of EPContext nodes.
CheckEpContextNodeCounts(output_model_buffer, output_model_buffer_size, 2, 2);
// Make sure the compiled model was saved to the buffer.
ASSERT_TRUE(output_model_buffer != nullptr);
ASSERT_TRUE(output_model_buffer_size > 0);

// Check that the compiled model has the expected number of EPContext nodes.
CheckEpContextNodeCounts(output_model_buffer, output_model_buffer_size, 2, 2);
}

// Test embed mode disabled.
{
void* output_model_buffer = nullptr;
size_t output_model_buffer_size = 0;

// Create model compilation options from the session options.
Ort::ModelCompilationOptions compile_options(*ort_env, session_options);
compile_options.SetInputModelFromBuffer(reinterpret_cast<const void*>(model_data.data()), model_data.size());
compile_options.SetOutputModelBuffer(allocator, &output_model_buffer, &output_model_buffer_size);
compile_options.SetEpContextEmbedMode(false);

// Compile the model.
Ort::Status status = Ort::CompileModel(*ort_env, compile_options);
ASSERT_TRUE(status.IsOK()) << status.GetErrorMessage();

// Make sure the compiled model was saved to the buffer.
ASSERT_TRUE(output_model_buffer != nullptr);
ASSERT_TRUE(output_model_buffer_size > 0);

// Check that the compiled model has the expected number of EPContext nodes.
CheckEpContextNodeCounts(output_model_buffer, output_model_buffer_size, 2, 2);
}
}

// Test using the CompileModel() API with settings:
Expand Down
Loading