Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 39d60b7

Browse files
Authored Apr 16, 2025
Clean up Compile API (#24436)
### Description

Address additional review comments on #24207:
- Remove use of `#ifdef ORT_MINIMAL_BUILD` in public C/C++ API headers for Compile API.
- Use `AllocatorPtr` internally to ensure memory is properly released if an exception is thrown while serializing the output model to the user's buffer.
- Improve C API function documentation.
- Clean up internal `ModelCompilationOptions` class.

### Motivation and Context

Useful review comments were left on the original PR after merge. This addresses those comments.
1 parent cf99ca9 commit 39d60b7

9 files changed

+376
-129
lines changed
 

‎include/onnxruntime/core/session/onnxruntime_c_api.h

Lines changed: 45 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4926,6 +4926,19 @@ struct OrtApi {
49264926
ORT_API2_STATUS(SessionOptionsSetLoadCancellationFlag, _Inout_ OrtSessionOptions* options,
49274927
_In_ bool cancel);
49284928

4929+
/** \brief Get the Compile API instance.
4930+
*
4931+
* Get the Compile API instance to compile ONNX models. Execution providers that support compilation fuse a subgraph
4932+
* into an EPContext node that wraps a provider-specific binary representation of the subgraph.
4933+
* For more details about the EPContext design, refer to:
4934+
* \htmlonly
4935+
* <a href="https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html">EPContext design document.</a>
4936+
* \endhtmlonly
4937+
*
4938+
* \return Compile API struct instance.
4939+
*
4940+
* \since Version 1.22.
4941+
*/
49294942
const OrtCompileApi*(ORT_API_CALL* GetCompileApi)();
49304943
};
49314944

@@ -5448,20 +5461,25 @@ struct OrtModelEditorApi {
54485461
*
54495462
* Execution providers that support compilation fuse a subgraph into an EPContext node that wraps a provider-specific
54505463
* binary representation of the subgraph.
5451-
* More details relate to EPContext design refers to:
5464+
* For more details about the EPContext design, refer to:
54525465
* \htmlonly
54535466
* <a href="https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html">EPContext design document.</a>
54545467
* \endhtmlonly
54555468
*
5469+
* Example (error handling not shown):
5470+
* OrtStatus* status = NULL;
5471+
* const OrtCompileApi* compile_api = ort_api->GetCompileApi();
5472+
* OrtModelCompilationOptions* compile_options = NULL;
5473+
*
5474+
* status = compile_api->CreateModelCompilationOptionsFromSessionOptions(env, session_options, &compile_options);
5475+
* status = compile_api->ModelCompilationOptions_SetInputModelPath(compile_options, ORT_TSTR("model.onnx"));
5476+
* status = compile_api->ModelCompilationOptions_SetOutputModelPath(compile_options, ORT_TSTR("model.compiled.onnx"));
5477+
* status = compile_api->CompileModel(env, compile_options);
5478+
* compile_api->ReleaseModelCompilationOptions(compile_options);
5479+
*
54565480
* \since Version 1.22.
54575481
*/
54585482
struct OrtCompileApi {
5459-
// Model compilation requires a full build. We return nullptr from GetCompileApi if this is a minimal
5460-
// build, so it doesn't matter if there are no function pointers in this struct as a user will never get an
5461-
// OrtCompileApi instance. We do however need a dummy field to avoid empty struct warning.
5462-
#if defined(ORT_MINIMAL_BUILD)
5463-
const bool not_defined_in_this_build;
5464-
#else
54655483
/// @}
54665484
/// \name OrtModelCompilationOptions
54675485
/// @{
@@ -5486,6 +5504,9 @@ struct OrtCompileApi {
54865504
_In_ const OrtSessionOptions* session_options, _Outptr_ OrtModelCompilationOptions** out);
54875505

54885506
/** \brief Sets the file path to the input ONNX model to compile.
5507+
*
5508+
* The input model's location (e.g., file path or memory buffer) must be set with either
5509+
* ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
54895510
*
54905511
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
54915512
* \param[in] input_model_path Null terminated string of the path (wchar on Windows, char otherwise).
@@ -5498,6 +5519,9 @@ struct OrtCompileApi {
54985519
_In_ const ORTCHAR_T* input_model_path);
54995520

55005521
/** \brief Sets the buffer that stores the bytes of the loaded ONNX model to compile.
5522+
*
5523+
* The input model's location (e.g., file path or memory buffer) must be set with either
5524+
* ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
55015525
*
55025526
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
55035527
* \param[in] input_model_data Buffer containing the loaded ONNX model bytes.
@@ -5514,9 +5538,11 @@ struct OrtCompileApi {
55145538

55155539
/** \brief Sets the file path for the output ONNX model generated by CompileModel.
55165540
*
5517-
* If the output model path is not specified and the output model is not to be stored in a buffer,
5518-
* ONNX Runtime will generate a path based on the input model's file path.
5519-
* Examples:
5541+
* The output model's location (e.g., file path or memory buffer) can be set with either
5542+
* ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
5543+
*
5544+
* If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
5545+
* the input model's file path. Examples:
55205546
* /Path/my_model.onnx -> /Path/my_model_ctx.onnx
55215547
* /Path/my_model -> /Path/my_model_ctx.onnx
55225548
*
@@ -5554,10 +5580,18 @@ struct OrtCompileApi {
55545580
*
55555581
* The caller passes an OrtAllocator that ONNX Runtime uses to allocate memory for the buffer.
55565582
*
5583+
* The output model's location (e.g., file path or memory buffer) can be set with either
5584+
* ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
5585+
*
5586+
* If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
5587+
* the input model's file path. Examples:
5588+
* /Path/my_model.onnx -> /Path/my_model_ctx.onnx
5589+
* /Path/my_model -> /Path/my_model_ctx.onnx
5590+
*
55575591
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
55585592
* \param[in] allocator The allocator used to allocate the buffer for the compiled model.
55595593
* \param[out] output_model_buffer_ptr Pointer to the buffer that stores the compiled model.
5560-
* \param[out] output_model_buffer_size_ptr Pointer set to the size of output buffer in bytes.
5594+
* \param[out] output_model_buffer_size_ptr Pointer set to the size of output model in bytes.
55615595
*
55625596
* \snippet{doc} snippets.dox OrtStatus Return Value
55635597
*
@@ -5604,7 +5638,6 @@ struct OrtCompileApi {
56045638
* \since Version 1.22.
56055639
*/
56065640
ORT_API2_STATUS(CompileModel, _In_ const OrtEnv* env, _In_ const OrtModelCompilationOptions* model_options);
5607-
#endif
56085641
};
56095642
/*
56105643
* This is the old way to add the CUDA provider to the session, please use SessionOptionsAppendExecutionProvider_CUDA above to access the latest functionality

‎include/onnxruntime/core/session/onnxruntime_cxx_api.h

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -559,11 +559,10 @@ ORT_DEFINE_RELEASE(ValueInfo);
559559
ORT_DEFINE_RELEASE(Node);
560560
ORT_DEFINE_RELEASE(Graph);
561561
ORT_DEFINE_RELEASE(Model);
562-
#if !defined(ORT_MINIMAL_BUILD)
563562
ORT_DEFINE_RELEASE_FROM_API_STRUCT(ModelCompilationOptions, GetCompileApi);
564-
#endif // !defined(ORT_MINIMAL_BUILD)
565563

566564
#undef ORT_DEFINE_RELEASE
565+
#undef ORT_DEFINE_RELEASE_FROM_API_STRUCT
567566

568567
/** \brief This is a tagging template type. Use it with Base<T> to indicate that the C++ interface object
569568
* has no ownership of the underlying C object.
@@ -1012,7 +1011,6 @@ struct SessionOptions : detail::SessionOptionsImpl<OrtSessionOptions> {
10121011
ConstSessionOptions GetConst() const { return ConstSessionOptions{this->p_}; }
10131012
};
10141013

1015-
#if !defined(ORT_MINIMAL_BUILD)
10161014
/** \brief Options object used when compiling a model.
10171015
*
10181016
* Wraps ::OrtModelCompilationOptions object and methods
@@ -1021,9 +1019,7 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
10211019
using Base = detail::Base<OrtModelCompilationOptions>;
10221020
using Base::Base;
10231021

1024-
explicit ModelCompilationOptions(std::nullptr_t) {} ///< Create an empty ModelCompilationOptions object, must be assigned a valid one to be used.
1025-
explicit ModelCompilationOptions(OrtModelCompilationOptions* p) ///< Takes ownership of an OrtModelCompilationOptions
1026-
: detail::Base<OrtModelCompilationOptions>{p} {}
1022+
explicit ModelCompilationOptions(std::nullptr_t) {} ///< Create an empty ModelCompilationOptions object, must be assigned a valid one to be used.
10271023

10281024
ModelCompilationOptions(const Env& env, const SessionOptions& session_options);  ///< Wraps OrtCompileApi::CreateModelCompilationOptionsFromSessionOptions
10291025
ModelCompilationOptions(const Env& env, ConstSessionOptions session_options);   ///< Wraps OrtCompileApi::CreateModelCompilationOptionsFromSessionOptions
@@ -1046,7 +1042,6 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
10461042
* \return A Status indicating success or failure.
10471043
*/
10481044
Status CompileModel(const Env& env, const ModelCompilationOptions& model_compilation_options);
1049-
#endif // !defined(ORT_MINIMAL_BUILD)
10501045

10511046
/** \brief Wrapper around ::OrtModelMetadata
10521047
*

‎include/onnxruntime/core/session/onnxruntime_cxx_inline.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -630,7 +630,6 @@ inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter)
630630
return *this;
631631
}
632632

633-
#if !defined(ORT_MINIMAL_BUILD)
634633
inline ModelCompilationOptions::ModelCompilationOptions(const Env& env, const SessionOptions& session_options) {
635634
ThrowOnError(GetCompileApi().CreateModelCompilationOptionsFromSessionOptions(env, session_options, &this->p_));
636635
}
@@ -686,7 +685,6 @@ inline ModelCompilationOptions& ModelCompilationOptions::SetEpContextEmbedMode(
686685
embed_ep_context_in_model));
687686
return *this;
688687
}
689-
#endif // !defined(ORT_MINIMAL_BUILD)
690688

691689
namespace detail {
692690

‎onnxruntime/core/framework/graph_partitioner.cc

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -877,12 +877,12 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
877877
ORT_RETURN_IF(buffer_size > static_cast<size_t>(std::numeric_limits<int>::max()),
878878
"Cannot serialize ONNX ModelProto larger than 2GB");
879879

880-
OrtAllocator* allocator = ep_context_gen_options.output_model_buffer_allocator;
881-
void* buffer = allocator->Alloc(allocator, buffer_size);
882-
model_proto.SerializeToArray(buffer, static_cast<int>(buffer_size));
880+
AllocatorPtr allocator = ep_context_gen_options.output_model_buffer_allocator;
881+
IAllocatorUniquePtr<void> buffer = IAllocator::MakeUniquePtr<void>(allocator, buffer_size);
882+
model_proto.SerializeToArray(buffer.get(), static_cast<int>(buffer_size));
883883

884884
*ep_context_gen_options.output_model_buffer_size_ptr = buffer_size;
885-
*ep_context_gen_options.output_model_buffer_ptr = buffer;
885+
*ep_context_gen_options.output_model_buffer_ptr = buffer.release();
886886
} else {
887887
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(ep_context_model, context_cache_path,
888888
external_ini_path, model_saving_options));

‎onnxruntime/core/framework/session_options.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
#include <functional>
1212
#include <gsl/gsl>
1313
#include "core/common/inlined_containers.h"
14+
#include "core/framework/allocator.h"
1415
#include "core/framework/config_options.h"
1516
#include "core/framework/ort_value.h"
1617
#include "core/session/onnxruntime_c_api.h"
@@ -83,7 +84,7 @@ struct EpContextModelGenerationOptions {
8384
std::string output_model_file_path;
8485
void** output_model_buffer_ptr = nullptr;
8586
size_t* output_model_buffer_size_ptr = nullptr;
86-
OrtAllocator* output_model_buffer_allocator = nullptr;
87+
AllocatorPtr output_model_buffer_allocator = nullptr;
8788

8889
std::string output_external_initializers_file_path;
8990
size_t output_external_initializer_size_threshold = 0;

0 commit comments

Comments
 (0)
Please sign in to comment.