Skip to content

Clean up Compile API #24436

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 16, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 45 additions & 12 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
@@ -4926,6 +4926,19 @@ struct OrtApi {
ORT_API2_STATUS(SessionOptionsSetLoadCancellationFlag, _Inout_ OrtSessionOptions* options,
_In_ bool cancel);

/** \brief Get the Compile API instance.
*
* Get the Compile API instance to compile ONNX models. Execution providers that support compilation fuse a subgraph
* into an EPContext node that wraps a provider-specific binary representation of the subgraph.
* For more details about the EPContext design, refer to:
* \htmlonly
* <a href="https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html">EPContext design document.</a>
* \endhtmlonly
*
* \return Compile API struct instance.
*
* \since Version 1.22.
*/
const OrtCompileApi*(ORT_API_CALL* GetCompileApi)();
};

@@ -5448,20 +5461,25 @@ struct OrtModelEditorApi {
*
* Execution providers that support compilation fuse a subgraph into an EPContext node that wraps a provider-specific
* binary representation of the subgraph.
* More details relate to EPContext design refers to:
* For more details about the EPContext design, refer to:
* \htmlonly
* <a href="https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html">EPContext design document.</a>
* \endhtmlonly
*
* Example (error handling not shown):
* OrtStatus* status = NULL;
* const OrtCompileApi* compile_api = ort_api->GetCompileApi();
* OrtModelCompilationOptions* compile_options = NULL;
*
* status = compile_api->CreateModelCompilationOptionsFromSessionOptions(env, session_options, &compile_options);
* status = compile_api->ModelCompilationOptions_SetInputModelPath(compile_options, ORT_TSTR("model.onnx"));
* status = compile_api->ModelCompilationOptions_SetOutputModelPath(compile_options, ORT_TSTR("model.compiled.onnx"));
* status = compile_api->CompileModel(env, compile_options);
* compile_api->ReleaseModelCompilationOptions(compile_options);
*
* \since Version 1.22.
*/
struct OrtCompileApi {
// Model compilation requires a full build. We return nullptr from GetCompileApi if this is a minimal
// build, so it doesn't matter if there are no function pointers in this struct as a user will never get an
// OrtCompileApi instance. We do however need a dummy field to avoid empty struct warning.
#if defined(ORT_MINIMAL_BUILD)
const bool not_defined_in_this_build;
#else
/// @}
/// \name OrtModelCompilationOptions
/// @{
@@ -5486,6 +5504,9 @@ struct OrtCompileApi {
_In_ const OrtSessionOptions* session_options, _Outptr_ OrtModelCompilationOptions** out);

/** \brief Sets the file path to the input ONNX model to compile.
*
* The input model's location (e.g., file path or memory buffer) must be set with either
* ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
*
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
* \param[in] input_model_path Null terminated string of the path (wchar on Windows, char otherwise).
@@ -5498,6 +5519,9 @@ struct OrtCompileApi {
_In_ const ORTCHAR_T* input_model_path);

/** \brief Sets the buffer that stores the bytes of the loaded ONNX model to compile.
*
* The input model's location (e.g., file path or memory buffer) must be set with either
* ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
*
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
* \param[in] input_model_data Buffer containing the loaded ONNX model bytes.
@@ -5514,9 +5538,11 @@ struct OrtCompileApi {

/** \brief Sets the file path for the output ONNX model generated by CompileModel.
*
* If the output model path is not specified and the output model is not to be stored in a buffer,
* ONNX Runtime will generate a path based on the input model's file path.
* Examples:
* The output model's location (e.g., file path or memory buffer) can be set with either
* ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
*
* If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
* the input model's file path. Examples:
* /Path/my_model.onnx -> /Path/my_model_ctx.onnx
* /Path/my_model -> /Path/my_model_ctx.onnx
*
@@ -5554,10 +5580,18 @@ struct OrtCompileApi {
*
* The caller passes an OrtAllocator that ONNX Runtime uses to allocate memory for the buffer.
*
* The output model's location (e.g., file path or memory buffer) can be set with either
* ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
*
* If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
* the input model's file path. Examples:
* /Path/my_model.onnx -> /Path/my_model_ctx.onnx
* /Path/my_model -> /Path/my_model_ctx.onnx
*
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
* \param[in] allocator The allocator used to allocate the buffer for the compiled model.
* \param[out] output_model_buffer_ptr Pointer to the buffer that stores the compiled model.
* \param[out] output_model_buffer_size_ptr Pointer set to the size of output buffer in bytes.
* \param[out] output_model_buffer_size_ptr Pointer set to the size of output model in bytes.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
@@ -5604,7 +5638,6 @@ struct OrtCompileApi {
* \since Version 1.22.
*/
ORT_API2_STATUS(CompileModel, _In_ const OrtEnv* env, _In_ const OrtModelCompilationOptions* model_options);
#endif
};
/*
* This is the old way to add the CUDA provider to the session. Please use SessionOptionsAppendExecutionProvider_CUDA above to access the latest functionality
9 changes: 2 additions & 7 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
@@ -559,11 +559,10 @@ ORT_DEFINE_RELEASE(ValueInfo);
ORT_DEFINE_RELEASE(Node);
ORT_DEFINE_RELEASE(Graph);
ORT_DEFINE_RELEASE(Model);
#if !defined(ORT_MINIMAL_BUILD)
ORT_DEFINE_RELEASE_FROM_API_STRUCT(ModelCompilationOptions, GetCompileApi);
#endif // !defined(ORT_MINIMAL_BUILD)

#undef ORT_DEFINE_RELEASE
#undef ORT_DEFINE_RELEASE_FROM_API_STRUCT

/** \brief This is a tagging template type. Use it with Base<T> to indicate that the C++ interface object
* has no ownership of the underlying C object.
@@ -1012,7 +1011,6 @@ struct SessionOptions : detail::SessionOptionsImpl<OrtSessionOptions> {
ConstSessionOptions GetConst() const { return ConstSessionOptions{this->p_}; }
};

#if !defined(ORT_MINIMAL_BUILD)
/** \brief Options object used when compiling a model.
*
* Wraps ::OrtModelCompilationOptions object and methods
@@ -1021,9 +1019,7 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
using Base = detail::Base<OrtModelCompilationOptions>;
using Base::Base;

explicit ModelCompilationOptions(std::nullptr_t) {} ///< Create an empty ModelCompilationOptions object, must be assigned a valid one to be used.
explicit ModelCompilationOptions(OrtModelCompilationOptions* p) ///< Takes ownership of an OrtModelCompilationOptions
: detail::Base<OrtModelCompilationOptions>{p} {}
explicit ModelCompilationOptions(std::nullptr_t) {} ///< Create an empty ModelCompilationOptions object, must be assigned a valid one to be used.

ModelCompilationOptions(const Env& env, const SessionOptions& session_options); ///< Wraps OrtCompileApi::CreateModelCompilationOptionsFromSessionOptions
ModelCompilationOptions(const Env& env, ConstSessionOptions session_options); ///< Wraps OrtCompileApi::CreateModelCompilationOptionsFromSessionOptions
@@ -1046,7 +1042,6 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
* \return A Status indicating success or failure.
*/
Status CompileModel(const Env& env, const ModelCompilationOptions& model_compilation_options);
#endif // !defined(ORT_MINIMAL_BUILD)

/** \brief Wrapper around ::OrtModelMetadata
*
2 changes: 0 additions & 2 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
@@ -630,7 +630,6 @@ inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter)
return *this;
}

#if !defined(ORT_MINIMAL_BUILD)
// Constructs the compilation options from an existing SessionOptions by calling the Compile API's
// CreateModelCompilationOptionsFromSessionOptions. The created handle is written into this->p_,
// and the returned status is passed to ThrowOnError.
inline ModelCompilationOptions::ModelCompilationOptions(const Env& env, const SessionOptions& session_options) {
ThrowOnError(GetCompileApi().CreateModelCompilationOptionsFromSessionOptions(env, session_options, &this->p_));
}
@@ -686,7 +685,6 @@ inline ModelCompilationOptions& ModelCompilationOptions::SetEpContextEmbedMode(
embed_ep_context_in_model));
return *this;
}
#endif // !defined(ORT_MINIMAL_BUILD)

namespace detail {

8 changes: 4 additions & 4 deletions onnxruntime/core/framework/graph_partitioner.cc
Original file line number Diff line number Diff line change
@@ -877,12 +877,12 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
ORT_RETURN_IF(buffer_size > static_cast<size_t>(std::numeric_limits<int>::max()),
"Cannot serialize ONNX ModelProto larger than 2GB");

OrtAllocator* allocator = ep_context_gen_options.output_model_buffer_allocator;
void* buffer = allocator->Alloc(allocator, buffer_size);
model_proto.SerializeToArray(buffer, static_cast<int>(buffer_size));
AllocatorPtr allocator = ep_context_gen_options.output_model_buffer_allocator;
IAllocatorUniquePtr<void> buffer = IAllocator::MakeUniquePtr<void>(allocator, buffer_size);
model_proto.SerializeToArray(buffer.get(), static_cast<int>(buffer_size));

*ep_context_gen_options.output_model_buffer_size_ptr = buffer_size;
*ep_context_gen_options.output_model_buffer_ptr = buffer;
*ep_context_gen_options.output_model_buffer_ptr = buffer.release();
} else {
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(ep_context_model, context_cache_path,
external_ini_path, model_saving_options));
3 changes: 2 additions & 1 deletion onnxruntime/core/framework/session_options.h
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
#include <functional>
#include <gsl/gsl>
#include "core/common/inlined_containers.h"
#include "core/framework/allocator.h"
#include "core/framework/config_options.h"
#include "core/framework/ort_value.h"
#include "core/session/onnxruntime_c_api.h"
@@ -83,7 +84,7 @@ struct EpContextModelGenerationOptions {
std::string output_model_file_path;
void** output_model_buffer_ptr = nullptr;
size_t* output_model_buffer_size_ptr = nullptr;
OrtAllocator* output_model_buffer_allocator = nullptr;
AllocatorPtr output_model_buffer_allocator = nullptr;

std::string output_external_initializers_file_path;
size_t output_external_initializer_size_threshold = 0;
Loading