microsoft · adrianlizarraga · Apr 16, 2025 · Apr 15, 2025 · Apr 15, 2025 · Apr 16, 2025
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -4926,6 +4926,19 @@ struct OrtApi {
   ORT_API2_STATUS(SessionOptionsSetLoadCancellationFlag, _Inout_ OrtSessionOptions* options,
                   _In_ bool cancel);
 
+  /** \brief Get the Compile API instance.
+   *
+   * Get the Compile API instance to compile ONNX models. Execution providers that support compilation fuse a subgraph
+   * into an EPContext node that wraps a provider-specific binary representation of the subgraph.
+   * For more details about the EPContext design, refer to:
+   *  \htmlonly
+   *  <a href="https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html">EPContext design document.</a>
+   *  \endhtmlonly
+   *
+   * \return Compile API struct instance.
+   *
+   * \since Version 1.22.
+   */
   const OrtCompileApi*(ORT_API_CALL* GetCompileApi)();
 };
 
@@ -5448,20 +5461,25 @@ struct OrtModelEditorApi {
  *
  * Execution providers that support compilation fuse a subgraph into an EPContext node that wraps a provider-specific
  * binary representation of the subgraph.
- * More details relate to EPContext design refers to:
+ * For more details about the EPContext design, refer to:
  *  \htmlonly
  *  <a href="https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html">EPContext design document.</a>
  *  \endhtmlonly
  *
+ * Example (error handling not shown):
+ *   OrtStatus* status = NULL;
+ *   const OrtCompileApi* compile_api = ort_api->GetCompileApi();
+ *   OrtModelCompilationOptions* compile_options = NULL;
+ *
+ *   status = compile_api->CreateModelCompilationOptionsFromSessionOptions(env, session_options, &compile_options);
+ *   status = compile_api->ModelCompilationOptions_SetInputModelPath(compile_options, ORT_TSTR("model.onnx"));
+ *   status = compile_api->ModelCompilationOptions_SetOutputModelPath(compile_options, ORT_TSTR("model.compiled.onnx"));
+ *   status = compile_api->CompileModel(env, compile_options);
+ *   compile_api->ReleaseModelCompilationOptions(compile_options);
+ *
  * \since Version 1.22.
  */
 struct OrtCompileApi {
-  // Model compilation requires a full build. We return nullptr from GetCompileApi if this is a minimal
-  // build, so it doesn't matter if there are no function pointers in this struct as a user will never get an
-  // OrtCompileApi instance. We do however need a dummy field to avoid empty struct warning.
-#if defined(ORT_MINIMAL_BUILD)
-  const bool not_defined_in_this_build;
-#else
   /// @}
   /// \name OrtModelCompilationOptions
   /// @{
@@ -5486,6 +5504,9 @@ struct OrtCompileApi {
                   _In_ const OrtSessionOptions* session_options, _Outptr_ OrtModelCompilationOptions** out);
 
   /** \brief Sets the file path to the input ONNX model to compile.
+   *
+   * The input model's location (e.g., file path or memory buffer) must be set with either
+   * ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
    *
    * \param[in] model_compile_options The OrtModelCompilationOptions instance.
    * \param[in] input_model_path Null terminated string of the path (wchar on Windows, char otherwise).
@@ -5498,6 +5519,9 @@ struct OrtCompileApi {
                   _In_ const ORTCHAR_T* input_model_path);
 
   /** \brief Sets the buffer that stores the bytes of the loaded ONNX model to compile.
+   *
+   * The input model's location (e.g., file path or memory buffer) must be set with either
+   * ModelCompilationOptions_SetInputModelPath or ModelCompilationOptions_SetInputModelFromBuffer.
    *
    * \param[in] model_compile_options The OrtModelCompilationOptions instance.
    * \param[in] input_model_data Buffer containing the loaded ONNX model bytes.
@@ -5514,9 +5538,11 @@ struct OrtCompileApi {
 
   /** \brief Sets the file path for the output ONNX model generated by CompileModel.
    *
-   * If the output model path is not specified and the output model is not to be stored in a buffer,
-   * ONNX Runtime will generate a path based on the input model's file path.
-   * Examples:
+   * The output model's location (e.g., file path or memory buffer) can be set with either
+   * ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
+   *
+   * If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
+   * the input model's file path. Examples:
    *   /Path/my_model.onnx -> /Path/my_model_ctx.onnx
    *   /Path/my_model -> /Path/my_model_ctx.onnx
    *
@@ -5554,10 +5580,18 @@ struct OrtCompileApi {
    *
    * The caller passes an OrtAllocator that ONNX Runtime uses to allocate memory for the buffer.
    *
+   * The output model's location (e.g., file path or memory buffer) can be set with either
+   * ModelCompilationOptions_SetOutputModelPath or ModelCompilationOptions_SetOutputModelBuffer.
+   *
+   * If the output model's location is not set, ONNX Runtime will generate an output file with a path based on
+   * the input model's file path. Examples:
+   *   /Path/my_model.onnx -> /Path/my_model_ctx.onnx
+   *   /Path/my_model -> /Path/my_model_ctx.onnx
+   *
    * \param[in] model_compile_options The OrtModelCompilationOptions instance.
    * \param[in] allocator The allocator used to allocate the buffer for the compiled model.
    * \param[out] output_model_buffer_ptr Pointer to the buffer that stores the compiled model.
-   * \param[out] output_model_buffer_size_ptr Pointer set to the size of output buffer in bytes.
+   * \param[out] output_model_buffer_size_ptr Pointer set to the size of the output model in bytes.
    *
    * \snippet{doc} snippets.dox OrtStatus Return Value
    *
@@ -5604,7 +5638,6 @@ struct OrtCompileApi {
    * \since Version 1.22.
    */
   ORT_API2_STATUS(CompileModel, _In_ const OrtEnv* env, _In_ const OrtModelCompilationOptions* model_options);
-#endif
 };
 /*
  * This is the old way to add the CUDA provider to the session, please use SessionOptionsAppendExecutionProvider_CUDA above to access the latest functionality

diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -559,11 +559,10 @@ ORT_DEFINE_RELEASE(ValueInfo);
 ORT_DEFINE_RELEASE(Node);
 ORT_DEFINE_RELEASE(Graph);
 ORT_DEFINE_RELEASE(Model);
-#if !defined(ORT_MINIMAL_BUILD)
 ORT_DEFINE_RELEASE_FROM_API_STRUCT(ModelCompilationOptions, GetCompileApi);
-#endif  // !defined(ORT_MINIMAL_BUILD)
 
 #undef ORT_DEFINE_RELEASE
+#undef ORT_DEFINE_RELEASE_FROM_API_STRUCT
 
 /** \brief This is a tagging template type. Use it with Base<T> to indicate that the C++ interface object
  *   has no ownership of the underlying C object.
@@ -1012,7 +1011,6 @@ struct SessionOptions : detail::SessionOptionsImpl<OrtSessionOptions> {
   ConstSessionOptions GetConst() const { return ConstSessionOptions{this->p_}; }
 };
 
-#if !defined(ORT_MINIMAL_BUILD)
 /** \brief Options object used when compiling a model.
  *
  * Wraps ::OrtModelCompilationOptions object and methods
@@ -1021,9 +1019,7 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
   using Base = detail::Base<OrtModelCompilationOptions>;
   using Base::Base;
 
-  explicit ModelCompilationOptions(std::nullptr_t) {}              ///< Create an empty ModelCompilationOptions object, must be assigned a valid one to be used.
-  explicit ModelCompilationOptions(OrtModelCompilationOptions* p)  ///< Takes ownership of an OrtModelCompilationOptions
-      : detail::Base<OrtModelCompilationOptions>{p} {}
+  explicit ModelCompilationOptions(std::nullptr_t) {}  ///< Create an empty ModelCompilationOptions object, must be assigned a valid one to be used.
 
   ModelCompilationOptions(const Env& env, const SessionOptions& session_options);  ///< Wraps OrtApi::CreateModelCompilationOptionsFromSessionOptions
   ModelCompilationOptions(const Env& env, ConstSessionOptions session_options);    ///< Wraps OrtApi::CreateModelCompilationOptionsFromSessionOptions
@@ -1046,7 +1042,6 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
  * \return A Status indicating success or failure.
  */
 Status CompileModel(const Env& env, const ModelCompilationOptions& model_compilation_options);
-#endif  // !defined(ORT_MINIMAL_BUILD)
 
 /** \brief Wrapper around ::OrtModelMetadata
  *

diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -630,7 +630,6 @@ inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter)
   return *this;
 }
 
-#if !defined(ORT_MINIMAL_BUILD)
 inline ModelCompilationOptions::ModelCompilationOptions(const Env& env, const SessionOptions& session_options) {
   ThrowOnError(GetCompileApi().CreateModelCompilationOptionsFromSessionOptions(env, session_options, &this->p_));
 }
@@ -686,7 +685,6 @@ inline ModelCompilationOptions& ModelCompilationOptions::SetEpContextEmbedMode(
       embed_ep_context_in_model));
   return *this;
 }
-#endif  // !defined(ORT_MINIMAL_BUILD)
 
 namespace detail {
 

diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc
@@ -877,12 +877,12 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
     ORT_RETURN_IF(buffer_size > static_cast<size_t>(std::numeric_limits<int>::max()),
                   "Cannot serialize ONNX ModelProto larger than 2GB");
 
-    OrtAllocator* allocator = ep_context_gen_options.output_model_buffer_allocator;
-    void* buffer = allocator->Alloc(allocator, buffer_size);
-    model_proto.SerializeToArray(buffer, static_cast<int>(buffer_size));
+    AllocatorPtr allocator = ep_context_gen_options.output_model_buffer_allocator;
+    IAllocatorUniquePtr<void> buffer = IAllocator::MakeUniquePtr<void>(allocator, buffer_size);
+    model_proto.SerializeToArray(buffer.get(), static_cast<int>(buffer_size));
 
     *ep_context_gen_options.output_model_buffer_size_ptr = buffer_size;
-    *ep_context_gen_options.output_model_buffer_ptr = buffer;
+    *ep_context_gen_options.output_model_buffer_ptr = buffer.release();
   } else {
     ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(ep_context_model, context_cache_path,
                                                             external_ini_path, model_saving_options));

diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h
@@ -11,6 +11,7 @@
 #include <functional>
 #include <gsl/gsl>
 #include "core/common/inlined_containers.h"
+#include "core/framework/allocator.h"
 #include "core/framework/config_options.h"
 #include "core/framework/ort_value.h"
 #include "core/session/onnxruntime_c_api.h"
@@ -83,7 +84,7 @@ struct EpContextModelGenerationOptions {
   std::string output_model_file_path;
   void** output_model_buffer_ptr = nullptr;
   size_t* output_model_buffer_size_ptr = nullptr;
-  OrtAllocator* output_model_buffer_allocator = nullptr;
+  AllocatorPtr output_model_buffer_allocator = nullptr;
 
   std::string output_external_initializers_file_path;
   size_t output_external_initializer_size_threshold = 0;