Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 043c26d

Browse files
skottmckay, ashrit-ms
authored and committed Apr 24, 2025 ·
Refine device discovery a bit more. (#24481)
### Description
<!-- Describe your changes. -->
Fix some issues:
- Use the adapter number instead of the bus number. The bus number doesn't work as expected on VMs.
- Disable for the XBOX build, which needs different handling for adapter lookup.
- Use the adapter number as device_id when creating the DML OrtEpDevice.
- Fix some issues with the metadata.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent 8487ca2 commit 043c26d

File tree

3 files changed

+117
-112
lines changed

3 files changed

+117
-112
lines changed
 

‎onnxruntime/core/platform/windows/device_discovery.cc

Lines changed: 102 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -11,50 +11,39 @@
1111
#include <unordered_set>
1212

1313
#include "core/common/cpuid_info.h"
14+
#include "core/common/logging/logging.h"
1415
#include "core/session/abi_devices.h"
1516

16-
//// UsingSetupApi
17+
//// For SetupApi info
1718
#include <Windows.h>
1819
#include <SetupAPI.h>
1920
#include <devguid.h>
2021
#include <cfgmgr32.h>
2122
#pragma comment(lib, "setupapi.lib")
2223

23-
//// Using D3D12
24+
//// For D3D12 info
2425
// #include <windows.h>
2526
#include <d3d12.h>
2627
#include <dxgi1_6.h>
2728
#include <iostream>
29+
#include <wrl/client.h>
30+
using Microsoft::WRL::ComPtr;
2831

2932
#pragma comment(lib, "d3d12.lib")
3033
#pragma comment(lib, "dxgi.lib")
3134

32-
//// Using DXCore. Requires newer Windows SDK than what we target by default.
33-
// these values were added in 10.0.22621.0 as part of DirectXCore API
34-
//
35-
// In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied.
36-
// with the NTDII_VERSION value there...
37-
//
38-
// Defining a local GUID instead.
39-
// #if NTDDI_VERSION < NTDDI_WIN10_RS5
40-
// DEFINE_GUID(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML, 0xb71b0d41, 0x1088, 0x422f, 0xa2, 0x7c, 0x2, 0x50, 0xb7, 0xd3, 0xa9, 0x88);
41-
// DEFINE_GUID(DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU, 0xd46140c4, 0xadd7, 0x451b, 0x9e, 0x56, 0x6, 0xfe, 0x8c, 0x3b, 0x58, 0xed);
42-
// #endif
35+
//// For DXCore info.
4336
#include <initguid.h>
4437
#include <dxcore.h>
4538
#include <dxcore_interface.h>
4639
#include <wil/com.h>
4740

48-
//
49-
// In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied. Not sure what is happening
50-
// with the NTDII_VERSION value there...
51-
//
52-
// Defining a local GUID instead.
5341
#include "core/common/cpuid_info.h"
5442
#include "core/session/abi_devices.h"
5543

5644
namespace onnxruntime {
57-
#if !defined(ORT_MINIMAL_BUILD)
45+
// unsupported in minimal build. also needs xbox specific handling to be implemented.
46+
#if !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX)
5847
namespace {
5948

6049
// device info we accumulate from various sources
@@ -64,7 +53,6 @@ struct DeviceInfo {
6453
uint32_t device_id;
6554
std::wstring vendor;
6655
std::wstring description;
67-
std::vector<DWORD> bus_ids; // assuming could have multiple GPUs that are the same model
6856
std::unordered_map<std::wstring, std::wstring> metadata;
6957
};
7058

@@ -97,14 +85,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
9785
for (auto guid : guids) {
9886
HDEVINFO devInfo = SetupDiGetClassDevs(&guid, nullptr, nullptr, DIGCF_PRESENT);
9987
if (devInfo == INVALID_HANDLE_VALUE) {
100-
return device_info;
88+
continue;
10189
}
10290

10391
SP_DEVINFO_DATA devData = {};
10492
devData.cbSize = sizeof(SP_DEVINFO_DATA);
10593

106-
std::wstring buffer;
107-
buffer.resize(1024);
94+
WCHAR buffer[1024];
10895

10996
for (DWORD i = 0; SetupDiEnumDeviceInfo(devInfo, i, &devData); ++i) {
11097
DWORD size = 0;
@@ -114,13 +101,8 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
114101
DeviceInfo* entry = nullptr;
115102

116103
//// Get hardware ID (contains VEN_xxxx&DEV_xxxx)
117-
if (SetupDiGetDeviceRegistryPropertyW(devInfo,
118-
&devData,
119-
SPDRP_HARDWAREID,
120-
&regDataType,
121-
(PBYTE)buffer.data(),
122-
(DWORD)buffer.size(),
123-
&size)) {
104+
if (SetupDiGetDeviceRegistryPropertyW(devInfo, &devData, SPDRP_HARDWAREID, &regDataType,
105+
(PBYTE)buffer, sizeof(buffer), &size)) {
124106
// PCI\VEN_xxxx&DEV_yyyy&...
125107
// ACPI\VEN_xxxx&DEV_yyyy&... if we're lucky.
126108
// ACPI values seem to be very inconsistent, so we check fairly carefully and always require a device id.
@@ -148,23 +130,31 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
148130
device_info[key] = {};
149131
} else {
150132
if (guid == GUID_DEVCLASS_PROCESSOR) {
151-
// skip duplicate processor entries as we don't need to accumulate bus numbers for them
133+
// skip duplicate processor entries
152134
continue;
153135
}
154136
}
155137

156138
entry = &device_info[key];
157139
entry->vendor_id = vendor_id;
158140
entry->device_id = device_id;
141+
// put the first hardware id string in the metadata. ignore the other lines.
142+
entry->metadata.emplace(L"SPDRP_HARDWAREID", std::wstring(buffer, wcslen(buffer)));
159143
} else {
160144
// need valid ids
161145
continue;
162146
}
163147

164-
// Get device description.
148+
// Use the friendly name if available.
149+
if (SetupDiGetDeviceRegistryPropertyW(devInfo, &devData, SPDRP_FRIENDLYNAME, nullptr,
150+
(PBYTE)buffer, sizeof(buffer), &size)) {
151+
entry->description = std::wstring{buffer};
152+
}
153+
154+
// Set type using the device description to try and infer an NPU.
165155
if (SetupDiGetDeviceRegistryPropertyW(devInfo, &devData, SPDRP_DEVICEDESC, nullptr,
166-
(PBYTE)buffer.data(), (DWORD)buffer.size(), &size)) {
167-
entry->description = buffer;
156+
(PBYTE)buffer, sizeof(buffer), &size)) {
157+
std::wstring desc{buffer};
168158

169159
// Should we require the NPU to be found by DXCore or do we want to allow this vague matching?
170160
// Probably depends on whether we always attempt to run DXCore or not.
@@ -175,9 +165,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
175165
desc.find(L"VPU") != std::wstring::npos);
176166
};
177167

178-
// not 100% accurate. is there a better way?
168+
// use description if no friendly name
169+
if (entry->description.empty()) {
170+
entry->description = desc;
171+
}
172+
179173
uint64_t npu_key = GetDeviceKey(*entry);
180-
bool is_npu = npus.count(npu_key) > 0 || possible_npu(entry->description);
174+
bool is_npu = npus.count(npu_key) > 0 || possible_npu(desc);
181175

182176
if (guid == GUID_DEVCLASS_DISPLAY) {
183177
entry->type = OrtHardwareDeviceType_GPU;
@@ -201,18 +195,21 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
201195
}
202196

203197
if (SetupDiGetDeviceRegistryPropertyW(devInfo, &devData, SPDRP_MFG, nullptr,
204-
(PBYTE)buffer.data(), (DWORD)buffer.size(), &size)) {
205-
entry->vendor = buffer;
198+
(PBYTE)buffer, sizeof(buffer), &size)) {
199+
entry->vendor = std::wstring(buffer, wcslen(buffer));
206200
}
207201

208-
if (guid != GUID_DEVCLASS_PROCESSOR) {
209-
DWORD busNumber = 0;
210-
size = 0;
211-
if (SetupDiGetDeviceRegistryPropertyW(devInfo, &devData, SPDRP_BUSNUMBER, nullptr,
212-
reinterpret_cast<PBYTE>(&busNumber), sizeof(busNumber), &size)) {
213-
// push_back in case there are two identical devices. not sure how else to tell them apart
214-
entry->bus_ids.push_back(busNumber);
202+
// Add the UI number if GPU. Helpful if user has integrated and discrete GPUs
203+
if (entry->type == OrtHardwareDeviceType_GPU) {
204+
DWORD ui_number = 0;
205+
if (SetupDiGetDeviceRegistryPropertyW(devInfo, &devData, SPDRP_UI_NUMBER, nullptr,
206+
(PBYTE)&ui_number, sizeof(ui_number), &size)) {
207+
// use value read.
208+
} else {
209+
// infer it as 0 if not set.
215210
}
211+
212+
entry->metadata.emplace(L"SPDRP_UI_NUMBER", std::to_wstring(ui_number));
216213
}
217214
}
218215

@@ -226,50 +223,58 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
226223
std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoD3D12() {
227224
std::unordered_map<uint64_t, DeviceInfo> device_info;
228225

229-
IDXGIFactory6* factory = nullptr;
230-
HRESULT hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory));
231-
if (FAILED(hr)) {
226+
ComPtr<IDXGIFactory6> factory;
227+
if (FAILED(CreateDXGIFactory2(0, IID_PPV_ARGS(&factory)))) {
232228
std::cerr << "Failed to create DXGI factory.\n";
233229
return device_info;
234230
}
235231

236-
IDXGIAdapter1* adapter = nullptr;
237-
238-
// iterate by high-performance GPU preference first
239-
for (UINT i = 0; factory->EnumAdapterByGpuPreference(i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
240-
IID_PPV_ARGS(&adapter)) != DXGI_ERROR_NOT_FOUND;
241-
++i) {
232+
ComPtr<IDXGIAdapter1> adapter;
233+
for (UINT i = 0; factory->EnumAdapters1(i, adapter.ReleaseAndGetAddressOf()) != DXGI_ERROR_NOT_FOUND; ++i) {
242234
DXGI_ADAPTER_DESC1 desc;
243235
if (FAILED(adapter->GetDesc1(&desc))) {
244236
continue;
245237
}
246238

247-
do {
248-
if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0 ||
249-
(desc.Flags & DXGI_ADAPTER_FLAG_REMOTE) != 0) {
250-
// software or remote. skip
251-
break;
252-
}
239+
if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0 ||
240+
(desc.Flags & DXGI_ADAPTER_FLAG_REMOTE) != 0) {
241+
// software or remote. skip
242+
continue;
243+
}
253244

254-
static_assert(sizeof(LUID) == sizeof(uint64_t), "LUID and uint64_t are not the same size");
255-
uint64_t key = GetLuidKey(desc.AdapterLuid);
245+
static_assert(sizeof(LUID) == sizeof(uint64_t), "LUID and uint64_t are not the same size");
246+
uint64_t key = GetLuidKey(desc.AdapterLuid);
256247

257-
DeviceInfo& info = device_info[key];
258-
info.type = OrtHardwareDeviceType_GPU;
259-
info.vendor_id = desc.VendorId;
260-
info.device_id = desc.DeviceId;
261-
info.description = std::wstring(desc.Description);
262-
263-
info.metadata[L"VideoMemory"] = std::to_wstring(desc.DedicatedVideoMemory / (1024 * 1024)) + L" MB";
264-
info.metadata[L"SystemMemory"] = std::to_wstring(desc.DedicatedSystemMemory / (1024 * 1024)) + L" MB";
265-
info.metadata[L"SharedSystemMemory"] = std::to_wstring(desc.DedicatedSystemMemory / (1024 * 1024)) + L" MB";
266-
info.metadata[L"HighPerformanceIndex"] = std::to_wstring(i);
267-
} while (false);
248+
DeviceInfo& info = device_info[key];
249+
info.type = OrtHardwareDeviceType_GPU;
250+
info.vendor_id = desc.VendorId;
251+
info.device_id = desc.DeviceId;
252+
info.description = std::wstring(desc.Description);
268253

269-
adapter->Release();
254+
info.metadata[L"DxgiAdapterNumber"] = std::to_wstring(i);
255+
info.metadata[L"VideoMemory"] = std::to_wstring(desc.DedicatedVideoMemory / (1024 * 1024)) + L" MB";
256+
info.metadata[L"SystemMemory"] = std::to_wstring(desc.DedicatedSystemMemory / (1024 * 1024)) + L" MB";
257+
info.metadata[L"SharedSystemMemory"] = std::to_wstring(desc.DedicatedSystemMemory / (1024 * 1024)) + L" MB";
270258
}
271259

272-
factory->Release();
260+
// iterate by high-performance GPU preference to add that info
261+
for (UINT i = 0; factory->EnumAdapterByGpuPreference(
262+
i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
263+
IID_PPV_ARGS(adapter.ReleaseAndGetAddressOf())) != DXGI_ERROR_NOT_FOUND;
264+
++i) {
265+
DXGI_ADAPTER_DESC1 desc;
266+
if (FAILED(adapter->GetDesc1(&desc))) {
267+
continue;
268+
}
269+
270+
uint64_t key = GetLuidKey(desc.AdapterLuid);
271+
272+
auto it = device_info.find(key);
273+
if (it != device_info.end()) {
274+
DeviceInfo& info = it->second;
275+
info.metadata[L"HighPerformanceIndex"] = std::to_wstring(i);
276+
}
277+
}
273278

274279
return device_info;
275280
}
@@ -284,7 +289,9 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
284289
return device_info;
285290
}
286291

287-
// manually define for older Windows versions. will be no matches but means this code works on machines with dxcore.
292+
// NOTE: These GUIDs requires a newer Windows SDK than what we target by default.
293+
// They were added in 10.0.22621.0 as part of DirectXCore API
294+
// To workaround this we define a local copy of the values. On an older Windows machine they won't match anything.
288295
static const GUID local_DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML = {0xb71b0d41, 0x1088, 0x422f, 0xa2, 0x7c, 0x2, 0x50, 0xb7, 0xd3, 0xa9, 0x88};
289296
static const GUID local_DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU = {0xd46140c4, 0xadd7, 0x451b, 0x9e, 0x56, 0x6, 0xfe, 0x8c, 0x3b, 0x58, 0xed};
290297

@@ -353,27 +360,17 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
353360
&is_integrated))) {
354361
info.metadata[L"Discrete"] = is_integrated ? L"0" : L"1";
355362
}
356-
357-
// this returns char_t on us-en Windows. assuming it returns wchar_t on other locales but not clear what it
358-
// does when.
359-
// The description from SetupApi is wchar_t so assuming we have that and don't need this one.
360-
//
361-
// hrId = HRESULT_FROM_WIN32(ERROR_NOT_FOUND);
362-
// std::wstring driverDescription;
363-
// driverDescription.resize(256);
364-
//// this doesn't seem to return wchar_t
365-
// if (adapter->IsPropertySupported(DXCoreAdapterProperty::DriverDescription)) {
366-
// hrId = adapter->GetProperty(DXCoreAdapterProperty::DriverDescription, sizeof(driverDescription),
367-
// &driverDescription);
368-
// info.description = driverDescription;
369-
// }
370363
}
371364
}
372365

373366
return device_info;
374367
}
375368
} // namespace
376369

370+
// Get devices from various sources and combine them into a single set of devices.
371+
// For CPU we use setupapi data.
372+
// For GPU we augment the d3d12 and dxcore data with the setupapi data.
373+
// For NPU we augment the dxcore data with the setupapi data.
377374
std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform() {
378375
// dxcore info. key is luid
379376
std::unordered_map<uint64_t, DeviceInfo> luid_to_dxinfo = GetDeviceInfoDxcore();
@@ -408,18 +405,12 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
408405
}
409406
}
410407

411-
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter; // wstring to string
408+
std::wstring_convert<std::codecvt_utf8<wchar_t> > converter; // wstring to string
412409
const auto device_to_ortdevice = [&converter](
413410
DeviceInfo& device,
414411
std::unordered_map<std::wstring, std::wstring>* extra_metadata = nullptr) {
415412
OrtHardwareDevice ortdevice{device.type, device.vendor_id, device.device_id, converter.to_bytes(device.vendor)};
416413

417-
if (device.bus_ids.size() > 0) {
418-
// use the first bus number. not sure how to handle multiple
419-
ortdevice.metadata.Add("BusNumber", std::to_string(device.bus_ids.back()).c_str());
420-
device.bus_ids.pop_back();
421-
}
422-
423414
if (!device.description.empty()) {
424415
ortdevice.metadata.Add("Description", converter.to_bytes(device.description));
425416
}
@@ -437,6 +428,18 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
437428
}
438429
}
439430

431+
std::ostringstream oss;
432+
oss << "Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id
433+
<< ", device_id:0x" << ortdevice.device_id
434+
<< ", type:" << std::dec << static_cast<int>(ortdevice.type)
435+
<< ", metadata: [";
436+
for (auto& [key, value] : ortdevice.metadata.entries) {
437+
oss << key << "=" << value << ", ";
438+
}
439+
440+
oss << "]}" << std::endl;
441+
LOGS_DEFAULT(INFO) << oss.str();
442+
440443
return ortdevice;
441444
};
442445

@@ -459,14 +462,14 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
459462
// use SetupApi info. merge metadata.
460463
devices.emplace(device_to_ortdevice(it->second, &device.metadata));
461464
} else {
462-
// no matching entry in SetupApi. use the dxinfo. no vendor. no BusNumber.
465+
// no matching entry in SetupApi. use the dxinfo. will be missing vendor name and UI_NUMBER
463466
devices.emplace(device_to_ortdevice(device));
464467
}
465468
}
466469

467470
return devices;
468471
}
469-
#else // !defined(ORT_MINIMAL_BUILD)
472+
#else // !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX)
470473
std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform() {
471474
return {};
472475
}

‎onnxruntime/core/session/environment.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,11 +500,13 @@ Status Environment::EpInfo::Create(std::unique_ptr<EpLibrary> library_in, std::u
500500
ed->device = &device;
501501

502502
if (ep_metadata) {
503-
ed->ep_metadata = *ep_metadata;
503+
ed->ep_metadata = std::move(*ep_metadata);
504+
delete ep_metadata;
504505
}
505506

506507
if (ep_options) {
507-
ed->ep_options = *ep_options;
508+
ed->ep_options = std::move(*ep_options);
509+
delete ep_options;
508510
}
509511

510512
ed->ep_factory = &factory;

0 commit comments

Comments
 (0)
Please sign in to comment.