11
11
#include < unordered_set>
12
12
13
13
#include " core/common/cpuid_info.h"
14
+ #include " core/common/logging/logging.h"
14
15
#include " core/session/abi_devices.h"
15
16
16
- // // UsingSetupApi
17
+ // // For SetupApi info
17
18
#include < Windows.h>
18
19
#include < SetupAPI.h>
19
20
#include < devguid.h>
20
21
#include < cfgmgr32.h>
21
22
#pragma comment(lib, "setupapi.lib")
22
23
23
- // // Using D3D12
24
+ // // For D3D12 info
24
25
// #include <windows.h>
25
26
#include < d3d12.h>
26
27
#include < dxgi1_6.h>
27
28
#include < iostream>
29
+ #include < wrl/client.h>
30
+ using Microsoft::WRL::ComPtr;
28
31
29
32
#pragma comment(lib, "d3d12.lib")
30
33
#pragma comment(lib, "dxgi.lib")
31
34
32
- // // Using DXCore. Requires newer Windows SDK than what we target by default.
33
- // these values were added in 10.0.22621.0 as part of DirectXCore API
34
- //
35
- // In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied.
36
- // with the NTDII_VERSION value there...
37
- //
38
- // Defining a local GUID instead.
39
- // #if NTDDI_VERSION < NTDDI_WIN10_RS5
40
- // DEFINE_GUID(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML, 0xb71b0d41, 0x1088, 0x422f, 0xa2, 0x7c, 0x2, 0x50, 0xb7, 0xd3, 0xa9, 0x88);
41
- // DEFINE_GUID(DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU, 0xd46140c4, 0xadd7, 0x451b, 0x9e, 0x56, 0x6, 0xfe, 0x8c, 0x3b, 0x58, 0xed);
42
- // #endif
35
+ // // For DXCore info.
43
36
#include < initguid.h>
44
37
#include < dxcore.h>
45
38
#include < dxcore_interface.h>
46
39
#include < wil/com.h>
47
40
48
- //
49
- // In theory this #if should be fine, but the QNN ARM64 CI fails even with that applied. Not sure what is happening
50
- // with the NTDII_VERSION value there...
51
- //
52
- // Defining a local GUID instead.
53
41
#include " core/common/cpuid_info.h"
54
42
#include " core/session/abi_devices.h"
55
43
56
44
namespace onnxruntime {
57
- #if !defined(ORT_MINIMAL_BUILD)
45
+ // unsupported in minimal build. also needs xbox specific handling to be implemented.
46
+ #if !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX)
58
47
namespace {
59
48
60
49
// device info we accumulate from various sources
@@ -64,7 +53,6 @@ struct DeviceInfo {
64
53
uint32_t device_id;
65
54
std::wstring vendor;
66
55
std::wstring description;
67
- std::vector<DWORD> bus_ids; // assuming could have multiple GPUs that are the same model
68
56
std::unordered_map<std::wstring, std::wstring> metadata;
69
57
};
70
58
@@ -97,14 +85,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
97
85
for (auto guid : guids) {
98
86
HDEVINFO devInfo = SetupDiGetClassDevs (&guid, nullptr , nullptr , DIGCF_PRESENT);
99
87
if (devInfo == INVALID_HANDLE_VALUE) {
100
- return device_info ;
88
+ continue ;
101
89
}
102
90
103
91
SP_DEVINFO_DATA devData = {};
104
92
devData.cbSize = sizeof (SP_DEVINFO_DATA);
105
93
106
- std::wstring buffer;
107
- buffer.resize (1024 );
94
+ WCHAR buffer[1024 ];
108
95
109
96
for (DWORD i = 0 ; SetupDiEnumDeviceInfo (devInfo, i, &devData); ++i) {
110
97
DWORD size = 0 ;
@@ -114,13 +101,8 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
114
101
DeviceInfo* entry = nullptr ;
115
102
116
103
// // Get hardware ID (contains VEN_xxxx&DEV_xxxx)
117
- if (SetupDiGetDeviceRegistryPropertyW (devInfo,
118
- &devData,
119
- SPDRP_HARDWAREID,
120
- &regDataType,
121
- (PBYTE)buffer.data (),
122
- (DWORD)buffer.size (),
123
- &size)) {
104
+ if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_HARDWAREID, &regDataType,
105
+ (PBYTE)buffer, sizeof (buffer), &size)) {
124
106
// PCI\VEN_xxxx&DEV_yyyy&...
125
107
// ACPI\VEN_xxxx&DEV_yyyy&... if we're lucky.
126
108
// ACPI values seem to be very inconsistent, so we check fairly carefully and always require a device id.
@@ -148,23 +130,31 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
148
130
device_info[key] = {};
149
131
} else {
150
132
if (guid == GUID_DEVCLASS_PROCESSOR) {
151
- // skip duplicate processor entries as we don't need to accumulate bus numbers for them
133
+ // skip duplicate processor entries
152
134
continue ;
153
135
}
154
136
}
155
137
156
138
entry = &device_info[key];
157
139
entry->vendor_id = vendor_id;
158
140
entry->device_id = device_id;
141
+ // put the first hardware id string in the metadata. ignore the other lines.
142
+ entry->metadata .emplace (L" SPDRP_HARDWAREID" , std::wstring (buffer, wcslen (buffer)));
159
143
} else {
160
144
// need valid ids
161
145
continue ;
162
146
}
163
147
164
- // Get device description.
148
+ // Use the friendly name if available.
149
+ if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_FRIENDLYNAME, nullptr ,
150
+ (PBYTE)buffer, sizeof (buffer), &size)) {
151
+ entry->description = std::wstring{buffer};
152
+ }
153
+
154
+ // Set type using the device description to try and infer an NPU.
165
155
if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_DEVICEDESC, nullptr ,
166
- (PBYTE)buffer. data (), (DWORD) buffer. size ( ), &size)) {
167
- entry-> description = buffer;
156
+ (PBYTE)buffer, sizeof ( buffer), &size)) {
157
+ std::wstring desc{ buffer} ;
168
158
169
159
// Should we require the NPU to be found by DXCore or do we want to allow this vague matching?
170
160
// Probably depends on whether we always attempt to run DXCore or not.
@@ -175,9 +165,13 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
175
165
desc.find (L" VPU" ) != std::wstring::npos);
176
166
};
177
167
178
- // not 100% accurate. is there a better way?
168
+ // use description if no friendly name
169
+ if (entry->description .empty ()) {
170
+ entry->description = desc;
171
+ }
172
+
179
173
uint64_t npu_key = GetDeviceKey (*entry);
180
- bool is_npu = npus.count (npu_key) > 0 || possible_npu (entry-> description );
174
+ bool is_npu = npus.count (npu_key) > 0 || possible_npu (desc );
181
175
182
176
if (guid == GUID_DEVCLASS_DISPLAY) {
183
177
entry->type = OrtHardwareDeviceType_GPU;
@@ -201,18 +195,21 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
201
195
}
202
196
203
197
if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_MFG, nullptr ,
204
- (PBYTE)buffer. data (), (DWORD) buffer. size ( ), &size)) {
205
- entry->vendor = buffer;
198
+ (PBYTE)buffer, sizeof ( buffer), &size)) {
199
+ entry->vendor = std::wstring ( buffer, wcslen (buffer)) ;
206
200
}
207
201
208
- if (guid != GUID_DEVCLASS_PROCESSOR) {
209
- DWORD busNumber = 0 ;
210
- size = 0 ;
211
- if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_BUSNUMBER, nullptr ,
212
- reinterpret_cast <PBYTE>(&busNumber), sizeof (busNumber), &size)) {
213
- // push_back in case there are two identical devices. not sure how else to tell them apart
214
- entry->bus_ids .push_back (busNumber);
202
+ // Add the UI number if GPU. Helpful if user has integrated and discrete GPUs
203
+ if (entry->type == OrtHardwareDeviceType_GPU) {
204
+ DWORD ui_number = 0 ;
205
+ if (SetupDiGetDeviceRegistryPropertyW (devInfo, &devData, SPDRP_UI_NUMBER, nullptr ,
206
+ (PBYTE)&ui_number, sizeof (ui_number), &size)) {
207
+ // use value read.
208
+ } else {
209
+ // infer it as 0 if not set.
215
210
}
211
+
212
+ entry->metadata .emplace (L" SPDRP_UI_NUMBER" , std::to_wstring (ui_number));
216
213
}
217
214
}
218
215
@@ -226,50 +223,58 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoSetupApi(const std::unorde
226
223
std::unordered_map<uint64_t , DeviceInfo> GetDeviceInfoD3D12 () {
227
224
std::unordered_map<uint64_t , DeviceInfo> device_info;
228
225
229
- IDXGIFactory6* factory = nullptr ;
230
- HRESULT hr = CreateDXGIFactory1 (IID_PPV_ARGS (&factory));
231
- if (FAILED (hr)) {
226
+ ComPtr<IDXGIFactory6> factory;
227
+ if (FAILED (CreateDXGIFactory2 (0 , IID_PPV_ARGS (&factory)))) {
232
228
std::cerr << " Failed to create DXGI factory.\n " ;
233
229
return device_info;
234
230
}
235
231
236
- IDXGIAdapter1* adapter = nullptr ;
237
-
238
- // iterate by high-performance GPU preference first
239
- for (UINT i = 0 ; factory->EnumAdapterByGpuPreference (i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
240
- IID_PPV_ARGS (&adapter)) != DXGI_ERROR_NOT_FOUND;
241
- ++i) {
232
+ ComPtr<IDXGIAdapter1> adapter;
233
+ for (UINT i = 0 ; factory->EnumAdapters1 (i, adapter.ReleaseAndGetAddressOf ()) != DXGI_ERROR_NOT_FOUND; ++i) {
242
234
DXGI_ADAPTER_DESC1 desc;
243
235
if (FAILED (adapter->GetDesc1 (&desc))) {
244
236
continue ;
245
237
}
246
238
247
- do {
248
- if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0 ||
249
- (desc.Flags & DXGI_ADAPTER_FLAG_REMOTE) != 0 ) {
250
- // software or remote. skip
251
- break ;
252
- }
239
+ if ((desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0 ||
240
+ (desc.Flags & DXGI_ADAPTER_FLAG_REMOTE) != 0 ) {
241
+ // software or remote. skip
242
+ continue ;
243
+ }
253
244
254
- static_assert (sizeof (LUID) == sizeof (uint64_t ), " LUID and uint64_t are not the same size" );
255
- uint64_t key = GetLuidKey (desc.AdapterLuid );
245
+ static_assert (sizeof (LUID) == sizeof (uint64_t ), " LUID and uint64_t are not the same size" );
246
+ uint64_t key = GetLuidKey (desc.AdapterLuid );
256
247
257
- DeviceInfo& info = device_info[key];
258
- info.type = OrtHardwareDeviceType_GPU;
259
- info.vendor_id = desc.VendorId ;
260
- info.device_id = desc.DeviceId ;
261
- info.description = std::wstring (desc.Description );
262
-
263
- info.metadata [L" VideoMemory" ] = std::to_wstring (desc.DedicatedVideoMemory / (1024 * 1024 )) + L" MB" ;
264
- info.metadata [L" SystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
265
- info.metadata [L" SharedSystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
266
- info.metadata [L" HighPerformanceIndex" ] = std::to_wstring (i);
267
- } while (false );
248
+ DeviceInfo& info = device_info[key];
249
+ info.type = OrtHardwareDeviceType_GPU;
250
+ info.vendor_id = desc.VendorId ;
251
+ info.device_id = desc.DeviceId ;
252
+ info.description = std::wstring (desc.Description );
268
253
269
- adapter->Release ();
254
+ info.metadata [L" DxgiAdapterNumber" ] = std::to_wstring (i);
255
+ info.metadata [L" VideoMemory" ] = std::to_wstring (desc.DedicatedVideoMemory / (1024 * 1024 )) + L" MB" ;
256
+ info.metadata [L" SystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
257
+ info.metadata [L" SharedSystemMemory" ] = std::to_wstring (desc.DedicatedSystemMemory / (1024 * 1024 )) + L" MB" ;
270
258
}
271
259
272
- factory->Release ();
260
+ // iterate by high-performance GPU preference to add that info
261
+ for (UINT i = 0 ; factory->EnumAdapterByGpuPreference (
262
+ i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE,
263
+ IID_PPV_ARGS (adapter.ReleaseAndGetAddressOf ())) != DXGI_ERROR_NOT_FOUND;
264
+ ++i) {
265
+ DXGI_ADAPTER_DESC1 desc;
266
+ if (FAILED (adapter->GetDesc1 (&desc))) {
267
+ continue ;
268
+ }
269
+
270
+ uint64_t key = GetLuidKey (desc.AdapterLuid );
271
+
272
+ auto it = device_info.find (key);
273
+ if (it != device_info.end ()) {
274
+ DeviceInfo& info = it->second ;
275
+ info.metadata [L" HighPerformanceIndex" ] = std::to_wstring (i);
276
+ }
277
+ }
273
278
274
279
return device_info;
275
280
}
@@ -284,7 +289,9 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
284
289
return device_info;
285
290
}
286
291
287
- // manually define for older Windows versions. will be no matches but means this code works on machines with dxcore.
292
+ // NOTE: These GUIDs require a newer Windows SDK than what we target by default.
293
+ // They were added in 10.0.22621.0 as part of DirectXCore API
294
+ // To work around this we define a local copy of the values. On an older Windows machine they won't match anything.
288
295
static const GUID local_DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML = {0xb71b0d41 , 0x1088 , 0x422f , 0xa2 , 0x7c , 0x2 , 0x50 , 0xb7 , 0xd3 , 0xa9 , 0x88 };
289
296
static const GUID local_DXCORE_HARDWARE_TYPE_ATTRIBUTE_NPU = {0xd46140c4 , 0xadd7 , 0x451b , 0x9e , 0x56 , 0x6 , 0xfe , 0x8c , 0x3b , 0x58 , 0xed };
290
297
@@ -353,27 +360,17 @@ std::unordered_map<uint64_t, DeviceInfo> GetDeviceInfoDxcore() {
353
360
&is_integrated))) {
354
361
info.metadata [L" Discrete" ] = is_integrated ? L" 0" : L" 1" ;
355
362
}
356
-
357
- // this returns char_t on us-en Windows. assuming it returns wchar_t on other locales but not clear what it
358
- // does when.
359
- // The description from SetupApi is wchar_t so assuming we have that and don't need this one.
360
- //
361
- // hrId = HRESULT_FROM_WIN32(ERROR_NOT_FOUND);
362
- // std::wstring driverDescription;
363
- // driverDescription.resize(256);
364
- // // this doesn't seem to return wchar_t
365
- // if (adapter->IsPropertySupported(DXCoreAdapterProperty::DriverDescription)) {
366
- // hrId = adapter->GetProperty(DXCoreAdapterProperty::DriverDescription, sizeof(driverDescription),
367
- // &driverDescription);
368
- // info.description = driverDescription;
369
- // }
370
363
}
371
364
}
372
365
373
366
return device_info;
374
367
}
375
368
} // namespace
376
369
370
+ // Get devices from various sources and combine them into a single set of devices.
371
+ // For CPU we use setupapi data.
372
+ // For GPU we augment the d3d12 and dxcore data with the setupapi data.
373
+ // For NPU we augment the dxcore data with the setupapi data.
377
374
std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform () {
378
375
// dxcore info. key is luid
379
376
std::unordered_map<uint64_t , DeviceInfo> luid_to_dxinfo = GetDeviceInfoDxcore ();
@@ -408,18 +405,12 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
408
405
}
409
406
}
410
407
411
- std::wstring_convert<std::codecvt_utf8<wchar_t >> converter; // wstring to string
408
+ std::wstring_convert<std::codecvt_utf8<wchar_t > > converter; // wstring to string
412
409
const auto device_to_ortdevice = [&converter](
413
410
DeviceInfo& device,
414
411
std::unordered_map<std::wstring, std::wstring>* extra_metadata = nullptr ) {
415
412
OrtHardwareDevice ortdevice{device.type , device.vendor_id , device.device_id , converter.to_bytes (device.vendor )};
416
413
417
- if (device.bus_ids .size () > 0 ) {
418
- // use the first bus number. not sure how to handle multiple
419
- ortdevice.metadata .Add (" BusNumber" , std::to_string (device.bus_ids .back ()).c_str ());
420
- device.bus_ids .pop_back ();
421
- }
422
-
423
414
if (!device.description .empty ()) {
424
415
ortdevice.metadata .Add (" Description" , converter.to_bytes (device.description ));
425
416
}
@@ -437,6 +428,18 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
437
428
}
438
429
}
439
430
431
+ std::ostringstream oss;
432
+ oss << " Adding OrtHardwareDevice {vendor_id:0x" << std::hex << ortdevice.vendor_id
433
+ << " , device_id:0x" << ortdevice.device_id
434
+ << " , type:" << std::dec << static_cast <int >(ortdevice.type )
435
+ << " , metadata: [" ;
436
+ for (auto & [key, value] : ortdevice.metadata .entries ) {
437
+ oss << key << " =" << value << " , " ;
438
+ }
439
+
440
+ oss << " ]}" << std::endl;
441
+ LOGS_DEFAULT (INFO) << oss.str ();
442
+
440
443
return ortdevice;
441
444
};
442
445
@@ -459,14 +462,14 @@ std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatfor
459
462
// use SetupApi info. merge metadata.
460
463
devices.emplace (device_to_ortdevice (it->second , &device.metadata ));
461
464
} else {
462
- // no matching entry in SetupApi. use the dxinfo. no vendor. no BusNumber.
465
+ // no matching entry in SetupApi. use the dxinfo. will be missing vendor name and UI_NUMBER
463
466
devices.emplace (device_to_ortdevice (device));
464
467
}
465
468
}
466
469
467
470
return devices;
468
471
}
469
- #else // !defined(ORT_MINIMAL_BUILD)
472
+ #else // !defined(ORT_MINIMAL_BUILD) && !defined(_GAMING_XBOX)
470
473
// Fallback used when the build excludes the Windows device discovery implementation:
// no devices are reported.
std::unordered_set<OrtHardwareDevice> DeviceDiscovery::DiscoverDevicesForPlatform() {
  std::unordered_set<OrtHardwareDevice> no_devices;
  return no_devices;
}
0 commit comments