forked from bo3b/3Dmigoto
-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.h
1577 lines (1423 loc) · 40.8 KB
/
util.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#pragma once
#include <ctype.h>
#include <wchar.h>
#include <string>
#include <vector>
#include <map>
#include <d3d11_1.h>
#include <dxgi1_2.h>
#include <D3Dcompiler.h>
#include <d3d9.h>
#include <DirectXMath.h>
#include "version.h"
#include "log.h"
#include "crc32c.h"
#include "util_min.h"
#include "D3D_Shaders\stdafx.h"
#if MIGOTO_DX == 11
#include "DirectX11\HookedDevice.h"
#include "DirectX11\HookedContext.h"
#elif MIGOTO_DX == 9
#include "DirectX9\HookedDeviceDX9.h"
#endif // MIGOTO_DX
// Sets the threshold for warning about IniParams size. The larger IniParams is
// the more CPU -> GPU bandwidth we will require to update it, so we want to
// discourage modders from picking arbitrarily high IniParams.
//
// This threshold is somewhat arbitrary and I haven't measured how performance
// actually goes in practice, so we can tweak it as we encounter real world
// performance issues. I've chosen the page size of 4K as a starting point as
// exceeding that will likely add additional performance overheads beyond the
// bandwidth requirements (ideally we would also ensure the IniParams buffer is
// aligned to a page boundary).
//
// If a shaderhacker wants more than 1024 (256x4) IniParams they should
// probably think about using a different storage means anyway, since IniParams
// has other problems such as no meaningful names, no namespacing, etc.
// Warning threshold, counted in IniParams entries. The note above equates 256
// entries with 1024 individual values, i.e. four components per entry.
const int INI_PARAMS_SIZE_WARNING = 256;
// -----------------------------------------------------------------------------------------------
// This critical section must be held to avoid race conditions when creating
// any resource. The nvapi functions used to set the resource creation mode
// affect global state, so if multiple threads are creating resources
// simultaneously it is possible for a StereoMode override or stereo/mono copy
// on one thread to affect another. This should be taken before setting the
// surface creation mode and released only after it has been restored. If the
// creation mode is not being set it should still be taken around the actual
// CreateXXX call.
//
// The actual variable definition is in the DX11 project to remind anyone using
// this from another project that they need to InitializeCriticalSection[Pretty]
// Declaration only - no storage is allocated by this header.
extern CRITICAL_SECTION resource_creation_mode_lock;
// Use the pretty lock debugging version if lock.h is included first, otherwise
// use the regular EnterCriticalSection:
#ifdef EnterCriticalSectionPretty
#define LockResourceCreationMode() \
EnterCriticalSectionPretty(&resource_creation_mode_lock)
#else
#define LockResourceCreationMode() \
EnterCriticalSection(&resource_creation_mode_lock)
#endif
// Releases resource_creation_mode_lock. Whichever variant of
// LockResourceCreationMode() was selected above, the release always goes
// through the plain LeaveCriticalSection:
#define UnlockResourceCreationMode() \
LeaveCriticalSection(&resource_creation_mode_lock)
// -----------------------------------------------------------------------------------------------
// Create hash code for textures or buffers.
//
// The call is wrapped in try/catch because it can crash in Dirt Rally, due to
// noncontiguous or non-mapped memory for the texture. Not sure this is the
// best strategy.
// NOTE(review): whether catch (...) actually intercepts such a crash depends
// on how the project is compiled - confirm the exception handling model.
//
// This uses crc32c_append instead of fnv_64_buf for performance. This
// implementation of crc32c uses the SSE 4.2 instructions in the CPU to
// calculate, and is some 30x faster than fnv_64_buf.
//
// The shader hash calculation is deliberately not changed, as there are
// thousands of shaders already in the field and there is no known bottleneck
// for that calculation.
//
// Returns the value produced by crc32c_append for (seed, buffer, length), or
// 0 if an exception was caught while hashing.
static uint32_t crc32c_hw(uint32_t seed, const void *buffer, size_t length)
{
	uint32_t hash = 0;

	try {
		hash = crc32c_append(seed, static_cast<const uint8_t*>(buffer), length);
	} catch (...) {
		// Fatal error, but catch it and report a zero hash instead.
		LogInfo(" ******* Exception caught while calculating crc32c_hw hash ******\n");
		hash = 0;
	}

	return hash;
}
// -----------------------------------------------------------------------------------------------
// Primary hash calculation for all shader file names.

// 64 bit magic FNV-0 and FNV-1 prime
#define FNV_64_PRIME ((UINT64)0x100000001b3ULL)

// Hashes 'len' bytes starting at 'buf'.
//
// The running value starts at zero. For every input byte it is first
// multiplied by FNV_64_PRIME (wrapping mod 2^64) and then XORed with that
// byte. An empty buffer therefore hashes to 0.
static UINT64 fnv_64_buf(const void *buf, size_t len)
{
	const unsigned char *bytes = (const unsigned char *)buf;
	UINT64 hash = 0;

	for (size_t i = 0; i < len; i++) {
		hash *= FNV_64_PRIME;
		hash ^= (UINT64)bytes[i];
	}

	return hash;
}
// -----------------------------------------------------------------------------------------------
// Strip whitespace from the right of a string, in place.
//
// Returns a pointer to the last character kept in the truncated string.
//
// Quirks kept for backwards compatibility with existing callers:
//  - The first character is never examined, so a string consisting only of
//    whitespace is shortened to its first character rather than to "".
//  - For an empty string the returned pointer is one before 'buf' and must
//    not be dereferenced.
static char *RightStripA(char *buf)
{
	size_t len = strlen(buf);

	// isspace() is only defined for values representable as unsigned char
	// (or EOF), so a plain char holding a byte >= 0x80 must be cast first.
	while (len > 1 && isspace((unsigned char)buf[len - 1]))
		len--;

	buf[len] = 0;
	return buf + len - 1;
}
// Wide character counterpart of RightStripA: removes trailing whitespace from
// 'buf' in place and returns a pointer to the last character kept.
//
// The first character is never examined, so a whitespace-only string keeps
// its first character. For an empty string the returned pointer is one before
// 'buf' and must not be dereferenced.
static wchar_t *RightStripW(wchar_t *buf)
{
	size_t len = wcslen(buf);

	while (len > 1 && iswspace(buf[len - 1]))
		len--;

	buf[len] = 0;
	return buf + len - 1;
}
// Converts a wide string to a multibyte string and trims whitespace from both
// ends.
//
// The result points into a function-local static buffer, so it is overwritten
// by the next call and must be copied if it needs to be kept. Input that does
// not fit is truncated to MAX_PATH - 1 bytes, and input that cannot be
// converted yields an empty string.
static char *readStringParameter(wchar_t *val)
{
	static char buf[MAX_PATH];

	// wcstombs does not NUL terminate the output when the converted text
	// fills the buffer, and returns (size_t)-1 if a character cannot be
	// converted, so reserve the last byte and terminate explicitly.
	size_t converted = wcstombs(buf, val, MAX_PATH - 1);
	if (converted == (size_t)-1)
		converted = 0;
	buf[converted] = '\0';

	RightStripA(buf);

	// isspace() requires a value representable as unsigned char:
	char *start = buf;
	while (isspace((unsigned char)*start))
		start++;

	return start;
}
static void BeepSuccess()
{
// High beep for success
Beep(1800, 400);
}
static void BeepShort()
{
// Short High beep
Beep(1800, 100);
}
static void BeepFailure()
{
// Bonk sound for failure.
Beep(200, 150);
}
// Alternative failure notification: a "brnk, dunk" made of two descending
// tones played back to back.
static void BeepFailure2()
{
	Beep(300, 200);
	Beep(200, 150);
}
static void BeepProfileFail()
{
// Brnk, du-du-dunk sound to signify the profile failed to install.
// This is more likely to hit than the others for an end user (e.g. if
// they denied admin privileges), so use a unique tone to make it
// easier to identify.
Beep(300, 300);
Beep(200, 100);
Beep(200, 100);
Beep(200, 200);
}
// Fatal error somewhere, known to crash, so exit cleanly with some audible
// notification instead: plays the BeepFailure2 sound twice, closes the log
// file and terminates the process with exit code 0xc0000135. Never returns.
static DECLSPEC_NORETURN void DoubleBeepExit()
{
	// Pause that follows each of the two failure sounds:
	const DWORD pauses[] = { 500, 200 };

	for (size_t i = 0; i < sizeof(pauses) / sizeof(pauses[0]); i++) {
		BeepFailure2();
		Sleep(pauses[i]);
	}

	if (LogFile != NULL) {
		// Make sure the log is written out so we see the failure message
		fclose(LogFile);
		LogFile = NULL;
	}

	ExitProcess(0xc0000135);
}
// -----------------------------------------------------------------------------------------------
// Case insensitive string comparison, overloaded on the character type so
// that the templated enum lookup code can use a single name for both wide
// and narrow strings. The result is whatever the underlying comparison
// returns (zero when the strings compare equal).
//
// Wide string variant, forwards to _wcsicmp:
static int _autoicmp(const wchar_t *s1, const wchar_t *s2)
{
return _wcsicmp(s1, s2);
}
// Narrow string variant, forwards to _stricmp:
static int _autoicmp(const char *s1, const char *s2)
{
return _stricmp(s1, s2);
}
// Looks up 'name' (compared case insensitively via _autoicmp) in a table of
// name/value pairs and returns the value of the first matching entry.
//
// To use this function be sure to terminate an EnumName_t list with {NULL, 0}
// as it cannot use ArraySize on passed in arrays.
//
//   enum_names:  table to search, terminated by an entry with a NULL name
//   name:        string to look for
//   default_val: value returned when no entry matches
//   found:       optional, set to true/false to report whether an entry
//                matched (allows a match to be told apart from the default)
//
// The fallback parameter was previously spelt 'default', which is a reserved
// keyword in C++. Parameter names are not visible to callers, so renaming it
// does not affect any call site.
template <class T1, class T2>
static T2 lookup_enum_val(struct EnumName_t<T1, T2> *enum_names, T1 name, T2 default_val, bool *found=NULL)
{
	for (; enum_names->name; enum_names++) {
		if (!_autoicmp(name, enum_names->name)) {
			if (found)
				*found = true;
			return enum_names->val;
		}
	}

	if (found)
		*found = false;

	return default_val;
}
// Variant of lookup_enum_val for a name that is not NUL terminated: only the
// first 'len' characters of 'name' are considered (wide strings only, compared
// case insensitively). The table must be terminated by an entry with a NULL
// name.
//
// An entry matches only if it is exactly 'len' characters long. Comparing
// just the first 'len' characters is a prefix match: the word "stereo" would
// select an earlier "stereo2mono" entry, and a zero length word would select
// the first entry in the table.
//
//   default_val: value returned when no entry matches (previously spelt
//                'default', which is a reserved keyword in C++)
//   found:       optional, set to true/false to report whether an entry
//                matched
template <class T1, class T2>
static T2 lookup_enum_val(struct EnumName_t<T1, T2> *enum_names, T1 name, size_t len, T2 default_val, bool *found=NULL)
{
	for (; enum_names->name; enum_names++) {
		// Check the length first so the comparison below covers the
		// whole entry name rather than just a prefix of it:
		if (wcslen(enum_names->name) != len)
			continue;

		if (!_wcsnicmp(name, enum_names->name, len)) {
			if (found)
				*found = true;
			return enum_names->val;
		}
	}

	if (found)
		*found = false;

	return default_val;
}
// Reverse lookup: returns the name of the first table entry whose value is
// equal to 'val', or NULL if no entry has that value. The table must be
// terminated by an entry with a NULL name.
template <class T1, class T2>
static T1 lookup_enum_name(struct EnumName_t<T1, T2> *enum_names, T2 val)
{
	struct EnumName_t<T1, T2> *entry = enum_names;

	while (entry->name) {
		if (val == entry->val)
			return entry->name;
		entry++;
	}

	return NULL;
}
// Builds a space separated list of the names of every table entry whose bits
// are all set in 'val'. Any bits of 'val' that are not accounted for by a
// matching entry are appended as "unknown:0x<hex>". The table must be
// terminated by an entry with a NULL name.
template <class T2>
static wstring lookup_enum_bit_names(struct EnumName_t<const wchar_t*, T2> *enum_names, T2 val)
{
	wstring names;
	T2 unnamed_bits = val;

	for (struct EnumName_t<const wchar_t*, T2> *entry = enum_names; entry->name; entry++) {
		// Skip entries that are not fully contained in val:
		if (!((T2)(val & entry->val) == entry->val))
			continue;

		if (!names.empty())
			names += L' ';
		names += entry->name;

		// These bits now have a name, so clear them from the leftovers:
		unnamed_bits = (T2)(unnamed_bits & (T2)~entry->val);
	}

	if (unnamed_bits != (T2)0) {
		wchar_t hex[20];

		wsprintf(hex, L"%x", unnamed_bits);
		if (!names.empty())
			names += L' ';
		names += L"unknown:0x";
		names += hex;
	}

	return names;
}
// Parses an option string of names given by enum_names. The enum used with
// this function should have an INVALID entry, other flags declared as powers
// of two, and the SENSIBLE_ENUM macro used to enable the bitwise and logical
// operators. As above, the EnumName_t list must be terminated with {NULL, 0}
//
// If you wish to parse an option string that contains exactly one unrecognised
// argument, provide a pointer to a pointer in the 'unrecognised' field and the
// unrecognised option will be returned. Multiple unrecognised options are
// still considered errors.
//
// Options are separated by spaces. Note that option_string is modified in
// place: the separator following each option is overwritten with a NUL so the
// option can be looked up as a string of its own.
//
// Returns the bitwise OR of every recognised option, with T2::INVALID ORed in
// if an unrecognised option could not be handed back through 'unrecognised'.
//
// Trailing whitespace (or a string consisting only of whitespace) does not
// produce an option. Previously it was looked up as an empty option and
// reported as unknown.
template <class T1, class T2, class T3>
static T2 parse_enum_option_string(struct EnumName_t<T1, T2> *enum_names, T3 option_string, T1 *unrecognised)
{
	T3 ptr = option_string, cur;
	T2 ret = (T2)0;
	T2 tmp = T2::INVALID;

	if (unrecognised)
		*unrecognised = NULL;

	while (*ptr) {
		// Skip over whitespace:
		for (; *ptr == L' '; ptr++) {}

		// Nothing but whitespace was left, so there is no entry to parse:
		if (!*ptr)
			break;

		// Mark start of current entry:
		cur = ptr;

		// Scan until the next whitespace or end of string:
		for (; *ptr && *ptr != L' '; ptr++) {}

		if (*ptr) {
			// NULL terminate the current entry and advance pointer:
			*ptr = L'\0';
			ptr++;
		}

		// Lookup the value of the current entry:
		tmp = lookup_enum_val<T1, T2> (enum_names, cur, T2::INVALID);
		if (tmp != T2::INVALID) {
			ret |= tmp;
		} else {
			if (unrecognised && !(*unrecognised)) {
				// First unrecognised entry, hand it back to the caller:
				*unrecognised = cur;
			} else {
				LogOverlayW(LOG_WARNING, L"WARNING: Unknown option: %s\n", cur);
				ret |= T2::INVALID;
			}
		}
	}

	return ret;
}
// Two template argument version is the typical case for now. We probably want
// to start adding the 'const' modifier in a bunch of places as we work towards
// migrating to C++ strings, since .c_str() always returns a const string.
// Since parse_enum_option_string currently modifies one of its inputs, it
// cannot use const, so the three argument template version above is to allow
// both const and non-const types passed in.
//
// This overload simply forwards to the three argument version, using T1 as
// the type of the option string.
template <class T1, class T2>
static T2 parse_enum_option_string(struct EnumName_t<T1, T2> *enum_names, T1 option_string, T1 *unrecognised)
{
return parse_enum_option_string<T1, T2, T1>(enum_names, option_string, unrecognised);
}
// This is similar to the above, but stops parsing when it hits an unrecognised
// keyword and returns the position without throwing any errors. It also
// doesn't modify the option_string, allowing it to be used with C++ strings.
//
// Returns the bitwise OR of the options recognised before parsing stopped.
// If 'unrecognised' is provided it is set to the start of the first
// unrecognised word, or to NULL if the whole string was consumed.
//
// Trailing whitespace no longer produces a zero length word. Previously that
// word was passed to the length limited lookup, where comparing zero
// characters matched the first entry of the table.
template <class T1, class T2>
static T2 parse_enum_option_string_prefix(struct EnumName_t<T1, T2> *enum_names, T1 option_string, T1 *unrecognised)
{
	T1 ptr = option_string, cur;
	T2 ret = (T2)0;
	T2 tmp = T2::INVALID;
	size_t len;

	if (unrecognised)
		*unrecognised = NULL;

	while (*ptr) {
		// Skip over whitespace:
		for (; *ptr == L' '; ptr++) {}

		// Nothing but whitespace was left, so there is no word to parse:
		if (!*ptr)
			break;

		// Mark start of current entry:
		cur = ptr;

		// Scan until the next whitespace or end of string:
		for (; *ptr && *ptr != L' '; ptr++) {}

		// Note word length:
		len = ptr - cur;

		// Advance pointer if not at end of string:
		if (*ptr)
			ptr++;

		// Lookup the value of the current entry:
		tmp = lookup_enum_val<T1, T2> (enum_names, cur, len, T2::INVALID);
		if (tmp != T2::INVALID) {
			ret |= tmp;
		} else {
			if (unrecognised)
				*unrecognised = cur;
			return ret;
		}
	}

	return ret;
}
#if MIGOTO_DX == 11
// http://msdn.microsoft.com/en-us/library/windows/desktop/bb173059(v=vs.85).aspx
//
// Names of the DXGI formats with the "DXGI_FORMAT_" prefix removed. The
// position of a name in this table is used as the numeric format value:
// TexFormatStr() indexes the table by format number and ParseFormatString()
// returns the index of the matching name cast to DXGI_FORMAT, so entries must
// not be reordered or removed. New names may only be appended if they follow
// on numerically from the last entry.
static char *DXGIFormats[] = {
"UNKNOWN",
"R32G32B32A32_TYPELESS",
"R32G32B32A32_FLOAT",
"R32G32B32A32_UINT",
"R32G32B32A32_SINT",
"R32G32B32_TYPELESS",
"R32G32B32_FLOAT",
"R32G32B32_UINT",
"R32G32B32_SINT",
"R16G16B16A16_TYPELESS",
"R16G16B16A16_FLOAT",
"R16G16B16A16_UNORM",
"R16G16B16A16_UINT",
"R16G16B16A16_SNORM",
"R16G16B16A16_SINT",
"R32G32_TYPELESS",
"R32G32_FLOAT",
"R32G32_UINT",
"R32G32_SINT",
"R32G8X24_TYPELESS",
"D32_FLOAT_S8X24_UINT",
"R32_FLOAT_X8X24_TYPELESS",
"X32_TYPELESS_G8X24_UINT",
"R10G10B10A2_TYPELESS",
"R10G10B10A2_UNORM",
"R10G10B10A2_UINT",
"R11G11B10_FLOAT",
"R8G8B8A8_TYPELESS",
"R8G8B8A8_UNORM",
"R8G8B8A8_UNORM_SRGB",
"R8G8B8A8_UINT",
"R8G8B8A8_SNORM",
"R8G8B8A8_SINT",
"R16G16_TYPELESS",
"R16G16_FLOAT",
"R16G16_UNORM",
"R16G16_UINT",
"R16G16_SNORM",
"R16G16_SINT",
"R32_TYPELESS",
"D32_FLOAT",
"R32_FLOAT",
"R32_UINT",
"R32_SINT",
"R24G8_TYPELESS",
"D24_UNORM_S8_UINT",
"R24_UNORM_X8_TYPELESS",
"X24_TYPELESS_G8_UINT",
"R8G8_TYPELESS",
"R8G8_UNORM",
"R8G8_UINT",
"R8G8_SNORM",
"R8G8_SINT",
"R16_TYPELESS",
"R16_FLOAT",
"D16_UNORM",
"R16_UNORM",
"R16_UINT",
"R16_SNORM",
"R16_SINT",
"R8_TYPELESS",
"R8_UNORM",
"R8_UINT",
"R8_SNORM",
"R8_SINT",
"A8_UNORM",
"R1_UNORM",
"R9G9B9E5_SHAREDEXP",
"R8G8_B8G8_UNORM",
"G8R8_G8B8_UNORM",
"BC1_TYPELESS",
"BC1_UNORM",
"BC1_UNORM_SRGB",
"BC2_TYPELESS",
"BC2_UNORM",
"BC2_UNORM_SRGB",
"BC3_TYPELESS",
"BC3_UNORM",
"BC3_UNORM_SRGB",
"BC4_TYPELESS",
"BC4_UNORM",
"BC4_SNORM",
"BC5_TYPELESS",
"BC5_UNORM",
"BC5_SNORM",
"B5G6R5_UNORM",
"B5G5R5A1_UNORM",
"B8G8R8A8_UNORM",
"B8G8R8X8_UNORM",
"R10G10B10_XR_BIAS_A2_UNORM",
"B8G8R8A8_TYPELESS",
"B8G8R8A8_UNORM_SRGB",
"B8G8R8X8_TYPELESS",
"B8G8R8X8_UNORM_SRGB",
"BC6H_TYPELESS",
"BC6H_UF16",
"BC6H_SF16",
"BC7_TYPELESS",
"BC7_UNORM",
"BC7_UNORM_SRGB",
"AYUV",
"Y410",
"Y416",
"NV12",
"P010",
"P016",
"420_OPAQUE",
"YUY2",
"Y210",
"Y216",
"NV11",
"AI44",
"IA44",
"P8",
"A8P8",
"B4G4R4A4_UNORM"
};
// Returns the name of a format given its number, as listed in DXGIFormats.
// Numbers beyond the end of that table are reported as "UNKNOWN", which is
// also the name of format 0.
static char *TexFormatStr(unsigned int format)
{
	const size_t num_formats = sizeof(DXGIFormats) / sizeof(DXGIFormats[0]);

	if (format >= num_formats)
		return "UNKNOWN";

	return DXGIFormats[format];
}
// Converts the textual name of a format into a DXGI_FORMAT.
//
//   fmt:                  format name as listed in DXGIFormats, compared case
//                         insensitively, optionally with a "DXGI_FORMAT_"
//                         prefix
//   allow_numeric_format: if true, a string consisting entirely of an
//                         unsigned decimal number is accepted as well and is
//                         returned as is, without being checked against the
//                         table
//
// Returns (DXGI_FORMAT)-1 if the string was not recognised.
static DXGI_FORMAT ParseFormatString(const char *fmt, bool allow_numeric_format)
{
	const size_t num_formats = sizeof(DXGIFormats) / sizeof(DXGIFormats[0]);
	const char *name = fmt;

	if (allow_numeric_format) {
		unsigned numeric;
		int consumed;

		// Try parsing format string as decimal. %n records how much
		// of the string was consumed, so that trailing characters
		// after the number cause it to be rejected:
		if (sscanf_s(fmt, "%u%n", &numeric, &consumed) == 1 && consumed == strlen(fmt))
			return (DXGI_FORMAT)numeric;
	}

	if (_strnicmp(name, "DXGI_FORMAT_", 12) == 0)
		name += 12;

	// Look up format string:
	for (unsigned idx = 0; idx < num_formats; idx++) {
		if (_strnicmp(name, DXGIFormats[idx], 30) == 0)
			return (DXGI_FORMAT)idx;
	}

	// UNKNOWN/0 is a valid format (e.g. for structured buffers), so return
	// -1 cast to a DXGI_FORMAT to signify an error:
	return (DXGI_FORMAT)-1;
}
// Wide string front end for ParseFormatString: converts the name to a
// multibyte string (truncated to 41 characters) and passes it on to the
// narrow string version.
//
// Returns (DXGI_FORMAT)-1 if the string was not recognised or could not be
// converted.
static DXGI_FORMAT ParseFormatString(const wchar_t *wfmt, bool allow_numeric_format)
{
	// Zero initialised so that the result is always NUL terminated, even
	// when the conversion fills all of the 41 characters it may use:
	char afmt[42] = {0};

	// wcstombs returns (size_t)-1 if it hits a character it cannot
	// convert, in which case the buffer does not hold a usable name:
	if (wcstombs(afmt, wfmt, sizeof(afmt) - 1) == (size_t)-1)
		return (DXGI_FORMAT)-1;

	return ParseFormatString(afmt, allow_numeric_format);
}
// From DirectXTK with extra formats added
//
// Maps a typeless format to a typed format of the same layout. Formats that
// have no entry in the conversion table are returned unchanged.
static DXGI_FORMAT EnsureNotTypeless( DXGI_FORMAT fmt )
{
	// Assumes UNORM or FLOAT; doesn't use UINT or SINT
	static const struct {
		DXGI_FORMAT typeless;
		DXGI_FORMAT typed;
	} conversions[] = {
		{ DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_FLOAT },
		{ DXGI_FORMAT_R32G32B32_TYPELESS,    DXGI_FORMAT_R32G32B32_FLOAT },
		{ DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM },
		{ DXGI_FORMAT_R32G32_TYPELESS,       DXGI_FORMAT_R32G32_FLOAT },
		{ DXGI_FORMAT_R10G10B10A2_TYPELESS,  DXGI_FORMAT_R10G10B10A2_UNORM },
		{ DXGI_FORMAT_R8G8B8A8_TYPELESS,     DXGI_FORMAT_R8G8B8A8_UNORM },
		{ DXGI_FORMAT_R16G16_TYPELESS,       DXGI_FORMAT_R16G16_UNORM },
		{ DXGI_FORMAT_R32_TYPELESS,          DXGI_FORMAT_R32_FLOAT },
		{ DXGI_FORMAT_R8G8_TYPELESS,         DXGI_FORMAT_R8G8_UNORM },
		{ DXGI_FORMAT_R16_TYPELESS,          DXGI_FORMAT_R16_UNORM },
		{ DXGI_FORMAT_R8_TYPELESS,           DXGI_FORMAT_R8_UNORM },
		{ DXGI_FORMAT_BC1_TYPELESS,          DXGI_FORMAT_BC1_UNORM },
		{ DXGI_FORMAT_BC2_TYPELESS,          DXGI_FORMAT_BC2_UNORM },
		{ DXGI_FORMAT_BC3_TYPELESS,          DXGI_FORMAT_BC3_UNORM },
		{ DXGI_FORMAT_BC4_TYPELESS,          DXGI_FORMAT_BC4_UNORM },
		{ DXGI_FORMAT_BC5_TYPELESS,          DXGI_FORMAT_BC5_UNORM },
		{ DXGI_FORMAT_B8G8R8A8_TYPELESS,     DXGI_FORMAT_B8G8R8A8_UNORM },
		{ DXGI_FORMAT_B8G8R8X8_TYPELESS,     DXGI_FORMAT_B8G8R8X8_UNORM },
		{ DXGI_FORMAT_BC7_TYPELESS,          DXGI_FORMAT_BC7_UNORM },

		// Extra depth/stencil buffer formats not covered in DirectXTK
		// (discards stencil buffer to allow binding to a shader
		// resource, alternatively we could discard the depth buffer if
		// we ever needed the stencil buffer):
		{ DXGI_FORMAT_R32G8X24_TYPELESS,     DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS },
		{ DXGI_FORMAT_R24G8_TYPELESS,        DXGI_FORMAT_R24_UNORM_X8_TYPELESS },
	};

	for (size_t i = 0; i < sizeof(conversions) / sizeof(conversions[0]); i++) {
		if (conversions[i].typeless == fmt)
			return conversions[i].typed;
	}

	return fmt;
}
// Is there already a utility function that does this?
//
// Returns the size in bytes that this table associates with the given format
// (16 for the four channel 32 bit formats, down to 1 for the single channel
// 8 bit formats).
//
// Any format that is not listed here, which includes UNKNOWN and all of the
// BCn block compressed formats, returns 0. Callers need to treat 0 as "size
// not known" rather than as a real size.
static UINT dxgi_format_size(DXGI_FORMAT format)
{
switch (format) {
case DXGI_FORMAT_R32G32B32A32_TYPELESS:
case DXGI_FORMAT_R32G32B32A32_FLOAT:
case DXGI_FORMAT_R32G32B32A32_UINT:
case DXGI_FORMAT_R32G32B32A32_SINT:
return 16;
case DXGI_FORMAT_R32G32B32_TYPELESS:
case DXGI_FORMAT_R32G32B32_FLOAT:
case DXGI_FORMAT_R32G32B32_UINT:
case DXGI_FORMAT_R32G32B32_SINT:
return 12;
case DXGI_FORMAT_R16G16B16A16_TYPELESS:
case DXGI_FORMAT_R16G16B16A16_FLOAT:
case DXGI_FORMAT_R16G16B16A16_UNORM:
case DXGI_FORMAT_R16G16B16A16_UINT:
case DXGI_FORMAT_R16G16B16A16_SNORM:
case DXGI_FORMAT_R16G16B16A16_SINT:
case DXGI_FORMAT_R32G32_TYPELESS:
case DXGI_FORMAT_R32G32_FLOAT:
case DXGI_FORMAT_R32G32_UINT:
case DXGI_FORMAT_R32G32_SINT:
case DXGI_FORMAT_R32G8X24_TYPELESS:
case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
return 8;
case DXGI_FORMAT_R10G10B10A2_TYPELESS:
case DXGI_FORMAT_R10G10B10A2_UNORM:
case DXGI_FORMAT_R10G10B10A2_UINT:
case DXGI_FORMAT_R11G11B10_FLOAT:
case DXGI_FORMAT_R8G8B8A8_TYPELESS:
case DXGI_FORMAT_R8G8B8A8_UNORM:
case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
case DXGI_FORMAT_R8G8B8A8_UINT:
case DXGI_FORMAT_R8G8B8A8_SNORM:
case DXGI_FORMAT_R8G8B8A8_SINT:
case DXGI_FORMAT_R16G16_TYPELESS:
case DXGI_FORMAT_R16G16_FLOAT:
case DXGI_FORMAT_R16G16_UNORM:
case DXGI_FORMAT_R16G16_UINT:
case DXGI_FORMAT_R16G16_SNORM:
case DXGI_FORMAT_R16G16_SINT:
case DXGI_FORMAT_R32_TYPELESS:
case DXGI_FORMAT_D32_FLOAT:
case DXGI_FORMAT_R32_FLOAT:
case DXGI_FORMAT_R32_UINT:
case DXGI_FORMAT_R32_SINT:
case DXGI_FORMAT_R24G8_TYPELESS:
case DXGI_FORMAT_D24_UNORM_S8_UINT:
case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
case DXGI_FORMAT_R8G8_B8G8_UNORM:
case DXGI_FORMAT_G8R8_G8B8_UNORM:
case DXGI_FORMAT_B8G8R8A8_UNORM:
case DXGI_FORMAT_B8G8R8X8_UNORM:
case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
case DXGI_FORMAT_B8G8R8A8_TYPELESS:
case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
case DXGI_FORMAT_B8G8R8X8_TYPELESS:
case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
return 4;
case DXGI_FORMAT_R8G8_TYPELESS:
case DXGI_FORMAT_R8G8_UNORM:
case DXGI_FORMAT_R8G8_UINT:
case DXGI_FORMAT_R8G8_SNORM:
case DXGI_FORMAT_R8G8_SINT:
case DXGI_FORMAT_R16_TYPELESS:
case DXGI_FORMAT_R16_FLOAT:
case DXGI_FORMAT_D16_UNORM:
case DXGI_FORMAT_R16_UNORM:
case DXGI_FORMAT_R16_UINT:
case DXGI_FORMAT_R16_SNORM:
case DXGI_FORMAT_R16_SINT:
case DXGI_FORMAT_B5G6R5_UNORM:
case DXGI_FORMAT_B5G5R5A1_UNORM:
return 2;
case DXGI_FORMAT_R8_TYPELESS:
case DXGI_FORMAT_R8_UNORM:
case DXGI_FORMAT_R8_UINT:
case DXGI_FORMAT_R8_SNORM:
case DXGI_FORMAT_R8_SINT:
case DXGI_FORMAT_A8_UNORM:
return 1;
default:
// Not listed above - size not known:
return 0;
}
}
// Returns a printable type name for a COM object, for use in log messages.
//
// Objects known to the hooked device / hooked context lookups are reported
// with a fixed name and are never passed to typeid. Anything else is
// identified through RTTI, with "<NO_RTTI>" or "<NULL>" returned if typeid
// throws __non_rtti_object or bad_typeid respectively.
static const char* type_name(IUnknown *object)
{
	// Seems that not even try / catch is safe in all cases of this
	// (grumble grumble poorly designed grumble...). The only cases where
	// we should be called on an object without type information is while
	// hooking the device and/or context, so check if it is one of those
	// cases:
	if (lookup_hooked_device((ID3D11Device1*)object))
		return "Hooked_ID3D11Device";

	if (lookup_hooked_context((ID3D11DeviceContext1*)object))
		return "Hooked_ID3D11DeviceContext";

	try {
		return typeid(*object).name();
	} catch (__non_rtti_object) {
		return "<NO_RTTI>";
	} catch(bad_typeid) {
		return "<NULL>";
	}
}
#endif // MIGOTO_DX == 11
#if MIGOTO_DX == 9
// Returns a printable type name for a COM object, for use in log messages.
//
// Objects known to the hooked DX9 device lookup are reported with a fixed
// name and are never passed to typeid. Anything else is identified through
// RTTI, with "<NO_RTTI>" or "<NULL>" returned if typeid throws
// __non_rtti_object or bad_typeid respectively.
static const char* type_name_dx9(IUnknown *object)
{
	// Seems that not even try / catch is safe in all cases of this
	// (grumble grumble poorly designed grumble...). The only cases where
	// we should be called on an object without type information is while
	// hooking the device and/or context, so check if it is one of those
	// cases:
	if (lookup_hooked_device_dx9((IDirect3DDevice9Ex*)object))
		return "Hooked_IDirect3DDevice9";

	try {
		return typeid(*object).name();
	} catch (__non_rtti_object) {
		return "<NO_RTTI>";
	} catch (bad_typeid) {
		return "<NULL>";
	}
}
#endif // MIGOTO_DX == 9
// -----------------------------------------------------------------------------------------------
// Common routine to handle disassembling binary shaders to asm text.
// This is used whenever we need the Asm text.
//
// New version using Flugan's wrapper around D3DDisassemble to replace the
// problematic %f floating point values with %.9e, which is enough that a 32bit
// floating point value will be reproduced exactly.
//
// The bytecode is copied before being handed to the disassembler, and a
// comment line noting the 3Dmigoto version and the time is passed along with
// it. Only the DX11 disassembler receives the patch_cb_offsets,
// disassemble_undecipherable_data, hexdump and d3dcompiler_46_compat options.
//
// Returns the disassembly, or an empty string if the disassembler reported a
// failure (which is also written to the log).
static string BinaryToAsmText(const void *pShaderBytecode, size_t BytecodeLength,
		bool patch_cb_offsets,
		bool disassemble_undecipherable_data = true,
		int hexdump = 0, bool d3dcompiler_46_compat = true)
{
	const byte *source = (const byte *)pShaderBytecode;
	vector<byte> byteCode(source, source + BytecodeLength);
	vector<byte> disassembly;
	HRESULT r;

	string comments = "// using 3Dmigoto v" + string(VER_FILE_VERSION_STR) + " on " + LogTime() + "//\n";

#if MIGOTO_DX == 9
	r = disassemblerDX9(&byteCode, &disassembly, comments.c_str());
#elif MIGOTO_DX == 11
	r = disassembler(&byteCode, &disassembly, comments.c_str(), hexdump,
			d3dcompiler_46_compat, disassemble_undecipherable_data, patch_cb_offsets);
#endif // MIGOTO_DX

	if (!FAILED(r))
		return string(disassembly.begin(), disassembly.end());

	LogInfo(" disassembly failed. Error: %x\n", r);
	return "";
}
// Get the shader model from the binary shader bytecode.
//
// This used to disassemble, then search for the text string, but if we are going to
// do all that work, we might as well use the James-Jones decoder to get it.
// The other reason to do it this way is that we have seen multiple shader versions
// in Unity games, and the old technique of searching for the first uncommented line
// would fail.
// This is an interesting idea, but doesn't work well here because of project structure.
// for the moment, let's leave this here, but use the disassemble search approach.
//static string GetShaderModel(const void *pShaderBytecode)
//{
// Shader *shader = DecodeDXBC((uint32_t*)pShaderBytecode);
// if (shader == nullptr)
// return "";
//
// string shaderModel;
//
// switch (shader->eShaderType)
// {
// case PIXEL_SHADER:
// shaderModel = "ps";
// break;
// case VERTEX_SHADER:
// shaderModel = "vs";
// break;
// case GEOMETRY_SHADER:
// shaderModel = "gs";
// break;
// case HULL_SHADER:
// shaderModel = "hs";
// break;
// case DOMAIN_SHADER:
// shaderModel = "ds";
// break;
// case COMPUTE_SHADER:
// shaderModel = "cs";
// break;
// default:
// return ""; // Failure.
// }
//
// shaderModel += "_" + shader->ui32MajorVersion;
// shaderModel += "_" + shader->ui32MinorVersion;
//
// delete shader;
//
// return shaderModel;
//}
// Returns the shader model line of a binary shader, i.e. the first line of
// its disassembly that is neither empty nor starts with '/'. Returns an empty
// string if the shader could not be disassembled or the disassembly has no
// such line.
//
// Works on offsets into the string so that neither scan can leave the text.
// The previous pointer based version bounded the end-of-line scan with the
// wrong pointer and could step past the end while skipping comments.
static string GetShaderModel(const void *pShaderBytecode, size_t bytecodeLength)
{
	string asmText = BinaryToAsmText(pShaderBytecode, bytecodeLength, false);
	if (asmText.empty())
		return "";

	// Read shader model. This is the first not commented line.
	size_t pos = 0;
	while (pos < asmText.size() && (asmText[pos] == '/' || asmText[pos] == '\n')) {
		size_t newline = asmText.find('\n', pos);
		if (newline == string::npos)
			return ""; // Comment runs to the end of the text
		pos = newline + 1;
	}

	// Extract model. The last line may lack a trailing newline:
	size_t eol = asmText.find('\n', pos);
	if (eol == string::npos)
		eol = asmText.size();

	return asmText.substr(pos, eol - pos);
}
// Create a text file containing text for the string specified. Can be Asm or HLSL.
// If the file already exists and the caller did not specify overwrite (used
// for reassembled text), return that as an error to avoid overwriting previous
// work.
// We previously would overwrite the file only after checking if the contents were different,
// this relaxes that to just being same file name.
//
//   fullPath:  file to create
//   asmText:   text to write, stored as is (the file is opened in binary mode)
//   overwrite: if false, an already existing file is left untouched
//
// Returns S_OK once the text has been written, E_FAIL if the file could not
// be opened for writing or not all of the text could be written (previously
// both were silently reported as S_OK), and ERROR_FILE_EXISTS if the file
// exists and overwrite was not requested.
//
// NOTE(review): ERROR_FILE_EXISTS is a Win32 error code rather than a failure
// HRESULT, so SUCCEEDED() is true for it. It is left unchanged here because
// callers may compare against that exact value - confirm before changing it.
static HRESULT CreateTextFile(wchar_t *fullPath, string *asmText, bool overwrite)
{
	FILE *f = NULL;

	if (!overwrite) {
		_wfopen_s(&f, fullPath, L"rb");
		if (f)
		{
			fclose(f);
			LogInfoW(L" CreateTextFile error: file already exists %s\n", fullPath);
			return ERROR_FILE_EXISTS;
		}
	}

	f = NULL;
	if (_wfopen_s(&f, fullPath, L"wb") != 0 || !f)
	{
		LogInfoW(L" CreateTextFile error: unable to open %s for writing\n", fullPath);
		return E_FAIL;
	}

	size_t written = fwrite(asmText->data(), 1, asmText->size(), f);

	// fclose flushes any buffered data, so a failure here means the file
	// may be incomplete as well:
	int close_failed = fclose(f);

	if (written != asmText->size() || close_failed)
	{
		LogInfoW(L" CreateTextFile error: unable to write %s\n", fullPath);
		return E_FAIL;
	}

	return S_OK;
}
// Get shader type from asm, first non-commented line. CS, PS, VS.
// Not sure this works on weird Unity variant with embedded types.
// Specific variant to name files consistently, so we know they are Asm text.
//
// Disassembles the shader and stores the text in
// <fileDirectory>\<hash as 16 hex digits>-<shaderType>.txt. An existing file
// is never overwritten.
//
// Returns E_OUTOFMEMORY if no disassembly was produced, otherwise whatever
// CreateTextFile returned. The outcome is written to the log.
static HRESULT CreateAsmTextFile(wchar_t* fileDirectory, UINT64 hash, const wchar_t* shaderType,
		const void *pShaderBytecode, size_t bytecodeLength, bool patch_cb_offsets)
{
	string disassembly = BinaryToAsmText(pShaderBytecode, bytecodeLength, patch_cb_offsets);
	if (disassembly.empty())
		return E_OUTOFMEMORY;

	wchar_t path[MAX_PATH];
	swprintf_s(path, MAX_PATH, L"%ls\\%016llx-%ls.txt", fileDirectory, hash, shaderType);

	const HRESULT result = CreateTextFile(path, &disassembly, false);

	if (FAILED(result))
		LogInfoW(L" error: %x, storing disassembly to %s\n", result, path);
	else
		LogInfoW(L" storing disassembly to %s\n", path);

	return result;
}
// Specific variant to name files, so we know they are HLSL text.
//
// Not implemented: nothing is written and E_NOTIMPL is returned. The body was
// previously empty, so the function ended without returning a value despite
// being declared to return an HRESULT.
static HRESULT CreateHLSLTextFile(UINT64 hash, string hlslText)
{
	return E_NOTIMPL;
}
// -----------------------------------------------------------------------------------------------
// Parses the name of one of the IniParam constants: x, y, z, w, x1, y1, ..., z7, w7
static bool ParseIniParamName(const wchar_t *name, int *idx, float DirectX::XMFLOAT4::**component)
{
int ret, len1, len2;
wchar_t component_chr;
size_t length = wcslen(name);
ret = swscanf_s(name, L"%lc%n%u%n", &component_chr, 1, &len1, idx, &len2);
// May or may not have matched index. Make sure entire string was
// matched either way and check index is valid if it was matched:
if (ret == 1 && len1 == length) {
*idx = 0;
} else if (ret == 2 && len2 == length) {
#if MIGOTO_DX == 9
// Added gating for this DX9 specific limitation that we definitely do
// not want to enforce in DX11 as that would break a bunch of mods -DSS
if (*idx >= 225)
return false;
#endif // MIGOTO_DX == 9
} else {
return false;
}
switch (towlower(component_chr)) {
case L'x':
*component = &DirectX::XMFLOAT4::x;
return true;
case L'y':
*component = &DirectX::XMFLOAT4::y;
return true;
case L'z':
*component = &DirectX::XMFLOAT4::z;
return true;
case L'w':
*component = &DirectX::XMFLOAT4::w;
return true;
}
return false;
}
// -----------------------------------------------------------------------------------------------
BOOL CreateDirectoryEnsuringAccess(LPCWSTR path);
errno_t wfopen_ensuring_access(FILE** pFile, const wchar_t *filename, const wchar_t *mode);
void set_file_last_write_time(wchar_t *path, FILETIME *ftWrite, DWORD flags=0);
void touch_file(wchar_t *path, DWORD flags=0);
#define touch_dir(path) touch_file(path, FILE_FLAG_BACKUP_SEMANTICS)
bool check_interface_supported(IUnknown *unknown, REFIID riid);
void analyse_iunknown(IUnknown *unknown);
// For the time being, since we are not setup to use the Win10 SDK, we'll add
// these manually. Some games under Win10 are requesting these.
struct _declspec(uuid("9d06dffa-d1e5-4d07-83a8-1bb123f2f841")) ID3D11Device2;
struct _declspec(uuid("420d5b32-b90c-4da4-bef0-359f6a24a83a")) ID3D11DeviceContext2;
struct _declspec(uuid("A8BE2AC4-199F-4946-B331-79599FB98DE7")) IDXGISwapChain2;
struct _declspec(uuid("94D99BDB-F1F8-4AB0-B236-7DA0170EDAB1")) IDXGISwapChain3;
struct _declspec(uuid("3D585D5A-BD4A-489E-B1F4-3DBCB6452FFB")) IDXGISwapChain4;
std::string NameFromIID(IID id);
void WarnIfConflictingShaderExists(wchar_t *orig_path, const char *message = "");
static const char *end_user_conflicting_shader_msg =
"Conflicting shaders present - please use uninstall.bat and reinstall the fix.\n";
struct OMState {
UINT NumRTVs;
#if MIGOTO_DX == 9
vector<IDirect3DSurface9*> rtvs;
IDirect3DSurface9 *dsv;