Chameleon

Chameleon Commit Details

Date: 2017-08-14 16:45:45 (1 year 1 month ago)
Author: ErmaC
Commit: 2896
Parents: 2895
Message: Bronya’s AMD CPU improvements
Changes:
M /branches/ErmaC/Enoch/i386/boot2/Makefile
M /branches/ErmaC/Enoch/i386/cdboot/cdboot.s
M /branches/ErmaC/Enoch/i386/libsaio/cpu.c
M /branches/ErmaC/Enoch/i386/libsaio/platform.h

File differences

branches/ErmaC/Enoch/i386/libsaio/cpu.c
2121
2222
2323
24
24
25
26
27
28
29
30
31
2532
26
27
28
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
2948
3049
3150
......
7190
7291
7392
74
93
7594
7695
77
96
7897
7998
8099
......
129148
130149
131150
132
151
133152
134
153
135154
136155
137156
......
330349
331350
332351
333
352
334353
335354
336355
337356
338357
339
358
340359
341360
342361
343362
344
363
345364
346365
347366
......
376395
377396
378397
379
380398
381399
382400
......
448466
449467
450468
469
451470
452471
453472
......
492511
493512
494513
495
496
514
515
497516
498517
499
500
518
519
501520
502521
503522
......
685704
686705
687706
688
689
690707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
691732
692733
693734
......
739780
740781
741782
742
743
744
745
746
747783
748784
749785
......
754790
755791
756792
793
794
795
796
797
757798
758799
759800
......
799840
800841
801842
802
843
803844
804845
805846
806
847
807848
808849
809850
......
820861
821862
822863
823
864
824865
825866
826867
......
891932
892933
893934
894
935
895936
896937
897938
......
956997
957998
958999
959
1000
9601001
961
962
1002
1003
9631004
964
1005
9651006
9661007
9671008
......
9721013
9731014
9741015
975
1016
1017
9761018
9771019
978
979
1020
1021
9801022
981
982
983
984
985
986
987
988
1023
1024
1025
1026
1027
1028
1029
1030
9891031
990
1032
9911033
9921034
993
1035
9941036
9951037
9961038
......
9991041
10001042
10011043
1002
1044
1045
10031046
10041047
1005
1006
1048
1049
10071050
1008
1009
1010
1011
1012
1013
1014
1015
1051
10161052
1017
1053
1054
1055
1056
1057
1058
1059
1060
10181061
10191062
1020
1063
10211064
10221065
10231066
......
10291072
10301073
10311074
1032
1075
10331076
10341077
10351078
......
10471090
10481091
10491092
1050
1093
10511094
10521095
10531096
......
10611104
10621105
10631106
1064
1107
10651108
10661109
10671110
10681111
10691112
1070
1071
1113
1114
10721115
10731116
1074
1117
10751118
10761119
10771120
......
10831126
10841127
10851128
1086
1129
10871130
1088
1089
1090
1131
1132
1133
10911134
1092
1093
1094
1095
1096
1097
1098
1099
1135
1136
1137
11001138
1101
1139
11021140
11031141
1104
1142
11051143
11061144
11071145
11081146
11091147
11101148
1111
1149
11121150
11131151
1114
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
11151162
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1163
11281164
11291165
1130
1166
11311167
1168
11321169
11331170
11341171
11351172
1136
1173
11371174
11381175
11391176
11401177
1141
1178
11421179
11431180
11441181
......
11461183
11471184
11481185
1149
1186
11501187
1151
1152
1188
11531189
1190
1191
1192
1193
1194
1195
1196
11541197
11551198
11561199
11571200
11581201
11591202
1160
1161
1162
1163
1164
1165
1166
1167
1168
1203
11691204
11701205
11711206
1207
1208
11721209
11731210
11741211
11751212
1213
1214
11761215
11771216
11781217
......
11821221
11831222
11841223
1224
1225
11851226
11861227
11871228
clock_frequency_info_t gPEClockFrequencyInfo;
static __unused uint64_t rdtsc32(void)
//static __unused uint64_t rdtsc32(void)
//{
//unsigned int lo,hi;
//__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
//return ((uint64_t)hi << 32) | lo;
//}
/*
 * getCycles() - return a 64-bit cycle / time-stamp counter reading.
 *
 * NOTE(review): this text is a rendered diff. The first three statements of
 * the body are identical to the body of the rdtsc32() helper that the same
 * change comments out, so they look like removed-side lines interleaved into
 * the new function. As written they return before the #if chain below is
 * reached - confirm against the committed cpu.c.
 */
uint64_t getCycles(void)
{
unsigned int lo,hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return ((uint64_t)hi << 32) | lo;
#if defined(__ARM_ARCH_7A__)
/* ARMv7-A: read a 32-bit counter through CP15 and scale it by 64. */
uint32_t r;
asm volatile("mrc p15, 0, %0, c9, c13, 0\t\n" : "=r" (r)); /* Read PMCCNTR */
return ((uint64_t)r) << 6; /* 1 tick = 64 clocks */
#elif defined(__x86_64__)
/* x86-64: RDTSC yields two 32-bit halves; a is the low half, d the high half. */
unsigned a, d;
asm volatile("rdtsc" : "=a" (a), "=d" (d));
return ((uint64_t)a) | (((uint64_t)d) << 32);
#elif defined(__i386__)
/* i386: the "=A" constraint receives the whole 64-bit result in one operand. */
uint64_t ret;
asm volatile("rdtsc": "=A" (ret));
return ret;
#else
/* No counter source is selected for this architecture: report 0. */
return 0;
#endif
}
/*
attempts++;
enable_PIT2();// turn on PIT2
set_PIT2(0);// reset timer 2 to be zero
latchTime = rdtsc32();// get the time stamp to time
latchTime = getCycles();// get the time stamp to time
latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
set_PIT2(SAMPLE_CLKS_INT);// set up the timer for (almost) 1/20th a second
saveTime = rdtsc32();// now time how long a 20th a second is...
saveTime = getCycles();// now time how long a 20th a second is...
get_PIT2(&lastValue);
get_PIT2(&lastValue);// read twice, first value may be unreliable
do {
{
enable_PIT2();
set_PIT2_mode0(CALIBRATE_LATCH);
tscStart = rdtsc64();
tscStart = getCycles();
pollCount = poll_PIT2_gate();
tscEnd = rdtsc64();
tscEnd = getCycles();
/* The poll loop must have run at least a few times for accuracy */
if (pollCount <= 1)
{
* Do the work
*/
offset = 0;
lastTsc = rdtsc64();
lastTsc = getCycles();
do
{
(void) memcpy((char*) dst + offset, (const char*) src + offset, chunk);
offset += (ptrdiff_t) chunk;
len -= chunk;
if ((rdtsc64() - lastTsc) < threshold)
if ((getCycles() - lastTsc) < threshold)
{
continue;
}
(void) readKeyboardStatus();// visit real-mode
lastTsc = rdtsc64();
lastTsc = getCycles();
}
while (len > chunk);
if (len)
uint32_treg[4];
char*s= 0;
do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]); // MaxFn, Vendor
do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]); // Signature, stepping, features
do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]); // TLB/Cache/Prefetch
do_cpuid(0x80000005, p->CPU.CPUID[CPUID_85]); // TLB/Cache/Prefetch
do_cpuid(0x80000006, p->CPU.CPUID[CPUID_86]); // TLB/Cache/Prefetch
do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
do_cpuid(0x8000001E, p->CPU.CPUID[CPUID_81E]);
break;
uint8_tbus_ratio_max= 0;
uint8_tbus_ratio_min= 0;
uint8_tcurrdiv= 0;
uint8_tcurrcoef= 0;
uint32_tcurrdiv= 0;
uint32_tcurrcoef= 0;
uint8_tmaxdiv= 0;
uint8_tmaxcoef= 0;
uint8_tpic0_mask;
uint8_tcpuMultN2= 0;
uint8_tpic0_mask= 0;
uint32_tcpuMultN2= 0;
const char*newratio;
case CPUID_VENDOR_AMD:
{
post_startup_cpu_fixups();
cores_per_package = bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1;
threads_per_core = cores_per_package;
if (p->CPU.ExtFamily < 0x8)
{
cores_per_package = bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1;
//threads_per_core = cores_per_package;
}
else
// Bronya : test for SMT
// Properly calculate number of cores on AMD Zen
// TODO: Check MSR for SMT
if (p->CPU.ExtFamily >= 0x8)
{
uint64_t cores = 0;
uint64_t logical = 0;
cores = bitfield(p->CPU.CPUID[CPUID_81E][ebx], 7, 0); // cores
logical = bitfield(p->CPU.CPUID[CPUID_81E][ebx], 15, 8) + 1; // 2
cores_per_package = (bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1) / logical; //8 cores
//threads_per_core = cores_per_package;
}
if (cores_per_package == 0)
{
cores_per_package = 1;
p->CPU.Features |= CPU_FEATURE_SSE42;
}
if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0)
{
p->CPU.Features |= CPU_FEATURE_EM64T;
}
if ((bit(5) & p->CPU.CPUID[CPUID_1][3]) != 0)
{
p->CPU.Features |= CPU_FEATURE_MSR;
p->CPU.Features |= CPU_FEATURE_HTT;
}
if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0)
{
p->CPU.Features |= CPU_FEATURE_EM64T;
}
pic0_mask = inb(0x21U);
outb(0x21U, 0xFFU); // mask PIC0 interrupts for duration of timing tests
case CPUID_MODEL_SKYLAKE_S:
/* --------------------------------------------------------- */
msr = rdmsr64(MSR_PLATFORM_INFO);
DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
DBG("msr(%d): platform_info %08llx\n", __LINE__, bitfield(msr, 31, 0));
bus_ratio_max = bitfield(msr, 15, 8);
bus_ratio_min = bitfield(msr, 47, 40); //valv: not sure about this one (Remarq.1)
msr = rdmsr64(MSR_FLEX_RATIO);
DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
DBG("msr(%d): flex_ratio %08llx\n", __LINE__, bitfield(msr, 31, 0));
if (bitfield(msr, 16, 16))
{
flex_ratio = bitfield(msr, 15, 8);
// Clear bit 16 (evidently the presence bit)
wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
msr = rdmsr64(MSR_FLEX_RATIO);
DBG("CPU: Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
DBG("CPU: Unusable flex ratio detected. Patched MSR now %08llx\n", bitfield(msr, 31, 0));
}
else
{
default:
msr = rdmsr64(MSR_IA32_PERF_STATUS);
DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
DBG("msr(%d): ia32_perf_stat 0x%08llx\n", __LINE__, bitfield(msr, 31, 0));
currcoef = bitfield(msr, 12, 8); // Bungo: reverted to 2263 state because of wrong old CPUs freq. calculating
// Non-integer bus ratio for the max-multi
maxdiv = bitfield(msr, 46, 46);
{
uint64_t fidvid = 0;
uint64_t cpuMult;
uint64_t fid;
uint64_t cpuFid;
fidvid = rdmsr64(K8_FIDVID_STATUS);
fid = bitfield(fidvid, 5, 0);
fidvid = rdmsr64(AMD_K8_PERF_STS);
cpuFid = bitfield(fidvid, 5, 0);
cpuMult = (fid + 8) / 2;
cpuMult = (cpuFid + 0x8) * 10 / 2;
currcoef = cpuMult;
cpuMultN2 = (fidvid & (uint64_t)bit(0));
case 0x10: /*** AMD Family 10h ***/
{
uint64_t cofvid = 0;
uint64_t prfsts = 0;
uint64_t cpuMult;
uint64_t divisor = 0;
uint64_t did;
uint64_t fid;
uint64_t cpuDid;
uint64_t cpuFid;
cofvid = rdmsr64(K10_COFVID_STATUS);
did = bitfield(cofvid, 8, 6);
fid = bitfield(cofvid, 5, 0);
if (did == 0) divisor = 2;
else if (did == 1) divisor = 4;
else if (did == 2) divisor = 8;
else if (did == 3) divisor = 16;
else if (did == 4) divisor = 32;
prfsts = rdmsr64(AMD_COFVID_STS);
cpuDid = bitfield(prfsts, 8, 6);
cpuFid = bitfield(prfsts, 5, 0);
if (cpuDid == 0) divisor = 2;
else if (cpuDid == 1) divisor = 4;
else if (cpuDid == 2) divisor = 8;
else if (cpuDid == 3) divisor = 16;
else if (cpuDid == 4) divisor = 32;
cpuMult = (fid + 16) / divisor;
cpuMult = ((cpuFid + 0x10) * 10) / (2^cpuDid);
currcoef = cpuMult;
cpuMultN2 = (cofvid & (uint64_t)bit(0));
cpuMultN2 = (prfsts & (uint64_t)bit(0));
currdiv = cpuMultN2;
/****** Addon END ******/
case 0x11: /*** AMD Family 11h ***/
{
uint64_t cofvid = 0;
uint64_t prfsts;
uint64_t cpuMult;
uint64_t divisor = 0;
uint64_t did;
uint64_t fid;
uint64_t cpuDid;
uint64_t cpuFid;
cofvid = rdmsr64(K10_COFVID_STATUS);
did = bitfield(cofvid, 8, 6);
fid = bitfield(cofvid, 5, 0);
if (did == 0) divisor = 2;
else if (did == 1) divisor = 4;
else if (did == 2) divisor = 8;
else if (did == 3) divisor = 16;
else if (did == 4) divisor = 32;
prfsts = rdmsr64(AMD_COFVID_STS);
cpuMult = (fid + 8) / divisor;
cpuDid = bitfield(prfsts, 8, 6);
cpuFid = bitfield(prfsts, 5, 0);
if (cpuDid == 0) divisor = 2;
else if (cpuDid == 1) divisor = 4;
else if (cpuDid == 2) divisor = 8;
else if (cpuDid == 3) divisor = 16;
else if (cpuDid == 4) divisor = 0;
cpuMult = ((cpuFid + 0x8) * 10 ) / divisor;
currcoef = cpuMult;
cpuMultN2 = (cofvid & (uint64_t)bit(0));
cpuMultN2 = (prfsts & (uint64_t)bit(0));
currdiv = cpuMultN2;
/****** Addon END ******/
// 8:4 CpuFid: current CPU core frequency ID
// 3:0 CpuDid: current CPU core divisor ID
uint64_t prfsts,CpuFid,CpuDid;
prfsts = rdmsr64(K10_COFVID_STATUS);
prfsts = rdmsr64(AMD_COFVID_STS);
CpuDid = bitfield(prfsts, 3, 0) ;
CpuFid = bitfield(prfsts, 8, 4) ;
case 8: divisor = 16; break;
default: divisor = 1; break;
}
currcoef = (CpuFid + 0x10) / divisor;
currcoef = ((CpuFid + 0x10) * 10) / divisor;
cpuMultN2 = (prfsts & (uint64_t)bit(0));
currdiv = cpuMultN2;
// 8:4: current CPU core divisor ID most significant digit
// 3:0: current CPU core divisor ID least significant digit
uint64_t prfsts;
prfsts = rdmsr64(K10_COFVID_STATUS);
prfsts = rdmsr64(AMD_COFVID_STS);
uint64_t CpuDidMSD,CpuDidLSD;
CpuDidMSD = bitfield(prfsts, 8, 4) ;
CpuDidLSD = bitfield(prfsts, 3, 0) ;
uint64_t frequencyId = 0x10;
currcoef = (frequencyId + 0x10) /
uint64_t frequencyId = tscFreq/Mega;
currcoef = (((frequencyId + 5) / 100) + 0x10) * 10 /
(CpuDidMSD + (CpuDidLSD * 0.25) + 1);
currdiv = ((CpuDidMSD) + 1) << 2;
currdiv += bitfield(msr, 3, 0);
currdiv += bitfield(prfsts, 3, 0);
cpuMultN2 = (prfsts & (uint64_t)bit(0));
currdiv = cpuMultN2;
case 0x06: /*** AMD Family 06h ***/
{
uint64_t cofvid = 0;
uint64_t prfsts = 0;
uint64_t cpuMult;
uint64_t divisor = 0;
uint64_t did;
uint64_t fid;
//uint64_t divisor = 0;
uint64_t cpuDid;
uint64_t cpuFid;
cofvid = rdmsr64(K10_COFVID_STATUS);
did = bitfield(cofvid, 8, 6);
fid = bitfield(cofvid, 5, 0);
if (did == 0) divisor = 2;
else if (did == 1) divisor = 4;
else if (did == 2) divisor = 8;
else if (did == 3) divisor = 16;
else if (did == 4) divisor = 32;
prfsts = rdmsr64(AMD_COFVID_STS);
cpuDid = bitfield(prfsts, 8, 6);
cpuFid = bitfield(prfsts, 5, 0);
cpuMult = (fid + 16) / divisor;
cpuMult = ((cpuFid + 0x10) * 10) / (2^cpuDid);
currcoef = cpuMult;
cpuMultN2 = (cofvid & (uint64_t)bit(0));
cpuMultN2 = (prfsts & 0x01) * 1;//(prfsts & (uint64_t)bit(0));
currdiv = cpuMultN2;
}
break;
case 0x16: /*** AMD Family 16h kabini ***/
{
uint64_t cofvid = 0;
uint64_t prfsts = 0;
uint64_t cpuMult;
uint64_t divisor = 0;
uint64_t did;
uint64_t cpuDid;
uint64_t cpuFid;
prfsts = rdmsr64(AMD_COFVID_STS);
cpuDid = bitfield(prfsts, 8, 6);
cpuFid = bitfield(prfsts, 5, 0);
if (cpuDid == 0) divisor = 1;
else if (cpuDid == 1) divisor = 2;
else if (cpuDid == 2) divisor = 4;
else if (cpuDid == 3) divisor = 8;
else if (cpuDid == 4) divisor = 16;
uint64_t fid;
cofvid = rdmsr64(K10_COFVID_STATUS);
did = bitfield(cofvid, 8, 6);
fid = bitfield(cofvid, 5, 0);
if (did == 0) divisor = 1;
else if (did == 1) divisor = 2;
else if (did == 2) divisor = 4;
else if (did == 3) divisor = 8;
else if (did == 4) divisor = 16;
cpuMult = (fid + 16) / divisor;
cpuMult = ((cpuFid + 0x10) * 10) / divisor;
currcoef = cpuMult;
cpuMultN2 = (cofvid & (uint64_t)bit(0));
cpuMultN2 = (prfsts & (uint64_t)bit(0));
currdiv = cpuMultN2;
/****** Addon END ******/
}
break;
case 0x17: /*** Bronya: For AMD Family 17h Ryzen ***/
case 0x17: /*** AMD Family 17h Ryzen ***/
{
uint64_t cpuMult;
uint64_t CpuDfsId;
uint64_t CpuFid;
uint64_t fid = 0;
uint64_t prfsts = 0;
prfsts = rdmsr64(AMD_PSTATE0_STS);
CpuDfsId = bitfield(prfsts, 13, 8);
CpuFid = bitfield(prfsts, 7, 0);
cpuMult = (CpuFid / CpuDfsId) * 2;
cpuMult = (CpuFid * 10 / CpuDfsId) * 2;
cpuMultN2 = (prfsts & (uint64_t)bit(0));
currdiv = cpuMultN2;
currcoef = cpuMult;
fid = (int)(cpuMult / 10);
uint8_t fdiv = cpuMult - (fid * 10);
if (fdiv > 0) {
currdiv = 1;
}
/****** Addon END ******/
}
break;
default:
{
typedef unsigned long long vlong;
uint64_t prfsts;
prfsts = rdmsr64(K10_COFVID_STATUS);
uint64_t r;
vlong hz;
r = (prfsts>>6) & 0x07;
hz = (((prfsts & 0x3f)+0x10)*100000000ll)/(1<<r);
currcoef = hz / (200 * Mega);
currcoef = tscFreq / (200 * Mega);
}
}
#define nya(x) x/10,x%10
if (currcoef)
{
if (currdiv)
{
currcoef = nya(currcoef);
busFrequency = ((tscFreq * 2) / ((currcoef * 2) + 1));
busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
tscFCvtt2n = busFCvtt2n * 2 / (1 + (2 * currcoef));
}
else
{
currcoef = nya(currcoef);
busFrequency = (tscFreq / currcoef);
busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
tscFCvtt2n = busFCvtt2n / currcoef;
branches/ErmaC/Enoch/i386/libsaio/platform.h
2727
2828
2929
30
30
31
3132
3233
3334
......
337338
338339
339340
340
341
342
343
344
341
342
343
344
345
345346
346347
347348
348
349349
350
350351
351352
352353
#define CPUID_8610
#define CPUID_8711
#define CPUID_8812
#define CPUID_MAX13
#define CPUID_81E13
#define CPUID_MAX14
#define CPUID_MODEL_ANY0x00
#define CPUID_MODEL_UNKNOWN0x01
#define MSR_K7_CLK_CTL0xC001001b
#define MSR_K7_FID_VID_CTL0xC0010041
#define K8_FIDVID_STATUS0xC0010042
#define K10_COFVID_LIMIT0xC0010061// max enabled p-state (msr >> 4) & 7
#define K10_COFVID_CONTROL0xC0010062// switch to p-state
#define K10_PSTATE_STATUS0xC0010064
#define K10_COFVID_STATUS0xC0010071// current p-state (msr >> 16) & 7
#define AMD_K8_PERF_STS 0xC0010042
#define AMD_PSTATE_LIMIT 0xC0010061 // max enabled p-state (msr >> 4) & 7
#define AMD_PSTATE_CONTROL 0xC0010062 // switch to p-state
#define AMD_PSTATE0_STS 0xC0010064
#define AMD_COFVID_STS 0xC0010071 // current p-state (msr >> 16) & 7
#define MSR_AMD_MPERF0x000000E7
#define MSR_AMD_APERF0x000000E8
#define AMD_PSTATE0_STS0xC0010064
#define DEFAULT_FSB100000 /* for now, hardcoding 100MHz for old CPUs */
// DFE: This constant comes from older xnu:
branches/ErmaC/Enoch/i386/boot2/Makefile
5656
5757
5858
59
59
6060
6161
6262
UTILDIR = $(SRCROOT)/i386/util
DIRS_NEEDED = $(OBJROOT) $(SYMROOT)
BOOT2ADDR = 20200
MAXBOOTSIZE = 458240
MAXBOOTSIZE = 474624
#
branches/ErmaC/Enoch/i386/cdboot/cdboot.s
114114
115115
116116
117
117
118118
119119
120120
; at build time.
kSectorBytes EQU 2048; sector size in bytes
kBoot2Size EQU 65024; default load size for boot2
kBoot2MaxSize EQU 458240; max size for boot2
kBoot2MaxSize EQU 474624; max size for boot2
kBoot2Address EQU 0x0200 ; boot2 load address
kBoot2Segment EQU 0x2000 ; boot2 load segment

Archive Download the corresponding diff file

Revision: 2896