Index: branches/ErmaC/Enoch/i386/libsaio/cpu.c
===================================================================
--- branches/ErmaC/Enoch/i386/libsaio/cpu.c (revision 2895)
+++ branches/ErmaC/Enoch/i386/libsaio/cpu.c (revision 2896)
@@ -21,11 +21,30 @@
 
 clock_frequency_info_t gPEClockFrequencyInfo;
 
-static __unused uint64_t rdtsc32(void)
+//static __unused uint64_t rdtsc32(void)
+//{
+//	unsigned int lo,hi;
+//	__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+//	return ((uint64_t)hi << 32) | lo;
+//}
+
+uint64_t getCycles(void)
 {
-	unsigned int lo,hi;
-	__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-	return ((uint64_t)hi << 32) | lo;
+#if defined(__ARM_ARCH_7A__)
+	uint32_t r;
+	asm volatile("mrc p15, 0, %0, c9, c13, 0\t\n" : "=r" (r)); /* Read PMCCNTR */
+	return ((uint64_t)r) << 6; /* 1 tick = 64 clocks */
+#elif defined(__x86_64__)
+	unsigned a, d;
+	asm volatile("rdtsc" : "=a" (a), "=d" (d));
+	return ((uint64_t)a) | (((uint64_t)d) << 32);
+#elif defined(__i386__)
+	uint64_t ret;
+	asm volatile("rdtsc": "=A" (ret));
+	return ret;
+#else
+	return 0;
+#endif
 }
 
 /*
@@ -71,10 +90,10 @@
 		attempts++;
 		enable_PIT2(); // turn on PIT2
 		set_PIT2(0); // reset timer 2 to be zero
-		latchTime = rdtsc32(); // get the time stamp to time
+		latchTime = getCycles(); // get the time stamp to time
 		latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
 		set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th a second
-		saveTime = rdtsc32(); // now time how long a 20th a second is...
+		saveTime = getCycles(); // now time how long a 20th a second is...
 		get_PIT2(&lastValue);
 		get_PIT2(&lastValue); // read twice, first value may be unreliable
 		do {
@@ -129,9 +148,9 @@
 	{
 		enable_PIT2();
 		set_PIT2_mode0(CALIBRATE_LATCH);
-		tscStart = rdtsc64();
+		tscStart = getCycles();
 		pollCount = poll_PIT2_gate();
-		tscEnd = rdtsc64();
+		tscEnd = getCycles();
 		/* The poll loop must have run at least a few times for accuracy */
 		if (pollCount <= 1)
 		{
@@ -330,18 +349,18 @@
 	 * Do the work
 	 */
 	offset = 0;
-	lastTsc = rdtsc64();
+	lastTsc = getCycles();
 	do
 	{
 		(void) memcpy((char*) dst + offset, (const char*) src + offset, chunk);
 		offset += (ptrdiff_t) chunk;
 		len -= chunk;
-		if ((rdtsc64() - lastTsc) < threshold)
+		if ((getCycles() - lastTsc) < threshold)
 		{
 			continue;
 		}
 		(void) readKeyboardStatus(); // visit real-mode
-		lastTsc = rdtsc64();
+		lastTsc = getCycles();
 	} while (len > chunk);
 
 	if (len)
@@ -376,7 +395,6 @@
 	uint32_t reg[4];
 	char *s = 0;
 
-
 	do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]); // MaxFn, Vendor
 	do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]); // Signature, stepping, features
 	do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]); // TLB/Cache/Prefetch
@@ -448,6 +466,7 @@
 			do_cpuid(0x80000005, p->CPU.CPUID[CPUID_85]); // TLB/Cache/Prefetch
 			do_cpuid(0x80000006, p->CPU.CPUID[CPUID_86]); // TLB/Cache/Prefetch
 			do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
+			do_cpuid(0x8000001E, p->CPU.CPUID[CPUID_81E]);
 
 			break;
 
@@ -492,12 +511,12 @@
 
 	uint8_t bus_ratio_max = 0;
 	uint8_t bus_ratio_min = 0;
-	uint8_t currdiv = 0;
-	uint8_t currcoef = 0;
+	uint32_t currdiv = 0;
+	uint32_t currcoef = 0;
 	uint8_t maxdiv = 0;
 	uint8_t maxcoef = 0;
-	uint8_t pic0_mask;
-	uint8_t cpuMultN2 = 0;
+	uint8_t pic0_mask = 0;
+	uint32_t cpuMultN2 = 0;
 
 	const char *newratio;
 
@@ -685,9 +704,31 @@
 		case CPUID_VENDOR_AMD:
 		{
 			post_startup_cpu_fixups();
-			cores_per_package = bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1;
-			threads_per_core = cores_per_package;
 
+			if (p->CPU.ExtFamily < 0x8)
+			{
+				cores_per_package = bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1;
+				//threads_per_core = cores_per_package;
+			}
+			else
+
+			// Bronya : test for SMT
+			// Properly calculate number of cores on AMD Zen
+			// TODO: Check MSR for SMT
+			if (p->CPU.ExtFamily >= 0x8)
+			{
+				uint64_t cores = 0;
+				uint64_t logical = 0;
+
+				cores = bitfield(p->CPU.CPUID[CPUID_81E][ebx], 7, 0); // cores
+				logical = bitfield(p->CPU.CPUID[CPUID_81E][ebx], 15, 8) + 1; // 2
+
+				cores_per_package = (bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1) / logical; //8 cores
+
+				//threads_per_core = cores_per_package;
+
+			}
+
 			if (cores_per_package == 0)
 			{
 				cores_per_package = 1;
@@ -739,11 +780,6 @@
 		p->CPU.Features |= CPU_FEATURE_SSE42;
 	}
 
-	if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0)
-	{
-		p->CPU.Features |= CPU_FEATURE_EM64T;
-	}
-
 	if ((bit(5) & p->CPU.CPUID[CPUID_1][3]) != 0)
 	{
 		p->CPU.Features |= CPU_FEATURE_MSR;
@@ -754,6 +790,11 @@
 		p->CPU.Features |= CPU_FEATURE_HTT;
 	}
 
+	if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0)
+	{
+		p->CPU.Features |= CPU_FEATURE_EM64T;
+	}
+
 	pic0_mask = inb(0x21U);
 	outb(0x21U, 0xFFU); // mask PIC0 interrupts for duration of timing tests
 
@@ -799,11 +840,11 @@
 					case CPUID_MODEL_SKYLAKE_S:
 					/* --------------------------------------------------------- */
 						msr = rdmsr64(MSR_PLATFORM_INFO);
-						DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
+						DBG("msr(%d): platform_info %08llx\n", __LINE__, bitfield(msr, 31, 0));
 						bus_ratio_max = bitfield(msr, 15, 8);
 						bus_ratio_min = bitfield(msr, 47, 40); //valv: not sure about this one (Remarq.1)
 						msr = rdmsr64(MSR_FLEX_RATIO);
-						DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
+						DBG("msr(%d): flex_ratio %08llx\n", __LINE__, bitfield(msr, 31, 0));
 						if (bitfield(msr, 16, 16))
 						{
 							flex_ratio = bitfield(msr, 15, 8);
@@ -820,7 +861,7 @@
 								// Clear bit 16 (evidently the presence bit)
 								wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
 								msr = rdmsr64(MSR_FLEX_RATIO);
-								DBG("CPU: Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
+								DBG("CPU: Unusable flex ratio detected. Patched MSR now %08llx\n", bitfield(msr, 31, 0));
 							}
 							else
 							{
@@ -891,7 +932,7 @@
 
 					default:
 						msr = rdmsr64(MSR_IA32_PERF_STATUS);
-						DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
+						DBG("msr(%d): ia32_perf_stat 0x%08llx\n", __LINE__, bitfield(msr, 31, 0));
 						currcoef = bitfield(msr, 12, 8); // Bungo: reverted to 2263 state because of wrong old CPUs freq. calculating
 						// Non-integer bus ratio for the max-multi
 						maxdiv = bitfield(msr, 46, 46);
@@ -956,12 +997,12 @@
 				{
 					uint64_t fidvid = 0;
 					uint64_t cpuMult;
-					uint64_t fid;
+					uint64_t cpuFid;
 
-					fidvid = rdmsr64(K8_FIDVID_STATUS);
-					fid = bitfield(fidvid, 5, 0);
+					fidvid = rdmsr64(AMD_K8_PERF_STS);
+					cpuFid = bitfield(fidvid, 5, 0);
 
-					cpuMult = (fid + 8) / 2;
+					cpuMult = (cpuFid + 0x8) * 10 / 2;
 					currcoef = cpuMult;
 
 					cpuMultN2 = (fidvid & (uint64_t)bit(0));
@@ -972,25 +1013,26 @@
 
 				case 0x10: /*** AMD Family 10h ***/
 				{
-					uint64_t cofvid = 0;
+
+					uint64_t prfsts = 0;
 					uint64_t cpuMult;
 					uint64_t divisor = 0;
-					uint64_t did;
-					uint64_t fid;
+					uint64_t cpuDid;
+					uint64_t cpuFid;
 
-					cofvid = rdmsr64(K10_COFVID_STATUS);
-					did = bitfield(cofvid, 8, 6);
-					fid = bitfield(cofvid, 5, 0);
-					if (did == 0) divisor = 2;
-					else if (did == 1) divisor = 4;
-					else if (did == 2) divisor = 8;
-					else if (did == 3) divisor = 16;
-					else if (did == 4) divisor = 32;
+					prfsts = rdmsr64(AMD_COFVID_STS);
+					cpuDid = bitfield(prfsts, 8, 6);
+					cpuFid = bitfield(prfsts, 5, 0);
+					if (cpuDid == 0) divisor = 2;
+					else if (cpuDid == 1) divisor = 4;
+					else if (cpuDid == 2) divisor = 8;
+					else if (cpuDid == 3) divisor = 16;
+					else if (cpuDid == 4) divisor = 32;
 
-					cpuMult = (fid + 16) / divisor;
+					cpuMult = ((cpuFid + 0x10) * 10) / (2ULL << cpuDid); // 2,4,8,16,32 as in the ladder above; '^' is XOR in C, not a power
 					currcoef = cpuMult;
 
-					cpuMultN2 = (cofvid & (uint64_t)bit(0));
+					cpuMultN2 = (prfsts & (uint64_t)bit(0));
 					currdiv = cpuMultN2;
 					/****** Addon END ******/
 
@@ -999,25 +1041,26 @@
 
 				case 0x11: /*** AMD Family 11h ***/
 				{
-					uint64_t cofvid = 0;
+
+					uint64_t prfsts;
 					uint64_t cpuMult;
 					uint64_t divisor = 0;
-					uint64_t did;
-					uint64_t fid;
+					uint64_t cpuDid;
+					uint64_t cpuFid;
 
-					cofvid = rdmsr64(K10_COFVID_STATUS);
-					did = bitfield(cofvid, 8, 6);
-					fid = bitfield(cofvid, 5, 0);
-					if (did == 0) divisor = 2;
-					else if (did == 1) divisor = 4;
-					else if (did == 2) divisor = 8;
-					else if (did == 3) divisor = 16;
-					else if (did == 4) divisor = 32;
+					prfsts = rdmsr64(AMD_COFVID_STS);
 
-					cpuMult = (fid + 8) / divisor;
+					cpuDid = bitfield(prfsts, 8, 6);
+					cpuFid = bitfield(prfsts, 5, 0);
+					if (cpuDid == 0) divisor = 2;
+					else if (cpuDid == 1) divisor = 4;
+					else if (cpuDid == 2) divisor = 8;
+					else if (cpuDid == 3) divisor = 16;
+					else if (cpuDid == 4) divisor = 0;
+					cpuMult = divisor ? ((cpuFid + 0x8) * 10) / divisor : 0; // divisor stays 0 for cpuDid >= 4: do not divide by it
 					currcoef = cpuMult;
 
-					cpuMultN2 = (cofvid & (uint64_t)bit(0));
+					cpuMultN2 = (prfsts & (uint64_t)bit(0));
 					currdiv = cpuMultN2;
 					/****** Addon END ******/
 
@@ -1029,7 +1072,7 @@
 					// 8:4 CpuFid: current CPU core frequency ID
 					// 3:0 CpuDid: current CPU core divisor ID
 					uint64_t prfsts,CpuFid,CpuDid;
-					prfsts = rdmsr64(K10_COFVID_STATUS);
+					prfsts = rdmsr64(AMD_COFVID_STS);
 
 					CpuDid = bitfield(prfsts, 3, 0) ;
 					CpuFid = bitfield(prfsts, 8, 4) ;
@@ -1047,7 +1090,7 @@
 						case 8: divisor = 16; break;
 						default: divisor = 1; break;
 					}
-					currcoef = (CpuFid + 0x10) / divisor;
+					currcoef = ((CpuFid + 0x10) * 10) / divisor;
 
 					cpuMultN2 = (prfsts & (uint64_t)bit(0));
 					currdiv = cpuMultN2;
@@ -1061,17 +1104,17 @@
 					// 8:4: current CPU core divisor ID most significant digit
 					// 3:0: current CPU core divisor ID least significant digit
 					uint64_t prfsts;
-					prfsts = rdmsr64(K10_COFVID_STATUS);
+					prfsts = rdmsr64(AMD_COFVID_STS);
 
 					uint64_t CpuDidMSD,CpuDidLSD;
 					CpuDidMSD = bitfield(prfsts, 8, 4) ;
 					CpuDidLSD = bitfield(prfsts, 3, 0) ;
 
-					uint64_t frequencyId = 0x10;
-					currcoef = (frequencyId + 0x10) /
+					uint64_t frequencyId = tscFreq/Mega;
+					currcoef = (((frequencyId + 5) / 100) + 0x10) * 10 /
 					(CpuDidMSD + (CpuDidLSD * 0.25) + 1);
 					currdiv = ((CpuDidMSD) + 1) << 2;
-					currdiv += bitfield(msr, 3, 0);
+					currdiv += bitfield(prfsts, 3, 0);
 
 					cpuMultN2 = (prfsts & (uint64_t)bit(0));
 					currdiv = cpuMultN2;
@@ -1083,62 +1126,56 @@
 
 				case 0x06: /*** AMD Family 06h ***/
 				{
-					uint64_t cofvid = 0;
+					uint64_t prfsts = 0;
 					uint64_t cpuMult;
-					uint64_t divisor = 0;
-					uint64_t did;
-					uint64_t fid;
+					//uint64_t divisor = 0;
+					uint64_t cpuDid;
+					uint64_t cpuFid;
 
-					cofvid = rdmsr64(K10_COFVID_STATUS);
-					did = bitfield(cofvid, 8, 6);
-					fid = bitfield(cofvid, 5, 0);
-					if (did == 0) divisor = 2;
-					else if (did == 1) divisor = 4;
-					else if (did == 2) divisor = 8;
-					else if (did == 3) divisor = 16;
-					else if (did == 4) divisor = 32;
+					prfsts = rdmsr64(AMD_COFVID_STS);
+					cpuDid = bitfield(prfsts, 8, 6);
+					cpuFid = bitfield(prfsts, 5, 0);
 
-					cpuMult = (fid + 16) / divisor;
+					cpuMult = ((cpuFid + 0x10) * 10) / (2ULL << cpuDid); // same 2,4,8,16,32 divisors as the removed ladder; '^' is XOR in C
 					currcoef = cpuMult;
 
-					cpuMultN2 = (cofvid & (uint64_t)bit(0));
+					cpuMultN2 = (prfsts & 0x01) * 1;//(prfsts & (uint64_t)bit(0));
 					currdiv = cpuMultN2;
 
 				}
 				break;
 
 				case 0x16: /*** AMD Family 16h kabini ***/
 				{
-					uint64_t cofvid = 0;
+					uint64_t prfsts = 0;
 					uint64_t cpuMult;
 					uint64_t divisor = 0;
-					uint64_t did;
+					uint64_t cpuDid;
+					uint64_t cpuFid;
+					prfsts = rdmsr64(AMD_COFVID_STS);
+					cpuDid = bitfield(prfsts, 8, 6);
+					cpuFid = bitfield(prfsts, 5, 0);
+					if (cpuDid == 0) divisor = 1;
+					else if (cpuDid == 1) divisor = 2;
+					else if (cpuDid == 2) divisor = 4;
+					else if (cpuDid == 3) divisor = 8;
+					else if (cpuDid == 4) divisor = 16;
 
-					uint64_t fid;
-
-					cofvid = rdmsr64(K10_COFVID_STATUS);
-					did = bitfield(cofvid, 8, 6);
-					fid = bitfield(cofvid, 5, 0);
-					if (did == 0) divisor = 1;
-					else if (did == 1) divisor = 2;
-					else if (did == 2) divisor = 4;
-					else if (did == 3) divisor = 8;
-					else if (did == 4) divisor = 16;
-
-					cpuMult = (fid + 16) / divisor;
+					cpuMult = ((cpuFid + 0x10) * 10) / divisor;
 					currcoef = cpuMult;
 
-					cpuMultN2 = (cofvid & (uint64_t)bit(0));
+					cpuMultN2 = (prfsts & (uint64_t)bit(0));
 					currdiv = cpuMultN2;
+					/****** Addon END ******/
 				}
 				break;
 
-				case 0x17: /*** Bronya: For AMD Family 17h Ryzen ***/
+				case 0x17: /*** AMD Family 17h Ryzen ***/
 				{
 					uint64_t cpuMult;
 					uint64_t CpuDfsId;
 					uint64_t CpuFid;
-
+					uint64_t fid = 0;
 					uint64_t prfsts = 0;
 
 					prfsts = rdmsr64(AMD_PSTATE0_STS);
@@ -1146,33 +1183,35 @@
 					CpuDfsId = bitfield(prfsts, 13, 8);
 					CpuFid = bitfield(prfsts, 7, 0);
 
-					cpuMult = (CpuFid / CpuDfsId) * 2;
+					cpuMult = CpuDfsId ? (CpuFid * 10 / CpuDfsId) * 2 : 0; // a zero CpuDfsId field must not fault the divide
 
-					cpuMultN2 = (prfsts & (uint64_t)bit(0));
-					currdiv = cpuMultN2;
+					currcoef = cpuMult;
+					fid = (int)(cpuMult / 10);
+
+					uint8_t fdiv = cpuMult - (fid * 10);
+					if (fdiv > 0) {
+						currdiv = 1;
+					}
+					/****** Addon END ******/
 				}
 				break;
 
 				default:
 				{
-					typedef unsigned long long vlong;
-					uint64_t prfsts;
-					prfsts = rdmsr64(K10_COFVID_STATUS);
-					uint64_t r;
-					vlong hz;
-					r = (prfsts>>6) & 0x07;
-					hz = (((prfsts & 0x3f)+0x10)*100000000ll)/(1<> 4) & 7
-#define K10_COFVID_CONTROL 0xC0010062 // switch to p-state
-#define K10_PSTATE_STATUS 0xC0010064
-#define K10_COFVID_STATUS 0xC0010071 // current p-state (msr >> 16) & 7
+#define AMD_K8_PERF_STS 0xC0010042
+#define AMD_PSTATE_LIMIT 0xC0010061 // max enabled p-state (msr >> 4) & 7
+#define AMD_PSTATE_CONTROL 0xC0010062 // switch to p-state
+#define AMD_PSTATE0_STS 0xC0010064
+#define AMD_COFVID_STS 0xC0010071 // current p-state (msr >> 16) & 7
 #define MSR_AMD_MPERF 0x000000E7
 #define MSR_AMD_APERF 0x000000E8
-#define AMD_PSTATE0_STS 0xC0010064
+
 #define DEFAULT_FSB 100000 /* for now, hardcoding 100MHz for old CPUs */
 
 // DFE: This constant comes from older xnu:
Index: branches/ErmaC/Enoch/i386/boot2/Makefile
===================================================================
--- branches/ErmaC/Enoch/i386/boot2/Makefile (revision 2895)
+++ branches/ErmaC/Enoch/i386/boot2/Makefile (revision 2896)
@@ -56,7 +56,7 @@
 UTILDIR = $(SRCROOT)/i386/util
 DIRS_NEEDED = $(OBJROOT) $(SYMROOT)
 BOOT2ADDR = 20200
-MAXBOOTSIZE = 458240
+MAXBOOTSIZE = 474624
 
 
 #
Index: branches/ErmaC/Enoch/i386/cdboot/cdboot.s
===================================================================
--- branches/ErmaC/Enoch/i386/cdboot/cdboot.s (revision 2895)
+++ branches/ErmaC/Enoch/i386/cdboot/cdboot.s (revision 2896)
@@ -114,7 +114,7 @@
 ; at build time.
 kSectorBytes EQU 2048 ; sector size in bytes
 kBoot2Size EQU 65024 ; default load size for boot2
-kBoot2MaxSize EQU 458240 ; max size for boot2
+kBoot2MaxSize EQU 474624 ; max size for boot2
 kBoot2Address EQU 0x0200 ; boot2 load address
 kBoot2Segment EQU 0x2000 ; boot2 load segment
 