Index: trunk/i386/libsaio/cpu.c =================================================================== --- trunk/i386/libsaio/cpu.c (revision 1189) +++ trunk/i386/libsaio/cpu.c (revision 1190) @@ -10,7 +10,7 @@ #include "boot.h" #ifndef DEBUG_CPU -#define DEBUG_CPU 0 +#define DEBUG_CPU 1 #endif #if DEBUG_CPU @@ -30,7 +30,7 @@ unsigned long pollCount; uint64_t retval = 0; int i; - + /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT * counter 2. We run this loop 3 times to make sure the cache * is hot and we take the minimum delta from all of the runs. @@ -67,7 +67,7 @@ * Hz so we need to convert our milliseconds to seconds. Since we're * dividing by the milliseconds, we simply multiply by 1000. */ - + /* Unlike linux, we're not limited to 32-bit, but we do need to take care * that we're going to multiply by 1000 first so we do need at least some * arithmetic headroom. For now, 32-bit should be enough. @@ -83,7 +83,115 @@ return retval; } +#if 0 /* + * DFE: Measures the Max Performance Frequency in Hz (64-bit) + */ +static uint64_t measure_mperf_frequency(void) +{ + uint64_t mperfStart; + uint64_t mperfEnd; + uint64_t mperfDelta = 0xffffffffffffffffULL; + unsigned long pollCount; + uint64_t retval = 0; + int i; + + /* Time how many MPERF ticks elapse in 30 msec using the 8254 PIT + * counter 2. We run this loop 3 times to make sure the cache + * is hot and we take the minimum delta from all of the runs. + * That is to say that we're biased towards measuring the minimum + * number of MPERF ticks that occur while waiting for the timer to + * expire. + */ + for(i = 0; i < 10; ++i) + { + enable_PIT2(); + set_PIT2_mode0(CALIBRATE_LATCH); + mperfStart = rdmsr64(MSR_AMD_MPERF); + pollCount = poll_PIT2_gate(); + mperfEnd = rdmsr64(MSR_AMD_MPERF); + /* The poll loop must have run at least a few times for accuracy */ + if(pollCount <= 1) + continue; + /* The MPERF must increment at LEAST once every millisecond. We + * should have waited exactly 30 msec so the MPERF delta should + * be >= 30. Anything less and the processor is way too slow. + */ + if((mperfEnd - mperfStart) <= CALIBRATE_TIME_MSEC) + continue; + // tscDelta = MIN(tscDelta, (tscEnd - tscStart)) + if( (mperfEnd - mperfStart) < mperfDelta ) + mperfDelta = mperfEnd - mperfStart; + } + /* mperfDelta is now the least number of MPERF ticks the processor made in + * a timespan of 0.03 s (e.g. 30 milliseconds) + */ + + if(mperfDelta > (1ULL<<32)) + retval = 0; + else + { + retval = mperfDelta * 1000 / 30; + } + disable_PIT2(); + return retval; +} +#endif +/* + * Measures the Actual Performance Frequency in Hz (64-bit) + */ +static uint64_t measure_aperf_frequency(void) +{ + uint64_t aperfStart; + uint64_t aperfEnd; + uint64_t aperfDelta = 0xffffffffffffffffULL; + unsigned long pollCount; + uint64_t retval = 0; + int i; + + /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT + * counter 2. We run this loop 3 times to make sure the cache + * is hot and we take the minimum delta from all of the runs. + * That is to say that we're biased towards measuring the minimum + * number of APERF ticks that occur while waiting for the timer to + * expire. + */ + for(i = 0; i < 10; ++i) + { + enable_PIT2(); + set_PIT2_mode0(CALIBRATE_LATCH); + aperfStart = rdmsr64(MSR_AMD_APERF); + pollCount = poll_PIT2_gate(); + aperfEnd = rdmsr64(MSR_AMD_APERF); + /* The poll loop must have run at least a few times for accuracy */ + if(pollCount <= 1) + continue; + /* The TSC must increment at LEAST once every millisecond. We + * should have waited exactly 30 msec so the APERF delta should + * be >= 30. Anything less and the processor is way too slow. + */ + if((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC) + continue; + // tscDelta = MIN(tscDelta, (tscEnd - tscStart)) + if( (aperfEnd - aperfStart) < aperfDelta ) + aperfDelta = aperfEnd - aperfStart; + } + /* mperfDelta is now the least number of MPERF ticks the processor made in + * a timespan of 0.03 s (e.g. 30 milliseconds) + */ + + if(aperfDelta > (1ULL<<32)) + retval = 0; + else + { + retval = aperfDelta * 1000 / 30; + } + disable_PIT2(); + return retval; +} + + +/* * Calculates the FSB and CPU frequencies using specific MSRs for each CPU * - multi. is read from a specific MSR. In the case of Intel, there is: * a max multi. (used to calculate the FSB freq.), @@ -101,28 +209,34 @@ int len, myfsb; uint8_t bus_ratio_min; uint32_t max_ratio, min_ratio; - + max_ratio = min_ratio = myfsb = bus_ratio_min = 0; maxcoef = maxdiv = bus_ratio_max = currcoef = currdiv = 0; - + /* get cpuid values */ do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]); do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]); do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]); do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]); - do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]); + do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]); do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]); - if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) { + if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 8) { + do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]); + do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]); + } + else if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) { do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]); } + + #if DEBUG_CPU { int i; printf("CPUID Raw Values:\n"); for (i=0; iCPU.CPUID[i][0], p->CPU.CPUID[i][1], - p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]); + p->CPU.CPUID[i][0], p->CPU.CPUID[i][1], + p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]); } } #endif @@ -136,15 +250,23 @@ p->CPU.Model += (p->CPU.ExtModel << 4); - if (p->CPU.Vendor == 0x756E6547 /* Intel */ && + if (p->CPU.Vendor == CPUID_VENDOR_INTEL && p->CPU.Family == 0x06 && p->CPU.Model >= CPUID_MODEL_NEHALEM && p->CPU.Model != CPUID_MODEL_ATOM // MSR is *NOT* available on the Intel Atom CPU - ){ + ) + { msr = rdmsr64(MSR_CORE_THREAD_COUNT); // Undocumented MSR in Nehalem and newer CPUs p->CPU.NoCores = bitfield((uint32_t)msr, 31, 16); // Using undocumented MSR to get actual values p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0); // Using undocumented MSR to get actual values - } else { + } + else if (p->CPU.Vendor == CPUID_VENDOR_AMD) + { + p->CPU.NoThreads = bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16); + p->CPU.NoCores = bitfield(p->CPU.CPUID[CPUID_88][2], 7, 0) + 1; + } + else + { p->CPU.NoThreads = bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16); // Use previous method for Cores and Threads p->CPU.NoCores = bitfield(p->CPU.CPUID[CPUID_4][0], 31, 26) + 1; } @@ -171,12 +293,12 @@ strlcpy(p->CPU.BrandString, s, sizeof(p->CPU.BrandString)); if (!strncmp(p->CPU.BrandString, CPU_STRING_UNKNOWN, MIN(sizeof(p->CPU.BrandString), strlen(CPU_STRING_UNKNOWN) + 1))) { - /* - * This string means we have a firmware-programmable brand string, - * and the firmware couldn't figure out what sort of CPU we have. - */ - p->CPU.BrandString[0] = '\0'; - } + /* + * This string means we have a firmware-programmable brand string, + * and the firmware couldn't figure out what sort of CPU we have. + */ + p->CPU.BrandString[0] = '\0'; + } } /* setup features */ @@ -208,12 +330,12 @@ if (p->CPU.NoThreads > p->CPU.NoCores) { p->CPU.Features |= CPU_FEATURE_HTT; } - + tscFrequency = measure_tsc_frequency(); fsbFrequency = 0; cpuFrequency = 0; - - if ((p->CPU.Vendor == 0x756E6547 /* Intel */) && ((p->CPU.Family == 0x06) || (p->CPU.Family == 0x0f))) { + + if ((p->CPU.Vendor == CPUID_VENDOR_INTEL) && ((p->CPU.Family == 0x06) || (p->CPU.Family == 0x0f))) { int intelCPU = p->CPU.Model; if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)) { /* Nehalem CPU model */ @@ -227,34 +349,34 @@ p->CPU.Model == CPU_MODEL_SANDY || p->CPU.Model == CPU_MODEL_SANDY_XEON)) { msr = rdmsr64(MSR_PLATFORM_INFO); - DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff); - bus_ratio_max = (msr >> 8) & 0xff; - bus_ratio_min = (msr >> 40) & 0xff; //valv: not sure about this one (Remarq.1) + DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0)); + bus_ratio_max = bitfield(msr, 14, 8); + bus_ratio_min = bitfield(msr, 46, 40); //valv: not sure about this one (Remarq.1) msr = rdmsr64(MSR_FLEX_RATIO); - DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff); - if ((msr >> 16) & 0x01) { - flex_ratio = (msr >> 8) & 0xff; + DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0)); + if (bitfield(msr, 16, 16)) { + flex_ratio = bitfield(msr, 14, 8); /* bcc9: at least on the gigabyte h67ma-ud2h, - where the cpu multipler can't be changed to - allow overclocking, the flex_ratio msr has unexpected (to OSX) - contents. These contents cause mach_kernel to - fail to compute the bus ratio correctly, instead - causing the system to crash since tscGranularity - is inadvertently set to 0. - */ + where the cpu multipler can't be changed to + allow overclocking, the flex_ratio msr has unexpected (to OSX) + contents. These contents cause mach_kernel to + fail to compute the bus ratio correctly, instead + causing the system to crash since tscGranularity + is inadvertently set to 0. + */ if (flex_ratio == 0) { /* Clear bit 16 (evidently the - presence bit) */ + presence bit) */ wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL)); msr = rdmsr64(MSR_FLEX_RATIO); - verbose("Unusable flex ratio detected. Patched MSR now %08x\n", msr & 0xffffffff); + verbose("Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0)); } else { if (bus_ratio_max > flex_ratio) { bus_ratio_max = flex_ratio; } } } - + if (bus_ratio_max) { fsbFrequency = (tscFrequency / bus_ratio_max); } @@ -270,9 +392,9 @@ max_ratio = atoi(newratio); max_ratio = (max_ratio * 10); if (len >= 3) max_ratio = (max_ratio + 5); - + verbose("Bus-Ratio: min=%d, max=%s\n", bus_ratio_min, newratio); - + // extreme overclockers may love 320 ;) if ((max_ratio >= min_ratio) && (max_ratio <= 320)) { cpuFrequency = (fsbFrequency * max_ratio) / 10; @@ -286,29 +408,29 @@ /*if(bus_ratio_max > 0) bus_ratio = flex_ratio;*/ p->CPU.MaxRatio = max_ratio; p->CPU.MinRatio = min_ratio; - + myfsb = fsbFrequency / 1000000; verbose("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", myfsb, max_ratio); currcoef = bus_ratio_max; } else { msr = rdmsr64(MSR_IA32_PERF_STATUS); - DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff); - currcoef = (msr >> 8) & 0x1f; + DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0)); + currcoef = bitfield(msr, 12, 8); /* Non-integer bus ratio for the max-multi*/ - maxdiv = (msr >> 46) & 0x01; + maxdiv = bitfield(msr, 46, 46); /* Non-integer bus ratio for the current-multi (undocumented)*/ - currdiv = (msr >> 14) & 0x01; - + currdiv = bitfield(msr, 14, 14); + if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f)) // This will always be model >= 3 { /* On these models, maxcoef defines TSC freq */ - maxcoef = (msr >> 40) & 0x1f; + maxcoef = bitfield(msr, 44, 40); } else { /* On lower models, currcoef defines TSC freq */ /* XXX */ maxcoef = currcoef; } - + if (maxcoef) { if (maxdiv) { fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1)); @@ -329,60 +451,98 @@ p->CPU.Features |= CPU_FEATURE_MOBILE; } } -#if 0 - else if((p->CPU.Vendor == 0x68747541 /* AMD */) && (p->CPU.Family == 0x0f)) { - if(p->CPU.ExtFamily == 0x00 /* K8 */) { - msr = rdmsr64(K8_FIDVID_STATUS); - currcoef = (msr & 0x3f) / 2 + 4; - currdiv = (msr & 0x01) * 2; - } else if(p->CPU.ExtFamily >= 0x01 /* K10+ */) { - msr = rdmsr64(K10_COFVID_STATUS); - if(p->CPU.ExtFamily == 0x01 /* K10 */) - currcoef = (msr & 0x3f) + 0x10; - else /* K11+ */ - currcoef = (msr & 0x3f) + 0x08; - currdiv = (2 << ((msr >> 6) & 0x07)); - } + else if((p->CPU.Vendor == 0x68747541 /* AMD */) && (p->CPU.Family == 0x0f)) + { + switch(p->CPU.ExtFamily) + { + case 0x00: /* K8 */ + msr = rdmsr64(K8_FIDVID_STATUS); + maxcoef = bitfield(msr, 21, 16) / 2 + 4; + currcoef = bitfield(msr, 5, 0) / 2 + 4; + break; + + case 0x01: /* K10 */ + msr = rdmsr64(K10_COFVID_STATUS); + //uint64_t mperf = measure_mperf_frequency(); + uint64_t aperf = measure_aperf_frequency(); + // NOTE: tsc runs at the maccoeff (non turbo) + // *not* at the turbo frequency. + maxcoef = bitfield(msr, 54, 49) / 2 + 4; // VRIFY + currcoef = bitfield(msr, 5, 0) + 0x10; // note: this is * 2 + currdiv = 2 << bitfield(msr, 8, 6); + + cpuFrequency = aperf; + break; + + case 0x05: /* K14 */ + msr = rdmsr64(K10_COFVID_STATUS); + currcoef = (bitfield(msr, 54, 49) + 0x10) << 2; + currdiv = (bitfield(msr, 8, 4) + 1) << 2; + currdiv += bitfield(msr, 3, 0); - if (currcoef) { - if (currdiv) { - fsbFrequency = ((tscFrequency * currdiv) / currcoef); - DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv); - } else { - fsbFrequency = (tscFrequency / currcoef); - DBG("%d\n", currcoef); - } - fsbFrequency = (tscFrequency / currcoef); - cpuFrequency = tscFrequency; - } - } + break; + + case 0x02: /* K11 */ + // not implimented + break; + } + + if (maxcoef) { + if (currdiv) { + if(!cpuFrequency) + fsbFrequency = ((tscFrequency * currdiv) / currcoef); + else + fsbFrequency = ((cpuFrequency * currdiv) / currcoef); - if (!fsbFrequency) { - fsbFrequency = (DEFAULT_FSB * 1000); - cpuFrequency = tscFrequency; - DBG("0 ! using the default value for FSB !\n"); - } + DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv); + } else { + if(!cpuFrequency) + fsbFrequency = (tscFrequency / maxcoef); + else + fsbFrequency = (cpuFrequency / maxcoef); + DBG("%d\n", currcoef); + } + } + else if (currcoef) + { + if (currdiv) + { + fsbFrequency = ((tscFrequency * currdiv) / currcoef); + DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv); + } else { + fsbFrequency = (tscFrequency / currcoef); + DBG("%d\n", currcoef); + } + } + if(!cpuFrequency) cpuFrequency = tscFrequency; + } +#if 0 + if (!fsbFrequency) { + fsbFrequency = (DEFAULT_FSB * 1000); + cpuFrequency = tscFrequency; + DBG("0 ! using the default value for FSB !\n"); + } #endif - - p->CPU.MaxCoef = maxcoef; - p->CPU.MaxDiv = maxdiv; - p->CPU.CurrCoef = currcoef; - p->CPU.CurrDiv = currdiv; - p->CPU.TSCFrequency = tscFrequency; - p->CPU.FSBFrequency = fsbFrequency; - p->CPU.CPUFrequency = cpuFrequency; - - DBG("CPU: Brand String: %s\n", p->CPU.BrandString); - DBG("CPU: Vendor/Family/ExtFamily: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Family, p->CPU.ExtFamily); - DBG("CPU: Model/ExtModel/Stepping: 0x%x/0x%x/0x%x\n", p->CPU.Model, p->CPU.ExtModel, p->CPU.Stepping); - DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef); - DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv); - DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000); - DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000); - DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000); - DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads); - DBG("CPU: Features: 0x%08x\n", p->CPU.Features); + + p->CPU.MaxCoef = maxcoef; + p->CPU.MaxDiv = maxdiv; + p->CPU.CurrCoef = currcoef; + p->CPU.CurrDiv = currdiv; + p->CPU.TSCFrequency = tscFrequency; + p->CPU.FSBFrequency = fsbFrequency; + p->CPU.CPUFrequency = cpuFrequency; + + DBG("CPU: Brand String: %s\n", p->CPU.BrandString); + DBG("CPU: Vendor/Family/ExtFamily: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Family, p->CPU.ExtFamily); + DBG("CPU: Model/ExtModel/Stepping: 0x%x/0x%x/0x%x\n", p->CPU.Model, p->CPU.ExtModel, p->CPU.Stepping); + DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef); + DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv); + DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000); + DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000); + DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000); + DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads); + DBG("CPU: Features: 0x%08x\n", p->CPU.Features); #if DEBUG_CPU - pause(); + pause(); #endif } Index: trunk/i386/libsaio/platform.h =================================================================== --- trunk/i386/libsaio/platform.h (revision 1189) +++ trunk/i386/libsaio/platform.h (revision 1190) @@ -13,6 +13,10 @@ extern void scan_platform(void); extern void dumpPhysAddr(const char * title, void * a, int len); +/* CPUID Vendor */ +#define CPUID_VENDOR_INTEL 0x756E6547 +#define CPUID_VENDOR_AMD 0x68747541 + /* CPUID index into cpuid_raw */ #define CPUID_0 0 #define CPUID_1 1 @@ -21,7 +25,8 @@ #define CPUID_4 4 #define CPUID_80 5 #define CPUID_81 6 -#define CPUID_MAX 7 +#define CPUID_88 7 +#define CPUID_MAX 8 #define CPU_MODEL_YONAH 0x0E // Sossaman, Yonah #define CPU_MODEL_MEROM 0x0F // Allendale, Conroe, Kentsfield, Woodcrest, Clovertown, Tigerton, Merom Index: trunk/i386/libsaio/cpu.h =================================================================== --- trunk/i386/libsaio/cpu.h (revision 1189) +++ trunk/i386/libsaio/cpu.h (revision 1190) @@ -16,7 +16,7 @@ #define CPU_STRING_UNKNOWN "Unknown CPU Type" -#define MSR_IA32_PERF_STATUS 0x198 +#define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CONTROL 0x199 #define MSR_IA32_EXT_CONFIG 0x00EE #define MSR_FLEX_RATIO 0x194 @@ -28,6 +28,9 @@ #define K8_FIDVID_STATUS 0xC0010042 #define K10_COFVID_STATUS 0xC0010071 +#define MSR_AMD_MPERF 0x000000E7 +#define MSR_AMD_APERF 0x000000E8 + #define DEFAULT_FSB 100000 /* for now, hardcoding 100MHz for old CPUs */ // DFE: This constant comes from older xnu: