Index: trunk/i386/libsaio/cpu.c
===================================================================
--- trunk/i386/libsaio/cpu.c    (revision 1371)
+++ trunk/i386/libsaio/cpu.c    (revision 1372)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008 Islam Ahmed Zaid. All rights reserved.
+ * Copyright 2008 Islam Ahmed Zaid. All rights reserved.
  * AsereBLN: 2009: cleanup and bugfix
  */
 
@@ -24,222 +24,170 @@
  */
 static uint64_t measure_tsc_frequency(void)
 {
-    uint64_t tscStart;
-    uint64_t tscEnd;
-    uint64_t tscDelta = 0xffffffffffffffffULL;
-    unsigned long pollCount;
-    uint64_t retval = 0;
-    int i;
-
-    /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
-     * counter 2. We run this loop 3 times to make sure the cache
-     * is hot and we take the minimum delta from all of the runs.
-     * That is to say that we're biased towards measuring the minimum
-     * number of TSC ticks that occur while waiting for the timer to
-     * expire. That theoretically helps avoid inconsistencies when
-     * running under a VM if the TSC is not virtualized and the host
-     * steals time. The TSC is normally virtualized for VMware.
-     */
-    for(i = 0; i < 10; ++i)
-    {
-        enable_PIT2();
-        set_PIT2_mode0(CALIBRATE_LATCH);
-        tscStart = rdtsc64();
-        pollCount = poll_PIT2_gate();
-        tscEnd = rdtsc64();
-        /* The poll loop must have run at least a few times for accuracy */
-        if(pollCount <= 1)
-            continue;
-        /* The TSC must increment at LEAST once every millisecond. We
-         * should have waited exactly 30 msec so the TSC delta should
-         * be >= 30. Anything less and the processor is way too slow.
-         */
-        if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
-            continue;
-        // tscDelta = MIN(tscDelta, (tscEnd - tscStart))
-        if( (tscEnd - tscStart) < tscDelta )
-            tscDelta = tscEnd - tscStart;
-    }
-    /* tscDelta is now the least number of TSC ticks the processor made in
-     * a timespan of 0.03 s (e.g. 30 milliseconds)
-     * Linux thus divides by 30 which gives the answer in kiloHertz because
-     * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
-     * Hz so we need to convert our milliseconds to seconds. Since we're
-     * dividing by the milliseconds, we simply multiply by 1000.
-     */
-
-    /* Unlike linux, we're not limited to 32-bit, but we do need to take care
-     * that we're going to multiply by 1000 first so we do need at least some
-     * arithmetic headroom. For now, 32-bit should be enough.
-     * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
-     */
-    if(tscDelta > (1ULL<<32))
-        retval = 0;
-    else
-    {
-        retval = tscDelta * 1000 / 30;
-    }
-    disable_PIT2();
-    return retval;
+    uint64_t tscStart;
+    uint64_t tscEnd;
+    uint64_t tscDelta = 0xffffffffffffffffULL;
+    unsigned long pollCount;
+    uint64_t retval = 0;
+    int i;
+
+    /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
+     * counter 2. We run this loop 10 times to make sure the cache
+     * is hot and we take the minimum delta from all of the runs.
+     * That is to say that we're biased towards measuring the minimum
+     * number of TSC ticks that occur while waiting for the timer to
+     * expire. That theoretically helps avoid inconsistencies when
+     * running under a VM if the TSC is not virtualized and the host
+     * steals time. The TSC is normally virtualized for VMware.
+     */
+    for(i = 0; i < 10; ++i)
+    {
+        enable_PIT2();
+        set_PIT2_mode0(CALIBRATE_LATCH);
+        tscStart = rdtsc64();
+        pollCount = poll_PIT2_gate();
+        tscEnd = rdtsc64();
+        /* The poll loop must have run at least a few times for accuracy */
+        if (pollCount <= 1)
+            continue;
+        /* The TSC must increment at LEAST once every millisecond.
+         * We should have waited exactly 30 msec so the TSC delta should
+         * be >= 30. Anything less and the processor is way too slow.
+         */
+        if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
+            continue;
+        // tscDelta = MIN(tscDelta, (tscEnd - tscStart))
+        if ( (tscEnd - tscStart) < tscDelta )
+            tscDelta = tscEnd - tscStart;
+    }
+    /* tscDelta is now the least number of TSC ticks the processor made in
+     * a timespan of 0.03 s (i.e. 30 milliseconds).
+     * Linux thus divides by 30 which gives the answer in kilohertz because
+     * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
+     * Hz so we need to convert our milliseconds to seconds. Since we're
+     * dividing by the milliseconds, we simply multiply by 1000.
+     */
+
+    /* Unlike linux, we're not limited to 32-bit, but we do need to take care
+     * that we're going to multiply by 1000 first so we do need at least some
+     * arithmetic headroom. For now, 32-bit should be enough.
+     * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
+     */
+    if (tscDelta > (1ULL<<32))
+        retval = 0;
+    else
+    {
+        retval = tscDelta * 1000 / 30;
+    }
+    disable_PIT2();
+    return retval;
 }
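
The conversion at the end of the function is worth checking in isolation: one TSC tick per millisecond is 1 kHz, so a tick count taken over 30 ms becomes Hz by multiplying by 1000 and dividing by 30. A stand-alone sketch with a made-up delta (the value is illustrative, not from a real machine):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical count of TSC ticks observed during the 30 ms PIT window. */
        uint64_t tscDelta = 87960000ULL;

        /* ticks per 30 ms * 1000 / 30 = ticks per second, i.e. Hz */
        uint64_t hz = tscDelta * 1000 / 30;

        printf("TSC ~ %llu Hz (~%llu MHz)\n",
               (unsigned long long)hz,
               (unsigned long long)(hz / 1000000));   /* ~2932 MHz */
        return 0;
    }
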
-#if 0
 /*
- * DFE: Measures the Max Performance Frequency in Hz (64-bit)
- */
-static uint64_t measure_mperf_frequency(void)
-{
-    uint64_t mperfStart;
-    uint64_t mperfEnd;
-    uint64_t mperfDelta = 0xffffffffffffffffULL;
-    unsigned long pollCount;
-    uint64_t retval = 0;
-    int i;
-
-    /* Time how many MPERF ticks elapse in 30 msec using the 8254 PIT
-     * counter 2. We run this loop 3 times to make sure the cache
-     * is hot and we take the minimum delta from all of the runs.
-     * That is to say that we're biased towards measuring the minimum
-     * number of MPERF ticks that occur while waiting for the timer to
-     * expire.
-     */
-    for(i = 0; i < 10; ++i)
-    {
-        enable_PIT2();
-        set_PIT2_mode0(CALIBRATE_LATCH);
-        mperfStart = rdmsr64(MSR_AMD_MPERF);
-        pollCount = poll_PIT2_gate();
-        mperfEnd = rdmsr64(MSR_AMD_MPERF);
-        /* The poll loop must have run at least a few times for accuracy */
-        if(pollCount <= 1)
-            continue;
-        /* The MPERF must increment at LEAST once every millisecond. We
-         * should have waited exactly 30 msec so the MPERF delta should
-         * be >= 30. Anything less and the processor is way too slow.
-         */
-        if((mperfEnd - mperfStart) <= CALIBRATE_TIME_MSEC)
-            continue;
-        // tscDelta = MIN(tscDelta, (tscEnd - tscStart))
-        if( (mperfEnd - mperfStart) < mperfDelta )
-            mperfDelta = mperfEnd - mperfStart;
-    }
-    /* mperfDelta is now the least number of MPERF ticks the processor made in
-     * a timespan of 0.03 s (e.g. 30 milliseconds)
-     */
-
-    if(mperfDelta > (1ULL<<32))
-        retval = 0;
-    else
-    {
-        retval = mperfDelta * 1000 / 30;
-    }
-    disable_PIT2();
-    return retval;
-}
-#endif
-/*
+ * Original comment/code:
+ * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
+ *
  * Measures the Actual Performance Frequency in Hz (64-bit)
+ * (just a naming change, mperf --> aperf )
  */
 static uint64_t measure_aperf_frequency(void)
 {
-    uint64_t aperfStart;
-    uint64_t aperfEnd;
-    uint64_t aperfDelta = 0xffffffffffffffffULL;
-    unsigned long pollCount;
-    uint64_t retval = 0;
-    int i;
-
-    /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
-     * counter 2. We run this loop 3 times to make sure the cache
-     * is hot and we take the minimum delta from all of the runs.
-     * That is to say that we're biased towards measuring the minimum
-     * number of APERF ticks that occur while waiting for the timer to
-     * expire.
-     */
-    for(i = 0; i < 10; ++i)
-    {
-        enable_PIT2();
-        set_PIT2_mode0(CALIBRATE_LATCH);
-        aperfStart = rdmsr64(MSR_AMD_APERF);
-        pollCount = poll_PIT2_gate();
-        aperfEnd = rdmsr64(MSR_AMD_APERF);
-        /* The poll loop must have run at least a few times for accuracy */
-        if(pollCount <= 1)
-            continue;
-        /* The TSC must increment at LEAST once every millisecond. We
-         * should have waited exactly 30 msec so the APERF delta should
-         * be >= 30. Anything less and the processor is way too slow.
-         */
-        if((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
-            continue;
-        // tscDelta = MIN(tscDelta, (tscEnd - tscStart))
-        if( (aperfEnd - aperfStart) < aperfDelta )
-            aperfDelta = aperfEnd - aperfStart;
-    }
-    /* mperfDelta is now the least number of MPERF ticks the processor made in
-     * a timespan of 0.03 s (e.g. 30 milliseconds)
-     */
-
-    if(aperfDelta > (1ULL<<32))
-        retval = 0;
-    else
-    {
-        retval = aperfDelta * 1000 / 30;
-    }
-    disable_PIT2();
-    return retval;
+    uint64_t aperfStart;
+    uint64_t aperfEnd;
+    uint64_t aperfDelta = 0xffffffffffffffffULL;
+    unsigned long pollCount;
+    uint64_t retval = 0;
+    int i;
+
+    /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
+     * counter 2. We run this loop 10 times to make sure the cache
+     * is hot and we take the minimum delta from all of the runs.
+     * That is to say that we're biased towards measuring the minimum
+     * number of APERF ticks that occur while waiting for the timer to
+     * expire.
+     */
+    for(i = 0; i < 10; ++i)
+    {
+        enable_PIT2();
+        set_PIT2_mode0(CALIBRATE_LATCH);
+        aperfStart = rdmsr64(MSR_AMD_APERF);
+        pollCount = poll_PIT2_gate();
+        aperfEnd = rdmsr64(MSR_AMD_APERF);
+        /* The poll loop must have run at least a few times for accuracy */
+        if (pollCount <= 1)
+            continue;
+        /* The APERF count must increment at LEAST once every millisecond.
+         * We should have waited exactly 30 msec so the APERF delta should
+         * be >= 30. Anything less and the processor is way too slow.
+         */
+        if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
+            continue;
+        // aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
+        if ( (aperfEnd - aperfStart) < aperfDelta )
+            aperfDelta = aperfEnd - aperfStart;
+    }
+    /* aperfDelta is now the least number of APERF ticks the processor made in
+     * a timespan of 0.03 s (i.e. 30 milliseconds)
+     */
+
+    if (aperfDelta > (1ULL<<32))
+        retval = 0;
+    else
+    {
+        retval = aperfDelta * 1000 / 30;
+    }
+    disable_PIT2();
+    return retval;
 }
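
An APERF delta is only meaningful when the CPU advertises the effective-frequency interface; that is the gate the K10 branch below applies via CPUID leaf 6, ECX bit 0, before trusting this function's result. A host-side sketch of the same check, assuming GCC/clang on x86 with <cpuid.h>:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* CPUID leaf 6, ECX bit 0: hardware coordination feedback (APERF/MPERF) */
        if (__get_cpuid(6, &eax, &ebx, &ecx, &edx) && (ecx & 1)) {
            puts("EffFreq reported: APERF/MPERF deltas reflect effective frequency");
        } else {
            puts("EffFreq not reported: fall back to multiplier arithmetic");
        }
        return 0;
    }
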
-
 /*
  * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
  * - multi. is read from a specific MSR. In the case of Intel, there is:
- *     a max multi. (used to calculate the FSB freq.),
- *     and a current multi. (used to calculate the CPU freq.)
+ *   a max multi. (used to calculate the FSB freq.),
+ *   and a current multi. (used to calculate the CPU freq.)
  * - fsbFrequency = tscFrequency / multi
  * - cpuFrequency = fsbFrequency * multi
  */
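
The two formulas are easiest to sanity-check with concrete numbers. A minimal sketch, using illustrative values only (a 2.93 GHz part with a maximum bus ratio of 22):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t tscFrequency = 2932000000ULL;   /* measured TSC, ~2.93 GHz */
        uint64_t bus_ratio_max = 22;             /* max multiplier from the MSR */

        uint64_t fsbFrequency = tscFrequency / bus_ratio_max;  /* ~133 MHz BCLK */
        uint64_t cpuFrequency = fsbFrequency * bus_ratio_max;  /* back to ~2.93 GHz */

        printf("FSB %llu MHz, CPU %llu MHz\n",
               (unsigned long long)(fsbFrequency / 1000000),
               (unsigned long long)(cpuFrequency / 1000000));
        return 0;
    }
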
-
 void scan_cpu(PlatformInfo_t *p)
 {
     uint64_t tscFrequency, fsbFrequency, cpuFrequency;
     uint64_t msr, flex_ratio;
     uint8_t maxcoef, maxdiv, currcoef, bus_ratio_max, currdiv;
-    const char *newratio;
-    int len, myfsb;
-    uint8_t bus_ratio_min;
-    uint32_t max_ratio, min_ratio;
-
+    const char *newratio;
+    int len, myfsb;
+    uint8_t bus_ratio_min;
+    uint32_t max_ratio, min_ratio;
+
     max_ratio = min_ratio = myfsb = bus_ratio_min = 0;
     maxcoef = maxdiv = bus_ratio_max = currcoef = currdiv = 0;
-
+
     /* get cpuid values */
     do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]);
     do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]);
     do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]);
     do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]);
-    do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]);
+    do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]);
     do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]);
-    if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 8) {
-        do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
-        do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
+    if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 8) {
+        do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
+        do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
     }
-    else if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) {
+    else if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) {
         do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
     }
-
-
+
 #if DEBUG_CPU
     {
         int i;
         printf("CPUID Raw Values:\n");
         for (i=0; i<CPUID_MAX; i++) {
             printf("%02d: %08x-%08x-%08x-%08x\n", i,
-                   p->CPU.CPUID[i][0], p->CPU.CPUID[i][1],
-                   p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]);
+                   p->CPU.CPUID[i][0], p->CPU.CPUID[i][1],
+                   p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]);
         }
     }
#endif
+
     p->CPU.Vendor    = p->CPU.CPUID[CPUID_0][1];
     p->CPU.Signature = p->CPU.CPUID[CPUID_1][0];
     p->CPU.Stepping  = bitfield(p->CPU.CPUID[CPUID_1][0], 3, 0);
@@ -248,34 +196,35 @@
     p->CPU.ExtModel  = bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);
     p->CPU.ExtFamily = bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);
 
-    p->CPU.Model += (p->CPU.ExtModel << 4);
-
-    if (p->CPU.Vendor == CPUID_VENDOR_INTEL &&
-        p->CPU.Family == 0x06 &&
-        p->CPU.Model >= CPUID_MODEL_NEHALEM &&
-        p->CPU.Model != CPUID_MODEL_ATOM  // MSR is *NOT* available on the Intel Atom CPU
-        )
-    {
-        msr = rdmsr64(MSR_CORE_THREAD_COUNT);                // Undocumented MSR in Nehalem and newer CPUs
-        p->CPU.NoCores   = bitfield((uint32_t)msr, 31, 16);  // Using undocumented MSR to get actual values
-        p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);   // Using undocumented MSR to get actual values
+    p->CPU.Model += (p->CPU.ExtModel << 4);
+
+    if (p->CPU.Vendor == CPUID_VENDOR_INTEL &&
+        p->CPU.Family == 0x06 &&
+        p->CPU.Model >= CPUID_MODEL_NEHALEM &&
+        p->CPU.Model != CPUID_MODEL_ATOM  // MSR is *NOT* available on the Intel Atom CPU
+        )
+    {
+        msr = rdmsr64(MSR_CORE_THREAD_COUNT);                // Undocumented MSR in Nehalem and newer CPUs
+        p->CPU.NoCores   = bitfield((uint32_t)msr, 31, 16);  // Using undocumented MSR to get actual values
+        p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);   // Using undocumented MSR to get actual values
     }
-    else if (p->CPU.Vendor == CPUID_VENDOR_AMD)
-    {
-        p->CPU.NoThreads = bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);
-        p->CPU.NoCores   = bitfield(p->CPU.CPUID[CPUID_88][2], 7, 0) + 1;
-    }
-    else
-    {
-        p->CPU.NoThreads = bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);  // Use previous method for Cores and Threads
-        p->CPU.NoCores   = bitfield(p->CPU.CPUID[CPUID_4][0], 31, 26) + 1;
+    else if (p->CPU.Vendor == CPUID_VENDOR_AMD)
+    {
+        p->CPU.NoThreads = bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);
+        p->CPU.NoCores   = bitfield(p->CPU.CPUID[CPUID_88][2], 7, 0) + 1;
     }
+    else
+    {
+        // Use previous method for Cores and Threads
+        p->CPU.NoThreads = bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);
+        p->CPU.NoCores   = bitfield(p->CPU.CPUID[CPUID_4][0], 31, 26) + 1;
+    }
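
The bitfield(x, h, l) helper used throughout scan_cpu() extracts bits h..l inclusive. Chameleon ships its own macro; the stand-alone equivalent below is only an illustration of the semantics, applied to a hypothetical MSR_CORE_THREAD_COUNT value:

    #include <stdint.h>
    #include <stdio.h>

    /* Extract bits h..l (inclusive) of x; valid for fields narrower than 64 bits. */
    static uint64_t bitfield64(uint64_t x, int h, int l)
    {
        return (x >> l) & ((1ULL << (h - l + 1)) - 1);
    }

    int main(void)
    {
        /* Hypothetical raw value: core count in bits 31:16, thread count in 15:0. */
        uint64_t msr = ((uint64_t)4 << 16) | 8;

        printf("cores=%llu threads=%llu\n",
               (unsigned long long)bitfield64(msr, 31, 16),   /* 4 */
               (unsigned long long)bitfield64(msr, 15, 0));   /* 8 */
        return 0;
    }
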
 
     /* get brand string (if supported) */
     /* Copyright: from Apple's XNU cpuid.c */
     if (p->CPU.CPUID[CPUID_80][0] > 0x80000004) {
         uint32_t reg[4];
-        char str[128], *s;
+        char str[128], *s;
         /*
          * The brand string 48 bytes (max), guaranteed to
          * be NULL terminated.
@@ -290,15 +239,15 @@
             if (*s != ' ') break;
         }
-        strlcpy(p->CPU.BrandString, s, sizeof(p->CPU.BrandString));
+        strlcpy(p->CPU.BrandString, s, sizeof(p->CPU.BrandString));
 
         if (!strncmp(p->CPU.BrandString, CPU_STRING_UNKNOWN, MIN(sizeof(p->CPU.BrandString), strlen(CPU_STRING_UNKNOWN) + 1))) {
-            /*
-             * This string means we have a firmware-programmable brand string,
-             * and the firmware couldn't figure out what sort of CPU we have.
-             */
-            p->CPU.BrandString[0] = '\0';
-        }
+            /*
+             * This string means we have a firmware-programmable brand string,
+             * and the firmware couldn't figure out what sort of CPU we have.
+             */
+            p->CPU.BrandString[0] = '\0';
+        }
     }
 
     /* setup features */
@@ -330,52 +279,52 @@
     if (p->CPU.NoThreads > p->CPU.NoCores) {
         p->CPU.Features |= CPU_FEATURE_HTT;
     }
-
+
     tscFrequency = measure_tsc_frequency();
     fsbFrequency = 0;
     cpuFrequency = 0;
-
+
     if ((p->CPU.Vendor == CPUID_VENDOR_INTEL) && ((p->CPU.Family == 0x06) || (p->CPU.Family == 0x0f))) {
         int intelCPU = p->CPU.Model;
         if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)) {
             /* Nehalem CPU model */
-            if (p->CPU.Family == 0x06 && (p->CPU.Model == CPU_MODEL_NEHALEM ||
-                                          p->CPU.Model == CPU_MODEL_FIELDS ||
-                                          p->CPU.Model == CPU_MODEL_DALES ||
-                                          p->CPU.Model == CPU_MODEL_DALES_32NM ||
-                                          p->CPU.Model == CPU_MODEL_WESTMERE ||
-                                          p->CPU.Model == CPU_MODEL_NEHALEM_EX ||
-                                          p->CPU.Model == CPU_MODEL_WESTMERE_EX ||
-                                          p->CPU.Model == CPU_MODEL_SANDY ||
-                                          p->CPU.Model == CPU_MODEL_SANDY_XEON)) {
+            if (p->CPU.Family == 0x06 && (p->CPU.Model == CPU_MODEL_NEHALEM ||
+                p->CPU.Model == CPU_MODEL_FIELDS ||
+                p->CPU.Model == CPU_MODEL_DALES ||
+                p->CPU.Model == CPU_MODEL_DALES_32NM ||
+                p->CPU.Model == CPU_MODEL_WESTMERE ||
+                p->CPU.Model == CPU_MODEL_NEHALEM_EX ||
+                p->CPU.Model == CPU_MODEL_WESTMERE_EX ||
+                p->CPU.Model == CPU_MODEL_SANDY ||
+                p->CPU.Model == CPU_MODEL_SANDY_XEON)) {
                 msr = rdmsr64(MSR_PLATFORM_INFO);
-                DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
-                bus_ratio_max = bitfield(msr, 14, 8);
-                bus_ratio_min = bitfield(msr, 46, 40); //valv: not sure about this one (Remarq.1)
+                DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
+                bus_ratio_max = bitfield(msr, 14, 8);
+                bus_ratio_min = bitfield(msr, 46, 40); //valv: not sure about this one (Remarq.1)
                 msr = rdmsr64(MSR_FLEX_RATIO);
-                DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
-                if (bitfield(msr, 16, 16)) {
-                    flex_ratio = bitfield(msr, 14, 8);
+                DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
+                if (bitfield(msr, 16, 16)) {
+                    flex_ratio = bitfield(msr, 14, 8);
                     /* bcc9: at least on the gigabyte h67ma-ud2h,
-                     where the cpu multipler can't be changed to
-                     allow overclocking, the flex_ratio msr has unexpected (to OSX)
-                     contents. These contents cause mach_kernel to
-                     fail to compute the bus ratio correctly, instead
-                     causing the system to crash since tscGranularity
-                     is inadvertently set to 0.
-                     */
+                     where the cpu multiplier can't be changed to
+                     allow overclocking, the flex_ratio msr has unexpected (to OSX)
+                     contents. These contents cause mach_kernel to
+                     fail to compute the bus ratio correctly, instead
+                     causing the system to crash since tscGranularity
+                     is inadvertently set to 0.
+                     */
                     if (flex_ratio == 0) {
                         /* Clear bit 16 (evidently the presence bit) */
                         wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
                         msr = rdmsr64(MSR_FLEX_RATIO);
-                        verbose("Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
+                        verbose("Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
                     } else {
                         if (bus_ratio_max > flex_ratio) {
                             bus_ratio_max = flex_ratio;
                         }
                     }
                 }
-
+
                 if (bus_ratio_max) {
                     fsbFrequency = (tscFrequency / bus_ratio_max);
                 }
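
The bit arithmetic behind that workaround can be shown on its own: bit 16 is treated as the "flex ratio present" flag, bits 14:8 hold the ratio, and a set flag with a zero ratio is the unusable combination, so the code clears the flag. A minimal sketch with a hypothetical raw value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t msr = 1ULL << 16;                /* presence bit set, ratio field 0 */
        uint64_t flex_ratio = (msr >> 8) & 0x7f;  /* bits 14:8 */

        if (((msr >> 16) & 1) && flex_ratio == 0) {
            msr &= 0xFFFFFFFFFFFEFFFFULL;         /* clear bit 16, as the patch does */
        }

        printf("patched value: 0x%016llx\n", (unsigned long long)msr);
        return 0;
    }
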
@@ -391,9 +340,9 @@
                 max_ratio = atoi(newratio);
                 max_ratio = (max_ratio * 10);
                 if (len >= 3) max_ratio = (max_ratio + 5);
-
+
                 verbose("Bus-Ratio: min=%d, max=%s\n", bus_ratio_min, newratio);
-
+
                 // extreme overclockers may love 320 ;)
                 if ((max_ratio >= min_ratio) && (max_ratio <= 320)) {
                     cpuFrequency = (fsbFrequency * max_ratio) / 10;
@@ -404,32 +353,33 @@
                 }
             }
             //valv: to be uncommented if Remarq.1 didn't stick
-            /*if(bus_ratio_max > 0) bus_ratio = flex_ratio;*/
+            /*if (bus_ratio_max > 0) bus_ratio = flex_ratio;*/
             p->CPU.MaxRatio = max_ratio;
             p->CPU.MinRatio = min_ratio;
-
+
             myfsb = fsbFrequency / 1000000;
             verbose("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", myfsb, max_ratio);
             currcoef = bus_ratio_max;
         } else {
             msr = rdmsr64(MSR_IA32_PERF_STATUS);
-            DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
-            currcoef = bitfield(msr, 12, 8);
+            DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
+            currcoef = bitfield(msr, 12, 8);
             /* Non-integer bus ratio for the max-multi*/
-            maxdiv = bitfield(msr, 46, 46);
+            maxdiv = bitfield(msr, 46, 46);
             /* Non-integer bus ratio for the current-multi (undocumented)*/
-            currdiv = bitfield(msr, 14, 14);
-
-            if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f)) // This will always be model >= 3
+            currdiv = bitfield(msr, 14, 14);
+
+            // This will always be model >= 3
+            if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f))
             {
                 /* On these models, maxcoef defines TSC freq */
-                maxcoef = bitfield(msr, 44, 40);
+                maxcoef = bitfield(msr, 44, 40);
             } else {
                 /* On lower models, currcoef defines TSC freq */
                 /* XXX */
                 maxcoef = currcoef;
             }
-
+
             if (maxcoef) {
                 if (maxdiv) {
                     fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
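
When the non-integer bit (maxdiv/currdiv) is set, the effective multiplier is the coefficient plus one half; doubling both numerator and denominator keeps the division in integers, which is what the (tscFrequency * 2) / ((maxcoef * 2) + 1) expression does. Concretely, with illustrative numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical part: TSC at 2166 MHz, max multiplier 13 with the
         * half bit set, i.e. an effective ratio of 13.5. */
        uint64_t tscFrequency = 2166000000ULL;
        uint64_t maxcoef = 13, maxdiv = 1;

        uint64_t fsbFrequency = maxdiv
            ? (tscFrequency * 2) / (maxcoef * 2 + 1)   /* divide by 13.5 in integers */
            : tscFrequency / maxcoef;

        printf("FSB ~ %llu MHz\n",
               (unsigned long long)(fsbFrequency / 1000000));   /* ~160 MHz */
        return 0;
    }
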
@@ -450,105 +400,107 @@
                 p->CPU.Features |= CPU_FEATURE_MOBILE;
             }
         }
-        else if((p->CPU.Vendor == CPUID_VENDOR_AMD) && (p->CPU.Family == 0x0f))
-        {
-            switch(p->CPU.ExtFamily)
-            {
-                case 0x00: /* K8 */
-                    msr = rdmsr64(K8_FIDVID_STATUS);
-                    maxcoef = bitfield(msr, 21, 16) / 2 + 4;
-                    currcoef = bitfield(msr, 5, 0) / 2 + 4;
-                    break;
-
-                case 0x01: /* K10 */
-                    msr = rdmsr64(K10_COFVID_STATUS);
-                    do_cpuid2(0x00000006, 0, p->CPU.CPUID[CPUID_6]);
-                    if(bitfield(p->CPU.CPUID[CPUID_6][2], 0, 0) == 1) // EffFreq: effective frequency interface
-                    {
-                        //uint64_t mperf = measure_mperf_frequency();
-                        uint64_t aperf = measure_aperf_frequency();
-                        cpuFrequency = aperf;
-                    }
-                    // NOTE: tsc runs at the maccoeff (non turbo)
-                    // *not* at the turbo frequency.
-                    maxcoef = bitfield(msr, 54, 49) / 2 + 4;
-                    currcoef = bitfield(msr, 5, 0) + 0x10;
-                    currdiv = 2 << bitfield(msr, 8, 6);
-
-                    break;
-
-                case 0x05: /* K14 */
-                    msr = rdmsr64(K10_COFVID_STATUS);
-                    currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
-                    currdiv = (bitfield(msr, 8, 4) + 1) << 2;
-                    currdiv += bitfield(msr, 3, 0);
-
-                    break;
-
-                case 0x02: /* K11 */
-                    // not implimented
-                    break;
-            }
-
-            if (maxcoef)
-            {
-                if (currdiv)
-                {
-                    if(!currcoef) currcoef = maxcoef;
-                    if(!cpuFrequency)
-                        fsbFrequency = ((tscFrequency * currdiv) / currcoef);
-                    else
-                        fsbFrequency = ((cpuFrequency * currdiv) / currcoef);
-
-                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
-                } else {
-                    if(!cpuFrequency)
-                        fsbFrequency = (tscFrequency / maxcoef);
-                    else
-                        fsbFrequency = (cpuFrequency / maxcoef);
-                    DBG("%d\n", currcoef);
-                }
-            }
-            else if (currcoef)
-            {
-                if (currdiv)
-                {
-                    fsbFrequency = ((tscFrequency * currdiv) / currcoef);
-                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
-                } else {
-                    fsbFrequency = (tscFrequency / currcoef);
-                    DBG("%d\n", currcoef);
-                }
-            }
-            if(!cpuFrequency) cpuFrequency = tscFrequency;
-        }
+        else if ((p->CPU.Vendor == CPUID_VENDOR_AMD) && (p->CPU.Family == 0x0f))
+        {
+            switch(p->CPU.ExtFamily)
+            {
+                case 0x00: /* K8 */
+                    msr = rdmsr64(K8_FIDVID_STATUS);
+                    maxcoef = bitfield(msr, 21, 16) / 2 + 4;
+                    currcoef = bitfield(msr, 5, 0) / 2 + 4;
+                    break;
+
+                case 0x01: /* K10 */
+                    msr = rdmsr64(K10_COFVID_STATUS);
+                    do_cpuid2(0x00000006, 0, p->CPU.CPUID[CPUID_6]);
+                    // EffFreq: effective frequency interface
+                    if (bitfield(p->CPU.CPUID[CPUID_6][2], 0, 0) == 1)
+                    {
+                        //uint64_t mperf = measure_mperf_frequency();
+                        uint64_t aperf = measure_aperf_frequency();
+                        cpuFrequency = aperf;
+                    }
+                    // NOTE: tsc runs at the maxcoef (non turbo)
+                    // *not* at the turbo frequency.
+                    maxcoef = bitfield(msr, 54, 49) / 2 + 4;
+                    currcoef = bitfield(msr, 5, 0) + 0x10;
+                    currdiv = 2 << bitfield(msr, 8, 6);
+
+                    break;
+
+                case 0x05: /* K14 */
+                    msr = rdmsr64(K10_COFVID_STATUS);
+                    currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
+                    currdiv = (bitfield(msr, 8, 4) + 1) << 2;
+                    currdiv += bitfield(msr, 3, 0);
+
+                    break;
+
+                case 0x02: /* K11 */
+                    // not implemented
+                    break;
+            }
+
+            if (maxcoef)
+            {
+                if (currdiv)
+                {
+                    if (!currcoef) currcoef = maxcoef;
+                    if (!cpuFrequency)
+                        fsbFrequency = ((tscFrequency * currdiv) / currcoef);
+                    else
+                        fsbFrequency = ((cpuFrequency * currdiv) / currcoef);
+
+                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
+                } else {
+                    if (!cpuFrequency)
+                        fsbFrequency = (tscFrequency / maxcoef);
+                    else
+                        fsbFrequency = (cpuFrequency / maxcoef);
+                    DBG("%d\n", currcoef);
+                }
+            }
+            else if (currcoef)
+            {
+                if (currdiv)
+                {
+                    fsbFrequency = ((tscFrequency * currdiv) / currcoef);
+                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
+                } else {
+                    fsbFrequency = (tscFrequency / currcoef);
+                    DBG("%d\n", currcoef);
+                }
+            }
+            if (!cpuFrequency) cpuFrequency = tscFrequency;
+        }
+
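
For reference, AMD's family 10h documentation gives the core frequency as 100 MHz * (CpuFid + 10h) / 2^CpuDid; the code above carries the same FID/DID fields in its currcoef/currdiv bookkeeping rather than computing the frequency directly. A worked decode of a hypothetical COFVID status value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t msr = 0x14;              /* hypothetical: CpuFid = 0x14, CpuDid = 0 */
        uint64_t fid = msr & 0x3f;        /* bits 5:0 */
        uint64_t did = (msr >> 6) & 0x7;  /* bits 8:6 */

        /* BKDG formula: 100 MHz * (CpuFid + 16) / 2^CpuDid */
        uint64_t mhz = (100 * (fid + 0x10)) >> did;

        printf("core COF ~ %llu MHz\n", (unsigned long long)mhz);   /* 3600 MHz */
        return 0;
    }
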
 #if 0
-    if (!fsbFrequency) {
-        fsbFrequency = (DEFAULT_FSB * 1000);
-        cpuFrequency = tscFrequency;
-        DBG("0 ! using the default value for FSB !\n");
-    }
+    if (!fsbFrequency) {
+        fsbFrequency = (DEFAULT_FSB * 1000);
+        cpuFrequency = tscFrequency;
+        DBG("0 ! using the default value for FSB !\n");
+    }
 #endif
-
-    p->CPU.MaxCoef = maxcoef;
-    p->CPU.MaxDiv = maxdiv;
-    p->CPU.CurrCoef = currcoef;
-    p->CPU.CurrDiv = currdiv;
-    p->CPU.TSCFrequency = tscFrequency;
-    p->CPU.FSBFrequency = fsbFrequency;
-    p->CPU.CPUFrequency = cpuFrequency;
-
-    DBG("CPU: Brand String: %s\n", p->CPU.BrandString);
-    DBG("CPU: Vendor/Family/ExtFamily: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Family, p->CPU.ExtFamily);
-    DBG("CPU: Model/ExtModel/Stepping: 0x%x/0x%x/0x%x\n", p->CPU.Model, p->CPU.ExtModel, p->CPU.Stepping);
-    DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
-    DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
-    DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
-    DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
-    DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
-    DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
-    DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
+
+    p->CPU.MaxCoef = maxcoef;
+    p->CPU.MaxDiv = maxdiv;
+    p->CPU.CurrCoef = currcoef;
+    p->CPU.CurrDiv = currdiv;
+    p->CPU.TSCFrequency = tscFrequency;
+    p->CPU.FSBFrequency = fsbFrequency;
+    p->CPU.CPUFrequency = cpuFrequency;
+
+    DBG("CPU: Brand String: %s\n", p->CPU.BrandString);
+    DBG("CPU: Vendor/Family/ExtFamily: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Family, p->CPU.ExtFamily);
+    DBG("CPU: Model/ExtModel/Stepping: 0x%x/0x%x/0x%x\n", p->CPU.Model, p->CPU.ExtModel, p->CPU.Stepping);
+    DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
+    DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
+    DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
+    DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
+    DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
+    DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
+    DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
 #if DEBUG_CPU
-    pause();
+    pause();
 #endif
 }
Index: trunk/i386/libsaio/fake_efi.c
===================================================================
--- trunk/i386/libsaio/fake_efi.c    (revision 1371)
+++ trunk/i386/libsaio/fake_efi.c    (revision 1372)
@@ -492,7 +492,7 @@
     // belong to smbios config only ...
     const char *sysId = getStringForKey(kSystemID, &bootInfo->chameleonConfig);
     EFI_CHAR8* ret = getUUIDFromString(sysId);
-
+
     if (!sysId || !ret) // try bios dmi info UUID extraction
     {
         ret = getSmbiosUUID();
@@ -566,7 +566,7 @@
     {
         DT__AddProperty(runtimeServicesNode, "table", sizeof(uint64_t), &gST64->RuntimeServices);
     }
-
+
     // Set up the /efi/configuration-table node which will eventually have several child nodes for
     // all of the configuration tables needed by various kernel extensions.
     gEfiConfigurationTableNode = DT__AddChild(node, "configuration-table");
@@ -591,7 +591,7 @@
     // Export system-id. Can be disabled with SystemId=No in com.apple.Boot.plist
     if ((ret=getSystemID()))
         DT__AddProperty(efiPlatformNode, SYSTEM_ID_PROP, UUID_LEN, (EFI_UINT32*) ret);
-
+
     // Export SystemSerialNumber if present
     if ((ret16=getSmbiosChar16("SMserial", &len)))
         DT__AddProperty(efiPlatformNode, SYSTEM_SERIAL_PROP, len, ret16);
@@ -647,12 +647,12 @@
             err = loadConfigFile(dirSpecSMBIOS, &bootInfo->smbiosConfig);
         }
     }
-
+
     if (err)
     {
         verbose("No SMBIOS replacement found.\n");
     }
-
+
     // get a chance to scan mem dynamically if user asks for it while having the config options
     // loaded as well, as opposed to when it was in scan_platform(); also load the orig. smbios
     // so that we can access dmi info, without patching the smbios yet.
@@ -668,7 +668,7 @@
     addConfigurationTable(&gEfiSmbiosTableGuid, &smbios_p, NULL);
 
     setupBoardId(); //need to be called after getSmbios
-
+
     // Setup ACPI with DSDT overrides (mackerintel's patch)
     setupAcpi();
 
@@ -690,26 +690,26 @@
     Node *node;
     SMBEntryPoint *origeps;
     void *tableAddress;
-
+
     node = DT__FindNode("/efi/platform", false);
     if (!node)
     {
         verbose("/efi/platform node not found\n");
         return;
     }
-
+
     origeps = getSmbios(SMBIOS_ORIGINAL);
     if (!origeps)
     {
         return;
     }
-
+
     tableAddress = (void *)AllocateKernelMemory(origeps->dmi.tableLength);
     if (!tableAddress)
    {
         return;
     }
-
+
     memcpy(tableAddress, (void *)origeps->dmi.tableAddress, origeps->dmi.tableLength);
     DT__AddProperty(node, "SMBIOS", origeps->dmi.tableLength, tableAddress);
 }
@@ -721,14 +721,14 @@
 {
     // Generate efi device strings
     setup_pci_devs(root_pci_dev);
-
+
     readSMBIOSInfo(getSmbios(SMBIOS_ORIGINAL));
-
+
     // load smbios.plist file if any
     setupSmbiosConfigFile("smbios.plist");
-
+
     setupSMBIOSTable();
-
+
     // Initialize the base table
     if (archCpuType == CPU_TYPE_I386)
     {
@@ -741,10 +741,9 @@
 
     // Initialize the device tree
     setupEfiDeviceTree();
-
-    saveOriginalSMBIOS();
-
+
+    saveOriginalSMBIOS();
+
     // Add configuration table entries to both the services table and the device tree
     setupEfiConfigurationTable();
 }
-
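
The saveOriginalSMBIOS() function shown above follows a pattern worth noting: snapshot the firmware's DMI table into memory the bootloader owns before anything patches it, then publish the copy as a device-tree property. A host-side sketch of the same idea, with malloc() and a printf() stub standing in for AllocateKernelMemory() and DT__AddProperty():

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Stub for DT__AddProperty(): just report what would be attached. */
    static void attach_property(const char *node, const char *name,
                                uint16_t length, void *value)
    {
        printf("%s/%s: %u bytes at %p\n", node, name, length, value);
    }

    int main(void)
    {
        uint8_t firmware_table[32] = { 0x00, 0x04 };   /* pretend DMI data */
        uint16_t tableLength = sizeof(firmware_table);

        void *tableAddress = malloc(tableLength);      /* in place of AllocateKernelMemory() */
        if (!tableAddress)
            return 1;

        memcpy(tableAddress, firmware_table, tableLength);   /* snapshot before patching */
        attach_property("/efi/platform", "SMBIOS", tableLength, tableAddress);

        free(tableAddress);
        return 0;
    }
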