Chameleon

Chameleon Svn Source Tree

Root/tags/2.0/i386/libsaio/cpu.c

Source at commit 1808 created 12 years 3 months ago.
By blackosx, Revise layout of package installer 'Welcome' file so it looks cleaner. Change the copyright notice to begin from 2009 as seen in the Chameleon 2.0 r431 installer. Should this date be set earlier?
1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9#include "bootstruct.h"
10#include "boot.h"
11
/*
 * DEBUG_CPU may be predefined by the build; it defaults to 0 (off).
 * DBG() forwards its arguments to printf() when DEBUG_CPU is non-zero and
 * to msglog() otherwise.
 */
#if !defined(DEBUG_CPU)
#define DEBUG_CPU 0
#endif

#if DEBUG_CPU
#define DBG(args...)    printf(args)
#else
#define DBG(args...)    msglog(args)
#endif
21
22/*
23 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
24 */
25static uint64_t measure_tsc_frequency(void)
26{
27uint64_t tscStart;
28uint64_t tscEnd;
29uint64_t tscDelta = 0xffffffffffffffffULL;
30unsigned long pollCount;
31uint64_t retval = 0;
32int i;
33
34/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
35 * counter 2. We run this loop 3 times to make sure the cache
36 * is hot and we take the minimum delta from all of the runs.
37 * That is to say that we're biased towards measuring the minimum
38 * number of TSC ticks that occur while waiting for the timer to
39 * expire. That theoretically helps avoid inconsistencies when
40 * running under a VM if the TSC is not virtualized and the host
41 * steals time. The TSC is normally virtualized for VMware.
42 */
43for(i = 0; i < 10; ++i)
44{
45enable_PIT2();
46set_PIT2_mode0(CALIBRATE_LATCH);
47tscStart = rdtsc64();
48pollCount = poll_PIT2_gate();
49tscEnd = rdtsc64();
50/* The poll loop must have run at least a few times for accuracy */
51if (pollCount <= 1)
52continue;
53/* The TSC must increment at LEAST once every millisecond.
54 * We should have waited exactly 30 msec so the TSC delta should
55 * be >= 30. Anything less and the processor is way too slow.
56 */
57if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
58continue;
59// tscDelta = MIN(tscDelta, (tscEnd - tscStart))
60if ( (tscEnd - tscStart) < tscDelta )
61tscDelta = tscEnd - tscStart;
62}
63/* tscDelta is now the least number of TSC ticks the processor made in
64 * a timespan of 0.03 s (e.g. 30 milliseconds)
65 * Linux thus divides by 30 which gives the answer in kiloHertz because
66 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
67 * Hz so we need to convert our milliseconds to seconds. Since we're
68 * dividing by the milliseconds, we simply multiply by 1000.
69 */
70
71/* Unlike linux, we're not limited to 32-bit, but we do need to take care
72 * that we're going to multiply by 1000 first so we do need at least some
73 * arithmetic headroom. For now, 32-bit should be enough.
74 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
75 */
76if (tscDelta > (1ULL<<32))
77retval = 0;
78else
79{
80retval = tscDelta * 1000 / 30;
81}
82disable_PIT2();
83return retval;
84}
85
86/*
87 * Original comment/code:
88 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
89 *
90 * Measures the Actual Performance Frequency in Hz (64-bit)
91 * (just a naming change, mperf --> aperf )
92 */
93static uint64_t measure_aperf_frequency(void)
94{
95uint64_t aperfStart;
96uint64_t aperfEnd;
97uint64_t aperfDelta = 0xffffffffffffffffULL;
98unsigned long pollCount;
99uint64_t retval = 0;
100int i;
101
102/* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
103 * counter 2. We run this loop 3 times to make sure the cache
104 * is hot and we take the minimum delta from all of the runs.
105 * That is to say that we're biased towards measuring the minimum
106 * number of APERF ticks that occur while waiting for the timer to
107 * expire.
108 */
109for(i = 0; i < 10; ++i)
110{
111enable_PIT2();
112set_PIT2_mode0(CALIBRATE_LATCH);
113aperfStart = rdmsr64(MSR_AMD_APERF);
114pollCount = poll_PIT2_gate();
115aperfEnd = rdmsr64(MSR_AMD_APERF);
116/* The poll loop must have run at least a few times for accuracy */
117if (pollCount <= 1)
118continue;
119/* The TSC must increment at LEAST once every millisecond.
120 * We should have waited exactly 30 msec so the APERF delta should
121 * be >= 30. Anything less and the processor is way too slow.
122 */
123if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
124continue;
125// tscDelta = MIN(tscDelta, (tscEnd - tscStart))
126if ( (aperfEnd - aperfStart) < aperfDelta )
127aperfDelta = aperfEnd - aperfStart;
128}
129/* mperfDelta is now the least number of MPERF ticks the processor made in
130 * a timespan of 0.03 s (e.g. 30 milliseconds)
131 */
132
133if (aperfDelta > (1ULL<<32))
134retval = 0;
135else
136{
137retval = aperfDelta * 1000 / 30;
138}
139disable_PIT2();
140return retval;
141}
142
143/*
144 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
145 * - multi. is read from a specific MSR. In the case of Intel, there is:
146 * a max multi. (used to calculate the FSB freq.),
147 * and a current multi. (used to calculate the CPU freq.)
148 * - fsbFrequency = tscFrequency / multi
149 * - cpuFrequency = fsbFrequency * multi
150 */
151void scan_cpu(PlatformInfo_t *p)
152{
153uint64_ttscFrequency, fsbFrequency, cpuFrequency;
154uint64_tmsr, flex_ratio;
155uint8_tmaxcoef, maxdiv, currcoef, bus_ratio_max, currdiv;
156const char*newratio;
157intlen, myfsb;
158uint8_tbus_ratio_min;
159uint32_tmax_ratio, min_ratio;
160
161max_ratio = min_ratio = myfsb = bus_ratio_min = 0;
162maxcoef = maxdiv = bus_ratio_max = currcoef = currdiv = 0;
163
164/* get cpuid values */
165do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]);
166do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]);
167do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]);
168do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]);
169do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]);
170do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]);
171if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 8) {
172do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
173do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
174}
175else if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) {
176do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
177}
178
179#if DEBUG_CPU
180{
181inti;
182printf("CPUID Raw Values:\n");
183for (i=0; i<CPUID_MAX; i++) {
184printf("%02d: %08x-%08x-%08x-%08x\n", i,
185 p->CPU.CPUID[i][0], p->CPU.CPUID[i][1],
186 p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]);
187}
188}
189#endif
190
191p->CPU.Vendor= p->CPU.CPUID[CPUID_0][1];
192p->CPU.Signature= p->CPU.CPUID[CPUID_1][0];
193p->CPU.Stepping= bitfield(p->CPU.CPUID[CPUID_1][0], 3, 0);
194p->CPU.Model= bitfield(p->CPU.CPUID[CPUID_1][0], 7, 4);
195p->CPU.Family= bitfield(p->CPU.CPUID[CPUID_1][0], 11, 8);
196p->CPU.ExtModel= bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);
197p->CPU.ExtFamily= bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);
198
199p->CPU.Model += (p->CPU.ExtModel << 4);
200
201if (p->CPU.Vendor == CPUID_VENDOR_INTEL &&
202p->CPU.Family == 0x06 &&
203p->CPU.Model >= CPUID_MODEL_NEHALEM &&
204p->CPU.Model != CPUID_MODEL_ATOM// MSR is *NOT* available on the Intel Atom CPU
205)
206{
207msr = rdmsr64(MSR_CORE_THREAD_COUNT);// Undocumented MSR in Nehalem and newer CPUs
208p->CPU.NoCores= bitfield((uint32_t)msr, 31, 16);// Using undocumented MSR to get actual values
209p->CPU.NoThreads= bitfield((uint32_t)msr, 15, 0);// Using undocumented MSR to get actual values
210}
211else if (p->CPU.Vendor == CPUID_VENDOR_AMD)
212{
213p->CPU.NoThreads= bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);
214p->CPU.NoCores= bitfield(p->CPU.CPUID[CPUID_88][2], 7, 0) + 1;
215}
216else
217{
218// Use previous method for Cores and Threads
219p->CPU.NoThreads= bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);
220p->CPU.NoCores= bitfield(p->CPU.CPUID[CPUID_4][0], 31, 26) + 1;
221}
222
223/* get brand string (if supported) */
224/* Copyright: from Apple's XNU cpuid.c */
225if (p->CPU.CPUID[CPUID_80][0] > 0x80000004) {
226uint32_treg[4];
227charstr[128], *s;
228/*
229 * The brand string 48 bytes (max), guaranteed to
230 * be NULL terminated.
231 */
232do_cpuid(0x80000002, reg);
233bcopy((char *)reg, &str[0], 16);
234do_cpuid(0x80000003, reg);
235bcopy((char *)reg, &str[16], 16);
236do_cpuid(0x80000004, reg);
237bcopy((char *)reg, &str[32], 16);
238for (s = str; *s != '\0'; s++) {
239if (*s != ' ') break;
240}
241
242strlcpy(p->CPU.BrandString, s, sizeof(p->CPU.BrandString));
243
244if (!strncmp(p->CPU.BrandString, CPU_STRING_UNKNOWN, MIN(sizeof(p->CPU.BrandString), strlen(CPU_STRING_UNKNOWN) + 1))) {
245/*
246 * This string means we have a firmware-programmable brand string,
247 * and the firmware couldn't figure out what sort of CPU we have.
248 */
249p->CPU.BrandString[0] = '\0';
250}
251}
252
253/* setup features */
254if ((bit(23) & p->CPU.CPUID[CPUID_1][3]) != 0) {
255p->CPU.Features |= CPU_FEATURE_MMX;
256}
257if ((bit(25) & p->CPU.CPUID[CPUID_1][3]) != 0) {
258p->CPU.Features |= CPU_FEATURE_SSE;
259}
260if ((bit(26) & p->CPU.CPUID[CPUID_1][3]) != 0) {
261p->CPU.Features |= CPU_FEATURE_SSE2;
262}
263if ((bit(0) & p->CPU.CPUID[CPUID_1][2]) != 0) {
264p->CPU.Features |= CPU_FEATURE_SSE3;
265}
266if ((bit(19) & p->CPU.CPUID[CPUID_1][2]) != 0) {
267p->CPU.Features |= CPU_FEATURE_SSE41;
268}
269if ((bit(20) & p->CPU.CPUID[CPUID_1][2]) != 0) {
270p->CPU.Features |= CPU_FEATURE_SSE42;
271}
272if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0) {
273p->CPU.Features |= CPU_FEATURE_EM64T;
274}
275if ((bit(5) & p->CPU.CPUID[CPUID_1][3]) != 0) {
276p->CPU.Features |= CPU_FEATURE_MSR;
277}
278//if ((bit(28) & p->CPU.CPUID[CPUID_1][3]) != 0) {
279if (p->CPU.NoThreads > p->CPU.NoCores) {
280p->CPU.Features |= CPU_FEATURE_HTT;
281}
282
283tscFrequency = measure_tsc_frequency();
284fsbFrequency = 0;
285cpuFrequency = 0;
286
287if ((p->CPU.Vendor == CPUID_VENDOR_INTEL) && ((p->CPU.Family == 0x06) || (p->CPU.Family == 0x0f))) {
288int intelCPU = p->CPU.Model;
289if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)) {
290/* Nehalem CPU model */
291if (p->CPU.Family == 0x06 && (p->CPU.Model == CPU_MODEL_NEHALEM ||
292 p->CPU.Model == CPU_MODEL_FIELDS ||
293 p->CPU.Model == CPU_MODEL_DALES ||
294 p->CPU.Model == CPU_MODEL_DALES_32NM ||
295 p->CPU.Model == CPU_MODEL_WESTMERE ||
296 p->CPU.Model == CPU_MODEL_NEHALEM_EX ||
297 p->CPU.Model == CPU_MODEL_WESTMERE_EX ||
298 p->CPU.Model == CPU_MODEL_SANDY ||
299 p->CPU.Model == CPU_MODEL_SANDY_XEON)) {
300msr = rdmsr64(MSR_PLATFORM_INFO);
301DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
302bus_ratio_max = bitfield(msr, 14, 8);
303bus_ratio_min = bitfield(msr, 46, 40); //valv: not sure about this one (Remarq.1)
304msr = rdmsr64(MSR_FLEX_RATIO);
305DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
306if (bitfield(msr, 16, 16)) {
307flex_ratio = bitfield(msr, 14, 8);
308/* bcc9: at least on the gigabyte h67ma-ud2h,
309 where the cpu multipler can't be changed to
310 allow overclocking, the flex_ratio msr has unexpected (to OSX)
311 contents.These contents cause mach_kernel to
312 fail to compute the bus ratio correctly, instead
313 causing the system to crash since tscGranularity
314 is inadvertently set to 0.
315 */
316if (flex_ratio == 0) {
317/* Clear bit 16 (evidently the presence bit) */
318wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
319msr = rdmsr64(MSR_FLEX_RATIO);
320verbose("Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
321} else {
322if (bus_ratio_max > flex_ratio) {
323bus_ratio_max = flex_ratio;
324}
325}
326}
327
328if (bus_ratio_max) {
329fsbFrequency = (tscFrequency / bus_ratio_max);
330}
331//valv: Turbo Ratio Limit
332if ((intelCPU != 0x2e) && (intelCPU != 0x2f)) {
333msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
334cpuFrequency = bus_ratio_max * fsbFrequency;
335max_ratio = bus_ratio_max * 10;
336} else {
337cpuFrequency = tscFrequency;
338}
339if ((getValueForKey(kbusratio, &newratio, &len, &bootInfo->chameleonConfig)) && (len <= 4)) {
340max_ratio = atoi(newratio);
341max_ratio = (max_ratio * 10);
342if (len >= 3) max_ratio = (max_ratio + 5);
343
344verbose("Bus-Ratio: min=%d, max=%s\n", bus_ratio_min, newratio);
345
346// extreme overclockers may love 320 ;)
347if ((max_ratio >= min_ratio) && (max_ratio <= 320)) {
348cpuFrequency = (fsbFrequency * max_ratio) / 10;
349if (len >= 3) maxdiv = 1;
350else maxdiv = 0;
351} else {
352max_ratio = (bus_ratio_max * 10);
353}
354}
355//valv: to be uncommented if Remarq.1 didn't stick
356/*if (bus_ratio_max > 0) bus_ratio = flex_ratio;*/
357p->CPU.MaxRatio = max_ratio;
358p->CPU.MinRatio = min_ratio;
359
360myfsb = fsbFrequency / 1000000;
361verbose("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", myfsb, max_ratio);
362currcoef = bus_ratio_max;
363} else {
364msr = rdmsr64(MSR_IA32_PERF_STATUS);
365DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
366currcoef = bitfield(msr, 12, 8);
367/* Non-integer bus ratio for the max-multi*/
368maxdiv = bitfield(msr, 46, 46);
369/* Non-integer bus ratio for the current-multi (undocumented)*/
370currdiv = bitfield(msr, 14, 14);
371
372// This will always be model >= 3
373if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f))
374{
375/* On these models, maxcoef defines TSC freq */
376maxcoef = bitfield(msr, 44, 40);
377} else {
378/* On lower models, currcoef defines TSC freq */
379/* XXX */
380maxcoef = currcoef;
381}
382
383if (maxcoef) {
384if (maxdiv) {
385fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
386} else {
387fsbFrequency = (tscFrequency / maxcoef);
388}
389if (currdiv) {
390cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
391} else {
392cpuFrequency = (fsbFrequency * currcoef);
393}
394DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
395}
396}
397}
398/* Mobile CPU */
399if (rdmsr64(MSR_IA32_PLATFORM_ID) & (1<<28)) {
400p->CPU.Features |= CPU_FEATURE_MOBILE;
401}
402}
403else if ((p->CPU.Vendor == CPUID_VENDOR_AMD) && (p->CPU.Family == 0x0f))
404{
405switch(p->CPU.ExtFamily)
406{
407case 0x00: /* K8 */
408msr = rdmsr64(K8_FIDVID_STATUS);
409maxcoef = bitfield(msr, 21, 16) / 2 + 4;
410currcoef = bitfield(msr, 5, 0) / 2 + 4;
411break;
412
413case 0x01: /* K10 */
414msr = rdmsr64(K10_COFVID_STATUS);
415do_cpuid2(0x00000006, 0, p->CPU.CPUID[CPUID_6]);
416// EffFreq: effective frequency interface
417if (bitfield(p->CPU.CPUID[CPUID_6][2], 0, 0) == 1)
418{
419//uint64_t mperf = measure_mperf_frequency();
420uint64_t aperf = measure_aperf_frequency();
421cpuFrequency = aperf;
422}
423// NOTE: tsc runs at the maccoeff (non turbo)
424//*not* at the turbo frequency.
425maxcoef = bitfield(msr, 54, 49) / 2 + 4;
426currcoef = bitfield(msr, 5, 0) + 0x10;
427currdiv = 2 << bitfield(msr, 8, 6);
428
429break;
430
431case 0x05: /* K14 */
432msr = rdmsr64(K10_COFVID_STATUS);
433currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
434currdiv = (bitfield(msr, 8, 4) + 1) << 2;
435currdiv += bitfield(msr, 3, 0);
436
437break;
438
439case 0x02: /* K11 */
440// not implimented
441break;
442}
443
444if (maxcoef)
445{
446if (currdiv)
447{
448if (!currcoef) currcoef = maxcoef;
449if (!cpuFrequency)
450fsbFrequency = ((tscFrequency * currdiv) / currcoef);
451else
452fsbFrequency = ((cpuFrequency * currdiv) / currcoef);
453
454DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
455} else {
456if (!cpuFrequency)
457fsbFrequency = (tscFrequency / maxcoef);
458else
459fsbFrequency = (cpuFrequency / maxcoef);
460DBG("%d\n", currcoef);
461}
462}
463else if (currcoef)
464{
465if (currdiv)
466{
467fsbFrequency = ((tscFrequency * currdiv) / currcoef);
468DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
469} else {
470fsbFrequency = (tscFrequency / currcoef);
471DBG("%d\n", currcoef);
472}
473}
474if (!cpuFrequency) cpuFrequency = tscFrequency;
475}
476
477#if 0
478if (!fsbFrequency) {
479fsbFrequency = (DEFAULT_FSB * 1000);
480cpuFrequency = tscFrequency;
481DBG("0 ! using the default value for FSB !\n");
482}
483#endif
484
485p->CPU.MaxCoef = maxcoef;
486p->CPU.MaxDiv = maxdiv;
487p->CPU.CurrCoef = currcoef;
488p->CPU.CurrDiv = currdiv;
489p->CPU.TSCFrequency = tscFrequency;
490p->CPU.FSBFrequency = fsbFrequency;
491p->CPU.CPUFrequency = cpuFrequency;
492
493// keep formatted with spaces instead of tabs
494DBG("CPU: Brand String: %s\n", p->CPU.BrandString);
495 DBG("CPU: Vendor/Family/ExtFamily: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Family, p->CPU.ExtFamily);
496 DBG("CPU: Model/ExtModel/Stepping: 0x%x/0x%x/0x%x\n", p->CPU.Model, p->CPU.ExtModel, p->CPU.Stepping);
497 DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
498 DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
499 DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
500 DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
501 DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
502 DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
503 DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
504#if DEBUG_CPU
505pause();
506#endif
507}
508

Archive Download this file

Revision: 1808