Chameleon

Chameleon Svn Source Tree

Root/branches/valv/i386/libsaio/cpu.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9#include "boot.h"
10#include "bootstruct.h"
11
/*
 * Compile-time switch for CPU-detection tracing.  Define DEBUG_CPU to a
 * non-zero value before this point (e.g. from the build flags) to enable it.
 */
#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

/*
 * DBG(fmt, ...) forwards its arguments to printf() when DEBUG_CPU is
 * non-zero and expands to nothing otherwise.  In the disabled case the
 * arguments are not evaluated, so they must not carry required side effects.
 *
 * Uses the ISO C99 variadic form (__VA_ARGS__) instead of the GNU-only
 * named-variadic "x..." spelling.
 */
#if DEBUG_CPU
#define DBG(...)	printf(__VA_ARGS__)
#else
#define DBG(...)
#endif
21
22
/*
 * Read the CPU time-stamp counter.
 *
 * RDTSC delivers the counter split across EDX (high half) and EAX (low
 * half).  The two halves are captured separately and joined in C instead of
 * using the "=A" constraint: "=A" only denotes the EDX:EAX pair when
 * compiling for 32-bit x86, while on x86-64 it selects a single register and
 * the upper half of the counter would be lost.
 *
 * Returns the full 64-bit counter value.
 */
static inline uint64_t rdtsc64(void)
{
	uint32_t lo, hi;

	__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}
29
/*
 * Read a model-specific register.
 *
 * msr: register index, passed to RDMSR in ECX.
 *
 * RDMSR returns the value in EDX:EAX.  The halves are captured separately
 * and joined in C rather than through the "=A" constraint, which only
 * denotes the EDX:EAX pair when compiling for 32-bit x86.
 *
 * Returns the 64-bit register contents.
 */
static inline uint64_t rdmsr64(uint32_t msr)
{
	uint32_t lo, hi;

	__asm__ volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
	return ((uint64_t)hi << 32) | lo;
}
36
/*
 * Execute CPUID for one leaf.
 *
 * selector: leaf number, loaded into EAX.
 * data:     receives four 32-bit words: data[0]=EAX, data[1]=EBX,
 *           data[2]=ECX, data[3]=EDX.  Must point to at least four elements.
 *
 * ECX is explicitly zeroed on entry so that a leaf which takes a sub-leaf
 * index never sees whatever happened to be left in the register.  Use
 * do_cpuid2() to request a specific sub-leaf.
 *
 * Spelled __asm__ (not asm) to match the rest of this file and to keep the
 * function compilable in strict ISO C modes.
 */
static inline void do_cpuid(uint32_t selector, uint32_t *data)
{
	__asm__ volatile ("cpuid"
			  : "=a" (data[0]),
			    "=b" (data[1]),
			    "=c" (data[2]),
			    "=d" (data[3])
			  : "a" (selector), "c" (0));
}
46
/*
 * Execute CPUID for one leaf / sub-leaf pair.
 *
 * selector:  leaf number, loaded into EAX.
 * selector2: sub-leaf index, loaded into ECX.
 * data:      receives four 32-bit words: data[0]=EAX, data[1]=EBX,
 *            data[2]=ECX, data[3]=EDX.  Must point to at least four elements.
 *
 * Spelled __asm__ (not asm) to match the rest of this file and to keep the
 * function compilable in strict ISO C modes.
 */
static inline void do_cpuid2(uint32_t selector, uint32_t selector2, uint32_t *data)
{
	__asm__ volatile ("cpuid"
			  : "=a" (data[0]),
			    "=b" (data[1]),
			    "=c" (data[2]),
			    "=d" (data[3])
			  : "a" (selector), "c" (selector2));
}
56
57/*static inline unsigned long long rdmsr46(unsigned int msr, unsigned low, unsigned high)
58{
59//unsigned low, high;
60asm volatile("rdmsr" : "=a" (low), "=d" (high));
61return ((low) | ((uint64_t)(high) << 32));
62}*/
63
64// DFE: enable_PIT2 and disable_PIT2 come from older xnu
65
66/*
67 * Enable or disable timer 2.
68 * Port 0x61 controls timer 2:
69 * bit 0 gates the clock,
70 * bit 1 gates output to speaker.
71 */
/*
 * Turn timer 2 on: read port 0x61, clear its two low bits, set bit 0 (the
 * clock gate) and write the result back.  Bit 1 (speaker output) therefore
 * ends up cleared.
 */
static inline void enable_PIT2(void)
{
	uint8_t ctl;

	/* Fetch the current port 0x61 contents. */
	__asm__ volatile("inb $0x61,%0" : "=a" (ctl));

	/* & ~0x03, then raise the gate bit: gate on, speaker off. */
	ctl = (uint8_t)((ctl & 0xFC) | 0x01);

	__asm__ volatile("outb %0,$0x61" : : "a" (ctl));
}
82
/*
 * Turn timer 2 off: read port 0x61, clear its two low bits (clock gate and
 * speaker output) and write the result back.
 */
static inline void disable_PIT2(void)
{
	uint8_t ctl;

	/* Fetch the current port 0x61 contents. */
	__asm__ volatile("inb $0x61,%0" : "=a" (ctl));

	/* & ~0x03: gate off, speaker off. */
	ctl = (uint8_t)(ctl & 0xFC);

	__asm__ volatile("outb %0,$0x61" : : "a" (ctl));
}
92
93// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
94// roughly based on Linux code
95
/*
 * Set the 8254 channel 2 to mode 0 with the specified value.
 *
 * value: 16-bit initial count; it is written to port 0x42 low byte first,
 *        then high byte, after the control word 0xB0 has been written to
 *        port 0x43.  Per the 8254 control-word layout, 0xB0 selects
 *        counter 2, low-byte/high-byte access, mode 0, binary counting.
 *
 * The timer 2 gate should already be enabled before calling this function;
 * enable_PIT2() does that.  Expiry is then observed through port 0x61 (see
 * poll_PIT2_gate()).
 *
 * The sequence uses %al as scratch, so %al is declared as clobbered; without
 * that declaration the compiler is free to assume the register survives the
 * asm statement.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"	/* control word */
		" outb %%al,$0x43 \n\t"
		" movb %%dl,%%al  \n\t"	/* low byte of the count */
		" outb %%al,$0x42 \n\t"
		" movb %%dh,%%al  \n\t"	/* high byte of the count */
		" outb %%al,$0x42"
		: /* no output */
		: "d" (value)
		: "%al");
}
112
/*
 * Busy-wait until bit 5 (0x20) of port 0x61 reads as set.
 *
 * Returns the number of times the loop ran before the PIT2 signaled; the
 * value is always at least 1 because the port is read before it is tested.
 *
 * The output constraint is "=a": the immediate-port form of INB can only
 * deliver its result in %al, so a looser constraint such as "=q" (which also
 * permits bl/cl/dl) could make the compiler emit an unencodable instruction.
 */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;

	do {
		++count;
		__asm__ volatile("inb $0x61,%0" : "=a" (nmi_sc_val));
	} while ((nmi_sc_val & 0x20) == 0);

	return count;
}
126
127/*
128 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
129 */
130static uint64_t measure_tsc_frequency(void)
131{
132 uint64_t tscStart;
133 uint64_t tscEnd;
134 uint64_t tscDelta = 0xffffffffffffffffULL;
135 unsigned long pollCount;
136 uint64_t retval = 0;
137 int i;
138
139 /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
140 * counter 2. We run this loop 3 times to make sure the cache
141 * is hot and we take the minimum delta from all of the runs.
142 * That is to say that we're biased towards measuring the minimum
143 * number of TSC ticks that occur while waiting for the timer to
144 * expire. That theoretically helps avoid inconsistencies when
145 * running under a VM if the TSC is not virtualized and the host
146 * steals time. The TSC is normally virtualized for VMware.
147 */
148 for(i = 0; i < 10; ++i)
149 {
150 enable_PIT2();
151 set_PIT2_mode0(CALIBRATE_LATCH);
152 tscStart = rdtsc64();
153 pollCount = poll_PIT2_gate();
154 tscEnd = rdtsc64();
155 /* The poll loop must have run at least a few times for accuracy */
156 if(pollCount <= 1)
157 continue;
158 /* The TSC must increment at LEAST once every millisecond. We
159 * should have waited exactly 30 msec so the TSC delta should
160 * be >= 30. Anything less and the processor is way too slow.
161 */
162 if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
163 continue;
164 // tscDelta = min(tscDelta, (tscEnd - tscStart))
165 if( (tscEnd - tscStart) < tscDelta )
166 tscDelta = tscEnd - tscStart;
167 }
168 /* tscDelta is now the least number of TSC ticks the processor made in
169 * a timespan of 0.03 s (e.g. 30 milliseconds)
170 * Linux thus divides by 30 which gives the answer in kiloHertz because
171 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
172 * Hz so we need to convert our milliseconds to seconds. Since we're
173 * dividing by the milliseconds, we simply multiply by 1000.
174 */
175
176 /* Unlike linux, we're not limited to 32-bit, but we do need to take care
177 * that we're going to multiply by 1000 first so we do need at least some
178 * arithmetic headroom. For now, 32-bit should be enough.
179 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
180 */
181 if(tscDelta > (1ULL<<32))
182 retval = 0;
183 else
184 {
185 retval = tscDelta * 1000 / 30;
186 }
187 disable_PIT2();
188 return retval;
189}
190
191/*
192 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
193 * - multi. is read from a specific MSR. In the case of Intel, there is:
194 * a max multi. (used to calculate the FSB freq.),
195 * and a current multi. (used to calculate the CPU freq.)
196 * - fsbFrequency = tscFrequency / multi
197 * - cpuFrequency = fsbFrequency * multi
198 */
199
200void scan_cpu(PlatformInfo_t *p)
201{
202uint64_ttscFrequency, fsbFrequency, cpuFrequency, minfsb, maxfsb;
203uint64_tmsr, flex_ratio;
204intbus_ratio;
205uint8_tmaxcoef, maxdiv, currcoef, currdiv;
206boolfix_fsb;
207//const uint32_t fsb_cloud[] = {266666667, 133333333, 200000000, 166666667, 333333333, 100000000, 400000000, 0};
208//uint32_tlo, hi;
209
210maxcoef = maxdiv = currcoef = currdiv = 0;
211
212/* get cpuid values */
213do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]);
214do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]);
215do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]);
216do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]);
217do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]);
218do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]);
219if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) {
220do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
221}
222#if DEBUG_CPU
223{
224inti;
225printf("CPUID Raw Values:\n");
226for (i=0; i<CPUID_MAX; i++) {
227printf("%02d: %08x-%08x-%08x-%08x\n", i,
228p->CPU.CPUID[i][0], p->CPU.CPUID[i][1],
229p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]);
230}
231}
232#endif
233p->CPU.Vendor= p->CPU.CPUID[CPUID_0][1];
234p->CPU.Model= bitfield(p->CPU.CPUID[CPUID_1][0], 7, 4);
235p->CPU.Family= bitfield(p->CPU.CPUID[CPUID_1][0], 11, 8);
236p->CPU.ExtModel= bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);
237p->CPU.ExtFamily= bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);
238p->CPU.NoThreads= bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);
239p->CPU.NoCores= bitfield(p->CPU.CPUID[CPUID_4][0], 31, 26) + 1;
240
241p->CPU.Model += (p->CPU.ExtModel << 4);
242
243/* setup features */
244if ((bit(23) & p->CPU.CPUID[CPUID_1][3]) != 0) {
245p->CPU.Features |= CPU_FEATURE_MMX;
246}
247if ((bit(25) & p->CPU.CPUID[CPUID_1][3]) != 0) {
248p->CPU.Features |= CPU_FEATURE_SSE;
249}
250if ((bit(26) & p->CPU.CPUID[CPUID_1][3]) != 0) {
251p->CPU.Features |= CPU_FEATURE_SSE2;
252}
253if ((bit(0) & p->CPU.CPUID[CPUID_1][2]) != 0) {
254p->CPU.Features |= CPU_FEATURE_SSE3;
255}
256if ((bit(19) & p->CPU.CPUID[CPUID_1][2]) != 0) {
257p->CPU.Features |= CPU_FEATURE_SSE41;
258}
259if ((bit(20) & p->CPU.CPUID[CPUID_1][2]) != 0) {
260p->CPU.Features |= CPU_FEATURE_SSE42;
261}
262if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0) {
263p->CPU.Features |= CPU_FEATURE_EM64T;
264}
265//if ((bit(28) & p->CPU.CPUID[CPUID_1][3]) != 0) {
266if (p->CPU.NoThreads > p->CPU.NoCores) {
267p->CPU.Features |= CPU_FEATURE_HTT;
268}
269
270tscFrequency = measure_tsc_frequency();
271fsbFrequency = 0;
272cpuFrequency = 0;
273minfsb = 183000000;
274maxfsb = 185000000;
275fix_fsb = false;
276
277if ((p->CPU.Vendor == 0x756E6547 /* Intel */) && ((p->CPU.Family == 0x06) || (p->CPU.Family == 0x0f))) {
278if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)) {
279if (p->CPU.Family == 0x06) {
280/* TODO: Split detection algo into sections, maybe relying on ExtModel, like this:
281if (p->CPU.ExtModel == 0x1) {
282} else if (p->CPU.ExtModel == 0x2) {
283}*/
284int intelCPU = p->CPU.Model;
285int bus;
286
287switch (intelCPU) {
288case 0x1a:// Core i7 LGA1366, Xeon 550, 45nm
289// TODO: 0x1e needs to be split to avoid 860 & 875k collision.
290case 0x1e:// Core i7, i5 LGA1156, "Lynnfield", "Jasper", 45nm
291case 0x1f:// Core i7, i5, Nehalem
292case 0x25:// Core i7, i5, i3 LGA1156, "Westmere", 32nm
293case 0x2c:// Core i7 LGA1366, Six-core, "Westmere", 32nm
294case 0x2e:// Core i7, Nehalem-Ex, Xeon
295case 0x2f:
296msr = rdmsr64(MSR_PLATFORM_INFO);
297currcoef = (msr >> 8) & 0xff;
298msr = rdmsr64(MSR_FLEX_RATIO);
299if ((msr >> 16) & 0x01) {
300flex_ratio = (msr >> 8) & 0xff;
301if (currcoef > flex_ratio) {
302currcoef = flex_ratio;
303}
304}
305if (currcoef) {
306fsbFrequency = (tscFrequency / currcoef);
307}
308cpuFrequency = tscFrequency;
309break;
310case 0xe:// Core Duo/Solo, Pentium M DC
311case 0xf:// Core Xeon, Core 2 DC, 65nm
312case 0x16:// Celeron, Core 2 SC, 65nm
313case 0x17:// Core 2 Duo/Extreme, Xeon, 45nm
314case 0x1c:// Atom :)
315case 0x27:// Atom Lincroft, 45nm
316getBoolForKey(kFixFSB, &fix_fsb, &bootInfo->bootConfig);
317if (fix_fsb) {
318msr = rdmsr64(MSR_FSB_FREQ);
319bus = (msr >> 0) & 0x7;
320switch (bus) {
321case 0:
322fsbFrequency = 266666667;
323break;
324case 1:
325fsbFrequency = 133333333;
326break;
327case 2:
328fsbFrequency = 200000000;
329break;
330case 3:
331fsbFrequency = 166666667;
332break;
333case 4:
334fsbFrequency = 333333333;
335break;
336case 5:
337fsbFrequency = 100000000;
338break;
339case 6:
340fsbFrequency = 400000000;
341break;
342default:
343fsbFrequency = 200000000;
344DBG("Defaulting the FSB frequency to 200Mhz \n");
345break;
346}
347verbose("CPU: FSB Fix applied !\n");
348if (!getIntForKey(kbusratio, &bus_ratio, &bootInfo->bootConfig)) {
349verbose("CPU: using oldschool cpu freq detection !\n");
350goto oldschool;
351} else
352cpuFrequency = (fsbFrequency * (bus_ratio / 10));
353
354if (((fsbFrequency) > (minfsb) && (fsbFrequency) < (maxfsb)) || (!fsbFrequency)) {
355fsbFrequency = 200000000;
356}
357} else {
358verbose("CPU: No FSB Fix applied ! fall back to oldschool \n");
359goto oldschool;
360}
361/*msr = rdmsr64(IA32_PERF_STATUS);
362currdiv = (msr >> 14) & 0x01;
363maxdiv = (msr >> 46) & 0x01;
364lo = (uint32_t)rdmsr64(IA32_PERF_STATUS);
365hi = (uint32_t)(rdmsr64(IA32_PERF_STATUS) >> 32);
366if (lo >> 31) {
367currcoef = (hi >> 8) & 0x1f;
368} else {
369lo = (uint32_t)rdmsr64(MSR_IA32_PLATFORM_ID);
370currcoef = (lo >> 8) & 0x1f;
371}
372if (maxdiv) {
373cpuFrequency = (fsbFrequency * (currcoef + 1));
374} else {
375cpuFrequency = (fsbFrequency * currcoef);
376}
377if (currdiv) {
378cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
379} else {
380cpuFrequency = (fsbFrequency * currcoef);
381}*/
382//cpuFrequency = tscFrequency;
383break;
384/*case 0x17:// Core 2 Duo/Extreme, Xeon, 45nm
385lo = (uint32_t)rdmsr64(IA32_PERF_STATUS);
386hi = (uint32_t)(rdmsr64(IA32_PERF_STATUS) >> 32);
387//rdmsr46(IA32_PERF_STATUS, lo, hi);
388if (lo >> 31) {
389currcoef = (hi >> 8) & 0x1f;
390} else {
391lo = (uint32_t)rdmsr64(MSR_IA32_PLATFORM_ID);
392//hi = (uint32_t)(rdmsr64(MSR_IA32_PLATFORM_ID) >> 32);
393//rdmsr46(MSR_IA32_PLATFORM_ID, lo, hi);
394currcoef = (lo >> 8) & 0x1f;
395}
396fsbFrequency = ((fsb_cloud[lo & 0x7]) * 2);
397//cpuFrequency = (fsbFrequency * currcoef);
398if (!fsbFrequency) {
399fsbFrequency = (DEFAULT_FSB * 2000);
400DBG("0 ! Defaulting the FSB frequency to 200Mhz !\n");
401}*/
402case 0x1d:// Xeon MP MP 7400
403default:
404goto oldschool;
405break;
406}
407} else {
408oldschool:
409msr = rdmsr64(IA32_PERF_STATUS);
410DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
411currcoef = (msr >> 8) & 0x1f;
412/* Non-integer bus ratio for the max-multi*/
413maxdiv = (msr >> 46) & 0x01;
414/* Non-integer bus ratio for the current-multi (undocumented)*/
415currdiv = (msr >> 14) & 0x01;
416
417if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f)) // This will always be model >= 3
418{
419/* On these models, maxcoef defines TSC freq */
420maxcoef = (msr >> 40) & 0x1f;
421} else {
422/* On lower models, currcoef defines TSC freq */
423/* XXX */
424maxcoef = currcoef;
425}
426
427if (maxcoef) {
428if (!fix_fsb) {
429if (maxdiv) {
430fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
431} else {
432fsbFrequency = (tscFrequency / maxcoef);
433}
434}
435if (currdiv) {
436cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
437} else {
438cpuFrequency = (fsbFrequency * currcoef);
439}
440DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
441}
442/*if (p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e)
443{
444maxcoef = (msr >> 40) & 0x1f;
445if (maxcoef) {
446if (maxdiv) {
447fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
448} else {
449fsbFrequency = (tscFrequency / maxcoef);
450}
451if (currdiv) {
452cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
453} else {
454cpuFrequency = (fsbFrequency * currcoef);
455}
456}
457if (((fsbFrequency) > (minfsb) && (fsbFrequency) < (maxfsb)) || (!fsbFrequency)) {
458fsbFrequency = 200000000;
459DBG("Defaulting FSB frequency to 200Mhz !\n");
460}
461}
462if (p->CPU.Family == 0x0f) {
463msr = rdmsr64(0x0000002C); // Xeon related register.
464int bus;
465bus = (msr >> 16) & 0x7;
466switch (bus) {
467case 0:
468if (p->CPU.Model == 2) {
469fsbFrequency = 100000000;
470} else {
471fsbFrequency = 266666667;
472}
473break;
474case 1:
475fsbFrequency = 133333333;
476break;
477case 2:
478fsbFrequency = 200000000;
479break;
480case 3:
481fsbFrequency = 166666667;
482break;
483case 4:
484fsbFrequency = 333333333;
485break;
486default:
487break;
488}
489} else {
490fsbFrequency = 100000000;
491DBG("Defaulting FSB frequency to 100Mhz !\n");
492}*/
493if (((fsbFrequency) > (minfsb) && (fsbFrequency) < (maxfsb)) || (!fsbFrequency)) {
494fsbFrequency = 200000000;
495DBG("Defaulting FSB frequency to 200Mhz !\n");
496}
497}
498}
499
500// Mobile CPU ?
501if (rdmsr64(0x17) & (1<<28)) {
502p->CPU.Features |= CPU_FEATURE_MOBILE;
503}
504}
505#if 0
506else if((p->CPU.Vendor == 0x68747541 /* AMD */) && (p->CPU.Family == 0x0f)) {
507if(p->CPU.ExtFamily == 0x00 /* K8 */) {
508msr = rdmsr64(K8_FIDVID_STATUS);
509currcoef = (msr & 0x3f) / 2 + 4;
510currdiv = (msr & 0x01) * 2;
511} else if(p->CPU.ExtFamily >= 0x01 /* K10+ */) {
512msr = rdmsr64(K10_COFVID_STATUS);
513if(p->CPU.ExtFamily == 0x01 /* K10 */)
514currcoef = (msr & 0x3f) + 0x10;
515else /* K11+ */
516currcoef = (msr & 0x3f) + 0x08;
517currdiv = (2 << ((msr >> 6) & 0x07));
518}
519
520if (currcoef) {
521if (currdiv) {
522fsbFrequency = ((tscFrequency * currdiv) / currcoef);
523DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
524} else {
525fsbFrequency = (tscFrequency / currcoef);
526DBG("%d\n", currcoef);
527}
528fsbFrequency = (tscFrequency / currcoef);
529cpuFrequency = tscFrequency;
530}
531}
532
533if (!fsbFrequency) {
534fsbFrequency = (DEFAULT_FSB * 1000);
535cpuFrequency = tscFrequency;
536DBG("0 ! using the default value for FSB !\n");
537}
538#endif
539
540p->CPU.MaxCoef = maxcoef;
541p->CPU.MaxDiv = maxdiv;
542p->CPU.CurrCoef = currcoef;
543p->CPU.CurrDiv = currdiv;
544p->CPU.TSCFrequency = tscFrequency;
545p->CPU.FSBFrequency = fsbFrequency;
546p->CPU.CPUFrequency = cpuFrequency;
547#if DEBUG_CPU
548DBG("CPU: Vendor/Model/ExtModel: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Model, p->CPU.ExtModel);
549DBG("CPU: Family/ExtFamily: 0x%x/0x%x\n", p->CPU.Family, p->CPU.ExtFamily);
550DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
551DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
552DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
553DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
554DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
555DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
556DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
557pause();
558#endif
559}
560

Archive Download this file

Revision: 177