Chameleon

Chameleon Svn Source Tree

Root/branches/andyvand/i386/libsaio/cpu.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9
/* Define DEBUG_CPU to a non-zero value (e.g. from the build flags) to get
 * verbose CPU-detection output through DBG(). It defaults to off. */
#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

/* DBG() forwards its arguments to printf() in debug builds and expands to
 * nothing otherwise. Standard C99 variadic-macro syntax is used instead of
 * the GNU-only "x..." form so the file also builds in strict ISO modes. */
#if DEBUG_CPU
#define DBG(...)	printf(__VA_ARGS__)
#else
#define DBG(...)
#endif
19
20
/*
 * Reads the processor's time-stamp counter with RDTSC and returns it as a
 * single 64-bit value.
 *
 * RDTSC delivers the low 32 bits in EAX and the high 32 bits in EDX. The two
 * halves are captured separately instead of through the "A" constraint:
 * "A" names the EDX:EAX pair only on 32-bit targets, while on x86-64 it lets
 * the compiler pick either register and silently drops half of the value.
 */
static inline uint64_t rdtsc64(void)
{
	uint32_t lo, hi;

	__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}
27
/*
 * Reads the model-specific register selected by `msr` with RDMSR and returns
 * its 64-bit contents.
 *
 * RDMSR takes the register index in ECX and returns the value split across
 * EDX (high half) and EAX (low half). The halves are read through separate
 * "=a"/"=d" outputs because the "A" constraint only denotes the EDX:EAX pair
 * on 32-bit targets.
 *
 * RDMSR is a privileged instruction; this must only be called from code that
 * runs at ring 0 (as the booter does).
 */
static inline uint64_t rdmsr64(uint32_t msr)
{
	uint32_t lo, hi;

	__asm__ volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
	return ((uint64_t)hi << 32) | lo;
}
34
/*
 * Executes CPUID for leaf `selector` and stores the resulting registers in
 * data[0..3] in the order EAX, EBX, ECX, EDX. `data` must point to at least
 * four uint32_t elements.
 *
 * ECX is loaded with 0 so that leaves which take a sub-leaf index see a
 * defined value; leaves that ignore ECX are unaffected. Use do_cpuid2() to
 * query a specific sub-leaf.
 */
static inline void do_cpuid(uint32_t selector, uint32_t *data)
{
	/* __asm__ (not the bare "asm" keyword) so this also builds in ISO modes
	 * and matches the other inline-assembly helpers in this file. */
	__asm__ volatile("cpuid"
			 : "=a" (data[0]),
			   "=b" (data[1]),
			   "=c" (data[2]),
			   "=d" (data[3])
			 : "a" (selector), "c" (0));
}
44
/*
 * Executes CPUID for leaf `selector` with sub-leaf `selector2` (passed in
 * ECX) and stores the resulting registers in data[0..3] in the order
 * EAX, EBX, ECX, EDX. `data` must point to at least four uint32_t elements.
 */
static inline void do_cpuid2(uint32_t selector, uint32_t selector2, uint32_t *data)
{
	/* __asm__ (not the bare "asm" keyword) so this also builds in ISO modes
	 * and matches the other inline-assembly helpers in this file. */
	__asm__ volatile("cpuid"
			 : "=a" (data[0]),
			   "=b" (data[1]),
			   "=c" (data[2]),
			   "=d" (data[3])
			 : "a" (selector), "c" (selector2));
}
54
55// DFE: enable_PIT2 and disable_PIT2 come from older xnu
56
57/*
58 * Enable or disable timer 2.
59 * Port 0x61 controls timer 2:
60 * bit 0 gates the clock,
61 * bit 1 gates output to speaker.
62 */
/* Turns timer 2 on: sets the gate bit of port 0x61 and clears the speaker
 * output bit, leaving the remaining bits of the port as they were. */
static inline void enable_PIT2(void)
{
	unsigned char ctrl;

	/* Fetch the current control byte. */
	__asm__ volatile("inb $0x61,%0" : "=a" (ctrl));

	/* Drop bits 0-1 (& ~0x03), then raise bit 0: gate on, speaker off. */
	ctrl = (unsigned char)((ctrl & 0xFC) | 1);

	/* Write the updated byte back. */
	__asm__ volatile("outb %0,$0x61" : : "a" (ctrl));
}
73
/* Turns timer 2 off: clears both the gate bit and the speaker output bit of
 * port 0x61, leaving the remaining bits of the port as they were. */
static inline void disable_PIT2(void)
{
	unsigned char ctrl;

	/* Fetch the current control byte. */
	__asm__ volatile("inb $0x61,%0" : "=a" (ctrl));

	/* Drop bits 0-1 (& ~0x03): gate off, speaker off. */
	ctrl = (unsigned char)(ctrl & 0xFC);

	/* Write the updated byte back. */
	__asm__ volatile("outb %0,$0x61" : : "a" (ctrl));
}
83
84// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
85// roughly based on Linux code
86
/* Set the 8254 channel 2 to mode 0 with the specified initial count.
 *
 * In mode 0 the channel's output goes low when the mode is programmed and
 * goes high once the count expires; poll_PIT2_gate() waits for that
 * transition. The counter only runs while its gate input is high, so call
 * enable_PIT2() before this function.
 *
 * value: 16-bit initial count, written low byte first, then high byte.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	/* Each port write is its own asm statement with the byte passed in AL
	 * as an input operand. The previous single asm block overwrote %al
	 * without declaring it as clobbered, so the compiler was free to assume
	 * EAX survived the call. */

	/* Control word 0xB0: counter 2, low byte then high byte, mode 0, binary. */
	__asm__ volatile("outb %0,$0x43" : : "a" ((uint8_t)0xB0));
	/* Low byte of the count. */
	__asm__ volatile("outb %0,$0x42" : : "a" ((uint8_t)(value & 0xFF)));
	/* High byte of the count. */
	__asm__ volatile("outb %0,$0x42" : : "a" ((uint8_t)(value >> 8)));
}
103
/* Busy-waits until bit 5 (0x20) of port 0x61 is set, i.e. until timer 2
 * signals expiry.
 *
 * Returns the number of times the loop ran before the PIT2 signaled; always
 * at least 1.
 */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;

	do {
		++count;
		/* INB can only load into AL, so the output must use the "a"
		 * constraint; "q" would also allow bl/cl/dl, which the
		 * instruction cannot encode. */
		__asm__ volatile("inb $0x61,%0" : "=a" (nmi_sc_val));
	} while ((nmi_sc_val & 0x20) == 0);

	return count;
}
117
118/*
119 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
120 */
121static uint64_t measure_tsc_frequency(void)
122{
123 uint64_t tscStart;
124 uint64_t tscEnd;
125 uint64_t tscDelta = 0xffffffffffffffffULL;
126 unsigned long pollCount;
127 uint64_t retval = 0;
128 int i;
129
130 /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
131 * counter 2. We run this loop 3 times to make sure the cache
132 * is hot and we take the minimum delta from all of the runs.
133 * That is to say that we're biased towards measuring the minimum
134 * number of TSC ticks that occur while waiting for the timer to
135 * expire. That theoretically helps avoid inconsistencies when
136 * running under a VM if the TSC is not virtualized and the host
137 * steals time. The TSC is normally virtualized for VMware.
138 */
139 for(i = 0; i < 10; ++i)
140 {
141 enable_PIT2();
142 set_PIT2_mode0(CALIBRATE_LATCH);
143 tscStart = rdtsc64();
144 pollCount = poll_PIT2_gate();
145 tscEnd = rdtsc64();
146 /* The poll loop must have run at least a few times for accuracy */
147 if(pollCount <= 1)
148 continue;
149 /* The TSC must increment at LEAST once every millisecond. We
150 * should have waited exactly 30 msec so the TSC delta should
151 * be >= 30. Anything less and the processor is way too slow.
152 */
153 if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
154 continue;
155 // tscDelta = min(tscDelta, (tscEnd - tscStart))
156 if( (tscEnd - tscStart) < tscDelta )
157 tscDelta = tscEnd - tscStart;
158 }
159 /* tscDelta is now the least number of TSC ticks the processor made in
160 * a timespan of 0.03 s (e.g. 30 milliseconds)
161 * Linux thus divides by 30 which gives the answer in kiloHertz because
162 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
163 * Hz so we need to convert our milliseconds to seconds. Since we're
164 * dividing by the milliseconds, we simply multiply by 1000.
165 */
166
167 /* Unlike linux, we're not limited to 32-bit, but we do need to take care
168 * that we're going to multiply by 1000 first so we do need at least some
169 * arithmetic headroom. For now, 32-bit should be enough.
170 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
171 */
172 if(tscDelta > (1ULL<<32))
173 retval = 0;
174 else
175 {
176 retval = tscDelta * 1000 / 30;
177 }
178 disable_PIT2();
179 return retval;
180}
181
182/*
183 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
184 * - multi. is read from a specific MSR. In the case of Intel, there is:
185 * a max multi. (used to calculate the FSB freq.),
186 * and a current multi. (used to calculate the CPU freq.)
187 * - fsbFrequency = tscFrequency / multi
188 * - cpuFrequency = fsbFrequency * multi
189 */
190
191void scan_cpu(PlatformInfo_t *p)
192{
193uint64_ttscFrequency, fsbFrequency, cpuFrequency;
194uint64_tmsr, flex_ratio;
195uint8_tmaxcoef, maxdiv, currcoef, currdiv;
196
197maxcoef = maxdiv = currcoef = currdiv = 0;
198
199/* get cpuid values */
200do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]);
201do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]);
202do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]);
203do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]);
204do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]);
205do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]);
206if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) {
207do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
208}
209#if DEBUG_CPU
210{
211inti;
212printf("CPUID Raw Values:\n");
213for (i=0; i<CPUID_MAX; i++) {
214printf("%02d: %08x-%08x-%08x-%08x\n", i,
215p->CPU.CPUID[i][0], p->CPU.CPUID[i][1],
216p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]);
217}
218}
219#endif
220p->CPU.Vendor= p->CPU.CPUID[CPUID_0][1];
221p->CPU.Model= bitfield(p->CPU.CPUID[CPUID_1][0], 7, 4);
222p->CPU.Family= bitfield(p->CPU.CPUID[CPUID_1][0], 11, 8);
223p->CPU.ExtModel= bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);
224p->CPU.ExtFamily= bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);
225p->CPU.NoThreads= bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);
226p->CPU.NoCores= bitfield(p->CPU.CPUID[CPUID_4][0], 31, 26) + 1;
227
228p->CPU.Model += (p->CPU.ExtModel << 4);
229
230/* setup features */
231if ((bit(23) & p->CPU.CPUID[CPUID_1][3]) != 0) {
232p->CPU.Features |= CPU_FEATURE_MMX;
233}
234if ((bit(25) & p->CPU.CPUID[CPUID_1][3]) != 0) {
235p->CPU.Features |= CPU_FEATURE_SSE;
236}
237if ((bit(26) & p->CPU.CPUID[CPUID_1][3]) != 0) {
238p->CPU.Features |= CPU_FEATURE_SSE2;
239}
240if ((bit(0) & p->CPU.CPUID[CPUID_1][2]) != 0) {
241p->CPU.Features |= CPU_FEATURE_SSE3;
242}
243if ((bit(19) & p->CPU.CPUID[CPUID_1][2]) != 0) {
244p->CPU.Features |= CPU_FEATURE_SSE41;
245}
246if ((bit(20) & p->CPU.CPUID[CPUID_1][2]) != 0) {
247p->CPU.Features |= CPU_FEATURE_SSE42;
248}
249if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0) {
250p->CPU.Features |= CPU_FEATURE_EM64T;
251}
252//if ((bit(28) & p->CPU.CPUID[CPUID_1][3]) != 0) {
253if (p->CPU.NoThreads > p->CPU.NoCores) {
254p->CPU.Features |= CPU_FEATURE_HTT;
255}
256
257tscFrequency = measure_tsc_frequency();
258fsbFrequency = 0;
259cpuFrequency = 0;
260
261if ((p->CPU.Vendor == 0x756E6547 /* Intel */) && ((p->CPU.Family == 0x06) || (p->CPU.Family == 0x0f))) {
262if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)) {
263/* Nehalem CPU model */
264if (p->CPU.Family == 0x06 && (p->CPU.Model == 0x1a || p->CPU.Model == 0x1e)) {
265msr = rdmsr64(MSR_PLATFORM_INFO);
266DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
267currcoef = (msr >> 8) & 0xff;
268msr = rdmsr64(MSR_FLEX_RATIO);
269DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
270if ((msr >> 16) & 0x01) {
271flex_ratio = (msr >> 8) & 0xff;
272if (currcoef > flex_ratio) {
273currcoef = flex_ratio;
274}
275}
276
277if (currcoef) {
278fsbFrequency = (tscFrequency / currcoef);
279}
280cpuFrequency = tscFrequency;
281} else {
282msr = rdmsr64(IA32_PERF_STATUS);
283DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
284currcoef = (msr >> 8) & 0x1f;
285/* Non-integer bus ratio for the max-multi*/
286maxdiv = (msr >> 46) & 0x01;
287/* Non-integer bus ratio for the current-multi (undocumented)*/
288currdiv = (msr >> 14) & 0x01;
289
290if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f)) // This will always be model >= 3
291{
292/* On these models, maxcoef defines TSC freq */
293maxcoef = (msr >> 40) & 0x1f;
294} else {
295/* On lower models, currcoef defines TSC freq */
296/* XXX */
297maxcoef = currcoef;
298}
299
300if (maxcoef) {
301if (maxdiv) {
302fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
303} else {
304fsbFrequency = (tscFrequency / maxcoef);
305}
306if (currdiv) {
307cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
308} else {
309cpuFrequency = (fsbFrequency * currcoef);
310}
311DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
312}
313}
314}
315/* Mobile CPU ? */
316if (rdmsr64(0x17) & (1<<28)) {
317p->CPU.Features |= CPU_FEATURE_MOBILE;
318}
319}
320#if 0
321else if((p->CPU.Vendor == 0x68747541 /* AMD */) && (p->CPU.Family == 0x0f)) {
322if(p->CPU.ExtFamily == 0x00 /* K8 */) {
323msr = rdmsr64(K8_FIDVID_STATUS);
324currcoef = (msr & 0x3f) / 2 + 4;
325currdiv = (msr & 0x01) * 2;
326} else if(p->CPU.ExtFamily >= 0x01 /* K10+ */) {
327msr = rdmsr64(K10_COFVID_STATUS);
328if(p->CPU.ExtFamily == 0x01 /* K10 */)
329currcoef = (msr & 0x3f) + 0x10;
330else /* K11+ */
331currcoef = (msr & 0x3f) + 0x08;
332currdiv = (2 << ((msr >> 6) & 0x07));
333}
334
335if (currcoef) {
336if (currdiv) {
337fsbFrequency = ((tscFrequency * currdiv) / currcoef);
338DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
339} else {
340fsbFrequency = (tscFrequency / currcoef);
341DBG("%d\n", currcoef);
342}
343fsbFrequency = (tscFrequency / currcoef);
344cpuFrequency = tscFrequency;
345}
346}
347
348if (!fsbFrequency) {
349fsbFrequency = (DEFAULT_FSB * 1000);
350cpuFrequency = tscFrequency;
351DBG("0 ! using the default value for FSB !\n");
352}
353#endif
354
355p->CPU.MaxCoef = maxcoef;
356p->CPU.MaxDiv = maxdiv;
357p->CPU.CurrCoef = currcoef;
358p->CPU.CurrDiv = currdiv;
359p->CPU.TSCFrequency = tscFrequency;
360p->CPU.FSBFrequency = fsbFrequency;
361p->CPU.CPUFrequency = cpuFrequency;
362#if DEBUG_CPU
363DBG("CPU: Vendor/Model/ExtModel: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Model, p->CPU.ExtModel);
364DBG("CPU: Family/ExtFamily: 0x%x/0x%x\n", p->CPU.Family, p->CPU.ExtFamily);
365DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
366DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
367DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
368DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
369DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
370DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
371DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
372printf("(Press a key to continue...)\n");
373getc();
374#endif
375}
376

Archive Download this file

Revision: 67