1 | /*␊ |
2 | * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>␊ |
3 | * AsereBLN: 2009: cleanup and bugfix␊ |
4 | */␊ |
5 | ␊ |
6 | #include "libsaio.h"␊ |
7 | #include "platform.h"␊ |
8 | #include "cpu.h"␊ |
9 | ␊ |
/* Diagnostic output is off unless the build defines DEBUG_CPU to non-zero. */
#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

/*
 * DBG(fmt, ...) forwards its arguments to printf() when DEBUG_CPU is
 * non-zero.  Otherwise it expands to nothing, so its arguments are not
 * evaluated at all.
 */
#if DEBUG_CPU
#define DBG(...)		printf(__VA_ARGS__)
#else
#define DBG(...)
#endif
19 | ␊ |
20 | ␊ |
/*
 * Read the CPU time-stamp counter.
 *
 * RDTSC delivers the counter in EDX:EAX.  The two halves are captured
 * separately and combined in C instead of using the "A" constraint: "A"
 * only names the EDX:EAX pair on 32-bit x86, whereas on x86-64 it lets the
 * compiler pick a single register and half of the value would be lost.
 *
 * Returns the full 64-bit counter value.
 */
static inline uint64_t rdtsc64(void)
{
	uint32_t lo, hi;

	__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}
27 | ␊ |
/*
 * Read a model-specific register.
 *
 * msr: register index, passed to RDMSR in ECX.
 *
 * RDMSR delivers the value in EDX:EAX.  The halves are captured separately
 * and combined in C instead of using the "A" constraint, which only names
 * the EDX:EAX pair on 32-bit x86 and would drop half of the value on x86-64.
 *
 * Returns the 64-bit register contents.  No check is made that the MSR
 * exists on the running CPU.
 */
static inline uint64_t rdmsr64(uint32_t msr)
{
	uint32_t lo, hi;

	__asm__ volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
	return ((uint64_t)hi << 32) | lo;
}
34 | ␊ |
/*
 * Execute CPUID for the leaf given in `selector`.
 *
 * selector: value placed in EAX before the instruction runs.
 * data:     array of at least four uint32_t; receives EAX, EBX, ECX and EDX
 *           in data[0] .. data[3], in that order.
 *
 * ECX is not loaded on input, so leaves that take a sub-leaf index should be
 * queried with do_cpuid2() instead.
 *
 * Spelled __asm__ (not asm) so the function also compiles in strict ISO
 * modes and matches the other inline assembly in this file.
 */
static inline void do_cpuid(uint32_t selector, uint32_t *data)
{
	__asm__ volatile ("cpuid"
			  : "=a" (data[0]),
			    "=b" (data[1]),
			    "=c" (data[2]),
			    "=d" (data[3])
			  : "a" (selector));
}
44 | ␊ |
/*
 * Execute CPUID for a leaf that also takes a sub-leaf index.
 *
 * selector:  value placed in EAX before the instruction runs.
 * selector2: value placed in ECX before the instruction runs.
 * data:      array of at least four uint32_t; receives EAX, EBX, ECX and EDX
 *            in data[0] .. data[3], in that order.
 *
 * Spelled __asm__ (not asm) so the function also compiles in strict ISO
 * modes and matches the other inline assembly in this file.
 */
static inline void do_cpuid2(uint32_t selector, uint32_t selector2, uint32_t *data)
{
	__asm__ volatile ("cpuid"
			  : "=a" (data[0]),
			    "=b" (data[1]),
			    "=c" (data[2]),
			    "=d" (data[3])
			  : "a" (selector), "c" (selector2));
}
54 | ␊ |
55 | // DFE: enable_PIT2 and disable_PIT2 come from older xnu␊ |
56 | ␊ |
/*
 * Enable or disable timer 2.
 * Port 0x61 controls timer 2:
 *   bit 0 gates the clock,
 *   bit 1 gates output to speaker.
 */

/*
 * Turn the timer 2 gate on and the speaker output off.  The remaining bits
 * of port 0x61 are written back exactly as they were read.
 */
static inline void enable_PIT2(void)
{
	uint8_t ctl;

	__asm__ volatile("inb $0x61,%0" : "=a" (ctl));
	/* Clear bits 0 and 1 (& ~0x03), then set the gate bit again. */
	ctl = (uint8_t)((ctl & 0xFC) | 0x01);
	__asm__ volatile("outb %0,$0x61" : : "a" (ctl));
}
73 | ␊ |
/*
 * Turn off both the timer 2 gate and the speaker output (bits 0 and 1 of
 * port 0x61).  The remaining bits are written back exactly as they were read.
 */
static inline void disable_PIT2(void)
{
	uint8_t ctl;

	__asm__ volatile("inb $0x61,%0" : "=a" (ctl));
	ctl = (uint8_t)(ctl & 0xFC);	/* & ~0x03 */
	__asm__ volatile("outb %0,$0x61" : : "a" (ctl));
}
83 | ␊ |
84 | // DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are␊ |
85 | // roughly based on Linux code␊ |
86 | ␊ |
/*
 * Set the 8254 channel 2 to mode 0 with the specified value.
 *
 * The control byte 0xB0 is written to port 0x43, followed by the low byte
 * and then the high byte of `value` on the channel 2 data port 0x42.
 *
 * For this to be useful the timer 2 gate ought to be high before calling
 * this function; enable_PIT2() does this.  Expiry of the count is then
 * observed by poll_PIT2_gate().
 *
 * value: 16-bit count to load into the channel.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"
		" outb %%al,$0x43 \n\t"
		" movb %%dl,%%al  \n\t"
		" outb %%al,$0x42 \n\t"
		" movb %%dh,%%al  \n\t"
		" outb %%al,$0x42"
		: /* no outputs */
		: "d" (value)
		/* AL is overwritten as scratch for every port write, so the
		 * compiler must be told not to keep a live value in it. */
		: "%al" );
}
103 | ␊ |
/*
 * Spin until bit 5 (0x20) of port 0x61 reads as set, which is how PIT2
 * signals that its count has expired.
 *
 * Returns the number of times the port was read, including the final read
 * that saw the bit set (so the result is always >= 1).
 *
 * There is no timeout: the loop ends only when the bit becomes set.
 */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;

	do {
		++count;
		/* "inb $imm8" can only load AL, so the output has to be
		 * pinned with "a"; "q" would also permit BL/CL/DL, which
		 * the assembler rejects for this instruction. */
		__asm__ volatile(
			"inb	$0x61,%0"
			: "=a" (nmi_sc_val) /* no input, no clobber */);
	} while ((nmi_sc_val & 0x20) == 0);

	return count;
}
117 | ␊ |
118 | /*␊ |
119 | * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer␊ |
120 | */␊ |
121 | static uint64_t measure_tsc_frequency(void)␊ |
122 | {␊ |
123 | uint64_t tscStart;␊ |
124 | uint64_t tscEnd;␊ |
125 | uint64_t tscDelta = 0xffffffffffffffffULL;␊ |
126 | unsigned long pollCount;␊ |
127 | uint64_t retval = 0;␊ |
128 | int i;␊ |
129 | ␊ |
130 | /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT␊ |
131 | * counter 2. We run this loop 3 times to make sure the cache␊ |
132 | * is hot and we take the minimum delta from all of the runs.␊ |
133 | * That is to say that we're biased towards measuring the minimum␊ |
134 | * number of TSC ticks that occur while waiting for the timer to␊ |
135 | * expire. That theoretically helps avoid inconsistencies when␊ |
136 | * running under a VM if the TSC is not virtualized and the host␊ |
137 | * steals time. The TSC is normally virtualized for VMware.␊ |
138 | */␊ |
139 | for(i = 0; i < 10; ++i)␊ |
140 | {␊ |
141 | enable_PIT2();␊ |
142 | set_PIT2_mode0(CALIBRATE_LATCH);␊ |
143 | tscStart = rdtsc64();␊ |
144 | pollCount = poll_PIT2_gate();␊ |
145 | tscEnd = rdtsc64();␊ |
146 | /* The poll loop must have run at least a few times for accuracy */␊ |
147 | if(pollCount <= 1)␊ |
148 | continue;␊ |
149 | /* The TSC must increment at LEAST once every millisecond. We␊ |
150 | * should have waited exactly 30 msec so the TSC delta should␊ |
151 | * be >= 30. Anything less and the processor is way too slow.␊ |
152 | */␊ |
153 | if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)␊ |
154 | continue;␊ |
155 | // tscDelta = min(tscDelta, (tscEnd - tscStart))␊ |
156 | if( (tscEnd - tscStart) < tscDelta )␊ |
157 | tscDelta = tscEnd - tscStart;␊ |
158 | }␊ |
159 | /* tscDelta is now the least number of TSC ticks the processor made in␊ |
160 | * a timespan of 0.03 s (e.g. 30 milliseconds)␊ |
161 | * Linux thus divides by 30 which gives the answer in kiloHertz because␊ |
162 | * 1 / ms = kHz. But we're xnu and most of the rest of the code uses␊ |
163 | * Hz so we need to convert our milliseconds to seconds. Since we're␊ |
164 | * dividing by the milliseconds, we simply multiply by 1000.␊ |
165 | */␊ |
166 | ␊ |
167 | /* Unlike linux, we're not limited to 32-bit, but we do need to take care␊ |
168 | * that we're going to multiply by 1000 first so we do need at least some␊ |
169 | * arithmetic headroom. For now, 32-bit should be enough.␊ |
170 | * Also unlike Linux, our compiler can do 64-bit integer arithmetic.␊ |
171 | */␊ |
172 | if(tscDelta > (1ULL<<32))␊ |
173 | retval = 0;␊ |
174 | else␊ |
175 | {␊ |
176 | retval = tscDelta * 1000 / 30;␊ |
177 | }␊ |
178 | disable_PIT2();␊ |
179 | return retval;␊ |
180 | }␊ |
181 | ␊ |
182 | /*␊ |
183 | * Calculates the FSB and CPU frequencies using specific MSRs for each CPU␊ |
184 | * - multi. is read from a specific MSR. In the case of Intel, there is:␊ |
185 | * a max multi. (used to calculate the FSB freq.),␊ |
186 | * and a current multi. (used to calculate the CPU freq.)␊ |
187 | * - fsbFrequency = tscFrequency / multi␊ |
188 | * - cpuFrequency = fsbFrequency * multi␊ |
189 | */␊ |
190 | ␊ |
191 | void scan_cpu(PlatformInfo_t *p)␊ |
192 | {␊ |
193 | ␉uint64_t␉tscFrequency, fsbFrequency, cpuFrequency;␊ |
194 | ␉uint64_t␉msr, flex_ratio;␊ |
195 | ␉uint8_t␉␉maxcoef, maxdiv, currcoef, currdiv;␊ |
196 | ␊ |
197 | ␉maxcoef = maxdiv = currcoef = currdiv = 0;␊ |
198 | ␊ |
199 | ␉/* get cpuid values */␊ |
200 | ␉do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]);␊ |
201 | ␉do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]);␊ |
202 | ␉do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]);␊ |
203 | ␉do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]);␊ |
204 | ␉do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]);␊ |
205 | ␉do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]);␊ |
206 | ␉if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1) {␊ |
207 | ␉␉do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);␊ |
208 | ␉}␊ |
209 | #if DEBUG_CPU␊ |
210 | ␉{␊ |
211 | ␉␉int␉␉i;␊ |
212 | ␉␉printf("CPUID Raw Values:\n");␊ |
213 | ␉␉for (i=0; i<CPUID_MAX; i++) {␊ |
214 | ␉␉␉printf("%02d: %08x-%08x-%08x-%08x\n", i,␊ |
215 | ␉␉␉␉p->CPU.CPUID[i][0], p->CPU.CPUID[i][1],␊ |
216 | ␉␉␉␉p->CPU.CPUID[i][2], p->CPU.CPUID[i][3]);␊ |
217 | ␉␉}␊ |
218 | ␉}␊ |
219 | #endif␊ |
220 | ␉p->CPU.Vendor␉␉= p->CPU.CPUID[CPUID_0][1];␊ |
221 | ␉p->CPU.Model␉␉= bitfield(p->CPU.CPUID[CPUID_1][0], 7, 4);␊ |
222 | ␉p->CPU.Family␉␉= bitfield(p->CPU.CPUID[CPUID_1][0], 11, 8);␊ |
223 | ␉p->CPU.ExtModel␉␉= bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);␊ |
224 | ␉p->CPU.ExtFamily␉= bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);␊ |
225 | ␉p->CPU.NoThreads␉= bitfield(p->CPU.CPUID[CPUID_1][1], 23, 16);␊ |
226 | ␉p->CPU.NoCores␉␉= bitfield(p->CPU.CPUID[CPUID_4][0], 31, 26) + 1;␊ |
227 | ␊ |
228 | ␉p->CPU.Model += (p->CPU.ExtModel << 4);␊ |
229 | ␊ |
230 | ␉/* setup features */␊ |
231 | ␉if ((bit(23) & p->CPU.CPUID[CPUID_1][3]) != 0) {␊ |
232 | ␉␉p->CPU.Features |= CPU_FEATURE_MMX;␊ |
233 | ␉}␊ |
234 | ␉if ((bit(25) & p->CPU.CPUID[CPUID_1][3]) != 0) {␊ |
235 | ␉␉p->CPU.Features |= CPU_FEATURE_SSE;␊ |
236 | ␉}␊ |
237 | ␉if ((bit(26) & p->CPU.CPUID[CPUID_1][3]) != 0) {␊ |
238 | ␉␉p->CPU.Features |= CPU_FEATURE_SSE2;␊ |
239 | ␉}␊ |
240 | ␉if ((bit(0) & p->CPU.CPUID[CPUID_1][2]) != 0) {␊ |
241 | ␉␉p->CPU.Features |= CPU_FEATURE_SSE3;␊ |
242 | ␉}␊ |
243 | ␉if ((bit(19) & p->CPU.CPUID[CPUID_1][2]) != 0) {␊ |
244 | ␉␉p->CPU.Features |= CPU_FEATURE_SSE41;␊ |
245 | ␉}␊ |
246 | ␉if ((bit(20) & p->CPU.CPUID[CPUID_1][2]) != 0) {␊ |
247 | ␉␉p->CPU.Features |= CPU_FEATURE_SSE42;␊ |
248 | ␉}␊ |
249 | ␉if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0) {␊ |
250 | ␉␉p->CPU.Features |= CPU_FEATURE_EM64T;␊ |
251 | ␉}␊ |
252 | ␉//if ((bit(28) & p->CPU.CPUID[CPUID_1][3]) != 0) {␊ |
253 | ␉if (p->CPU.NoThreads > p->CPU.NoCores) {␊ |
254 | ␉␉p->CPU.Features |= CPU_FEATURE_HTT;␊ |
255 | ␉}␊ |
256 | ␊ |
257 | ␉tscFrequency = measure_tsc_frequency();␊ |
258 | ␉fsbFrequency = 0;␊ |
259 | ␉cpuFrequency = 0;␊ |
260 | ␊ |
261 | ␉if ((p->CPU.Vendor == 0x756E6547 /* Intel */) && ((p->CPU.Family == 0x06) || (p->CPU.Family == 0x0f))) {␊ |
262 | ␉␉if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)) {␊ |
263 | ␉␉␉/* Nehalem CPU model */␊ |
264 | ␉␉␉if (p->CPU.Family == 0x06 && (p->CPU.Model == 0x1a || p->CPU.Model == 0x1e)) {␊ |
265 | ␉␉␉␉msr = rdmsr64(MSR_PLATFORM_INFO);␊ |
266 | ␉␉␉␉DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);␊ |
267 | ␉␉␉␉currcoef = (msr >> 8) & 0xff;␊ |
268 | ␉␉␉␉msr = rdmsr64(MSR_FLEX_RATIO);␊ |
269 | ␉␉␉␉DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);␊ |
270 | ␉␉␉␉if ((msr >> 16) & 0x01) {␊ |
271 | ␉␉␉␉␉flex_ratio = (msr >> 8) & 0xff;␊ |
272 | ␉␉␉␉␉if (currcoef > flex_ratio) {␊ |
273 | ␉␉␉␉␉␉currcoef = flex_ratio;␊ |
274 | ␉␉␉␉␉}␊ |
275 | ␉␉␉␉}␊ |
276 | ␊ |
277 | ␉␉␉␉if (currcoef) {␊ |
278 | ␉␉␉␉␉fsbFrequency = (tscFrequency / currcoef);␊ |
279 | ␉␉␉␉}␊ |
280 | ␉␉␉␉cpuFrequency = tscFrequency;␊ |
281 | ␉␉␉} else {␊ |
282 | ␉␉␉␉msr = rdmsr64(IA32_PERF_STATUS);␊ |
283 | ␉␉␉␉DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);␊ |
284 | ␉␉␉␉currcoef = (msr >> 8) & 0x1f;␊ |
285 | ␉␉␉␉/* Non-integer bus ratio for the max-multi*/␊ |
286 | ␉␉␉␉maxdiv = (msr >> 46) & 0x01;␊ |
287 | ␉␉␉␉/* Non-integer bus ratio for the current-multi (undocumented)*/␊ |
288 | ␉␉␉␉currdiv = (msr >> 14) & 0x01;␊ |
289 | ␊ |
290 | ␉␉␉␉if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f)) // This will always be model >= 3␊ |
291 | ␉␉␉␉{␊ |
292 | ␉␉␉␉␉/* On these models, maxcoef defines TSC freq */␊ |
293 | ␉␉␉␉␉maxcoef = (msr >> 40) & 0x1f;␊ |
294 | ␉␉␉␉} else {␊ |
295 | ␉␉␉␉␉/* On lower models, currcoef defines TSC freq */␊ |
296 | ␉␉␉␉␉/* XXX */␊ |
297 | ␉␉␉␉␉maxcoef = currcoef;␊ |
298 | ␉␉␉␉}␊ |
299 | ␊ |
300 | ␉␉␉␉if (maxcoef) {␊ |
301 | ␉␉␉␉␉if (maxdiv) {␊ |
302 | ␉␉␉␉␉␉fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));␊ |
303 | ␉␉␉␉␉} else {␊ |
304 | ␉␉␉␉␉␉fsbFrequency = (tscFrequency / maxcoef);␊ |
305 | ␉␉␉␉␉}␊ |
306 | ␉␉␉␉␉if (currdiv) {␊ |
307 | ␉␉␉␉␉␉cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);␊ |
308 | ␉␉␉␉␉} else {␊ |
309 | ␉␉␉␉␉␉cpuFrequency = (fsbFrequency * currcoef);␊ |
310 | ␉␉␉␉␉}␊ |
311 | ␉␉␉␉␉DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");␊ |
312 | ␉␉␉␉}␊ |
313 | ␉␉␉}␊ |
314 | ␉␉}␊ |
315 | ␉␉/* Mobile CPU ? */␊ |
316 | ␉␉if (rdmsr64(0x17) & (1<<28)) {␊ |
317 | ␉␉␉p->CPU.Features |= CPU_FEATURE_MOBILE;␊ |
318 | ␉␉}␊ |
319 | ␉}␊ |
320 | #if 0␊ |
321 | ␉else if((p->CPU.Vendor == 0x68747541 /* AMD */) && (p->CPU.Family == 0x0f)) {␊ |
322 | ␉␉if(p->CPU.ExtFamily == 0x00 /* K8 */) {␊ |
323 | ␉␉␉msr = rdmsr64(K8_FIDVID_STATUS);␊ |
324 | ␉␉␉currcoef = (msr & 0x3f) / 2 + 4;␊ |
325 | ␉␉␉currdiv = (msr & 0x01) * 2;␊ |
326 | ␉␉} else if(p->CPU.ExtFamily >= 0x01 /* K10+ */) {␊ |
327 | ␉␉␉msr = rdmsr64(K10_COFVID_STATUS);␊ |
328 | ␉␉␉if(p->CPU.ExtFamily == 0x01 /* K10 */)␊ |
329 | ␉␉␉␉currcoef = (msr & 0x3f) + 0x10;␊ |
330 | ␉␉␉else /* K11+ */␊ |
331 | ␉␉␉␉currcoef = (msr & 0x3f) + 0x08;␊ |
332 | ␉␉␉currdiv = (2 << ((msr >> 6) & 0x07));␊ |
333 | ␉␉}␊ |
334 | ␊ |
335 | ␉␉if (currcoef) {␊ |
336 | ␉␉␉if (currdiv) {␊ |
337 | ␉␉␉␉fsbFrequency = ((tscFrequency * currdiv) / currcoef);␊ |
338 | ␉␉␉␉DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);␊ |
339 | ␉␉␉} else {␊ |
340 | ␉␉␉␉fsbFrequency = (tscFrequency / currcoef);␊ |
341 | ␉␉␉␉DBG("%d\n", currcoef);␊ |
342 | ␉␉␉}␊ |
343 | ␉␉␉fsbFrequency = (tscFrequency / currcoef);␊ |
344 | ␉␉␉cpuFrequency = tscFrequency;␊ |
345 | ␉␉}␊ |
346 | ␉}␊ |
347 | ␊ |
348 | ␉if (!fsbFrequency) {␊ |
349 | ␉␉fsbFrequency = (DEFAULT_FSB * 1000);␊ |
350 | ␉␉cpuFrequency = tscFrequency;␊ |
351 | ␉␉DBG("0 ! using the default value for FSB !\n");␊ |
352 | ␉}␊ |
353 | #endif␊ |
354 | ␊ |
355 | ␉p->CPU.MaxCoef = maxcoef;␊ |
356 | ␉p->CPU.MaxDiv = maxdiv;␊ |
357 | ␉p->CPU.CurrCoef = currcoef;␊ |
358 | ␉p->CPU.CurrDiv = currdiv;␊ |
359 | ␉p->CPU.TSCFrequency = tscFrequency;␊ |
360 | ␉p->CPU.FSBFrequency = fsbFrequency;␊ |
361 | ␉p->CPU.CPUFrequency = cpuFrequency;␊ |
362 | #if DEBUG_CPU␊ |
363 | ␉DBG("CPU: Vendor/Model/ExtModel: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Model, p->CPU.ExtModel);␊ |
364 | ␉DBG("CPU: Family/ExtFamily: 0x%x/0x%x\n", p->CPU.Family, p->CPU.ExtFamily);␊ |
365 | ␉DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);␊ |
366 | ␉DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);␊ |
367 | ␉DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);␊ |
368 | ␉DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);␊ |
369 | ␉DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);␊ |
370 | ␉DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);␊ |
371 | ␉DBG("CPU: Features: 0x%08x\n", p->CPU.Features);␊ |
372 | ␉printf("(Press a key to continue...)\n");␊ |
373 | ␉getc();␊ |
374 | #endif␊ |
375 | }␊ |
376 | |