1 | /*␊ |
2 | * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>␊ |
3 | * AsereBLN: 2009: cleanup and bugfix␊ |
4 | */␊ |
5 | ␊ |
6 | #include "libsaio.h"␊ |
7 | #include "platform.h"␊ |
8 | #include "cpu.h"␊ |
9 | ␊ |
10 | #ifndef DEBUG_CPU␊ |
11 | #define DEBUG_CPU 0␊ |
12 | #endif␊ |
13 | ␊ |
14 | #if DEBUG_CPU␊ |
15 | #define DBG(x...)␉␉printf(x)␊ |
16 | #else␊ |
17 | #define DBG(x...)␉␉msglog(x)␊ |
18 | #endif␊ |
19 | ␊ |
20 | //#define AMD_SUPPORT ␊ |
21 | ␊ |
22 | #ifndef INTEL_SUPPORT␊ |
23 | #define INTEL_SUPPORT 0 //Default (0: nolegacy, 1 : legacy)␊ |
24 | #endif␊ |
25 | ␊ |
26 | #ifdef AMD_SUPPORT␊ |
27 | #ifdef LEGACY_CPU␊ |
28 | #undef LEGACY_CPU␊ |
29 | #endif␊ |
30 | #ifdef INTEL_SUPPORT␊ |
31 | #undef INTEL_SUPPORT␊ |
32 | #endif␊ |
33 | #define LEGACY_CPU 1␊ |
34 | #endif␊ |
35 | ␊ |
36 | #ifdef INTEL_SUPPORT ␊ |
37 | #ifdef LEGACY_CPU␊ |
38 | #undef LEGACY_CPU␊ |
39 | #endif␊ |
40 | #define LEGACY_CPU INTEL_SUPPORT␊ |
41 | #endif␊ |
42 | // (?) : if AMD_SUPPORT then (LEGACY_CPU = 1 && INTEL_SUPPORT = disabled)␊ |
43 | //␉␉ else LEGACY_CPU = INTEL_SUPPORT␊ |
44 | ␊ |
45 | ␊ |
46 | #if LEGACY_CPU␊ |
47 | ␊ |
48 | ␊ |
49 | // DFE: enable_PIT2 and disable_PIT2 come from older xnu␊ |
50 | ␊ |
51 | /*␊ |
52 | * Enable or disable timer 2.␊ |
53 | * Port 0x61 controls timer 2:␊ |
54 | * bit 0 gates the clock,␊ |
55 | * bit 1 gates output to speaker.␊ |
56 | */␊ |
static inline void enable_PIT2(void)
{
	unsigned char ctl;

	/* Fetch the current control byte from port 0x61. */
	__asm__ volatile("inb $0x61,%0" : "=a"(ctl));

	/* Clear bits 0-1 (& ~0x03), then set bit 0: gate enabled, speaker off. */
	ctl = (unsigned char)((ctl & 0xFC) | 1);

	/* Write the updated control byte back. */
	__asm__ volatile("outb %0,$0x61" : : "a"(ctl));
}
67 | ␊ |
static inline void disable_PIT2(void)
{
	unsigned char ctl;

	/* Fetch the current control byte from port 0x61. */
	__asm__ volatile("inb $0x61,%0" : "=a"(ctl));

	/* Clear bits 0-1 (& ~0x03): gate disabled, speaker output off. */
	ctl = (unsigned char)(ctl & 0xFC);

	/* Write the updated control byte back. */
	__asm__ volatile("outb %0,$0x61" : : "a"(ctl));
}
// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
// roughly based on Linux code.
//
// set_PIT2_mode0 and poll_PIT2_gate are always compiled because
// measure_aperf_frequency() (AMD_SUPPORT builds) calls them; only the
// unused measure_tsc_frequency() stays disabled.

/*
 * Program 8254 channel 2 with the given 16-bit count.
 *
 * Command byte 0xB0 written to port 0x43 = channel 2 (2 << 6),
 * lobyte/hibyte access (3 << 4), mode 0.  The count is then written to
 * port 0x42, low byte first.  The gate should already be enabled
 * (see enable_PIT2) before calling this.
 *
 * value: initial counter value.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	__asm__ volatile(
		" movb	$0xB0,%%al	\n\t"
		" outb	%%al,$0x43	\n\t"
		" movb	%%dl,%%al	\n\t"
		" outb	%%al,$0x42	\n\t"
		" movb	%%dh,%%al	\n\t"
		" outb	%%al,$0x42"
		: /* no output */
		: "d"(value)
		: "%al" /* %al is used as scratch, so it must be declared */ );
}

/*
 * Spin until bit 5 (0x20) of port 0x61 becomes set.
 *
 * Returns the number of times the loop ran before the PIT2 signaled.
 */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;

	do {
		++count;
		/* inb can only load into %al, hence the "a" constraint. */
		__asm__ volatile(
			"inb	$0x61,%0"
			: "=a"(nmi_sc_val));
	} while ((nmi_sc_val & 0x20) == 0);

	return count;
}

#if 0
/*
 * DFE: Measures the TSC frequency in Hz (64-bit) using 8254 PIT counter 2.
 * Currently unused (timeRDTSC() is used instead), kept for reference.
 *
 * Returns the frequency in Hz, or 0 when no usable sample was obtained.
 */
static uint64_t measure_tsc_frequency(void)
{
	uint64_t tscStart;
	uint64_t tscEnd;
	uint64_t tscDelta = 0xffffffffffffffffULL;
	unsigned long pollCount;
	uint64_t retval = 0;
	int i;

	/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
	 * counter 2.  We run this loop 10 times to make sure the cache
	 * is hot and we take the minimum delta from all of the runs.
	 * That is to say that we're biased towards measuring the minimum
	 * number of TSC ticks that occur while waiting for the timer to
	 * expire.  That theoretically helps avoid inconsistencies when
	 * running under a VM if the TSC is not virtualized and the host
	 * steals time.  The TSC is normally virtualized for VMware.
	 */
	for (i = 0; i < 10; ++i)
	{
		enable_PIT2();
		set_PIT2_mode0(CALIBRATE_LATCH);
		tscStart = rdtsc64();
		pollCount = poll_PIT2_gate();
		tscEnd = rdtsc64();
		/* The poll loop must have run at least a few times for accuracy */
		if (pollCount <= 1)
			continue;
		/* The TSC must increment at LEAST once every millisecond.  We
		 * should have waited exactly 30 msec so the TSC delta should
		 * be >= 30.  Anything less and the processor is way too slow.
		 */
		if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
			continue;
		/* tscDelta = min(tscDelta, (tscEnd - tscStart)) */
		if ((tscEnd - tscStart) < tscDelta)
			tscDelta = tscEnd - tscStart;
	}

	/* tscDelta is now the least number of TSC ticks the processor made in
	 * a timespan of 0.03 s (i.e. 30 milliseconds).
	 * Linux divides by 30 which gives the answer in kiloHertz because
	 * 1 / ms = kHz.  The rest of this code uses Hz, so we also multiply
	 * by 1000.
	 *
	 * The multiplication by 1000 needs arithmetic headroom, so deltas
	 * above 32 bits (including the "no valid sample" sentinel) yield 0.
	 */
	if (tscDelta > (1ULL << 32))
		retval = 0;
	else
		retval = tscDelta * 1000 / 30;

	disable_PIT2();
	return retval;
}
#endif
176 | ␊ |
/*
 * Load PIT counter 2 with `value` and wait until the counter reflects it.
 *
 * First, tell the clock we are going to write 16 bits to the counter
 * and enable one-shot mode (command 0xB8 to port 0x43; decoded as
 * channel 2, lobyte/hibyte access, mode field 4 -- see the 8254
 * control-word format).
 * Then write the two bytes into the PIT2 clock register (port 0x42),
 * low byte first.
 * Finally loop, reading the counter back (low byte, then high byte),
 * until the high byte read equals the high byte written: the value is
 * "realized" in the clock on the next tick.
 *
 * value: counter value; only the low 16 bits (%dl/%dh) are used.
 */
static inline void
set_PIT2(int value)
{
	__asm__ volatile(
		" movb	$0xB8,%%al	\n\t"
		" outb	%%al,$0x43	\n\t"
		" movb	%%dl,%%al	\n\t"
		" outb	%%al,$0x42	\n\t"
		" movb	%%dh,%%al	\n\t"
		" outb	%%al,$0x42	\n"
		"1:	inb	$0x42,%%al	\n\t"
		" inb	$0x42,%%al	\n\t"
		" cmp	%%al,%%dh	\n\t"
		" jne	1b"
		: /* no output */
		: "d"(value)
		: "%al", "cc");
}
200 | ␊ |
201 | ␊ |
/*
 * Latch and read PIT counter 2 together with a time stamp.
 *
 * This routine first latches the time (command 0x80 to port 0x43),
 * then gets the time stamp so we know how long the read will take later.
 * It then reads (from port 0x42, low byte first) the current value of
 * the timer.
 *
 * value: receives the 16-bit counter value (upper bits zeroed).
 * Returns the 64-bit TSC value sampled right after the latch command.
 *
 * The same asm is used on i386 and x86_64: the TSC halves come back in
 * separate 32-bit registers and are combined in C.  The "A" constraint is
 * deliberately avoided because on x86_64 it names a single register, not
 * the edx:eax pair, and no push/pop is used so the stack is untouched.
 */
static inline uint64_t
get_PIT2(unsigned int *value)
{
	uint32_t tsc_lo;
	uint32_t tsc_hi;

	__asm__ volatile(
		" xorl	%%ecx,%%ecx	\n\t"	/* clear the counter result */
		" movb	$0x80,%%al	\n\t"
		" outb	%%al,$0x43	\n\t"	/* latch counter 2 */
		" rdtsc			\n\t"	/* edx:eax = time stamp */
		" movl	%%eax,%%esi	\n\t"	/* keep low half; %al is reused below */
		" inb	$0x42,%%al	\n\t"
		" movb	%%al,%%cl	\n\t"	/* counter low byte */
		" inb	$0x42,%%al	\n\t"
		" movb	%%al,%%ch	"	/* counter high byte */
		: "=S"(tsc_lo), "=d"(tsc_hi), "=c"(*value)
		: /* no input */
		: "%eax");

	return ((uint64_t)tsc_hi << 32) | tsc_lo;
}
241 | ␊ |
242 | /*␊ |
243 | * timeRDTSC()␊ |
244 | * This routine sets up PIT counter 2 to count down 1/20 of a second.␊ |
245 | * It pauses until the value is latched in the counter␊ |
246 | * and then reads the time stamp counter to return to the caller.␊ |
247 | */␊ |
248 | static uint64_t timeRDTSC(void)␊ |
249 | {␊ |
250 | int␉␉attempts = 0;␊ |
251 | uint64_t latchTime;␊ |
252 | uint64_t␉saveTime,intermediate;␊ |
253 | unsigned int timerValue, lastValue;␊ |
254 | //boolean_t␉int_enabled;␊ |
255 | /*␊ |
256 | * Table of correction factors to account for␊ |
257 | *␉ - timer counter quantization errors, and␊ |
258 | *␉ - undercounts 0..5␊ |
259 | */␊ |
260 | #define SAMPLE_CLKS_EXACT␉(((double) CLKNUM) / 20.0)␊ |
261 | #define SAMPLE_CLKS_INT␉␉((int) CLKNUM / 20)␊ |
262 | #define SAMPLE_NSECS␉␉(2000000000LL)␊ |
263 | #define SAMPLE_MULTIPLIER␉(((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)␊ |
264 | #define ROUND64(x)␉␉((uint64_t)((x) + 0.5))␊ |
265 | uint64_t␉scale[6] = {␊ |
266 | ␉␉ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)), ␊ |
267 | ␉␉ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)), ␊ |
268 | ␉␉ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)), ␊ |
269 | ␉␉ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)), ␊ |
270 | ␉␉ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)), ␊ |
271 | ␉␉ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))␊ |
272 | };␊ |
273 | ␊ |
274 | restart:␊ |
275 | if (attempts >= 9) // increase to up to 9 attempts.␊ |
276 | // This will flash-reboot. TODO: Use tscPanic instead.␊ |
277 | printf("Timestamp counter calibation failed with %d attempts\n", attempts);␊ |
278 | attempts++;␊ |
279 | enable_PIT2();␉␉// turn on PIT2␊ |
280 | set_PIT2(0);␉␉// reset timer 2 to be zero␊ |
281 | latchTime = rdtsc64();␉// get the time stamp to time ␊ |
282 | latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes␊ |
283 | set_PIT2(SAMPLE_CLKS_INT);␉// set up the timer for (almost) 1/20th a second␊ |
284 | saveTime = rdtsc64();␉// now time how long a 20th a second is...␊ |
285 | get_PIT2(&lastValue);␊ |
286 | get_PIT2(&lastValue);␉// read twice, first value may be unreliable␊ |
287 | do {␊ |
288 | ␉␉intermediate = get_PIT2(&timerValue);␊ |
289 | ␉␉if (timerValue > lastValue) {␊ |
290 | ␉␉␉// Timer wrapped␊ |
291 | ␉␉␉set_PIT2(0);␊ |
292 | ␉␉␉disable_PIT2();␊ |
293 | ␉␉␉goto restart;␊ |
294 | ␉␉}␊ |
295 | ␉␉lastValue = timerValue;␊ |
296 | } while (timerValue > 5);␊ |
297 | printf("timerValue␉ %d\n",timerValue);␊ |
298 | printf("intermediate 0x%016llx\n",intermediate);␊ |
299 | printf("saveTime␉ 0x%016llx\n",saveTime);␊ |
300 | ␊ |
301 | intermediate -= saveTime;␉␉// raw count for about 1/20 second␊ |
302 | intermediate *= scale[timerValue];␉// rescale measured time spent␊ |
303 | intermediate /= SAMPLE_NSECS;␉// so its exactly 1/20 a second␊ |
304 | intermediate += latchTime;␉␉// add on our save fudge␊ |
305 | ␊ |
306 | set_PIT2(0);␉␉␉// reset timer 2 to be zero␊ |
307 | disable_PIT2();␉␉␉// turn off PIT 2␊ |
308 | ␉␊ |
309 | return intermediate;␊ |
310 | }␊ |
311 | ␊ |
312 | #ifdef AMD_SUPPORT␊ |
313 | #define MSR_AMD_APERF 0x000000E8␊ |
314 | /*␊ |
315 | * Original comment/code:␊ |
316 | * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"␊ |
317 | *␊ |
318 | * Measures the Actual Performance Frequency in Hz (64-bit)␊ |
319 | * (just a naming change, mperf --> aperf )␊ |
320 | */␊ |
321 | static uint64_t measure_aperf_frequency(void)␊ |
322 | {␊ |
323 | ␉uint64_t aperfStart;␊ |
324 | ␉uint64_t aperfEnd;␊ |
325 | ␉uint64_t aperfDelta = 0xffffffffffffffffULL;␊ |
326 | ␉unsigned long pollCount;␊ |
327 | ␉uint64_t retval = 0;␊ |
328 | ␉int i;␊ |
329 | ␉␊ |
330 | ␉/* Time how many APERF ticks elapse in 30 msec using the 8254 PIT␊ |
331 | ␉ * counter 2. We run this loop 3 times to make sure the cache␊ |
332 | ␉ * is hot and we take the minimum delta from all of the runs.␊ |
333 | ␉ * That is to say that we're biased towards measuring the minimum␊ |
334 | ␉ * number of APERF ticks that occur while waiting for the timer to␊ |
335 | ␉ * expire.␊ |
336 | ␉ */␊ |
337 | ␉for(i = 0; i < 10; ++i)␊ |
338 | ␉{␊ |
339 | ␉␉enable_PIT2();␊ |
340 | ␉␉set_PIT2_mode0(CALIBRATE_LATCH);␊ |
341 | ␉␉aperfStart = rdmsr64(MSR_AMD_APERF);␊ |
342 | ␉␉pollCount = poll_PIT2_gate();␊ |
343 | ␉␉aperfEnd = rdmsr64(MSR_AMD_APERF);␊ |
344 | ␉␉/* The poll loop must have run at least a few times for accuracy */␊ |
345 | ␉␉if (pollCount <= 1)␊ |
346 | ␉␉␉continue;␊ |
347 | ␉␉/* The TSC must increment at LEAST once every millisecond.␊ |
348 | ␉␉ * We should have waited exactly 30 msec so the APERF delta should␊ |
349 | ␉␉ * be >= 30. Anything less and the processor is way too slow.␊ |
350 | ␉␉ */␊ |
351 | ␉␉if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)␊ |
352 | ␉␉␉continue;␊ |
353 | ␉␉// tscDelta = MIN(tscDelta, (tscEnd - tscStart))␊ |
354 | ␉␉if ( (aperfEnd - aperfStart) < aperfDelta )␊ |
355 | ␉␉␉aperfDelta = aperfEnd - aperfStart;␊ |
356 | ␉}␊ |
357 | ␉/* mperfDelta is now the least number of MPERF ticks the processor made in␊ |
358 | ␉ * a timespan of 0.03 s (e.g. 30 milliseconds)␊ |
359 | ␉ */␊ |
360 | ␉␊ |
361 | ␉if (aperfDelta > (1ULL<<32))␊ |
362 | ␉␉retval = 0;␊ |
363 | ␉else␊ |
364 | ␉{␊ |
365 | ␉␉retval = aperfDelta * 1000 / 30;␊ |
366 | ␉}␊ |
367 | ␉disable_PIT2();␊ |
368 | ␉return retval;␊ |
369 | }␊ |
370 | #endif␊ |
371 | ␊ |
372 | #endif␊ |
373 | ␊ |
374 | /*␊ |
375 | License for x2apic_enabled, get_apicbase, compute_bclk.␊ |
376 | ␊ |
377 | Copyright (c) 2010, Intel Corporation␊ |
378 | All rights reserved.␊ |
379 | ␊ |
380 | Redistribution and use in source and binary forms, with or without␊ |
381 | modification, are permitted provided that the following conditions are met:␊ |
382 | ␊ |
383 | * Redistributions of source code must retain the above copyright notice,␊ |
384 | this list of conditions and the following disclaimer.␊ |
385 | * Redistributions in binary form must reproduce the above copyright notice,␊ |
386 | this list of conditions and the following disclaimer in the documentation␊ |
387 | and/or other materials provided with the distribution.␊ |
388 | * Neither the name of Intel Corporation nor the names of its contributors␊ |
389 | may be used to endorse or promote products derived from this software␊ |
390 | without specific prior written permission.␊ |
391 | ␊ |
392 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND␊ |
393 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED␊ |
394 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE␊ |
395 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR␊ |
396 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES␊ |
397 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;␊ |
398 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON␊ |
399 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT␊ |
400 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS␊ |
401 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.␊ |
402 | */␊ |
403 | static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);␊ |
404 | static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);␊ |
405 | static uint32_t x2apic_enabled(void);␊ |
406 | static uint32_t get_apicbase(void);␊ |
407 | static uint32_t compute_bclk(void);␊ |
/*
 * Read the model-specific register `msr` and store its two 32-bit halves.
 *
 * msr:          MSR index (passed in %ecx).
 * lo_data_addr: receives the low 32 bits (%eax).
 * hi_data_addr: receives the high 32 bits (%edx).
 */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
	uint32_t lo_half;
	uint32_t hi_half;

	__asm__ volatile("rdmsr" : "=a" (lo_half), "=d" (hi_half) : "c" (msr));

	*lo_data_addr = lo_half;
	*hi_data_addr = hi_half;
}
/*
 * Write a 64-bit value, given as two 32-bit halves, to the
 * model-specific register `msr`.
 *
 * msr:     MSR index (passed in %ecx).
 * lo_data: low 32 bits (%eax).
 * hi_data: high 32 bits (%edx).
 */
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
	__asm__ __volatile__ ("wrmsr" : : "a" (lo_data), "d" (hi_data), "c" (msr));
}
424 | #define MSR_APIC_BASE 0x1B␊ |
425 | #define APIC_TMR_INITIAL_CNT 0x380␊ |
426 | #define APIC_TMR_CURRENT_CNT 0x390␊ |
427 | #define APIC_TMR_DIVIDE_CFG 0x3E0␊ |
428 | #define MSR_APIC_TMR_INITIAL_CNT 0x838␊ |
429 | #define MSR_APIC_TMR_CURRENT_CNT 0x839␊ |
430 | #define MSR_APIC_TMR_DIVIDE_CFG 0x83E␊ |
431 | static uint32_t x2apic_enabled(void)␊ |
432 | {␊ |
433 | uint64_t temp64;␊ |
434 | ␉␊ |
435 | temp64 = rdmsr64(MSR_APIC_BASE);␊ |
436 | ␉␊ |
437 | return (uint32_t) (temp64 & (1 << 10)) ? 1 : 0;␊ |
438 | }␊ |
439 | static uint32_t get_apicbase(void)␊ |
440 | {␊ |
441 | uint64_t temp64;␊ |
442 | ␉␊ |
443 | temp64 = rdmsr64(MSR_APIC_BASE);␊ |
444 | ␉␊ |
445 | return (uint32_t) (temp64 & 0xfffff000);␊ |
446 | }␊ |
447 | static uint32_t compute_bclk(void)␊ |
448 | {␊ |
449 | uint32_t dummy;␊ |
450 | uint32_t start, stop;␊ |
451 | uint8_t temp8;␊ |
452 | uint16_t delay_count;␊ |
453 | uint32_t bclk;␊ |
454 | ␉␊ |
455 | #define DELAY_IN_US 1000␊ |
456 | ␉␊ |
457 | // Compute fixed delay as time␊ |
458 | // delay count = desired time * PIT frequency␊ |
459 | // PIT frequency = 1.193182 MHz␊ |
460 | delay_count = 1193182 / DELAY_IN_US;␊ |
461 | ␉␊ |
462 | // PIT channel 2 gate is controlled by IO port 0x61, bit 0␊ |
463 | #define PIT_CH2_LATCH_REG 0x61␊ |
464 | #define CH2_SPEAKER (1 << 1) // bit 1 -- 1 = speaker enabled 0 = speaker disabled␊ |
465 | #define CH2_GATE_IN (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled␊ |
466 | #define CH2_GATE_OUT (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched␊ |
467 | ␉␊ |
468 | // PIT Command register␊ |
469 | #define PIT_MODE_COMMAND_REG 0x43␊ |
470 | #define SELECT_CH2 (2 << 6)␊ |
471 | #define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)␊ |
472 | #define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite name, no interrupts on CH2␊ |
473 | ␉␊ |
474 | // PIT Channel 2 data port␊ |
475 | #define PIT_CH2_DATA 0x42␊ |
476 | ␉␊ |
477 | // Disable the PIT channel 2 speaker and gate␊ |
478 | temp8 = inb(PIT_CH2_LATCH_REG);␊ |
479 | temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);␊ |
480 | outb(PIT_CH2_LATCH_REG, temp8);␊ |
481 | ␉␊ |
482 | // Setup command and mode␊ |
483 | outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);␊ |
484 | ␉␊ |
485 | // Set time for fixed delay␊ |
486 | outb(PIT_CH2_DATA, (uint8_t) (delay_count));␊ |
487 | outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));␊ |
488 | ␉␊ |
489 | // Prepare to enable channel 2 gate but leave the speaker disabled␊ |
490 | temp8 = inb(PIT_CH2_LATCH_REG);␊ |
491 | temp8 &= ~CH2_SPEAKER;␊ |
492 | temp8 |= CH2_GATE_IN;␊ |
493 | ␉␊ |
494 | if (x2apic_enabled())␊ |
495 | ␉{␊ |
496 | // Set APIC Timer Divide Value as 2␊ |
497 | wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);␊ |
498 | ␉␉␊ |
499 | // start APIC timer with a known value␊ |
500 | start = ~0UL;␊ |
501 | wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);␊ |
502 | }␊ |
503 | else␊ |
504 | ␉{␊ |
505 | // Set APIC Timer Divide Value as 2␊ |
506 | *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;␊ |
507 | ␉␉␊ |
508 | // start APIC timer with a known value␊ |
509 | start = ~0UL;␊ |
510 | *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;␊ |
511 | }␊ |
512 | ␉␊ |
513 | // Actually start the PIT channel 2␊ |
514 | outb(PIT_CH2_LATCH_REG, temp8);␊ |
515 | ␉␊ |
516 | // Wait for the fixed delay␊ |
517 | while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));␊ |
518 | ␉␊ |
519 | if (x2apic_enabled())␊ |
520 | ␉{␊ |
521 | // read the APIC timer to determine the change that occurred over this fixed delay␊ |
522 | rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);␊ |
523 | ␉␉␊ |
524 | // stop APIC timer␊ |
525 | wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);␊ |
526 | ␉␉␊ |
527 | }␊ |
528 | else␊ |
529 | ␉{␊ |
530 | // read the APIC timer to determine the change that occurred over this fixed delay␊ |
531 | stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);␊ |
532 | ␉␉␊ |
533 | // stop APIC timer␊ |
534 | *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;␊ |
535 | }␊ |
536 | ␉␊ |
537 | // Disable channel 2 speaker and gate input␊ |
538 | temp8 = inb(PIT_CH2_LATCH_REG);␊ |
539 | temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);␊ |
540 | outb(PIT_CH2_LATCH_REG, temp8);␊ |
541 | ␉␊ |
542 | bclk = (start - stop) * 2 / DELAY_IN_US;␊ |
543 | ␉␊ |
544 | // Round bclk to the nearest 100/12 integer value␊ |
545 | bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;␊ |
546 | ␉␊ |
547 | return bclk;␊ |
548 | }␊ |
549 | ␊ |
550 | ␊ |
551 | /*␊ |
552 | * Calculates the FSB and CPU frequencies using specific MSRs for each CPU␊ |
553 | * - multi. is read from a specific MSR. In the case of Intel, there is:␊ |
554 | * a max multi. (used to calculate the FSB freq.),␊ |
555 | * and a current multi. (used to calculate the CPU freq.)␊ |
556 | * - fsbFrequency = tscFrequency / multi␊ |
557 | * - cpuFrequency = fsbFrequency * multi␊ |
558 | */␊ |
559 | ␊ |
560 | void scan_cpu(void)␊ |
561 | {␉␊ |
562 | ␉uint64_t␉msr = 0; ␊ |
563 | ␊ |
564 | ␊ |
565 | uint64_t␉Features = 0;␉␉// CPU Features like MMX, SSE2, VT ...␊ |
566 | ␉uint64_t␉ExtFeatures = 0; // CPU Extended Features like SYSCALL, XD, EM64T, LAHF ...␊ |
567 | uint64_t␉TSCFreq = 0 ;␊ |
568 | uint64_t FSBFreq = 0 ; ␊ |
569 | uint64_t CPUFreq = 0;␊ |
570 | ␊ |
571 | uint32_t␉reg[4];␊ |
572 | uint32_t cores_per_package = 0;␊ |
573 | uint32_t logical_per_package = 0;␊ |
574 | ␊ |
575 | uint32_t␉Vendor = 0;␉␉␉// Vendor␊ |
576 | ␉uint32_t␉Signature = 0;␉␉// Signature␊ |
577 | ␉uint8_t Stepping = 0;␉␉// Stepping␊ |
578 | ␉uint8_t Model = 0;␉␉␉// Model␊ |
579 | ␉uint8_t ExtModel = 0;␉␉// Extended Model␊ |
580 | ␉uint8_t Family = 0;␉␉␉// Family␊ |
581 | ␉uint8_t ExtFamily = 0;␉␉// Extended Family␊ |
582 | ␉uint32_t␉NoCores = 0;␉␉// No Cores per Package␊ |
583 | ␉uint32_t␉NoThreads = 0;␉␉// Threads per Package␊ |
584 | ␉uint8_t Brand = 0; ␊ |
585 | ␉uint32_t␉MicrocodeVersion = 0; // The microcode version number a.k.a. signature a.k.a. BIOS ID ␊ |
586 | ␊ |
587 | ␉uint8_t isMobile = 0; ␊ |
588 | ␉␊ |
589 | ␉boolean_t␉dynamic_acceleration = 0;␊ |
590 | ␉boolean_t␉invariant_APIC_timer = 0;␊ |
591 | ␉boolean_t␉fine_grain_clock_mod = 0;␊ |
592 | ␉␊ |
593 | ␉uint32_t cpuid_max_basic = 0;␊ |
594 | ␉uint32_t cpuid_max_ext = 0;␊ |
595 | ␉uint32_t␉sub_Cstates = 0;␊ |
596 | ␉uint32_t extensions = 0; ␊ |
597 | ␊ |
598 | ␉uint8_t␉␉maxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;␊ |
599 | char␉␉CpuBrandString[48];␉// 48 Byte Branding String␊ |
600 | ␊ |
601 | ␉␊ |
602 | ␉do_cpuid(0, reg);␊ |
603 | ␉Vendor = reg[ebx];␊ |
604 | ␉cpuid_max_basic = reg[eax];␊ |
605 | ␊ |
606 | #ifndef AMD_SUPPORT␊ |
607 | do_cpuid2(0x00000004, 0, reg);␊ |
608 | cores_per_package␉␉= bitfield(reg[eax], 31, 26) + 1;␊ |
609 | #endif␊ |
610 | ␉␊ |
611 | /* get extended cpuid results */␊ |
612 | ␉do_cpuid(0x80000000, reg);␊ |
613 | ␉cpuid_max_ext = reg[eax];␊ |
614 | ␊ |
615 | ␉/* Begin of Copyright: from Apple's XNU cpuid.c */␊ |
616 | ␉␊ |
617 | ␉/* get brand string (if supported) */␊ |
618 | ␉if (cpuid_max_ext > 0x80000004)␊ |
619 | ␉{␉␉␊ |
620 | char str[128], *s;␊ |
621 | ␉␉/*␊ |
622 | ␉␉ * The brand string 48 bytes (max), guaranteed to␊ |
623 | ␉␉ * be NUL terminated.␊ |
624 | ␉␉ */␊ |
625 | ␉␉do_cpuid(0x80000002, reg);␊ |
626 | ␉␉bcopy((char *)reg, &str[0], 16);␊ |
627 | ␉␉do_cpuid(0x80000003, reg);␊ |
628 | ␉␉bcopy((char *)reg, &str[16], 16);␊ |
629 | ␉␉do_cpuid(0x80000004, reg);␊ |
630 | ␉␉bcopy((char *)reg, &str[32], 16);␊ |
631 | ␉␉for (s = str; *s != '\0'; s++)␊ |
632 | ␉␉{␊ |
633 | ␉␉␉if (*s != ' ') break;␊ |
634 | ␉␉}␊ |
635 | ␉␉␊ |
636 | ␉␉strlcpy(CpuBrandString,␉s, sizeof(CpuBrandString));␊ |
637 | ␉␉␊ |
638 | ␉␉if (!strncmp(CpuBrandString, CPUID_STRING_UNKNOWN, min(sizeof(CpuBrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))␊ |
639 | ␉␉{␊ |
640 | /*␊ |
641 | * This string means we have a firmware-programmable brand string,␊ |
642 | * and the firmware couldn't figure out what sort of CPU we have.␊ |
643 | */␊ |
644 | CpuBrandString[0] = '\0';␊ |
645 | }␊ |
646 | ␉} ␊ |
647 | ␉␊ |
648 | /*␊ |
649 | ␉ * Get processor signature and decode␊ |
650 | ␉ * and bracket this with the approved procedure for reading the␊ |
651 | ␉ * the microcode version number a.k.a. signature a.k.a. BIOS ID␊ |
652 | ␉ */␊ |
653 | #ifndef AMD_SUPPORT␊ |
654 | ␉wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);␊ |
655 | ␉do_cpuid(1, reg);␊ |
656 | ␉MicrocodeVersion = (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32); ␊ |
657 | #else␊ |
658 | ␉do_cpuid(1, reg);␊ |
659 | #endif␉␊ |
660 | ␉Signature = reg[eax];␊ |
661 | ␉Stepping = bitfield(reg[eax], 3, 0);␊ |
662 | ␉Model = bitfield(reg[eax], 7, 4);␊ |
663 | ␉Family = bitfield(reg[eax], 11, 8);␊ |
664 | ␉ExtModel = bitfield(reg[eax], 19, 16);␊ |
665 | ␉ExtFamily = bitfield(reg[eax], 27, 20);␊ |
666 | ␉Brand = bitfield(reg[ebx], 7, 0);␊ |
667 | ␉Features = quad(reg[ecx], reg[edx]);␊ |
668 | ␊ |
669 | /* Fold extensions into family/model */␊ |
670 | ␉if (Family == 0x0f)␊ |
671 | ␉␉Family += ExtFamily;␊ |
672 | ␉if (Family == 0x0f || Family == 0x06)␊ |
673 | ␉␉Model += (ExtModel << 4);␊ |
674 | ␊ |
675 | if (Features & CPUID_FEATURE_HTT)␊ |
676 | ␉␉logical_per_package =␊ |
677 | bitfield(reg[ebx], 23, 16);␊ |
678 | ␉else␊ |
679 | ␉␉logical_per_package = 1;␉ ␊ |
680 | ␉␊ |
681 | ␉␊ |
682 | ␉if (cpuid_max_ext >= 0x80000001)␊ |
683 | ␉{␊ |
684 | ␉␉do_cpuid(0x80000001, reg);␊ |
685 | ␉␉ExtFeatures =␊ |
686 | quad(reg[ecx], reg[edx]);␊ |
687 | ␉␉␊ |
688 | ␉}␊ |
689 | ␉␊ |
690 | ␉if (cpuid_max_ext >= 0x80000007)␊ |
691 | ␉{␊ |
692 | ␉␉do_cpuid(0x80000007, reg); ␊ |
693 | ␉␉␊ |
694 | ␉␉/* Fold in the Invariant TSC feature bit, if present */␊ |
695 | ␉␉ExtFeatures |=␊ |
696 | reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;␊ |
697 | ␉␉␊ |
698 | #ifdef AMD_SUPPORT␊ |
699 | ␉␉/* Fold in the Hardware P-State control feature bit, if present */␊ |
700 | ␉␉ExtFeatures |=␊ |
701 | reg[edx] & (uint32_t)_Bit(7);␊ |
702 | ␉␉␊ |
703 | ␉␉/* Fold in the read-only effective frequency interface feature bit, if present */␊ |
704 | ␉␉ExtFeatures |=␊ |
705 | reg[edx] & (uint32_t)_Bit(10);␊ |
706 | #endif␊ |
707 | ␉} ␊ |
708 | ␉␊ |
709 | #ifdef AMD_SUPPORT␊ |
710 | ␉if (cpuid_max_ext >= 0x80000008)␊ |
711 | ␉{␊ |
712 | ␉␉if (Features & CPUID_FEATURE_HTT) ␊ |
713 | ␉␉{␊ |
714 | ␉␉␉do_cpuid(0x80000008, reg);␊ |
715 | ␉␉␉cores_per_package␉␉= bitfield(reg[ecx], 7 , 0) + 1; // NC + 1␊ |
716 | ␉␉}␊ |
717 | ␉}␉␉␊ |
718 | #endif␊ |
719 | ␉␊ |
720 | if (cpuid_max_basic >= 0x5) { ␊ |
721 | ␉␉/*␊ |
722 | ␉␉ * Extract the Monitor/Mwait Leaf info:␊ |
723 | ␉␉ */␊ |
724 | ␉␉do_cpuid(5, reg);␊ |
725 | #ifndef AMD_SUPPORT␊ |
726 | sub_Cstates = reg[edx];␊ |
727 | #endif␊ |
728 | extensions = reg[ecx];␉␊ |
729 | ␉}␊ |
730 | ␉␊ |
731 | #ifndef AMD_SUPPORT ␊ |
732 | if (cpuid_max_basic >= 0x6)␊ |
733 | { ␊ |
734 | ␉␉/*␊ |
735 | ␉␉ * The thermal and Power Leaf:␊ |
736 | ␉␉ */␊ |
737 | ␉␉do_cpuid(6, reg);␊ |
738 | ␉␉dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"␊ |
739 | ␉␉invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"␊ |
740 | fine_grain_clock_mod = bitfield(reg[eax], 4, 4);␊ |
741 | ␉}␊ |
742 | ␉␊ |
743 | if ((Vendor == CPUID_VENDOR_INTEL) && ␊ |
744 | ␉␉(Family == 0x06))␊ |
745 | ␉{␊ |
746 | ␉␉/*␊ |
747 | ␉␉ * Find the number of enabled cores and threads␊ |
748 | ␉␉ * (which determines whether SMT/Hyperthreading is active).␊ |
749 | ␉␉ */␊ |
750 | ␉␉switch (Model)␊ |
751 | ␉␉{␊ |
752 | ␉␉␉␉␊ |
753 | ␉␉␉case CPUID_MODEL_DALES_32NM:␊ |
754 | ␉␉␉case CPUID_MODEL_WESTMERE:␊ |
755 | ␉␉␉case CPUID_MODEL_WESTMERE_EX:␊ |
756 | ␉␉␉{␊ |
757 | ␉␉␉␉msr = rdmsr64(MSR_CORE_THREAD_COUNT);␊ |
758 | ␉␉␉␉NoThreads = bitfield((uint32_t)msr, 15, 0);␊ |
759 | ␉␉␉␉NoCores = bitfield((uint32_t)msr, 19, 16); ␊ |
760 | ␉␉␉␉break;␊ |
761 | ␉␉␉}␊ |
762 | ␉␉␉␉␊ |
763 | ␉␉␉case CPUID_MODEL_NEHALEM:␊ |
764 | ␉␉␉case CPUID_MODEL_FIELDS:␊ |
765 | ␉␉␉case CPUID_MODEL_DALES:␊ |
766 | ␉␉␉case CPUID_MODEL_NEHALEM_EX:␊ |
767 | ␉␉␉case CPUID_MODEL_SANDYBRIDGE:␊ |
768 | ␉␉␉case CPUID_MODEL_JAKETOWN:␊ |
769 | case CPUID_MODEL_IVYBRIDGE:␊ |
770 | ␉␉␉{␊ |
771 | ␉␉␉␉msr = rdmsr64(MSR_CORE_THREAD_COUNT);␊ |
772 | ␉␉␉␉NoThreads = bitfield((uint32_t)msr, 15, 0);␊ |
773 | ␉␉␉␉NoCores = bitfield((uint32_t)msr, 31, 16); ␊ |
774 | ␉␉␉␉break;␊ |
775 | ␉␉␉} ␊ |
776 | ␉␉}␊ |
777 | }␊ |
778 | #endif␊ |
779 | if (NoCores == 0)␊ |
780 | ␉{␊ |
781 | #ifdef AMD_SUPPORT␉␉␊ |
782 | ␉␉if (!cores_per_package) {␊ |
783 | ␉␉␉//legacy method␊ |
784 | ␉␉␉if ((ExtFeatures & _HBit(1)/* CmpLegacy */) && ( Features & CPUID_FEATURE_HTT) )␊ |
785 | ␉␉␉␉cores_per_package = logical_per_package; ␊ |
786 | ␉␉␉else ␊ |
787 | ␉␉␉␉cores_per_package = 1;␊ |
788 | ␉␉}␉␉␊ |
789 | #endif␊ |
790 | ␉␉NoThreads = logical_per_package;␊ |
791 | ␉␉NoCores = cores_per_package ? cores_per_package : 1 ;␊ |
792 | ␉}␊ |
793 | ␉␊ |
794 | ␉/* End of Copyright: from Apple's XNU cpuid.c */␊ |
795 | ␊ |
796 | ␉FSBFreq = (uint64_t)(compute_bclk() * 1000000);␊ |
797 | ␊ |
798 | #if LEGACY_CPU␊ |
799 | ␉TSCFreq = timeRDTSC() * 20/*measure_tsc_frequency()*/;␊ |
800 | #endif␉␊ |
801 | ␉␊ |
802 | #ifdef AMD_SUPPORT␊ |
803 | #define K8_FIDVID_STATUS␉␉0xC0010042␊ |
804 | #define K10_COFVID_STATUS␉␉0xC0010071␊ |
805 | ␉if (ExtFeatures & _Bit(10))␊ |
806 | ␉{␉␉␊ |
807 | ␉␉CPUFreq = measure_aperf_frequency();␊ |
808 | ␉}␊ |
809 | ␉␊ |
810 | if ((Vendor == CPUID_VENDOR_AMD) && (Family == 0x0f))␊ |
811 | ␉{␊ |
812 | ␉␉switch(ExtFamily)␊ |
813 | ␉␉{␊ |
814 | ␉␉␉case 0x00: /* K8 */␊ |
815 | ␉␉␉␉msr = rdmsr64(K8_FIDVID_STATUS);␊ |
816 | ␉␉␉␉maxcoef = bitfield(msr, 21, 16) / 2 + 4;␊ |
817 | ␉␉␉␉currcoef = bitfield(msr, 5, 0) / 2 + 4;␊ |
818 | ␉␉␉␉break;␊ |
819 | ␉␉␉␉␊ |
820 | ␉␉␉case 0x01: /* K10 */␊ |
821 | {␊ |
822 | //uint32_t reg[4];␊ |
823 | ␉␉␉␉msr = rdmsr64(K10_COFVID_STATUS);␊ |
824 | ␉␉␉␉/*␊ |
825 | do_cpuid2(0x00000006, 0, reg);␊ |
826 | ␉␉␉␉ EffFreq: effective frequency interface␊ |
827 | if (bitfield(reg[ecx], 0, 0) == 1)␊ |
828 | {␊ |
829 | uint64_t aperf = measure_aperf_frequency();␊ |
830 | CPUFreq = aperf;␊ |
831 | }␊ |
832 | */␉␉␉␉ ␊ |
833 | ␉␉␉␉// NOTE: tsc runs at the maccoeff (non turbo)␊ |
834 | ␉␉␉␉//␉␉␉*not* at the turbo frequency.␊ |
835 | ␉␉␉␉maxcoef␉ = bitfield(msr, 54, 49) / 2 + 4;␊ |
836 | ␉␉␉␉currcoef = bitfield(msr, 5, 0) + 0x10;␊ |
837 | ␉␉␉␉currdiv = 2 << bitfield(msr, 8, 6);␊ |
838 | ␉␉␉␉␊ |
839 | ␉␉␉␉break;␊ |
840 | ␉␉␉}␉␊ |
841 | ␉␉␉case 0x05: /* K14 */␊ |
842 | ␉␉␉␉msr = rdmsr64(K10_COFVID_STATUS);␊ |
843 | ␉␉␉␉currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;␊ |
844 | ␉␉␉␉currdiv = (bitfield(msr, 8, 4) + 1) << 2;␊ |
845 | ␉␉␉␉currdiv += bitfield(msr, 3, 0);␊ |
846 | ␉␉␉␉␊ |
847 | ␉␉␉␉break;␊ |
848 | ␉␉␉␉␊ |
849 | ␉␉␉case 0x02: /* K11 */␊ |
850 | ␉␉␉␉DBG("K11 detected, but not supported !!!\n");␊ |
851 | ␉␉␉␉// not implimented␊ |
852 | ␉␉␉␉break;␊ |
853 | ␉␉}␊ |
854 | ␉␉␊ |
855 | ␉␉if (!FSBFreq)␊ |
856 | ␉␉{␊ |
857 | ␉␉␉if (maxcoef)␊ |
858 | ␉␉␉{␊ |
859 | ␉␉␉␉if (currdiv)␊ |
860 | ␉␉␉␉{␊ |
861 | ␉␉␉␉␉if (!currcoef) currcoef = maxcoef;␊ |
862 | ␉␉␉␉␉if (!CPUFreq)␊ |
863 | ␉␉␉␉␉␉FSBFreq = ((TSCFreq * currdiv) / currcoef);␊ |
864 | ␉␉␉␉␉else␊ |
865 | ␉␉␉␉␉␉FSBFreq = ((CPUFreq * currdiv) / currcoef);␊ |
866 | ␉␉␉␉␉␊ |
867 | ␉␉␉␉␉DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);␊ |
868 | ␉␉␉␉} else {␊ |
869 | ␉␉␉␉␉if (!CPUFreq)␊ |
870 | ␉␉␉␉␉␉FSBFreq = (TSCFreq / maxcoef);␊ |
871 | ␉␉␉␉␉else ␊ |
872 | ␉␉␉␉␉␉FSBFreq = (CPUFreq / maxcoef);␊ |
873 | ␉␉␉␉␉DBG("%d\n", currcoef);␊ |
874 | ␉␉␉␉}␊ |
875 | ␉␉␉}␊ |
876 | ␉␉␉else if (currcoef)␊ |
877 | ␉␉␉{␊ |
878 | ␉␉␉␉if (currdiv)␊ |
879 | ␉␉␉␉{␊ |
880 | ␉␉␉␉␉FSBFreq = ((TSCFreq * currdiv) / currcoef);␊ |
881 | ␉␉␉␉␉DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);␊ |
882 | ␉␉␉␉} else {␊ |
883 | ␉␉␉␉␉FSBFreq = (TSCFreq / currcoef);␊ |
884 | ␉␉␉␉␉DBG("%d\n", currcoef);␊ |
885 | ␉␉␉␉}␊ |
886 | ␉␉␉}␊ |
887 | ␉␉}␊ |
888 | ␉␉␊ |
889 | ␉}␊ |
890 | ␉␊ |
891 | ␉// NOTE: This is not the approved method,␊ |
892 | ␉// the method provided by AMD is: ␊ |
893 | ␉// if ((PowerNow == enabled (p->cpu->cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"␊ |
894 | ␉␊ |
895 | ␉if (strstr(CpuBrandString, "obile")) ␊ |
896 | ␉␉isMobile = 1;␊ |
897 | ␉else ␊ |
898 | ␉␉isMobile = 0;␊ |
899 | ␉␊ |
900 | ␉DBG("%s platform detected.\n", isMobile?"Mobile":"Desktop");␊ |
901 | #else␊ |
902 | if ((Vendor == CPUID_VENDOR_INTEL) && ␊ |
903 | ␉␉((Family == 0x06) || ␊ |
904 | ␉␉ (Family == 0x0f)))␊ |
905 | ␉{␊ |
906 | ␉␉if ((Family == 0x06 && Model >= 0x0c) || ␊ |
907 | ␉␉␉(Family == 0x0f && Model >= 0x03))␊ |
908 | ␉␉{␊ |
909 | ␉␉␉/* Nehalem CPU model */␊ |
910 | ␉␉␉if (Family == 0x06 && (Model == CPUID_MODEL_NEHALEM || ␊ |
911 | Model == CPUID_MODEL_FIELDS || ␊ |
912 | Model == CPUID_MODEL_DALES || ␊ |
913 | Model == CPUID_MODEL_DALES_32NM || ␊ |
914 | Model == CPUID_MODEL_WESTMERE ||␊ |
915 | Model == CPUID_MODEL_NEHALEM_EX ||␊ |
916 | Model == CPUID_MODEL_WESTMERE_EX ||␊ |
917 | Model == CPUID_MODEL_SANDYBRIDGE ||␊ |
918 | Model == CPUID_MODEL_JAKETOWN ||␊ |
919 | Model == CPUID_MODEL_IVYBRIDGE)) ␊ |
920 | ␉␉␉{␊ |
921 | ␉␉␉␉uint8_t␉␉bus_ratio_max = 0;␊ |
922 | ␉␉␉␉uint64_t␉flex_ratio = 0;␊ |
923 | ␉␉␉␉msr = rdmsr64(MSR_PLATFORM_INFO);␊ |
924 | #if DEBUG_CPU␊ |
925 | uint32_t␉max_ratio = 0, bus_ratio_min = 0;␊ |
926 | ␊ |
927 | ␉␉␉␉DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);␊ |
928 | #endif␊ |
929 | ␉␉␉␉bus_ratio_max = (msr >> 8) & 0xff;␊ |
930 | ␉␉␉␉//bus_ratio_min = (msr >> 40) & 0xff; ␊ |
931 | ␉␉␉␉msr = rdmsr64(MSR_FLEX_RATIO);␊ |
932 | #if DEBUG_CPU␊ |
933 | ␉␉␉␉DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);␊ |
934 | #endif␊ |
935 | ␉␉␉␉if ((msr >> 16) & 0x01)␊ |
936 | ␉␉␉␉{␊ |
937 | ␉␉␉␉␉flex_ratio = (msr >> 8) & 0xff;␊ |
938 | ␉␉␉␉␉/* bcc9: at least on the gigabyte h67ma-ud2h,␊ |
939 | ␉␉␉␉␉ where the cpu multiplier can't be changed to␊ |
940 | ␉␉␉␉␉ allow overclocking, the flex_ratio msr has unexpected (to OSX)␊ |
941 | ␉␉␉␉␉ contents. These contents cause mach_kernel to␊ |
942 | ␉␉␉␉␉ fail to compute the bus ratio correctly, instead␊ |
943 | ␉␉␉␉␉ causing the system to crash since tscGranularity␊ |
944 | ␉␉␉␉␉ is inadvertently set to 0.␊ |
945 | ␉␉␉␉␉ */␊ |
946 | ␉␉␉␉␉if (flex_ratio == 0)␊ |
947 | ␉␉␉␉␉{␊ |
948 | ␉␉␉␉␉␉/* Clear bit 16 (evidently the␊ |
949 | ␉␉␉␉␉␉ presence bit) */␊ |
950 | ␉␉␉␉␉␉wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));␊ |
951 | #if DEBUG_CPU␊ |
952 | msr = rdmsr64(MSR_FLEX_RATIO);␊ |
953 | ␊ |
954 | ␉␉␉␉␉␉DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);␊ |
955 | #endif␊ |
956 | ␉␉␉␉␉}␊ |
957 | ␉␉␉␉␉else␊ |
958 | ␉␉␉␉␉{␊ |
959 | ␉␉␉␉␉␉if (bus_ratio_max > flex_ratio)␊ |
960 | ␉␉␉␉␉␉{␊ |
961 | ␉␉␉␉␉␉␉bus_ratio_max = flex_ratio;␊ |
962 | ␉␉␉␉␉␉}␊ |
963 | ␉␉␉␉␉}␊ |
964 | ␉␉␉␉}␊ |
965 | #if LEGACY_CPU␊ |
966 | ␉␉␉␉if (bus_ratio_max)␊ |
967 | ␉␉␉␉{␊ |
968 | ␉␉␉␉␉FSBFreq = (TSCFreq / bus_ratio_max);␊ |
969 | ␉␉␉␉}␊ |
970 | #endif␊ |
971 | ␉␉␉␉//valv: Turbo Ratio Limit␊ |
972 | ␉␉␉␉if ((Model != 0x2e) && (Model != 0x2f))␊ |
973 | ␉␉␉␉{␊ |
974 | ␉␉␉␉␉//msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);␊ |
975 | ␉␉␉␉␉CPUFreq = bus_ratio_max * FSBFreq;␊ |
976 | ␉␉␉␉␉//max_ratio = bus_ratio_max * 10;␊ |
977 | ␉␉␉␉}␊ |
978 | ␉␉␉␉else␊ |
979 | ␉␉␉␉{␊ |
980 | #if LEGACY_CPU␊ |
981 | ␉␉␉␉␉CPUFreq = TSCFreq;␊ |
982 | #else␊ |
983 | ␉␉␉␉␉CPUFreq = bus_ratio_max * FSBFreq;␊ |
984 | #endif␊ |
985 | ␉␉␉␉}␉␉␉␉␉␉␉␉␊ |
986 | #if DEBUG_CPU␊ |
987 | ␉␉␉␉DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", FSBFreq / 1000000, max_ratio);␊ |
988 | #endif␊ |
989 | ␉␉␉␉currcoef = bus_ratio_max;␊ |
990 | ␊ |
991 | TSCFreq = CPUFreq;␊ |
992 | ␉␉␉} ␊ |
993 | ␉␉␉else␊ |
994 | ␉␉␉{␊ |
995 | ␉␉␉␉msr = rdmsr64(MSR_IA32_PERF_STATUS);␊ |
996 | #if DEBUG_CPU␊ |
997 | ␉␉␉␉DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);␊ |
998 | #endif␊ |
999 | ␉␉␉␉currcoef = (msr >> 8) & 0x1f;␊ |
1000 | ␉␉␉␉/* Non-integer bus ratio for the max-multi*/␊ |
1001 | ␉␉␉␉maxdiv = (msr >> 46) & 0x01;␊ |
1002 | ␉␉␉␉/* Non-integer bus ratio for the current-multi (undocumented)*/␊ |
1003 | ␉␉␉␉currdiv = (msr >> 14) & 0x01;␊ |
1004 | ␊ |
1005 | ␉␉␉␉if ((Family == 0x06 && Model >= 0x0e) || ␊ |
1006 | ␉␉␉␉␉(Family == 0x0f)) // This will always be model >= 3␊ |
1007 | ␉␉␉␉{␊ |
1008 | ␉␉␉␉␉/* On these models, maxcoef defines TSC freq */␊ |
1009 | ␉␉␉␉␉maxcoef = (msr >> 40) & 0x1f;␊ |
1010 | ␉␉␉␉} ␊ |
1011 | ␉␉␉␉else ␊ |
1012 | ␉␉␉␉{␊ |
1013 | ␉␉␉␉␉/* On lower models, currcoef defines TSC freq */␊ |
1014 | ␉␉␉␉␉/* XXX */␊ |
1015 | ␉␉␉␉␉maxcoef = currcoef;␊ |
1016 | ␉␉␉␉}␊ |
1017 | ␉␉␉␉if (!currcoef) currcoef = maxcoef;␊ |
1018 | #if LEGACY_CPU␊ |
1019 | ␉␉␉␉if (maxcoef) ␊ |
1020 | ␉␉␉␉{␉␉␉␉␉␊ |
1021 | ␉␉␉␉␉␊ |
1022 | ␉␉␉␉␉if (maxdiv)␊ |
1023 | ␉␉␉␉␉{␊ |
1024 | ␉␉␉␉␉␉FSBFreq = ((TSCFreq * 2) / ((maxcoef * 2) + 1));␊ |
1025 | ␉␉␉␉␉}␊ |
1026 | ␉␉␉␉␉else ␊ |
1027 | ␉␉␉␉␉{␊ |
1028 | ␉␉␉␉␉␉FSBFreq = (TSCFreq / maxcoef);␊ |
1029 | ␉␉␉␉␉}␊ |
1030 | ␉␉␉␉␉␊ |
1031 | ␉␉␉␉␉if (currdiv) ␊ |
1032 | ␉␉␉␉␉{␊ |
1033 | ␉␉␉␉␉␉CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);␊ |
1034 | ␉␉␉␉␉}␊ |
1035 | ␉␉␉␉␉else ␊ |
1036 | ␉␉␉␉␉{␊ |
1037 | ␉␉␉␉␉␉CPUFreq = (FSBFreq * currcoef);␊ |
1038 | ␉␉␉␉␉}␊ |
1039 | #if DEBUG_CPU␊ |
1040 | ␉␉␉␉␉DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");␊ |
1041 | #endif␊ |
1042 | ␉␉␉␉}␊ |
1043 | #else␊ |
1044 | ␉␉␉␉␊ |
1045 | ␉␉␉␉␊ |
1046 | ␉␉␉␉if (currdiv) ␊ |
1047 | ␉␉␉␉{␊ |
1048 | ␉␉␉␉␉CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);␊ |
1049 | ␉␉␉␉}␊ |
1050 | ␉␉␉␉else ␊ |
1051 | ␉␉␉␉{␊ |
1052 | ␉␉␉␉␉CPUFreq = (FSBFreq * currcoef);␊ |
1053 | ␉␉␉␉}␊ |
1054 | ␉␉␉␉␊ |
1055 | ␉␉␉␉if (maxcoef) ␊ |
1056 | ␉␉␉␉{␊ |
1057 | ␉␉␉␉␉if (maxdiv)␊ |
1058 | ␉␉␉␉␉{␊ |
1059 | ␉␉␉␉␉␉TSCFreq = (FSBFreq * ((maxcoef * 2) + 1)) / 2;␊ |
1060 | ␉␉␉␉␉}␊ |
1061 | ␉␉␉␉␉else ␊ |
1062 | ␉␉␉␉␉{␊ |
1063 | ␉␉␉␉␉␉TSCFreq = FSBFreq * maxcoef;␊ |
1064 | ␉␉␉␉␉}␊ |
1065 | ␉␉␉␉}␉␉␉␉␉␉␉␉␊ |
1066 | #if DEBUG_CPU␊ |
1067 | ␉␉␉␉DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");␊ |
1068 | #endif␊ |
1069 | ␊ |
1070 | #endif // LEGACY_CPU␊ |
1071 | ␉␉␉␉␊ |
1072 | ␉␉␉}␊ |
1073 | ␉␉}␊ |
1074 | /* Mobile CPU ? */ ␊ |
1075 | ␉␉//Slice ␊ |
1076 | ␉ //isMobile = 0;␊ |
1077 | ␉␉switch (Model)␊ |
1078 | ␉␉{␊ |
1079 | ␉␉␉case 0x0D:␊ |
1080 | ␉␉␉␉isMobile = 1; ␊ |
1081 | ␉␉␉␉break;␉␉␉␊ |
1082 | ␉␉␉case 0x02:␊ |
1083 | ␉␉␉case 0x03:␊ |
1084 | ␉␉␉case 0x04:␊ |
1085 | ␉␉␉case 0x06:␉␊ |
1086 | ␉␉␉␉isMobile = (rdmsr64(0x2C) & (1 << 21))? 1 : 0;␊ |
1087 | ␉␉␉␉break;␊ |
1088 | ␉␉␉default:␊ |
1089 | ␉␉␉␉isMobile = (rdmsr64(0x17) & (1 << 28)) ? 1 : 0;␊ |
1090 | ␉␉␉␉break;␊ |
1091 | ␉␉}␊ |
1092 | ␊ |
1093 | ␉␉DBG("%s platform detected.\n", isMobile?"Mobile":"Desktop");␊ |
1094 | ␉}␊ |
1095 | #endif␊ |
1096 | ␉if (!CPUFreq) CPUFreq = TSCFreq;␊ |
1097 | if (!TSCFreq) TSCFreq = CPUFreq;␊ |
1098 | ␊ |
1099 | ␉set_env(envVendor, Vendor);␊ |
1100 | set_env(envModel, Model); ␊ |
1101 | set_env(envExtModel, ExtModel); ␊ |
1102 | ␊ |
1103 | ␉set_env(envCPUIDMaxBasic, cpuid_max_basic);␊ |
1104 | ␉set_env(envCPUIDMaxBasic, cpuid_max_ext);␊ |
1105 | #ifndef AMD_SUPPORT␊ |
1106 | ␉set_env(envMicrocodeVersion, MicrocodeVersion); ␊ |
1107 | #endif␊ |
1108 | set_env_copy(envBrandString, CpuBrandString, sizeof(CpuBrandString));␊ |
1109 | ␉set_env(envSignature, Signature); ␊ |
1110 | ␉set_env(envStepping, Stepping); ␊ |
1111 | ␉set_env(envFamily,␉ Family); ␊ |
1112 | ␉set_env(envExtModel, ExtModel); ␊ |
1113 | ␉set_env(envExtFamily, ExtFamily); ␊ |
1114 | ␉set_env(envBrand,␉ Brand); ␊ |
1115 | ␉set_env(envFeatures, Features);␊ |
1116 | set_env(envExtFeatures, ExtFeatures);␊ |
1117 | #ifndef AMD_SUPPORT␊ |
1118 | ␉set_env(envSubCstates, sub_Cstates); ␊ |
1119 | #endif␊ |
1120 | ␉set_env(envExtensions, extensions); ␊ |
1121 | #ifndef AMD_SUPPORT ␊ |
1122 | ␉set_env(envDynamicAcceleration, dynamic_acceleration); ␊ |
1123 | ␉set_env(envInvariantAPICTimer,␉ invariant_APIC_timer); ␊ |
1124 | ␉set_env(envFineGrainClockMod, fine_grain_clock_mod);␊ |
1125 | #endif␊ |
1126 | ␉set_env(envNoThreads,␉ NoThreads); ␊ |
1127 | ␉set_env(envNoCores,␉␉ NoCores);␊ |
1128 | ␉set_env(envIsMobile,␉␉ isMobile);␊ |
1129 | ␉␊ |
1130 | ␉set_env(envMaxCoef,␉␉ maxcoef); ␊ |
1131 | ␉set_env(envMaxDiv,␉␉ maxdiv);␊ |
1132 | ␉set_env(envCurrCoef,␉␉ currcoef);␊ |
1133 | ␉set_env(envCurrDiv,␉ currdiv); ␊ |
1134 | ␉set_env(envTSCFreq,␉ TSCFreq);␊ |
1135 | ␉set_env(envFSBFreq,␉ FSBFreq);␊ |
1136 | ␉set_env(envCPUFreq,␉ CPUFreq);␊ |
1137 | ␉␊ |
1138 | #ifdef AMD_SUPPORT␊ |
1139 | msglog("AMD CPU Detection Enabled\n");␊ |
1140 | #endif␊ |
1141 | ␉␊ |
1142 | } |