Chameleon SVN Source Tree

Root/branches/cparm/i386/libsaio/cpu_intel_amd.c

/*
 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
 * AsereBLN: 2009: cleanup and bugfix
 */

#include "libsaio.h"
#include "platform.h"
#include "cpu.h"

#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

#if DEBUG_CPU
#define DBG(x...)    printf(x)
#else
#define DBG(x...)    msglog(x)
#endif
19
//#define LEGACY_CPU

// DFE: enable_PIT2 and disable_PIT2 come from older xnu

/*
 * Enable or disable timer 2.
 * Port 0x61 controls timer 2:
 *   bit 0 gates the clock,
 *   bit 1 gates output to speaker.
 */
static inline void enable_PIT2(void)
{
    /* Enable gate, disable speaker */
    __asm__ volatile(
        " inb   $0x61,%%al  \n\t"
        " and   $0xFC,%%al  \n\t"   /* & ~0x03 */
        " or    $1,%%al     \n\t"
        " outb  %%al,$0x61  \n\t"
        : : : "%al" );
}
40
static inline void disable_PIT2(void)
{
    /* Disable gate and output to speaker */
    __asm__ volatile(
        " inb   $0x61,%%al  \n\t"
        " and   $0xFC,%%al  \n\t"   /* & ~0x03 */
        " outb  %%al,$0x61  \n\t"
        : : : "%al" );
}
50
// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
// roughly based on Linux code

/* Set the 8254 channel 2 to mode 0 with the specified value.
   In mode 0, the counter will initially set its gate low when the
   timer expires. For this to be useful, you ought to set it high
   before calling this function. The enable_PIT2 function does this.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
    __asm__ volatile(
        " movb  $0xB0,%%al  \n\t"
        " outb  %%al,$0x43  \n\t"
        " movb  %%dl,%%al   \n\t"
        " outb  %%al,$0x42  \n\t"
        " movb  %%dh,%%al   \n\t"
        " outb  %%al,$0x42"
        : : "d"(value) /* no clobber */ );
}
70
/* Returns the number of times the loop ran before the PIT2 signaled */
static inline unsigned long poll_PIT2_gate(void)
{
    unsigned long count = 0;
    unsigned char nmi_sc_val;
    do {
        ++count;
        __asm__ volatile(
            "inb $0x61,%0"
            : "=q"(nmi_sc_val) /* no input, no clobber */ );
    } while ((nmi_sc_val & 0x20) == 0);
    return count;
}
84
#ifdef LEGACY_CPU
static uint64_t measure_tsc_frequency(void);
/*
 * DFE: Measures the TSC frequency in Hz (64-bit) using the 8254 PIT channel 2
 */
static uint64_t measure_tsc_frequency(void)
{
    uint64_t tscStart;
    uint64_t tscEnd;
    uint64_t tscDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of TSC ticks that occur while waiting for the timer to
     * expire. That theoretically helps avoid inconsistencies when
     * running under a VM if the TSC is not virtualized and the host
     * steals time. The TSC is normally virtualized for VMware.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        tscStart = rdtsc64();
        pollCount = poll_PIT2_gate();
        tscEnd = rdtsc64();
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The TSC must increment at LEAST once every millisecond. We
         * should have waited exactly 30 msec so the TSC delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // tscDelta = min(tscDelta, (tscEnd - tscStart))
        if ((tscEnd - tscStart) < tscDelta)
            tscDelta = tscEnd - tscStart;
    }
    /* tscDelta is now the least number of TSC ticks the processor made in
     * a timespan of 0.03 s (i.e. 30 milliseconds).
     * Linux thus divides by 30, which gives the answer in kilohertz because
     * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
     * Hz, so we need to convert our milliseconds to seconds. Since we're
     * dividing by the milliseconds, we simply multiply by 1000.
     */

    /* Unlike Linux, we're not limited to 32-bit, but we do need to take care
     * that we're going to multiply by 1000 first, so we do need at least some
     * arithmetic headroom. For now, 32-bit should be enough.
     * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
     */
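    /* Worked example (illustrative numbers, not from a real run): a CPU
     * whose TSC runs at 2.4 GHz advances about 72,000,000 ticks in 30 ms,
     * so retval = 72000000 * 1000 / 30 = 2,400,000,000 Hz = 2.4 GHz.
     */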
    if (tscDelta > (1ULL << 32))
        retval = 0;
    else
    {
        retval = tscDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}
#endif

#define MSR_AMD_APERF 0x000000E8
/*
 * Original comment/code:
 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
 *
 * Measures the Actual Performance Frequency in Hz (64-bit)
 * (just a naming change, mperf --> aperf)
 */
static uint64_t measure_aperf_frequency(void)
{
    uint64_t aperfStart;
    uint64_t aperfEnd;
    uint64_t aperfDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of APERF ticks that occur while waiting for the timer to
     * expire.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        aperfStart = rdmsr64(MSR_AMD_APERF);
        pollCount = poll_PIT2_gate();
        aperfEnd = rdmsr64(MSR_AMD_APERF);
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The APERF counter must increment at LEAST once every millisecond.
         * We should have waited exactly 30 msec so the APERF delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
        if ((aperfEnd - aperfStart) < aperfDelta)
            aperfDelta = aperfEnd - aperfStart;
    }
    /* aperfDelta is now the least number of APERF ticks the processor made
     * in a timespan of 0.03 s (i.e. 30 milliseconds), so convert to Hz the
     * same way as above: multiply by 1000 and divide by 30.
     */

    if (aperfDelta > (1ULL << 32))
        retval = 0;
    else
    {
        retval = aperfDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}


/*
 License for x2apic_enabled, get_apicbase, compute_bclk.

 Copyright (c) 2010, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of Intel Corporation nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
static uint32_t x2apic_enabled(void);
static uint32_t get_apicbase(void);
static uint32_t compute_bclk(void);

static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
    __asm__ volatile(
        "rdmsr"
        : "=a" (*lo_data_addr), "=d" (*hi_data_addr)
        : "c" (msr)
    );
}

static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
    __asm__ __volatile__ (
        "wrmsr"
        : /* No outputs */
        : "c" (msr), "a" (lo_data), "d" (hi_data)
    );
}

#define MSR_APIC_BASE               0x1B
#define APIC_TMR_INITIAL_CNT        0x380
#define APIC_TMR_CURRENT_CNT        0x390
#define APIC_TMR_DIVIDE_CFG         0x3E0
#define MSR_APIC_TMR_INITIAL_CNT    0x838
#define MSR_APIC_TMR_CURRENT_CNT    0x839
#define MSR_APIC_TMR_DIVIDE_CFG     0x83E
static uint32_t x2apic_enabled(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    /* Bit 10 of IA32_APIC_BASE is the x2APIC enable (EXTD) bit */
    return (temp64 & (1 << 10)) ? 1 : 0;
}

static uint32_t get_apicbase(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    return (uint32_t) (temp64 & 0xfffff000);
}
static uint32_t compute_bclk(void)
{
    uint32_t dummy;
    uint32_t start, stop;
    uint8_t temp8;
    uint16_t delay_count;
    uint32_t bclk;

#define DELAY_IN_US 1000

    // Compute fixed delay as time
    // delay count = desired time * PIT frequency
    // PIT frequency = 1.193182 MHz
    delay_count = 1193182 / DELAY_IN_US;

    // PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG   0x61
#define CH2_SPEAKER         (1 << 1)    // bit 1 -- 1 = speaker enabled, 0 = speaker disabled
#define CH2_GATE_IN         (1 << 0)    // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT        (1 << 5)    // bit 5 -- 1 = gate latched, 0 = gate not latched

    // PIT command register
#define PIT_MODE_COMMAND_REG                0x43
#define SELECT_CH2                          (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE           (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT   0   // Despite the name, no interrupts on CH2

    // PIT channel 2 data port
#define PIT_CH2_DATA    0x42

    // Disable the PIT channel 2 speaker and gate
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    // Set up command and mode
    outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

    // Set time for fixed delay
    outb(PIT_CH2_DATA, (uint8_t) (delay_count));
    outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

    // Prepare to enable channel 2 gate but leave the speaker disabled
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~CH2_SPEAKER;
    temp8 |= CH2_GATE_IN;

    if (x2apic_enabled())
    {
        // Set APIC timer divide value to 2
        wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

        // Start APIC timer with a known value
        start = ~0UL;
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
    }
    else
    {
        // Set APIC timer divide value to 2
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

        // Start APIC timer with a known value
        start = ~0UL;
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
    }

    // Actually start the PIT channel 2
    outb(PIT_CH2_LATCH_REG, temp8);

    // Wait for the fixed delay
    while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

    if (x2apic_enabled())
    {
        // Read the APIC timer to determine the change that occurred over this fixed delay
        rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

        // Stop APIC timer
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);
    }
    else
    {
        // Read the APIC timer to determine the change that occurred over this fixed delay
        stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

        // Stop APIC timer
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
    }

    // Disable channel 2 speaker and gate input
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    // The timer decremented (start - stop) counts at bclk/2 during the
    // DELAY_IN_US microsecond delay, so counts * 2 / DELAY_IN_US is bclk in MHz
    bclk = (start - stop) * 2 / DELAY_IN_US;

    // Round bclk to the nearest 100/12 integer value
    bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;
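    /* Worked example (illustrative): a raw measurement of 99 MHz snaps to
     * the nearest multiple of 200/24 = 100/12 MHz:
     *   ((99 * 24 + 100) / 200) * 200 / 24 = (2476 / 200) * 200 / 24
     *                                      = 12 * 200 / 24 = 100 MHz,
     * so jittery readings land on the grid that real BCLK values sit on.
     */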

    return bclk;
}


/*
 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU.
 * - The multiplier is read from a model-specific MSR. On Intel there are
 *   two: a maximum multiplier (used to calculate the FSB frequency) and a
 *   current multiplier (used to calculate the CPU frequency).
 * - fsbFrequency = tscFrequency / multiplier
 * - cpuFrequency = fsbFrequency * multiplier
 */
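/*
 * Worked example (illustrative numbers): with a measured BCLK of 133 MHz
 * and a maximum bus ratio of 20, cpuFrequency = 133 MHz * 20 = 2.66 GHz;
 * conversely, a 2.66 GHz TSC divided by the same ratio recovers the
 * 133 MHz FSB.
 */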

void scan_cpu(PlatformInfo_t *p)
{
    uint64_t tscFrequency = 0, fsbFrequency = 0, cpuFrequency = 0;
    uint64_t msr;
    uint8_t maxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
    uint32_t reg[4];
    uint32_t cores_per_package = 0;
    uint32_t logical_per_package = 0;

    do_cpuid(0, reg);
    p->CPU.Vendor = reg[ebx];
    p->CPU.cpuid_max_basic = reg[eax];

    if (p->CPU.Vendor == 0x756E6547 /* Intel */)
    {
        do_cpuid2(0x00000004, 0, reg);
        cores_per_package = bitfield(reg[eax], 31, 26) + 1;
    }
    else if (p->CPU.Vendor != 0x68747541 /* AMD */)
    {
        stop("Error: CPU unsupported\n");
        halt();
    }

    /* Get extended cpuid results */
    do_cpuid(0x80000000, reg);
    p->CPU.cpuid_max_ext = reg[eax];

    /* Begin of Copyright: from Apple's XNU cpuid.c */

    /* Get brand string (if supported) */
    if (p->CPU.cpuid_max_ext > 0x80000004)
    {
        char str[128], *s;
        /*
         * The brand string is 48 bytes (max), guaranteed to
         * be NUL terminated.
         */
        do_cpuid(0x80000002, reg);
        bcopy((char *)reg, &str[0], 16);
        do_cpuid(0x80000003, reg);
        bcopy((char *)reg, &str[16], 16);
        do_cpuid(0x80000004, reg);
        bcopy((char *)reg, &str[32], 16);
        /* Skip any leading spaces */
        for (s = str; *s != '\0'; s++)
        {
            if (*s != ' ') break;
        }

        strlcpy(p->CPU.BrandString, s, sizeof(p->CPU.BrandString));

        if (!strncmp(p->CPU.BrandString, CPUID_STRING_UNKNOWN, min(sizeof(p->CPU.BrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
        {
            /*
             * This string means we have a firmware-programmable brand string,
             * and the firmware couldn't figure out what sort of CPU we have.
             */
            p->CPU.BrandString[0] = '\0';
        }
    }

    /*
     * Get processor signature and decode,
     * and bracket this with the approved procedure for reading
     * the microcode version number a.k.a. signature a.k.a. BIOS ID.
     */
    if (p->CPU.Vendor == 0x756E6547 /* Intel */)
    {
        wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
        do_cpuid(1, reg);
        p->CPU.MicrocodeVersion =
            (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
    }
    else if (p->CPU.Vendor == 0x68747541 /* AMD */)
        do_cpuid(1, reg);

    p->CPU.Signature = reg[eax];
    p->CPU.Stepping  = bitfield(reg[eax], 3, 0);
    p->CPU.Model     = bitfield(reg[eax], 7, 4);
    p->CPU.Family    = bitfield(reg[eax], 11, 8);
    p->CPU.ExtModel  = bitfield(reg[eax], 19, 16);
    p->CPU.ExtFamily = bitfield(reg[eax], 27, 20);
    p->CPU.Brand     = bitfield(reg[ebx], 7, 0);
    p->CPU.Features  = quad(reg[ecx], reg[edx]);

    /* Fold extensions into family/model */
    if (p->CPU.Family == 0x0f)
        p->CPU.Family += p->CPU.ExtFamily;
    if (p->CPU.Family == 0x0f || p->CPU.Family == 0x06)
        p->CPU.Model += (p->CPU.ExtModel << 4);
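    /* Worked example (for illustration): a Sandy Bridge part reports
     * Family 0x06, Model 0xA, ExtModel 0x2, so the folded model is
     * 0xA + (0x2 << 4) = 0x2A, the value the CPUID_MODEL_SANDYBRIDGE
     * comparisons below rely on.
     */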

    if (p->CPU.Features & CPUID_FEATURE_HTT)
        logical_per_package = bitfield(reg[ebx], 23, 16);
    else
        logical_per_package = 1;

    if (p->CPU.cpuid_max_ext >= 0x80000001)
    {
        do_cpuid(0x80000001, reg);
        p->CPU.ExtFeatures = quad(reg[ecx], reg[edx]);
    }

    if (p->CPU.cpuid_max_ext >= 0x80000007)
    {
        do_cpuid(0x80000007, reg);

        /* Fold in the Invariant TSC feature bit, if present */
        p->CPU.ExtFeatures |= reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;

        if (p->CPU.Vendor == 0x68747541 /* AMD */)
        {
            /* Fold in the Hardware P-State control feature bit, if present */
            p->CPU.ExtFeatures |= reg[edx] & (uint32_t)_Bit(7);

            /* Fold in the read-only effective frequency interface feature bit, if present */
            p->CPU.ExtFeatures |= reg[edx] & (uint32_t)_Bit(10);
        }
    }

    if (p->CPU.Vendor == 0x68747541 /* AMD */)
    {
        if (p->CPU.cpuid_max_ext >= 0x80000008)
        {
            if (p->CPU.Features & CPUID_FEATURE_HTT)
            {
                do_cpuid(0x80000008, reg);
                cores_per_package = bitfield(reg[ecx], 7, 0) + 1; // NC + 1
            }
        }
    }

    if (p->CPU.cpuid_max_basic >= 0x5)
    {
        /*
         * Extract the Monitor/Mwait Leaf info:
         */
        do_cpuid(5, reg);
        if (p->CPU.Vendor == 0x756E6547 /* Intel */)
        {
            p->CPU.sub_Cstates = reg[edx];
        }

        p->CPU.extensions = reg[ecx];
    }

    if (p->CPU.Vendor == 0x756E6547 /* Intel */)
    {
        if (p->CPU.cpuid_max_basic >= 0x6)
        {
            /*
             * The Thermal and Power Leaf:
             */
            do_cpuid(6, reg);
            p->CPU.dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
            p->CPU.invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
            p->CPU.fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
        }

        if (p->CPU.Family == 0x06)
        {
            /*
             * Find the number of enabled cores and threads
             * (which determines whether SMT/Hyperthreading is active).
             */
            switch (p->CPU.Model)
            {
                case CPUID_MODEL_DALES_32NM:
                case CPUID_MODEL_WESTMERE:
                case CPUID_MODEL_WESTMERE_EX:
                {
                    msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                    p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
                    p->CPU.NoCores   = bitfield((uint32_t)msr, 19, 16);
                    break;
                }

                case CPUID_MODEL_NEHALEM:
                case CPUID_MODEL_FIELDS:
                case CPUID_MODEL_DALES:
                case CPUID_MODEL_NEHALEM_EX:
                case CPUID_MODEL_SANDYBRIDGE:
                case CPUID_MODEL_JAKETOWN:
                {
                    msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                    p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
                    p->CPU.NoCores   = bitfield((uint32_t)msr, 31, 16);
                    break;
                }
            }
        }
    }

    if (p->CPU.NoCores == 0)
    {
        if (p->CPU.Vendor == 0x68747541 /* AMD */)
        {
            if (!cores_per_package)
            {
                // Legacy method
                if ((p->CPU.ExtFeatures & _HBit(1) /* CmpLegacy */) && (p->CPU.Features & CPUID_FEATURE_HTT))
                    cores_per_package = logical_per_package;
                else
                    cores_per_package = 1;
            }
        }
        p->CPU.NoThreads = logical_per_package;
        p->CPU.NoCores   = cores_per_package ? cores_per_package : 1;
    }

    /* End of Copyright: from Apple's XNU cpuid.c */

    fsbFrequency = (uint64_t)(compute_bclk() * 1000000);

#ifdef LEGACY_CPU
    tscFrequency = measure_tsc_frequency();
#endif

    if (p->CPU.Vendor == 0x68747541 /* AMD */)
    {
#define K8_FIDVID_STATUS    0xC0010042
#define K10_COFVID_STATUS   0xC0010071
        if (p->CPU.ExtFeatures & _Bit(10))
        {
            cpuFrequency = measure_aperf_frequency();
        }

        if (p->CPU.Family == 0x0f)
        {
            switch (p->CPU.ExtFamily)
            {
                case 0x00: /* K8 */
                    msr = rdmsr64(K8_FIDVID_STATUS);
                    maxcoef  = bitfield(msr, 21, 16) / 2 + 4;
                    currcoef = bitfield(msr, 5, 0) / 2 + 4;
                    break;
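                    /* Worked example for the decode above (illustrative):
                     * a MaxFID field of 0x0C gives 12 / 2 + 4 = 10x; with
                     * the K8's 200 MHz reference clock that corresponds to
                     * a 2.0 GHz part.
                     */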

                case 0x01: /* K10 */
                {
                    //uint32_t reg[4];
                    msr = rdmsr64(K10_COFVID_STATUS);
                    /*
                     do_cpuid2(0x00000006, 0, reg);
                     EffFreq: effective frequency interface
                     if (bitfield(reg[ecx], 0, 0) == 1)
                     {
                         uint64_t aperf = measure_aperf_frequency();
                         cpuFrequency = aperf;
                     }
                     */
                    // NOTE: the TSC runs at the maxcoef (non-turbo) frequency,
                    // *not* at the turbo frequency.
                    maxcoef  = bitfield(msr, 54, 49) / 2 + 4;
                    currcoef = bitfield(msr, 5, 0) + 0x10;
                    currdiv  = 2 << bitfield(msr, 8, 6);

                    break;
                }
                case 0x05: /* K14 */
                    msr = rdmsr64(K10_COFVID_STATUS);
                    currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
                    currdiv  = (bitfield(msr, 8, 4) + 1) << 2;
                    currdiv += bitfield(msr, 3, 0);

                    break;

                case 0x02: /* K11 */
                    DBG("K11 detected, but not supported !!!\n");
                    // not implemented
                    break;
            }

            if (!fsbFrequency)
            {
                if (maxcoef)
                {
                    if (currdiv)
                    {
                        if (!currcoef) currcoef = maxcoef;
                        if (!cpuFrequency)
                            fsbFrequency = ((tscFrequency * currdiv) / currcoef);
                        else
                            fsbFrequency = ((cpuFrequency * currdiv) / currcoef);

                        DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                    } else {
                        if (!cpuFrequency)
                            fsbFrequency = (tscFrequency / maxcoef);
                        else
                            fsbFrequency = (cpuFrequency / maxcoef);
                        DBG("%d\n", currcoef);
                    }
                }
                else if (currcoef)
                {
                    if (currdiv)
                    {
                        fsbFrequency = ((tscFrequency * currdiv) / currcoef);
                        DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                    } else {
                        fsbFrequency = (tscFrequency / currcoef);
                        DBG("%d\n", currcoef);
                    }
                }
            }

        }

        // NOTE: This is not the approved method;
        // the method provided by AMD is:
        // if ((PowerNow == enabled (p->CPU.cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"

        if (strstr(p->CPU.BrandString, "obile")) // matches both "Mobile" and "mobile"
            p->CPU.isMobile = true;
        else
            p->CPU.isMobile = false;

        DBG("%s platform detected.\n", p->CPU.isMobile ? "Mobile" : "Desktop");
    }
    else if ((p->CPU.Vendor == 0x756E6547 /* Intel */) &&
             ((p->CPU.Family == 0x06) ||
              (p->CPU.Family == 0x0f)))
    {
        if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) ||
            (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03))
        {
            /* Nehalem-class CPU models */
            if (p->CPU.Family == 0x06 && (p->CPU.Model == CPUID_MODEL_NEHALEM ||
                                          p->CPU.Model == CPUID_MODEL_FIELDS ||
                                          p->CPU.Model == CPUID_MODEL_DALES ||
                                          p->CPU.Model == CPUID_MODEL_DALES_32NM ||
                                          p->CPU.Model == CPUID_MODEL_WESTMERE ||
                                          p->CPU.Model == CPUID_MODEL_NEHALEM_EX ||
                                          p->CPU.Model == CPUID_MODEL_WESTMERE_EX ||
                                          p->CPU.Model == CPUID_MODEL_SANDYBRIDGE ||
                                          p->CPU.Model == CPUID_MODEL_JAKETOWN))
            {
                uint8_t  bus_ratio_max = 0, bus_ratio_min = 0;
                uint32_t max_ratio = 0;
                uint64_t flex_ratio = 0;
                msr = rdmsr64(MSR_PLATFORM_INFO);
#if DEBUG_CPU
                DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
#endif
                bus_ratio_max = (msr >> 8) & 0xff;
                bus_ratio_min = (msr >> 40) & 0xff;
                msr = rdmsr64(MSR_FLEX_RATIO);
#if DEBUG_CPU
                DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
#endif
                if ((msr >> 16) & 0x01)
                {
                    flex_ratio = (msr >> 8) & 0xff;
                    /* bcc9: at least on the Gigabyte H67MA-UD2H,
                       where the CPU multiplier can't be changed to
                       allow overclocking, the flex_ratio MSR has unexpected
                       (to OS X) contents. These contents cause mach_kernel to
                       fail to compute the bus ratio correctly, instead
                       causing the system to crash since tscGranularity
                       is inadvertently set to 0.
                     */
                    if (flex_ratio == 0)
                    {
                        /* Clear bit 16 (evidently the presence bit) */
                        wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
                        msr = rdmsr64(MSR_FLEX_RATIO);
#if DEBUG_CPU
                        DBG("Unusable flex ratio detected. MSR patched to %08x\n", msr & 0xffffffff);
#endif
                    }
                    else
                    {
                        if (bus_ratio_max > flex_ratio)
                        {
                            bus_ratio_max = flex_ratio;
                        }
                    }
                }
#ifdef LEGACY_CPU
                if (bus_ratio_max)
                {
                    fsbFrequency = (tscFrequency / bus_ratio_max);
                }
#endif
                //valv: Turbo Ratio Limit
                if ((p->CPU.Model != 0x2e) && (p->CPU.Model != 0x2f))
                {
                    //msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
                    cpuFrequency = bus_ratio_max * fsbFrequency;
                    max_ratio = bus_ratio_max * 10;
                }
                else
                {
#ifdef LEGACY_CPU
                    cpuFrequency = tscFrequency;
#else
                    cpuFrequency = bus_ratio_max * fsbFrequency;
#endif
                }
#if DEBUG_CPU
                DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", fsbFrequency / 1000000, max_ratio);
#endif
                currcoef = bus_ratio_max;

                tscFrequency = cpuFrequency;
            }
            else
            {
                msr = rdmsr64(MSR_IA32_PERF_STATUS);
#if DEBUG_CPU
                DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
#endif
                currcoef = (msr >> 8) & 0x1f;
                /* Non-integer bus ratio for the max-multi */
                maxdiv = (msr >> 46) & 0x01;
                /* Non-integer bus ratio for the current-multi (undocumented) */
                currdiv = (msr >> 14) & 0x01;
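                /* Worked example (illustrative): currcoef = 10 with currdiv
                 * set means an effective multiplier of (10 * 2 + 1) / 2 =
                 * 10.5, so a 200 MHz FSB computes to 10.5 * 200 MHz =
                 * 2.1 GHz in the code below.
                 */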

                if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) ||
                    (p->CPU.Family == 0x0f)) // This will always be model >= 3
                {
                    /* On these models, maxcoef defines TSC freq */
                    maxcoef = (msr >> 40) & 0x1f;
                }
                else
                {
                    /* On lower models, currcoef defines TSC freq */
                    /* XXX */
                    maxcoef = currcoef;
                }
                if (!currcoef) currcoef = maxcoef;
#ifdef LEGACY_CPU
                if (maxcoef)
                {
                    if (maxdiv)
                    {
                        fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
                    }
                    else
                    {
                        fsbFrequency = (tscFrequency / maxcoef);
                    }

                    if (currdiv)
                    {
                        cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
                    }
                    else
                    {
                        cpuFrequency = (fsbFrequency * currcoef);
                    }
#if DEBUG_CPU
                    DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif
                }
#else
                if (currdiv)
                {
                    cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
                }
                else
                {
                    cpuFrequency = (fsbFrequency * currcoef);
                }

                if (maxcoef)
                {
                    if (maxdiv)
                    {
                        tscFrequency = (fsbFrequency * ((maxcoef * 2) + 1)) / 2;
                    }
                    else
                    {
                        tscFrequency = fsbFrequency * maxcoef;
                    }
                }
#if DEBUG_CPU
                DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif

#endif // LEGACY_CPU

            }
        }
        /* Mobile CPU? */
        //Slice
        p->CPU.isMobile = false;
        switch (p->CPU.Model)
        {
            case 0x0D:
                p->CPU.isMobile = true;
                break;
            case 0x02:
            case 0x03:
            case 0x04:
            case 0x06:
                p->CPU.isMobile = (rdmsr64(0x2C) & (1 << 21));
                break;
            default:
                p->CPU.isMobile = (rdmsr64(0x17) & (1 << 28));
                break;
        }

        DBG("%s platform detected.\n", p->CPU.isMobile ? "Mobile" : "Desktop");
    }

    if (!cpuFrequency) cpuFrequency = tscFrequency;
    if (!tscFrequency) tscFrequency = cpuFrequency;

    p->CPU.MaxCoef  = maxcoef;
    p->CPU.MaxDiv   = maxdiv;
    p->CPU.CurrCoef = currcoef;
    p->CPU.CurrDiv  = currdiv;

    p->CPU.TSCFrequency = tscFrequency;
    p->CPU.FSBFrequency = fsbFrequency;
    p->CPU.CPUFrequency = cpuFrequency;

    DBG("CPU: Vendor/Model/ExtModel: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Model, p->CPU.ExtModel);
    DBG("CPU: Family/ExtFamily: 0x%x/0x%x\n", p->CPU.Family, p->CPU.ExtFamily);
    if (p->CPU.Vendor == 0x68747541 /* AMD */)
    {
        DBG("CPU (AMD): TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
        DBG("CPU (AMD): FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
        DBG("CPU (AMD): CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
        DBG("CPU (AMD): MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
        DBG("CPU (AMD): MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
    }
    else
    {
        DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
        DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
        DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
        DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
        DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
    }

    DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
    DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
    DBG("CPU: ExtFeatures: 0x%08x\n", p->CPU.ExtFeatures);

    if (p->CPU.Vendor == 0x756E6547 /* Intel */)
        DBG("CPU: MicrocodeVersion: %d\n", p->CPU.MicrocodeVersion);

#if DEBUG_CPU
    pause();
#endif
}


Revision: 1804