Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/cpu_intel_amd.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9
10#ifndef DEBUG_CPU
11#define DEBUG_CPU 0
12#endif
13
14#if DEBUG_CPU
15#define DBG(x...)printf(x)
16#else
17#define DBG(x...)msglog(x)
18#endif
19
20//#define LEGACY_CPU
21
22// DFE: enable_PIT2 and disable_PIT2 come from older xnu
23
24/*
25 * Enable or disable timer 2.
26 * Port 0x61 controls timer 2:
27 * bit 0 gates the clock,
28 * bit 1 gates output to speaker.
29 */
static inline void enable_PIT2(void)
{
	/* Enable gate, disable speaker:
	 * read port 0x61, clear bits 0-1, then set bit 0 (gate) only,
	 * leaving bit 1 (speaker output) off. */
	__asm__ volatile(
		" inb $0x61,%%al \n\t"
		" and $0xFC,%%al \n\t"  /* & ~0x03 : clear gate + speaker bits */
		" or $1,%%al \n\t"      /* bit 0 = 1 : gate the PIT2 clock on */
		" outb %%al,$0x61 \n\t"
		: : : "%al" );
}
40
static inline void disable_PIT2(void)
{
	/* Disable gate and output to speaker:
	 * read port 0x61 and clear bit 0 (clock gate) and bit 1 (speaker). */
	__asm__ volatile(
		" inb $0x61,%%al \n\t"
		" and $0xFC,%%al \n\t"  /* & ~0x03 */
		" outb %%al,$0x61 \n\t"
		: : : "%al" );
}
50
51// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
52// roughly based on Linux code
53
54/* Set the 8254 channel 2 to mode 0 with the specified value.
55 In mode 0, the counter will initially set its gate low when the
56 timer expires. For this to be useful, you ought to set it high
57 before calling this function. The enable_PIT2 function does this.
58 */
static inline void set_PIT2_mode0(uint16_t value)
{
	/* Program the 8254: command 0xB0 = select channel 2, access mode
	 * lobyte/hibyte, mode 0.  The 16-bit count arrives in %dx ("d"
	 * constraint); write its low byte then high byte to port 0x42.
	 *
	 * Fixes vs. original: restored the blank between mnemonic and
	 * operand (e.g. "outb%%al" is not valid GAS syntax), and declared
	 * %al in the clobber list — the sequence overwrites it, but the
	 * original told the compiler "no clobber". */
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"
		" outb %%al,$0x43 \n\t"  /* command word */
		" movb %%dl,%%al \n\t"
		" outb %%al,$0x42 \n\t"  /* count, low byte */
		" movb %%dh,%%al \n\t"
		" outb %%al,$0x42"       /* count, high byte */
		: : "d"(value) : "%al" );
}
70
71/* Returns the number of times the loop ran before the PIT2 signaled */
/* Spin until PIT channel 2's output bit (port 0x61 bit 5) goes high,
 * i.e. until the mode-0 countdown started by set_PIT2_mode0() expires.
 * Returns the number of polling iterations performed, which callers use
 * as a sanity check that the wait actually took some time.
 *
 * Fix vs. original: "inb$0x61,%0" lacked the mandatory blank between
 * mnemonic and operand and would not assemble. */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;
	do {
		++count;
		__asm__ volatile(
			"inb $0x61,%0"
			: "=q"(nmi_sc_val) /* no inputs, no extra clobbers */ );
	} while ( (nmi_sc_val & 0x20) == 0 );
	return count;
}
84
85#ifdef LEGACY_CPU
static uint64_t measure_tsc_frequency(void);
/*
 * DFE: Measures the TSC frequency in Hz (64-bit).
 * NOTE: despite the historical comment mentioning the ACPI PM timer,
 * the calibration below is done against 8254 PIT channel 2.
 */
static uint64_t measure_tsc_frequency(void)
{
	uint64_t tscStart;
	uint64_t tscEnd;
	uint64_t tscDelta = 0xffffffffffffffffULL;  /* running minimum */
	unsigned long pollCount;
	uint64_t retval = 0;
	int i;

	/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
	 * counter 2. We run this loop several times (10 iterations here) to
	 * make sure the cache is hot and we take the minimum delta from all
	 * of the runs.
	 * That is to say that we're biased towards measuring the minimum
	 * number of TSC ticks that occur while waiting for the timer to
	 * expire. That theoretically helps avoid inconsistencies when
	 * running under a VM if the TSC is not virtualized and the host
	 * steals time. The TSC is normally virtualized for VMware.
	 */
	for(i = 0; i < 10; ++i)
	{
		enable_PIT2();
		set_PIT2_mode0(CALIBRATE_LATCH);
		tscStart = rdtsc64();
		pollCount = poll_PIT2_gate();
		tscEnd = rdtsc64();
		/* The poll loop must have run at least a few times for accuracy */
		if(pollCount <= 1)
			continue;
		/* The TSC must increment at LEAST once every millisecond. We
		 * should have waited exactly 30 msec so the TSC delta should
		 * be >= 30. Anything less and the processor is way too slow.
		 */
		if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
			continue;
		// tscDelta = min(tscDelta, (tscEnd - tscStart))
		if( (tscEnd - tscStart) < tscDelta )
			tscDelta = tscEnd - tscStart;
	}
	/* tscDelta is now the least number of TSC ticks the processor made in
	 * a timespan of 0.03 s (e.g. 30 milliseconds)
	 * Linux thus divides by 30 which gives the answer in kiloHertz because
	 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
	 * Hz so we need to convert our milliseconds to seconds. Since we're
	 * dividing by the milliseconds, we simply multiply by 1000.
	 */

	/* Unlike linux, we're not limited to 32-bit, but we do need to take care
	 * that we're going to multiply by 1000 first so we do need at least some
	 * arithmetic headroom. For now, 32-bit should be enough.
	 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
	 */
	if(tscDelta > (1ULL<<32))
		retval = 0;
	else
	{
		retval = tscDelta * 1000 / 30;
	}
	disable_PIT2();
	return retval;
}
150#endif
151
152
153#define MSR_AMD_APERF 0x000000E8
154/*
155 * Original comment/code:
156 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
157 *
158 * Measures the Actual Performance Frequency in Hz (64-bit)
159 * (just a naming change, mperf --> aperf )
160 */
static uint64_t measure_aperf_frequency(void)
{
	uint64_t aperfStart;
	uint64_t aperfEnd;
	uint64_t aperfDelta = 0xffffffffffffffffULL;  /* running minimum */
	unsigned long pollCount;
	uint64_t retval = 0;
	int i;

	/* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
	 * counter 2. We run this loop several times to make sure the cache
	 * is hot and we take the minimum delta from all of the runs.
	 * That is to say that we're biased towards measuring the minimum
	 * number of APERF ticks that occur while waiting for the timer to
	 * expire.
	 */
	for(i = 0; i < 10; ++i)
	{
		enable_PIT2();
		set_PIT2_mode0(CALIBRATE_LATCH);
		aperfStart = rdmsr64(MSR_AMD_APERF);
		pollCount = poll_PIT2_gate();
		aperfEnd = rdmsr64(MSR_AMD_APERF);
		/* The poll loop must have run at least a few times for accuracy */
		if (pollCount <= 1)
			continue;
		/* The APERF counter must increment at LEAST once every millisecond.
		 * We should have waited exactly 30 msec so the APERF delta should
		 * be >= 30. Anything less and the processor is way too slow.
		 */
		if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
			continue;
		// aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
		if ( (aperfEnd - aperfStart) < aperfDelta )
			aperfDelta = aperfEnd - aperfStart;
	}
	/* aperfDelta is now the least number of APERF ticks the processor made
	 * in a timespan of 0.03 s (e.g. 30 milliseconds)
	 */

	/* Convert ticks-per-30ms to Hz: multiply by 1000 (ms -> s) first,
	 * then divide by the 30 ms window. */
	if (aperfDelta > (1ULL<<32))
		retval = 0;
	else
	{
		retval = aperfDelta * 1000 / 30;
	}
	disable_PIT2();
	return retval;
}
210
211
212/*
213 License for x2apic_enabled, get_apicbase, compute_bclk.
214
215 Copyright (c) 2010, Intel Corporation
216 All rights reserved.
217
218 Redistribution and use in source and binary forms, with or without
219 modification, are permitted provided that the following conditions are met:
220
221 * Redistributions of source code must retain the above copyright notice,
222 this list of conditions and the following disclaimer.
223 * Redistributions in binary form must reproduce the above copyright notice,
224 this list of conditions and the following disclaimer in the documentation
225 and/or other materials provided with the distribution.
226 * Neither the name of Intel Corporation nor the names of its contributors
227 may be used to endorse or promote products derived from this software
228 without specific prior written permission.
229
230 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
231 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
232 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
233 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
234 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
235 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
236 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
237 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
238 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
239 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
240 */
241static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
242static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
243static uint32_t x2apic_enabled(void);
244static uint32_t get_apicbase(void);
245static uint32_t compute_bclk(void);
/* Read MSR `msr`, splitting the RDMSR result into its two 32-bit halves:
 * *lo_data_addr receives EAX (low 32 bits), *hi_data_addr receives EDX
 * (high 32 bits). */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
	__asm__ volatile(
		"rdmsr"
		: "=a" (*lo_data_addr), "=d" (*hi_data_addr)
		: "c" (msr)
		);
}
/* Write MSR `msr` with the 64-bit value formed by hi_data:lo_data
 * (EDX:EAX convention of the WRMSR instruction). */
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
	__asm__ __volatile__ (
		"wrmsr"
		: /* No outputs */
		: "c" (msr), "a" (lo_data), "d" (hi_data)
		);
}
262#define MSR_APIC_BASE 0x1B
263#define APIC_TMR_INITIAL_CNT 0x380
264#define APIC_TMR_CURRENT_CNT 0x390
265#define APIC_TMR_DIVIDE_CFG 0x3E0
266#define MSR_APIC_TMR_INITIAL_CNT 0x838
267#define MSR_APIC_TMR_CURRENT_CNT 0x839
268#define MSR_APIC_TMR_DIVIDE_CFG 0x83E
269static uint32_t x2apic_enabled(void)
270{
271 uint64_t temp64;
272
273 temp64 = rdmsr64(MSR_APIC_BASE);
274
275 return (uint32_t) (temp64 & (1 << 10)) ? 1 : 0;
276}
277static uint32_t get_apicbase(void)
278{
279 uint64_t temp64;
280
281 temp64 = rdmsr64(MSR_APIC_BASE);
282
283 return (uint32_t) (temp64 & 0xfffff000);
284}
/*
 * Compute the base clock ("BCLK") in MHz by counting APIC-timer ticks
 * (configured with divide-by-2) across a fixed delay generated by PIT
 * channel 2, then rounding the result to the nearest 100/12 MHz step.
 * Works with either the MMIO APIC registers or the x2APIC MSR interface.
 */
static uint32_t compute_bclk(void)
{
	uint32_t dummy;
	uint32_t start, stop;
	uint8_t temp8;
	uint16_t delay_count;
	uint32_t bclk;

#define DELAY_IN_US 1000

	// Compute fixed delay as time
	// delay count = desired time * PIT frequency
	// PIT frequency = 1.193182 MHz
	delay_count = 1193182 / DELAY_IN_US;

	// PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG 0x61
#define CH2_SPEAKER (1 << 1) // bit 1 -- 1 = speaker enabled 0 = speaker disabled
#define CH2_GATE_IN (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched

	// PIT Command register
#define PIT_MODE_COMMAND_REG 0x43
#define SELECT_CH2 (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite name, no interrupts on CH2

	// PIT Channel 2 data port
#define PIT_CH2_DATA 0x42

	// Disable the PIT channel 2 speaker and gate
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// Setup command and mode
	outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

	// Set time for fixed delay (low byte then high byte)
	outb(PIT_CH2_DATA, (uint8_t) (delay_count));
	outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

	// Prepare to enable channel 2 gate but leave the speaker disabled
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~CH2_SPEAKER;
	temp8 |= CH2_GATE_IN;

	if (x2apic_enabled())
{
	// Set APIC Timer Divide Value as 2 (divide-config encoding 0 == /2)
	wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

	// start APIC timer with a known value (counts DOWN from ~0)
	start = ~0UL;
	wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
	}
	else
{
	// Set APIC Timer Divide Value as 2 (divide-config encoding 0 == /2)
	*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

	// start APIC timer with a known value (counts DOWN from ~0)
	start = ~0UL;
	*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
	}

	// Actually start the PIT channel 2
	outb(PIT_CH2_LATCH_REG, temp8);

	// Wait for the fixed delay (busy-wait on the CH2 output latch bit)
	while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

	if (x2apic_enabled())
{
	// read the APIC timer to determine the change that occurred over this fixed delay
	rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

	// stop APIC timer
	wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);

	}
	else
{
	// read the APIC timer to determine the change that occurred over this fixed delay
	stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

	// stop APIC timer
	*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
	}

	// Disable channel 2 speaker and gate input
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// (start - stop) ticks at bclk/2 over DELAY_IN_US microseconds -> MHz
	bclk = (start - stop) * 2 / DELAY_IN_US;

	// Round bclk to the nearest 100/12 integer value
	bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;

	return bclk;
}
387
388
389/*
390 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
391 * - multi. is read from a specific MSR. In the case of Intel, there is:
392 * a max multi. (used to calculate the FSB freq.),
393 * and a current multi. (used to calculate the CPU freq.)
394 * - FSBFreq = TSCFreq / multi
395 * - CPUFreq = FSBFreq * multi
396 */
397
398void scan_cpu(void)
399{
400uint64_tmsr = 0;
401
402
403 uint64_tFeatures = 0;// CPU Features like MMX, SSE2, VT ...
404uint64_tExtFeatures = 0; // CPU Extended Features like SYSCALL, XD, EM64T, LAHF ...
405 uint64_tTSCFreq = 0 ;
406 uint64_t FSBFreq = 0 ;
407 uint64_t CPUFreq = 0;
408
409 uint32_treg[4];
410 uint32_t cores_per_package = 0;
411 uint32_t logical_per_package = 0;
412
413 uint32_tVendor = 0;// Vendor
414uint32_tSignature = 0;// Signature
415uint8_t Stepping = 0;// Stepping
416uint8_t Model = 0;// Model
417uint8_t ExtModel = 0;// Extended Model
418uint8_t Family = 0;// Family
419uint8_t ExtFamily = 0;// Extended Family
420uint32_tNoCores = 0;// No Cores per Package
421uint32_tNoThreads = 0;// Threads per Package
422uint8_t Brand = 0;
423uint32_tMicrocodeVersion = 0; // The microcode version number a.k.a. signature a.k.a. BIOS ID
424
425uint8_t isMobile = 0;
426
427boolean_tdynamic_acceleration = 0;
428boolean_tinvariant_APIC_timer = 0;
429boolean_tfine_grain_clock_mod = 0;
430
431uint32_t cpuid_max_basic = 0;
432uint32_t cpuid_max_ext = 0;
433uint32_tsub_Cstates = 0;
434uint32_t extensions = 0;
435
436uint8_tmaxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
437 charCpuBrandString[48];// 48 Byte Branding String
438
439do_cpuid(0, reg);
440 Vendor= reg[ebx];
441 cpuid_max_basic = reg[eax];
442
443 if (Vendor == CPUID_VENDOR_INTEL)
444 {
445 do_cpuid2(0x00000004, 0, reg);
446 cores_per_package= bitfield(reg[eax], 31, 26) + 1;
447 }
448 else if (Vendor != CPUID_VENDOR_AMD)
449 {
450 stop("Error: CPU unsupported\n");
451 return;
452 }
453
454 /* get extended cpuid results */
455do_cpuid(0x80000000, reg);
456cpuid_max_ext = reg[eax];
457
458/* Begin of Copyright: from Apple's XNU cpuid.c */
459
460/* get brand string (if supported) */
461if (cpuid_max_ext > 0x80000004)
462{
463 char str[128], *s;
464/*
465 * The brand string 48 bytes (max), guaranteed to
466 * be NUL terminated.
467 */
468do_cpuid(0x80000002, reg);
469bcopy((char *)reg, &str[0], 16);
470do_cpuid(0x80000003, reg);
471bcopy((char *)reg, &str[16], 16);
472do_cpuid(0x80000004, reg);
473bcopy((char *)reg, &str[32], 16);
474for (s = str; *s != '\0'; s++)
475{
476if (*s != ' ') break;
477}
478
479strlcpy(CpuBrandString,s, sizeof(CpuBrandString));
480
481if (!strncmp(CpuBrandString, CPUID_STRING_UNKNOWN, min(sizeof(CpuBrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
482{
483 /*
484 * This string means we have a firmware-programmable brand string,
485 * and the firmware couldn't figure out what sort of CPU we have.
486 */
487 CpuBrandString[0] = '\0';
488 }
489}
490
491 /*
492 * Get processor signature and decode
493 * and bracket this with the approved procedure for reading the
494 * the microcode version number a.k.a. signature a.k.a. BIOS ID
495 */
496 if (Vendor == CPUID_VENDOR_INTEL )
497 {
498 wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
499 do_cpuid(1, reg);
500 MicrocodeVersion =
501 (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
502 }
503 else if (Vendor != CPUID_VENDOR_AMD)
504 do_cpuid(1, reg);
505
506Signature = reg[eax];
507Stepping = bitfield(reg[eax], 3, 0);
508Model = bitfield(reg[eax], 7, 4);
509Family = bitfield(reg[eax], 11, 8);
510ExtModel = bitfield(reg[eax], 19, 16);
511ExtFamily = bitfield(reg[eax], 27, 20);
512Brand = bitfield(reg[ebx], 7, 0);
513Features = quad(reg[ecx], reg[edx]);
514
515 /* Fold extensions into family/model */
516if (Family == 0x0f)
517Family += ExtFamily;
518if (Family == 0x0f || Family == 0x06)
519Model += (ExtModel << 4);
520
521 if (Features & CPUID_FEATURE_HTT)
522logical_per_package =
523 bitfield(reg[ebx], 23, 16);
524else
525logical_per_package = 1;
526
527if (cpuid_max_ext >= 0x80000001)
528{
529do_cpuid(0x80000001, reg);
530ExtFeatures =
531 quad(reg[ecx], reg[edx]);
532
533}
534
535if (cpuid_max_ext >= 0x80000007)
536{
537do_cpuid(0x80000007, reg);
538
539/* Fold in the Invariant TSC feature bit, if present */
540ExtFeatures |=
541 reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;
542
543 if (Vendor == CPUID_VENDOR_AMD)
544 {
545 /* Fold in the Hardware P-State control feature bit, if present */
546ExtFeatures |=
547 reg[edx] & (uint32_t)_Bit(7);
548
549 /* Fold in the read-only effective frequency interface feature bit, if present */
550 ExtFeatures |=
551 reg[edx] & (uint32_t)_Bit(10);
552 }
553
554}
555
556 if (Vendor == CPUID_VENDOR_AMD )
557 {
558 if (cpuid_max_ext >= 0x80000008)
559 {
560 if (Features & CPUID_FEATURE_HTT)
561 {
562 do_cpuid(0x80000008, reg);
563 cores_per_package= bitfield(reg[ecx], 7 , 0) + 1; // NC + 1
564 }
565 }
566 }
567 if (cpuid_max_basic >= 0x5) {
568/*
569 * Extract the Monitor/Mwait Leaf info:
570 */
571do_cpuid(5, reg);
572 if (Vendor == CPUID_VENDOR_INTEL )
573 {
574 sub_Cstates = reg[edx];
575 }
576
577 extensions = reg[ecx];
578}
579
580 if (Vendor == CPUID_VENDOR_INTEL)
581 {
582 if (cpuid_max_basic >= 0x6)
583 {
584 /*
585 * The thermal and Power Leaf:
586 */
587 do_cpuid(6, reg);
588 dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
589 invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
590 fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
591 }
592
593 if ((Vendor == 0x756E6547 /* Intel */) &&
594 (Family == 0x06))
595 {
596 /*
597 * Find the number of enabled cores and threads
598 * (which determines whether SMT/Hyperthreading is active).
599 */
600 switch (Model)
601 {
602
603 case CPUID_MODEL_DALES_32NM:
604 case CPUID_MODEL_WESTMERE:
605 case CPUID_MODEL_WESTMERE_EX:
606 {
607 msr = rdmsr64(MSR_CORE_THREAD_COUNT);
608 NoThreads = bitfield((uint32_t)msr, 15, 0);
609 NoCores = bitfield((uint32_t)msr, 19, 16);
610 break;
611 }
612
613 case CPUID_MODEL_NEHALEM:
614 case CPUID_MODEL_FIELDS:
615 case CPUID_MODEL_DALES:
616 case CPUID_MODEL_NEHALEM_EX:
617 case CPUID_MODEL_SANDYBRIDGE:
618 case CPUID_MODEL_JAKETOWN:
619case CPUID_MODEL_IVYBRIDGE:
620 {
621 msr = rdmsr64(MSR_CORE_THREAD_COUNT);
622 NoThreads = bitfield((uint32_t)msr, 15, 0);
623 NoCores = bitfield((uint32_t)msr, 31, 16);
624 break;
625 }
626 }
627 }
628 }
629
630 if (NoCores == 0)
631{
632 if (Vendor == CPUID_VENDOR_AMD)
633 {
634 if (!cores_per_package) {
635 //legacy method
636 if ((ExtFeatures & _HBit(1)/* CmpLegacy */) && ( Features & CPUID_FEATURE_HTT) )
637 cores_per_package = logical_per_package;
638 else
639 cores_per_package = 1;
640 }
641 }
642NoThreads = logical_per_package;
643NoCores = cores_per_package ? cores_per_package : 1 ;
644}
645
646/* End of Copyright: from Apple's XNU cpuid.c */
647
648FSBFreq = (uint64_t)(compute_bclk() * 1000000);
649
650#ifdef LEGACY_CPU
651TSCFreq = measure_tsc_frequency();
652#endif
653
654 if (Vendor == CPUID_VENDOR_AMD)
655 {
656
657#define K8_FIDVID_STATUS0xC0010042
658#define K10_COFVID_STATUS0xC0010071
659 if (ExtFeatures & _Bit(10))
660 {
661 CPUFreq = measure_aperf_frequency();
662 }
663
664 if ((Vendor == CPUID_VENDOR_AMD) && (Family == 0x0f))
665 {
666 switch(ExtFamily)
667 {
668 case 0x00: /* K8 */
669 msr = rdmsr64(K8_FIDVID_STATUS);
670 maxcoef = bitfield(msr, 21, 16) / 2 + 4;
671 currcoef = bitfield(msr, 5, 0) / 2 + 4;
672 break;
673
674 case 0x01: /* K10 */
675 {
676 //uint32_t reg[4];
677 msr = rdmsr64(K10_COFVID_STATUS);
678 /*
679 do_cpuid2(0x00000006, 0, reg);
680 EffFreq: effective frequency interface
681 if (bitfield(reg[ecx], 0, 0) == 1)
682 {
683 uint64_t aperf = measure_aperf_frequency();
684 CPUFreq = aperf;
685 }
686 */
687 // NOTE: tsc runs at the maccoeff (non turbo)
688 //*not* at the turbo frequency.
689 maxcoef = bitfield(msr, 54, 49) / 2 + 4;
690 currcoef = bitfield(msr, 5, 0) + 0x10;
691 currdiv = 2 << bitfield(msr, 8, 6);
692
693 break;
694 }
695 case 0x05: /* K14 */
696 msr = rdmsr64(K10_COFVID_STATUS);
697 currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
698 currdiv = (bitfield(msr, 8, 4) + 1) << 2;
699 currdiv += bitfield(msr, 3, 0);
700
701 break;
702
703 case 0x02: /* K11 */
704 DBG("K11 detected, but not supported !!!\n");
705 // not implimented
706 break;
707 }
708
709 if (!FSBFreq)
710 {
711 if (maxcoef)
712 {
713 if (currdiv)
714 {
715 if (!currcoef) currcoef = maxcoef;
716 if (!CPUFreq)
717 FSBFreq = ((TSCFreq * currdiv) / currcoef);
718 else
719 FSBFreq = ((CPUFreq * currdiv) / currcoef);
720
721 DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
722 } else {
723 if (!CPUFreq)
724 FSBFreq = (TSCFreq / maxcoef);
725 else
726 FSBFreq = (CPUFreq / maxcoef);
727 DBG("%d\n", currcoef);
728 }
729 }
730 else if (currcoef)
731 {
732 if (currdiv)
733 {
734 FSBFreq = ((TSCFreq * currdiv) / currcoef);
735 DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
736 } else {
737 FSBFreq = (TSCFreq / currcoef);
738 DBG("%d\n", currcoef);
739 }
740 }
741 }
742
743 }
744
745 // NOTE: This is not the approved method,
746 // the method provided by AMD is:
747 // if ((PowerNow == enabled (cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"
748
749 if (strstr(CpuBrandString, "obile"))
750 isMobile = true;
751 else
752 isMobile = false;
753
754 DBG("%s platform detected.\n", isMobile?"Mobile":"Desktop");
755 }
756 else if ((Vendor == CPUID_VENDOR_INTEL) &&
757 ((Family == 0x06) ||
758 (Family == 0x0f)))
759{
760if ((Family == 0x06 && Model >= 0x0c) ||
761(Family == 0x0f && Model >= 0x03))
762{
763/* Nehalem CPU model */
764if (Family == 0x06 && (Model == CPUID_MODEL_NEHALEM ||
765 Model == CPUID_MODEL_FIELDS ||
766 Model == CPUID_MODEL_DALES ||
767 Model == CPUID_MODEL_DALES_32NM ||
768 Model == CPUID_MODEL_WESTMERE ||
769 Model == CPUID_MODEL_NEHALEM_EX ||
770 Model == CPUID_MODEL_WESTMERE_EX ||
771 Model == CPUID_MODEL_SANDYBRIDGE ||
772 Model == CPUID_MODEL_JAKETOWN))
773{
774uint8_tbus_ratio_max = 0, bus_ratio_min = 0;
775uint32_tmax_ratio = 0;
776uint64_tflex_ratio = 0;
777msr = rdmsr64(MSR_PLATFORM_INFO);
778#if DEBUG_CPU
779DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
780#endif
781bus_ratio_max = (msr >> 8) & 0xff;
782bus_ratio_min = (msr >> 40) & 0xff;
783msr = rdmsr64(MSR_FLEX_RATIO);
784#if DEBUG_CPU
785DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
786#endif
787if ((msr >> 16) & 0x01)
788{
789flex_ratio = (msr >> 8) & 0xff;
790/* bcc9: at least on the gigabyte h67ma-ud2h,
791 where the cpu multipler can't be changed to
792 allow overclocking, the flex_ratio msr has unexpected (to OSX)
793 contents. These contents cause mach_kernel to
794 fail to compute the bus ratio correctly, instead
795 causing the system to crash since tscGranularity
796 is inadvertently set to 0.
797 */
798if (flex_ratio == 0)
799{
800/* Clear bit 16 (evidently the
801 presence bit) */
802wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
803msr = rdmsr64(MSR_FLEX_RATIO);
804#if DEBUG_CPU
805DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);
806#endif
807}
808else
809{
810if (bus_ratio_max > flex_ratio)
811{
812bus_ratio_max = flex_ratio;
813}
814}
815}
816#ifdef LEGACY_CPU
817if (bus_ratio_max)
818{
819FSBFreq = (TSCFreq / bus_ratio_max);
820}
821#endif
822//valv: Turbo Ratio Limit
823if ((Model != 0x2e) && (Model != 0x2f))
824{
825//msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
826CPUFreq = bus_ratio_max * FSBFreq;
827max_ratio = bus_ratio_max * 10;
828}
829else
830{
831#ifdef LEGACY_CPU
832CPUFreq = TSCFreq;
833#else
834CPUFreq = bus_ratio_max * FSBFreq;
835#endif
836}
837#if DEBUG_CPU
838DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", FSBFreq / 1000000, max_ratio);
839#endif
840currcoef = bus_ratio_max;
841
842 TSCFreq = CPUFreq;
843}
844else
845{
846msr = rdmsr64(MSR_IA32_PERF_STATUS);
847#if DEBUG_CPU
848DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
849#endif
850currcoef = (msr >> 8) & 0x1f;
851/* Non-integer bus ratio for the max-multi*/
852maxdiv = (msr >> 46) & 0x01;
853/* Non-integer bus ratio for the current-multi (undocumented)*/
854currdiv = (msr >> 14) & 0x01;
855
856if ((Family == 0x06 && Model >= 0x0e) ||
857(Family == 0x0f)) // This will always be model >= 3
858{
859/* On these models, maxcoef defines TSC freq */
860maxcoef = (msr >> 40) & 0x1f;
861}
862else
863{
864/* On lower models, currcoef defines TSC freq */
865/* XXX */
866maxcoef = currcoef;
867}
868if (!currcoef) currcoef = maxcoef;
869#ifdef LEGACY_CPU
870if (maxcoef)
871{
872
873if (maxdiv)
874{
875FSBFreq = ((TSCFreq * 2) / ((maxcoef * 2) + 1));
876}
877else
878{
879FSBFreq = (TSCFreq / maxcoef);
880}
881
882if (currdiv)
883{
884CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
885}
886else
887{
888CPUFreq = (FSBFreq * currcoef);
889}
890#if DEBUG_CPU
891DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
892#endif
893}
894#else
895
896
897if (currdiv)
898{
899CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
900}
901else
902{
903CPUFreq = (FSBFreq * currcoef);
904}
905
906if (maxcoef)
907{
908if (maxdiv)
909{
910TSCFreq = (FSBFreq * ((maxcoef * 2) + 1)) / 2;
911}
912else
913{
914TSCFreq = FSBFreq * maxcoef;
915}
916}
917#if DEBUG_CPU
918DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
919#endif
920
921#endif // LEGACY_CPU
922
923}
924}
925 /* Mobile CPU ? */
926//Slice
927 isMobile = false;
928switch (Model)
929{
930case 0x0D:
931isMobile = true;
932break;
933case 0x02:
934case 0x03:
935case 0x04:
936case 0x06:
937isMobile = (rdmsr64(0x2C) & (1 << 21));
938break;
939default:
940isMobile = (rdmsr64(0x17) & (1 << 28));
941break;
942}
943
944DBG("%s platform detected.\n",isMobile?"Mobile":"Desktop");
945}
946
947if (!CPUFreq) CPUFreq = TSCFreq;
948 if (!TSCFreq) TSCFreq = CPUFreq;
949
950if (Vendor == CPUID_VENDOR_INTEL) {
951set_env(envDynamicAcceleration, dynamic_acceleration);
952set_env(envInvariantAPICTimer, invariant_APIC_timer);
953set_env(envFineGrainClockMod, fine_grain_clock_mod);
954set_env(envMicrocodeVersion, MicrocodeVersion);
955set_env(envSubCstates, sub_Cstates);
956}
957set_env(envVendor, Vendor);
958 set_env(envModel, Model);
959 set_env(envExtModel, ExtModel);
960
961set_env(envCPUIDMaxBasic, cpuid_max_basic);
962set_env(envCPUIDMaxBasic, cpuid_max_ext);
963
964 set_env_ptr(envBrandString, CpuBrandString, sizeof(CpuBrandString));
965set_env(envSignature, Signature);
966set_env(envStepping, Stepping);
967set_env(envFamily, Family);
968set_env(envExtModel, ExtModel);
969set_env(envExtFamily, ExtFamily);
970set_env(envBrand, Brand);
971set_env(envFeatures, Features);
972 set_env(envExtFeatures, ExtFeatures);
973
974set_env(envExtensions, extensions);
975
976set_env(envNoThreads, NoThreads);
977set_env(envNoCores, NoCores);
978set_env(envIsMobile, isMobile);
979
980set_env(envMaxCoef, maxcoef);
981set_env(envMaxDiv, maxdiv);
982set_env(envCurrCoef, currcoef);
983set_env(envCurrDiv, currdiv);
984set_env(envTSCFreq, TSCFreq);
985set_env(envFSBFreq, FSBFreq);
986set_env(envCPUFreq, CPUFreq);
987
988}
989

Archive Download this file

Revision: 2182