Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/cpu_intel_amd.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9
10#ifndef DEBUG_CPU
11#define DEBUG_CPU 0
12#endif
13
14#if DEBUG_CPU
15#define DBG(x...)printf(x)
16#else
17#define DBG(x...)msglog(x)
18#endif
19
20//#define LEGACY_CPU
21
22// DFE: enable_PIT2 and disable_PIT2 come from older xnu
23
/*
 * Enable or disable timer 2.
 * Port 0x61 controls timer 2:
 *  bit 0 gates the clock,
 *  bit 1 gates output to speaker.
 */
static inline void enable_PIT2(void)
{
	/* Enable gate, disable speaker: clear bits 0-1, then set bit 0 only,
	 * so the counter runs but stays silent. */
	__asm__ volatile(
		" inb $0x61,%%al \n\t"
		" and $0xFC,%%al \n\t" /* & ~0x03 */
		" or $1,%%al \n\t"
		" outb %%al,$0x61 \n\t"
		: : : "%al" );
}
40
/* Counterpart of enable_PIT2(): clear both the timer 2 clock gate
 * (bit 0) and the speaker output gate (bit 1) in port 0x61. */
static inline void disable_PIT2(void)
{
	/* Disable gate and output to speaker */
	__asm__ volatile(
		" inb $0x61,%%al \n\t"
		" and $0xFC,%%al \n\t" /* & ~0x03 */
		" outb %%al,$0x61 \n\t"
		: : : "%al" );
}
50
51// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
52// roughly based on Linux code
53
/* Set the 8254 channel 2 to mode 0 with the specified value.
   In mode 0, the counter will initially set its gate low when the
   timer expires. For this to be useful, you ought to set it high
   before calling this function. The enable_PIT2 function does this.

   Command byte 0xB0 = select channel 2, lobyte/hibyte access, mode 0.
   The 16-bit reload value is written low byte first, then high byte,
   to the channel 2 data port (0x42).
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"
		" outb %%al,$0x43 \n\t"   /* command register */
		" movb %%dl,%%al \n\t"
		" outb %%al,$0x42 \n\t"   /* low byte */
		" movb %%dh,%%al \n\t"
		" outb %%al,$0x42"        /* high byte */
		/* Bugfix: the sequence overwrites %al, so it must be declared
		 * as clobbered (the original claimed "no clobber"). */
		: : "d"(value) : "%al" );
}
70
/* Returns the number of times the loop ran before the PIT2 signaled.
 * Spins reading port 0x61 until bit 5 (channel 2 OUT status) goes high,
 * i.e. until the mode-0 countdown programmed via set_PIT2_mode0()
 * expires. */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;

	do {
		++count;
		/* Bugfix: constraint tightened from "=q" to "=a" — inb can only
		 * write %al, but "=q" let the allocator pick any byte register. */
		__asm__ volatile(
			"inb $0x61,%0"
			: "=a"(nmi_sc_val)
			: /* no input */);
	} while ((nmi_sc_val & 0x20) == 0);
	return count;
}
84
85#ifdef LEGACY_CPU
static uint64_t measure_tsc_frequency(void);
/*
 * DFE: Measures the TSC frequency in Hz (64-bit) using PIT channel 2
 * as the reference time base.
 */
static uint64_t measure_tsc_frequency(void)
{
	uint64_t tscStart;
	uint64_t tscEnd;
	uint64_t tscDelta = 0xffffffffffffffffULL;   /* running minimum */
	unsigned long pollCount;
	uint64_t retval = 0;
	int i;

	/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
	 * counter 2. We run this loop 10 times to make sure the cache
	 * is hot and we take the minimum delta from all of the runs.
	 * That is to say that we're biased towards measuring the minimum
	 * number of TSC ticks that occur while waiting for the timer to
	 * expire. That theoretically helps avoid inconsistencies when
	 * running under a VM if the TSC is not virtualized and the host
	 * steals time. The TSC is normally virtualized for VMware.
	 */
	for(i = 0; i < 10; ++i)
	{
		enable_PIT2();
		set_PIT2_mode0(CALIBRATE_LATCH);
		tscStart = rdtsc64();
		pollCount = poll_PIT2_gate();
		tscEnd = rdtsc64();
		/* The poll loop must have run at least a few times for accuracy */
		if(pollCount <= 1)
			continue;
		/* The TSC must increment at LEAST once every millisecond. We
		 * should have waited exactly 30 msec so the TSC delta should
		 * be >= 30. Anything less and the processor is way too slow.
		 */
		if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
			continue;
		// tscDelta = min(tscDelta, (tscEnd - tscStart))
		if( (tscEnd - tscStart) < tscDelta )
			tscDelta = tscEnd - tscStart;
	}
	/* tscDelta is now the least number of TSC ticks the processor made in
	 * a timespan of 0.03 s (e.g. 30 milliseconds)
	 * Linux thus divides by 30 which gives the answer in kiloHertz because
	 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
	 * Hz so we need to convert our milliseconds to seconds. Since we're
	 * dividing by the milliseconds, we simply multiply by 1000.
	 */

	/* Unlike linux, we're not limited to 32-bit, but we do need to take care
	 * that we're going to multiply by 1000 first so we do need at least some
	 * arithmetic headroom. For now, 32-bit should be enough.
	 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
	 */
	if(tscDelta > (1ULL<<32))
		retval = 0;
	else
	{
		retval = tscDelta * 1000 / 30;
	}
	disable_PIT2();
	return retval;
}
150#endif
151
152
#define MSR_AMD_APERF 0x000000E8
/*
 * Original comment/code:
 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
 *
 * Measures the Actual Performance Frequency in Hz (64-bit)
 * (just a naming change, mperf --> aperf )
 *
 * Counts APERF (MSR 0xE8) ticks across a 30 ms PIT channel 2 delay,
 * same technique as measure_tsc_frequency() above.
 */
static uint64_t measure_aperf_frequency(void)
{
	uint64_t aperfStart;
	uint64_t aperfEnd;
	uint64_t aperfDelta = 0xffffffffffffffffULL;   /* running minimum */
	unsigned long pollCount;
	uint64_t retval = 0;
	int i;

	/* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
	 * counter 2. We run this loop 10 times to make sure the cache
	 * is hot and we take the minimum delta from all of the runs.
	 * That is to say that we're biased towards measuring the minimum
	 * number of APERF ticks that occur while waiting for the timer to
	 * expire.
	 */
	for(i = 0; i < 10; ++i)
	{
		enable_PIT2();
		set_PIT2_mode0(CALIBRATE_LATCH);
		aperfStart = rdmsr64(MSR_AMD_APERF);
		pollCount = poll_PIT2_gate();
		aperfEnd = rdmsr64(MSR_AMD_APERF);
		/* The poll loop must have run at least a few times for accuracy */
		if (pollCount <= 1)
			continue;
		/* The APERF count must increment at LEAST once every millisecond.
		 * We should have waited exactly 30 msec so the APERF delta should
		 * be >= 30. Anything less and the processor is way too slow.
		 */
		if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
			continue;
		// aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
		if ( (aperfEnd - aperfStart) < aperfDelta )
			aperfDelta = aperfEnd - aperfStart;
	}
	/* aperfDelta is now the least number of APERF ticks the processor made in
	 * a timespan of 0.03 s (e.g. 30 milliseconds)
	 */

	/* Convert ticks per 30 ms window to Hz: * 1000 (ms -> s), / 30 (window). */
	if (aperfDelta > (1ULL<<32))
		retval = 0;
	else
	{
		retval = aperfDelta * 1000 / 30;
	}
	disable_PIT2();
	return retval;
}
210
211
212/*
213 License for x2apic_enabled, get_apicbase, compute_bclk.
214
215 Copyright (c) 2010, Intel Corporation
216 All rights reserved.
217
218 Redistribution and use in source and binary forms, with or without
219 modification, are permitted provided that the following conditions are met:
220
221 * Redistributions of source code must retain the above copyright notice,
222 this list of conditions and the following disclaimer.
223 * Redistributions in binary form must reproduce the above copyright notice,
224 this list of conditions and the following disclaimer in the documentation
225 and/or other materials provided with the distribution.
226 * Neither the name of Intel Corporation nor the names of its contributors
227 may be used to endorse or promote products derived from this software
228 without specific prior written permission.
229
230 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
231 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
232 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
233 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
234 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
235 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
236 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
237 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
238 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
239 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
240 */
241static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
242static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
243static uint32_t x2apic_enabled(void);
244static uint32_t get_apicbase(void);
245static uint32_t compute_bclk(void);
/* Read MSR `msr`, returning the low 32 bits in *lo_data_addr (EAX)
 * and the high 32 bits in *hi_data_addr (EDX). */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
	__asm__ volatile(
		"rdmsr"
		: "=a" (*lo_data_addr), "=d" (*hi_data_addr)
		: "c" (msr)
	);
}
/* Write MSR `msr` with lo_data in the low 32 bits (EAX) and hi_data
 * in the high 32 bits (EDX). */
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
	__asm__ __volatile__ (
		"wrmsr"
		: /* No outputs */
		: "c" (msr), "a" (lo_data), "d" (hi_data)
	);
}
262#define MSR_APIC_BASE 0x1B
263#define APIC_TMR_INITIAL_CNT 0x380
264#define APIC_TMR_CURRENT_CNT 0x390
265#define APIC_TMR_DIVIDE_CFG 0x3E0
266#define MSR_APIC_TMR_INITIAL_CNT 0x838
267#define MSR_APIC_TMR_CURRENT_CNT 0x839
268#define MSR_APIC_TMR_DIVIDE_CFG 0x83E
269static uint32_t x2apic_enabled(void)
270{
271 uint64_t temp64;
272
273 temp64 = rdmsr64(MSR_APIC_BASE);
274
275 return (uint32_t) (temp64 & (1 << 10)) ? 1 : 0;
276}
277static uint32_t get_apicbase(void)
278{
279 uint64_t temp64;
280
281 temp64 = rdmsr64(MSR_APIC_BASE);
282
283 return (uint32_t) (temp64 & 0xfffff000);
284}
/*
 * Measures the base clock (BCLK) in MHz by counting local APIC timer
 * ticks over a fixed 1 ms delay generated by PIT channel 2 in mode 0.
 * The APIC timer is programmed with a divide value of 2 (divide config
 * written as 0 below), hence ticks * 2 / us gives MHz.  The result is
 * rounded to the nearest multiple of 100/12 MHz (common BCLK step).
 * Works in both xAPIC (MMIO) and x2APIC (MSR) modes.
 */
static uint32_t compute_bclk(void)
{
	uint32_t dummy;
	uint32_t start, stop;
	uint8_t temp8;
	uint16_t delay_count;
	uint32_t bclk;

#define DELAY_IN_US 1000

	// Compute fixed delay as time
	// delay count = desired time * PIT frequency
	// PIT frequency = 1.193182 MHz
	delay_count = 1193182 / DELAY_IN_US;

	// PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG 0x61
#define CH2_SPEAKER (1 << 1) // bit 1 -- 1 = speaker enabled 0 = speaker disabled
#define CH2_GATE_IN (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched

	// PIT Command register
#define PIT_MODE_COMMAND_REG 0x43
#define SELECT_CH2 (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite name, no interrupts on CH2

	// PIT Channel 2 data port
#define PIT_CH2_DATA 0x42

	// Disable the PIT channel 2 speaker and gate
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// Setup command and mode
	outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

	// Set time for fixed delay (low byte first, then high byte)
	outb(PIT_CH2_DATA, (uint8_t) (delay_count));
	outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

	// Prepare to enable channel 2 gate but leave the speaker disabled
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~CH2_SPEAKER;
	temp8 |= CH2_GATE_IN;

	if (x2apic_enabled())
	{
		// Set APIC Timer Divide Value as 2
		wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

		// start APIC timer with a known value (counts down from ~0)
		start = ~0UL;
		wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
	}
	else
	{
		// Set APIC Timer Divide Value as 2
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

		// start APIC timer with a known value (counts down from ~0)
		start = ~0UL;
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
	}

	// Actually start the PIT channel 2
	outb(PIT_CH2_LATCH_REG, temp8);

	// Wait for the fixed delay (busy-poll the channel 2 OUT status bit)
	while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

	if (x2apic_enabled())
	{
		// read the APIC timer to determine the change that occurred over this fixed delay
		rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

		// stop APIC timer
		wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);

	}
	else
	{
		// read the APIC timer to determine the change that occurred over this fixed delay
		stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

		// stop APIC timer
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
	}

	// Disable channel 2 speaker and gate input
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// ticks elapsed * divide(2) / elapsed microseconds = MHz
	bclk = (start - stop) * 2 / DELAY_IN_US;

	// Round bclk to the nearest 100/12 integer value
	bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;

	return bclk;
}
387
388
389/*
390 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
391 * - multi. is read from a specific MSR. In the case of Intel, there is:
392 * a max multi. (used to calculate the FSB freq.),
393 * and a current multi. (used to calculate the CPU freq.)
394 * - FSBFreq = TSCFreq / multi
395 * - CPUFreq = FSBFreq * multi
396 */
397
398void scan_cpu(PlatformInfo_t *p)
399{
400uint64_tmsr = 0;
401
402
403 uint64_tFeatures = 0;// CPU Features like MMX, SSE2, VT ...
404uint64_tExtFeatures = 0; // CPU Extended Features like SYSCALL, XD, EM64T, LAHF ...
405 uint64_tTSCFreq = 0 ;
406 uint64_t FSBFreq = 0 ;
407 uint64_t CPUFreq = 0;
408
409 uint32_treg[4];
410 uint32_t cores_per_package = 0;
411 uint32_t logical_per_package = 0;
412
413 uint32_tVendor = 0;// Vendor
414uint32_tSignature = 0;// Signature
415uint8_t Stepping = 0;// Stepping
416uint8_t Model = 0;// Model
417uint8_t ExtModel = 0;// Extended Model
418uint8_t Family = 0;// Family
419uint8_t ExtFamily = 0;// Extended Family
420uint32_tNoCores = 0;// No Cores per Package
421uint32_tNoThreads = 0;// Threads per Package
422uint8_t Brand = 0;
423uint32_tMicrocodeVersion = 0; // The microcode version number a.k.a. signature a.k.a. BIOS ID
424
425uint8_t isMobile = 0;
426
427boolean_tdynamic_acceleration = 0;
428boolean_tinvariant_APIC_timer = 0;
429boolean_tfine_grain_clock_mod = 0;
430
431uint32_t cpuid_max_basic = 0;
432uint32_t cpuid_max_ext = 0;
433uint32_tsub_Cstates = 0;
434uint32_t extensions = 0;
435
436uint8_tmaxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
437 charCpuBrandString[48];// 48 Byte Branding String
438
439do_cpuid(0, reg);
440 Vendor= reg[ebx];
441 cpuid_max_basic = reg[eax];
442
443 if (Vendor == CPUID_VENDOR_INTEL)
444 {
445 do_cpuid2(0x00000004, 0, reg);
446 cores_per_package= bitfield(reg[eax], 31, 26) + 1;
447 }
448 else if (Vendor != CPUID_VENDOR_AMD)
449 {
450 stop("Error: CPU unsupported\n");
451 halt();
452 }
453
454 /* get extended cpuid results */
455do_cpuid(0x80000000, reg);
456cpuid_max_ext = reg[eax];
457
458/* Begin of Copyright: from Apple's XNU cpuid.c */
459
460/* get brand string (if supported) */
461if (cpuid_max_ext > 0x80000004)
462{
463 char str[128], *s;
464/*
465 * The brand string 48 bytes (max), guaranteed to
466 * be NUL terminated.
467 */
468do_cpuid(0x80000002, reg);
469bcopy((char *)reg, &str[0], 16);
470do_cpuid(0x80000003, reg);
471bcopy((char *)reg, &str[16], 16);
472do_cpuid(0x80000004, reg);
473bcopy((char *)reg, &str[32], 16);
474for (s = str; *s != '\0'; s++)
475{
476if (*s != ' ') break;
477}
478
479strlcpy(CpuBrandString,s, sizeof(CpuBrandString));
480
481if (!strncmp(CpuBrandString, CPUID_STRING_UNKNOWN, min(sizeof(CpuBrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
482{
483 /*
484 * This string means we have a firmware-programmable brand string,
485 * and the firmware couldn't figure out what sort of CPU we have.
486 */
487 CpuBrandString[0] = '\0';
488 }
489}
490
491 /*
492 * Get processor signature and decode
493 * and bracket this with the approved procedure for reading the
494 * the microcode version number a.k.a. signature a.k.a. BIOS ID
495 */
496 if (Vendor == CPUID_VENDOR_INTEL )
497 {
498 wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
499 do_cpuid(1, reg);
500 MicrocodeVersion =
501 (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
502 }
503 else if (Vendor != CPUID_VENDOR_AMD)
504 do_cpuid(1, reg);
505
506Signature = reg[eax];
507Stepping = bitfield(reg[eax], 3, 0);
508Model = bitfield(reg[eax], 7, 4);
509Family = bitfield(reg[eax], 11, 8);
510ExtModel = bitfield(reg[eax], 19, 16);
511ExtFamily = bitfield(reg[eax], 27, 20);
512Brand = bitfield(reg[ebx], 7, 0);
513Features = quad(reg[ecx], reg[edx]);
514
515 /* Fold extensions into family/model */
516if (Family == 0x0f)
517Family += ExtFamily;
518if (Family == 0x0f || Family == 0x06)
519Model += (ExtModel << 4);
520
521 if (Features & CPUID_FEATURE_HTT)
522logical_per_package =
523 bitfield(reg[ebx], 23, 16);
524else
525logical_per_package = 1;
526
527if (cpuid_max_ext >= 0x80000001)
528{
529do_cpuid(0x80000001, reg);
530ExtFeatures =
531 quad(reg[ecx], reg[edx]);
532
533}
534
535if (cpuid_max_ext >= 0x80000007)
536{
537do_cpuid(0x80000007, reg);
538
539/* Fold in the Invariant TSC feature bit, if present */
540ExtFeatures |=
541 reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;
542
543 if (Vendor == CPUID_VENDOR_AMD)
544 {
545 /* Fold in the Hardware P-State control feature bit, if present */
546 ExtFeatures |=
547 reg[edx] & (uint32_t)_Bit(7);
548
549 /* Fold in the read-only effective frequency interface feature bit, if present */
550 ExtFeatures |=
551 reg[edx] & (uint32_t)_Bit(10);
552 }
553
554}
555
556 if (Vendor == CPUID_VENDOR_AMD )
557 {
558 if (cpuid_max_ext >= 0x80000008)
559 {
560 if (Features & CPUID_FEATURE_HTT)
561 {
562 do_cpuid(0x80000008, reg);
563 cores_per_package= bitfield(reg[ecx], 7 , 0) + 1; // NC + 1
564 }
565 }
566 }
567 if (cpuid_max_basic >= 0x5) {
568/*
569 * Extract the Monitor/Mwait Leaf info:
570 */
571do_cpuid(5, reg);
572 if (Vendor == CPUID_VENDOR_INTEL )
573 {
574 sub_Cstates = reg[edx];
575 }
576
577 extensions = reg[ecx];
578}
579
580 if (Vendor == CPUID_VENDOR_INTEL)
581 {
582 if (cpuid_max_basic >= 0x6)
583 {
584 /*
585 * The thermal and Power Leaf:
586 */
587 do_cpuid(6, reg);
588 dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
589 invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
590 fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
591 }
592
593 if ((Vendor == 0x756E6547 /* Intel */) &&
594 (Family == 0x06))
595 {
596 /*
597 * Find the number of enabled cores and threads
598 * (which determines whether SMT/Hyperthreading is active).
599 */
600 switch (Model)
601 {
602
603 case CPUID_MODEL_DALES_32NM:
604 case CPUID_MODEL_WESTMERE:
605 case CPUID_MODEL_WESTMERE_EX:
606 {
607 msr = rdmsr64(MSR_CORE_THREAD_COUNT);
608 NoThreads = bitfield((uint32_t)msr, 15, 0);
609 NoCores = bitfield((uint32_t)msr, 19, 16);
610 break;
611 }
612
613 case CPUID_MODEL_NEHALEM:
614 case CPUID_MODEL_FIELDS:
615 case CPUID_MODEL_DALES:
616 case CPUID_MODEL_NEHALEM_EX:
617 case CPUID_MODEL_SANDYBRIDGE:
618 case CPUID_MODEL_JAKETOWN:
619 {
620 msr = rdmsr64(MSR_CORE_THREAD_COUNT);
621 NoThreads = bitfield((uint32_t)msr, 15, 0);
622 NoCores = bitfield((uint32_t)msr, 31, 16);
623 break;
624 }
625 }
626 }
627 }
628
629 if (NoCores == 0)
630{
631 if (p->CPU.Vendor == CPUID_VENDOR_AMD)
632 {
633 if (!cores_per_package) {
634 //legacy method
635 if ((ExtFeatures & _HBit(1)/* CmpLegacy */) && ( Features & CPUID_FEATURE_HTT) )
636 cores_per_package = logical_per_package;
637 else
638 cores_per_package = 1;
639 }
640 }
641NoThreads = logical_per_package;
642NoCores = cores_per_package ? cores_per_package : 1 ;
643}
644
645/* End of Copyright: from Apple's XNU cpuid.c */
646
647FSBFreq = (uint64_t)(compute_bclk() * 1000000);
648
649#ifdef LEGACY_CPU
650TSCFreq = measure_tsc_frequency();
651#endif
652
653 if (Vendor == CPUID_VENDOR_AMD)
654 {
655
656#define K8_FIDVID_STATUS0xC0010042
657#define K10_COFVID_STATUS0xC0010071
658 if (ExtFeatures & _Bit(10))
659 {
660 CPUFreq = measure_aperf_frequency();
661 }
662
663 if ((Vendor == 0x68747541 /* AMD */) && (Family == 0x0f))
664 {
665 switch(ExtFamily)
666 {
667 case 0x00: /* K8 */
668 msr = rdmsr64(K8_FIDVID_STATUS);
669 maxcoef = bitfield(msr, 21, 16) / 2 + 4;
670 currcoef = bitfield(msr, 5, 0) / 2 + 4;
671 break;
672
673 case 0x01: /* K10 */
674 {
675 //uint32_t reg[4];
676 msr = rdmsr64(K10_COFVID_STATUS);
677 /*
678 do_cpuid2(0x00000006, 0, reg);
679 EffFreq: effective frequency interface
680 if (bitfield(reg[ecx], 0, 0) == 1)
681 {
682 uint64_t aperf = measure_aperf_frequency();
683 CPUFreq = aperf;
684 }
685 */
686 // NOTE: tsc runs at the maccoeff (non turbo)
687 //*not* at the turbo frequency.
688 maxcoef = bitfield(msr, 54, 49) / 2 + 4;
689 currcoef = bitfield(msr, 5, 0) + 0x10;
690 currdiv = 2 << bitfield(msr, 8, 6);
691
692 break;
693 }
694 case 0x05: /* K14 */
695 msr = rdmsr64(K10_COFVID_STATUS);
696 currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
697 currdiv = (bitfield(msr, 8, 4) + 1) << 2;
698 currdiv += bitfield(msr, 3, 0);
699
700 break;
701
702 case 0x02: /* K11 */
703 DBG("K11 detected, but not supported !!!\n");
704 // not implimented
705 break;
706 }
707
708 if (!FSBFreq)
709 {
710 if (maxcoef)
711 {
712 if (currdiv)
713 {
714 if (!currcoef) currcoef = maxcoef;
715 if (!CPUFreq)
716 FSBFreq = ((TSCFreq * currdiv) / currcoef);
717 else
718 FSBFreq = ((CPUFreq * currdiv) / currcoef);
719
720 DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
721 } else {
722 if (!CPUFreq)
723 FSBFreq = (TSCFreq / maxcoef);
724 else
725 FSBFreq = (CPUFreq / maxcoef);
726 DBG("%d\n", currcoef);
727 }
728 }
729 else if (currcoef)
730 {
731 if (currdiv)
732 {
733 FSBFreq = ((TSCFreq * currdiv) / currcoef);
734 DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
735 } else {
736 FSBFreq = (TSCFreq / currcoef);
737 DBG("%d\n", currcoef);
738 }
739 }
740 }
741
742 }
743
744 // NOTE: This is not the approved method,
745 // the method provided by AMD is:
746 // if ((PowerNow == enabled (cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"
747
748 if (strstr(CpuBrandString, "obile"))
749 isMobile = true;
750 else
751 isMobile = false;
752
753 DBG("%s platform detected.\n", isMobile?"Mobile":"Desktop");
754 }
755 else if ((Vendor == CPUID_VENDOR_INTEL) &&
756((Family == 0x06) ||
757 (Family == 0x0f)))
758{
759if ((Family == 0x06 && Model >= 0x0c) ||
760(Family == 0x0f && Model >= 0x03))
761{
762/* Nehalem CPU model */
763if (Family == 0x06 && (Model == CPUID_MODEL_NEHALEM ||
764 Model == CPUID_MODEL_FIELDS ||
765 Model == CPUID_MODEL_DALES ||
766 Model == CPUID_MODEL_DALES_32NM ||
767 Model == CPUID_MODEL_WESTMERE ||
768 Model == CPUID_MODEL_NEHALEM_EX ||
769 Model == CPUID_MODEL_WESTMERE_EX ||
770 Model == CPUID_MODEL_SANDYBRIDGE ||
771 Model == CPUID_MODEL_JAKETOWN))
772{
773uint8_tbus_ratio_max = 0, bus_ratio_min = 0;
774uint32_tmax_ratio = 0;
775uint64_tflex_ratio = 0;
776msr = rdmsr64(MSR_PLATFORM_INFO);
777#if DEBUG_CPU
778DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
779#endif
780bus_ratio_max = (msr >> 8) & 0xff;
781bus_ratio_min = (msr >> 40) & 0xff;
782msr = rdmsr64(MSR_FLEX_RATIO);
783#if DEBUG_CPU
784DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
785#endif
786if ((msr >> 16) & 0x01)
787{
788flex_ratio = (msr >> 8) & 0xff;
789/* bcc9: at least on the gigabyte h67ma-ud2h,
790 where the cpu multipler can't be changed to
791 allow overclocking, the flex_ratio msr has unexpected (to OSX)
792 contents. These contents cause mach_kernel to
793 fail to compute the bus ratio correctly, instead
794 causing the system to crash since tscGranularity
795 is inadvertently set to 0.
796 */
797if (flex_ratio == 0)
798{
799/* Clear bit 16 (evidently the
800 presence bit) */
801wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
802msr = rdmsr64(MSR_FLEX_RATIO);
803#if DEBUG_CPU
804DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);
805#endif
806}
807else
808{
809if (bus_ratio_max > flex_ratio)
810{
811bus_ratio_max = flex_ratio;
812}
813}
814}
815#ifdef LEGACY_CPU
816if (bus_ratio_max)
817{
818FSBFreq = (TSCFreq / bus_ratio_max);
819}
820#endif
821//valv: Turbo Ratio Limit
822if ((Model != 0x2e) && (Model != 0x2f))
823{
824//msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
825CPUFreq = bus_ratio_max * FSBFreq;
826max_ratio = bus_ratio_max * 10;
827}
828else
829{
830#ifdef LEGACY_CPU
831CPUFreq = TSCFreq;
832#else
833CPUFreq = bus_ratio_max * FSBFreq;
834#endif
835}
836#if DEBUG_CPU
837DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", FSBFreq / 1000000, max_ratio);
838#endif
839currcoef = bus_ratio_max;
840
841 TSCFreq = CPUFreq;
842}
843else
844{
845msr = rdmsr64(MSR_IA32_PERF_STATUS);
846#if DEBUG_CPU
847DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
848#endif
849currcoef = (msr >> 8) & 0x1f;
850/* Non-integer bus ratio for the max-multi*/
851maxdiv = (msr >> 46) & 0x01;
852/* Non-integer bus ratio for the current-multi (undocumented)*/
853currdiv = (msr >> 14) & 0x01;
854
855if ((Family == 0x06 && Model >= 0x0e) ||
856(Family == 0x0f)) // This will always be model >= 3
857{
858/* On these models, maxcoef defines TSC freq */
859maxcoef = (msr >> 40) & 0x1f;
860}
861else
862{
863/* On lower models, currcoef defines TSC freq */
864/* XXX */
865maxcoef = currcoef;
866}
867if (!currcoef) currcoef = maxcoef;
868#ifdef LEGACY_CPU
869if (maxcoef)
870{
871
872if (maxdiv)
873{
874FSBFreq = ((TSCFreq * 2) / ((maxcoef * 2) + 1));
875}
876else
877{
878FSBFreq = (TSCFreq / maxcoef);
879}
880
881if (currdiv)
882{
883CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
884}
885else
886{
887CPUFreq = (FSBFreq * currcoef);
888}
889#if DEBUG_CPU
890DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
891#endif
892}
893#else
894
895
896if (currdiv)
897{
898CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
899}
900else
901{
902CPUFreq = (FSBFreq * currcoef);
903}
904
905if (maxcoef)
906{
907if (maxdiv)
908{
909TSCFreq = (FSBFreq * ((maxcoef * 2) + 1)) / 2;
910}
911else
912{
913TSCFreq = FSBFreq * maxcoef;
914}
915}
916#if DEBUG_CPU
917DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
918#endif
919
920#endif // LEGACY_CPU
921
922}
923}
924 /* Mobile CPU ? */
925//Slice
926 isMobile = false;
927switch (Model)
928{
929case 0x0D:
930isMobile = true;
931break;
932case 0x02:
933case 0x03:
934case 0x04:
935case 0x06:
936isMobile = (rdmsr64(0x2C) & (1 << 21));
937break;
938default:
939isMobile = (rdmsr64(0x17) & (1 << 28));
940break;
941}
942
943DBG("%s platform detected.\n",isMobile?"Mobile":"Desktop");
944}
945
946if (!CPUFreq) CPUFreq = TSCFreq;
947 if (!TSCFreq) TSCFreq = CPUFreq;
948
949if (Vendor == CPUID_VENDOR_INTEL) {
950set_env(envDynamicAcceleration, dynamic_acceleration);
951set_env(envInvariantAPICTimer, invariant_APIC_timer);
952set_env(envFineGrainClockMod, fine_grain_clock_mod);
953set_env(envMicrocodeVersion, MicrocodeVersion);
954set_env(envSubCstates, sub_Cstates);
955}
956set_env(envVendor, Vendor);
957 set_env(envModel, Model);
958 set_env(envExtModel, ExtModel);
959
960set_env(envCPUIDMaxBasic, cpuid_max_basic);
961set_env(envCPUIDMaxBasic, cpuid_max_ext);
962
963 set_env_copy(envBrandString, CpuBrandString, sizeof(CpuBrandString));
964set_env(envSignature, Signature);
965set_env(envStepping, Stepping);
966set_env(envFamily, Family);
967set_env(envExtModel, ExtModel);
968set_env(envExtFamily, ExtFamily);
969set_env(envBrand, Brand);
970set_env(envFeatures, Features);
971 set_env(envExtFeatures, ExtFeatures);
972
973set_env(envExtensions, extensions);
974
975set_env(envNoThreads, NoThreads);
976set_env(envNoCores, NoCores);
977set_env(envIsMobile, isMobile);
978
979set_env(envMaxCoef, maxcoef);
980set_env(envMaxDiv, maxdiv);
981set_env(envCurrCoef, currcoef);
982set_env(envCurrDiv, currdiv);
983set_env(envTSCFreq, TSCFreq);
984set_env(envFSBFreq, FSBFreq);
985set_env(envCPUFreq, CPUFreq);
986
987}
988

Archive Download this file

Revision: 2006