Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/cpu.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9
/* Debug switch: when DEBUG_CPU is 1, DBG() prints to the console;
 * otherwise output goes to the boot log via msglog(). */
#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

#if DEBUG_CPU
#define DBG(x...) printf(x)
#else
#define DBG(x...) msglog(x)
#endif

//#define AMD_SUPPORT

#ifndef INTEL_SUPPORT
#define INTEL_SUPPORT 0 //Default (0: nolegacy, 1 : legacy)
#endif

/* AMD builds always force the legacy (PIT-based) TSC calibration path
 * and disable the Intel-specific option entirely. */
#ifdef AMD_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#ifdef INTEL_SUPPORT
#undef INTEL_SUPPORT
#endif
#define LEGACY_CPU 1
#endif

/* On non-AMD builds INTEL_SUPPORT is always defined (defaulted above),
 * so this branch is taken and LEGACY_CPU == INTEL_SUPPORT. */
#ifdef INTEL_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#define LEGACY_CPU INTEL_SUPPORT
#endif
// (?) : if AMD_SUPPORT then LEGACY_CPU = 1, INTEL_SUPPORT = disabled
// else LEGACY_CPU = INTEL_SUPPORT
44
45
46#if LEGACY_CPU
47static uint64_t measure_tsc_frequency(void);
48
49// DFE: enable_PIT2 and disable_PIT2 come from older xnu
50
51/*
52 * Enable or disable timer 2.
53 * Port 0x61 controls timer 2:
54 * bit 0 gates the clock,
55 * bit 1 gates output to speaker.
56 */
static inline void enable_PIT2(void)
{
	/* Enable gate, disable speaker:
	 * read port 0x61, clear bits 0-1, then set bit 0 (gate) only. */
	__asm__ volatile(
		" inb $0x61,%%al \n\t"
		" and $0xFC,%%al \n\t" /* & ~0x03 */
		" or $1,%%al \n\t"
		" outb %%al,$0x61 \n\t"
		: : : "%al" );
}
67
static inline void disable_PIT2(void)
{
	/* Disable gate and output to speaker:
	 * read port 0x61 and clear bits 0-1 (gate and speaker enable). */
	__asm__ volatile(
		" inb $0x61,%%al \n\t"
		" and $0xFC,%%al \n\t"/* & ~0x03 */
		" outb %%al,$0x61 \n\t"
		: : : "%al" );
}
77
78// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
79// roughly based on Linux code
80
/* Set the 8254 channel 2 to mode 0 with the specified value.
   In mode 0, the counter will initially set its gate low when the
   timer expires. For this to be useful, you ought to set it high
   before calling this function. The enable_PIT2 function does this.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	/* 0xB0 = select channel 2, lobyte/hibyte access, mode 0.
	 * The 16-bit latch value arrives in %dx ("d" constraint); the low
	 * byte (%dl) is written to the channel 2 data port (0x42) first,
	 * then the high byte (%dh).
	 *
	 * Fixes vs. previous revision: the mnemonics were fused to their
	 * operands ("outb%%al") which does not assemble, and %al was
	 * modified without being declared in the clobber list. */
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"
		" outb %%al,$0x43 \n\t"
		" movb %%dl,%%al \n\t"
		" outb %%al,$0x42 \n\t"
		" movb %%dh,%%al \n\t"
		" outb %%al,$0x42"
		: : "d"(value) : "%al" );
}
97
/* Returns the number of times the loop ran before the PIT2 signaled */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;
	do {
		++count;
		/* Read the NMI status/control port (0x61); bit 5 reflects the
		 * PIT channel 2 OUT pin, which goes high when the mode-0
		 * countdown programmed by set_PIT2_mode0 expires.
		 *
		 * Fix vs. previous revision: the mnemonic was fused to its
		 * operand ("inb$0x61") which does not assemble. */
		__asm__ volatile(
			"inb $0x61,%0"
			: "=q"(nmi_sc_val) /* no inputs */ /* no clobbers */);
	} while( (nmi_sc_val & 0x20) == 0);
	return count;
}
/*
 * DFE: Measures the TSC frequency in Hz (64-bit).
 * NOTE(review): the original header said "using the ACPI PM timer", but
 * the code below clearly calibrates against the 8254 PIT channel 2.
 */
static uint64_t measure_tsc_frequency(void)
{
	uint64_t tscStart;
	uint64_t tscEnd;
	uint64_t tscDelta = 0xffffffffffffffffULL;	/* running minimum across runs */
	unsigned long pollCount;
	uint64_t retval = 0;
	int i;

	/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
	 * counter 2. We run this loop 10 times to make sure the cache
	 * is hot and we take the minimum delta from all of the runs.
	 * That is to say that we're biased towards measuring the minimum
	 * number of TSC ticks that occur while waiting for the timer to
	 * expire. That theoretically helps avoid inconsistencies when
	 * running under a VM if the TSC is not virtualized and the host
	 * steals time. The TSC is normally virtualized for VMware.
	 */
	for(i = 0; i < 10; ++i)
	{
		enable_PIT2();
		set_PIT2_mode0(CALIBRATE_LATCH);
		tscStart = rdtsc64();
		pollCount = poll_PIT2_gate();
		tscEnd = rdtsc64();
		/* The poll loop must have run at least a few times for accuracy */
		if(pollCount <= 1)
			continue;
		/* The TSC must increment at LEAST once every millisecond. We
		 * should have waited exactly 30 msec so the TSC delta should
		 * be >= 30. Anything less and the processor is way too slow.
		 */
		if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
			continue;
		// tscDelta = min(tscDelta, (tscEnd - tscStart))
		if( (tscEnd - tscStart) < tscDelta )
			tscDelta = tscEnd - tscStart;
	}
	/* tscDelta is now the least number of TSC ticks the processor made in
	 * a timespan of 0.03 s (e.g. 30 milliseconds)
	 * Linux thus divides by 30 which gives the answer in kiloHertz because
	 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
	 * Hz so we need to convert our milliseconds to seconds. Since we're
	 * dividing by the milliseconds, we simply multiply by 1000.
	 */

	/* Unlike linux, we're not limited to 32-bit, but we do need to take care
	 * that we're going to multiply by 1000 first so we do need at least some
	 * arithmetic headroom. For now, 32-bit should be enough.
	 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
	 */
	if(tscDelta > (1ULL<<32))
		retval = 0;
	else
	{
		retval = tscDelta * 1000 / 30;
	}
	disable_PIT2();
	return retval;
}
174
#ifdef AMD_SUPPORT
#define MSR_AMD_APERF 0x000000E8
/*
 * Original comment/code:
 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
 *
 * Measures the Actual Performance Frequency in Hz (64-bit)
 * (just a naming change, mperf --> aperf )
 */
static uint64_t measure_aperf_frequency(void)
{
	uint64_t aperfStart;
	uint64_t aperfEnd;
	uint64_t aperfDelta = 0xffffffffffffffffULL;	/* running minimum across runs */
	unsigned long pollCount;
	uint64_t retval = 0;
	int i;

	/* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
	 * counter 2. We run this loop 10 times to make sure the cache
	 * is hot and we take the minimum delta from all of the runs.
	 * That is to say that we're biased towards measuring the minimum
	 * number of APERF ticks that occur while waiting for the timer to
	 * expire.
	 */
	for(i = 0; i < 10; ++i)
	{
		enable_PIT2();
		set_PIT2_mode0(CALIBRATE_LATCH);
		aperfStart = rdmsr64(MSR_AMD_APERF);
		pollCount = poll_PIT2_gate();
		aperfEnd = rdmsr64(MSR_AMD_APERF);
		/* The poll loop must have run at least a few times for accuracy */
		if (pollCount <= 1)
			continue;
		/* The APERF count must increment at LEAST once every millisecond.
		 * We should have waited exactly 30 msec so the APERF delta should
		 * be >= 30. Anything less and the processor is way too slow.
		 */
		if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
			continue;
		// aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
		if ( (aperfEnd - aperfStart) < aperfDelta )
			aperfDelta = aperfEnd - aperfStart;
	}
	/* aperfDelta is now the least number of APERF ticks the processor made in
	 * a timespan of 0.03 s (e.g. 30 milliseconds)
	 */

	/* Reject absurd deltas, then convert ticks-per-30ms to Hz (x1000/30). */
	if (aperfDelta > (1ULL<<32))
		retval = 0;
	else
	{
		retval = aperfDelta * 1000 / 30;
	}
	disable_PIT2();
	return retval;
}
#endif
234
235#endif
236
237/*
238 License for x2apic_enabled, get_apicbase, compute_bclk.
239
240 Copyright (c) 2010, Intel Corporation
241 All rights reserved.
242
243 Redistribution and use in source and binary forms, with or without
244 modification, are permitted provided that the following conditions are met:
245
246 * Redistributions of source code must retain the above copyright notice,
247 this list of conditions and the following disclaimer.
248 * Redistributions in binary form must reproduce the above copyright notice,
249 this list of conditions and the following disclaimer in the documentation
250 and/or other materials provided with the distribution.
251 * Neither the name of Intel Corporation nor the names of its contributors
252 may be used to endorse or promote products derived from this software
253 without specific prior written permission.
254
255 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
256 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
257 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
258 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
259 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
260 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
261 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
262 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
263 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
264 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
265 */
/* Forward declarations for the Intel-contributed APIC/PIT bus-clock
 * measurement helpers below (see license above). */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
static uint32_t x2apic_enabled(void);
static uint32_t get_apicbase(void);
static uint32_t compute_bclk(void);
/* Read an MSR, returning its low/high 32-bit halves separately.
 * RDMSR: ECX selects the MSR, result arrives in EDX:EAX. */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
	__asm__ volatile(
		"rdmsr"
		: "=a" (*lo_data_addr), "=d" (*hi_data_addr)
		: "c" (msr)
		);
}
/* Write an MSR from two 32-bit halves.
 * WRMSR: ECX selects the MSR, value is taken from EDX:EAX. */
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
	__asm__ __volatile__ (
		"wrmsr"
		: /* No outputs */
		: "c" (msr), "a" (lo_data), "d" (hi_data)
		);
}
/* IA32_APIC_BASE MSR: holds the xAPIC MMIO base and the x2APIC enable bit. */
#define MSR_APIC_BASE 0x1B
/* xAPIC-mode local APIC timer registers (offsets from the MMIO base). */
#define APIC_TMR_INITIAL_CNT 0x380
#define APIC_TMR_CURRENT_CNT 0x390
#define APIC_TMR_DIVIDE_CFG 0x3E0
/* x2APIC-mode equivalents of the same timer registers, accessed via MSRs. */
#define MSR_APIC_TMR_INITIAL_CNT 0x838
#define MSR_APIC_TMR_CURRENT_CNT 0x839
#define MSR_APIC_TMR_DIVIDE_CFG 0x83E
294static uint32_t x2apic_enabled(void)
295{
296 uint64_t temp64;
297
298 temp64 = rdmsr64(MSR_APIC_BASE);
299
300 return (uint32_t) (temp64 & (1 << 10)) ? 1 : 0;
301}
302static uint32_t get_apicbase(void)
303{
304 uint64_t temp64;
305
306 temp64 = rdmsr64(MSR_APIC_BASE);
307
308 return (uint32_t) (temp64 & 0xfffff000);
309}
/*
 * Measure the bus clock (BCLK) in MHz by timing the local APIC timer
 * against a fixed 1 ms delay generated by PIT channel 2 in mode 0.
 * The APIC timer is configured to tick once per 2 bus clocks, so
 * bclk = elapsed_ticks * 2 / delay_in_us.  Handles both xAPIC (MMIO)
 * and x2APIC (MSR) register access.
 */
static uint32_t compute_bclk(void)
{
	uint32_t dummy;
	uint32_t start, stop;
	uint8_t temp8;
	uint16_t delay_count;
	uint32_t bclk;

#define DELAY_IN_US 1000

	// Compute fixed delay as time
	// delay count = desired time * PIT frequency
	// PIT frequency = 1.193182 MHz
	delay_count = 1193182 / DELAY_IN_US;

	// PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG 0x61
#define CH2_SPEAKER (1 << 1) // bit 1 -- 1 = speaker enabled 0 = speaker disabled
#define CH2_GATE_IN (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched

	// PIT Command register
#define PIT_MODE_COMMAND_REG 0x43
#define SELECT_CH2 (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite name, no interrupts on CH2

	// PIT Channel 2 data port
#define PIT_CH2_DATA 0x42

	// Disable the PIT channel 2 speaker and gate
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// Setup command and mode
	outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

	// Set time for fixed delay
	outb(PIT_CH2_DATA, (uint8_t) (delay_count));
	outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

	// Prepare to enable channel 2 gate but leave the speaker disabled
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~CH2_SPEAKER;
	temp8 |= CH2_GATE_IN;

	if (x2apic_enabled())
	{
		// Set APIC Timer Divide Value as 2
		wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

		// start APIC timer with a known value
		start = ~0UL;
		wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
	}
	else
	{
		// Set APIC Timer Divide Value as 2
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

		// start APIC timer with a known value
		start = ~0UL;
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
	}

	// Actually start the PIT channel 2
	outb(PIT_CH2_LATCH_REG, temp8);

	// Wait for the fixed delay: gate output (bit 5 of port 0x61) goes high
	// when the mode-0 countdown expires
	while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

	if (x2apic_enabled())
	{
		// read the APIC timer to determine the change that occurred over this fixed delay
		rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

		// stop APIC timer
		wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);

	}
	else
	{
		// read the APIC timer to determine the change that occurred over this fixed delay
		stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

		// stop APIC timer
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
	}

	// Disable channel 2 speaker and gate input
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// Timer counts down from 'start'; ticks once per 2 bus clocks (divide-by-2)
	bclk = (start - stop) * 2 / DELAY_IN_US;

	// Round bclk to the nearest 100/12 integer value
	bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;

	return bclk;
}
412
413
414/*
415 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
416 * - multi. is read from a specific MSR. In the case of Intel, there is:
417 * a max multi. (used to calculate the FSB freq.),
418 * and a current multi. (used to calculate the CPU freq.)
419 * - fsbFrequency = tscFrequency / multi
420 * - cpuFrequency = fsbFrequency * multi
421 */
422
423void scan_cpu(PlatformInfo_t *p)
424{
425uint64_ttscFrequency = 0, fsbFrequency = 0, cpuFrequency = 0;
426uint64_tmsr;
427uint8_tmaxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
428 uint32_treg[4];
429 uint32_t cores_per_package;
430 uint32_t logical_per_package;
431
432do_cpuid(0, reg);
433 p->CPU.Vendor= reg[ebx];
434 p->CPU.cpuid_max_basic = reg[eax];
435
436 do_cpuid2(0x00000004, 0, reg);
437 cores_per_package= bitfield(reg[eax], 31, 26) + 1;
438
439 /* get extended cpuid results */
440do_cpuid(0x80000000, reg);
441p->CPU.cpuid_max_ext = reg[eax];
442
443
444/* Begin of Copyright: from Apple's XNU cpuid.c */
445
446/* get brand string (if supported) */
447if (p->CPU.cpuid_max_ext > 0x80000004)
448{
449 char str[128], *s;
450/*
451 * The brand string 48 bytes (max), guaranteed to
452 * be NUL terminated.
453 */
454do_cpuid(0x80000002, reg);
455bcopy((char *)reg, &str[0], 16);
456do_cpuid(0x80000003, reg);
457bcopy((char *)reg, &str[16], 16);
458do_cpuid(0x80000004, reg);
459bcopy((char *)reg, &str[32], 16);
460for (s = str; *s != '\0'; s++)
461{
462if (*s != ' ') break;
463}
464
465strlcpy(p->CPU.BrandString,s, sizeof(p->CPU.BrandString));
466
467if (!strncmp(p->CPU.BrandString, CPUID_STRING_UNKNOWN, min(sizeof(p->CPU.BrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
468{
469 /*
470 * This string means we have a firmware-programmable brand string,
471 * and the firmware couldn't figure out what sort of CPU we have.
472 */
473 p->CPU.BrandString[0] = '\0';
474 }
475}
476
477 /*
478 * Get processor signature and decode
479 * and bracket this with the approved procedure for reading the
480 * the microcode version number a.k.a. signature a.k.a. BIOS ID
481 */
482#ifndef AMD_SUPPORT
483wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
484do_cpuid(1, reg);
485 p->CPU.MicrocodeVersion =
486 (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
487#else
488do_cpuid(1, reg);
489#endif
490p->CPU.Signature = reg[eax];
491p->CPU.Stepping = bitfield(reg[eax], 3, 0);
492p->CPU.Model = bitfield(reg[eax], 7, 4);
493p->CPU.Family = bitfield(reg[eax], 11, 8);
494p->CPU.ExtModel = bitfield(reg[eax], 19, 16);
495p->CPU.ExtFamily = bitfield(reg[eax], 27, 20);
496p->CPU.Brand = bitfield(reg[ebx], 7, 0);
497p->CPU.Features = quad(reg[ecx], reg[edx]);
498
499 /* Fold extensions into family/model */
500if (p->CPU.Family == 0x0f)
501p->CPU.Family += p->CPU.ExtFamily;
502if (p->CPU.Family == 0x0f || p->CPU.Family == 0x06)
503p->CPU.Model += (p->CPU.ExtModel << 4);
504
505 if (p->CPU.Features & CPUID_FEATURE_HTT)
506logical_per_package =
507 bitfield(reg[ebx], 23, 16);
508else
509logical_per_package = 1;
510
511 if (p->CPU.cpuid_max_ext >= 0x80000001)
512{
513do_cpuid(0x80000001, reg);
514p->CPU.ExtFeatures =
515 quad(reg[ecx], reg[edx]);
516
517}
518
519if (p->CPU.cpuid_max_ext >= 0x80000007)
520{
521do_cpuid(0x80000007, reg);
522
523/* Fold in the Invariant TSC feature bit, if present */
524p->CPU.ExtFeatures |=
525 reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;
526
527#ifdef AMD_SUPPORT
528/* Fold in the Hardware P-State control feature bit, if present */
529p->CPU.ExtFeatures |=
530 reg[edx] & (uint32_t)_Bit(7);
531
532/* Fold in the read-only effective frequency interface feature bit, if present */
533p->CPU.ExtFeatures |=
534 reg[edx] & (uint32_t)_Bit(10);
535#endif
536}
537
538 if (p->CPU.cpuid_max_basic >= 0x5) {
539/*
540 * Extract the Monitor/Mwait Leaf info:
541 */
542do_cpuid(5, reg);
543#ifndef AMD_SUPPORT
544 p->CPU.sub_Cstates = reg[edx];
545#endif
546 p->CPU.extensions = reg[ecx];
547}
548
549#ifndef AMD_SUPPORT
550 if (p->CPU.cpuid_max_basic >= 0x6)
551 {
552/*
553 * The thermal and Power Leaf:
554 */
555do_cpuid(6, reg);
556p->CPU.dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
557p->CPU.invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
558 p->CPU.fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
559}
560
561 if ((p->CPU.Vendor == 0x756E6547 /* Intel */) &&
562(p->CPU.Family == 0x06))
563{
564/*
565 * Find the number of enabled cores and threads
566 * (which determines whether SMT/Hyperthreading is active).
567 */
568switch (p->CPU.Model)
569{
570
571case CPUID_MODEL_DALES_32NM:
572case CPUID_MODEL_WESTMERE:
573case CPUID_MODEL_WESTMERE_EX:
574{
575msr = rdmsr64(MSR_CORE_THREAD_COUNT);
576p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
577p->CPU.NoCores = bitfield((uint32_t)msr, 19, 16);
578break;
579}
580
581case CPUID_MODEL_NEHALEM:
582case CPUID_MODEL_FIELDS:
583case CPUID_MODEL_DALES:
584case CPUID_MODEL_NEHALEM_EX:
585case CPUID_MODEL_SANDYBRIDGE:
586case CPUID_MODEL_JAKETOWN:
587{
588msr = rdmsr64(MSR_CORE_THREAD_COUNT);
589p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
590p->CPU.NoCores = bitfield((uint32_t)msr, 31, 16);
591break;
592}
593}
594 }
595#endif
596 if (p->CPU.NoCores == 0)
597{
598p->CPU.NoThreads = logical_per_package;
599p->CPU.NoCores = cores_per_package ? cores_per_package : 1 ;
600}
601
602/* End of Copyright: from Apple's XNU cpuid.c */
603
604fsbFrequency = (uint64_t)(compute_bclk() * 1000000);
605
606#if LEGACY_CPU
607tscFrequency = measure_tsc_frequency();
608#endif
609
610#ifdef AMD_SUPPORT
611#define K8_FIDVID_STATUS0xC0010042
612#define K10_COFVID_STATUS0xC0010071
613if (p->CPU.ExtFeatures & _Bit(10))
614{
615cpuFrequency = measure_aperf_frequency();
616}
617
618 if ((p->CPU.Vendor == 0x68747541 /* AMD */) && (p->CPU.Family == 0x0f))
619{
620switch(p->CPU.ExtFamily)
621{
622case 0x00: /* K8 */
623msr = rdmsr64(K8_FIDVID_STATUS);
624maxcoef = bitfield(msr, 21, 16) / 2 + 4;
625currcoef = bitfield(msr, 5, 0) / 2 + 4;
626break;
627
628case 0x01: /* K10 */
629 {
630 //uint32_t reg[4];
631msr = rdmsr64(K10_COFVID_STATUS);
632/*
633do_cpuid2(0x00000006, 0, reg);
634 EffFreq: effective frequency interface
635if (bitfield(reg[ecx], 0, 0) == 1)
636{
637uint64_t aperf = measure_aperf_frequency();
638cpuFrequency = aperf;
639}
640*/
641// NOTE: tsc runs at the maccoeff (non turbo)
642//*not* at the turbo frequency.
643maxcoef = bitfield(msr, 54, 49) / 2 + 4;
644currcoef = bitfield(msr, 5, 0) + 0x10;
645currdiv = 2 << bitfield(msr, 8, 6);
646
647break;
648}
649case 0x05: /* K14 */
650msr = rdmsr64(K10_COFVID_STATUS);
651currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
652currdiv = (bitfield(msr, 8, 4) + 1) << 2;
653currdiv += bitfield(msr, 3, 0);
654
655break;
656
657case 0x02: /* K11 */
658DBG("K11 detected, but not supported !!!\n");
659// not implimented
660break;
661}
662
663if (!fsbFrequency)
664{
665if (maxcoef)
666{
667if (currdiv)
668{
669if (!currcoef) currcoef = maxcoef;
670if (!cpuFrequency)
671fsbFrequency = ((tscFrequency * currdiv) / currcoef);
672else
673fsbFrequency = ((cpuFrequency * currdiv) / currcoef);
674
675DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
676} else {
677if (!cpuFrequency)
678fsbFrequency = (tscFrequency / maxcoef);
679else
680fsbFrequency = (cpuFrequency / maxcoef);
681DBG("%d\n", currcoef);
682}
683}
684else if (currcoef)
685{
686if (currdiv)
687{
688fsbFrequency = ((tscFrequency * currdiv) / currcoef);
689DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
690} else {
691fsbFrequency = (tscFrequency / currcoef);
692DBG("%d\n", currcoef);
693}
694}
695}
696
697}
698#else
699 if ((p->CPU.Vendor == 0x756E6547 /* Intel */) &&
700((p->CPU.Family == 0x06) ||
701 (p->CPU.Family == 0x0f)))
702{
703if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) ||
704(p->CPU.Family == 0x0f && p->CPU.Model >= 0x03))
705{
706/* Nehalem CPU model */
707if (p->CPU.Family == 0x06 && (p->CPU.Model == CPUID_MODEL_NEHALEM ||
708 p->CPU.Model == CPUID_MODEL_FIELDS ||
709 p->CPU.Model == CPUID_MODEL_DALES ||
710 p->CPU.Model == CPUID_MODEL_DALES_32NM ||
711 p->CPU.Model == CPUID_MODEL_WESTMERE ||
712 p->CPU.Model == CPUID_MODEL_NEHALEM_EX ||
713 p->CPU.Model == CPUID_MODEL_WESTMERE_EX ||
714 p->CPU.Model == CPUID_MODEL_SANDYBRIDGE ||
715 p->CPU.Model == CPUID_MODEL_JAKETOWN))
716{
717uint8_tbus_ratio_max = 0, bus_ratio_min = 0;
718uint32_tmax_ratio = 0;
719uint64_tflex_ratio = 0;
720msr = rdmsr64(MSR_PLATFORM_INFO);
721#if DEBUG_CPU
722DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
723#endif
724bus_ratio_max = (msr >> 8) & 0xff;
725bus_ratio_min = (msr >> 40) & 0xff;
726msr = rdmsr64(MSR_FLEX_RATIO);
727#if DEBUG_CPU
728DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
729#endif
730if ((msr >> 16) & 0x01)
731{
732flex_ratio = (msr >> 8) & 0xff;
733/* bcc9: at least on the gigabyte h67ma-ud2h,
734 where the cpu multipler can't be changed to
735 allow overclocking, the flex_ratio msr has unexpected (to OSX)
736 contents. These contents cause mach_kernel to
737 fail to compute the bus ratio correctly, instead
738 causing the system to crash since tscGranularity
739 is inadvertently set to 0.
740 */
741if (flex_ratio == 0)
742{
743/* Clear bit 16 (evidently the
744 presence bit) */
745wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
746msr = rdmsr64(MSR_FLEX_RATIO);
747#if DEBUG_CPU
748DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);
749#endif
750}
751else
752{
753if (bus_ratio_max > flex_ratio)
754{
755bus_ratio_max = flex_ratio;
756}
757}
758}
759#if LEGACY_CPU
760if (bus_ratio_max)
761{
762fsbFrequency = (tscFrequency / bus_ratio_max);
763}
764#endif
765//valv: Turbo Ratio Limit
766if ((p->CPU.Model != 0x2e) && (p->CPU.Model != 0x2f))
767{
768//msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
769cpuFrequency = bus_ratio_max * fsbFrequency;
770max_ratio = bus_ratio_max * 10;
771}
772else
773{
774#if LEGACY_CPU
775cpuFrequency = tscFrequency;
776#else
777cpuFrequency = bus_ratio_max * fsbFrequency;
778#endif
779}
780#if DEBUG_CPU
781DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", fsbFrequency / 1000000, max_ratio);
782#endif
783currcoef = bus_ratio_max;
784}
785else
786{
787msr = rdmsr64(MSR_IA32_PERF_STATUS);
788#if DEBUG_CPU
789DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
790#endif
791currcoef = (msr >> 8) & 0x1f;
792/* Non-integer bus ratio for the max-multi*/
793maxdiv = (msr >> 46) & 0x01;
794/* Non-integer bus ratio for the current-multi (undocumented)*/
795currdiv = (msr >> 14) & 0x01;
796
797if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) ||
798(p->CPU.Family == 0x0f)) // This will always be model >= 3
799{
800/* On these models, maxcoef defines TSC freq */
801maxcoef = (msr >> 40) & 0x1f;
802}
803else
804{
805/* On lower models, currcoef defines TSC freq */
806/* XXX */
807maxcoef = currcoef;
808}
809if (!currcoef) currcoef = maxcoef;
810#if LEGACY_CPU
811if (maxcoef)
812{
813
814if (maxdiv)
815{
816fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
817}
818else
819{
820fsbFrequency = (tscFrequency / maxcoef);
821}
822
823if (currdiv)
824{
825cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
826}
827else
828{
829cpuFrequency = (fsbFrequency * currcoef);
830}
831#if DEBUG_CPU
832DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
833#endif
834}
835#else
836
837
838if (currdiv)
839{
840cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
841}
842else
843{
844cpuFrequency = (fsbFrequency * currcoef);
845}
846
847if (maxcoef)
848{
849if (maxdiv)
850{
851tscFrequency = (fsbFrequency * ((maxcoef * 2) + 1)) / 2;
852}
853else
854{
855tscFrequency = fsbFrequency * maxcoef;
856}
857}
858#if DEBUG_CPU
859DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
860#endif
861
862#endif // LEGACY_CPU
863
864}
865}
866 /* Mobile CPU ? */
867//Slice
868 p->CPU.isMobile = false;
869switch (p->CPU.Model)
870{
871case 0x0D:
872p->CPU.isMobile = true;
873break;
874case 0x02:
875case 0x03:
876case 0x04:
877case 0x06:
878p->CPU.isMobile = (rdmsr64(0x2C) & (1 << 21));
879break;
880default:
881p->CPU.isMobile = (rdmsr64(0x17) & (1 << 28));
882break;
883}
884 // TODO: this part of code seems to work very well for the intel platforms, need to find the equivalent for AMD
885DBG("%s platform found.\n", p->CPU.isMobile?"Mobile":"Desktop");
886}
887#endif
888if (!cpuFrequency) cpuFrequency = tscFrequency;
889
890p->CPU.MaxCoef = maxcoef;
891p->CPU.MaxDiv = maxdiv;
892p->CPU.CurrCoef = currcoef;
893p->CPU.CurrDiv = currdiv;
894
895 p->CPU.TSCFrequency = tscFrequency ;
896p->CPU.FSBFrequency = fsbFrequency ;
897p->CPU.CPUFrequency = cpuFrequency ;
898#ifdef AMD_SUPPORT
899 msglog("AMD CPU Detection Enabled\n");
900#endif
901DBG("CPU: Vendor/Model/ExtModel: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Model, p->CPU.ExtModel);
902DBG("CPU: Family/ExtFamily: 0x%x/0x%x\n", p->CPU.Family, p->CPU.ExtFamily);
903#ifdef AMD_SUPPORT
904DBG("CPU (AMD): TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
905DBG("CPU (AMD): FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
906DBG("CPU (AMD): CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
907DBG("CPU (AMD): MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
908DBG("CPU (AMD): MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
909#else
910DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
911DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
912DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
913DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
914DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
915#endif
916
917DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
918DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
919 DBG("CPU: ExtFeatures: 0x%08x\n", p->CPU.ExtFeatures);
920#ifndef AMD_SUPPORT
921 DBG("CPU: MicrocodeVersion: %d\n", p->CPU.MicrocodeVersion);
922#endif
923#if DEBUG_CPU
924pause();
925#endif
926
927}
928

Archive Download this file

Revision: 1526