Chameleon SVN Source Tree

Root/branches/cparm/i386/libsaio/cpu.c

/*
 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
 * AsereBLN: 2009: cleanup and bugfix
 */

#include "libsaio.h"
#include "platform.h"
#include "cpu.h"

#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

#if DEBUG_CPU
#define DBG(x...)    printf(x)
#else
#define DBG(x...)    msglog(x)
#endif

//#define AMD_SUPPORT

#ifndef INTEL_SUPPORT
#define INTEL_SUPPORT 0 // Default (0: no legacy, 1: legacy)
#endif

#ifdef AMD_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#ifdef INTEL_SUPPORT
#undef INTEL_SUPPORT
#endif
#define LEGACY_CPU 1
#endif

#ifdef INTEL_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#define LEGACY_CPU INTEL_SUPPORT
#endif
// Net effect: if AMD_SUPPORT is defined, LEGACY_CPU = 1 and INTEL_SUPPORT is disabled;
// otherwise LEGACY_CPU = INTEL_SUPPORT.


#if LEGACY_CPU
static uint64_t measure_tsc_frequency(void);

// DFE: enable_PIT2 and disable_PIT2 come from older xnu

/*
 * Enable or disable timer 2.
 * Port 0x61 controls timer 2:
 *   bit 0 gates the clock,
 *   bit 1 gates output to speaker.
 */
static inline void enable_PIT2(void)
{
    /* Enable gate, disable speaker */
    __asm__ volatile(
        " inb   $0x61,%%al  \n\t"
        " and   $0xFC,%%al  \n\t"   /* & ~0x03 */
        " or    $1,%%al     \n\t"
        " outb  %%al,$0x61  \n\t"
        : : : "%al" );
}

static inline void disable_PIT2(void)
{
    /* Disable gate and output to speaker */
    __asm__ volatile(
        " inb   $0x61,%%al  \n\t"
        " and   $0xFC,%%al  \n\t"   /* & ~0x03 */
        " outb  %%al,$0x61  \n\t"
        : : : "%al" );
}

// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
// roughly based on Linux code

/* Set the 8254 channel 2 to mode 0 with the specified value.
   In mode 0, the counter will initially set its gate low when the
   timer expires. For this to be useful, you ought to set it high
   before calling this function. The enable_PIT2 function does this.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
    __asm__ volatile(
        " movb  $0xB0,%%al  \n\t"
        " outb  %%al,$0x43  \n\t"
        " movb  %%dl,%%al   \n\t"
        " outb  %%al,$0x42  \n\t"
        " movb  %%dh,%%al   \n\t"
        " outb  %%al,$0x42"
        : : "d"(value) : "%al" );   /* %al is scratched above, so declare it clobbered */
}
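
/* The 0xB0 control word written to port 0x43 decodes as: bits 7-6 = 10
 * (select counter 2), bits 5-4 = 11 (access mode lobyte/hibyte),
 * bits 3-1 = 000 (mode 0, interrupt on terminal count), bit 0 = 0
 * (binary counting). The 16-bit latch value is then written to the
 * channel 2 data port (0x42), low byte first.
 */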

/* Returns the number of times the loop ran before the PIT2 signaled */
static inline unsigned long poll_PIT2_gate(void)
{
    unsigned long count = 0;
    unsigned char nmi_sc_val;
    do {
        ++count;
        __asm__ volatile(
            "inb $0x61,%0"
            : "=q"(nmi_sc_val) /* no input, no clobber */ );
    } while ((nmi_sc_val & 0x20) == 0);
    return count;
}

/*
 * DFE: Measures the TSC frequency in Hz (64-bit) using the 8254 PIT channel 2
 */
static uint64_t measure_tsc_frequency(void)
{
    uint64_t tscStart;
    uint64_t tscEnd;
    uint64_t tscDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of TSC ticks that occur while waiting for the timer to
     * expire. That theoretically helps avoid inconsistencies when
     * running under a VM if the TSC is not virtualized and the host
     * steals time. The TSC is normally virtualized for VMware.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        tscStart = rdtsc64();
        pollCount = poll_PIT2_gate();
        tscEnd = rdtsc64();
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The TSC must increment at LEAST once every millisecond. We
         * should have waited exactly 30 msec so the TSC delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // tscDelta = min(tscDelta, (tscEnd - tscStart))
        if ((tscEnd - tscStart) < tscDelta)
            tscDelta = tscEnd - tscStart;
    }
    /* tscDelta is now the least number of TSC ticks the processor made in
     * a timespan of 0.03 s (i.e. 30 milliseconds).
     * Linux thus divides by 30 which gives the answer in kilohertz because
     * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
     * Hz so we need to convert our milliseconds to seconds. Since we're
     * dividing by the milliseconds, we simply multiply by 1000.
     */

    /* Unlike Linux, we're not limited to 32-bit, but we do need to take care
     * that we're going to multiply by 1000 first so we do need at least some
     * arithmetic headroom. For now, 32-bit should be enough.
     * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
     */
    if (tscDelta > (1ULL << 32))
        retval = 0;
    else
    {
        retval = tscDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}
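
/* A worked example, assuming CALIBRATE_LATCH (defined in cpu.h) is the PIT
 * count for the 30 ms window, i.e. roughly 1193182 * 30 / 1000 = 35795:
 * a 2.4 GHz CPU accumulates about 72,000,000 TSC ticks in 30 ms, and
 * 72000000 * 1000 / 30 recovers 2,400,000,000 Hz.
 */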

#ifdef AMD_SUPPORT
#define MSR_AMD_APERF 0x000000E8
/*
 * Original comment/code:
 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
 *
 * Measures the Actual Performance Frequency in Hz (64-bit)
 * (just a naming change, mperf --> aperf)
 */
static uint64_t measure_aperf_frequency(void)
{
    uint64_t aperfStart;
    uint64_t aperfEnd;
    uint64_t aperfDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of APERF ticks that occur while waiting for the timer to
     * expire.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        aperfStart = rdmsr64(MSR_AMD_APERF);
        pollCount = poll_PIT2_gate();
        aperfEnd = rdmsr64(MSR_AMD_APERF);
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The APERF counter must increment at LEAST once every millisecond.
         * We should have waited exactly 30 msec so the APERF delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
        if ((aperfEnd - aperfStart) < aperfDelta)
            aperfDelta = aperfEnd - aperfStart;
    }
    /* aperfDelta is now the least number of APERF ticks the processor made in
     * a timespan of 0.03 s (i.e. 30 milliseconds)
     */

    if (aperfDelta > (1ULL << 32))
        retval = 0;
    else
    {
        retval = aperfDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}
#endif

#endif

/*
 License for x2apic_enabled, get_apicbase, compute_bclk.

 Copyright (c) 2010, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of Intel Corporation nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
static uint32_t x2apic_enabled(void);
static uint32_t get_apicbase(void);
static uint32_t compute_bclk(void);

static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
    __asm__ volatile(
        "rdmsr"
        : "=a" (*lo_data_addr), "=d" (*hi_data_addr)
        : "c" (msr)
    );
}

static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
    __asm__ __volatile__ (
        "wrmsr"
        : /* No outputs */
        : "c" (msr), "a" (lo_data), "d" (hi_data)
    );
}
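
/* rdmsr/wrmsr convention: ECX selects the MSR index and the 64-bit value
 * travels split across EDX:EAX (high:low). The "c", "a" and "d" constraints
 * above pin the C operands to exactly those registers.
 */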
#define MSR_APIC_BASE            0x1B
#define APIC_TMR_INITIAL_CNT     0x380
#define APIC_TMR_CURRENT_CNT     0x390
#define APIC_TMR_DIVIDE_CFG      0x3E0
#define MSR_APIC_TMR_INITIAL_CNT 0x838
#define MSR_APIC_TMR_CURRENT_CNT 0x839
#define MSR_APIC_TMR_DIVIDE_CFG  0x83E
static uint32_t x2apic_enabled(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    return (temp64 & (1ULL << 10)) ? 1 : 0;
}
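
/* Bit 10 of IA32_APIC_BASE (MSR 0x1B) is the x2APIC-mode enable (EXTD) bit.
 * In x2APIC mode the APIC registers are MSR-mapped; otherwise they are
 * MMIO-mapped at the 4 KB-aligned base address masked out below.
 */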
static uint32_t get_apicbase(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    return (uint32_t) (temp64 & 0xfffff000);
}
static uint32_t compute_bclk(void)
{
    uint32_t dummy;
    uint32_t start, stop;
    uint8_t temp8;
    uint16_t delay_count;
    uint32_t bclk;

#define DELAY_IN_US 1000

    // Compute fixed delay as time
    //   delay count = desired time * PIT frequency
    //   PIT frequency = 1.193182 MHz
    delay_count = 1193182 / DELAY_IN_US;
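
    /* With DELAY_IN_US = 1000 (a 1 ms window) this yields 1193182 / 1000 ~= 1193
     * PIT ticks. Note the expression only equals "time * PIT frequency" for this
     * particular value; the general form would be 1193182 * DELAY_IN_US / 1000000.
     */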

    // PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG 0x61
#define CH2_SPEAKER       (1 << 1) // bit 1 -- 1 = speaker enabled, 0 = speaker disabled
#define CH2_GATE_IN       (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT      (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched

    // PIT Command register
#define PIT_MODE_COMMAND_REG 0x43
#define SELECT_CH2           (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite the name, no interrupts on CH2

    // PIT Channel 2 data port
#define PIT_CH2_DATA 0x42

    // Disable the PIT channel 2 speaker and gate
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    // Set up command and mode
    outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

    // Set time for fixed delay
    outb(PIT_CH2_DATA, (uint8_t) (delay_count));
    outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

    // Prepare to enable the channel 2 gate but leave the speaker disabled
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~CH2_SPEAKER;
    temp8 |= CH2_GATE_IN;

    if (x2apic_enabled())
    {
        // Set APIC Timer Divide Value to 2
        wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

        // Start the APIC timer with a known value
        start = ~0UL;
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
    }
    else
    {
        // Set APIC Timer Divide Value to 2
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

        // Start the APIC timer with a known value
        start = ~0UL;
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
    }

    // Actually start the PIT channel 2
    outb(PIT_CH2_LATCH_REG, temp8);

    // Wait for the fixed delay
    while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

    if (x2apic_enabled())
    {
        // Read the APIC timer to determine the change that occurred over this fixed delay
        rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

        // Stop the APIC timer
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);
    }
    else
    {
        // Read the APIC timer to determine the change that occurred over this fixed delay
        stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

        // Stop the APIC timer
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
    }

    // Disable channel 2 speaker and gate input
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    bclk = (start - stop) * 2 / DELAY_IN_US;

    // Round bclk to the nearest 100/12 integer value
    bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;
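
    /* This rounds to the nearest multiple of 200/24 = 100/12 MHz, the grid
     * that common Intel bus clocks sit on. For example, a measured 132 MHz
     * becomes (132*24 + 100)/200 = 16, and 16*200/24 = 133 MHz.
     */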

    return bclk;
}


/*
 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU.
 * - The multiplier is read from a specific MSR. In the case of Intel there are:
 *     a max multiplier (used to calculate the FSB frequency)
 *     and a current multiplier (used to calculate the CPU frequency).
 * - fsbFrequency = tscFrequency / maxMultiplier
 * - cpuFrequency = fsbFrequency * currentMultiplier
 */
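
/* Hypothetical worked example: on a 2.8 GHz part with a maximum bus ratio
 * of 21, fsbFrequency = 2800 MHz / 21 = 133 MHz; if the current ratio is 9
 * (e.g. idling with SpeedStep), cpuFrequency = 133 MHz * 9 = 1200 MHz.
 */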

void scan_cpu(PlatformInfo_t *p)
{
    uint64_t tscFrequency = 0, fsbFrequency = 0, cpuFrequency = 0;
    uint64_t msr;
    uint8_t maxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
    uint32_t reg[4];
    uint32_t cores_per_package = 0;
    uint32_t logical_per_package = 0;

    do_cpuid(0, reg);
    p->CPU.Vendor = reg[ebx];
    p->CPU.cpuid_max_basic = reg[eax];

#ifndef AMD_SUPPORT
    do_cpuid2(0x00000004, 0, reg);
    cores_per_package = bitfield(reg[eax], 31, 26) + 1;
#endif

    /* Get extended cpuid results */
    do_cpuid(0x80000000, reg);
    p->CPU.cpuid_max_ext = reg[eax];

    /* Begin of Copyright: from Apple's XNU cpuid.c */

    /* Get brand string (if supported) */
    if (p->CPU.cpuid_max_ext > 0x80000004)
    {
        char str[128], *s;
        /*
         * The brand string is 48 bytes (max), guaranteed to
         * be NUL terminated.
         */
        do_cpuid(0x80000002, reg);
        bcopy((char *)reg, &str[0], 16);
        do_cpuid(0x80000003, reg);
        bcopy((char *)reg, &str[16], 16);
        do_cpuid(0x80000004, reg);
        bcopy((char *)reg, &str[32], 16);
        /* Skip any leading spaces */
        for (s = str; *s != '\0'; s++)
        {
            if (*s != ' ') break;
        }

        strlcpy(p->CPU.BrandString, s, sizeof(p->CPU.BrandString));

        if (!strncmp(p->CPU.BrandString, CPUID_STRING_UNKNOWN, min(sizeof(p->CPU.BrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
        {
            /*
             * This string means we have a firmware-programmable brand string,
             * and the firmware couldn't figure out what sort of CPU we have.
             */
            p->CPU.BrandString[0] = '\0';
        }
    }

    /*
     * Get the processor signature and decode it,
     * bracketing this with the approved procedure for reading
     * the microcode version number, a.k.a. signature, a.k.a. BIOS ID.
     */
#ifndef AMD_SUPPORT
    wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
    do_cpuid(1, reg);
    p->CPU.MicrocodeVersion =
        (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
#else
    do_cpuid(1, reg);
#endif
    p->CPU.Signature = reg[eax];
    p->CPU.Stepping  = bitfield(reg[eax], 3, 0);
    p->CPU.Model     = bitfield(reg[eax], 7, 4);
    p->CPU.Family    = bitfield(reg[eax], 11, 8);
    p->CPU.ExtModel  = bitfield(reg[eax], 19, 16);
    p->CPU.ExtFamily = bitfield(reg[eax], 27, 20);
    p->CPU.Brand     = bitfield(reg[ebx], 7, 0);
    p->CPU.Features  = quad(reg[ecx], reg[edx]);

    /* Fold extensions into family/model */
    if (p->CPU.Family == 0x0f)
        p->CPU.Family += p->CPU.ExtFamily;
    if (p->CPU.Family == 0x0f || p->CPU.Family == 0x06)
        p->CPU.Model += (p->CPU.ExtModel << 4);
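
    /* e.g. a Sandy Bridge part reports Family 0x6, Model 0xA, ExtModel 0x2,
     * which folds to Model 0x2A (CPUID_MODEL_SANDYBRIDGE). */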

    if (p->CPU.Features & CPUID_FEATURE_HTT)
        logical_per_package = bitfield(reg[ebx], 23, 16);
    else
        logical_per_package = 1;

    if (p->CPU.cpuid_max_ext >= 0x80000001)
    {
        do_cpuid(0x80000001, reg);
        p->CPU.ExtFeatures = quad(reg[ecx], reg[edx]);
    }

    if (p->CPU.cpuid_max_ext >= 0x80000007)
    {
        do_cpuid(0x80000007, reg);

        /* Fold in the Invariant TSC feature bit, if present */
        p->CPU.ExtFeatures |= reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;

#ifdef AMD_SUPPORT
        /* Fold in the Hardware P-State control feature bit, if present */
        p->CPU.ExtFeatures |= reg[edx] & (uint32_t)_Bit(7);

        /* Fold in the read-only effective frequency interface feature bit, if present */
        p->CPU.ExtFeatures |= reg[edx] & (uint32_t)_Bit(10);
#endif
    }

#ifdef AMD_SUPPORT
    if (p->CPU.cpuid_max_ext >= 0x80000008)
    {
        if (p->CPU.Features & CPUID_FEATURE_HTT)
        {
            do_cpuid(0x80000008, reg);
            cores_per_package = bitfield(reg[ecx], 7, 0) + 1; // NC + 1
        }
    }
#endif

    if (p->CPU.cpuid_max_basic >= 0x5)
    {
        /*
         * Extract the Monitor/Mwait Leaf info:
         */
        do_cpuid(5, reg);
#ifndef AMD_SUPPORT
        p->CPU.sub_Cstates = reg[edx];
#endif
        p->CPU.extensions = reg[ecx];
    }

#ifndef AMD_SUPPORT
    if (p->CPU.cpuid_max_basic >= 0x6)
    {
        /*
         * The Thermal and Power Leaf:
         */
        do_cpuid(6, reg);
        p->CPU.dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
        p->CPU.invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
        p->CPU.fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
    }

    if ((p->CPU.Vendor == 0x756E6547 /* Intel */) &&
        (p->CPU.Family == 0x06))
    {
        /*
         * Find the number of enabled cores and threads
         * (which determines whether SMT/Hyperthreading is active).
         */
        switch (p->CPU.Model)
        {
            case CPUID_MODEL_DALES_32NM:
            case CPUID_MODEL_WESTMERE:
            case CPUID_MODEL_WESTMERE_EX:
            {
                msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
                p->CPU.NoCores   = bitfield((uint32_t)msr, 19, 16);
                break;
            }

            case CPUID_MODEL_NEHALEM:
            case CPUID_MODEL_FIELDS:
            case CPUID_MODEL_DALES:
            case CPUID_MODEL_NEHALEM_EX:
            case CPUID_MODEL_SANDYBRIDGE:
            case CPUID_MODEL_JAKETOWN:
            {
                msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
                p->CPU.NoCores   = bitfield((uint32_t)msr, 31, 16);
                break;
            }
        }
    }
#endif
    if (p->CPU.NoCores == 0)
    {
#ifdef AMD_SUPPORT
        if (!cores_per_package) {
            // Legacy method
            if ((p->CPU.ExtFeatures & _HBit(1)/* CmpLegacy */) && (p->CPU.Features & CPUID_FEATURE_HTT))
                cores_per_package = logical_per_package;
            else
                cores_per_package = 1;
        }
#endif
        p->CPU.NoThreads = logical_per_package;
        p->CPU.NoCores   = cores_per_package ? cores_per_package : 1;
    }

    /* End of Copyright: from Apple's XNU cpuid.c */

    fsbFrequency = (uint64_t)(compute_bclk() * 1000000);

#if LEGACY_CPU
    tscFrequency = measure_tsc_frequency();
#endif

#ifdef AMD_SUPPORT
#define K8_FIDVID_STATUS  0xC0010042
#define K10_COFVID_STATUS 0xC0010071
    if (p->CPU.ExtFeatures & _Bit(10))
    {
        cpuFrequency = measure_aperf_frequency();
    }

    if ((p->CPU.Vendor == 0x68747541 /* AMD */) && (p->CPU.Family == 0x0f))
    {
        switch (p->CPU.ExtFamily)
        {
            case 0x00: /* K8 */
                msr = rdmsr64(K8_FIDVID_STATUS);
                maxcoef  = bitfield(msr, 21, 16) / 2 + 4;
                currcoef = bitfield(msr, 5, 0) / 2 + 4;
                break;

            case 0x01: /* K10 */
            {
                //uint32_t reg[4];
                msr = rdmsr64(K10_COFVID_STATUS);
                /*
                do_cpuid2(0x00000006, 0, reg);
                EffFreq: effective frequency interface
                if (bitfield(reg[ecx], 0, 0) == 1)
                {
                    uint64_t aperf = measure_aperf_frequency();
                    cpuFrequency = aperf;
                }
                */
                // NOTE: the TSC runs at the maxcoef (non-turbo) frequency,
                // *not* at the turbo frequency.
                maxcoef  = bitfield(msr, 54, 49) / 2 + 4;
                currcoef = bitfield(msr, 5, 0) + 0x10;
                currdiv  = 2 << bitfield(msr, 8, 6);

                break;
            }
            case 0x05: /* K14 */
                msr = rdmsr64(K10_COFVID_STATUS);
                currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
                currdiv  = (bitfield(msr, 8, 4) + 1) << 2;
                currdiv += bitfield(msr, 3, 0);

                break;

            case 0x02: /* K11 */
                DBG("K11 detected, but not supported !!!\n");
                // Not implemented
                break;
        }

        if (!fsbFrequency)
        {
            if (maxcoef)
            {
                if (currdiv)
                {
                    if (!currcoef) currcoef = maxcoef;
                    if (!cpuFrequency)
                        fsbFrequency = ((tscFrequency * currdiv) / currcoef);
                    else
                        fsbFrequency = ((cpuFrequency * currdiv) / currcoef);

                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                } else {
                    if (!cpuFrequency)
                        fsbFrequency = (tscFrequency / maxcoef);
                    else
                        fsbFrequency = (cpuFrequency / maxcoef);
                    DBG("%d\n", currcoef);
                }
            }
            else if (currcoef)
            {
                if (currdiv)
                {
                    fsbFrequency = ((tscFrequency * currdiv) / currcoef);
                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                } else {
                    fsbFrequency = (tscFrequency / currcoef);
                    DBG("%d\n", currcoef);
                }
            }
        }

    }

    // NOTE: This is not the approved method;
    // the method provided by AMD is:
    // if ((PowerNow == enabled (p->CPU.cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"

    if (strstr(p->CPU.BrandString, "obile")) // matches both "Mobile" and "mobile"
        p->CPU.isMobile = true;
    else
        p->CPU.isMobile = false;

    DBG("%s platform detected.\n", p->CPU.isMobile ? "Mobile" : "Desktop");
#else
    if ((p->CPU.Vendor == 0x756E6547 /* Intel */) &&
        ((p->CPU.Family == 0x06) ||
         (p->CPU.Family == 0x0f)))
    {
        if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) ||
            (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03))
        {
            /* Nehalem CPU model */
            if (p->CPU.Family == 0x06 && (p->CPU.Model == CPUID_MODEL_NEHALEM ||
                                          p->CPU.Model == CPUID_MODEL_FIELDS ||
                                          p->CPU.Model == CPUID_MODEL_DALES ||
                                          p->CPU.Model == CPUID_MODEL_DALES_32NM ||
                                          p->CPU.Model == CPUID_MODEL_WESTMERE ||
                                          p->CPU.Model == CPUID_MODEL_NEHALEM_EX ||
                                          p->CPU.Model == CPUID_MODEL_WESTMERE_EX ||
                                          p->CPU.Model == CPUID_MODEL_SANDYBRIDGE ||
                                          p->CPU.Model == CPUID_MODEL_JAKETOWN))
            {
                uint8_t  bus_ratio_max = 0, bus_ratio_min = 0;
                uint32_t max_ratio = 0;
                uint64_t flex_ratio = 0;
                msr = rdmsr64(MSR_PLATFORM_INFO);
#if DEBUG_CPU
                DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
#endif
                bus_ratio_max = (msr >> 8) & 0xff;
                bus_ratio_min = (msr >> 40) & 0xff;
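                /* Per the Intel SDM, MSR_PLATFORM_INFO (0xCE) holds the
                 * maximum non-turbo ratio in bits 15:8 and the maximum
                 * efficiency (minimum) ratio in bits 47:40, as extracted above.
                 */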
                msr = rdmsr64(MSR_FLEX_RATIO);
#if DEBUG_CPU
                DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
#endif
                if ((msr >> 16) & 0x01)
                {
                    flex_ratio = (msr >> 8) & 0xff;
                    /* bcc9: at least on the Gigabyte H67MA-UD2H,
                       where the CPU multiplier can't be changed to
                       allow overclocking, the flex_ratio MSR has unexpected
                       (to OS X) contents. These contents cause mach_kernel to
                       fail to compute the bus ratio correctly, instead
                       causing the system to crash since tscGranularity
                       is inadvertently set to 0.
                     */
                    if (flex_ratio == 0)
                    {
                        /* Clear bit 16 (evidently the presence bit) */
                        wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
                        msr = rdmsr64(MSR_FLEX_RATIO);
#if DEBUG_CPU
                        DBG("Unusable flex ratio detected. MSR patched to %08x\n", msr & 0xffffffff);
#endif
                    }
                    else
                    {
                        if (bus_ratio_max > flex_ratio)
                        {
                            bus_ratio_max = flex_ratio;
                        }
                    }
                }
#if LEGACY_CPU
                if (bus_ratio_max)
                {
                    fsbFrequency = (tscFrequency / bus_ratio_max);
                }
#endif
                // valv: Turbo Ratio Limit
                if ((p->CPU.Model != 0x2e) && (p->CPU.Model != 0x2f))
                {
                    //msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
                    cpuFrequency = bus_ratio_max * fsbFrequency;
                    max_ratio = bus_ratio_max * 10;
                }
                else
                {
#if LEGACY_CPU
                    cpuFrequency = tscFrequency;
#else
                    cpuFrequency = bus_ratio_max * fsbFrequency;
#endif
                }
#if DEBUG_CPU
                DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", fsbFrequency / 1000000, max_ratio);
#endif
                currcoef = bus_ratio_max;

                tscFrequency = cpuFrequency;
            }
            else
            {
                msr = rdmsr64(MSR_IA32_PERF_STATUS);
#if DEBUG_CPU
                DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
#endif
                currcoef = (msr >> 8) & 0x1f;
                /* Non-integer bus ratio for the max-multi */
                maxdiv = (msr >> 46) & 0x01;
                /* Non-integer bus ratio for the current-multi (undocumented) */
                currdiv = (msr >> 14) & 0x01;

                if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) ||
                    (p->CPU.Family == 0x0f)) // This will always be model >= 3
                {
                    /* On these models, maxcoef defines TSC freq */
                    maxcoef = (msr >> 40) & 0x1f;
                }
                else
                {
                    /* On lower models, currcoef defines TSC freq */
                    /* XXX */
                    maxcoef = currcoef;
                }
                if (!currcoef) currcoef = maxcoef;
#if LEGACY_CPU
                if (maxcoef)
                {
                    if (maxdiv)
                    {
                        fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
                    }
                    else
                    {
                        fsbFrequency = (tscFrequency / maxcoef);
                    }

                    if (currdiv)
                    {
                        cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
                    }
                    else
                    {
                        cpuFrequency = (fsbFrequency * currcoef);
                    }
#if DEBUG_CPU
                    DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif
                }
#else
                if (currdiv)
                {
                    cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
                }
                else
                {
                    cpuFrequency = (fsbFrequency * currcoef);
                }

                if (maxcoef)
                {
                    if (maxdiv)
                    {
                        tscFrequency = (fsbFrequency * ((maxcoef * 2) + 1)) / 2;
                    }
                    else
                    {
                        tscFrequency = fsbFrequency * maxcoef;
                    }
                }
#if DEBUG_CPU
                DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif

#endif // LEGACY_CPU
            }
        }
        /* Mobile CPU? */
        // Slice
        p->CPU.isMobile = false;
        switch (p->CPU.Model)
        {
            case 0x0D:
                p->CPU.isMobile = true;
                break;
            case 0x02:
            case 0x03:
            case 0x04:
            case 0x06:
                p->CPU.isMobile = (rdmsr64(0x2C) & (1 << 21));
                break;
            default:
                p->CPU.isMobile = (rdmsr64(0x17) & (1 << 28));
                break;
        }

        DBG("%s platform detected.\n", p->CPU.isMobile ? "Mobile" : "Desktop");
    }
#endif
    if (!cpuFrequency) cpuFrequency = tscFrequency;
    if (!tscFrequency) tscFrequency = cpuFrequency;

    p->CPU.MaxCoef  = maxcoef;
    p->CPU.MaxDiv   = maxdiv;
    p->CPU.CurrCoef = currcoef;
    p->CPU.CurrDiv  = currdiv;

    p->CPU.TSCFrequency = tscFrequency;
    p->CPU.FSBFrequency = fsbFrequency;
    p->CPU.CPUFrequency = cpuFrequency;
#ifdef AMD_SUPPORT
    msglog("AMD CPU Detection Enabled\n");
#endif
    DBG("CPU: Vendor/Model/ExtModel: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Model, p->CPU.ExtModel);
    DBG("CPU: Family/ExtFamily: 0x%x/0x%x\n", p->CPU.Family, p->CPU.ExtFamily);
#ifdef AMD_SUPPORT
    DBG("CPU (AMD): TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
    DBG("CPU (AMD): FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
    DBG("CPU (AMD): CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
    DBG("CPU (AMD): MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
    DBG("CPU (AMD): MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
#else
    DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
    DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
    DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
    DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
    DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
#endif

    DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
    DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
    DBG("CPU: ExtFeatures: 0x%08x\n", p->CPU.ExtFeatures);
#ifndef AMD_SUPPORT
    DBG("CPU: MicrocodeVersion: %d\n", p->CPU.MicrocodeVersion);
#endif
#if DEBUG_CPU
    pause();
#endif
}



Revision: 1804