Chameleon SVN Source Tree

Root/branches/cparm/i386/libsaio/cpu.c

/*
 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
 * AsereBLN: 2009: cleanup and bugfix
 */

#include "libsaio.h"
#include "platform.h"
#include "cpu.h"

#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

#if DEBUG_CPU
#define DBG(x...) printf(x)
#else
#define DBG(x...) msglog(x)
#endif

//#define AMD_SUPPORT

#ifndef INTEL_SUPPORT
#define INTEL_SUPPORT 0 // Default (0: no legacy, 1: legacy)
#endif

#ifdef AMD_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#ifdef INTEL_SUPPORT
#undef INTEL_SUPPORT
#endif
#define LEGACY_CPU 1
#endif

#ifdef INTEL_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#define LEGACY_CPU INTEL_SUPPORT
#endif
// Net effect: if AMD_SUPPORT is defined, INTEL_SUPPORT is undefined and LEGACY_CPU = 1;
// otherwise, LEGACY_CPU = INTEL_SUPPORT.

#if LEGACY_CPU
static uint64_t measure_tsc_frequency(void);

// DFE: enable_PIT2 and disable_PIT2 come from older xnu

/*
 * Enable or disable timer 2.
 * Port 0x61 controls timer 2:
 * bit 0 gates the clock,
 * bit 1 gates output to speaker.
 */
static inline void enable_PIT2(void)
{
    /* Enable gate, disable speaker */
    __asm__ volatile(
        " inb $0x61,%%al \n\t"
        " and $0xFC,%%al \n\t"  /* & ~0x03 */
        " or $1,%%al \n\t"
        " outb %%al,$0x61 \n\t"
        : : : "%al" );
}

static inline void disable_PIT2(void)
{
    /* Disable gate and output to speaker */
    __asm__ volatile(
        " inb $0x61,%%al \n\t"
        " and $0xFC,%%al \n\t"  /* & ~0x03 */
        " outb %%al,$0x61 \n\t"
        : : : "%al" );
}

// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
// roughly based on Linux code

/* Set the 8254 channel 2 to mode 0 with the specified value.
   In mode 0, the counter will initially set its gate low when the
   timer expires. For this to be useful, you ought to set it high
   before calling this function. The enable_PIT2 function does this.
 */
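/* The 0xB0 command byte below decodes as: select channel 2 (bits 7:6 = 10),
 * access mode lobyte/hibyte (bits 5:4 = 11), mode 0 (bits 3:1 = 000),
 * binary counting (bit 0 = 0). The 16-bit reload value is then written
 * low byte first to the channel 2 data port (0x42). */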
static inline void set_PIT2_mode0(uint16_t value)
{
    __asm__ volatile(
        " movb $0xB0,%%al \n\t"
        " outb %%al,$0x43 \n\t"
        " movb %%dl,%%al \n\t"
        " outb %%al,$0x42 \n\t"
        " movb %%dh,%%al \n\t"
        " outb %%al,$0x42"
        : : "d"(value) /*: no clobber */ );
}

/* Returns the number of times the loop ran before the PIT2 signaled */
static inline unsigned long poll_PIT2_gate(void)
{
    unsigned long count = 0;
    unsigned char nmi_sc_val;
    do {
        ++count;
        __asm__ volatile(
            "inb $0x61,%0"
            : "=q"(nmi_sc_val) /*:*/ /* no input */ /*:*/ /* no clobber */);
    } while ( (nmi_sc_val & 0x20) == 0);
    return count;
}
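/* Calibration constants: assuming the usual definitions in cpu.h,
 * CALIBRATE_TIME_MSEC is 30 and CALIBRATE_LATCH is the PIT reload value
 * for a 30 ms gate: 1193182 Hz * 30 / 1000 = 35795 (rounded). */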
/*
 * DFE: Measures the TSC frequency in Hz (64-bit) using the 8254 PIT
 */
static uint64_t measure_tsc_frequency(void)
{
    uint64_t tscStart;
    uint64_t tscEnd;
    uint64_t tscDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of TSC ticks that occur while waiting for the timer to
     * expire. That theoretically helps avoid inconsistencies when
     * running under a VM if the TSC is not virtualized and the host
     * steals time. The TSC is normally virtualized for VMware.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        tscStart = rdtsc64();
        pollCount = poll_PIT2_gate();
        tscEnd = rdtsc64();
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The TSC must increment at LEAST once every millisecond. We
         * should have waited exactly 30 msec so the TSC delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // tscDelta = min(tscDelta, (tscEnd - tscStart))
        if ( (tscEnd - tscStart) < tscDelta )
            tscDelta = tscEnd - tscStart;
    }
    /* tscDelta is now the least number of TSC ticks the processor made in
     * a timespan of 0.03 s (i.e. 30 milliseconds).
     * Linux thus divides by 30, which gives the answer in kilohertz because
     * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
     * Hz, so we need to convert our milliseconds to seconds. Since we're
     * dividing by the milliseconds, we simply multiply by 1000.
     */
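    /* Worked example: a 2 GHz TSC advances ~60,000,000 ticks in 30 ms,
     * so retval = 60000000 * 1000 / 30 = 2,000,000,000 Hz. */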

    /* Unlike linux, we're not limited to 32-bit, but we do need to take care
     * that we're going to multiply by 1000 first so we do need at least some
     * arithmetic headroom. For now, 32-bit should be enough.
     * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
     */
    if (tscDelta > (1ULL << 32))
        retval = 0;
    else
    {
        retval = tscDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}

#ifdef AMD_SUPPORT
#define MSR_AMD_APERF 0x000000E8
/*
 * Original comment/code:
 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
 *
 * Measures the Actual Performance Frequency in Hz (64-bit)
 * (just a naming change, mperf --> aperf )
 */
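/* APERF (MSR 0xE8) counts at the core's actual operating frequency, while
 * its companion MPERF (MSR 0xE7) counts at the reference frequency; counting
 * APERF ticks over a fixed PIT-timed window therefore yields the frequency
 * the core was really running at during the measurement. */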
static uint64_t measure_aperf_frequency(void)
{
    uint64_t aperfStart;
    uint64_t aperfEnd;
    uint64_t aperfDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of APERF ticks that occur while waiting for the timer to
     * expire.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        aperfStart = rdmsr64(MSR_AMD_APERF);
        pollCount = poll_PIT2_gate();
        aperfEnd = rdmsr64(MSR_AMD_APERF);
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The APERF count must increment at LEAST once every millisecond.
         * We should have waited exactly 30 msec so the APERF delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
        if ( (aperfEnd - aperfStart) < aperfDelta )
            aperfDelta = aperfEnd - aperfStart;
    }
    /* aperfDelta is now the least number of APERF ticks the processor made in
     * a timespan of 0.03 s (i.e. 30 milliseconds)
     */

    if (aperfDelta > (1ULL << 32))
        retval = 0;
    else
    {
        retval = aperfDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}
#endif

#endif

/*
 License for x2apic_enabled, get_apicbase, compute_bclk.

 Copyright (c) 2010, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of Intel Corporation nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
static uint32_t x2apic_enabled(void);
static uint32_t get_apicbase(void);
static uint32_t compute_bclk(void);

static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
    __asm__ volatile(
        "rdmsr"
        : "=a" (*lo_data_addr), "=d" (*hi_data_addr)
        : "c" (msr)
    );
}

static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
    __asm__ __volatile__ (
        "wrmsr"
        : /* No outputs */
        : "c" (msr), "a" (lo_data), "d" (hi_data)
    );
}

#define MSR_APIC_BASE            0x1B
#define APIC_TMR_INITIAL_CNT     0x380
#define APIC_TMR_CURRENT_CNT     0x390
#define APIC_TMR_DIVIDE_CFG      0x3E0
#define MSR_APIC_TMR_INITIAL_CNT 0x838
#define MSR_APIC_TMR_CURRENT_CNT 0x839
#define MSR_APIC_TMR_DIVIDE_CFG  0x83E
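/* The x2APIC MSR for a given xAPIC register is 0x800 + (MMIO offset >> 4),
 * which is how the 0x380/0x390/0x3E0 offsets above map to MSRs
 * 0x838/0x839/0x83E. */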
static uint32_t x2apic_enabled(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    /* Bit 10 of IA32_APIC_BASE (EXTD) is set when x2APIC mode is enabled */
    return (temp64 & (1 << 10)) ? 1 : 0;
}

static uint32_t get_apicbase(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    /* Bits 31:12 of IA32_APIC_BASE hold the 4 KB-aligned xAPIC MMIO base */
    return (uint32_t) (temp64 & 0xfffff000);
}
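/* compute_bclk: gates PIT channel 2 open for a fixed DELAY_IN_US window while
 * the local APIC timer (divide-by-2) counts down from a known start value;
 * the count consumed over the window gives the bus clock in MHz. The APIC
 * timer registers are reached via MSRs in x2APIC mode, via MMIO otherwise. */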
static uint32_t compute_bclk(void)
{
    uint32_t dummy;
    uint32_t start, stop;
    uint8_t temp8;
    uint16_t delay_count;
    uint32_t bclk;

#define DELAY_IN_US 1000

    // Compute fixed delay as time
    // delay count = desired time * PIT frequency
    // PIT frequency = 1.193182 MHz
    // (for a 1000 us delay this works out to ~1193 PIT ticks, i.e. 1 ms)
    delay_count = 1193182 / DELAY_IN_US;

    // PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG 0x61
#define CH2_SPEAKER  (1 << 1) // bit 1 -- 1 = speaker enabled, 0 = speaker disabled
#define CH2_GATE_IN  (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched

    // PIT Command register
#define PIT_MODE_COMMAND_REG 0x43
#define SELECT_CH2 (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite the name, no interrupts on CH2

    // PIT Channel 2 data port
#define PIT_CH2_DATA 0x42

    // Disable the PIT channel 2 speaker and gate
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    // Setup command and mode
    outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

    // Set time for fixed delay
    outb(PIT_CH2_DATA, (uint8_t) (delay_count));
    outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

    // Prepare to enable channel 2 gate but leave the speaker disabled
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~CH2_SPEAKER;
    temp8 |= CH2_GATE_IN;

    if (x2apic_enabled())
    {
        // Set APIC Timer Divide Value as 2
        wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

        // start APIC timer with a known value
        start = ~0UL;
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
    }
    else
    {
        // Set APIC Timer Divide Value as 2
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

        // start APIC timer with a known value
        start = ~0UL;
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
    }

    // Actually start the PIT channel 2
    outb(PIT_CH2_LATCH_REG, temp8);

    // Wait for the fixed delay
    while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

    if (x2apic_enabled())
    {
        // read the APIC timer to determine the change that occurred over this fixed delay
        rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

        // stop APIC timer
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);
    }
    else
    {
        // read the APIC timer to determine the change that occurred over this fixed delay
        stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

        // stop APIC timer
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
    }

    // Disable channel 2 speaker and gate input
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    // The timer ran with divide-by-2, so each counted tick is 2 bus clocks
    bclk = (start - stop) * 2 / DELAY_IN_US;

    // Round bclk to the nearest multiple of 100/12 MHz (e.g. 100, 133.33, 200)
    bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;

    return bclk;
}
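
/* Worked example: with a 100 MHz bus clock and a 1000 us gate, the APIC timer
 * (divide-by-2) consumes ~50,000 ticks, so bclk = 50000 * 2 / 1000 = 100 MHz,
 * which the rounding step leaves at 100 (an exact multiple of 100/12). */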

/*
 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU.
 * - The multiplier is read from an MSR. On Intel there are two:
 *   a maximum multiplier (used to derive the FSB frequency from the TSC),
 *   and a current multiplier (used to derive the CPU frequency).
 * - fsbFrequency = tscFrequency / multiplier
 * - cpuFrequency = fsbFrequency * multiplier
 */
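
/* Worked example (illustrative numbers): a TSC measured at 2926 MHz with a
 * maximum multiplier of 22 gives fsbFrequency = 2926 / 22 = 133 MHz; with the
 * current multiplier also at 22, cpuFrequency = 133 * 22 = 2926 MHz. */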

void scan_cpu(void)
{
    uint64_t msr = 0;

    uint64_t Features = 0;         // CPU Features like MMX, SSE2, VT ...
    uint64_t ExtFeatures = 0;      // CPU Extended Features like SYSCALL, XD, EM64T, LAHF ...
    uint64_t TSCFreq = 0;
    uint64_t FSBFreq = 0;
    uint64_t CPUFreq = 0;

    uint32_t reg[4];
    uint32_t cores_per_package = 0;
    uint32_t logical_per_package = 0;

    uint32_t Vendor = 0;           // Vendor
    uint32_t Signature = 0;        // Signature
    uint8_t  Stepping = 0;         // Stepping
    uint8_t  Model = 0;            // Model
    uint8_t  ExtModel = 0;         // Extended Model
    uint8_t  Family = 0;           // Family
    uint8_t  ExtFamily = 0;        // Extended Family
    uint32_t NoCores = 0;          // Number of cores per package
    uint32_t NoThreads = 0;        // Number of threads per package
    uint8_t  Brand = 0;
    uint32_t MicrocodeVersion = 0; // The microcode version number a.k.a. signature a.k.a. BIOS ID

    uint8_t  isMobile = 0;

    boolean_t dynamic_acceleration = 0;
    boolean_t invariant_APIC_timer = 0;
    boolean_t fine_grain_clock_mod = 0;

    uint32_t cpuid_max_basic = 0;
    uint32_t cpuid_max_ext = 0;
    uint32_t sub_Cstates = 0;
    uint32_t extensions = 0;

    uint8_t maxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
    char CpuBrandString[48];       // 48-byte branding string

    do_cpuid(0, reg);
    Vendor = reg[ebx];
    cpuid_max_basic = reg[eax];

#ifndef AMD_SUPPORT
    /* Leaf 4 (Intel): EAX[31:26] + 1 = maximum cores per package */
    do_cpuid2(0x00000004, 0, reg);
    cores_per_package = bitfield(reg[eax], 31, 26) + 1;
#endif

    /* get extended cpuid results */
    do_cpuid(0x80000000, reg);
    cpuid_max_ext = reg[eax];

    /* Begin of Copyright: from Apple's XNU cpuid.c */

    /* get brand string (if supported) */
    if (cpuid_max_ext > 0x80000004)
    {
        char str[128], *s;
        /*
         * The brand string is 48 bytes (max), guaranteed to
         * be NUL terminated. Leaves 0x80000002..4 each return
         * 16 bytes of it in EAX..EDX.
         */
        do_cpuid(0x80000002, reg);
        bcopy((char *)reg, &str[0], 16);
        do_cpuid(0x80000003, reg);
        bcopy((char *)reg, &str[16], 16);
        do_cpuid(0x80000004, reg);
        bcopy((char *)reg, &str[32], 16);
        /* Skip leading spaces */
        for (s = str; *s != '\0'; s++)
        {
            if (*s != ' ') break;
        }

        strlcpy(CpuBrandString, s, sizeof(CpuBrandString));

        if (!strncmp(CpuBrandString, CPUID_STRING_UNKNOWN, min(sizeof(CpuBrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
        {
            /*
             * This string means we have a firmware-programmable brand string,
             * and the firmware couldn't figure out what sort of CPU we have.
             */
            CpuBrandString[0] = '\0';
        }
    }

    /*
     * Get processor signature and decode,
     * bracketing this with the approved procedure for reading
     * the microcode version number a.k.a. signature a.k.a. BIOS ID
     */
#ifndef AMD_SUPPORT
    wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
    do_cpuid(1, reg);
    MicrocodeVersion = (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
#else
    do_cpuid(1, reg);
#endif
    Signature = reg[eax];
    Stepping  = bitfield(reg[eax], 3, 0);
    Model     = bitfield(reg[eax], 7, 4);
    Family    = bitfield(reg[eax], 11, 8);
    ExtModel  = bitfield(reg[eax], 19, 16);
    ExtFamily = bitfield(reg[eax], 27, 20);
    Brand     = bitfield(reg[ebx], 7, 0);
    Features  = quad(reg[ecx], reg[edx]);

    /* Fold extensions into family/model */
    if (Family == 0x0f)
        Family += ExtFamily;
    if (Family == 0x0f || Family == 0x06)
        Model += (ExtModel << 4);

    if (Features & CPUID_FEATURE_HTT)
        logical_per_package = bitfield(reg[ebx], 23, 16);
    else
        logical_per_package = 1;

    if (cpuid_max_ext >= 0x80000001)
    {
        do_cpuid(0x80000001, reg);
        ExtFeatures = quad(reg[ecx], reg[edx]);
    }

    if (cpuid_max_ext >= 0x80000007)
    {
        do_cpuid(0x80000007, reg);

        /* Fold in the Invariant TSC feature bit, if present */
        ExtFeatures |= reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;

#ifdef AMD_SUPPORT
        /* Fold in the Hardware P-State control feature bit, if present */
        ExtFeatures |= reg[edx] & (uint32_t)_Bit(7);

        /* Fold in the read-only effective frequency interface feature bit, if present */
        ExtFeatures |= reg[edx] & (uint32_t)_Bit(10);
#endif
    }

#ifdef AMD_SUPPORT
    if (cpuid_max_ext >= 0x80000008)
    {
        if (Features & CPUID_FEATURE_HTT)
        {
            do_cpuid(0x80000008, reg);
            cores_per_package = bitfield(reg[ecx], 7, 0) + 1; // NC + 1
        }
    }
#endif

    if (cpuid_max_basic >= 0x5)
    {
        /*
         * Extract the Monitor/Mwait Leaf info:
         */
        do_cpuid(5, reg);
#ifndef AMD_SUPPORT
        sub_Cstates = reg[edx];
#endif
        extensions = reg[ecx];
    }

#ifndef AMD_SUPPORT
    if (cpuid_max_basic >= 0x6)
    {
        /*
         * The thermal and Power Leaf:
         */
        do_cpuid(6, reg);
        dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
        invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
        fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
    }

    if ((Vendor == CPUID_VENDOR_INTEL) &&
        (Family == 0x06))
    {
        /*
         * Find the number of enabled cores and threads
         * (which determines whether SMT/Hyperthreading is active).
         */
        switch (Model)
        {
            case CPUID_MODEL_DALES_32NM:
            case CPUID_MODEL_WESTMERE:
            case CPUID_MODEL_WESTMERE_EX:
            {
                msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                NoThreads = bitfield((uint32_t)msr, 15, 0);
                NoCores   = bitfield((uint32_t)msr, 19, 16);
                break;
            }

            case CPUID_MODEL_NEHALEM:
            case CPUID_MODEL_FIELDS:
            case CPUID_MODEL_DALES:
            case CPUID_MODEL_NEHALEM_EX:
            case CPUID_MODEL_SANDYBRIDGE:
            case CPUID_MODEL_JAKETOWN:
            {
                msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                NoThreads = bitfield((uint32_t)msr, 15, 0);
                NoCores   = bitfield((uint32_t)msr, 31, 16);
                break;
            }
        }
    }
#endif
    if (NoCores == 0)
    {
#ifdef AMD_SUPPORT
        if (!cores_per_package) {
            // legacy method
            if ((ExtFeatures & _HBit(1)/* CmpLegacy */) && (Features & CPUID_FEATURE_HTT))
                cores_per_package = logical_per_package;
            else
                cores_per_package = 1;
        }
#endif
        NoThreads = logical_per_package;
        NoCores   = cores_per_package ? cores_per_package : 1;
    }

    /* End of Copyright: from Apple's XNU cpuid.c */

    FSBFreq = (uint64_t)(compute_bclk() * 1000000); // compute_bclk() returns MHz; convert to Hz

#if LEGACY_CPU
    TSCFreq = measure_tsc_frequency();
#endif

#ifdef AMD_SUPPORT
#define K8_FIDVID_STATUS  0xC0010042
#define K10_COFVID_STATUS 0xC0010071
    if (ExtFeatures & _Bit(10))
    {
        CPUFreq = measure_aperf_frequency();
    }

    if ((Vendor == CPUID_VENDOR_AMD) && (Family == 0x0f))
    {
        switch (ExtFamily)
        {
            case 0x00: /* K8 */
                msr = rdmsr64(K8_FIDVID_STATUS);
                maxcoef  = bitfield(msr, 21, 16) / 2 + 4;
                currcoef = bitfield(msr, 5, 0) / 2 + 4;
                break;

            case 0x01: /* K10 */
            {
                //uint32_t reg[4];
                msr = rdmsr64(K10_COFVID_STATUS);
                /*
                 do_cpuid2(0x00000006, 0, reg);
                 EffFreq: effective frequency interface
                 if (bitfield(reg[ecx], 0, 0) == 1)
                 {
                     uint64_t aperf = measure_aperf_frequency();
                     CPUFreq = aperf;
                 }
                 */
                // NOTE: the TSC runs at the maxcoef (non-turbo) frequency,
                // *not* at the turbo frequency.
                maxcoef  = bitfield(msr, 54, 49) / 2 + 4;
                currcoef = bitfield(msr, 5, 0) + 0x10;
                currdiv  = 2 << bitfield(msr, 8, 6);

                break;
            }
            case 0x05: /* K14 */
                msr = rdmsr64(K10_COFVID_STATUS);
                currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
                currdiv  = (bitfield(msr, 8, 4) + 1) << 2;
                currdiv += bitfield(msr, 3, 0);

                break;

            case 0x02: /* K11 */
                DBG("K11 detected, but not supported !!!\n");
                // not implemented
                break;
        }

        if (!FSBFreq)
        {
            if (maxcoef)
            {
                if (currdiv)
                {
                    if (!currcoef) currcoef = maxcoef;
                    if (!CPUFreq)
                        FSBFreq = ((TSCFreq * currdiv) / currcoef);
                    else
                        FSBFreq = ((CPUFreq * currdiv) / currcoef);

                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                } else {
                    if (!CPUFreq)
                        FSBFreq = (TSCFreq / maxcoef);
                    else
                        FSBFreq = (CPUFreq / maxcoef);
                    DBG("%d\n", currcoef);
                }
            }
            else if (currcoef)
            {
                if (currdiv)
                {
                    FSBFreq = ((TSCFreq * currdiv) / currcoef);
                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                } else {
                    FSBFreq = (TSCFreq / currcoef);
                    DBG("%d\n", currcoef);
                }
            }
        }

    }

    // NOTE: This is not the approved method;
    // the method provided by AMD is:
    // if ((PowerNow == enabled (p->cpu->cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"

    if (strstr(CpuBrandString, "obile"))
        isMobile = 1;
    else
        isMobile = 0;

    DBG("%s platform detected.\n", isMobile ? "Mobile" : "Desktop");
#else
    if ((Vendor == CPUID_VENDOR_INTEL) &&
        ((Family == 0x06) ||
         (Family == 0x0f)))
    {
        if ((Family == 0x06 && Model >= 0x0c) ||
            (Family == 0x0f && Model >= 0x03))
        {
            /* Nehalem-class CPU models */
            if (Family == 0x06 && (Model == CPUID_MODEL_NEHALEM ||
                                   Model == CPUID_MODEL_FIELDS ||
                                   Model == CPUID_MODEL_DALES ||
                                   Model == CPUID_MODEL_DALES_32NM ||
                                   Model == CPUID_MODEL_WESTMERE ||
                                   Model == CPUID_MODEL_NEHALEM_EX ||
                                   Model == CPUID_MODEL_WESTMERE_EX ||
                                   Model == CPUID_MODEL_SANDYBRIDGE ||
                                   Model == CPUID_MODEL_JAKETOWN))
            {
                uint8_t  bus_ratio_max = 0;
                uint64_t flex_ratio = 0;
                msr = rdmsr64(MSR_PLATFORM_INFO);
#if DEBUG_CPU
                uint32_t max_ratio = 0, bus_ratio_min = 0;

                DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
#endif
                bus_ratio_max = (msr >> 8) & 0xff;
                //bus_ratio_min = (msr >> 40) & 0xff;
                msr = rdmsr64(MSR_FLEX_RATIO);
#if DEBUG_CPU
                DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
#endif
                if ((msr >> 16) & 0x01)
                {
                    flex_ratio = (msr >> 8) & 0xff;
                    /* bcc9: at least on the gigabyte h67ma-ud2h,
                       where the cpu multiplier can't be changed to
                       allow overclocking, the flex_ratio msr has unexpected (to OSX)
                       contents. These contents cause mach_kernel to
                       fail to compute the bus ratio correctly, instead
                       causing the system to crash since tscGranularity
                       is inadvertently set to 0.
                     */
                    if (flex_ratio == 0)
                    {
                        /* Clear bit 16 (evidently the
                           presence bit) */
                        wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
#if DEBUG_CPU
                        msr = rdmsr64(MSR_FLEX_RATIO);

                        DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);
#endif
                    }
                    else
                    {
                        if (bus_ratio_max > flex_ratio)
                        {
                            bus_ratio_max = flex_ratio;
                        }
                    }
                }
#if LEGACY_CPU
                if (bus_ratio_max)
                {
                    FSBFreq = (TSCFreq / bus_ratio_max);
                }
#endif
                //valv: Turbo Ratio Limit
                if ((Model != 0x2e) && (Model != 0x2f))
                {
                    //msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
                    CPUFreq = bus_ratio_max * FSBFreq;
                    //max_ratio = bus_ratio_max * 10;
                }
                else
                {
#if LEGACY_CPU
                    CPUFreq = TSCFreq;
#else
                    CPUFreq = bus_ratio_max * FSBFreq;
#endif
                }
#if DEBUG_CPU
                DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", FSBFreq / 1000000, max_ratio);
#endif
                currcoef = bus_ratio_max;

                TSCFreq = CPUFreq;
            }
            else
            {
                msr = rdmsr64(MSR_IA32_PERF_STATUS);
#if DEBUG_CPU
                DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
#endif
                currcoef = (msr >> 8) & 0x1f;
                /* Non-integer bus ratio for the max-multi */
                maxdiv = (msr >> 46) & 0x01;
                /* Non-integer bus ratio for the current-multi (undocumented) */
                currdiv = (msr >> 14) & 0x01;

                if ((Family == 0x06 && Model >= 0x0e) ||
                    (Family == 0x0f)) // This will always be model >= 3
                {
                    /* On these models, maxcoef defines TSC freq */
                    maxcoef = (msr >> 40) & 0x1f;
                }
                else
                {
                    /* On lower models, currcoef defines TSC freq */
                    /* XXX */
                    maxcoef = currcoef;
                }
                if (!currcoef) currcoef = maxcoef;
#if LEGACY_CPU
                if (maxcoef)
                {
                    /* A set divider bit means a half-step multiplier (coef + 0.5) */
                    if (maxdiv)
                    {
                        FSBFreq = ((TSCFreq * 2) / ((maxcoef * 2) + 1));
                    }
                    else
                    {
                        FSBFreq = (TSCFreq / maxcoef);
                    }

                    if (currdiv)
                    {
                        CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
                    }
                    else
                    {
                        CPUFreq = (FSBFreq * currcoef);
                    }
#if DEBUG_CPU
                    DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif
                }
#else

                if (currdiv)
                {
                    CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
                }
                else
                {
                    CPUFreq = (FSBFreq * currcoef);
                }

                if (maxcoef)
                {
                    if (maxdiv)
                    {
                        TSCFreq = (FSBFreq * ((maxcoef * 2) + 1)) / 2;
                    }
                    else
                    {
                        TSCFreq = FSBFreq * maxcoef;
                    }
                }
#if DEBUG_CPU
                DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif

#endif // LEGACY_CPU

            }
        }
        /* Mobile CPU ? */
        //Slice
        //isMobile = 0;
        switch (Model)
        {
            case 0x0D:
                isMobile = 1;
                break;
            case 0x02:
            case 0x03:
            case 0x04:
            case 0x06:
                isMobile = (rdmsr64(0x2C) & (1 << 21)) ? 1 : 0;
                break;
            default:
                isMobile = (rdmsr64(0x17) & (1 << 28)) ? 1 : 0;
                break;
        }

        DBG("%s platform detected.\n", isMobile ? "Mobile" : "Desktop");
    }
#endif
    if (!CPUFreq) CPUFreq = TSCFreq;
    if (!TSCFreq) TSCFreq = CPUFreq;

    set_env(envVendor, Vendor);
    set_env(envModel, Model);
    set_env(envExtModel, ExtModel);

    set_env(envCPUIDMaxBasic, cpuid_max_basic);
    set_env(envCPUIDMaxExt, cpuid_max_ext); // envCPUIDMaxExt assumed to be the matching key for the extended CPUID maximum
#ifndef AMD_SUPPORT
    set_env(envMicrocodeVersion, MicrocodeVersion);
#endif
    set_env_copy(envBrandString, CpuBrandString, sizeof(CpuBrandString));
    set_env(envSignature, Signature);
    set_env(envStepping, Stepping);
    set_env(envFamily, Family);
    set_env(envExtModel, ExtModel);
    set_env(envExtFamily, ExtFamily);
    set_env(envBrand, Brand);
    set_env(envFeatures, Features);
    set_env(envExtFeatures, ExtFeatures);
#ifndef AMD_SUPPORT
    set_env(envSubCstates, sub_Cstates);
#endif
    set_env(envExtensions, extensions);
#ifndef AMD_SUPPORT
    set_env(envDynamicAcceleration, dynamic_acceleration);
    set_env(envInvariantAPICTimer, invariant_APIC_timer);
    set_env(envFineGrainClockMod, fine_grain_clock_mod);
#endif
    set_env(envNoThreads, NoThreads);
    set_env(envNoCores, NoCores);
    set_env(envIsMobile, isMobile);

    set_env(envMaxCoef, maxcoef);
    set_env(envMaxDiv, maxdiv);
    set_env(envCurrCoef, currcoef);
    set_env(envCurrDiv, currdiv);
    set_env(envTSCFreq, TSCFreq);
    set_env(envFSBFreq, FSBFreq);
    set_env(envCPUFreq, CPUFreq);

#ifdef AMD_SUPPORT
    msglog("AMD CPU Detection Enabled\n");
#endif

}


Revision: 1913