Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/cpu.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9
10#ifndef DEBUG_CPU
11#define DEBUG_CPU 0
12#endif
13
14#if DEBUG_CPU
15#define DBG(x...)printf(x)
16#else
17#define DBG(x...)msglog(x)
18#endif
19
20//#define AMD_SUPPORT
21
22#ifndef INTEL_SUPPORT
23#define INTEL_SUPPORT 0 //Default (0: nolegacy, 1 : legacy)
24#endif
25
26#ifdef AMD_SUPPORT
27#ifdef LEGACY_CPU
28#undef LEGACY_CPU
29#endif
30#ifdef INTEL_SUPPORT
31#undef INTEL_SUPPORT
32#endif
33#define LEGACY_CPU 1
34#endif
35
36#ifdef INTEL_SUPPORT
37#ifdef LEGACY_CPU
38#undef LEGACY_CPU
39#endif
40#define LEGACY_CPU INTEL_SUPPORT
41#endif
42// (?) : if AMD_SUPPORT then (LEGACY_CPU = 1 && INTEL_SUPPORT = disabled)
43// else LEGACY_CPU = INTEL_SUPPORT
44
45
46#if LEGACY_CPU
47
48
49// DFE: enable_PIT2 and disable_PIT2 come from older xnu
50
51/*
52 * Enable or disable timer 2.
53 * Port 0x61 controls timer 2:
54 * bit 0 gates the clock,
55 * bit 1 gates output to speaker.
56 */
/*
 * Gate PIT timer 2 on while keeping the speaker output muted.
 * Port 0x61: bit 0 gates the counter clock, bit 1 gates the speaker.
 */
static inline void enable_PIT2(void)
{
	/* Clear both control bits, then raise only the clock gate (bit 0). */
	uint8_t port61 = inb(0x61);
	port61 = (uint8_t)((port61 & 0xFC) | 0x01);
	outb(0x61, port61);
}
67
/*
 * Gate PIT timer 2 off and keep the speaker output muted.
 * Port 0x61: bit 0 gates the counter clock, bit 1 gates the speaker.
 */
static inline void disable_PIT2(void)
{
	/* Drop both the clock gate (bit 0) and the speaker output (bit 1). */
	uint8_t port61 = inb(0x61);
	outb(0x61, (uint8_t)(port61 & 0xFC));
}
77#if 0
78static uint64_t measure_tsc_frequency(void);
79// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
80// roughly based on Linux code
81
82/* Set the 8254 channel 2 to mode 0 with the specified value.
83 In mode 0, the counter will initially set its gate low when the
84 timer expires. For this to be useful, you ought to set it high
85 before calling this function. The enable_PIT2 function does this.
86 */
/* Set the 8254 channel 2 to mode 0 with the specified value.
   In mode 0, the counter will initially set its gate low when the
   timer expires. For this to be useful, you ought to set it high
   before calling this function. The enable_PIT2 function does this.

   Fixes: the mnemonics were fused to their operands ("outb%%al"),
   which does not assemble, and %al was clobbered without being
   declared in the clobber list.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"   /* command: ch2, lobyte/hibyte, mode 0 */
		" outb %%al,$0x43 \n\t"
		" movb %%dl,%%al  \n\t"   /* low byte of the reload value */
		" outb %%al,$0x42 \n\t"
		" movb %%dh,%%al  \n\t"   /* high byte of the reload value */
		" outb %%al,$0x42"
		: : "d"(value) : "%al");
}
98
99/* Returns the number of times the loop ran before the PIT2 signaled */
/* Spin until PIT channel 2's output (port 0x61, bit 5) goes high.
 * Returns the number of polling iterations, so callers can reject runs
 * where the timer fired almost immediately.
 *
 * Fix: the instruction was fused to its operand ("inb$0x61"), which
 * does not assemble; the separating space is restored.
 */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;
	do {
		++count;
		__asm__ volatile(
			"inb $0x61,%0"
			: "=q"(nmi_sc_val) /* no inputs, no clobbers */);
	} while ((nmi_sc_val & 0x20) == 0);
	return count;
}
112/*
113 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
114 */
115static uint64_t measure_tsc_frequency(void)
116{
117 uint64_t tscStart;
118 uint64_t tscEnd;
119 uint64_t tscDelta = 0xffffffffffffffffULL;
120 unsigned long pollCount;
121 uint64_t retval = 0;
122 int i;
123
124 /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
125 * counter 2. We run this loop 3 times to make sure the cache
126 * is hot and we take the minimum delta from all of the runs.
127 * That is to say that we're biased towards measuring the minimum
128 * number of TSC ticks that occur while waiting for the timer to
129 * expire. That theoretically helps avoid inconsistencies when
130 * running under a VM if the TSC is not virtualized and the host
131 * steals time. The TSC is normally virtualized for VMware.
132 */
133 for(i = 0; i < 10; ++i)
134 {
135 enable_PIT2();
136 set_PIT2_mode0(CALIBRATE_LATCH);
137 tscStart = rdtsc64();
138 pollCount = poll_PIT2_gate();
139 tscEnd = rdtsc64();
140 /* The poll loop must have run at least a few times for accuracy */
141 if(pollCount <= 1)
142 continue;
143 /* The TSC must increment at LEAST once every millisecond. We
144 * should have waited exactly 30 msec so the TSC delta should
145 * be >= 30. Anything less and the processor is way too slow.
146 */
147 if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
148 continue;
149 // tscDelta = min(tscDelta, (tscEnd - tscStart))
150 if( (tscEnd - tscStart) < tscDelta )
151 tscDelta = tscEnd - tscStart;
152 }
153 /* tscDelta is now the least number of TSC ticks the processor made in
154 * a timespan of 0.03 s (e.g. 30 milliseconds)
155 * Linux thus divides by 30 which gives the answer in kiloHertz because
156 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
157 * Hz so we need to convert our milliseconds to seconds. Since we're
158 * dividing by the milliseconds, we simply multiply by 1000.
159 */
160
161 /* Unlike linux, we're not limited to 32-bit, but we do need to take care
162 * that we're going to multiply by 1000 first so we do need at least some
163 * arithmetic headroom. For now, 32-bit should be enough.
164 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
165 */
166 if(tscDelta > (1ULL<<32))
167 retval = 0;
168 else
169 {
170 retval = tscDelta * 1000 / 30;
171 }
172 disable_PIT2();
173 return retval;
174}
175#endif
176
177static inline static void
178set_PIT2(int value)
179{
180 /*
181 * First, tell the clock we are going to write 16 bits to the counter
182 * and enable one-shot mode (command 0xB8 to port 0x43)
183 * Then write the two bytes into the PIT2 clock register (port 0x42).
184 * Loop until the value is "realized" in the clock,
185 * this happens on the next tick.
186 */
187 asm volatile(
188 " movb $0xB8,%%al \n\t"
189 " outb %%al,$0x43 \n\t"
190 " movb %%dl,%%al \n\t"
191 " outb %%al,$0x42 \n\t"
192 " movb %%dh,%%al \n\t"
193 " outb %%al,$0x42 \n"
194 "1: inb $0x42,%%al \n\t"
195 " inb $0x42,%%al \n\t"
196 " cmp %%al,%%dh \n\t"
197 " jne 1b"
198 : : "d"(value) : "%al");
199}
200
201
202static inline static uint64_t
203get_PIT2(unsigned int *value)
204{
205 register uint64_t result;
206 /*
207 * This routine first latches the time (command 0x80 to port 0x43),
208 * then gets the time stamp so we know how long the read will take later.
209 * Read (from port 0x42) and return the current value of the timer.
210 */
211#ifdef __i386__
212 asm volatile(
213 " xorl %%ecx,%%ecx \n\t"
214 " movb $0x80,%%al \n\t"
215 " outb %%al,$0x43 \n\t"
216 " rdtsc \n\t"
217 " pushl %%eax \n\t"
218 " inb $0x42,%%al \n\t"
219 " movb %%al,%%cl \n\t"
220 " inb $0x42,%%al \n\t"
221 " movb %%al,%%ch \n\t"
222 " popl %%eax "
223 : "=A"(result), "=c"(*value));
224#else /* __x86_64__ */
225 asm volatile(
226 " xorq %%rcx,%%rcx \n\t"
227 " movb $0x80,%%al \n\t"
228 " outb %%al,$0x43 \n\t"
229 " rdtsc \n\t"
230 " pushq %%rax \n\t"
231 " inb $0x42,%%al \n\t"
232 " movb %%al,%%cl \n\t"
233 " inb $0x42,%%al \n\t"
234 " movb %%al,%%ch \n\t"
235 " popq %%rax "
236 : "=A"(result), "=c"(*value));
237#endif
238
239 return result;
240}
241
242/*
243 * timeRDTSC()
244 * This routine sets up PIT counter 2 to count down 1/20 of a second.
245 * It pauses until the value is latched in the counter
246 * and then reads the time stamp counter to return to the caller.
247 */
248static uint64_t timeRDTSC(void)
249{
250 intattempts = 0;
251 uint64_t latchTime;
252 uint64_tsaveTime,intermediate;
253 unsigned int timerValue, lastValue;
254 //boolean_tint_enabled;
255 /*
256 * Table of correction factors to account for
257 * - timer counter quantization errors, and
258 * - undercounts 0..5
259 */
260#define SAMPLE_CLKS_EXACT(((double) CLKNUM) / 20.0)
261#define SAMPLE_CLKS_INT((int) CLKNUM / 20)
262#define SAMPLE_NSECS(2000000000LL)
263#define SAMPLE_MULTIPLIER(((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
264#define ROUND64(x)((uint64_t)((x) + 0.5))
265 uint64_tscale[6] = {
266ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)),
267ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)),
268ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)),
269ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)),
270ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)),
271ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
272 };
273
274restart:
275 if (attempts >= 9) // increase to up to 9 attempts.
276 // This will flash-reboot. TODO: Use tscPanic instead.
277 printf("Timestamp counter calibation failed with %d attempts\n", attempts);
278 attempts++;
279 enable_PIT2();// turn on PIT2
280 set_PIT2(0);// reset timer 2 to be zero
281 latchTime = rdtsc64();// get the time stamp to time
282 latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
283 set_PIT2(SAMPLE_CLKS_INT);// set up the timer for (almost) 1/20th a second
284 saveTime = rdtsc64();// now time how long a 20th a second is...
285 get_PIT2(&lastValue);
286 get_PIT2(&lastValue);// read twice, first value may be unreliable
287 do {
288intermediate = get_PIT2(&timerValue);
289if (timerValue > lastValue) {
290// Timer wrapped
291set_PIT2(0);
292disable_PIT2();
293goto restart;
294}
295lastValue = timerValue;
296 } while (timerValue > 5);
297 printf("timerValue %d\n",timerValue);
298 printf("intermediate 0x%016llx\n",intermediate);
299 printf("saveTime 0x%016llx\n",saveTime);
300
301 intermediate -= saveTime;// raw count for about 1/20 second
302 intermediate *= scale[timerValue];// rescale measured time spent
303 intermediate /= SAMPLE_NSECS;// so its exactly 1/20 a second
304 intermediate += latchTime;// add on our save fudge
305
306 set_PIT2(0);// reset timer 2 to be zero
307 disable_PIT2();// turn off PIT 2
308
309 return intermediate;
310}
311
312#ifdef AMD_SUPPORT
313#define MSR_AMD_APERF 0x000000E8
314/*
315 * Original comment/code:
316 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
317 *
318 * Measures the Actual Performance Frequency in Hz (64-bit)
319 * (just a naming change, mperf --> aperf )
320 */
321static uint64_t measure_aperf_frequency(void)
322{
323uint64_t aperfStart;
324uint64_t aperfEnd;
325uint64_t aperfDelta = 0xffffffffffffffffULL;
326unsigned long pollCount;
327uint64_t retval = 0;
328int i;
329
330/* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
331 * counter 2. We run this loop 3 times to make sure the cache
332 * is hot and we take the minimum delta from all of the runs.
333 * That is to say that we're biased towards measuring the minimum
334 * number of APERF ticks that occur while waiting for the timer to
335 * expire.
336 */
337for(i = 0; i < 10; ++i)
338{
339enable_PIT2();
340set_PIT2_mode0(CALIBRATE_LATCH);
341aperfStart = rdmsr64(MSR_AMD_APERF);
342pollCount = poll_PIT2_gate();
343aperfEnd = rdmsr64(MSR_AMD_APERF);
344/* The poll loop must have run at least a few times for accuracy */
345if (pollCount <= 1)
346continue;
347/* The TSC must increment at LEAST once every millisecond.
348 * We should have waited exactly 30 msec so the APERF delta should
349 * be >= 30. Anything less and the processor is way too slow.
350 */
351if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
352continue;
353// tscDelta = MIN(tscDelta, (tscEnd - tscStart))
354if ( (aperfEnd - aperfStart) < aperfDelta )
355aperfDelta = aperfEnd - aperfStart;
356}
357/* mperfDelta is now the least number of MPERF ticks the processor made in
358 * a timespan of 0.03 s (e.g. 30 milliseconds)
359 */
360
361if (aperfDelta > (1ULL<<32))
362retval = 0;
363else
364{
365retval = aperfDelta * 1000 / 30;
366}
367disable_PIT2();
368return retval;
369}
370#endif
371
372#endif
373
374/*
375 License for x2apic_enabled, get_apicbase, compute_bclk.
376
377 Copyright (c) 2010, Intel Corporation
378 All rights reserved.
379
380 Redistribution and use in source and binary forms, with or without
381 modification, are permitted provided that the following conditions are met:
382
383 * Redistributions of source code must retain the above copyright notice,
384 this list of conditions and the following disclaimer.
385 * Redistributions in binary form must reproduce the above copyright notice,
386 this list of conditions and the following disclaimer in the documentation
387 and/or other materials provided with the distribution.
388 * Neither the name of Intel Corporation nor the names of its contributors
389 may be used to endorse or promote products derived from this software
390 without specific prior written permission.
391
392 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
393 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
394 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
395 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
396 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
397 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
398 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
399 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
400 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
401 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
402 */
403static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
404static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
405static uint32_t x2apic_enabled(void);
406static uint32_t get_apicbase(void);
407static uint32_t compute_bclk(void);
/* Read MSR `msr` as two 32-bit halves: EAX -> *lo_data_addr, EDX -> *hi_data_addr. */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
	__asm__ volatile(
		"rdmsr"
		: "=a" (*lo_data_addr), "=d" (*hi_data_addr)
		: "c" (msr)
	);
}
/* Write MSR `msr` from two 32-bit halves: lo_data -> EAX, hi_data -> EDX. */
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
	__asm__ __volatile__ (
		"wrmsr"
		: /* No outputs */
		: "c" (msr), "a" (lo_data), "d" (hi_data)
	);
}
424#define MSR_APIC_BASE 0x1B
425#define APIC_TMR_INITIAL_CNT 0x380
426#define APIC_TMR_CURRENT_CNT 0x390
427#define APIC_TMR_DIVIDE_CFG 0x3E0
428#define MSR_APIC_TMR_INITIAL_CNT 0x838
429#define MSR_APIC_TMR_CURRENT_CNT 0x839
430#define MSR_APIC_TMR_DIVIDE_CFG 0x83E
431static uint32_t x2apic_enabled(void)
432{
433 uint64_t temp64;
434
435 temp64 = rdmsr64(MSR_APIC_BASE);
436
437 return (uint32_t) (temp64 & (1 << 10)) ? 1 : 0;
438}
439static uint32_t get_apicbase(void)
440{
441 uint64_t temp64;
442
443 temp64 = rdmsr64(MSR_APIC_BASE);
444
445 return (uint32_t) (temp64 & 0xfffff000);
446}
/*
 * compute_bclk()
 * Measures the base clock (BCLK) in MHz by counting local APIC timer
 * ticks (divide-by-2) across a fixed 1 ms delay generated with PIT
 * channel 2 in mode 0, then rounding to the nearest 100/12 MHz step.
 * Works in both xAPIC (MMIO) and x2APIC (MSR) mode.
 */
static uint32_t compute_bclk(void)
{
	uint32_t dummy;
	uint32_t start, stop;
	uint8_t temp8;
	uint16_t delay_count;
	uint32_t bclk;

#define DELAY_IN_US 1000

	// Compute fixed delay as time
	// delay count = desired time * PIT frequency
	// PIT frequency = 1.193182 MHz
	delay_count = 1193182 / DELAY_IN_US;

	// PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG 0x61
#define CH2_SPEAKER (1 << 1) // bit 1 -- 1 = speaker enabled 0 = speaker disabled
#define CH2_GATE_IN (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched

	// PIT Command register
#define PIT_MODE_COMMAND_REG 0x43
#define SELECT_CH2 (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite name, no interrupts on CH2

	// PIT Channel 2 data port
#define PIT_CH2_DATA 0x42

	// Disable the PIT channel 2 speaker and gate
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// Setup command and mode
	outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

	// Set time for fixed delay
	outb(PIT_CH2_DATA, (uint8_t) (delay_count));
	outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

	// Prepare to enable channel 2 gate but leave the speaker disabled
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~CH2_SPEAKER;
	temp8 |= CH2_GATE_IN;

	if (x2apic_enabled())
	{
		// Set APIC Timer Divide Value as 2
		wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

		// start APIC timer with a known value
		start = ~0UL;
		wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
	}
	else
	{
		// Set APIC Timer Divide Value as 2
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

		// start APIC timer with a known value
		start = ~0UL;
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
	}

	// Actually start the PIT channel 2
	outb(PIT_CH2_LATCH_REG, temp8);

	// Wait for the fixed delay (PIT output latches high on terminal count)
	while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

	if (x2apic_enabled())
	{
		// read the APIC timer to determine the change that occurred over this fixed delay
		rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

		// stop APIC timer
		wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);

	}
	else
	{
		// read the APIC timer to determine the change that occurred over this fixed delay
		stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

		// stop APIC timer
		*(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
	}

	// Disable channel 2 speaker and gate input
	temp8 = inb(PIT_CH2_LATCH_REG);
	temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
	outb(PIT_CH2_LATCH_REG, temp8);

	// APIC timer counted down with divide-by-2, so elapsed ticks = (start - stop) * 2;
	// dividing by the delay in microseconds yields MHz.
	bclk = (start - stop) * 2 / DELAY_IN_US;

	// Round bclk to the nearest 100/12 integer value (i.e. ~8.33 MHz granularity)
	bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;

	return bclk;
}
549
550
551/*
552 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
553 * - multi. is read from a specific MSR. In the case of Intel, there is:
554 * a max multi. (used to calculate the FSB freq.),
555 * and a current multi. (used to calculate the CPU freq.)
556 * - fsbFrequency = tscFrequency / multi
557 * - cpuFrequency = fsbFrequency * multi
558 */
559
560void scan_cpu(void)
561{
562uint64_tmsr = 0;
563
564
565 uint64_tFeatures = 0;// CPU Features like MMX, SSE2, VT ...
566uint64_tExtFeatures = 0; // CPU Extended Features like SYSCALL, XD, EM64T, LAHF ...
567 uint64_tTSCFreq = 0 ;
568 uint64_t FSBFreq = 0 ;
569 uint64_t CPUFreq = 0;
570
571 uint32_treg[4];
572 uint32_t cores_per_package = 0;
573 uint32_t logical_per_package = 0;
574
575 uint32_tVendor = 0;// Vendor
576uint32_tSignature = 0;// Signature
577uint8_t Stepping = 0;// Stepping
578uint8_t Model = 0;// Model
579uint8_t ExtModel = 0;// Extended Model
580uint8_t Family = 0;// Family
581uint8_t ExtFamily = 0;// Extended Family
582uint32_tNoCores = 0;// No Cores per Package
583uint32_tNoThreads = 0;// Threads per Package
584uint8_t Brand = 0;
585uint32_tMicrocodeVersion = 0; // The microcode version number a.k.a. signature a.k.a. BIOS ID
586
587uint8_t isMobile = 0;
588
589boolean_tdynamic_acceleration = 0;
590boolean_tinvariant_APIC_timer = 0;
591boolean_tfine_grain_clock_mod = 0;
592
593uint32_t cpuid_max_basic = 0;
594uint32_t cpuid_max_ext = 0;
595uint32_tsub_Cstates = 0;
596uint32_t extensions = 0;
597
598uint8_tmaxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
599 charCpuBrandString[48];// 48 Byte Branding String
600
601
602do_cpuid(0, reg);
603Vendor = reg[ebx];
604cpuid_max_basic = reg[eax];
605
606#ifndef AMD_SUPPORT
607 do_cpuid2(0x00000004, 0, reg);
608 cores_per_package= bitfield(reg[eax], 31, 26) + 1;
609#endif
610
611 /* get extended cpuid results */
612do_cpuid(0x80000000, reg);
613cpuid_max_ext = reg[eax];
614
615/* Begin of Copyright: from Apple's XNU cpuid.c */
616
617/* get brand string (if supported) */
618if (cpuid_max_ext > 0x80000004)
619{
620 char str[128], *s;
621/*
622 * The brand string 48 bytes (max), guaranteed to
623 * be NUL terminated.
624 */
625do_cpuid(0x80000002, reg);
626bcopy((char *)reg, &str[0], 16);
627do_cpuid(0x80000003, reg);
628bcopy((char *)reg, &str[16], 16);
629do_cpuid(0x80000004, reg);
630bcopy((char *)reg, &str[32], 16);
631for (s = str; *s != '\0'; s++)
632{
633if (*s != ' ') break;
634}
635
636strlcpy(CpuBrandString,s, sizeof(CpuBrandString));
637
638if (!strncmp(CpuBrandString, CPUID_STRING_UNKNOWN, min(sizeof(CpuBrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
639{
640 /*
641 * This string means we have a firmware-programmable brand string,
642 * and the firmware couldn't figure out what sort of CPU we have.
643 */
644 CpuBrandString[0] = '\0';
645 }
646}
647
648 /*
649 * Get processor signature and decode
650 * and bracket this with the approved procedure for reading the
651 * the microcode version number a.k.a. signature a.k.a. BIOS ID
652 */
653#ifndef AMD_SUPPORT
654wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
655do_cpuid(1, reg);
656MicrocodeVersion = (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
657#else
658do_cpuid(1, reg);
659#endif
660Signature = reg[eax];
661Stepping = bitfield(reg[eax], 3, 0);
662Model = bitfield(reg[eax], 7, 4);
663Family = bitfield(reg[eax], 11, 8);
664ExtModel = bitfield(reg[eax], 19, 16);
665ExtFamily = bitfield(reg[eax], 27, 20);
666Brand = bitfield(reg[ebx], 7, 0);
667Features = quad(reg[ecx], reg[edx]);
668
669 /* Fold extensions into family/model */
670if (Family == 0x0f)
671Family += ExtFamily;
672if (Family == 0x0f || Family == 0x06)
673Model += (ExtModel << 4);
674
675 if (Features & CPUID_FEATURE_HTT)
676logical_per_package =
677 bitfield(reg[ebx], 23, 16);
678else
679logical_per_package = 1;
680
681
682if (cpuid_max_ext >= 0x80000001)
683{
684do_cpuid(0x80000001, reg);
685ExtFeatures =
686 quad(reg[ecx], reg[edx]);
687
688}
689
690if (cpuid_max_ext >= 0x80000007)
691{
692do_cpuid(0x80000007, reg);
693
694/* Fold in the Invariant TSC feature bit, if present */
695ExtFeatures |=
696 reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;
697
698#ifdef AMD_SUPPORT
699/* Fold in the Hardware P-State control feature bit, if present */
700ExtFeatures |=
701 reg[edx] & (uint32_t)_Bit(7);
702
703/* Fold in the read-only effective frequency interface feature bit, if present */
704ExtFeatures |=
705 reg[edx] & (uint32_t)_Bit(10);
706#endif
707}
708
709#ifdef AMD_SUPPORT
710if (cpuid_max_ext >= 0x80000008)
711{
712if (Features & CPUID_FEATURE_HTT)
713{
714do_cpuid(0x80000008, reg);
715cores_per_package= bitfield(reg[ecx], 7 , 0) + 1; // NC + 1
716}
717}
718#endif
719
720 if (cpuid_max_basic >= 0x5) {
721/*
722 * Extract the Monitor/Mwait Leaf info:
723 */
724do_cpuid(5, reg);
725#ifndef AMD_SUPPORT
726 sub_Cstates = reg[edx];
727#endif
728 extensions = reg[ecx];
729}
730
731#ifndef AMD_SUPPORT
732 if (cpuid_max_basic >= 0x6)
733 {
734/*
735 * The thermal and Power Leaf:
736 */
737do_cpuid(6, reg);
738dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
739invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
740 fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
741}
742
743 if ((Vendor == CPUID_VENDOR_INTEL) &&
744(Family == 0x06))
745{
746/*
747 * Find the number of enabled cores and threads
748 * (which determines whether SMT/Hyperthreading is active).
749 */
750switch (Model)
751{
752
753case CPUID_MODEL_DALES_32NM:
754case CPUID_MODEL_WESTMERE:
755case CPUID_MODEL_WESTMERE_EX:
756{
757msr = rdmsr64(MSR_CORE_THREAD_COUNT);
758NoThreads = bitfield((uint32_t)msr, 15, 0);
759NoCores = bitfield((uint32_t)msr, 19, 16);
760break;
761}
762
763case CPUID_MODEL_NEHALEM:
764case CPUID_MODEL_FIELDS:
765case CPUID_MODEL_DALES:
766case CPUID_MODEL_NEHALEM_EX:
767case CPUID_MODEL_SANDYBRIDGE:
768case CPUID_MODEL_JAKETOWN:
769{
770msr = rdmsr64(MSR_CORE_THREAD_COUNT);
771NoThreads = bitfield((uint32_t)msr, 15, 0);
772NoCores = bitfield((uint32_t)msr, 31, 16);
773break;
774}
775}
776 }
777#endif
778 if (NoCores == 0)
779{
780#ifdef AMD_SUPPORT
781if (!cores_per_package) {
782//legacy method
783if ((ExtFeatures & _HBit(1)/* CmpLegacy */) && ( Features & CPUID_FEATURE_HTT) )
784cores_per_package = logical_per_package;
785else
786cores_per_package = 1;
787}
788#endif
789NoThreads = logical_per_package;
790NoCores = cores_per_package ? cores_per_package : 1 ;
791}
792
793/* End of Copyright: from Apple's XNU cpuid.c */
794
795FSBFreq = (uint64_t)(compute_bclk() * 1000000);
796
797#if LEGACY_CPU
798TSCFreq = timeRDTSC() * 20/*measure_tsc_frequency()*/;
799#endif
800
801#ifdef AMD_SUPPORT
802#define K8_FIDVID_STATUS0xC0010042
803#define K10_COFVID_STATUS0xC0010071
804if (ExtFeatures & _Bit(10))
805{
806CPUFreq = measure_aperf_frequency();
807}
808
809 if ((Vendor == CPUID_VENDOR_AMD) && (Family == 0x0f))
810{
811switch(ExtFamily)
812{
813case 0x00: /* K8 */
814msr = rdmsr64(K8_FIDVID_STATUS);
815maxcoef = bitfield(msr, 21, 16) / 2 + 4;
816currcoef = bitfield(msr, 5, 0) / 2 + 4;
817break;
818
819case 0x01: /* K10 */
820 {
821 //uint32_t reg[4];
822msr = rdmsr64(K10_COFVID_STATUS);
823/*
824 do_cpuid2(0x00000006, 0, reg);
825 EffFreq: effective frequency interface
826 if (bitfield(reg[ecx], 0, 0) == 1)
827 {
828 uint64_t aperf = measure_aperf_frequency();
829 CPUFreq = aperf;
830 }
831 */
832// NOTE: tsc runs at the maccoeff (non turbo)
833//*not* at the turbo frequency.
834maxcoef = bitfield(msr, 54, 49) / 2 + 4;
835currcoef = bitfield(msr, 5, 0) + 0x10;
836currdiv = 2 << bitfield(msr, 8, 6);
837
838break;
839}
840case 0x05: /* K14 */
841msr = rdmsr64(K10_COFVID_STATUS);
842currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
843currdiv = (bitfield(msr, 8, 4) + 1) << 2;
844currdiv += bitfield(msr, 3, 0);
845
846break;
847
848case 0x02: /* K11 */
849DBG("K11 detected, but not supported !!!\n");
850// not implimented
851break;
852}
853
854if (!FSBFreq)
855{
856if (maxcoef)
857{
858if (currdiv)
859{
860if (!currcoef) currcoef = maxcoef;
861if (!CPUFreq)
862FSBFreq = ((TSCFreq * currdiv) / currcoef);
863else
864FSBFreq = ((CPUFreq * currdiv) / currcoef);
865
866DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
867} else {
868if (!CPUFreq)
869FSBFreq = (TSCFreq / maxcoef);
870else
871FSBFreq = (CPUFreq / maxcoef);
872DBG("%d\n", currcoef);
873}
874}
875else if (currcoef)
876{
877if (currdiv)
878{
879FSBFreq = ((TSCFreq * currdiv) / currcoef);
880DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
881} else {
882FSBFreq = (TSCFreq / currcoef);
883DBG("%d\n", currcoef);
884}
885}
886}
887
888}
889
890// NOTE: This is not the approved method,
891// the method provided by AMD is:
892// if ((PowerNow == enabled (p->cpu->cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"
893
894if (strstr(CpuBrandString, "obile"))
895isMobile = 1;
896else
897isMobile = 0;
898
899DBG("%s platform detected.\n", isMobile?"Mobile":"Desktop");
900#else
901 if ((Vendor == CPUID_VENDOR_INTEL) &&
902((Family == 0x06) ||
903 (Family == 0x0f)))
904{
905if ((Family == 0x06 && Model >= 0x0c) ||
906(Family == 0x0f && Model >= 0x03))
907{
908/* Nehalem CPU model */
909if (Family == 0x06 && (Model == CPUID_MODEL_NEHALEM ||
910 Model == CPUID_MODEL_FIELDS ||
911 Model == CPUID_MODEL_DALES ||
912 Model == CPUID_MODEL_DALES_32NM ||
913 Model == CPUID_MODEL_WESTMERE ||
914 Model == CPUID_MODEL_NEHALEM_EX ||
915 Model == CPUID_MODEL_WESTMERE_EX ||
916 Model == CPUID_MODEL_SANDYBRIDGE ||
917 Model == CPUID_MODEL_JAKETOWN))
918{
919uint8_tbus_ratio_max = 0;
920uint64_tflex_ratio = 0;
921msr = rdmsr64(MSR_PLATFORM_INFO);
922#if DEBUG_CPU
923 uint32_tmax_ratio = 0, bus_ratio_min = 0;
924
925DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
926#endif
927bus_ratio_max = (msr >> 8) & 0xff;
928//bus_ratio_min = (msr >> 40) & 0xff;
929msr = rdmsr64(MSR_FLEX_RATIO);
930#if DEBUG_CPU
931DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
932#endif
933if ((msr >> 16) & 0x01)
934{
935flex_ratio = (msr >> 8) & 0xff;
936/* bcc9: at least on the gigabyte h67ma-ud2h,
937 where the cpu multipler can't be changed to
938 allow overclocking, the flex_ratio msr has unexpected (to OSX)
939 contents. These contents cause mach_kernel to
940 fail to compute the bus ratio correctly, instead
941 causing the system to crash since tscGranularity
942 is inadvertently set to 0.
943 */
944if (flex_ratio == 0)
945{
946/* Clear bit 16 (evidently the
947 presence bit) */
948wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
949#if DEBUG_CPU
950 msr = rdmsr64(MSR_FLEX_RATIO);
951
952DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);
953#endif
954}
955else
956{
957if (bus_ratio_max > flex_ratio)
958{
959bus_ratio_max = flex_ratio;
960}
961}
962}
963#if LEGACY_CPU
964if (bus_ratio_max)
965{
966FSBFreq = (TSCFreq / bus_ratio_max);
967}
968#endif
969//valv: Turbo Ratio Limit
970if ((Model != 0x2e) && (Model != 0x2f))
971{
972//msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
973CPUFreq = bus_ratio_max * FSBFreq;
974//max_ratio = bus_ratio_max * 10;
975}
976else
977{
978#if LEGACY_CPU
979CPUFreq = TSCFreq;
980#else
981CPUFreq = bus_ratio_max * FSBFreq;
982#endif
983}
984#if DEBUG_CPU
985DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", FSBFreq / 1000000, max_ratio);
986#endif
987currcoef = bus_ratio_max;
988
989 TSCFreq = CPUFreq;
990}
991else
992{
993msr = rdmsr64(MSR_IA32_PERF_STATUS);
994#if DEBUG_CPU
995DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
996#endif
997currcoef = (msr >> 8) & 0x1f;
998/* Non-integer bus ratio for the max-multi*/
999maxdiv = (msr >> 46) & 0x01;
1000/* Non-integer bus ratio for the current-multi (undocumented)*/
1001currdiv = (msr >> 14) & 0x01;
1002
1003if ((Family == 0x06 && Model >= 0x0e) ||
1004(Family == 0x0f)) // This will always be model >= 3
1005{
1006/* On these models, maxcoef defines TSC freq */
1007maxcoef = (msr >> 40) & 0x1f;
1008}
1009else
1010{
1011/* On lower models, currcoef defines TSC freq */
1012/* XXX */
1013maxcoef = currcoef;
1014}
1015if (!currcoef) currcoef = maxcoef;
1016#if LEGACY_CPU
1017if (maxcoef)
1018{
1019
1020if (maxdiv)
1021{
1022FSBFreq = ((TSCFreq * 2) / ((maxcoef * 2) + 1));
1023}
1024else
1025{
1026FSBFreq = (TSCFreq / maxcoef);
1027}
1028
1029if (currdiv)
1030{
1031CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
1032}
1033else
1034{
1035CPUFreq = (FSBFreq * currcoef);
1036}
1037#if DEBUG_CPU
1038DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
1039#endif
1040}
1041#else
1042
1043
1044if (currdiv)
1045{
1046CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
1047}
1048else
1049{
1050CPUFreq = (FSBFreq * currcoef);
1051}
1052
1053if (maxcoef)
1054{
1055if (maxdiv)
1056{
1057TSCFreq = (FSBFreq * ((maxcoef * 2) + 1)) / 2;
1058}
1059else
1060{
1061TSCFreq = FSBFreq * maxcoef;
1062}
1063}
1064#if DEBUG_CPU
1065DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
1066#endif
1067
1068#endif // LEGACY_CPU
1069
1070}
1071}
1072 /* Mobile CPU ? */
1073//Slice
1074 //isMobile = 0;
1075switch (Model)
1076{
1077case 0x0D:
1078isMobile = 1;
1079break;
1080case 0x02:
1081case 0x03:
1082case 0x04:
1083case 0x06:
1084isMobile = (rdmsr64(0x2C) & (1 << 21))? 1 : 0;
1085break;
1086default:
1087isMobile = (rdmsr64(0x17) & (1 << 28)) ? 1 : 0;
1088break;
1089}
1090
1091DBG("%s platform detected.\n", isMobile?"Mobile":"Desktop");
1092}
1093#endif
1094if (!CPUFreq) CPUFreq = TSCFreq;
1095 if (!TSCFreq) TSCFreq = CPUFreq;
1096
1097set_env(envVendor, Vendor);
1098 set_env(envModel, Model);
1099 set_env(envExtModel, ExtModel);
1100
1101set_env(envCPUIDMaxBasic, cpuid_max_basic);
1102set_env(envCPUIDMaxBasic, cpuid_max_ext);
1103#ifndef AMD_SUPPORT
1104set_env(envMicrocodeVersion, MicrocodeVersion);
1105#endif
1106 set_env_copy(envBrandString, CpuBrandString, sizeof(CpuBrandString));
1107set_env(envSignature, Signature);
1108set_env(envStepping, Stepping);
1109set_env(envFamily, Family);
1110set_env(envExtModel, ExtModel);
1111set_env(envExtFamily, ExtFamily);
1112set_env(envBrand, Brand);
1113set_env(envFeatures, Features);
1114 set_env(envExtFeatures, ExtFeatures);
1115#ifndef AMD_SUPPORT
1116set_env(envSubCstates, sub_Cstates);
1117#endif
1118set_env(envExtensions, extensions);
1119#ifndef AMD_SUPPORT
1120set_env(envDynamicAcceleration, dynamic_acceleration);
1121set_env(envInvariantAPICTimer, invariant_APIC_timer);
1122set_env(envFineGrainClockMod, fine_grain_clock_mod);
1123#endif
1124set_env(envNoThreads, NoThreads);
1125set_env(envNoCores, NoCores);
1126set_env(envIsMobile, isMobile);
1127
1128set_env(envMaxCoef, maxcoef);
1129set_env(envMaxDiv, maxdiv);
1130set_env(envCurrCoef, currcoef);
1131set_env(envCurrDiv, currdiv);
1132set_env(envTSCFreq, TSCFreq);
1133set_env(envFSBFreq, FSBFreq);
1134set_env(envCPUFreq, CPUFreq);
1135
1136#ifdef AMD_SUPPORT
1137 msglog("AMD CPU Detection Enabled\n");
1138#endif
1139
1140}

Archive Download this file

Revision: 1972