Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/cpu.c

/*
 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
 * AsereBLN: 2009: cleanup and bugfix
 */

#include "libsaio.h"
#include "platform.h"
#include "cpu.h"

#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

#if DEBUG_CPU
#define DBG(x...)    printf(x)
#else
#define DBG(x...)    msglog(x)
#endif

//#define AMD_SUPPORT

#ifndef INTEL_SUPPORT
#define INTEL_SUPPORT 0 // Default (0: no legacy, 1: legacy)
#endif

#ifdef AMD_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#ifdef INTEL_SUPPORT
#undef INTEL_SUPPORT
#endif
#define LEGACY_CPU 1
#endif

#ifdef INTEL_SUPPORT
#ifdef LEGACY_CPU
#undef LEGACY_CPU
#endif
#define LEGACY_CPU INTEL_SUPPORT
#endif
// In short: if AMD_SUPPORT is defined, then LEGACY_CPU = 1 and INTEL_SUPPORT is disabled;
// otherwise LEGACY_CPU = INTEL_SUPPORT.

static uint64_t __tsc = 0;

#if LEGACY_CPU

// DFE: enable_PIT2 and disable_PIT2 come from older xnu

/*
 * Enable or disable timer 2.
 * Port 0x61 controls timer 2:
 *   bit 0 gates the clock,
 *   bit 1 gates output to speaker.
 */
static inline void enable_PIT2(void)
{
    /* Enable gate, disable speaker */
    __asm__ volatile(
        " inb   $0x61,%%al  \n\t"
        " and   $0xFC,%%al  \n\t"   /* & ~0x03 */
        " or    $1,%%al     \n\t"
        " outb  %%al,$0x61  \n\t"
        : : : "%al" );
}

static inline void disable_PIT2(void)
{
    /* Disable gate and output to speaker */
    __asm__ volatile(
        " inb   $0x61,%%al  \n\t"
        " and   $0xFC,%%al  \n\t"   /* & ~0x03 */
        " outb  %%al,$0x61  \n\t"
        : : : "%al" );
}
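
/*
 * Illustrative sketch (not part of the original file): the same port 0x61
 * gating can be written with libsaio's inb/outb helpers, which this file
 * already uses in compute_bclk() below. Shown only to clarify the inline
 * assembly above; it is compiled out.
 */
#if 0
static inline void enable_PIT2_c(void)
{
    uint8_t val = inb(0x61);
    val &= 0xFC;        /* clear bit 0 (gate) and bit 1 (speaker) */
    val |= 0x01;        /* set bit 0: gate the clock on */
    outb(0x61, val);
}
#endif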
#if 0
static uint64_t measure_tsc_frequency(void);
// DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are
// roughly based on Linux code

/* Set the 8254 channel 2 to mode 0 with the specified value.
   In mode 0, the counter will initially set its gate low when the
   timer expires. For this to be useful, you ought to set it high
   before calling this function. The enable_PIT2 function does this.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
    __asm__ volatile(
        " movb  $0xB0,%%al  \n\t"
        " outb  %%al,$0x43  \n\t"
        " movb  %%dl,%%al   \n\t"
        " outb  %%al,$0x42  \n\t"
        " movb  %%dh,%%al   \n\t"
        " outb  %%al,$0x42"
        : : "d"(value) /*: no clobber */ );
}

/* Returns the number of times the loop ran before the PIT2 signaled */
static inline unsigned long poll_PIT2_gate(void)
{
    unsigned long count = 0;
    unsigned char nmi_sc_val;
    do {
        ++count;
        __asm__ volatile(
            "inb $0x61,%0"
            : "=q"(nmi_sc_val) /* no input, no clobber */ );
    } while ((nmi_sc_val & 0x20) == 0);
    return count;
}

/*
 * DFE: Measures the TSC frequency in Hz (64-bit) using the 8254 PIT
 */
static uint64_t measure_tsc_frequency(void)
{
    uint64_t tscStart;
    uint64_t tscEnd;
    uint64_t tscDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of TSC ticks that occur while waiting for the timer to
     * expire. That theoretically helps avoid inconsistencies when
     * running under a VM if the TSC is not virtualized and the host
     * steals time. The TSC is normally virtualized for VMware.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        tscStart = rdtsc64();
        pollCount = poll_PIT2_gate();
        tscEnd = rdtsc64();
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The TSC must increment at LEAST once every millisecond. We
         * should have waited exactly 30 msec so the TSC delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // tscDelta = min(tscDelta, (tscEnd - tscStart))
        if ((tscEnd - tscStart) < tscDelta)
            tscDelta = tscEnd - tscStart;
    }
    /* tscDelta is now the least number of TSC ticks the processor made in
     * a timespan of 0.03 s (i.e. 30 milliseconds).
     * Linux thus divides by 30 which gives the answer in kilohertz because
     * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
     * Hz so we need to convert our milliseconds to seconds. Since we're
     * dividing by the milliseconds, we simply multiply by 1000.
     */

    /* Unlike Linux, we're not limited to 32-bit, but we do need to take care
     * that we're going to multiply by 1000 first so we do need at least some
     * arithmetic headroom. For now, 32-bit should be enough.
     * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
     */
    if (tscDelta > (1ULL<<32))
        retval = 0;
    else
    {
        retval = tscDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}
#endif
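
/*
 * Worked example of the conversion above: if the smallest measured delta is
 * tscDelta = 60,000,000 ticks over the 30 ms gate, then
 *     Hz = tscDelta * 1000 / 30 = 2,000,000,000
 * i.e. a 2 GHz time stamp counter. Dividing by 30 alone would give kHz,
 * which is why the extra factor of 1000 is applied.
 */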

static inline void
set_PIT2(int value)
{
    /*
     * First, tell the clock we are going to write 16 bits to the counter
     * and enable one-shot mode (command 0xB8 to port 0x43).
     * Then write the two bytes into the PIT2 clock register (port 0x42).
     * Loop until the value is "realized" in the clock;
     * this happens on the next tick.
     */
    asm volatile(
        " movb  $0xB8,%%al  \n\t"
        " outb  %%al,$0x43  \n\t"
        " movb  %%dl,%%al   \n\t"
        " outb  %%al,$0x42  \n\t"
        " movb  %%dh,%%al   \n\t"
        " outb  %%al,$0x42  \n"
        "1: inb $0x42,%%al  \n\t"
        " inb   $0x42,%%al  \n\t"
        " cmp   %%al,%%dh   \n\t"
        " jne   1b"
        : : "d"(value) : "%al");
}
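
/*
 * For reference, the 8254 command byte 0xB8 written above decodes as:
 * bits 7-6 = 10 (select channel 2), bits 5-4 = 11 (lobyte/hibyte access),
 * bits 3-1 = 100 (mode 4, software triggered strobe -- the one-shot
 * behavior the comment refers to), bit 0 = 0 (binary counting).
 */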


static inline uint64_t
get_PIT2(unsigned int *value)
{
    register uint64_t result;
    /*
     * This routine first latches the time (command 0x80 to port 0x43),
     * then gets the time stamp so we know how long the read will take later.
     * Read (from port 0x42) and return the current value of the timer.
     */
#ifdef __i386__
    asm volatile(
        " xorl  %%ecx,%%ecx \n\t"
        " movb  $0x80,%%al  \n\t"
        " outb  %%al,$0x43  \n\t"
        " rdtsc             \n\t"
        " pushl %%eax       \n\t"
        " inb   $0x42,%%al  \n\t"
        " movb  %%al,%%cl   \n\t"
        " inb   $0x42,%%al  \n\t"
        " movb  %%al,%%ch   \n\t"
        " popl  %%eax       "
        : "=A"(result), "=c"(*value));
#else /* __x86_64__ */
    asm volatile(
        " xorq  %%rcx,%%rcx \n\t"
        " movb  $0x80,%%al  \n\t"
        " outb  %%al,$0x43  \n\t"
        " rdtsc             \n\t"
        " pushq %%rax       \n\t"
        " inb   $0x42,%%al  \n\t"
        " movb  %%al,%%cl   \n\t"
        " inb   $0x42,%%al  \n\t"
        " movb  %%al,%%ch   \n\t"
        " popq  %%rax       "
        : "=A"(result), "=c"(*value));
#endif

    return result;
}

/*
 * timeRDTSC()
 * This routine sets up PIT counter 2 to count down 1/20 of a second.
 * It pauses until the value is latched in the counter
 * and then reads the time stamp counter to return to the caller.
 */
static uint64_t timeRDTSC(void)
{
    int          attempts = 0;
    uint64_t     latchTime;
    uint64_t     saveTime, intermediate;
    unsigned int timerValue, lastValue;
    //boolean_t  int_enabled;
    /*
     * Table of correction factors to account for
     *  - timer counter quantization errors, and
     *  - undercounts 0..5
     */
#define SAMPLE_CLKS_EXACT   (((double) CLKNUM) / 20.0)
#define SAMPLE_CLKS_INT     ((int) CLKNUM / 20)
#define SAMPLE_NSECS        (2000000000LL)
#define SAMPLE_MULTIPLIER   (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
#define ROUND64(x)          ((uint64_t)((x) + 0.5))
    uint64_t scale[6] = {
        ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)),
        ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)),
        ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)),
        ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)),
        ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)),
        ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
    };

restart:
    if (attempts >= 9) // increase to up to 9 attempts.
        // This will flash-reboot. TODO: Use tscPanic instead.
        printf("Timestamp counter calibration failed with %d attempts\n", attempts);
    attempts++;
    enable_PIT2();              // turn on PIT2
    set_PIT2(0);                // reset timer 2 to be zero
    latchTime = rdtsc64();      // get the time stamp to time
    latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
    set_PIT2(SAMPLE_CLKS_INT);  // set up the timer for (almost) 1/20th a second
    saveTime = rdtsc64();       // now time how long a 20th a second is...
    get_PIT2(&lastValue);
    get_PIT2(&lastValue);       // read twice, first value may be unreliable
    do {
        intermediate = get_PIT2(&timerValue);
        if (timerValue > lastValue) {
            // Timer wrapped
            set_PIT2(0);
            disable_PIT2();
            goto restart;
        }
        lastValue = timerValue;
    } while (timerValue > 5);
    printf("timerValue   %d\n", timerValue);
    printf("intermediate 0x%016llx\n", intermediate);
    printf("saveTime     0x%016llx\n", saveTime);

    intermediate -= saveTime;           // raw count for about 1/20 second
    intermediate *= scale[timerValue];  // rescale measured time spent
    intermediate /= SAMPLE_NSECS;       // so it is exactly 1/20 a second
    intermediate += latchTime;          // add on our save fudge

    set_PIT2(0);                        // reset timer 2 to be zero
    disable_PIT2();                     // turn off PIT 2

    return intermediate;
}
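
/*
 * timeRDTSC() returns the TSC tick count for 1/20 of a second, so callers
 * multiply by 20 to obtain the frequency in Hz (see the LEGACY_CPU path in
 * scan_cpu() below): a return value of 100,000,000 means a 2 GHz TSC.
 */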

#ifdef AMD_SUPPORT
#define MSR_AMD_APERF 0x000000E8
/*
 * Original comment/code:
 * "DFE: Measures the Max Performance Frequency in Hz (64-bit)"
 *
 * Measures the Actual Performance Frequency in Hz (64-bit)
 * (just a naming change, mperf --> aperf)
 *
 * Note: set_PIT2_mode0() and poll_PIT2_gate() used below are currently
 * compiled out under "#if 0" above, so this AMD path relies on re-enabling them.
 */
static uint64_t measure_aperf_frequency(void)
{
    uint64_t aperfStart;
    uint64_t aperfEnd;
    uint64_t aperfDelta = 0xffffffffffffffffULL;
    unsigned long pollCount;
    uint64_t retval = 0;
    int i;

    /* Time how many APERF ticks elapse in 30 msec using the 8254 PIT
     * counter 2. We run this loop 10 times to make sure the cache
     * is hot and we take the minimum delta from all of the runs.
     * That is to say that we're biased towards measuring the minimum
     * number of APERF ticks that occur while waiting for the timer to
     * expire.
     */
    for (i = 0; i < 10; ++i)
    {
        enable_PIT2();
        set_PIT2_mode0(CALIBRATE_LATCH);
        aperfStart = rdmsr64(MSR_AMD_APERF);
        pollCount = poll_PIT2_gate();
        aperfEnd = rdmsr64(MSR_AMD_APERF);
        /* The poll loop must have run at least a few times for accuracy */
        if (pollCount <= 1)
            continue;
        /* The APERF counter must increment at LEAST once every millisecond.
         * We should have waited exactly 30 msec so the APERF delta should
         * be >= 30. Anything less and the processor is way too slow.
         */
        if ((aperfEnd - aperfStart) <= CALIBRATE_TIME_MSEC)
            continue;
        // aperfDelta = MIN(aperfDelta, (aperfEnd - aperfStart))
        if ((aperfEnd - aperfStart) < aperfDelta)
            aperfDelta = aperfEnd - aperfStart;
    }
    /* aperfDelta is now the least number of APERF ticks the processor made in
     * a timespan of 0.03 s (i.e. 30 milliseconds)
     */

    if (aperfDelta > (1ULL<<32))
        retval = 0;
    else
    {
        retval = aperfDelta * 1000 / 30;
    }
    disable_PIT2();
    return retval;
}
#endif

#endif

/*
 License for x2apic_enabled, get_apicbase, compute_bclk.

 Copyright (c) 2010, Intel Corporation
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of Intel Corporation nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr);
static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data);
static uint32_t x2apic_enabled(void);
static uint32_t get_apicbase(void);
static uint32_t compute_bclk(void);

static inline __attribute__((always_inline)) void rdmsr32(uint32_t msr, uint32_t * lo_data_addr, uint32_t * hi_data_addr)
{
    __asm__ volatile(
        "rdmsr"
        : "=a" (*lo_data_addr), "=d" (*hi_data_addr)
        : "c" (msr)
    );
}

static inline __attribute__((always_inline)) void wrmsr32(uint32_t msr, uint32_t lo_data, uint32_t hi_data)
{
    __asm__ __volatile__ (
        "wrmsr"
        : /* No outputs */
        : "c" (msr), "a" (lo_data), "d" (hi_data)
    );
}
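
/*
 * Illustrative sketch (not in the original): rdmsr32 returns an MSR split
 * into its EDX:EAX halves; the equivalent 64-bit value, as rdmsr64 elsewhere
 * in libsaio returns it, can be recombined like this. Compiled out.
 */
#if 0
static uint64_t rdmsr_combined(uint32_t msr)
{
    uint32_t lo, hi;
    rdmsr32(msr, &lo, &hi);
    return ((uint64_t)hi << 32) | lo;   /* EDX holds the high 32 bits */
}
#endif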
#define MSR_APIC_BASE            0x1B
#define APIC_TMR_INITIAL_CNT     0x380
#define APIC_TMR_CURRENT_CNT     0x390
#define APIC_TMR_DIVIDE_CFG      0x3E0
#define MSR_APIC_TMR_INITIAL_CNT 0x838
#define MSR_APIC_TMR_CURRENT_CNT 0x839
#define MSR_APIC_TMR_DIVIDE_CFG  0x83E

static uint32_t x2apic_enabled(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    return (uint32_t) (temp64 & (1 << 10)) ? 1 : 0;
}
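
/* Bit 10 of IA32_APIC_BASE (MSR 0x1B) is the x2APIC enable (EXTD) bit;
 * bits 31-12 of the same MSR hold the APIC base address masked out below. */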
static uint32_t get_apicbase(void)
{
    uint64_t temp64;

    temp64 = rdmsr64(MSR_APIC_BASE);

    return (uint32_t) (temp64 & 0xfffff000);
}

static uint32_t compute_bclk(void)
{
    uint32_t dummy;
    uint32_t start, stop;
    uint8_t temp8;
    uint16_t delay_count;
    uint32_t bclk;

#define DELAY_IN_US 1000

    // Compute fixed delay as time
    //   delay count = desired time * PIT frequency
    //   PIT frequency = 1.193182 MHz
    delay_count = 1193182 / DELAY_IN_US;
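    // Note: 1193182 / DELAY_IN_US only equals "desired time * PIT frequency"
    // because DELAY_IN_US is 1000 here (1 ms * 1.193182 MHz = 1193 counts).
    // For other delays the count would be 1193182 * DELAY_IN_US / 1000000.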

    // PIT channel 2 gate is controlled by IO port 0x61, bit 0
#define PIT_CH2_LATCH_REG 0x61
#define CH2_SPEAKER  (1 << 1) // bit 1 -- 1 = speaker enabled, 0 = speaker disabled
#define CH2_GATE_IN  (1 << 0) // bit 0 -- 1 = gate enabled, 0 = gate disabled
#define CH2_GATE_OUT (1 << 5) // bit 5 -- 1 = gate latched, 0 = gate not latched

    // PIT Command register
#define PIT_MODE_COMMAND_REG 0x43
#define SELECT_CH2 (2 << 6)
#define ACCESS_MODE_LOBYTE_HIBYTE (3 << 4)
#define MODE0_INTERRUPT_ON_TERMINAL_COUNT 0 // Despite name, no interrupts on CH2

    // PIT Channel 2 data port
#define PIT_CH2_DATA 0x42

    // Disable the PIT channel 2 speaker and gate
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    // Setup command and mode
    outb(PIT_MODE_COMMAND_REG, SELECT_CH2 | ACCESS_MODE_LOBYTE_HIBYTE | MODE0_INTERRUPT_ON_TERMINAL_COUNT);

    // Set time for fixed delay
    outb(PIT_CH2_DATA, (uint8_t) (delay_count));
    outb(PIT_CH2_DATA, (uint8_t) (delay_count >> 8));

    // Prepare to enable channel 2 gate but leave the speaker disabled
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~CH2_SPEAKER;
    temp8 |= CH2_GATE_IN;

    if (x2apic_enabled())
    {
        // Set APIC Timer Divide Value as 2
        wrmsr32(MSR_APIC_TMR_DIVIDE_CFG, 0, 0);

        // start APIC timer with a known value
        start = ~0UL;
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, start, 0);
    }
    else
    {
        // Set APIC Timer Divide Value as 2
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_DIVIDE_CFG) = 0UL;

        // start APIC timer with a known value
        start = ~0UL;
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = start;
    }

    // Actually start the PIT channel 2
    outb(PIT_CH2_LATCH_REG, temp8);

    // Wait for the fixed delay
    while (!(inb(PIT_CH2_LATCH_REG) & CH2_GATE_OUT));

    if (x2apic_enabled())
    {
        // read the APIC timer to determine the change that occurred over this fixed delay
        rdmsr32(MSR_APIC_TMR_CURRENT_CNT, &stop, &dummy);

        // stop APIC timer
        wrmsr32(MSR_APIC_TMR_INITIAL_CNT, 0, 0);
    }
    else
    {
        // read the APIC timer to determine the change that occurred over this fixed delay
        stop = *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_CURRENT_CNT);

        // stop APIC timer
        *(volatile uint32_t *)(uint32_t) (get_apicbase() + APIC_TMR_INITIAL_CNT) = 0UL;
    }

    // Disable channel 2 speaker and gate input
    temp8 = inb(PIT_CH2_LATCH_REG);
    temp8 &= ~(CH2_SPEAKER | CH2_GATE_IN);
    outb(PIT_CH2_LATCH_REG, temp8);

    bclk = (start - stop) * 2 / DELAY_IN_US;
    __tsc = (start - stop) * 1000;

    // Round bclk to the nearest 100/12 integer value
    bclk = ((((bclk * 24) + 100) / 200) * 200) / 24;

    return bclk;
}
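
/*
 * Worked example of the rounding above: the step is 200/24 = 100/12 MHz
 * (about 8.33 MHz). A raw measurement of 132 becomes
 * ((132*24 + 100)/200)*200/24 = 16*200/24 = 133 MHz (the usual "133 MHz"
 * bus clock), and a raw 99 rounds to exactly 100 MHz. The __tsc value is
 * the APIC-timer delta over the 1 ms gate scaled by 1000, i.e. ticks per
 * second.
 */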


/*
 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU.
 * - The multiplier is read from a model-specific MSR. Intel CPUs expose
 *   a maximum multiplier (used to calculate the FSB frequency) and a
 *   current multiplier (used to calculate the CPU frequency):
 *   - fsbFrequency = tscFrequency / multiplier
 *   - cpuFrequency = fsbFrequency * multiplier
 */
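
/*
 * Worked example of the relations above: with a measured tscFrequency of
 * 3,400 MHz and a maximum multiplier of 34, fsbFrequency = 3400/34 =
 * 100 MHz; if the current multiplier reads 26, cpuFrequency = 100 * 26 =
 * 2,600 MHz.
 */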

void scan_cpu(void)
{
    uint64_t msr = 0;

    uint64_t Features = 0;          // CPU Features like MMX, SSE2, VT ...
    uint64_t ExtFeatures = 0;       // CPU Extended Features like SYSCALL, XD, EM64T, LAHF ...
    uint64_t TSCFreq = 0;
    uint64_t FSBFreq = 0;
    uint64_t CPUFreq = 0;

    uint32_t reg[4];
    uint32_t cores_per_package = 0;
    uint32_t logical_per_package = 0;

    uint32_t Vendor = 0;            // Vendor
    uint32_t Signature = 0;         // Signature
    uint8_t  Stepping = 0;          // Stepping
    uint8_t  Model = 0;             // Model
    uint8_t  ExtModel = 0;          // Extended Model
    uint8_t  Family = 0;            // Family
    uint8_t  ExtFamily = 0;         // Extended Family
    uint32_t NoCores = 0;           // Cores per Package
    uint32_t NoThreads = 0;         // Threads per Package
    uint8_t  Brand = 0;
    uint32_t MicrocodeVersion = 0;  // The microcode version number a.k.a. signature a.k.a. BIOS ID

    uint8_t  isMobile = 0;

    boolean_t dynamic_acceleration = 0;
    boolean_t invariant_APIC_timer = 0;
    boolean_t fine_grain_clock_mod = 0;

    uint32_t cpuid_max_basic = 0;
    uint32_t cpuid_max_ext = 0;
    uint32_t sub_Cstates = 0;
    uint32_t extensions = 0;
#ifndef AMD_SUPPORT
    uint32_t Cpuid_cache_linesize = 0;
    uint32_t Cpuid_cache_size = 0;
    uint32_t Cpuid_cores_per_package = 0;

    uint32_t Cache_size[LCACHE_MAX];
    uint32_t Cache_linesize = 0;

    uint32_t cpuid_leaf7_features = 0;
#endif
    uint8_t maxcoef = 0, maxdiv = 0, currcoef = 0, currdiv = 0;
    char    CpuBrandString[48];     // 48 Byte Branding String

    do_cpuid(0, reg);
    Vendor = reg[ebx];
    cpuid_max_basic = reg[eax];

#ifndef AMD_SUPPORT
    do_cpuid2(0x00000004, 0, reg);
    cores_per_package = bitfield(reg[eax], 31, 26) + 1;
#endif

    /* get extended cpuid results */
    do_cpuid(0x80000000, reg);
    cpuid_max_ext = reg[eax];

    /* Begin of Copyright: from Apple's XNU cpuid.c */

    /* get brand string (if supported) */
    if (cpuid_max_ext > 0x80000004)
    {
        char str[128], *s;
        /*
         * The brand string is 48 bytes (max), guaranteed to
         * be NUL terminated.
         */
        do_cpuid(0x80000002, reg);
        bcopy((char *)reg, &str[0], 16);
        do_cpuid(0x80000003, reg);
        bcopy((char *)reg, &str[16], 16);
        do_cpuid(0x80000004, reg);
        bcopy((char *)reg, &str[32], 16);
        for (s = str; *s != '\0'; s++)
        {
            if (*s != ' ') break;
        }

        strlcpy(CpuBrandString, s, sizeof(CpuBrandString));

        if (!strncmp(CpuBrandString, CPUID_STRING_UNKNOWN, min(sizeof(CpuBrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1)))
        {
            /*
             * This string means we have a firmware-programmable brand string,
             * and the firmware couldn't figure out what sort of CPU we have.
             */
            CpuBrandString[0] = '\0';
        }
    }

    /* Get cache and addressing info. */
    if (cpuid_max_ext >= 0x80000006) {
        do_cpuid(0x80000006, reg);
        Cpuid_cache_linesize = bitfield32(reg[ecx], 7, 0);
        Cpuid_cache_size = bitfield32(reg[ecx], 31, 16);
    }

    /*
     * Get processor signature and decode,
     * and bracket this with the approved procedure for reading
     * the microcode version number a.k.a. signature a.k.a. BIOS ID
     */
#ifndef AMD_SUPPORT
    wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
    do_cpuid(1, reg);
    MicrocodeVersion = (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
#else
    do_cpuid(1, reg);
#endif
    Signature = reg[eax];
    Stepping  = bitfield(reg[eax], 3, 0);
    Model     = bitfield(reg[eax], 7, 4);
    Family    = bitfield(reg[eax], 11, 8);
    ExtModel  = bitfield(reg[eax], 19, 16);
    ExtFamily = bitfield(reg[eax], 27, 20);
    Brand     = bitfield(reg[ebx], 7, 0);
    Features  = quad(reg[ecx], reg[edx]);

    /* Fold extensions into family/model */
    if (Family == 0x0f)
        Family += ExtFamily;
    if (Family == 0x0f || Family == 0x06)
        Model += (ExtModel << 4);
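    /*
     * Worked example of the folding above: a CPUID signature of 0x000206A7
     * decodes to stepping 7, model 0xA, family 0x06, extended model 0x2;
     * since family == 0x06 the extended model is folded in, giving
     * Model = 0xA + (0x2 << 4) = 0x2A (Sandy Bridge).
     */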

    if (Features & CPUID_FEATURE_HTT)
        logical_per_package = bitfield(reg[ebx], 23, 16);
    else
        logical_per_package = 1;

    if (cpuid_max_ext >= 0x80000001)
    {
        do_cpuid(0x80000001, reg);
        ExtFeatures = quad(reg[ecx], reg[edx]);
    }

    if (cpuid_max_ext >= 0x80000007)
    {
        do_cpuid(0x80000007, reg);

        /* Fold in the Invariant TSC feature bit, if present */
        ExtFeatures |= reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;

#ifdef AMD_SUPPORT
        /* Fold in the Hardware P-State control feature bit, if present */
        ExtFeatures |= reg[edx] & (uint32_t)_Bit(7);

        /* Fold in the read-only effective frequency interface feature bit, if present */
        ExtFeatures |= reg[edx] & (uint32_t)_Bit(10);
#endif
    }

#ifdef AMD_SUPPORT
    if (cpuid_max_ext >= 0x80000008)
    {
        if (Features & CPUID_FEATURE_HTT)
        {
            do_cpuid(0x80000008, reg);
            cores_per_package = bitfield(reg[ecx], 7, 0) + 1; // NC + 1
        }
    }
#endif

    if (cpuid_max_basic >= 0x5) {
        /*
         * Extract the Monitor/Mwait Leaf info:
         */
        do_cpuid(5, reg);
#ifndef AMD_SUPPORT
        sub_Cstates = reg[edx];
#endif
        extensions = reg[ecx];
    }

#ifndef AMD_SUPPORT
    if (cpuid_max_basic >= 0x6)
    {
        /*
         * The thermal and Power Leaf:
         */
        do_cpuid(6, reg);
        dynamic_acceleration = bitfield(reg[eax], 1, 1); // "Dynamic Acceleration Technology (Turbo Mode)"
        invariant_APIC_timer = bitfield(reg[eax], 2, 2); // "Invariant APIC Timer"
        fine_grain_clock_mod = bitfield(reg[eax], 4, 4);
    }

    if (Model == CPUID_MODEL_IVYBRIDGE) {
        /*
         * Structured extended feature flags (CPUID leaf 7):
         */
        do_cpuid(0x7, reg);
        cpuid_leaf7_features = reg[ebx];

        DBG(" Feature Leaf7:\n");
        DBG("  EBX : 0x%x\n", reg[ebx]);
    }

    {
        uint32_t cpuid_result[4];
        uint32_t reg[4];
        uint32_t index;
        uint32_t linesizes[LCACHE_MAX];
        bool     cpuid_deterministic_supported = false;

        DBG("cpuid_set_cache_info\n");

        bzero(linesizes, sizeof(linesizes));

        /*
         * Get cache info using leaf 4, the "deterministic cache parameters."
         * Most processors Mac OS X supports implement this flavor of CPUID.
         * Loop over each cache on the processor.
         */
        do_cpuid(0, cpuid_result);
        if (cpuid_result[eax] >= 4)
            cpuid_deterministic_supported = true;

        for (index = 0; cpuid_deterministic_supported; index++) {
            cache_type_t type = Lnone;
            uint32_t cache_type;
            uint32_t cache_level;
            //uint32_t cache_sharing;
            uint32_t cache_linesize;
            uint32_t cache_sets;
            uint32_t cache_associativity;
            uint32_t cache_size;
            uint32_t cache_partitions;

            reg[eax] = 4;       /* cpuid request 4 */
            reg[ecx] = index;   /* index starting at 0 */
            cpuid(reg);
            DBG("cpuid(4) index=%d eax=0x%x\n", index, reg[eax]);
            cache_type = bitfield32(reg[eax], 4, 0);
            if (cache_type == 0)
                break;          /* no more caches */
            cache_level         = bitfield32(reg[eax], 7, 5);
            //cache_sharing     = bitfield32(reg[eax], 25, 14) + 1;
            Cpuid_cores_per_package
                                = bitfield32(reg[eax], 31, 26) + 1;
            cache_linesize      = bitfield32(reg[ebx], 11, 0) + 1;
            cache_partitions    = bitfield32(reg[ebx], 21, 12) + 1;
            cache_associativity = bitfield32(reg[ebx], 31, 22) + 1;
            cache_sets          = bitfield32(reg[ecx], 31, 0) + 1;

            /* Map type/levels returned by CPUID into cache_type_t */
            switch (cache_level) {
                case 1:
                    type = cache_type == 1 ? L1D :
                           cache_type == 2 ? L1I :
                           Lnone;
                    break;
                case 2:
                    type = cache_type == 3 ? L2U :
                           Lnone;
                    break;
                case 3:
                    type = cache_type == 3 ? L3U :
                           Lnone;
                    break;
                default:
                    type = Lnone;
            }

            /* The total size of a cache is:
             *  ( linesize * sets * associativity * partitions )
             */
            if (type != Lnone) {
                cache_size = cache_linesize * cache_sets *
                             cache_associativity * cache_partitions;
                Cache_size[type] = cache_size;
                linesizes[type] = cache_linesize;
            }
        }
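
        /*
         * Worked example of the size formula above: a typical L1 data cache
         * reporting a 64-byte line size, 64 sets, 8-way associativity and
         * 1 partition yields 64 * 64 * 8 * 1 = 32768 bytes (32 KB).
         */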

        /*
         * If deterministic cache parameters are not available, use
         * something else
         */
        if (Cpuid_cores_per_package == 0) {
            Cpuid_cores_per_package = 1;

            /* CPUID reports the L2 size in 1 KB units */
            Cache_size[L2U] = Cpuid_cache_size * 1024;

            linesizes[L2U] = Cpuid_cache_linesize;

            DBG(" cache_size[L2U] : %d\n", Cache_size[L2U]);
            DBG(" linesizes[L2U]  : %d\n", Cpuid_cache_linesize);
        }

        /*
         * What linesize to publish? We use the L2 linesize if any,
         * else the L1D.
         */
        if (linesizes[L2U])
            Cache_linesize = linesizes[L2U];
        else if (linesizes[L1D])
            Cache_linesize = linesizes[L1D];
        else stop("no linesize");
        DBG(" cache_linesize  : %d\n", Cache_linesize);
    }

    if ((Vendor == CPUID_VENDOR_INTEL) &&
        (Family == 0x06))
    {
        /*
         * Find the number of enabled cores and threads
         * (which determines whether SMT/Hyperthreading is active).
         */
        switch (Model)
        {
            case CPUID_MODEL_DALES_32NM:
            case CPUID_MODEL_WESTMERE:
            case CPUID_MODEL_WESTMERE_EX:
            {
                msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                NoThreads = bitfield((uint32_t)msr, 15, 0);
                NoCores   = bitfield((uint32_t)msr, 19, 16);
                break;
            }

            case CPUID_MODEL_NEHALEM:
            case CPUID_MODEL_FIELDS:
            case CPUID_MODEL_DALES:
            case CPUID_MODEL_NEHALEM_EX:
            case CPUID_MODEL_SANDYBRIDGE:
            case CPUID_MODEL_JAKETOWN:
            case CPUID_MODEL_IVYBRIDGE:
            {
                msr = rdmsr64(MSR_CORE_THREAD_COUNT);
                NoThreads = bitfield((uint32_t)msr, 15, 0);
                NoCores   = bitfield((uint32_t)msr, 31, 16);
                break;
            }
        }
    }
#endif
    if (NoCores == 0)
    {
#ifdef AMD_SUPPORT
        if (!cores_per_package) {
            // legacy method
            if ((ExtFeatures & _HBit(1)/* CmpLegacy */) && (Features & CPUID_FEATURE_HTT))
                cores_per_package = logical_per_package;
            else
                cores_per_package = 1;
        }
#endif
#ifndef AMD_SUPPORT
        if (Cpuid_cores_per_package) {
            cores_per_package = Cpuid_cores_per_package;
        }
#endif
        NoThreads = logical_per_package;
        NoCores = cores_per_package ? cores_per_package : 1;
    }

    /* End of Copyright: from Apple's XNU cpuid.c */

    FSBFreq = (uint64_t)(compute_bclk() * 1000000);

#if LEGACY_CPU
    TSCFreq = timeRDTSC() * 20 /*measure_tsc_frequency()*/;
#endif

#ifdef AMD_SUPPORT
#define K8_FIDVID_STATUS  0xC0010042
#define K10_COFVID_STATUS 0xC0010071
    if (ExtFeatures & _Bit(10))
    {
        CPUFreq = measure_aperf_frequency();
    }

    if ((Vendor == CPUID_VENDOR_AMD) && (Family == 0x0f))
    {
        switch (ExtFamily)
        {
            case 0x00: /* K8 */
                msr = rdmsr64(K8_FIDVID_STATUS);
                maxcoef  = bitfield(msr, 21, 16) / 2 + 4;
                currcoef = bitfield(msr, 5, 0) / 2 + 4;
                break;

            case 0x01: /* K10 */
            {
                //uint32_t reg[4];
                msr = rdmsr64(K10_COFVID_STATUS);
                /*
                 do_cpuid2(0x00000006, 0, reg);
                 EffFreq: effective frequency interface
                 if (bitfield(reg[ecx], 0, 0) == 1)
                 {
                     uint64_t aperf = measure_aperf_frequency();
                     CPUFreq = aperf;
                 }
                 */
                // NOTE: the TSC runs at the maxcoef (non-turbo) frequency,
                // *not* at the turbo frequency.
                maxcoef  = bitfield(msr, 54, 49) / 2 + 4;
                currcoef = bitfield(msr, 5, 0) + 0x10;
                currdiv  = 2 << bitfield(msr, 8, 6);

                break;
            }
            case 0x05: /* K14 */
                msr = rdmsr64(K10_COFVID_STATUS);
                currcoef = (bitfield(msr, 54, 49) + 0x10) << 2;
                currdiv  = (bitfield(msr, 8, 4) + 1) << 2;
                currdiv += bitfield(msr, 3, 0);

                break;

            case 0x02: /* K11 */
                DBG("K11 detected, but not supported !!!\n");
                // not implemented
                break;
        }

        if (!FSBFreq)
        {
            if (maxcoef)
            {
                if (currdiv)
                {
                    if (!currcoef) currcoef = maxcoef;
                    if (!CPUFreq)
                        FSBFreq = ((TSCFreq * currdiv) / currcoef);
                    else
                        FSBFreq = ((CPUFreq * currdiv) / currcoef);

                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                } else {
                    if (!CPUFreq)
                        FSBFreq = (TSCFreq / maxcoef);
                    else
                        FSBFreq = (CPUFreq / maxcoef);
                    DBG("%d\n", currcoef);
                }
            }
            else if (currcoef)
            {
                if (currdiv)
                {
                    FSBFreq = ((TSCFreq * currdiv) / currcoef);
                    DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
                } else {
                    FSBFreq = (TSCFreq / currcoef);
                    DBG("%d\n", currcoef);
                }
            }
        }
    }

    // NOTE: This is not the approved method;
    // the method provided by AMD is:
    // if ((PowerNow == enabled (p->cpu->cpuid_max_ext >= 0x80000007)) && (StartupFID(??) != MaxFID(??))) then "mobile processor present"

    if (strstr(CpuBrandString, "obile"))
        isMobile = 1;
    else
        isMobile = 0;

    DBG("%s platform detected.\n", isMobile ? "Mobile" : "Desktop");
#else
    if ((Vendor == CPUID_VENDOR_INTEL) &&
        ((Family == 0x06) ||
         (Family == 0x0f)))
    {
        if ((Family == 0x06 && Model >= 0x0c) ||
            (Family == 0x0f && Model >= 0x03))
        {
            /* Nehalem CPU model */
            if (Family == 0x06 && (Model == CPUID_MODEL_NEHALEM ||
                                   Model == CPUID_MODEL_FIELDS ||
                                   Model == CPUID_MODEL_DALES ||
                                   Model == CPUID_MODEL_DALES_32NM ||
                                   Model == CPUID_MODEL_WESTMERE ||
                                   Model == CPUID_MODEL_NEHALEM_EX ||
                                   Model == CPUID_MODEL_WESTMERE_EX ||
                                   Model == CPUID_MODEL_SANDYBRIDGE ||
                                   Model == CPUID_MODEL_JAKETOWN ||
                                   Model == CPUID_MODEL_IVYBRIDGE))
            {
                uint8_t  bus_ratio_max = 0;
                uint64_t flex_ratio = 0;
                msr = rdmsr64(MSR_PLATFORM_INFO);
#if DEBUG_CPU
                uint32_t max_ratio = 0, bus_ratio_min = 0;

                DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
#endif
                bus_ratio_max = (msr >> 8) & 0xff;
                //bus_ratio_min = (msr >> 40) & 0xff;
                msr = rdmsr64(MSR_FLEX_RATIO);
#if DEBUG_CPU
                DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
#endif
                if ((msr >> 16) & 0x01)
                {
                    flex_ratio = (msr >> 8) & 0xff;
                    /* bcc9: at least on the gigabyte h67ma-ud2h,
                       where the cpu multiplier can't be changed to
                       allow overclocking, the flex_ratio msr has unexpected (to OSX)
                       contents. These contents cause mach_kernel to
                       fail to compute the bus ratio correctly, instead
                       causing the system to crash since tscGranularity
                       is inadvertently set to 0.
                     */
                    if (flex_ratio == 0)
                    {
                        /* Clear bit 16 (evidently the presence bit) */
                        wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
#if DEBUG_CPU
                        msr = rdmsr64(MSR_FLEX_RATIO);

                        DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);
#endif
                    }
                    else
                    {
                        if (bus_ratio_max > flex_ratio)
                        {
                            bus_ratio_max = flex_ratio;
                        }
                    }
                }
#if LEGACY_CPU
                if (bus_ratio_max)
                {
                    FSBFreq = (TSCFreq / bus_ratio_max);
                }
#endif
                //valv: Turbo Ratio Limit
                if ((Model != 0x2e) && (Model != 0x2f))
                {
                    //msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
                    CPUFreq = bus_ratio_max * FSBFreq;
                    //max_ratio = bus_ratio_max * 10;
                }
                else
                {
#if LEGACY_CPU
                    CPUFreq = TSCFreq;
#else
                    CPUFreq = bus_ratio_max * FSBFreq;
#endif
                }
#if DEBUG_CPU
                DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", FSBFreq / 1000000, max_ratio);
#endif
                currcoef = bus_ratio_max;

                TSCFreq = CPUFreq;
            }
            else
            {
                msr = rdmsr64(MSR_IA32_PERF_STATUS);
#if DEBUG_CPU
                DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
#endif
                currcoef = (msr >> 8) & 0x1f;
                /* Non-integer bus ratio for the max-multi */
                maxdiv = (msr >> 46) & 0x01;
                /* Non-integer bus ratio for the current-multi (undocumented) */
                currdiv = (msr >> 14) & 0x01;

                if ((Family == 0x06 && Model >= 0x0e) ||
                    (Family == 0x0f)) // This will always be model >= 3
                {
                    /* On these models, maxcoef defines TSC freq */
                    maxcoef = (msr >> 40) & 0x1f;
                }
                else
                {
                    /* On lower models, currcoef defines TSC freq */
                    /* XXX */
                    maxcoef = currcoef;
                }
                if (!currcoef) currcoef = maxcoef;
#if LEGACY_CPU
                if (maxcoef)
                {
                    if (maxdiv)
                    {
                        FSBFreq = ((TSCFreq * 2) / ((maxcoef * 2) + 1));
                    }
                    else
                    {
                        FSBFreq = (TSCFreq / maxcoef);
                    }

                    if (currdiv)
                    {
                        CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
                    }
                    else
                    {
                        CPUFreq = (FSBFreq * currcoef);
                    }
#if DEBUG_CPU
                    DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif
                }
#else
                if (currdiv)
                {
                    CPUFreq = (FSBFreq * ((currcoef * 2) + 1) / 2);
                }
                else
                {
                    CPUFreq = (FSBFreq * currcoef);
                }

                if (maxcoef)
                {
                    if (maxdiv)
                    {
                        TSCFreq = (FSBFreq * ((maxcoef * 2) + 1)) / 2;
                    }
                    else
                    {
                        TSCFreq = FSBFreq * maxcoef;
                    }
                }
#if DEBUG_CPU
                DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "", currcoef, currdiv ? ".5" : "");
#endif

#endif // LEGACY_CPU
            }
        }
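        /*
         * Worked example of the half-step encoding above: with FSBFreq =
         * 200 MHz, currcoef = 13 and currdiv set, CPUFreq = 200 MHz *
         * (13*2 + 1)/2 = 2700 MHz -- the "13.5" multiplier the DBG line
         * prints.
         */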
        /* Mobile CPU ? */
        //Slice
        //isMobile = 0;
        switch (Model)
        {
            case 0x0D:
                isMobile = 1;
                break;
            case 0x02:
            case 0x03:
            case 0x04:
            case 0x06:
                isMobile = (rdmsr64(0x2C) & (1 << 21)) ? 1 : 0;
                break;
            default:
                isMobile = (rdmsr64(0x17) & (1 << 28)) ? 1 : 0;
                break;
        }

        DBG("%s platform detected.\n", isMobile ? "Mobile" : "Desktop");
    }
#endif
    if (!CPUFreq) CPUFreq = TSCFreq;
    if (!TSCFreq) TSCFreq = CPUFreq;

    set_env(envVendor, Vendor);
    set_env(envModel, Model);
    set_env(envExtModel, ExtModel);

    set_env(envCPUIDMaxBasic, cpuid_max_basic);
    set_env(envCPUIDMaxExt, cpuid_max_ext); // assumes the envCPUIDMaxExt key from platform.h; passing envCPUIDMaxBasic here a second time looks like a copy-paste slip
#ifndef AMD_SUPPORT
    set_env(envMicrocodeVersion, MicrocodeVersion);
#endif
    set_env_ptr(envBrandString, CpuBrandString, sizeof(CpuBrandString));
    set_env(envSignature, Signature);
    set_env(envStepping, Stepping);
    set_env(envFamily, Family);
    set_env(envExtModel, ExtModel);
    set_env(envExtFamily, ExtFamily);
    set_env(envBrand, Brand);
    set_env(envFeatures, Features);
    set_env(envExtFeatures, ExtFeatures);
#ifndef AMD_SUPPORT
    set_env(envSubCstates, sub_Cstates);
#endif
    set_env(envExtensions, extensions);
#ifndef AMD_SUPPORT
    set_env(envDynamicAcceleration, dynamic_acceleration);
    set_env(envInvariantAPICTimer, invariant_APIC_timer);
    set_env(envFineGrainClockMod, fine_grain_clock_mod);
    set_env_ptr(envCacheSize, Cache_size, sizeof(uint32_t) * LCACHE_MAX);
    set_env(envCacheLinesize, Cache_linesize);
    set_env(envLeaf7Features, cpuid_leaf7_features);
    set_env(envTSC__, __tsc);
#endif
    set_env(envNoThreads, NoThreads);
    set_env(envNoCores, NoCores);
    set_env(envIsMobile, isMobile);

    set_env(envMaxCoef, maxcoef);
    set_env(envMaxDiv, maxdiv);
    set_env(envCurrCoef, currcoef);
    set_env(envCurrDiv, currdiv);
    set_env(envTSCFreq, TSCFreq);
    set_env(envFSBFreq, FSBFreq);
    set_env(envCPUFreq, CPUFreq);

#ifdef AMD_SUPPORT
    msglog("AMD CPU Detection Enabled\n");
#endif
}
