Chameleon

Chameleon Svn Source Tree

Root/trunk/i386/libsaio/cpu.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 * Bronya: 2015 Improve AMD support, cleanup and bugfix
5 */
6
7#include "libsaio.h"
8#include "platform.h"
9#include "cpu.h"
10#include "bootstruct.h"
11#include "boot.h"
12
13#ifndef DEBUG_CPU
14#define DEBUG_CPU 0
15#endif
16
17#if DEBUG_CPU
18#define DBG(x...)printf(x)
19#else
20#define DBG(x...)
21#endif
22
23#define UI_CPUFREQ_ROUNDING_FACTOR10000000
24
25clock_frequency_info_t gPEClockFrequencyInfo;
26
27static __unused uint64_t rdtsc32(void)
28{
29unsigned int lo,hi;
30__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
31return ((uint64_t)hi << 32) | lo;
32}
33
34/*
35 * timeRDTSC()
36 * This routine sets up PIT counter 2 to count down 1/20 of a second.
37 * It pauses until the value is latched in the counter
38 * and then reads the time stamp counter to return to the caller.
39 */
40static uint64_t timeRDTSC(void)
41{
42intattempts = 0;
43uint32_t latchTime;
44uint64_tsaveTime,intermediate;
45unsigned inttimerValue, lastValue;
46//boolean_tint_enabled;
47/*
48 * Table of correction factors to account for
49 * - timer counter quantization errors, and
50 * - undercounts 0..5
51 */
52#define SAMPLE_CLKS_EXACT(((double) CLKNUM) / 20.0)
53#define SAMPLE_CLKS_INT((int) CLKNUM / 20)
54#define SAMPLE_NSECS(2000000000LL)
55#define SAMPLE_MULTIPLIER(((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
56#define ROUND64(x)((uint64_t)((x) + 0.5))
57uint64_tscale[6] = {
58ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)),
59ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)),
60ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)),
61ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)),
62ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)),
63ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
64};
65
66//int_enabled = ml_set_interrupts_enabled(false);
67
68restart:
69if (attempts >= 3) // increase to up to 9 attempts.
70{
71// This will flash-reboot. TODO: Use tscPanic instead.
72//printf("Timestamp counter calibation failed with %d attempts\n", attempts);
73}
74attempts++;
75enable_PIT2();// turn on PIT2
76set_PIT2(0);// reset timer 2 to be zero
77latchTime = rdtsc32();// get the time stamp to time
78latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
79set_PIT2(SAMPLE_CLKS_INT);// set up the timer for (almost) 1/20th a second
80saveTime = rdtsc32();// now time how long a 20th a second is...
81get_PIT2(&lastValue);
82get_PIT2(&lastValue);// read twice, first value may be unreliable
83do {
84intermediate = get_PIT2(&timerValue);
85if (timerValue > lastValue)
86{
87// Timer wrapped
88set_PIT2(0);
89disable_PIT2();
90goto restart;
91}
92lastValue = timerValue;
93} while (timerValue > 5);
94//printf("timerValue %d\n",timerValue);
95//printf("intermediate 0x%016llX\n",intermediate);
96//printf("saveTime 0x%016llX\n",saveTime);
97
98intermediate -= saveTime;// raw count for about 1/20 second
99intermediate *= scale[timerValue];// rescale measured time spent
100intermediate /= SAMPLE_NSECS;// so its exactly 1/20 a second
101intermediate += latchTime;// add on our save fudge
102
103set_PIT2(0);// reset timer 2 to be zero
104disable_PIT2();// turn off PIT 2
105
106//ml_set_interrupts_enabled(int_enabled);
107return intermediate;
108}
109
110/*
111 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
112 */
113static uint64_t __unused measure_tsc_frequency(void)
114{
115uint64_t tscStart;
116uint64_t tscEnd;
117uint64_t tscDelta = 0xffffffffffffffffULL;
118unsigned long pollCount;
119uint64_t retval = 0;
120int i;
121
122/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
123 * counter 2. We run this loop 3 times to make sure the cache
124 * is hot and we take the minimum delta from all of the runs.
125 * That is to say that we're biased towards measuring the minimum
126 * number of TSC ticks that occur while waiting for the timer to
127 * expire. That theoretically helps avoid inconsistencies when
128 * running under a VM if the TSC is not virtualized and the host
129 * steals time. The TSC is normally virtualized for VMware.
130 */
131for(i = 0; i < 10; ++i)
132{
133enable_PIT2();
134set_PIT2_mode0(CALIBRATE_LATCH);
135tscStart = rdtsc64();
136pollCount = poll_PIT2_gate();
137tscEnd = rdtsc64();
138/* The poll loop must have run at least a few times for accuracy */
139if (pollCount <= 1)
140{
141continue;
142}
143/* The TSC must increment at LEAST once every millisecond.
144 * We should have waited exactly 30 msec so the TSC delta should
145 * be >= 30. Anything less and the processor is way too slow.
146 */
147if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
148{
149continue;
150}
151// tscDelta = MIN(tscDelta, (tscEnd - tscStart))
152if ( (tscEnd - tscStart) < tscDelta )
153{
154tscDelta = tscEnd - tscStart;
155}
156}
157/* tscDelta is now the least number of TSC ticks the processor made in
158 * a timespan of 0.03 s (e.g. 30 milliseconds)
159 * Linux thus divides by 30 which gives the answer in kiloHertz because
160 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
161 * Hz so we need to convert our milliseconds to seconds. Since we're
162 * dividing by the milliseconds, we simply multiply by 1000.
163 */
164
165/* Unlike linux, we're not limited to 32-bit, but we do need to take care
166 * that we're going to multiply by 1000 first so we do need at least some
167 * arithmetic headroom. For now, 32-bit should be enough.
168 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
169 */
170if (tscDelta > (1ULL<<32))
171{
172retval = 0;
173}
174else
175{
176retval = tscDelta * 1000 / 30;
177}
178disable_PIT2();
179return retval;
180}
181
static uint64_t	rtc_set_cyc_per_sec(uint64_t cycles);
#define RTC_FAST_DENOM	0xFFFFFFFF

/*
 * create_mul_quant_GHZ()
 * Computes ((NSEC_PER_SEC / 20) << shift) / quant and returns the low
 * 32 bits of the quotient.
 *
 * shift - left shift applied to the nanosecond count before dividing
 * quant - divisor; the caller passes the TSC ticks counted in 1/20 second
 *
 * A quant of 0 (what a failed calibration hands in) returns 0 instead of
 * executing an integer division by zero.
 */
inline static uint32_t
create_mul_quant_GHZ(int shift, uint32_t quant)
{
	if (quant == 0)
	{
		return 0;
	}

	return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant);
}
190
191struct{
192mach_timespec_tcalend_offset;
193boolean_tcalend_is_set;
194
195int64_tcalend_adjtotal;
196int32_tcalend_adjdelta;
197
198uint32_tboottime;
199
200mach_timebase_info_data_ttimebase_const;
201
202decl_simple_lock_data(,lock)/* real-time clock device lock */
203} rtclock;
204
/* Conversion state set by rtc_set_cyc_per_sec(). */
uint32_t	rtc_quant_shift;	/* clock to nanos right shift */
uint32_t	rtc_quant_scale;	/* clock to nanos multiplier */
uint64_t	rtc_cyc_per_sec;	/* processor cycles per sec */
uint64_t	rtc_cycle_count;	/* clocks in 1/20th second */
209
210static uint64_t rtc_set_cyc_per_sec(uint64_t cycles)
211{
212
213if (cycles > (NSEC_PER_SEC/20))
214{
215// we can use just a "fast" multiply to get nanos
216rtc_quant_shift = 32;
217rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, (uint32_t)cycles);
218rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20
219rtclock.timebase_const.denom = (uint32_t)RTC_FAST_DENOM;
220}
221else
222{
223rtc_quant_shift = 26;
224rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, (uint32_t)cycles);
225rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20
226rtclock.timebase_const.denom = (uint32_t)cycles;
227}
228rtc_cyc_per_sec = cycles*20;// multiply it by 20 and we are done..
229// BUT we also want to calculate...
230
231cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2))
232 / UI_CPUFREQ_ROUNDING_FACTOR)
233* UI_CPUFREQ_ROUNDING_FACTOR;
234
235/*
236 * Set current measured speed.
237 */
238if (cycles >= 0x100000000ULL)
239{
240gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL;
241}
242else
243{
244gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles;
245}
246gPEClockFrequencyInfo.cpu_frequency_hz = cycles;
247
248//printf("[RTCLOCK_1] frequency %llu (%llu) %llu\n", cycles, rtc_cyc_per_sec,timeRDTSC() * 20);
249return(rtc_cyc_per_sec);
250}
251
252// Bronya C1E fix
253static void post_startup_cpu_fixups(void)
254{
255/*
256 * Some AMD processors support C1E state. Entering this state will
257 * cause the local APIC timer to stop, which we can't deal with at
258 * this time.
259 */
260
261uint64_t reg;
262verbose("\tLooking to disable C1E if is already enabled by the BIOS:\n");
263reg = rdmsr64(MSR_AMD_INT_PENDING_CMP_HALT);
264/* Disable C1E state if it is enabled by the BIOS */
265if ((reg >> AMD_ACTONCMPHALT_SHIFT) & AMD_ACTONCMPHALT_MASK)
266{
267reg &= ~(AMD_ACTONCMPHALT_MASK << AMD_ACTONCMPHALT_SHIFT);
268wrmsr64(MSR_AMD_INT_PENDING_CMP_HALT, reg);
269verbose("\tC1E disabled!\n");
270}
271}
272
273/*
274 * Large memcpy() into MMIO space can take longer than 1 clock tick (55ms).
275 * The timer interrupt must remain responsive when updating VRAM so
276 * as not to miss timer interrupts during countdown().
277 *
278 * If interrupts are enabled, use normal memcpy.
279 *
280 * If interrupts are disabled, breaks memcpy down
281 * into 128K chunks, times itself and makes a bios
282 * real-mode call every 25 msec in order to service
283 * pending interrupts.
284 *
285 * -- zenith432, May 22nd, 2016
286 */
287void* memcpy_interruptible(void* dst, const void* src, size_t len)
288{
289uint64_t tscFreq, lastTsc;
290uint32_t eflags, threshold;
291ptrdiff_t offset;
292const size_t chunk = 131072U;// 128K
293
294if (len <= chunk)
295{
296/*
297 * Short memcpy - use normal.
298 */
299return memcpy(dst, src, len);
300}
301
302__asm__ volatile("pushfl; popl %0" : "=r"(eflags));
303if (eflags & 0x200U)
304{
305/*
306 * Interrupts are enabled - use normal memcpy.
307 */
308return memcpy(dst, src, len);
309}
310
311tscFreq = Platform.CPU.TSCFrequency;
312if ((uint32_t) (tscFreq >> 32))
313{
314/*
315 * If TSC Frequency >= 2 ** 32, use a default time threshold.
316 */
317threshold = (~0U) / 40U;
318}
319else if (!(uint32_t) tscFreq)
320{
321/*
322 * If early on and TSC Frequency hasn't been estimated yet,
323 * use normal memcpy.
324 */
325return memcpy(dst, src, len);
326}
327else
328{
329threshold = ((uint32_t) tscFreq) / 40U;
330}
331
332/*
333 * Do the work
334 */
335offset = 0;
336lastTsc = rdtsc64();
337do
338{
339(void) memcpy((char*) dst + offset, (const char*) src + offset, chunk);
340offset += (ptrdiff_t) chunk;
341len -= chunk;
342if ((rdtsc64() - lastTsc) < threshold)
343{
344continue;
345}
346(void) readKeyboardStatus();// visit real-mode
347lastTsc = rdtsc64();
348}
349while (len > chunk);
350if (len)
351{
352(void) memcpy((char*) dst + offset, (const char*) src + offset, len);
353}
354return dst;
355}
356
357/*
358 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
359 * - multi. is read from a specific MSR. In the case of Intel, there is:
360 * a max multi. (used to calculate the FSB freq.),
361 * and a current multi. (used to calculate the CPU freq.)
362 * - busFrequency = tscFrequency / multi
363 * - cpuFrequency = busFrequency * multi
364 */
365
/* Decimal powers, each built from the previous one as unsigned long long: */
#define kilo	(1000ULL)
#define Mega	(kilo * kilo)
#define Giga	(kilo * Mega)
#define Tera	(kilo * Giga)
#define Peta	(kilo * Tera)

/* Shifts hi (widened to 64 bits) left by 32 and ORs lo into the result. */
#define quad(hi,lo)	(((uint64_t)(hi)) << 32 | (lo))
374
375void get_cpuid(PlatformInfo_t *p)
376{
377
378charstr[128];
379uint32_treg[4];
380char*s= 0;
381
382
383do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]); // MaxFn, Vendor
384do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]); // Signature, stepping, features
385do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]); // TLB/Cache/Prefetch
386
387do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]); // S/N
388do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]); // Get the max extended cpuid
389
390if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 8)
391{
392do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
393do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
394}
395else if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1)
396{
397do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
398}
399
400// ==============================================================
401
402/* get BrandString (if supported) */
403/* Copyright: from Apple's XNU cpuid.c */
404if (p->CPU.CPUID[CPUID_80][0] > 0x80000004)
405{
406bzero(str, 128);
407/*
408 * The BrandString 48 bytes (max), guaranteed to
409 * be NULL terminated.
410 */
411do_cpuid(0x80000002, reg);
412memcpy(&str[0], (char *)reg, 16);
413do_cpuid(0x80000003, reg);
414memcpy(&str[16], (char *)reg, 16);
415do_cpuid(0x80000004, reg);
416memcpy(&str[32], (char *)reg, 16);
417for (s = str; *s != '\0'; s++)
418{
419if (*s != ' ')
420{
421break;
422}
423}
424strlcpy(p->CPU.BrandString, s, 48);
425
426if (!strncmp(p->CPU.BrandString, CPU_STRING_UNKNOWN, MIN(sizeof(p->CPU.BrandString), (unsigned)strlen(CPU_STRING_UNKNOWN) + 1)))
427{
428/*
429 * This string means we have a firmware-programmable brand string,
430 * and the firmware couldn't figure out what sort of CPU we have.
431 */
432p->CPU.BrandString[0] = '\0';
433}
434p->CPU.BrandString[47] = '\0';
435//DBG("\tBrandstring = %s\n", p->CPU.BrandString);
436}
437
438// ==============================================================
439
440switch(p->CPU.BrandString[0])
441{
442case 'A':
443/* AMD Processors */
444// The cache information is only in ecx and edx so only save
445// those registers
446
447do_cpuid(5, p->CPU.CPUID[CPUID_5]); // Monitor/Mwait
448
449do_cpuid(0x80000005, p->CPU.CPUID[CPUID_85]); // TLB/Cache/Prefetch
450do_cpuid(0x80000006, p->CPU.CPUID[CPUID_86]); // TLB/Cache/Prefetch
451do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
452
453break;
454
455case 'G':
456/* Intel Processors */
457do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]); // Cache Index for Inte
458
459if (p->CPU.CPUID[CPUID_0][0] >= 0x5)// Monitor/Mwait
460{
461do_cpuid(5, p->CPU.CPUID[CPUID_5]);
462}
463
464if (p->CPU.CPUID[CPUID_0][0] >= 6)// Thermal/Power
465{
466do_cpuid(6, p->CPU.CPUID[CPUID_6]);
467}
468
469break;
470}
471}
472void scan_cpu(PlatformInfo_t *p)
473{
474verbose("[ CPU INFO ]\n");
475get_cpuid(p);
476
477uint64_tbusFCvtt2n;
478uint64_ttscFCvtt2n;
479uint64_ttscFreq= 0;
480uint64_tbusFrequency= 0;
481uint64_tcpuFrequency= 0;
482uint64_tmsr= 0;
483uint64_tflex_ratio= 0;
484uint64_tcpuid_features;
485
486uint32_tmax_ratio= 0;
487uint32_tmin_ratio= 0;
488uint32_treg[4];
489uint32_tcores_per_package= 0;
490uint32_tlogical_per_package= 1;
491uint32_tthreads_per_core= 1;
492
493uint8_tbus_ratio_max= 0;
494uint8_tbus_ratio_min= 0;
495uint8_tcurrdiv= 0;
496uint8_tcurrcoef= 0;
497uint8_tmaxdiv= 0;
498uint8_tmaxcoef= 0;
499uint8_tpic0_mask;
500uint8_tcpuMultN2= 0;
501
502const char*newratio;
503
504intlen= 0;
505intmyfsb= 0;
506inti= 0;
507
508
509/* http://www.flounder.com/cpuid_explorer2.htm
510 EAX (Intel):
511 31 28 27 20 19 16 1514 1312 11 8 7 4 3 0
512 +--------+----------------+--------+----+----+--------+--------+--------+
513 |########|Extended family |Extmodel|####|type|familyid| model |stepping|
514 +--------+----------------+--------+----+----+--------+--------+--------+
515
516 EAX (AMD):
517 31 28 27 20 19 16 1514 1312 11 8 7 4 3 0
518 +--------+----------------+--------+----+----+--------+--------+--------+
519 |########|Extended family |Extmodel|####|####|familyid| model |stepping|
520 +--------+----------------+--------+----+----+--------+--------+--------+
521*/
522///////////////////-- MaxFn,Vendor --////////////////////////
523p->CPU.Vendor= p->CPU.CPUID[CPUID_0][1];
524
525///////////////////-- Signature, stepping, features -- //////
526cpuid_features = quad(p->CPU.CPUID[CPUID_1][ecx], p->CPU.CPUID[CPUID_1][edx]);
527if (bit(28) & p->CPU.CPUID[CPUID_1][edx]) // HTT/Multicore
528{
529logical_per_package = bitfield(p->CPU.CPUID[CPUID_1][ebx], 23, 16);
530}
531else
532{
533logical_per_package = 1;
534}
535
536p->CPU.Signature= p->CPU.CPUID[CPUID_1][0];
537p->CPU.Stepping= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 3, 0);// stepping = cpu_feat_eax & 0xF;
538p->CPU.Model= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 7, 4);// model = (cpu_feat_eax >> 4) & 0xF;
539p->CPU.Family= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 11, 8);// family = (cpu_feat_eax >> 8) & 0xF;
540//p->CPU.Type= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 13, 12);// type = (cpu_feat_eax >> 12) & 0x3;
541p->CPU.ExtModel= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);// ext_model = (cpu_feat_eax >> 16) & 0xF;
542p->CPU.ExtFamily= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);// ext_family = (cpu_feat_eax >> 20) & 0xFF;
543
544if (p->CPU.Family == 0x0f)
545{
546p->CPU.Family += p->CPU.ExtFamily;
547}
548
549if (p->CPU.Family == 0x0f || p->CPU.Family == 0x06)
550{
551p->CPU.Model += (p->CPU.ExtModel << 4);
552}
553
554switch (p->CPU.Vendor)
555{
556case CPUID_VENDOR_INTEL:
557{
558/* Based on Apple's XNU cpuid.c - Deterministic cache parameters */
559if ((p->CPU.CPUID[CPUID_0][eax] > 3) && (p->CPU.CPUID[CPUID_0][eax] < 0x80000000))
560{
561for (i = 0; i < 0xFF; i++) // safe loop
562{
563do_cpuid2(0x00000004, i, reg); // AX=4: Fn, CX=i: cache index
564if (bitfield(reg[eax], 4, 0) == 0)
565{
566break;
567}
568cores_per_package = bitfield(reg[eax], 31, 26) + 1;
569}
570}
571
572if (i > 0)
573{
574cores_per_package = bitfield(p->CPU.CPUID[CPUID_4][eax], 31, 26) + 1; // i = cache index
575threads_per_core = bitfield(p->CPU.CPUID[CPUID_4][eax], 25, 14) + 1;
576}
577
578if (cores_per_package == 0)
579{
580cores_per_package = 1;
581}
582
583switch (p->CPU.Model)
584{
585case CPUID_MODEL_NEHALEM: // Intel Core i7 LGA1366 (45nm)
586case CPUID_MODEL_FIELDS: // Intel Core i5, i7 LGA1156 (45nm)
587case CPUID_MODEL_CLARKDALE: // Intel Core i3, i5, i7 LGA1156 (32nm)
588case CPUID_MODEL_NEHALEM_EX:
589case CPUID_MODEL_JAKETOWN:
590case CPUID_MODEL_SANDYBRIDGE:
591case CPUID_MODEL_IVYBRIDGE:
592case CPUID_MODEL_IVYBRIDGE_XEON:
593case CPUID_MODEL_HASWELL_U5:
594case CPUID_MODEL_HASWELL:
595case CPUID_MODEL_HASWELL_SVR:
596case CPUID_MODEL_HASWELL_ULT:
597case CPUID_MODEL_HASWELL_ULX:
598case CPUID_MODEL_BROADWELL_HQ:
599case CPUID_MODEL_BRASWELL:
600case CPUID_MODEL_AVOTON:
601case CPUID_MODEL_SKYLAKE:
602case CPUID_MODEL_BRODWELL_SVR:
603case CPUID_MODEL_BRODWELL_MSVR:
604case CPUID_MODEL_KNIGHT:
605case CPUID_MODEL_ANNIDALE:
606case CPUID_MODEL_GOLDMONT:
607case CPUID_MODEL_VALLEYVIEW:
608case CPUID_MODEL_SKYLAKE_S:
609case CPUID_MODEL_SKYLAKE_AVX:
610case CPUID_MODEL_CANNONLAKE:
611msr = rdmsr64(MSR_CORE_THREAD_COUNT); // 0x35
612p->CPU.NoCores= (uint32_t)bitfield((uint32_t)msr, 31, 16);
613p->CPU.NoThreads= (uint32_t)bitfield((uint32_t)msr, 15, 0);
614break;
615
616case CPUID_MODEL_DALES:
617case CPUID_MODEL_WESTMERE: // Intel Core i7 LGA1366 (32nm) 6 Core
618case CPUID_MODEL_WESTMERE_EX:
619msr = rdmsr64(MSR_CORE_THREAD_COUNT);
620p->CPU.NoCores= (uint32_t)bitfield((uint32_t)msr, 19, 16);
621p->CPU.NoThreads= (uint32_t)bitfield((uint32_t)msr, 15, 0);
622break;
623case CPUID_MODEL_ATOM_3700:
624p->CPU.NoCores= 4;
625p->CPU.NoThreads= 4;
626break;
627case CPUID_MODEL_ATOM:
628p->CPU.NoCores= 2;
629p->CPU.NoThreads= 2;
630break;
631default:
632p->CPU.NoCores= 0;
633break;
634}
635
636// workaround for Xeon Harpertown and Yorkfield
637if ((p->CPU.Model == CPUID_MODEL_PENRYN) &&
638(p->CPU.NoCores== 0))
639{
640if ((strstr(p->CPU.BrandString, "X54")) ||
641(strstr(p->CPU.BrandString, "E54")) ||
642(strstr(p->CPU.BrandString, "W35")) ||
643(strstr(p->CPU.BrandString, "X34")) ||
644(strstr(p->CPU.BrandString, "X33")) ||
645(strstr(p->CPU.BrandString, "L33")) ||
646(strstr(p->CPU.BrandString, "X32")) ||
647(strstr(p->CPU.BrandString, "L3426")) ||
648(strstr(p->CPU.BrandString, "L54")))
649{
650p->CPU.NoCores= 4;
651p->CPU.NoThreads= 4;
652} else if (strstr(p->CPU.BrandString, "W36")) {
653p->CPU.NoCores= 6;
654p->CPU.NoThreads= 6;
655} else { //other Penryn and Wolfdale
656p->CPU.NoCores= 0;
657p->CPU.NoThreads= 0;
658}
659}
660
661if (p->CPU.NoCores == 0)
662{
663p->CPU.NoCores= cores_per_package;
664p->CPU.NoThreads= logical_per_package;
665}
666
667// MSR is *NOT* available on the Intel Atom CPU
668// workaround for N270. I don't know why it detected wrong
669if ((p->CPU.Model == CPUID_MODEL_ATOM) && (strstr(p->CPU.BrandString, "270")))
670{
671p->CPU.NoCores= 1;
672p->CPU.NoThreads= 2;
673}
674
675// workaround for Quad
676if ( strstr(p->CPU.BrandString, "Quad") )
677{
678p->CPU.NoCores= 4;
679p->CPU.NoThreads= 4;
680}
681}
682
683break;
684
685case CPUID_VENDOR_AMD:
686{
687post_startup_cpu_fixups();
688cores_per_package = bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1;
689threads_per_core = cores_per_package;
690
691if (cores_per_package == 0)
692{
693cores_per_package = 1;
694}
695
696p->CPU.NoCores= cores_per_package;
697p->CPU.NoThreads= logical_per_package;
698
699if (p->CPU.NoCores == 0)
700{
701p->CPU.NoCores = 1;
702p->CPU.NoThreads= 1;
703}
704}
705break;
706
707default :
708stop("Unsupported CPU detected! System halted.");
709}
710
711/* setup features */
712if ((bit(23) & p->CPU.CPUID[CPUID_1][3]) != 0)
713{
714p->CPU.Features |= CPU_FEATURE_MMX;
715}
716
717if ((bit(25) & p->CPU.CPUID[CPUID_1][3]) != 0)
718{
719p->CPU.Features |= CPU_FEATURE_SSE;
720}
721
722if ((bit(26) & p->CPU.CPUID[CPUID_1][3]) != 0)
723{
724p->CPU.Features |= CPU_FEATURE_SSE2;
725}
726
727if ((bit(0) & p->CPU.CPUID[CPUID_1][2]) != 0)
728{
729p->CPU.Features |= CPU_FEATURE_SSE3;
730}
731
732if ((bit(19) & p->CPU.CPUID[CPUID_1][2]) != 0)
733{
734p->CPU.Features |= CPU_FEATURE_SSE41;
735}
736
737if ((bit(20) & p->CPU.CPUID[CPUID_1][2]) != 0)
738{
739p->CPU.Features |= CPU_FEATURE_SSE42;
740}
741
742if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0)
743{
744p->CPU.Features |= CPU_FEATURE_EM64T;
745}
746
747if ((bit(5) & p->CPU.CPUID[CPUID_1][3]) != 0)
748{
749p->CPU.Features |= CPU_FEATURE_MSR;
750}
751
752if ((p->CPU.NoThreads > p->CPU.NoCores))
753{
754p->CPU.Features |= CPU_FEATURE_HTT;
755}
756
757pic0_mask = inb(0x21U);
758outb(0x21U, 0xFFU); // mask PIC0 interrupts for duration of timing tests
759
760uint64_t cycles;
761cycles = timeRDTSC();
762tscFreq = rtc_set_cyc_per_sec(cycles);
763DBG("cpu freq classic = 0x%016llx\n", tscFreq);
764// if usual method failed
765if ( tscFreq < 1000 )//TEST
766{
767tscFreq = measure_tsc_frequency();//timeRDTSC() * 20;//measure_tsc_frequency();
768// DBG("cpu freq timeRDTSC = 0x%016llx\n", tscFrequency);
769}
770
771if (p->CPU.Vendor==CPUID_VENDOR_INTEL && ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)))
772{
773int intelCPU = p->CPU.Model;
774if (p->CPU.Family == 0x06)
775{
776/* Nehalem CPU model */
777switch (p->CPU.Model)
778{
779case CPUID_MODEL_NEHALEM:
780case CPUID_MODEL_FIELDS:
781case CPUID_MODEL_CLARKDALE:
782case CPUID_MODEL_DALES:
783case CPUID_MODEL_WESTMERE:
784case CPUID_MODEL_NEHALEM_EX:
785case CPUID_MODEL_WESTMERE_EX:
786/* --------------------------------------------------------- */
787case CPUID_MODEL_SANDYBRIDGE:
788case CPUID_MODEL_JAKETOWN:
789case CPUID_MODEL_IVYBRIDGE_XEON:
790case CPUID_MODEL_IVYBRIDGE:
791case CPUID_MODEL_ATOM_3700:
792case CPUID_MODEL_HASWELL:
793case CPUID_MODEL_HASWELL_U5:
794case CPUID_MODEL_HASWELL_SVR:
795
796case CPUID_MODEL_HASWELL_ULT:
797case CPUID_MODEL_HASWELL_ULX:
798case CPUID_MODEL_BROADWELL_HQ:
799case CPUID_MODEL_SKYLAKE_S:
800/* --------------------------------------------------------- */
801msr = rdmsr64(MSR_PLATFORM_INFO);
802DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
803bus_ratio_max = bitfield(msr, 15, 8);
804bus_ratio_min = bitfield(msr, 47, 40); //valv: not sure about this one (Remarq.1)
805msr = rdmsr64(MSR_FLEX_RATIO);
806DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
807if (bitfield(msr, 16, 16))
808{
809flex_ratio = bitfield(msr, 15, 8);
810// bcc9: at least on the gigabyte h67ma-ud2h,
811// where the cpu multipler can't be changed to
812// allow overclocking, the flex_ratio msr has unexpected (to OSX)
813// contents.These contents cause mach_kernel to
814// fail to compute the bus ratio correctly, instead
815// causing the system to crash since tscGranularity
816// is inadvertently set to 0.
817
818if (flex_ratio == 0)
819{
820// Clear bit 16 (evidently the presence bit)
821wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
822msr = rdmsr64(MSR_FLEX_RATIO);
823DBG("CPU: Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
824}
825else
826{
827if (bus_ratio_max > flex_ratio)
828{
829bus_ratio_max = flex_ratio;
830}
831}
832}
833
834if (bus_ratio_max)
835{
836busFrequency = (tscFreq / bus_ratio_max);
837}
838
839//valv: Turbo Ratio Limit
840if ((intelCPU != 0x2e) && (intelCPU != 0x2f))
841{
842msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
843
844cpuFrequency = bus_ratio_max * busFrequency;
845max_ratio = bus_ratio_max * 10;
846}
847else
848{
849cpuFrequency = tscFreq;
850}
851
852if ((getValueForKey(kbusratio, &newratio, &len, &bootInfo->chameleonConfig)) && (len <= 4))
853{
854max_ratio = atoi(newratio);
855max_ratio = (max_ratio * 10);
856if (len >= 3)
857{
858max_ratio = (max_ratio + 5);
859}
860
861verbose("\tBus-Ratio: min=%d, max=%s\n", bus_ratio_min, newratio);
862
863// extreme overclockers may love 320 ;)
864if ((max_ratio >= min_ratio) && (max_ratio <= 320))
865{
866cpuFrequency = (busFrequency * max_ratio) / 10;
867if (len >= 3)
868{
869maxdiv = 1;
870}
871else
872{
873maxdiv = 0;
874}
875}
876else
877{
878max_ratio = (bus_ratio_max * 10);
879}
880}
881//valv: to be uncommented if Remarq.1 didn't stick
882//if (bus_ratio_max > 0) bus_ratio = flex_ratio;
883p->CPU.MaxRatio = max_ratio;
884p->CPU.MinRatio = min_ratio;
885
886myfsb = busFrequency / 1000000;
887verbose("\tSticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", myfsb, max_ratio/10); // Bungo: fixed wrong Bus-Ratio readout
888currcoef = bus_ratio_max;
889
890break;
891
892default:
893msr = rdmsr64(MSR_IA32_PERF_STATUS);
894DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
895currcoef = bitfield(msr, 12, 8); // Bungo: reverted to 2263 state because of wrong old CPUs freq. calculating
896// Non-integer bus ratio for the max-multi
897maxdiv = bitfield(msr, 46, 46);
898// Non-integer bus ratio for the current-multi (undocumented)
899currdiv = bitfield(msr, 14, 14);
900
901// This will always be model >= 3
902if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f))
903{
904/* On these models, maxcoef defines TSC freq */
905maxcoef = bitfield(msr, 44, 40);
906}
907else
908{
909// On lower models, currcoef defines TSC freq
910// XXX
911maxcoef = currcoef;
912}
913
914if (!currcoef)
915{
916currcoef = maxcoef;
917}
918
919if (maxcoef)
920{
921if (maxdiv)
922{
923busFrequency = ((tscFreq * 2) / ((maxcoef * 2) + 1));
924}
925else
926{
927busFrequency = (tscFreq / maxcoef);
928}
929
930if (currdiv)
931{
932cpuFrequency = (busFrequency * ((currcoef * 2) + 1) / 2);
933}
934else
935{
936cpuFrequency = (busFrequency * currcoef);
937}
938
939DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
940}
941break;
942}
943}
944// Mobile CPU
945if (rdmsr64(MSR_IA32_PLATFORM_ID) & (1<<28))
946{
947p->CPU.Features |= CPU_FEATURE_MOBILE;
948}
949}
950
951else if (p->CPU.Vendor==CPUID_VENDOR_AMD)
952{
953switch(p->CPU.Family)
954{
955case 0xF: /* K8 */
956{
957uint64_t fidvid = 0;
958uint64_t cpuMult;
959uint64_t fid;
960
961fidvid = rdmsr64(K8_FIDVID_STATUS);
962fid = bitfield(fidvid, 5, 0);
963
964cpuMult = (fid + 8) / 2;
965currcoef = cpuMult;
966
967cpuMultN2 = (fidvid & (uint64_t)bit(0));
968currdiv = cpuMultN2;
969/****** Addon END ******/
970}
971break;
972
973case 0x10: /*** AMD Family 10h ***/
974{
975uint64_t cofvid = 0;
976uint64_t cpuMult;
977uint64_t divisor = 0;
978uint64_t did;
979uint64_t fid;
980
981cofvid = rdmsr64(K10_COFVID_STATUS);
982did = bitfield(cofvid, 8, 6);
983fid = bitfield(cofvid, 5, 0);
984if (did == 0) divisor = 2;
985else if (did == 1) divisor = 4;
986else if (did == 2) divisor = 8;
987else if (did == 3) divisor = 16;
988else if (did == 4) divisor = 32;
989
990cpuMult = (fid + 16) / divisor;
991currcoef = cpuMult;
992
993cpuMultN2 = (cofvid & (uint64_t)bit(0));
994currdiv = cpuMultN2;
995
996/****** Addon END ******/
997}
998break;
999
1000case 0x11: /*** AMD Family 11h ***/
1001{
1002uint64_t cofvid = 0;
1003uint64_t cpuMult;
1004uint64_t divisor = 0;
1005uint64_t did;
1006uint64_t fid;
1007
1008cofvid = rdmsr64(K10_COFVID_STATUS);
1009did = bitfield(cofvid, 8, 6);
1010fid = bitfield(cofvid, 5, 0);
1011if (did == 0) divisor = 2;
1012else if (did == 1) divisor = 4;
1013else if (did == 2) divisor = 8;
1014else if (did == 3) divisor = 16;
1015else if (did == 4) divisor = 32;
1016
1017cpuMult = (fid + 8) / divisor;
1018currcoef = cpuMult;
1019
1020cpuMultN2 = (cofvid & (uint64_t)bit(0));
1021currdiv = cpuMultN2;
1022
1023/****** Addon END ******/
1024}
1025 break;
1026
1027case 0x12: /*** AMD Family 12h ***/
1028{
1029// 8:4 CpuFid: current CPU core frequency ID
1030// 3:0 CpuDid: current CPU core divisor ID
1031uint64_t prfsts,CpuFid,CpuDid;
1032prfsts = rdmsr64(K10_COFVID_STATUS);
1033
1034CpuDid = bitfield(prfsts, 3, 0) ;
1035CpuFid = bitfield(prfsts, 8, 4) ;
1036uint64_t divisor;
1037switch (CpuDid)
1038{
1039case 0: divisor = 1; break;
1040case 1: divisor = (3/2); break;
1041case 2: divisor = 2; break;
1042case 3: divisor = 3; break;
1043case 4: divisor = 4; break;
1044case 5: divisor = 6; break;
1045case 6: divisor = 8; break;
1046case 7: divisor = 12; break;
1047case 8: divisor = 16; break;
1048default: divisor = 1; break;
1049}
1050currcoef = (CpuFid + 0x10) / divisor;
1051
1052cpuMultN2 = (prfsts & (uint64_t)bit(0));
1053currdiv = cpuMultN2;
1054
1055}
1056break;
1057
1058case 0x14: /* K14 */
1059
1060{
1061// 8:4: current CPU core divisor ID most significant digit
1062// 3:0: current CPU core divisor ID least significant digit
1063uint64_t prfsts;
1064prfsts = rdmsr64(K10_COFVID_STATUS);
1065
1066uint64_t CpuDidMSD,CpuDidLSD;
1067CpuDidMSD = bitfield(prfsts, 8, 4) ;
1068CpuDidLSD = bitfield(prfsts, 3, 0) ;
1069
1070uint64_t frequencyId = 0x10;
1071currcoef = (frequencyId + 0x10) /
1072(CpuDidMSD + (CpuDidLSD * 0.25) + 1);
1073currdiv = ((CpuDidMSD) + 1) << 2;
1074currdiv += bitfield(msr, 3, 0);
1075
1076cpuMultN2 = (prfsts & (uint64_t)bit(0));
1077currdiv = cpuMultN2;
1078}
1079
1080break;
1081
1082case 0x15: /*** AMD Family 15h ***/
1083case 0x06: /*** AMD Family 06h ***/
1084{
1085
1086uint64_t cofvid = 0;
1087uint64_t cpuMult;
1088uint64_t divisor = 0;
1089uint64_t did;
1090uint64_t fid;
1091
1092cofvid = rdmsr64(K10_COFVID_STATUS);
1093did = bitfield(cofvid, 8, 6);
1094fid = bitfield(cofvid, 5, 0);
1095if (did == 0) divisor = 2;
1096else if (did == 1) divisor = 4;
1097else if (did == 2) divisor = 8;
1098else if (did == 3) divisor = 16;
1099else if (did == 4) divisor = 32;
1100
1101cpuMult = (fid + 16) / divisor;
1102currcoef = cpuMult;
1103
1104cpuMultN2 = (cofvid & (uint64_t)bit(0));
1105currdiv = cpuMultN2;
1106}
1107break;
1108
1109case 0x16: /*** AMD Family 16h kabini ***/
1110{
1111uint64_t cofvid = 0;
1112uint64_t cpuMult;
1113uint64_t divisor = 0;
1114uint64_t did;
1115uint64_t fid;
1116
1117cofvid = rdmsr64(K10_COFVID_STATUS);
1118did = bitfield(cofvid, 8, 6);
1119fid = bitfield(cofvid, 5, 0);
1120if (did == 0) divisor = 1;
1121else if (did == 1) divisor = 2;
1122else if (did == 2) divisor = 4;
1123else if (did == 3) divisor = 8;
1124else if (did == 4) divisor = 16;
1125
1126cpuMult = (fid + 16) / divisor;
1127currcoef = cpuMult;
1128
1129cpuMultN2 = (cofvid & (uint64_t)bit(0));
1130currdiv = cpuMultN2;
1131/****** Addon END ******/
1132}
1133break;
1134
1135default:
1136{
1137typedef unsigned long long vlong;
1138uint64_t prfsts;
1139prfsts = rdmsr64(K10_COFVID_STATUS);
1140uint64_t r;
1141vlong hz;
1142r = (prfsts>>6) & 0x07;
1143hz = (((prfsts & 0x3f)+0x10)*100000000ll)/(1<<r);
1144
1145currcoef = hz / (200 * Mega);
1146}
1147}
1148
1149if (currcoef)
1150{
1151if (currdiv)
1152{
1153busFrequency = ((tscFreq * 2) / ((currcoef * 2) + 1));
1154busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
1155tscFCvtt2n = busFCvtt2n * 2 / (1 + (2 * currcoef));
1156cpuFrequency = ((1 * Giga) << 32) / tscFCvtt2n;
1157
1158DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
1159}
1160else
1161{
1162busFrequency = (tscFreq / currcoef);
1163busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
1164tscFCvtt2n = busFCvtt2n / currcoef;
1165cpuFrequency = ((1 * Giga) << 32) / tscFCvtt2n;
1166DBG("%d\n", currcoef);
1167}
1168}
1169else if (!cpuFrequency)
1170{
1171cpuFrequency = tscFreq;
1172}
1173}
1174
1175#if 0
1176if (!busFrequency)
1177{
1178busFrequency = (DEFAULT_FSB * 1000);
1179DBG("\tCPU: busFrequency = 0! using the default value for FSB!\n");
1180cpuFrequency = tscFreq;
1181}
1182
1183DBG("\tcpu freq = 0x%016llxn", timeRDTSC() * 20);
1184
1185#endif
1186
1187outb(0x21U, pic0_mask); // restore PIC0 interrupts
1188
1189p->CPU.MaxCoef = maxcoef = currcoef;
1190p->CPU.MaxDiv = maxdiv = currdiv;
1191p->CPU.CurrCoef = currcoef;
1192p->CPU.CurrDiv = currdiv;
1193p->CPU.TSCFrequency = tscFreq;
1194p->CPU.FSBFrequency = busFrequency;
1195p->CPU.CPUFrequency = cpuFrequency;
1196
1197// keep formatted with spaces instead of tabs
1198
1199DBG("\tCPUID Raw Values:\n");
1200for (i = 0; i < CPUID_MAX; i++)
1201{
1202DBG("\t%02d: %08X-%08X-%08X-%08X\n", i, p->CPU.CPUID[i][eax], p->CPU.CPUID[i][ebx], p->CPU.CPUID[i][ecx], p->CPU.CPUID[i][edx]);
1203}
1204DBG("\n");
1205DBG("\tBrand String: %s\n",p->CPU.BrandString);// Processor name (BIOS)
1206DBG("\tVendor: 0x%X\n",p->CPU.Vendor);// Vendor ex: GenuineIntel
1207DBG("\tFamily: 0x%X\n",p->CPU.Family);// Family ex: 6 (06h)
1208DBG("\tExtFamily: 0x%X\n",p->CPU.ExtFamily);
1209DBG("\tSignature: 0x%08X\n",p->CPU.Signature);// CPUID signature
1210/*switch (p->CPU.Type) {
1211case PT_OEM:
1212DBG("\tProcessor type: Intel Original OEM Processor\n");
1213break;
1214case PT_OD:
1215DBG("\tProcessor type: Intel Over Drive Processor\n");
1216break;
1217case PT_DUAL:
1218DBG("\tProcessor type: Intel Dual Processor\n");
1219break;
1220case PT_RES:
1221DBG("\tProcessor type: Intel Reserved\n");
1222break;
1223default:
1224break;
1225}*/
1226DBG("\tModel: 0x%X\n",p->CPU.Model);// Model ex: 37 (025h)
1227DBG("\tExtModel: 0x%X\n",p->CPU.ExtModel);
1228DBG("\tStepping: 0x%X\n",p->CPU.Stepping);// Stepping ex: 5 (05h)
1229DBG("\tMaxCoef: %d\n",p->CPU.MaxCoef);
1230DBG("\tCurrCoef: %d\n",p->CPU.CurrCoef);
1231DBG("\tMaxDiv: %d\n",p->CPU.MaxDiv);
1232DBG("\tCurrDiv: %d\n",p->CPU.CurrDiv);
1233DBG("\tTSCFreq: %dMHz\n",p->CPU.TSCFrequency / 1000000);
1234DBG("\tFSBFreq: %dMHz\n",p->CPU.FSBFrequency / 1000000);
1235DBG("\tCPUFreq: %dMHz\n",p->CPU.CPUFrequency / 1000000);
1236DBG("\tCores: %d\n",p->CPU.NoCores);// Cores
1237DBG("\tLogical processor: %d\n",p->CPU.NoThreads);// Logical procesor
1238DBG("\tFeatures: 0x%08x\n",p->CPU.Features);
1239
1240verbose("\n");
1241#if DEBUG_CPU
1242pause();
1243#endif
1244}
1245

Archive Download this file

Revision: 2842