Chameleon

Chameleon Svn Source Tree

Root/branches/ErmaC/Enoch/i386/libsaio/cpu.c

/*
 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
 * AsereBLN: 2009: cleanup and bugfix
 * Bronya: 2015 Improve AMD support, cleanup and bugfix
 */

#include "libsaio.h"
#include "platform.h"
#include "cpu.h"
#include "bootstruct.h"
#include "boot.h"

#ifndef DEBUG_CPU
#define DEBUG_CPU 0
#endif

/* DBG() forwards its arguments to printf() only when DEBUG_CPU is non-zero. */
#if DEBUG_CPU
#define DBG(x...)	printf(x)
#else
#define DBG(x...)
#endif

/* Step (in Hz) to which the reported CPU frequency is rounded. */
#define UI_CPUFREQ_ROUNDING_FACTOR	10000000

25clock_frequency_info_t gPEClockFrequencyInfo;
26
/*
 * rdtsc32()
 * Executes RDTSC and returns the counter as EDX:EAX combined into one
 * 64-bit value (despite the "32" in the name, nothing is truncated here).
 */
static __attribute__((unused)) uint64_t rdtsc32(void)
{
	unsigned int lo, hi;

	__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

34/*
35 * timeRDTSC()
36 * This routine sets up PIT counter 2 to count down 1/20 of a second.
37 * It pauses until the value is latched in the counter
38 * and then reads the time stamp counter to return to the caller.
39 */
40static uint64_t timeRDTSC(void)
41{
42intattempts = 0;
43uint32_t latchTime;
44uint64_tsaveTime,intermediate;
45unsigned inttimerValue, lastValue;
46//boolean_tint_enabled;
47/*
48 * Table of correction factors to account for
49 * - timer counter quantization errors, and
50 * - undercounts 0..5
51 */
52#define SAMPLE_CLKS_EXACT(((double) CLKNUM) / 20.0)
53#define SAMPLE_CLKS_INT((int) CLKNUM / 20)
54#define SAMPLE_NSECS(2000000000LL)
55#define SAMPLE_MULTIPLIER(((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
56#define ROUND64(x)((uint64_t)((x) + 0.5))
57uint64_tscale[6] = {
58ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)),
59ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)),
60ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)),
61ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)),
62ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)),
63ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
64};
65
66//int_enabled = ml_set_interrupts_enabled(false);
67
68restart:
69if (attempts >= 3) // increase to up to 9 attempts.
70{
71// This will flash-reboot. TODO: Use tscPanic instead.
72//printf("Timestamp counter calibation failed with %d attempts\n", attempts);
73}
74attempts++;
75enable_PIT2();// turn on PIT2
76set_PIT2(0);// reset timer 2 to be zero
77latchTime = rdtsc32();// get the time stamp to time
78latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
79set_PIT2(SAMPLE_CLKS_INT);// set up the timer for (almost) 1/20th a second
80saveTime = rdtsc32();// now time how long a 20th a second is...
81get_PIT2(&lastValue);
82get_PIT2(&lastValue);// read twice, first value may be unreliable
83do {
84intermediate = get_PIT2(&timerValue);
85if (timerValue > lastValue)
86{
87// Timer wrapped
88set_PIT2(0);
89disable_PIT2();
90goto restart;
91}
92lastValue = timerValue;
93} while (timerValue > 5);
94//printf("timerValue %d\n",timerValue);
95//printf("intermediate 0x%016llX\n",intermediate);
96//printf("saveTime 0x%016llX\n",saveTime);
97
98intermediate -= saveTime;// raw count for about 1/20 second
99intermediate *= scale[timerValue];// rescale measured time spent
100intermediate /= SAMPLE_NSECS;// so its exactly 1/20 a second
101intermediate += latchTime;// add on our save fudge
102
103set_PIT2(0);// reset timer 2 to be zero
104disable_PIT2();// turn off PIT 2
105
106//ml_set_interrupts_enabled(int_enabled);
107return intermediate;
108}
109
110/*
111 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
112 */
113static uint64_t __unused measure_tsc_frequency(void)
114{
115uint64_t tscStart;
116uint64_t tscEnd;
117uint64_t tscDelta = 0xffffffffffffffffULL;
118unsigned long pollCount;
119uint64_t retval = 0;
120int i;
121
122/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
123 * counter 2. We run this loop 3 times to make sure the cache
124 * is hot and we take the minimum delta from all of the runs.
125 * That is to say that we're biased towards measuring the minimum
126 * number of TSC ticks that occur while waiting for the timer to
127 * expire. That theoretically helps avoid inconsistencies when
128 * running under a VM if the TSC is not virtualized and the host
129 * steals time. The TSC is normally virtualized for VMware.
130 */
131for(i = 0; i < 10; ++i)
132{
133enable_PIT2();
134set_PIT2_mode0(CALIBRATE_LATCH);
135tscStart = rdtsc64();
136pollCount = poll_PIT2_gate();
137tscEnd = rdtsc64();
138/* The poll loop must have run at least a few times for accuracy */
139if (pollCount <= 1)
140{
141continue;
142}
143/* The TSC must increment at LEAST once every millisecond.
144 * We should have waited exactly 30 msec so the TSC delta should
145 * be >= 30. Anything less and the processor is way too slow.
146 */
147if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
148{
149continue;
150}
151// tscDelta = MIN(tscDelta, (tscEnd - tscStart))
152if ( (tscEnd - tscStart) < tscDelta )
153{
154tscDelta = tscEnd - tscStart;
155}
156}
157/* tscDelta is now the least number of TSC ticks the processor made in
158 * a timespan of 0.03 s (e.g. 30 milliseconds)
159 * Linux thus divides by 30 which gives the answer in kiloHertz because
160 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
161 * Hz so we need to convert our milliseconds to seconds. Since we're
162 * dividing by the milliseconds, we simply multiply by 1000.
163 */
164
165/* Unlike linux, we're not limited to 32-bit, but we do need to take care
166 * that we're going to multiply by 1000 first so we do need at least some
167 * arithmetic headroom. For now, 32-bit should be enough.
168 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
169 */
170if (tscDelta > (1ULL<<32))
171{
172retval = 0;
173}
174else
175{
176retval = tscDelta * 1000 / 30;
177}
178disable_PIT2();
179return retval;
180}
181
182static uint64_trtc_set_cyc_per_sec(uint64_t cycles);
183#define RTC_FAST_DENOM0xFFFFFFFF
184
185inline static uint32_t
186create_mul_quant_GHZ(int shift, uint32_t quant)
187{
188return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant);
189}
190
191struct{
192mach_timespec_tcalend_offset;
193boolean_tcalend_is_set;
194
195int64_tcalend_adjtotal;
196int32_tcalend_adjdelta;
197
198uint32_tboottime;
199
200mach_timebase_info_data_ttimebase_const;
201
202decl_simple_lock_data(,lock)/* real-time clock device lock */
203} rtclock;
204
205uint32_trtc_quant_shift;/* clock to nanos right shift */
206uint32_trtc_quant_scale;/* clock to nanos multiplier */
207uint64_trtc_cyc_per_sec;/* processor cycles per sec */
208uint64_trtc_cycle_count;/* clocks in 1/20th second */
209
210static uint64_t rtc_set_cyc_per_sec(uint64_t cycles)
211{
212
213if (cycles > (NSEC_PER_SEC/20))
214{
215// we can use just a "fast" multiply to get nanos
216rtc_quant_shift = 32;
217rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, (uint32_t)cycles);
218rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20
219rtclock.timebase_const.denom = (uint32_t)RTC_FAST_DENOM;
220}
221else
222{
223rtc_quant_shift = 26;
224rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, (uint32_t)cycles);
225rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20
226rtclock.timebase_const.denom = (uint32_t)cycles;
227}
228rtc_cyc_per_sec = cycles*20;// multiply it by 20 and we are done..
229// BUT we also want to calculate...
230
231cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2))
232 / UI_CPUFREQ_ROUNDING_FACTOR)
233* UI_CPUFREQ_ROUNDING_FACTOR;
234
235/*
236 * Set current measured speed.
237 */
238if (cycles >= 0x100000000ULL)
239{
240gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL;
241}
242else
243{
244gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles;
245}
246gPEClockFrequencyInfo.cpu_frequency_hz = cycles;
247
248//printf("[RTCLOCK_1] frequency %llu (%llu) %llu\n", cycles, rtc_cyc_per_sec,timeRDTSC() * 20);
249return(rtc_cyc_per_sec);
250}
251
252// Bronya C1E fix
253static void post_startup_cpu_fixups(void)
254{
255/*
256 * Some AMD processors support C1E state. Entering this state will
257 * cause the local APIC timer to stop, which we can't deal with at
258 * this time.
259 */
260
261uint64_t reg;
262verbose("\tLooking to disable C1E if is already enabled by the BIOS:\n");
263reg = rdmsr64(MSR_AMD_INT_PENDING_CMP_HALT);
264/* Disable C1E state if it is enabled by the BIOS */
265if ((reg >> AMD_ACTONCMPHALT_SHIFT) & AMD_ACTONCMPHALT_MASK)
266{
267reg &= ~(AMD_ACTONCMPHALT_MASK << AMD_ACTONCMPHALT_SHIFT);
268wrmsr64(MSR_AMD_INT_PENDING_CMP_HALT, reg);
269verbose("\tC1E disabled!\n");
270}
271}
272
273/*
274 * Large memcpy() into MMIO space can take longer than 1 clock tick (55ms).
275 * The timer interrupt must remain responsive when updating VRAM so
276 * as not to miss timer interrupts during countdown().
277 *
278 * If interrupts are enabled, use normal memcpy.
279 *
280 * If interrupts are disabled, breaks memcpy down
281 * into 128K chunks, times itself and makes a bios
282 * real-mode call every 25 msec in order to service
283 * pending interrupts.
284 *
285 * -- zenith432, May 22nd, 2016
286 */
287void *memcpy_interruptible(void *dst, const void *src, size_t len)
288{
289uint64_t tscFreq, lastTsc;
290uint32_t eflags, threshold;
291ptrdiff_t offset;
292const size_t chunk = 131072U;// 128K
293
294if (len <= chunk)
295{
296/*
297 * Short memcpy - use normal.
298 */
299return memcpy(dst, src, len);
300}
301
302__asm__ volatile("pushfl; popl %0" : "=r"(eflags));
303if (eflags & 0x200U)
304{
305/*
306 * Interrupts are enabled - use normal memcpy.
307 */
308return memcpy(dst, src, len);
309}
310
311tscFreq = Platform.CPU.TSCFrequency;
312if ((uint32_t) (tscFreq >> 32))
313{
314/*
315 * If TSC Frequency >= 2 ** 32, use a default time threshold.
316 */
317threshold = (~0U) / 40U;
318}
319else if (!(uint32_t) tscFreq)
320{
321/*
322 * If early on and TSC Frequency hasn't been estimated yet,
323 * use normal memcpy.
324 */
325return memcpy(dst, src, len);
326}
327else
328{
329threshold = ((uint32_t) tscFreq) / 40U;
330}
331
332/*
333 * Do the work
334 */
335offset = 0;
336lastTsc = rdtsc64();
337do
338{
339(void) memcpy((char*) dst + offset, (const char*) src + offset, chunk);
340offset += (ptrdiff_t) chunk;
341len -= chunk;
342if ((rdtsc64() - lastTsc) < threshold)
343{
344continue;
345}
346(void) readKeyboardStatus();// visit real-mode
347lastTsc = rdtsc64();
348}
349while (len > chunk);
350if (len)
351{
352(void) memcpy((char*) dst + offset, (const char*) src + offset, len);
353}
354return dst;
355}
356
/*
 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
 * - multi. is read from a specific MSR. In the case of Intel, there is:
 *	   a max multi. (used to calculate the FSB freq.),
 *	   and a current multi. (used to calculate the CPU freq.)
 * - busFrequency = tscFrequency / multi
 * - cpuFrequency = busFrequency * multi
 */

/* Decimal powers: */
#define kilo (1000ULL)
#define Mega (kilo * kilo)
#define Giga (kilo * Mega)
#define Tera (kilo * Giga)
#define Peta (kilo * Tera)

/* Joins two 32-bit halves into one 64-bit value (hi in the upper 32 bits). */
#define quad(hi,lo)	(((uint64_t)(hi)) << 32 | (lo))

375void get_cpuid(PlatformInfo_t *p)
376{
377
378charstr[128];
379uint32_treg[4];
380char*s= 0;
381
382
383do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]); // MaxFn, Vendor
384do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]); // Signature, stepping, features
385do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]); // TLB/Cache/Prefetch
386
387do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]); // S/N
388do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]); // Get the max extended cpuid
389
390if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 8)
391{
392do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
393do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
394}
395else if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1)
396{
397do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
398}
399
400// ==============================================================
401
402/* get BrandString (if supported) */
403/* Copyright: from Apple's XNU cpuid.c */
404if (p->CPU.CPUID[CPUID_80][0] > 0x80000004)
405{
406bzero(str, 128);
407/*
408 * The BrandString 48 bytes (max), guaranteed to
409 * be NULL terminated.
410 */
411do_cpuid(0x80000002, reg); // Processor Brand String
412memcpy(&str[0], (char *)reg, 16);
413
414
415do_cpuid(0x80000003, reg); // Processor Brand String
416memcpy(&str[16], (char *)reg, 16);
417do_cpuid(0x80000004, reg); // Processor Brand String
418memcpy(&str[32], (char *)reg, 16);
419for (s = str; *s != '\0'; s++)
420{
421if (*s != ' ')
422{
423break;
424}
425}
426strlcpy(p->CPU.BrandString, s, 48);
427
428if (!strncmp(p->CPU.BrandString, CPU_STRING_UNKNOWN, MIN(sizeof(p->CPU.BrandString), (unsigned)strlen(CPU_STRING_UNKNOWN) + 1)))
429{
430/*
431 * This string means we have a firmware-programmable brand string,
432 * and the firmware couldn't figure out what sort of CPU we have.
433 */
434p->CPU.BrandString[0] = '\0';
435}
436p->CPU.BrandString[47] = '\0';
437//DBG("\tBrandstring = %s\n", p->CPU.BrandString);
438}
439
440// ==============================================================
441
442switch(p->CPU.BrandString[0])
443{
444case 'A':
445/* AMD Processors */
446// The cache information is only in ecx and edx so only save
447// those registers
448
449do_cpuid(5, p->CPU.CPUID[CPUID_5]); // Monitor/Mwait
450
451do_cpuid(0x80000005, p->CPU.CPUID[CPUID_85]); // TLB/Cache/Prefetch
452do_cpuid(0x80000006, p->CPU.CPUID[CPUID_86]); // TLB/Cache/Prefetch
453do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
454
455break;
456
457case 'G':
458/* Intel Processors */
459do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]); // Cache Index for Inte
460
461if (p->CPU.CPUID[CPUID_0][0] >= 0x5)// Monitor/Mwait
462{
463do_cpuid(5, p->CPU.CPUID[CPUID_5]);
464}
465
466if (p->CPU.CPUID[CPUID_0][0] >= 6)// Thermal/Power
467{
468do_cpuid(6, p->CPU.CPUID[CPUID_6]);
469}
470
471break;
472}
473}
474
475void scan_cpu(PlatformInfo_t *p)
476{
477verbose("[ CPU INFO ]\n");
478get_cpuid(p);
479
480uint64_tbusFCvtt2n;
481uint64_ttscFCvtt2n;
482uint64_ttscFreq= 0;
483uint64_tbusFrequency= 0;
484uint64_tcpuFrequency= 0;
485uint64_tmsr= 0;
486uint64_tflex_ratio= 0;
487uint64_tcpuid_features;
488
489uint32_tmax_ratio= 0;
490uint32_tmin_ratio= 0;
491uint32_treg[4];
492uint32_tcores_per_package= 0;
493uint32_tlogical_per_package= 1;
494uint32_tthreads_per_core= 1;
495
496uint8_tbus_ratio_max= 0;
497uint8_tbus_ratio_min= 0;
498uint8_tcurrdiv= 0;
499uint8_tcurrcoef= 0;
500uint8_tmaxdiv= 0;
501uint8_tmaxcoef= 0;
502uint8_tpic0_mask;
503uint8_tcpuMultN2= 0;
504
505const char*newratio;
506
507intlen= 0;
508intmyfsb= 0;
509inti= 0;
510
511
512/* http://www.flounder.com/cpuid_explorer2.htm
513 EAX (Intel):
514 31 28 27 20 19 16 1514 1312 11 8 7 4 3 0
515 +--------+----------------+--------+----+----+--------+--------+--------+
516 |########|Extended family |Extmodel|####|type|familyid| model |stepping|
517 +--------+----------------+--------+----+----+--------+--------+--------+
518
519 EAX (AMD):
520 31 28 27 20 19 16 1514 1312 11 8 7 4 3 0
521 +--------+----------------+--------+----+----+--------+--------+--------+
522 |########|Extended family |Extmodel|####|####|familyid| model |stepping|
523 +--------+----------------+--------+----+----+--------+--------+--------+
524*/
525///////////////////-- MaxFn,Vendor --////////////////////////
526p->CPU.Vendor= p->CPU.CPUID[CPUID_0][1];
527
528///////////////////-- Signature, stepping, features -- //////
529cpuid_features = quad(p->CPU.CPUID[CPUID_1][ecx], p->CPU.CPUID[CPUID_1][edx]);
530if (bit(28) & p->CPU.CPUID[CPUID_1][edx]) // HTT/Multicore
531{
532logical_per_package = bitfield(p->CPU.CPUID[CPUID_1][ebx], 23, 16);
533}
534else
535{
536logical_per_package = 1;
537}
538
539p->CPU.Signature= p->CPU.CPUID[CPUID_1][0];
540p->CPU.Stepping= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 3, 0);// stepping = cpu_feat_eax & 0xF;
541p->CPU.Model= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 7, 4);// model = (cpu_feat_eax >> 4) & 0xF;
542p->CPU.Family= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 11, 8);// family = (cpu_feat_eax >> 8) & 0xF;
543//p->CPU.Type= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 13, 12);// type = (cpu_feat_eax >> 12) & 0x3;
544p->CPU.ExtModel= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);// ext_model = (cpu_feat_eax >> 16) & 0xF;
545p->CPU.ExtFamily= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);// ext_family = (cpu_feat_eax >> 20) & 0xFF;
546
547if (p->CPU.Family == 0x0f)
548{
549p->CPU.Family += p->CPU.ExtFamily;
550}
551
552if (p->CPU.Family == 0x0f || p->CPU.Family == 0x06)
553{
554p->CPU.Model += (p->CPU.ExtModel << 4);
555}
556
557switch (p->CPU.Vendor)
558{
559case CPUID_VENDOR_INTEL:
560{
561/* Based on Apple's XNU cpuid.c - Deterministic cache parameters */
562if ((p->CPU.CPUID[CPUID_0][eax] > 3) && (p->CPU.CPUID[CPUID_0][eax] < 0x80000000))
563{
564for (i = 0; i < 0xFF; i++) // safe loop
565{
566do_cpuid2(0x00000004, i, reg); // AX=4: Fn, CX=i: cache index
567if (bitfield(reg[eax], 4, 0) == 0)
568{
569break;
570}
571cores_per_package = bitfield(reg[eax], 31, 26) + 1;
572}
573}
574
575if (i > 0)
576{
577cores_per_package = bitfield(p->CPU.CPUID[CPUID_4][eax], 31, 26) + 1; // i = cache index
578threads_per_core = bitfield(p->CPU.CPUID[CPUID_4][eax], 25, 14) + 1;
579}
580
581if (cores_per_package == 0)
582{
583cores_per_package = 1;
584}
585
586switch (p->CPU.Model)
587{
588case CPUID_MODEL_NEHALEM: // Intel Core i7 LGA1366 (45nm)
589case CPUID_MODEL_FIELDS: // Intel Core i5, i7 LGA1156 (45nm)
590case CPUID_MODEL_CLARKDALE: // Intel Core i3, i5, i7 LGA1156 (32nm)
591case CPUID_MODEL_NEHALEM_EX:
592case CPUID_MODEL_JAKETOWN:
593case CPUID_MODEL_SANDYBRIDGE: // 0x2A
594case CPUID_MODEL_IVYBRIDGE: // 0x3A
595case CPUID_MODEL_IVYBRIDGE_XEON:// 0x3E
596case CPUID_MODEL_HASWELL_U5: // 0x3D
597case CPUID_MODEL_HASWELL: // 0x3C
598case CPUID_MODEL_HASWELL_SVR: // 0x3F
599case CPUID_MODEL_HASWELL_ULT: // 0x45
600case CPUID_MODEL_HASWELL_ULX: // 0x46
601case CPUID_MODEL_BROADWELL_HQ: // 0x47
602case CPUID_MODEL_BRASWELL:// 0x4C
603case CPUID_MODEL_AVOTON: // 0x4D
604case CPUID_MODEL_SKYLAKE: // 0x4E
605case CPUID_MODEL_BRODWELL_SVR: // 0x4F
606case CPUID_MODEL_BRODWELL_MSVR: // 0x56
607case CPUID_MODEL_KNIGHT: // 0x57
608case CPUID_MODEL_ANNIDALE: // 0x5A
609case CPUID_MODEL_GOLDMONT: // 0x5C
610case CPUID_MODEL_VALLEYVIEW: // 0x5D
611case CPUID_MODEL_SKYLAKE_S: // 0x5E
612case CPUID_MODEL_SKYLAKE_AVX: // 0x55
613case CPUID_MODEL_CANNONLAKE: // 0x66
614msr = rdmsr64(MSR_CORE_THREAD_COUNT); // 0x35
615p->CPU.NoCores= (uint32_t)bitfield((uint32_t)msr, 31, 16);
616p->CPU.NoThreads= (uint32_t)bitfield((uint32_t)msr, 15, 0);
617break;
618
619case CPUID_MODEL_DALES:
620case CPUID_MODEL_WESTMERE: // Intel Core i7 LGA1366 (32nm) 6 Core
621case CPUID_MODEL_WESTMERE_EX:
622msr = rdmsr64(MSR_CORE_THREAD_COUNT);
623p->CPU.NoCores= (uint32_t)bitfield((uint32_t)msr, 19, 16);
624p->CPU.NoThreads= (uint32_t)bitfield((uint32_t)msr, 15, 0);
625break;
626case CPUID_MODEL_ATOM_3700:
627p->CPU.NoCores= 4;
628p->CPU.NoThreads= 4;
629break;
630case CPUID_MODEL_ATOM:
631p->CPU.NoCores= 2;
632p->CPU.NoThreads= 2;
633break;
634default:
635p->CPU.NoCores= 0;
636break;
637}
638
639// workaround for Xeon Harpertown and Yorkfield
640if ((p->CPU.Model == CPUID_MODEL_PENRYN) &&
641(p->CPU.NoCores== 0))
642{
643if ((strstr(p->CPU.BrandString, "X54")) ||
644(strstr(p->CPU.BrandString, "E54")) ||
645(strstr(p->CPU.BrandString, "W35")) ||
646(strstr(p->CPU.BrandString, "X34")) ||
647(strstr(p->CPU.BrandString, "X33")) ||
648(strstr(p->CPU.BrandString, "L33")) ||
649(strstr(p->CPU.BrandString, "X32")) ||
650(strstr(p->CPU.BrandString, "L3426")) ||
651(strstr(p->CPU.BrandString, "L54")))
652{
653p->CPU.NoCores= 4;
654p->CPU.NoThreads= 4;
655} else if (strstr(p->CPU.BrandString, "W36")) {
656p->CPU.NoCores= 6;
657p->CPU.NoThreads= 6;
658} else { //other Penryn and Wolfdale
659p->CPU.NoCores= 0;
660p->CPU.NoThreads= 0;
661}
662}
663
664if (p->CPU.NoCores == 0)
665{
666p->CPU.NoCores= cores_per_package;
667p->CPU.NoThreads= logical_per_package;
668}
669
670// MSR is *NOT* available on the Intel Atom CPU
671// workaround for N270. I don't know why it detected wrong
672if ((p->CPU.Model == CPUID_MODEL_ATOM) && (strstr(p->CPU.BrandString, "270")))
673{
674p->CPU.NoCores= 1;
675p->CPU.NoThreads= 2;
676}
677
678// workaround for Quad
679if ( strstr(p->CPU.BrandString, "Quad") )
680{
681p->CPU.NoCores= 4;
682p->CPU.NoThreads= 4;
683}
684}
685
686break;
687
688case CPUID_VENDOR_AMD:
689{
690post_startup_cpu_fixups();
691cores_per_package = bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1;
692threads_per_core = cores_per_package;
693
694if (cores_per_package == 0)
695{
696cores_per_package = 1;
697}
698
699p->CPU.NoCores= cores_per_package;
700p->CPU.NoThreads= logical_per_package;
701
702if (p->CPU.NoCores == 0)
703{
704p->CPU.NoCores = 1;
705p->CPU.NoThreads= 1;
706}
707}
708break;
709
710default :
711stop("Unsupported CPU detected! System halted.");
712}
713
714/* setup features */
715if ((bit(23) & p->CPU.CPUID[CPUID_1][3]) != 0)
716{
717p->CPU.Features |= CPU_FEATURE_MMX;
718}
719
720if ((bit(25) & p->CPU.CPUID[CPUID_1][3]) != 0)
721{
722p->CPU.Features |= CPU_FEATURE_SSE;
723}
724
725if ((bit(26) & p->CPU.CPUID[CPUID_1][3]) != 0)
726{
727p->CPU.Features |= CPU_FEATURE_SSE2;
728}
729
730if ((bit(0) & p->CPU.CPUID[CPUID_1][2]) != 0)
731{
732p->CPU.Features |= CPU_FEATURE_SSE3;
733}
734
735if ((bit(19) & p->CPU.CPUID[CPUID_1][2]) != 0)
736{
737p->CPU.Features |= CPU_FEATURE_SSE41;
738}
739
740if ((bit(20) & p->CPU.CPUID[CPUID_1][2]) != 0)
741{
742p->CPU.Features |= CPU_FEATURE_SSE42;
743}
744
745if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0)
746{
747p->CPU.Features |= CPU_FEATURE_EM64T;
748}
749
750if ((bit(5) & p->CPU.CPUID[CPUID_1][3]) != 0)
751{
752p->CPU.Features |= CPU_FEATURE_MSR;
753}
754
755if ((p->CPU.NoThreads > p->CPU.NoCores))
756{
757p->CPU.Features |= CPU_FEATURE_HTT;
758}
759
760pic0_mask = inb(0x21U);
761outb(0x21U, 0xFFU); // mask PIC0 interrupts for duration of timing tests
762
763uint64_t cycles;
764cycles = timeRDTSC();
765tscFreq = rtc_set_cyc_per_sec(cycles);
766DBG("cpu freq classic = 0x%016llx\n", tscFreq);
767// if usual method failed
768if ( tscFreq < 1000 )//TEST
769{
770tscFreq = measure_tsc_frequency();//timeRDTSC() * 20;//measure_tsc_frequency();
771// DBG("cpu freq timeRDTSC = 0x%016llx\n", tscFrequency);
772}
773
774if (p->CPU.Vendor==CPUID_VENDOR_INTEL && ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)))
775{
776int intelCPU = p->CPU.Model;
777if (p->CPU.Family == 0x06)
778{
779/* Nehalem CPU model */
780switch (p->CPU.Model)
781{
782case CPUID_MODEL_NEHALEM:
783case CPUID_MODEL_FIELDS:
784case CPUID_MODEL_CLARKDALE:
785case CPUID_MODEL_DALES:
786case CPUID_MODEL_WESTMERE:
787case CPUID_MODEL_NEHALEM_EX:
788case CPUID_MODEL_WESTMERE_EX:
789/* --------------------------------------------------------- */
790case CPUID_MODEL_SANDYBRIDGE:
791case CPUID_MODEL_JAKETOWN:
792case CPUID_MODEL_IVYBRIDGE_XEON:
793case CPUID_MODEL_IVYBRIDGE:
794case CPUID_MODEL_ATOM_3700:
795case CPUID_MODEL_HASWELL:
796case CPUID_MODEL_HASWELL_U5:
797case CPUID_MODEL_HASWELL_SVR:
798
799case CPUID_MODEL_HASWELL_ULT:
800case CPUID_MODEL_HASWELL_ULX:
801case CPUID_MODEL_BROADWELL_HQ:
802case CPUID_MODEL_SKYLAKE_S:
803/* --------------------------------------------------------- */
804msr = rdmsr64(MSR_PLATFORM_INFO);
805DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
806bus_ratio_max = bitfield(msr, 15, 8);
807bus_ratio_min = bitfield(msr, 47, 40); //valv: not sure about this one (Remarq.1)
808msr = rdmsr64(MSR_FLEX_RATIO);
809DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
810if (bitfield(msr, 16, 16))
811{
812flex_ratio = bitfield(msr, 15, 8);
813// bcc9: at least on the gigabyte h67ma-ud2h,
814// where the cpu multipler can't be changed to
815// allow overclocking, the flex_ratio msr has unexpected (to OSX)
816// contents.These contents cause mach_kernel to
817// fail to compute the bus ratio correctly, instead
818// causing the system to crash since tscGranularity
819// is inadvertently set to 0.
820
821if (flex_ratio == 0)
822{
823// Clear bit 16 (evidently the presence bit)
824wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
825msr = rdmsr64(MSR_FLEX_RATIO);
826DBG("CPU: Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
827}
828else
829{
830if (bus_ratio_max > flex_ratio)
831{
832bus_ratio_max = flex_ratio;
833}
834}
835}
836
837if (bus_ratio_max)
838{
839busFrequency = (tscFreq / bus_ratio_max);
840}
841
842//valv: Turbo Ratio Limit
843if ((intelCPU != 0x2e) && (intelCPU != 0x2f))
844{
845msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
846
847cpuFrequency = bus_ratio_max * busFrequency;
848max_ratio = bus_ratio_max * 10;
849}
850else
851{
852cpuFrequency = tscFreq;
853}
854
855if ((getValueForKey(kbusratio, &newratio, &len, &bootInfo->chameleonConfig)) && (len <= 4))
856{
857max_ratio = atoi(newratio);
858max_ratio = (max_ratio * 10);
859if (len >= 3)
860{
861max_ratio = (max_ratio + 5);
862}
863
864verbose("\tBus-Ratio: min=%d, max=%s\n", bus_ratio_min, newratio);
865
866// extreme overclockers may love 320 ;)
867if ((max_ratio >= min_ratio) && (max_ratio <= 320))
868{
869cpuFrequency = (busFrequency * max_ratio) / 10;
870if (len >= 3)
871{
872maxdiv = 1;
873}
874else
875{
876maxdiv = 0;
877}
878}
879else
880{
881max_ratio = (bus_ratio_max * 10);
882}
883}
884//valv: to be uncommented if Remarq.1 didn't stick
885//if (bus_ratio_max > 0) bus_ratio = flex_ratio;
886p->CPU.MaxRatio = max_ratio;
887p->CPU.MinRatio = min_ratio;
888
889myfsb = busFrequency / 1000000;
890verbose("\tSticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", myfsb, max_ratio/10); // Bungo: fixed wrong Bus-Ratio readout
891currcoef = bus_ratio_max;
892
893break;
894
895default:
896msr = rdmsr64(MSR_IA32_PERF_STATUS);
897DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
898currcoef = bitfield(msr, 12, 8); // Bungo: reverted to 2263 state because of wrong old CPUs freq. calculating
899// Non-integer bus ratio for the max-multi
900maxdiv = bitfield(msr, 46, 46);
901// Non-integer bus ratio for the current-multi (undocumented)
902currdiv = bitfield(msr, 14, 14);
903
904// This will always be model >= 3
905if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f))
906{
907/* On these models, maxcoef defines TSC freq */
908maxcoef = bitfield(msr, 44, 40);
909}
910else
911{
912// On lower models, currcoef defines TSC freq
913// XXX
914maxcoef = currcoef;
915}
916
917if (!currcoef)
918{
919currcoef = maxcoef;
920}
921
922if (maxcoef)
923{
924if (maxdiv)
925{
926busFrequency = ((tscFreq * 2) / ((maxcoef * 2) + 1));
927}
928else
929{
930busFrequency = (tscFreq / maxcoef);
931}
932
933if (currdiv)
934{
935cpuFrequency = (busFrequency * ((currcoef * 2) + 1) / 2);
936}
937else
938{
939cpuFrequency = (busFrequency * currcoef);
940}
941
942DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
943}
944break;
945}
946}
947// Mobile CPU
948if (rdmsr64(MSR_IA32_PLATFORM_ID) & (1<<28))
949{
950p->CPU.Features |= CPU_FEATURE_MOBILE;
951}
952}
953
954else if (p->CPU.Vendor==CPUID_VENDOR_AMD)
955{
956switch(p->CPU.Family)
957{
958case 0xF: /* K8 */
959{
960uint64_t fidvid = 0;
961uint64_t cpuMult;
962uint64_t fid;
963
964fidvid = rdmsr64(K8_FIDVID_STATUS);
965fid = bitfield(fidvid, 5, 0);
966
967cpuMult = (fid + 8) / 2;
968currcoef = cpuMult;
969
970cpuMultN2 = (fidvid & (uint64_t)bit(0));
971currdiv = cpuMultN2;
972/****** Addon END ******/
973}
974break;
975
976case 0x10: /*** AMD Family 10h ***/
977{
978uint64_t cofvid = 0;
979uint64_t cpuMult;
980uint64_t divisor = 0;
981uint64_t did;
982uint64_t fid;
983
984cofvid = rdmsr64(K10_COFVID_STATUS);
985did = bitfield(cofvid, 8, 6);
986fid = bitfield(cofvid, 5, 0);
987if (did == 0) divisor = 2;
988else if (did == 1) divisor = 4;
989else if (did == 2) divisor = 8;
990else if (did == 3) divisor = 16;
991else if (did == 4) divisor = 32;
992
993cpuMult = (fid + 16) / divisor;
994currcoef = cpuMult;
995
996cpuMultN2 = (cofvid & (uint64_t)bit(0));
997currdiv = cpuMultN2;
998
999/****** Addon END ******/
1000}
1001break;
1002
1003case 0x11: /*** AMD Family 11h ***/
1004{
1005uint64_t cofvid = 0;
1006uint64_t cpuMult;
1007uint64_t divisor = 0;
1008uint64_t did;
1009uint64_t fid;
1010
1011cofvid = rdmsr64(K10_COFVID_STATUS);
1012did = bitfield(cofvid, 8, 6);
1013fid = bitfield(cofvid, 5, 0);
1014if (did == 0) divisor = 2;
1015else if (did == 1) divisor = 4;
1016else if (did == 2) divisor = 8;
1017else if (did == 3) divisor = 16;
1018else if (did == 4) divisor = 32;
1019
1020cpuMult = (fid + 8) / divisor;
1021currcoef = cpuMult;
1022
1023cpuMultN2 = (cofvid & (uint64_t)bit(0));
1024currdiv = cpuMultN2;
1025
1026/****** Addon END ******/
1027}
1028 break;
1029
1030case 0x12: /*** AMD Family 12h ***/
1031{
1032// 8:4 CpuFid: current CPU core frequency ID
1033// 3:0 CpuDid: current CPU core divisor ID
1034uint64_t prfsts,CpuFid,CpuDid;
1035prfsts = rdmsr64(K10_COFVID_STATUS);
1036
1037CpuDid = bitfield(prfsts, 3, 0) ;
1038CpuFid = bitfield(prfsts, 8, 4) ;
1039uint64_t divisor;
1040switch (CpuDid)
1041{
1042case 0: divisor = 1; break;
1043case 1: divisor = (3/2); break;
1044case 2: divisor = 2; break;
1045case 3: divisor = 3; break;
1046case 4: divisor = 4; break;
1047case 5: divisor = 6; break;
1048case 6: divisor = 8; break;
1049case 7: divisor = 12; break;
1050case 8: divisor = 16; break;
1051default: divisor = 1; break;
1052}
1053currcoef = (CpuFid + 0x10) / divisor;
1054
1055cpuMultN2 = (prfsts & (uint64_t)bit(0));
1056currdiv = cpuMultN2;
1057
1058}
1059break;
1060
1061case 0x14: /* K14 */
1062
1063{
1064// 8:4: current CPU core divisor ID most significant digit
1065// 3:0: current CPU core divisor ID least significant digit
1066uint64_t prfsts;
1067prfsts = rdmsr64(K10_COFVID_STATUS);
1068
1069uint64_t CpuDidMSD,CpuDidLSD;
1070CpuDidMSD = bitfield(prfsts, 8, 4) ;
1071CpuDidLSD = bitfield(prfsts, 3, 0) ;
1072
1073uint64_t frequencyId = 0x10;
1074currcoef = (frequencyId + 0x10) /
1075(CpuDidMSD + (CpuDidLSD * 0.25) + 1);
1076currdiv = ((CpuDidMSD) + 1) << 2;
1077currdiv += bitfield(msr, 3, 0);
1078
1079cpuMultN2 = (prfsts & (uint64_t)bit(0));
1080currdiv = cpuMultN2;
1081}
1082
1083break;
1084
1085case 0x15: /*** AMD Family 15h ***/
1086case 0x06: /*** AMD Family 06h ***/
1087{
1088
1089uint64_t cofvid = 0;
1090uint64_t cpuMult;
1091uint64_t divisor = 0;
1092uint64_t did;
1093uint64_t fid;
1094
1095cofvid = rdmsr64(K10_COFVID_STATUS);
1096did = bitfield(cofvid, 8, 6);
1097fid = bitfield(cofvid, 5, 0);
1098if (did == 0) divisor = 2;
1099else if (did == 1) divisor = 4;
1100else if (did == 2) divisor = 8;
1101else if (did == 3) divisor = 16;
1102else if (did == 4) divisor = 32;
1103
1104cpuMult = (fid + 16) / divisor;
1105currcoef = cpuMult;
1106
1107cpuMultN2 = (cofvid & (uint64_t)bit(0));
1108currdiv = cpuMultN2;
1109}
1110break;
1111
1112case 0x16: /*** AMD Family 16h kabini ***/
1113{
1114uint64_t cofvid = 0;
1115uint64_t cpuMult;
1116uint64_t divisor = 0;
1117uint64_t did;
1118uint64_t fid;
1119
1120cofvid = rdmsr64(K10_COFVID_STATUS);
1121did = bitfield(cofvid, 8, 6);
1122fid = bitfield(cofvid, 5, 0);
1123if (did == 0) divisor = 1;
1124else if (did == 1) divisor = 2;
1125else if (did == 2) divisor = 4;
1126else if (did == 3) divisor = 8;
1127else if (did == 4) divisor = 16;
1128
1129cpuMult = (fid + 16) / divisor;
1130currcoef = cpuMult;
1131
1132cpuMultN2 = (cofvid & (uint64_t)bit(0));
1133currdiv = cpuMultN2;
1134/****** Addon END ******/
1135}
1136break;
1137
1138default:
1139{
1140typedef unsigned long long vlong;
1141uint64_t prfsts;
1142prfsts = rdmsr64(K10_COFVID_STATUS);
1143uint64_t r;
1144vlong hz;
1145r = (prfsts>>6) & 0x07;
1146hz = (((prfsts & 0x3f)+0x10)*100000000ll)/(1<<r);
1147
1148currcoef = hz / (200 * Mega);
1149}
1150}
1151
1152if (currcoef)
1153{
1154if (currdiv)
1155{
1156busFrequency = ((tscFreq * 2) / ((currcoef * 2) + 1));
1157busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
1158tscFCvtt2n = busFCvtt2n * 2 / (1 + (2 * currcoef));
1159cpuFrequency = ((1 * Giga) << 32) / tscFCvtt2n;
1160
1161DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
1162}
1163else
1164{
1165busFrequency = (tscFreq / currcoef);
1166busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
1167tscFCvtt2n = busFCvtt2n / currcoef;
1168cpuFrequency = ((1 * Giga) << 32) / tscFCvtt2n;
1169DBG("%d\n", currcoef);
1170}
1171}
1172else if (!cpuFrequency)
1173{
1174cpuFrequency = tscFreq;
1175}
1176}
1177
1178#if 0
1179if (!busFrequency)
1180{
1181busFrequency = (DEFAULT_FSB * 1000);
1182DBG("\tCPU: busFrequency = 0! using the default value for FSB!\n");
1183cpuFrequency = tscFreq;
1184}
1185
1186DBG("\tcpu freq = 0x%016llxn", timeRDTSC() * 20);
1187
1188#endif
1189
1190outb(0x21U, pic0_mask); // restore PIC0 interrupts
1191
1192p->CPU.MaxCoef = maxcoef = currcoef;
1193p->CPU.MaxDiv = maxdiv = currdiv;
1194p->CPU.CurrCoef = currcoef;
1195p->CPU.CurrDiv = currdiv;
1196p->CPU.TSCFrequency = tscFreq;
1197p->CPU.FSBFrequency = busFrequency;
1198p->CPU.CPUFrequency = cpuFrequency;
1199
1200// keep formatted with spaces instead of tabs
1201
1202DBG("\tCPUID Raw Values:\n");
1203for (i = 0; i < CPUID_MAX; i++)
1204{
1205DBG("\t%02d: %08X-%08X-%08X-%08X\n", i, p->CPU.CPUID[i][eax], p->CPU.CPUID[i][ebx], p->CPU.CPUID[i][ecx], p->CPU.CPUID[i][edx]);
1206}
1207DBG("\n");
1208DBG("\tBrand String: %s\n",p->CPU.BrandString);// Processor name (BIOS)
1209DBG("\tVendor: 0x%X\n",p->CPU.Vendor);// Vendor ex: GenuineIntel
1210DBG("\tFamily: 0x%X\n",p->CPU.Family);// Family ex: 6 (06h)
1211DBG("\tExtFamily: 0x%X\n",p->CPU.ExtFamily);
1212DBG("\tSignature: 0x%08X\n",p->CPU.Signature);// CPUID signature
1213/*switch (p->CPU.Type) {
1214case PT_OEM:
1215DBG("\tProcessor type: Intel Original OEM Processor\n");
1216break;
1217case PT_OD:
1218DBG("\tProcessor type: Intel Over Drive Processor\n");
1219break;
1220case PT_DUAL:
1221DBG("\tProcessor type: Intel Dual Processor\n");
1222break;
1223case PT_RES:
1224DBG("\tProcessor type: Intel Reserved\n");
1225break;
1226default:
1227break;
1228}*/
1229DBG("\tModel: 0x%X\n",p->CPU.Model);// Model ex: 37 (025h)
1230DBG("\tExtModel: 0x%X\n",p->CPU.ExtModel);
1231DBG("\tStepping: 0x%X\n",p->CPU.Stepping);// Stepping ex: 5 (05h)
1232DBG("\tMaxCoef: %d\n",p->CPU.MaxCoef);
1233DBG("\tCurrCoef: %d\n",p->CPU.CurrCoef);
1234DBG("\tMaxDiv: %d\n",p->CPU.MaxDiv);
1235DBG("\tCurrDiv: %d\n",p->CPU.CurrDiv);
1236DBG("\tTSCFreq: %dMHz\n",p->CPU.TSCFrequency / 1000000);
1237DBG("\tFSBFreq: %dMHz\n",(p->CPU.FSBFrequency + 500000) / 1000000);
1238DBG("\tCPUFreq: %dMHz\n",p->CPU.CPUFrequency / 1000000);
1239DBG("\tCores: %d\n",p->CPU.NoCores);// Cores
1240DBG("\tLogical processor: %d\n",p->CPU.NoThreads);// Logical procesor
1241DBG("\tFeatures: 0x%08x\n",p->CPU.Features);
1242//DBG("\tMicrocode version: %d\n",p->CPU.MCodeVersion);// CPU microcode version
1243
1244verbose("\n");
1245#if DEBUG_CPU
1246pause();
1247#endif
1248}
1249

Archive Download this file

Revision: 2849