/*
 * Chameleon
 * Chameleon Svn Source Tree
 * Root/branches/ErmaC/Enoch/i386/libsaio/cpu.c
 */

/*
 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
 * AsereBLN: 2009: cleanup and bugfix
 * Bronya: 2015 Improve AMD support, cleanup and bugfix
 */
6
7#include "config.h"
8#include "libsaio.h"
9#include "platform.h"
10#include "cpu.h"
11#include "bootstruct.h"
12#include "boot.h"
13
14#if DEBUG_CPU
15#define DBG(x...)printf(x)
16#else
17#define DBG(x...)
18#endif
19
20#define UI_CPUFREQ_ROUNDING_FACTOR10000000
21
22clock_frequency_info_t gPEClockFrequencyInfo;
23
24static __unused uint64_t rdtsc32(void)
25{
26unsigned int lo,hi;
27__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
28return ((uint64_t)hi << 32) | lo;
29}
30
31/*
32 * timeRDTSC()
33 * This routine sets up PIT counter 2 to count down 1/20 of a second.
34 * It pauses until the value is latched in the counter
35 * and then reads the time stamp counter to return to the caller.
36 */
37static uint64_t timeRDTSC(void)
38{
39intattempts = 0;
40uint32_t latchTime;
41uint64_tsaveTime,intermediate;
42unsigned inttimerValue, lastValue;
43//boolean_tint_enabled;
44/*
45 * Table of correction factors to account for
46 * - timer counter quantization errors, and
47 * - undercounts 0..5
48 */
49#define SAMPLE_CLKS_EXACT(((double) CLKNUM) / 20.0)
50#define SAMPLE_CLKS_INT((int) CLKNUM / 20)
51#define SAMPLE_NSECS(2000000000LL)
52#define SAMPLE_MULTIPLIER(((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
53#define ROUND64(x)((uint64_t)((x) + 0.5))
54uint64_tscale[6] = {
55ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)),
56ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)),
57ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)),
58ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)),
59ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)),
60ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
61};
62
63//int_enabled = ml_set_interrupts_enabled(false);
64
65restart:
66if (attempts >= 3) // increase to up to 9 attempts.
67{
68// This will flash-reboot. TODO: Use tscPanic instead.
69//printf("Timestamp counter calibation failed with %d attempts\n", attempts);
70}
71attempts++;
72enable_PIT2();// turn on PIT2
73set_PIT2(0);// reset timer 2 to be zero
74latchTime = rdtsc32();// get the time stamp to time
75latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
76set_PIT2(SAMPLE_CLKS_INT);// set up the timer for (almost) 1/20th a second
77saveTime = rdtsc32();// now time how long a 20th a second is...
78get_PIT2(&lastValue);
79get_PIT2(&lastValue);// read twice, first value may be unreliable
80do {
81intermediate = get_PIT2(&timerValue);
82if (timerValue > lastValue)
83{
84// Timer wrapped
85set_PIT2(0);
86disable_PIT2();
87goto restart;
88}
89lastValue = timerValue;
90} while (timerValue > 5);
91//printf("timerValue %d\n",timerValue);
92//printf("intermediate 0x%016llX\n",intermediate);
93//printf("saveTime 0x%016llX\n",saveTime);
94
95intermediate -= saveTime;// raw count for about 1/20 second
96intermediate *= scale[timerValue];// rescale measured time spent
97intermediate /= SAMPLE_NSECS;// so its exactly 1/20 a second
98intermediate += latchTime;// add on our save fudge
99
100set_PIT2(0);// reset timer 2 to be zero
101disable_PIT2();// turn off PIT 2
102
103//ml_set_interrupts_enabled(int_enabled);
104return intermediate;
105}
106
107/*
108 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
109 */
110static uint64_t __unused measure_tsc_frequency(void)
111{
112uint64_t tscStart;
113uint64_t tscEnd;
114uint64_t tscDelta = 0xffffffffffffffffULL;
115unsigned long pollCount;
116uint64_t retval = 0;
117int i;
118
119/* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
120 * counter 2. We run this loop 3 times to make sure the cache
121 * is hot and we take the minimum delta from all of the runs.
122 * That is to say that we're biased towards measuring the minimum
123 * number of TSC ticks that occur while waiting for the timer to
124 * expire. That theoretically helps avoid inconsistencies when
125 * running under a VM if the TSC is not virtualized and the host
126 * steals time. The TSC is normally virtualized for VMware.
127 */
128for(i = 0; i < 10; ++i)
129{
130enable_PIT2();
131set_PIT2_mode0(CALIBRATE_LATCH);
132tscStart = rdtsc64();
133pollCount = poll_PIT2_gate();
134tscEnd = rdtsc64();
135/* The poll loop must have run at least a few times for accuracy */
136if (pollCount <= 1)
137{
138continue;
139}
140/* The TSC must increment at LEAST once every millisecond.
141 * We should have waited exactly 30 msec so the TSC delta should
142 * be >= 30. Anything less and the processor is way too slow.
143 */
144if ((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
145{
146continue;
147}
148// tscDelta = MIN(tscDelta, (tscEnd - tscStart))
149if ( (tscEnd - tscStart) < tscDelta )
150{
151tscDelta = tscEnd - tscStart;
152}
153}
154/* tscDelta is now the least number of TSC ticks the processor made in
155 * a timespan of 0.03 s (e.g. 30 milliseconds)
156 * Linux thus divides by 30 which gives the answer in kiloHertz because
157 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
158 * Hz so we need to convert our milliseconds to seconds. Since we're
159 * dividing by the milliseconds, we simply multiply by 1000.
160 */
161
162/* Unlike linux, we're not limited to 32-bit, but we do need to take care
163 * that we're going to multiply by 1000 first so we do need at least some
164 * arithmetic headroom. For now, 32-bit should be enough.
165 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
166 */
167if (tscDelta > (1ULL<<32))
168{
169retval = 0;
170}
171else
172{
173retval = tscDelta * 1000 / 30;
174}
175disable_PIT2();
176return retval;
177}
178
/* Defined further down; declared here ahead of its definition. */
static uint64_t rtc_set_cyc_per_sec(uint64_t cycles);

/* Timebase denominator used together with the 32-bit "fast" scale factor. */
#define RTC_FAST_DENOM	0xFFFFFFFF
181
/*
 * create_mul_quant_GHZ()
 * Returns ((NSEC_PER_SEC / 20) << shift) / quant, truncated to 32 bits.
 *
 *   shift  left shift applied to the 1/20-second nanosecond count
 *   quant  divisor (the callers pass the cycle count of 1/20 second)
 *
 * A quant of 0 yields 0 rather than a divide-by-zero fault.
 */
inline static uint32_t
create_mul_quant_GHZ(int shift, uint32_t quant)
{
	if (quant == 0)
	{
		return 0;
	}
	return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant);
}
187
188struct{
189mach_timespec_tcalend_offset;
190boolean_tcalend_is_set;
191
192int64_tcalend_adjtotal;
193int32_tcalend_adjdelta;
194
195uint32_tboottime;
196
197mach_timebase_info_data_ttimebase_const;
198
199decl_simple_lock_data(,lock)/* real-time clock device lock */
200} rtclock;
201
202uint32_trtc_quant_shift;/* clock to nanos right shift */
203uint32_trtc_quant_scale;/* clock to nanos multiplier */
204uint64_trtc_cyc_per_sec;/* processor cycles per sec */
205uint64_trtc_cycle_count;/* clocks in 1/20th second */
206
207static uint64_t rtc_set_cyc_per_sec(uint64_t cycles)
208{
209
210if (cycles > (NSEC_PER_SEC/20))
211{
212// we can use just a "fast" multiply to get nanos
213rtc_quant_shift = 32;
214rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, (uint32_t)cycles);
215rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20
216rtclock.timebase_const.denom = (uint32_t)RTC_FAST_DENOM;
217}
218else
219{
220rtc_quant_shift = 26;
221rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, (uint32_t)cycles);
222rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20
223rtclock.timebase_const.denom = (uint32_t)cycles;
224}
225rtc_cyc_per_sec = cycles*20;// multiply it by 20 and we are done..
226// BUT we also want to calculate...
227
228cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2))
229 / UI_CPUFREQ_ROUNDING_FACTOR)
230* UI_CPUFREQ_ROUNDING_FACTOR;
231
232/*
233 * Set current measured speed.
234 */
235if (cycles >= 0x100000000ULL)
236{
237gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL;
238}
239else
240{
241gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles;
242}
243gPEClockFrequencyInfo.cpu_frequency_hz = cycles;
244
245//printf("[RTCLOCK_1] frequency %llu (%llu) %llu\n", cycles, rtc_cyc_per_sec,timeRDTSC() * 20);
246return(rtc_cyc_per_sec);
247}
248
249// Bronya C1E fix
250static void post_startup_cpu_fixups(void)
251{
252/*
253 * Some AMD processors support C1E state. Entering this state will
254 * cause the local APIC timer to stop, which we can't deal with at
255 * this time.
256 */
257
258uint64_t reg;
259verbose("\tLooking to disable C1E if is already enabled by the BIOS:\n");
260reg = rdmsr64(MSR_AMD_INT_PENDING_CMP_HALT);
261/* Disable C1E state if it is enabled by the BIOS */
262if ((reg >> AMD_ACTONCMPHALT_SHIFT) & AMD_ACTONCMPHALT_MASK)
263{
264reg &= ~(AMD_ACTONCMPHALT_MASK << AMD_ACTONCMPHALT_SHIFT);
265wrmsr64(MSR_AMD_INT_PENDING_CMP_HALT, reg);
266verbose("\tC1E disabled!\n");
267}
268}
269
270/*
271 * Large memcpy() into MMIO space can take longer than 1 clock tick (55ms).
272 * The timer interrupt must remain responsive when updating VRAM so
273 * as not to miss timer interrupts during countdown().
274 *
275 * If interrupts are enabled, use normal memcpy.
276 *
277 * If interrupts are disabled, breaks memcpy down
278 * into 128K chunks, times itself and makes a bios
279 * real-mode call every 25 msec in order to service
280 * pending interrupts.
281 *
282 * -- zenith432, May 22nd, 2016
283 */
284void *memcpy_interruptible(void *dst, const void *src, size_t len)
285{
286uint64_t tscFreq, lastTsc;
287uint32_t eflags, threshold;
288ptrdiff_t offset;
289const size_t chunk = 131072U;// 128K
290
291if (len <= chunk)
292{
293/*
294 * Short memcpy - use normal.
295 */
296return memcpy(dst, src, len);
297}
298
299__asm__ volatile("pushfl; popl %0" : "=r"(eflags));
300if (eflags & 0x200U)
301{
302/*
303 * Interrupts are enabled - use normal memcpy.
304 */
305return memcpy(dst, src, len);
306}
307
308tscFreq = Platform.CPU.TSCFrequency;
309if ((uint32_t) (tscFreq >> 32))
310{
311/*
312 * If TSC Frequency >= 2 ** 32, use a default time threshold.
313 */
314threshold = (~0U) / 40U;
315}
316else if (!(uint32_t) tscFreq)
317{
318/*
319 * If early on and TSC Frequency hasn't been estimated yet,
320 * use normal memcpy.
321 */
322return memcpy(dst, src, len);
323}
324else
325{
326threshold = ((uint32_t) tscFreq) / 40U;
327}
328
329/*
330 * Do the work
331 */
332offset = 0;
333lastTsc = rdtsc64();
334do
335{
336(void) memcpy((char*) dst + offset, (const char*) src + offset, chunk);
337offset += (ptrdiff_t) chunk;
338len -= chunk;
339if ((rdtsc64() - lastTsc) < threshold)
340{
341continue;
342}
343(void) readKeyboardStatus();// visit real-mode
344lastTsc = rdtsc64();
345}
346while (len > chunk);
347if (len)
348{
349(void) memcpy((char*) dst + offset, (const char*) src + offset, len);
350}
351return dst;
352}
353
/*
 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU.
 * - The multiplier is read from a specific MSR. In the case of Intel, there is
 *   a max multiplier (used to calculate the FSB frequency)
 *   and a current multiplier (used to calculate the CPU frequency).
 * - busFrequency = tscFrequency / multiplier
 * - cpuFrequency = busFrequency * multiplier
 */
362
/* Decimal powers (all of type unsigned long long): */
#define kilo	(1000ULL)
#define Mega	(1000000ULL)
#define Giga	(1000000000ULL)
#define Tera	(1000000000000ULL)
#define Peta	(1000000000000000ULL)

/* Joins two 32-bit halves into one 64-bit value: hi in bits 63:32, lo below. */
#define quad(hi,lo)	((((uint64_t)(hi)) << 32) | (lo))
371
372void get_cpuid(PlatformInfo_t *p)
373{
374
375charstr[128];
376uint32_treg[4];
377char*s= 0;
378
379
380do_cpuid(0x00000000, p->CPU.CPUID[CPUID_0]); // MaxFn, Vendor
381do_cpuid(0x00000001, p->CPU.CPUID[CPUID_1]); // Signature, stepping, features
382do_cpuid(0x00000002, p->CPU.CPUID[CPUID_2]); // TLB/Cache/Prefetch
383
384do_cpuid(0x00000003, p->CPU.CPUID[CPUID_3]); // S/N
385do_cpuid(0x80000000, p->CPU.CPUID[CPUID_80]); // Get the max extended cpuid
386
387if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 8)
388{
389do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
390do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
391}
392else if ((p->CPU.CPUID[CPUID_80][0] & 0x0000000f) >= 1)
393{
394do_cpuid(0x80000001, p->CPU.CPUID[CPUID_81]);
395}
396
397// ==============================================================
398
399/* get BrandString (if supported) */
400/* Copyright: from Apple's XNU cpuid.c */
401if (p->CPU.CPUID[CPUID_80][0] > 0x80000004)
402{
403bzero(str, 128);
404/*
405 * The BrandString 48 bytes (max), guaranteed to
406 * be NULL terminated.
407 */
408do_cpuid(0x80000002, reg); // Processor Brand String
409memcpy(&str[0], (char *)reg, 16);
410
411
412do_cpuid(0x80000003, reg); // Processor Brand String
413memcpy(&str[16], (char *)reg, 16);
414do_cpuid(0x80000004, reg); // Processor Brand String
415memcpy(&str[32], (char *)reg, 16);
416for (s = str; *s != '\0'; s++)
417{
418if (*s != ' ')
419{
420break;
421}
422}
423strlcpy(p->CPU.BrandString, s, 48);
424
425if (!strncmp(p->CPU.BrandString, CPU_STRING_UNKNOWN, MIN(sizeof(p->CPU.BrandString), (unsigned)strlen(CPU_STRING_UNKNOWN) + 1)))
426{
427/*
428 * This string means we have a firmware-programmable brand string,
429 * and the firmware couldn't figure out what sort of CPU we have.
430 */
431p->CPU.BrandString[0] = '\0';
432}
433p->CPU.BrandString[47] = '\0';
434//DBG("\tBrandstring = %s\n", p->CPU.BrandString);
435}
436
437// ==============================================================
438
439switch(p->CPU.BrandString[0])
440{
441case 'A':
442/* AMD Processors */
443// The cache information is only in ecx and edx so only save
444// those registers
445
446do_cpuid(5, p->CPU.CPUID[CPUID_5]); // Monitor/Mwait
447
448do_cpuid(0x80000005, p->CPU.CPUID[CPUID_85]); // TLB/Cache/Prefetch
449do_cpuid(0x80000006, p->CPU.CPUID[CPUID_86]); // TLB/Cache/Prefetch
450do_cpuid(0x80000008, p->CPU.CPUID[CPUID_88]);
451
452break;
453
454case 'G':
455/* Intel Processors */
456do_cpuid2(0x00000004, 0, p->CPU.CPUID[CPUID_4]); // Cache Index for Inte
457
458if (p->CPU.CPUID[CPUID_0][0] >= 0x5)// Monitor/Mwait
459{
460do_cpuid(5, p->CPU.CPUID[CPUID_5]);
461}
462
463if (p->CPU.CPUID[CPUID_0][0] >= 6)// Thermal/Power
464{
465do_cpuid(6, p->CPU.CPUID[CPUID_6]);
466}
467
468break;
469}
470}
471
472void scan_cpu(PlatformInfo_t *p)
473{
474verbose("[ CPU INFO ]\n");
475get_cpuid(p);
476
477uint64_tbusFCvtt2n;
478uint64_ttscFCvtt2n;
479uint64_ttscFreq= 0;
480uint64_tbusFrequency= 0;
481uint64_tcpuFrequency= 0;
482uint64_tmsr= 0;
483uint64_tflex_ratio= 0;
484uint64_tcpuid_features;
485
486uint32_tmax_ratio= 0;
487uint32_tmin_ratio= 0;
488uint32_treg[4];
489uint32_tcores_per_package= 0;
490uint32_tlogical_per_package= 1;
491uint32_tthreads_per_core= 1;
492
493uint8_tbus_ratio_max= 0;
494uint8_tbus_ratio_min= 0;
495uint8_tcurrdiv= 0;
496uint8_tcurrcoef= 0;
497uint8_tmaxdiv= 0;
498uint8_tmaxcoef= 0;
499uint8_tpic0_mask;
500uint8_tcpuMultN2= 0;
501
502const char*newratio;
503
504intlen= 0;
505intmyfsb= 0;
506inti= 0;
507
508
509/* http://www.flounder.com/cpuid_explorer2.htm
510 EAX (Intel):
511 31 28 27 20 19 16 1514 1312 11 8 7 4 3 0
512 +--------+----------------+--------+----+----+--------+--------+--------+
513 |########|Extended family |Extmodel|####|type|familyid| model |stepping|
514 +--------+----------------+--------+----+----+--------+--------+--------+
515
516 EAX (AMD):
517 31 28 27 20 19 16 1514 1312 11 8 7 4 3 0
518 +--------+----------------+--------+----+----+--------+--------+--------+
519 |########|Extended family |Extmodel|####|####|familyid| model |stepping|
520 +--------+----------------+--------+----+----+--------+--------+--------+
521*/
522///////////////////-- MaxFn,Vendor --////////////////////////
523p->CPU.Vendor= p->CPU.CPUID[CPUID_0][1];
524
525///////////////////-- Signature, stepping, features -- //////
526cpuid_features = quad(p->CPU.CPUID[CPUID_1][ecx], p->CPU.CPUID[CPUID_1][edx]);
527if (bit(28) & p->CPU.CPUID[CPUID_1][edx]) // HTT/Multicore
528{
529logical_per_package = bitfield(p->CPU.CPUID[CPUID_1][ebx], 23, 16);
530}
531else
532{
533logical_per_package = 1;
534}
535
536p->CPU.Signature= p->CPU.CPUID[CPUID_1][0];
537p->CPU.Stepping= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 3, 0);// stepping = cpu_feat_eax & 0xF;
538p->CPU.Model= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 7, 4);// model = (cpu_feat_eax >> 4) & 0xF;
539p->CPU.Family= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 11, 8);// family = (cpu_feat_eax >> 8) & 0xF;
540//p->CPU.Type= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 13, 12);// type = (cpu_feat_eax >> 12) & 0x3;
541p->CPU.ExtModel= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 19, 16);// ext_model = (cpu_feat_eax >> 16) & 0xF;
542p->CPU.ExtFamily= (uint8_t)bitfield(p->CPU.CPUID[CPUID_1][0], 27, 20);// ext_family = (cpu_feat_eax >> 20) & 0xFF;
543
544if (p->CPU.Family == 0x0f)
545{
546p->CPU.Family += p->CPU.ExtFamily;
547}
548
549if (p->CPU.Family == 0x0f || p->CPU.Family == 0x06)
550{
551p->CPU.Model += (p->CPU.ExtModel << 4);
552}
553
554switch (p->CPU.Vendor)
555{
556case CPUID_VENDOR_INTEL:
557{
558/* Based on Apple's XNU cpuid.c - Deterministic cache parameters */
559if ((p->CPU.CPUID[CPUID_0][eax] > 3) && (p->CPU.CPUID[CPUID_0][eax] < 0x80000000))
560{
561for (i = 0; i < 0xFF; i++) // safe loop
562{
563do_cpuid2(0x00000004, i, reg); // AX=4: Fn, CX=i: cache index
564if (bitfield(reg[eax], 4, 0) == 0)
565{
566break;
567}
568cores_per_package = bitfield(reg[eax], 31, 26) + 1;
569}
570}
571
572if (i > 0)
573{
574cores_per_package = bitfield(p->CPU.CPUID[CPUID_4][eax], 31, 26) + 1; // i = cache index
575threads_per_core = bitfield(p->CPU.CPUID[CPUID_4][eax], 25, 14) + 1;
576}
577
578if (cores_per_package == 0)
579{
580cores_per_package = 1;
581}
582
583switch (p->CPU.Model)
584{
585case CPUID_MODEL_NEHALEM: // Intel Core i7 LGA1366 (45nm)
586case CPUID_MODEL_FIELDS: // Intel Core i5, i7 LGA1156 (45nm)
587case CPUID_MODEL_CLARKDALE: // Intel Core i3, i5, i7 LGA1156 (32nm)
588case CPUID_MODEL_NEHALEM_EX:
589case CPUID_MODEL_JAKETOWN:
590case CPUID_MODEL_SANDYBRIDGE:
591case CPUID_MODEL_IVYBRIDGE:
592case CPUID_MODEL_IVYBRIDGE_XEON:
593case CPUID_MODEL_HASWELL_U5:
594case CPUID_MODEL_HASWELL:
595case CPUID_MODEL_HASWELL_SVR:
596case CPUID_MODEL_HASWELL_ULT:
597case CPUID_MODEL_HASWELL_ULX:
598case CPUID_MODEL_BROADWELL_HQ:
599case CPUID_MODEL_BRASWELL:
600case CPUID_MODEL_AVOTON:
601case CPUID_MODEL_SKYLAKE:
602case CPUID_MODEL_BRODWELL_SVR:
603case CPUID_MODEL_BRODWELL_MSVR:
604case CPUID_MODEL_KNIGHT:
605case CPUID_MODEL_ANNIDALE:
606case CPUID_MODEL_GOLDMONT:
607case CPUID_MODEL_VALLEYVIEW:
608case CPUID_MODEL_SKYLAKE_S:
609case CPUID_MODEL_SKYLAKE_AVX:
610case CPUID_MODEL_CANNONLAKE:
611msr = rdmsr64(MSR_CORE_THREAD_COUNT); // 0x35
612p->CPU.NoCores= (uint32_t)bitfield((uint32_t)msr, 31, 16);
613p->CPU.NoThreads= (uint32_t)bitfield((uint32_t)msr, 15, 0);
614break;
615
616case CPUID_MODEL_DALES:
617case CPUID_MODEL_WESTMERE: // Intel Core i7 LGA1366 (32nm) 6 Core
618case CPUID_MODEL_WESTMERE_EX:
619msr = rdmsr64(MSR_CORE_THREAD_COUNT);
620p->CPU.NoCores= (uint32_t)bitfield((uint32_t)msr, 19, 16);
621p->CPU.NoThreads= (uint32_t)bitfield((uint32_t)msr, 15, 0);
622break;
623case CPUID_MODEL_ATOM_3700:
624p->CPU.NoCores= 4;
625p->CPU.NoThreads= 4;
626break;
627case CPUID_MODEL_ATOM:
628p->CPU.NoCores= 2;
629p->CPU.NoThreads= 2;
630break;
631default:
632p->CPU.NoCores= 0;
633break;
634}
635
636// workaround for Xeon Harpertown and Yorkfield
637if ((p->CPU.Model == CPUID_MODEL_PENRYN) &&
638(p->CPU.NoCores== 0))
639{
640if ((strstr(p->CPU.BrandString, "X54")) ||
641(strstr(p->CPU.BrandString, "E54")) ||
642(strstr(p->CPU.BrandString, "W35")) ||
643(strstr(p->CPU.BrandString, "X34")) ||
644(strstr(p->CPU.BrandString, "X33")) ||
645(strstr(p->CPU.BrandString, "L33")) ||
646(strstr(p->CPU.BrandString, "X32")) ||
647(strstr(p->CPU.BrandString, "L3426")) ||
648(strstr(p->CPU.BrandString, "L54")))
649{
650p->CPU.NoCores= 4;
651p->CPU.NoThreads= 4;
652} else if (strstr(p->CPU.BrandString, "W36")) {
653p->CPU.NoCores= 6;
654p->CPU.NoThreads= 6;
655} else { //other Penryn and Wolfdale
656p->CPU.NoCores= 0;
657p->CPU.NoThreads= 0;
658}
659}
660
661if (p->CPU.NoCores == 0)
662{
663p->CPU.NoCores= cores_per_package;
664p->CPU.NoThreads= logical_per_package;
665}
666
667// MSR is *NOT* available on the Intel Atom CPU
668// workaround for N270. I don't know why it detected wrong
669if ((p->CPU.Model == CPUID_MODEL_ATOM) && (strstr(p->CPU.BrandString, "270")))
670{
671p->CPU.NoCores= 1;
672p->CPU.NoThreads= 2;
673}
674
675// workaround for Quad
676if ( strstr(p->CPU.BrandString, "Quad") )
677{
678p->CPU.NoCores= 4;
679p->CPU.NoThreads= 4;
680}
681}
682
683break;
684
685case CPUID_VENDOR_AMD:
686{
687post_startup_cpu_fixups();
688cores_per_package = bitfield(p->CPU.CPUID[CPUID_88][ecx], 7, 0) + 1;
689threads_per_core = cores_per_package;
690
691if (cores_per_package == 0)
692{
693cores_per_package = 1;
694}
695
696p->CPU.NoCores= cores_per_package;
697p->CPU.NoThreads= logical_per_package;
698
699if (p->CPU.NoCores == 0)
700{
701p->CPU.NoCores = 1;
702p->CPU.NoThreads= 1;
703}
704}
705break;
706
707default :
708stop("Unsupported CPU detected! System halted.");
709}
710
711/* setup features */
712if ((bit(23) & p->CPU.CPUID[CPUID_1][3]) != 0)
713{
714p->CPU.Features |= CPU_FEATURE_MMX;
715}
716
717if ((bit(25) & p->CPU.CPUID[CPUID_1][3]) != 0)
718{
719p->CPU.Features |= CPU_FEATURE_SSE;
720}
721
722if ((bit(26) & p->CPU.CPUID[CPUID_1][3]) != 0)
723{
724p->CPU.Features |= CPU_FEATURE_SSE2;
725}
726
727if ((bit(0) & p->CPU.CPUID[CPUID_1][2]) != 0)
728{
729p->CPU.Features |= CPU_FEATURE_SSE3;
730}
731
732if ((bit(19) & p->CPU.CPUID[CPUID_1][2]) != 0)
733{
734p->CPU.Features |= CPU_FEATURE_SSE41;
735}
736
737if ((bit(20) & p->CPU.CPUID[CPUID_1][2]) != 0)
738{
739p->CPU.Features |= CPU_FEATURE_SSE42;
740}
741
742if ((bit(29) & p->CPU.CPUID[CPUID_81][3]) != 0)
743{
744p->CPU.Features |= CPU_FEATURE_EM64T;
745}
746
747if ((bit(5) & p->CPU.CPUID[CPUID_1][3]) != 0)
748{
749p->CPU.Features |= CPU_FEATURE_MSR;
750}
751
752if ((p->CPU.NoThreads > p->CPU.NoCores))
753{
754p->CPU.Features |= CPU_FEATURE_HTT;
755}
756
757pic0_mask = inb(0x21U);
758outb(0x21U, 0xFFU); // mask PIC0 interrupts for duration of timing tests
759
760uint64_t cycles;
761cycles = timeRDTSC();
762tscFreq = rtc_set_cyc_per_sec(cycles);
763DBG("cpu freq classic = 0x%016llx\n", tscFreq);
764// if usual method failed
765if ( tscFreq < 1000 )//TEST
766{
767tscFreq = measure_tsc_frequency();//timeRDTSC() * 20;//measure_tsc_frequency();
768// DBG("cpu freq timeRDTSC = 0x%016llx\n", tscFrequency);
769}
770
771if (p->CPU.Vendor==CPUID_VENDOR_INTEL && ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) || (p->CPU.Family == 0x0f && p->CPU.Model >= 0x03)))
772{
773int intelCPU = p->CPU.Model;
774if (p->CPU.Family == 0x06)
775{
776/* Nehalem CPU model */
777switch (p->CPU.Model)
778{
779case CPUID_MODEL_NEHALEM:
780case CPUID_MODEL_FIELDS:
781case CPUID_MODEL_CLARKDALE:
782case CPUID_MODEL_DALES:
783case CPUID_MODEL_WESTMERE:
784case CPUID_MODEL_NEHALEM_EX:
785case CPUID_MODEL_WESTMERE_EX:
786/* --------------------------------------------------------- */
787case CPUID_MODEL_SANDYBRIDGE:
788case CPUID_MODEL_JAKETOWN:
789case CPUID_MODEL_IVYBRIDGE_XEON:
790case CPUID_MODEL_IVYBRIDGE:
791case CPUID_MODEL_ATOM_3700:
792case CPUID_MODEL_HASWELL:
793case CPUID_MODEL_HASWELL_U5:
794case CPUID_MODEL_HASWELL_SVR:
795
796case CPUID_MODEL_HASWELL_ULT:
797case CPUID_MODEL_HASWELL_ULX:
798case CPUID_MODEL_BROADWELL_HQ:
799case CPUID_MODEL_SKYLAKE_S:
800/* --------------------------------------------------------- */
801msr = rdmsr64(MSR_PLATFORM_INFO);
802DBG("msr(%d): platform_info %08x\n", __LINE__, bitfield(msr, 31, 0));
803bus_ratio_max = bitfield(msr, 15, 8);
804bus_ratio_min = bitfield(msr, 47, 40); //valv: not sure about this one (Remarq.1)
805msr = rdmsr64(MSR_FLEX_RATIO);
806DBG("msr(%d): flex_ratio %08x\n", __LINE__, bitfield(msr, 31, 0));
807if (bitfield(msr, 16, 16))
808{
809flex_ratio = bitfield(msr, 15, 8);
810// bcc9: at least on the gigabyte h67ma-ud2h,
811// where the cpu multipler can't be changed to
812// allow overclocking, the flex_ratio msr has unexpected (to OSX)
813// contents.These contents cause mach_kernel to
814// fail to compute the bus ratio correctly, instead
815// causing the system to crash since tscGranularity
816// is inadvertently set to 0.
817
818if (flex_ratio == 0)
819{
820// Clear bit 16 (evidently the presence bit)
821wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
822msr = rdmsr64(MSR_FLEX_RATIO);
823DBG("CPU: Unusable flex ratio detected. Patched MSR now %08x\n", bitfield(msr, 31, 0));
824}
825else
826{
827if (bus_ratio_max > flex_ratio)
828{
829bus_ratio_max = flex_ratio;
830}
831}
832}
833
834if (bus_ratio_max)
835{
836busFrequency = (tscFreq / bus_ratio_max);
837}
838
839//valv: Turbo Ratio Limit
840if ((intelCPU != 0x2e) && (intelCPU != 0x2f))
841{
842msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
843
844cpuFrequency = bus_ratio_max * busFrequency;
845max_ratio = bus_ratio_max * 10;
846}
847else
848{
849cpuFrequency = tscFreq;
850}
851
852if ((getValueForKey(kbusratio, &newratio, &len, &bootInfo->chameleonConfig)) && (len <= 4))
853{
854max_ratio = atoi(newratio);
855max_ratio = (max_ratio * 10);
856if (len >= 3)
857{
858max_ratio = (max_ratio + 5);
859}
860
861verbose("\tBus-Ratio: min=%d, max=%s\n", bus_ratio_min, newratio);
862
863// extreme overclockers may love 320 ;)
864if ((max_ratio >= min_ratio) && (max_ratio <= 320))
865{
866cpuFrequency = (busFrequency * max_ratio) / 10;
867if (len >= 3)
868{
869maxdiv = 1;
870}
871else
872{
873maxdiv = 0;
874}
875}
876else
877{
878max_ratio = (bus_ratio_max * 10);
879}
880}
881//valv: to be uncommented if Remarq.1 didn't stick
882//if (bus_ratio_max > 0) bus_ratio = flex_ratio;
883p->CPU.MaxRatio = max_ratio;
884p->CPU.MinRatio = min_ratio;
885
886myfsb = busFrequency / 1000000;
887verbose("\tSticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", myfsb, max_ratio/10); // Bungo: fixed wrong Bus-Ratio readout
888currcoef = bus_ratio_max;
889
890break;
891
892default:
893msr = rdmsr64(MSR_IA32_PERF_STATUS);
894DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, bitfield(msr, 31, 0));
895currcoef = bitfield(msr, 12, 8); // Bungo: reverted to 2263 state because of wrong old CPUs freq. calculating
896// Non-integer bus ratio for the max-multi
897maxdiv = bitfield(msr, 46, 46);
898// Non-integer bus ratio for the current-multi (undocumented)
899currdiv = bitfield(msr, 14, 14);
900
901// This will always be model >= 3
902if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) || (p->CPU.Family == 0x0f))
903{
904/* On these models, maxcoef defines TSC freq */
905maxcoef = bitfield(msr, 44, 40);
906}
907else
908{
909// On lower models, currcoef defines TSC freq
910// XXX
911maxcoef = currcoef;
912}
913
914if (!currcoef)
915{
916currcoef = maxcoef;
917}
918
919if (maxcoef)
920{
921if (maxdiv)
922{
923busFrequency = ((tscFreq * 2) / ((maxcoef * 2) + 1));
924}
925else
926{
927busFrequency = (tscFreq / maxcoef);
928}
929
930if (currdiv)
931{
932cpuFrequency = (busFrequency * ((currcoef * 2) + 1) / 2);
933}
934else
935{
936cpuFrequency = (busFrequency * currcoef);
937}
938
939DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
940}
941break;
942}
943}
944// Mobile CPU
945if (rdmsr64(MSR_IA32_PLATFORM_ID) & (1<<28))
946{
947p->CPU.Features |= CPU_FEATURE_MOBILE;
948}
949}
950
951else if (p->CPU.Vendor==CPUID_VENDOR_AMD)
952{
953switch(p->CPU.Family)
954{
955case 0xF: /* K8 */
956{
957uint64_t fidvid = 0;
958uint64_t cpuMult;
959uint64_t fid;
960
961fidvid = rdmsr64(K8_FIDVID_STATUS);
962fid = bitfield(fidvid, 5, 0);
963
964cpuMult = (fid + 8) / 2;
965currcoef = cpuMult;
966
967cpuMultN2 = (fidvid & (uint64_t)bit(0));
968currdiv = cpuMultN2;
969/****** Addon END ******/
970}
971break;
972
973case 0x10: /*** AMD Family 10h ***/
974{
975uint64_t cofvid = 0;
976uint64_t cpuMult;
977uint64_t divisor = 0;
978uint64_t did;
979uint64_t fid;
980
981cofvid = rdmsr64(K10_COFVID_STATUS);
982did = bitfield(cofvid, 8, 6);
983fid = bitfield(cofvid, 5, 0);
984if (did == 0) divisor = 2;
985else if (did == 1) divisor = 4;
986else if (did == 2) divisor = 8;
987else if (did == 3) divisor = 16;
988else if (did == 4) divisor = 32;
989
990cpuMult = (fid + 16) / divisor;
991currcoef = cpuMult;
992
993cpuMultN2 = (cofvid & (uint64_t)bit(0));
994currdiv = cpuMultN2;
995
996/****** Addon END ******/
997}
998break;
999
1000case 0x11: /*** AMD Family 11h ***/
1001{
1002uint64_t cofvid = 0;
1003uint64_t cpuMult;
1004uint64_t divisor = 0;
1005uint64_t did;
1006uint64_t fid;
1007
1008cofvid = rdmsr64(K10_COFVID_STATUS);
1009did = bitfield(cofvid, 8, 6);
1010fid = bitfield(cofvid, 5, 0);
1011if (did == 0) divisor = 2;
1012else if (did == 1) divisor = 4;
1013else if (did == 2) divisor = 8;
1014else if (did == 3) divisor = 16;
1015else if (did == 4) divisor = 32;
1016
1017cpuMult = (fid + 8) / divisor;
1018currcoef = cpuMult;
1019
1020cpuMultN2 = (cofvid & (uint64_t)bit(0));
1021currdiv = cpuMultN2;
1022
1023/****** Addon END ******/
1024}
1025 break;
1026
1027case 0x12: /*** AMD Family 12h ***/
1028{
1029// 8:4 CpuFid: current CPU core frequency ID
1030// 3:0 CpuDid: current CPU core divisor ID
1031uint64_t prfsts,CpuFid,CpuDid;
1032prfsts = rdmsr64(K10_COFVID_STATUS);
1033
1034CpuDid = bitfield(prfsts, 3, 0) ;
1035CpuFid = bitfield(prfsts, 8, 4) ;
1036uint64_t divisor;
1037switch (CpuDid)
1038{
1039case 0: divisor = 1; break;
1040case 1: divisor = (3/2); break;
1041case 2: divisor = 2; break;
1042case 3: divisor = 3; break;
1043case 4: divisor = 4; break;
1044case 5: divisor = 6; break;
1045case 6: divisor = 8; break;
1046case 7: divisor = 12; break;
1047case 8: divisor = 16; break;
1048default: divisor = 1; break;
1049}
1050currcoef = (CpuFid + 0x10) / divisor;
1051
1052cpuMultN2 = (prfsts & (uint64_t)bit(0));
1053currdiv = cpuMultN2;
1054
1055}
1056break;
1057
1058case 0x14: /* K14 */
1059
1060{
1061// 8:4: current CPU core divisor ID most significant digit
1062// 3:0: current CPU core divisor ID least significant digit
1063uint64_t prfsts;
1064prfsts = rdmsr64(K10_COFVID_STATUS);
1065
1066uint64_t CpuDidMSD,CpuDidLSD;
1067CpuDidMSD = bitfield(prfsts, 8, 4) ;
1068CpuDidLSD = bitfield(prfsts, 3, 0) ;
1069
1070uint64_t frequencyId = 0x10;
1071currcoef = (frequencyId + 0x10) /
1072(CpuDidMSD + (CpuDidLSD * 0.25) + 1);
1073currdiv = ((CpuDidMSD) + 1) << 2;
1074currdiv += bitfield(msr, 3, 0);
1075
1076cpuMultN2 = (prfsts & (uint64_t)bit(0));
1077currdiv = cpuMultN2;
1078}
1079
1080break;
1081
1082case 0x15: /*** AMD Family 15h ***/
1083case 0x06: /*** AMD Family 06h ***/
1084{
1085
1086uint64_t cofvid = 0;
1087uint64_t cpuMult;
1088uint64_t divisor = 0;
1089uint64_t did;
1090uint64_t fid;
1091
1092cofvid = rdmsr64(K10_COFVID_STATUS);
1093did = bitfield(cofvid, 8, 6);
1094fid = bitfield(cofvid, 5, 0);
1095if (did == 0) divisor = 2;
1096else if (did == 1) divisor = 4;
1097else if (did == 2) divisor = 8;
1098else if (did == 3) divisor = 16;
1099else if (did == 4) divisor = 32;
1100
1101cpuMult = (fid + 16) / divisor;
1102currcoef = cpuMult;
1103
1104cpuMultN2 = (cofvid & (uint64_t)bit(0));
1105currdiv = cpuMultN2;
1106}
1107break;
1108
1109case 0x16: /*** AMD Family 16h kabini ***/
1110{
1111uint64_t cofvid = 0;
1112uint64_t cpuMult;
1113uint64_t divisor = 0;
1114uint64_t did;
1115uint64_t fid;
1116
1117cofvid = rdmsr64(K10_COFVID_STATUS);
1118did = bitfield(cofvid, 8, 6);
1119fid = bitfield(cofvid, 5, 0);
1120if (did == 0) divisor = 1;
1121else if (did == 1) divisor = 2;
1122else if (did == 2) divisor = 4;
1123else if (did == 3) divisor = 8;
1124else if (did == 4) divisor = 16;
1125
1126cpuMult = (fid + 16) / divisor;
1127currcoef = cpuMult;
1128
1129cpuMultN2 = (cofvid & (uint64_t)bit(0));
1130currdiv = cpuMultN2;
1131/****** Addon END ******/
1132}
1133break;
1134
1135default:
1136{
1137typedef unsigned long long vlong;
1138uint64_t prfsts;
1139prfsts = rdmsr64(K10_COFVID_STATUS);
1140uint64_t r;
1141vlong hz;
1142r = (prfsts>>6) & 0x07;
1143hz = (((prfsts & 0x3f)+0x10)*100000000ll)/(1<<r);
1144
1145currcoef = hz / (200 * Mega);
1146}
1147}
1148
1149if (currcoef)
1150{
1151if (currdiv)
1152{
1153busFrequency = ((tscFreq * 2) / ((currcoef * 2) + 1));
1154busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
1155tscFCvtt2n = busFCvtt2n * 2 / (1 + (2 * currcoef));
1156cpuFrequency = ((1 * Giga) << 32) / tscFCvtt2n;
1157
1158DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);
1159}
1160else
1161{
1162busFrequency = (tscFreq / currcoef);
1163busFCvtt2n = ((1 * Giga) << 32) / busFrequency;
1164tscFCvtt2n = busFCvtt2n / currcoef;
1165cpuFrequency = ((1 * Giga) << 32) / tscFCvtt2n;
1166DBG("%d\n", currcoef);
1167}
1168}
1169else if (!cpuFrequency)
1170{
1171cpuFrequency = tscFreq;
1172}
1173}
1174
1175#if 0
1176if (!busFrequency)
1177{
1178busFrequency = (DEFAULT_FSB * 1000);
1179DBG("\tCPU: busFrequency = 0! using the default value for FSB!\n");
1180cpuFrequency = tscFreq;
1181}
1182
1183DBG("\tcpu freq = 0x%016llxn", timeRDTSC() * 20);
1184
1185#endif
1186
1187outb(0x21U, pic0_mask); // restore PIC0 interrupts
1188
1189p->CPU.MaxCoef = maxcoef = currcoef;
1190p->CPU.MaxDiv = maxdiv = currdiv;
1191p->CPU.CurrCoef = currcoef;
1192p->CPU.CurrDiv = currdiv;
1193p->CPU.TSCFrequency = tscFreq;
1194p->CPU.FSBFrequency = busFrequency;
1195p->CPU.CPUFrequency = cpuFrequency;
1196
1197// keep formatted with spaces instead of tabs
1198
1199DBG("\tCPUID Raw Values:\n");
1200for (i = 0; i < CPUID_MAX; i++)
1201{
1202DBG("\t%02d: %08X-%08X-%08X-%08X\n", i, p->CPU.CPUID[i][eax], p->CPU.CPUID[i][ebx], p->CPU.CPUID[i][ecx], p->CPU.CPUID[i][edx]);
1203}
1204DBG("\n");
1205DBG("\tBrand String: %s\n",p->CPU.BrandString);// Processor name (BIOS)
1206DBG("\tVendor: 0x%X\n",p->CPU.Vendor);// Vendor ex: GenuineIntel
1207DBG("\tFamily: 0x%X\n",p->CPU.Family);// Family ex: 6 (06h)
1208DBG("\tExtFamily: 0x%X\n",p->CPU.ExtFamily);
1209DBG("\tSignature: 0x%08X\n",p->CPU.Signature);// CPUID signature
1210/*switch (p->CPU.Type) {
1211case PT_OEM:
1212DBG("\tProcessor type: Intel Original OEM Processor\n");
1213break;
1214case PT_OD:
1215DBG("\tProcessor type: Intel Over Drive Processor\n");
1216break;
1217case PT_DUAL:
1218DBG("\tProcessor type: Intel Dual Processor\n");
1219break;
1220case PT_RES:
1221DBG("\tProcessor type: Intel Reserved\n");
1222break;
1223default:
1224break;
1225}*/
1226DBG("\tModel: 0x%X\n",p->CPU.Model);// Model ex: 37 (025h)
1227DBG("\tExtModel: 0x%X\n",p->CPU.ExtModel);
1228DBG("\tStepping: 0x%X\n",p->CPU.Stepping);// Stepping ex: 5 (05h)
1229DBG("\tMaxCoef: %d\n",p->CPU.MaxCoef);
1230DBG("\tCurrCoef: %d\n",p->CPU.CurrCoef);
1231DBG("\tMaxDiv: %d\n",p->CPU.MaxDiv);
1232DBG("\tCurrDiv: %d\n",p->CPU.CurrDiv);
1233DBG("\tTSCFreq: %dMHz\n",p->CPU.TSCFrequency / 1000000);
1234DBG("\tFSBFreq: %dMHz\n",(p->CPU.FSBFrequency + 500000) / 1000000);
1235DBG("\tCPUFreq: %dMHz\n",p->CPU.CPUFrequency / 1000000);
1236DBG("\tCores: %d\n",p->CPU.NoCores);// Cores
1237DBG("\tLogical processor: %d\n",p->CPU.NoThreads);// Logical procesor
1238DBG("\tFeatures: 0x%08x\n",p->CPU.Features);
1239//DBG("\tMicrocode version: %d\n",p->CPU.MCodeVersion);// CPU microcode version
1240
1241verbose("\n");
1242#if DEBUG_CPU
1243pause();
1244#endif
1245}
1246

/*
 * Archive Download this file
 * Revision: 2864
 */