Chameleon

Chameleon Svn Source Tree

Root/branches/cparm/i386/libsaio/cpu.c

1/*
2 * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>
3 * AsereBLN: 2009: cleanup and bugfix
4 */
5
6#include "libsaio.h"
7#include "platform.h"
8#include "cpu.h"
9
/*
 * Debug output selection for this file.
 * DEBUG_CPU defaults to 0 unless the build defines it.  When it is non-zero
 * DBG() expands to printf(); otherwise it expands to msglog().
 */
#if !defined(DEBUG_CPU)
#define DEBUG_CPU 0
#endif

#if DEBUG_CPU
#define DBG(...)	printf(__VA_ARGS__)
#else
#define DBG(...)	msglog(__VA_ARGS__)
#endif
19
20/*
21 * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer
22 */
23static uint64_t measure_tsc_frequency(void)
24{
25 uint64_t tscStart;
26 uint64_t tscEnd;
27 uint64_t tscDelta = 0xffffffffffffffffULL;
28 unsigned long pollCount;
29 uint64_t retval = 0;
30 int i;
31
32 /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT
33 * counter 2. We run this loop 3 times to make sure the cache
34 * is hot and we take the minimum delta from all of the runs.
35 * That is to say that we're biased towards measuring the minimum
36 * number of TSC ticks that occur while waiting for the timer to
37 * expire. That theoretically helps avoid inconsistencies when
38 * running under a VM if the TSC is not virtualized and the host
39 * steals time. The TSC is normally virtualized for VMware.
40 */
41 for(i = 0; i < 10; ++i)
42 {
43 enable_PIT2();
44 set_PIT2_mode0(CALIBRATE_LATCH);
45 tscStart = rdtsc64();
46 pollCount = poll_PIT2_gate();
47 tscEnd = rdtsc64();
48 /* The poll loop must have run at least a few times for accuracy */
49 if(pollCount <= 1)
50 continue;
51 /* The TSC must increment at LEAST once every millisecond. We
52 * should have waited exactly 30 msec so the TSC delta should
53 * be >= 30. Anything less and the processor is way too slow.
54 */
55 if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)
56 continue;
57 // tscDelta = min(tscDelta, (tscEnd - tscStart))
58 if( (tscEnd - tscStart) < tscDelta )
59 tscDelta = tscEnd - tscStart;
60 }
61 /* tscDelta is now the least number of TSC ticks the processor made in
62 * a timespan of 0.03 s (e.g. 30 milliseconds)
63 * Linux thus divides by 30 which gives the answer in kiloHertz because
64 * 1 / ms = kHz. But we're xnu and most of the rest of the code uses
65 * Hz so we need to convert our milliseconds to seconds. Since we're
66 * dividing by the milliseconds, we simply multiply by 1000.
67 */
68
69 /* Unlike linux, we're not limited to 32-bit, but we do need to take care
70 * that we're going to multiply by 1000 first so we do need at least some
71 * arithmetic headroom. For now, 32-bit should be enough.
72 * Also unlike Linux, our compiler can do 64-bit integer arithmetic.
73 */
74 if(tscDelta > (1ULL<<32))
75 retval = 0;
76 else
77 {
78 retval = tscDelta * 1000 / 30;
79 }
80 disable_PIT2();
81 return retval;
82}
83
84/*
85 * Calculates the FSB and CPU frequencies using specific MSRs for each CPU
86 * - multi. is read from a specific MSR. In the case of Intel, there is:
87 * a max multi. (used to calculate the FSB freq.),
88 * and a current multi. (used to calculate the CPU freq.)
89 * - fsbFrequency = tscFrequency / multi
90 * - cpuFrequency = fsbFrequency * multi
91 */
92
93void scan_cpu(PlatformInfo_t *p)
94{
95uint64_ttscFrequency, fsbFrequency, cpuFrequency;
96uint64_tmsr;
97uint8_tmaxcoef, maxdiv, currcoef, currdiv;
98 uint32_treg[4];
99 uint32_tCPUID[CPUID_MAX][4];// CPUID 0..4, 80..81 Raw Values
100 uint32_t cores_per_package;
101 uint32_t logical_per_package;
102maxcoef = maxdiv = currcoef = currdiv = 0;
103
104do_cpuid(0, CPUID[0]);
105 p->CPU.Vendor= CPUID[CPUID_0][1];
106
107 do_cpuid2(0x00000004, 0, CPUID[CPUID_4]);
108 cores_per_package= bitfield(CPUID[CPUID_4][0], 31, 26) + 1;
109
110 /* get extended cpuid results */
111do_cpuid(0x80000000, reg);
112uint32_t cpuid_max_ext = reg[eax];
113
114 /* get brand string (if supported) */
115/* Copyright: from Apple's XNU cpuid.c */
116if (cpuid_max_ext > 0x80000004) {
117 char str[128], *s;
118/*
119 * The brand string 48 bytes (max), guaranteed to
120 * be NUL terminated.
121 */
122do_cpuid(0x80000002, reg);
123bcopy((char *)reg, &str[0], 16);
124do_cpuid(0x80000003, reg);
125bcopy((char *)reg, &str[16], 16);
126do_cpuid(0x80000004, reg);
127bcopy((char *)reg, &str[32], 16);
128for (s = str; *s != '\0'; s++) {
129if (*s != ' ') break;
130}
131
132strlcpy(p->CPU.BrandString,s, sizeof(p->CPU.BrandString));
133
134if (!strncmp(p->CPU.BrandString, CPUID_STRING_UNKNOWN, min(sizeof(p->CPU.BrandString), (unsigned)strlen(CPUID_STRING_UNKNOWN) + 1))) {
135 /*
136 * This string means we have a firmware-programmable brand string,
137 * and the firmware couldn't figure out what sort of CPU we have.
138 */
139 p->CPU.BrandString[0] = '\0';
140 }
141}
142
143 /* get processor signature and decode */
144do_cpuid(1, reg);
145p->CPU.Signature = reg[eax];
146p->CPU.Stepping = bitfield(reg[eax], 3, 0);
147p->CPU.Model = bitfield(reg[eax], 7, 4);
148p->CPU.Family = bitfield(reg[eax], 11, 8);
149p->CPU.ExtModel = bitfield(reg[eax], 19, 16);
150p->CPU.ExtFamily = bitfield(reg[eax], 27, 20);
151p->CPU.Brand = bitfield(reg[ebx], 7, 0);
152p->CPU.Features = quad(reg[ecx], reg[edx]);
153 //p->CPU.Type = bitfield(reg[eax], 13, 12);
154
155 /* Fold extensions into family/model */
156if (p->CPU.Family == 0x0f)
157p->CPU.Family += p->CPU.ExtFamily;
158if (p->CPU.Family == 0x0f || p->CPU.Family == 0x06)
159p->CPU.Model += (p->CPU.ExtModel << 4);
160
161 if (p->CPU.Features & CPUID_FEATURE_HTT)
162logical_per_package =
163 bitfield(reg[ebx], 23, 16);
164else
165logical_per_package = 1;
166
167if (cpuid_max_ext >= 0x80000001) {
168do_cpuid(0x80000001, reg);
169p->CPU.ExtFeatures =
170 quad(reg[ecx], reg[edx]);
171
172}
173
174/* Fold in the Invariant TSC feature bit, if present */
175if (cpuid_max_ext >= 0x80000007) {
176do_cpuid(0x80000007, reg);
177p->CPU.ExtFeatures |=
178 reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;
179}
180
181 /* Find the microcode version number a.k.a. signature a.k.a. BIOS ID */
182 p->CPU.MicrocodeVersion =
183 (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
184
185 if ((p->CPU.Vendor == 0x756E6547 /* Intel */) &&
186(p->CPU.Family == 0x06)) {
187 /*
188 * Find the number of enabled cores and threads
189 * (which determines whether SMT/Hyperthreading is active).
190 */
191 switch (p->CPU.Model) {
192 /*
193 * This should be the same as Nehalem but an A0 silicon bug returns
194 * invalid data in the top 12 bits. Hence, we use only bits [19..16]
195 * rather than [31..16] for core count - which actually can't exceed 8.
196 */
197 case CPUID_MODEL_DALES_32NM:
198 case CPUID_MODEL_WESTMERE:
199 case CPUID_MODEL_WESTMERE_EX:
200 {
201 msr = rdmsr64(MSR_CORE_THREAD_COUNT);
202 p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
203 p->CPU.NoCores = bitfield((uint32_t)msr, 19, 16);
204 break;
205 }
206
207 case CPUID_MODEL_NEHALEM:
208 case CPUID_MODEL_FIELDS:
209 case CPUID_MODEL_DALES:
210 case CPUID_MODEL_NEHALEM_EX:
211case CPUID_MODEL_SANDYBRIDGE:
212case CPUID_MODEL_JAKETOWN:
213 {
214 msr = rdmsr64(MSR_CORE_THREAD_COUNT);
215 p->CPU.NoThreads = bitfield((uint32_t)msr, 15, 0);
216 p->CPU.NoCores = bitfield((uint32_t)msr, 31, 16);
217 break;
218 }
219 }
220 }
221
222 if (p->CPU.NoCores == 0) {
223p->CPU.NoThreads = cores_per_package;
224p->CPU.NoCores = logical_per_package;
225}
226
227
228tscFrequency = measure_tsc_frequency();
229fsbFrequency = 0;
230cpuFrequency = 0;
231
232
233if ((p->CPU.Vendor == 0x756E6547 /* Intel */) &&
234((p->CPU.Family == 0x06) ||
235 (p->CPU.Family == 0x0f)))
236{
237if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0c) ||
238(p->CPU.Family == 0x0f && p->CPU.Model >= 0x03))
239{
240/* Nehalem CPU model */
241if (p->CPU.Family == 0x06 && (p->CPU.Model == CPUID_MODEL_NEHALEM ||
242 p->CPU.Model == CPUID_MODEL_FIELDS ||
243 p->CPU.Model == CPUID_MODEL_DALES ||
244 p->CPU.Model == CPUID_MODEL_DALES_32NM ||
245 p->CPU.Model == CPUID_MODEL_WESTMERE ||
246 p->CPU.Model == CPUID_MODEL_NEHALEM_EX ||
247 p->CPU.Model == CPUID_MODEL_WESTMERE_EX ||
248 p->CPU.Model == CPUID_MODEL_SANDYBRIDGE ||
249 p->CPU.Model == CPUID_MODEL_JAKETOWN))
250{
251uint8_tbus_ratio_max = 0, bus_ratio_min = 0;
252uint32_tmax_ratio = 0;
253uint64_tflex_ratio = 0;
254msr = rdmsr64(MSR_PLATFORM_INFO);
255#if DEBUG_CPU
256DBG("msr(%d): platform_info %08x\n", __LINE__, msr & 0xffffffff);
257#endif
258bus_ratio_max = (msr >> 8) & 0xff;
259bus_ratio_min = (msr >> 40) & 0xff; //valv: not sure about this one (Remarq.1)
260msr = rdmsr64(MSR_FLEX_RATIO);
261#if DEBUG_CPU
262DBG("msr(%d): flex_ratio %08x\n", __LINE__, msr & 0xffffffff);
263#endif
264if ((msr >> 16) & 0x01) {
265flex_ratio = (msr >> 8) & 0xff;
266/* bcc9: at least on the gigabyte h67ma-ud2h,
267 where the cpu multipler can't be changed to
268 allow overclocking, the flex_ratio msr has unexpected (to OSX)
269 contents. These contents cause mach_kernel to
270 fail to compute the bus ratio correctly, instead
271 causing the system to crash since tscGranularity
272 is inadvertently set to 0.
273 */
274if (flex_ratio == 0) {
275/* Clear bit 16 (evidently the
276 presence bit) */
277wrmsr64(MSR_FLEX_RATIO, (msr & 0xFFFFFFFFFFFEFFFFULL));
278msr = rdmsr64(MSR_FLEX_RATIO);
279#if DEBUG_CPU
280DBG("Unusable flex ratio detected. MSR Patched to %08x\n", msr & 0xffffffff);
281#endif
282} else {
283if (bus_ratio_max > flex_ratio) {
284bus_ratio_max = flex_ratio;
285}
286}
287}
288
289if (bus_ratio_max) {
290fsbFrequency = (tscFrequency / bus_ratio_max);
291}
292//valv: Turbo Ratio Limit
293if ((p->CPU.Model != 0x2e) && (p->CPU.Model != 0x2f)) {
294msr = rdmsr64(MSR_TURBO_RATIO_LIMIT);
295cpuFrequency = bus_ratio_max * fsbFrequency;
296max_ratio = bus_ratio_max * 10;
297} else {
298cpuFrequency = tscFrequency;
299}
300#if DEBUG_CPU
301DBG("Sticking with [BCLK: %dMhz, Bus-Ratio: %d]\n", fsbFrequency / 1000000, max_ratio);
302#endif
303currcoef = bus_ratio_max;
304}
305else
306{
307msr = rdmsr64(MSR_IA32_PERF_STATUS);
308#if DEBUG_CPU
309DBG("msr(%d): ia32_perf_stat 0x%08x\n", __LINE__, msr & 0xffffffff);
310#endif
311currcoef = (msr >> 8) & 0x1f;
312/* Non-integer bus ratio for the max-multi*/
313maxdiv = (msr >> 46) & 0x01;
314/* Non-integer bus ratio for the current-multi (undocumented)*/
315currdiv = (msr >> 14) & 0x01;
316
317if ((p->CPU.Family == 0x06 && p->CPU.Model >= 0x0e) ||
318(p->CPU.Family == 0x0f)) // This will always be model >= 3
319{
320/* On these models, maxcoef defines TSC freq */
321maxcoef = (msr >> 40) & 0x1f;
322}
323else
324{
325/* On lower models, currcoef defines TSC freq */
326/* XXX */
327maxcoef = currcoef;
328}
329
330if (maxcoef)
331{
332if (maxdiv)
333{
334fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));
335}
336else
337{
338fsbFrequency = (tscFrequency / maxcoef);
339}
340
341if (currdiv)
342{
343cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);
344}
345else
346{
347cpuFrequency = (fsbFrequency * currcoef);
348}
349#if DEBUG_CPU
350DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");
351#endif
352}
353}
354}
355 /* Mobile CPU ? */
356//Slice
357 p->CPU.isMobile = false;
358switch (p->CPU.Model) {
359case 0x0D:
360p->CPU.isMobile = true;
361break;
362case 0x02:
363case 0x03:
364case 0x04:
365case 0x06:
366p->CPU.isMobile = (rdmsr64(0x2C) & (1 << 21));
367break;
368default:
369p->CPU.isMobile = (rdmsr64(0x17) & (1 << 28));
370break;
371}
372
373DBG("%s platform found.\n", p->CPU.isMobile?"Mobile":"Desktop");
374}
375
376p->CPU.MaxCoef = maxcoef;
377p->CPU.MaxDiv = maxdiv;
378p->CPU.CurrCoef = currcoef;
379p->CPU.CurrDiv = currdiv;
380
381p->CPU.TSCFrequency = (tscFrequency / 1000000) * 1000000;
382p->CPU.FSBFrequency = (fsbFrequency / 1000000) * 1000000;
383p->CPU.CPUFrequency = (cpuFrequency / 1000000) * 1000000;
384
385 //p->CPU.TSCFrequency = tscFrequency ;
386//p->CPU.FSBFrequency = fsbFrequency ;
387//p->CPU.CPUFrequency = cpuFrequency ;
388
389DBG("CPU: Vendor/Model/ExtModel: 0x%x/0x%x/0x%x\n", p->CPU.Vendor, p->CPU.Model, p->CPU.ExtModel);
390DBG("CPU: Family/ExtFamily: 0x%x/0x%x\n", p->CPU.Family, p->CPU.ExtFamily);
391DBG("CPU: TSCFreq: %dMHz\n", p->CPU.TSCFrequency / 1000000);
392if(p->CPU.Vendor == 0x756E6547 /* Intel */)
393{
394DBG("CPU: FSBFreq: %dMHz\n", p->CPU.FSBFrequency / 1000000);
395DBG("CPU: CPUFreq: %dMHz\n", p->CPU.CPUFrequency / 1000000);
396DBG("CPU: MaxCoef/CurrCoef: 0x%x/0x%x\n", p->CPU.MaxCoef, p->CPU.CurrCoef);
397DBG("CPU: MaxDiv/CurrDiv: 0x%x/0x%x\n", p->CPU.MaxDiv, p->CPU.CurrDiv);
398}
399
400DBG("CPU: NoCores/NoThreads: %d/%d\n", p->CPU.NoCores, p->CPU.NoThreads);
401DBG("CPU: Features: 0x%08x\n", p->CPU.Features);
402 DBG("CPU: ExtFeatures: 0x%08x\n", p->CPU.ExtFeatures); // where is SYSCALL ??
403 DBG("CPU: MicrocodeVersion: %d\n", p->CPU.MicrocodeVersion);
404#if DEBUG_CPU
405pause();
406#endif
407
408}
409

Archive Download this file

Revision: 1119