1 | /*␊ |
2 | * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>␊ |
3 | */␊ |
4 | ␊ |
5 | #include "libsaio.h"␊ |
6 | #include "freq_detect.h"␊ |
7 | ␊ |
8 | // DFE: enable_PIT2 and disable_PIT2 come from older xnu␊ |
9 | ␊ |
10 | /*␊ |
11 | * Enable or disable timer 2.␊ |
12 | * Port 0x61 controls timer 2:␊ |
13 | * bit 0 gates the clock,␊ |
14 | * bit 1 gates output to speaker.␊ |
15 | */␊ |
/*
 * Open the timer 2 gate while keeping the speaker output off.
 *
 * Reads the control byte at port 0x61, clears bits 0 and 1, sets bit 0
 * again and writes the result back to the same port.
 */
static inline void
enable_PIT2(void)
{
	unsigned char ctl;

	__asm__ volatile("inb $0x61,%0" : "=a"(ctl));

	/* & ~0x03 drops gate and speaker bits, | 0x01 re-enables the gate */
	ctl = (unsigned char)((ctl & 0xFC) | 0x01);

	__asm__ volatile("outb %0,$0x61" : : "a"(ctl));
}
27 | ␊ |
/*
 * Close the timer 2 gate and silence the speaker output.
 *
 * Reads the control byte at port 0x61, clears bits 0 and 1 and writes
 * the result back to the same port.
 */
static inline void
disable_PIT2(void)
{
	unsigned char ctl;

	__asm__ volatile("inb $0x61,%0" : "=a"(ctl));

	/* & ~0x03: drop both the gate bit and the speaker bit */
	ctl = (unsigned char)(ctl & 0xFC);

	__asm__ volatile("outb %0,$0x61" : : "a"(ctl));
}
38 | ␊ |
39 | // DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are␊ |
40 | // roughly based on Linux code␊ |
41 | ␊ |
/*
 * Program 8254 channel 2 in mode 0 with the given 16-bit count.
 *
 * The control byte 0xB0 is written to port 0x43, followed by the low and
 * then the high byte of `value` to the channel 2 data port 0x42.
 *
 * Per the 8254 data sheet, in mode 0 the channel output starts low and
 * goes high once the count expires, and the counter only runs while its
 * gate input is high. Callers therefore raise the gate (enable_PIT2())
 * before calling this function.
 *
 * value: initial count to load into channel 2.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"
		" outb %%al,$0x43 \n\t"
		" movb %%dl,%%al \n\t"
		" outb %%al,$0x42 \n\t"
		" movb %%dh,%%al \n\t"
		" outb %%al,$0x42"
		: /* no output */
		: "d"(value)
		: "%al" /* %al is used as scratch, so the compiler must be told */ );
}
58 | ␊ |
/*
 * Busy-wait until bit 5 (0x20) of port 0x61 becomes set.
 *
 * Returns the number of times port 0x61 was read before the bit was seen
 * set; the result is always at least 1.
 */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long count = 0;
	unsigned char nmi_sc_val;

	do {
		++count;
		/*
		 * `inb` can only deliver its result in %al, so the operand must
		 * use the "a" constraint; "q" would also allow %bl/%cl/%dl,
		 * for which no encoding of this instruction exists.
		 */
		__asm__ volatile(
			"inb $0x61,%0"
			: "=a"(nmi_sc_val) /* no input, no clobber */ );
	} while ((nmi_sc_val & 0x20) == 0);

	return count;
}
72 | ␊ |
73 | /*␊ |
74 | * DFE: Measures the TSC frequency in Hz (64-bit) using the ACPI PM timer␊ |
75 | */␊ |
76 | uint64_t measure_tsc_frequency(void)␊ |
77 | {␊ |
78 | uint64_t tscStart;␊ |
79 | uint64_t tscEnd;␊ |
80 | uint64_t tscDelta = 0xffffffffffffffffULL;␊ |
81 | unsigned long pollCount;␊ |
82 | uint64_t retval = 0;␊ |
83 | int i;␊ |
84 | ␊ |
85 | /* Time how many TSC ticks elapse in 30 msec using the 8254 PIT␊ |
86 | * counter 2. We run this loop 3 times to make sure the cache␊ |
87 | * is hot and we take the minimum delta from all of the runs.␊ |
88 | * That is to say that we're biased towards measuring the minimum␊ |
89 | * number of TSC ticks that occur while waiting for the timer to␊ |
90 | * expire. That theoretically helps avoid inconsistencies when␊ |
91 | * running under a VM if the TSC is not virtualized and the host␊ |
92 | * steals time. The TSC is normally virtualized for VMware.␊ |
93 | */␊ |
94 | for(i = 0; i < 3; ++i)␊ |
95 | {␊ |
96 | enable_PIT2();␊ |
97 | set_PIT2_mode0(CALIBRATE_LATCH);␊ |
98 | tscStart = rdtsc64();␊ |
99 | pollCount = poll_PIT2_gate();␊ |
100 | tscEnd = rdtsc64();␊ |
101 | /* The poll loop must have run at least a few times for accuracy */␊ |
102 | if(pollCount <= 1)␊ |
103 | continue;␊ |
104 | /* The TSC must increment at LEAST once every millisecond. We␊ |
105 | * should have waited exactly 30 msec so the TSC delta should␊ |
106 | * be >= 30. Anything less and the processor is way too slow.␊ |
107 | */␊ |
108 | if((tscEnd - tscStart) <= CALIBRATE_TIME_MSEC)␊ |
109 | continue;␊ |
110 | // tscDelta = min(tscDelta, (tscEnd - tscStart))␊ |
111 | if( (tscEnd - tscStart) < tscDelta )␊ |
112 | tscDelta = tscEnd - tscStart;␊ |
113 | }␊ |
114 | /* tscDelta is now the least number of TSC ticks the processor made in␊ |
115 | * a timespan of 0.03 s (e.g. 30 milliseconds)␊ |
116 | * Linux thus divides by 30 which gives the answer in kiloHertz because␊ |
117 | * 1 / ms = kHz. But we're xnu and most of the rest of the code uses␊ |
118 | * Hz so we need to convert our milliseconds to seconds. Since we're␊ |
119 | * dividing by the milliseconds, we simply multiply by 1000.␊ |
120 | */␊ |
121 | ␊ |
122 | /* Unlike linux, we're not limited to 32-bit, but we do need to take care␊ |
123 | * that we're going to multiply by 1000 first so we do need at least some␊ |
124 | * arithmetic headroom. For now, 32-bit should be enough.␊ |
125 | * Also unlike Linux, our compiler can do 64-bit integer arithmetic.␊ |
126 | */␊ |
127 | if(tscDelta > (1ULL<<32))␊ |
128 | retval = 0;␊ |
129 | else␊ |
130 | {␊ |
131 | retval = tscDelta * 1000 / 30;␊ |
132 | }␊ |
133 | disable_PIT2();␊ |
134 | return retval;␊ |
135 | }␊ |
136 | ␊ |
/* Frequencies published by calculate_freq(); all start out as 0. */

/* TSC rate as returned by measure_tsc_frequency(), in Hz. */
uint64_t tscFrequency = 0;

/* Bus frequency derived from the TSC rate and the CPU multiplier. */
uint64_t fsbFrequency = 0;

/* Core frequency derived from the bus frequency or the TSC rate. */
uint64_t cpuFrequency = 0;
140 | ␊ |
141 | /*␊ |
142 | * Calculates the FSB and CPU frequencies using specific MSRs for each CPU␊ |
143 | * - multi. is read from a specific MSR. In the case of Intel, there is:␊ |
144 | * a max multi. (used to calculate the FSB freq.),␊ |
145 | * and a current multi. (used to calculate the CPU freq.)␊ |
146 | * - fsbFrequency = tscFrequency / multi␊ |
147 | * - cpuFrequency = fsbFrequency * multi␊ |
148 | */␊ |
149 | ␊ |
150 | void calculate_freq(void)␊ |
151 | {␊ |
152 | ␉uint32_t␉cpuid_reg[4], cpu_vendor;␊ |
153 | ␉uint8_t␉␉cpu_family, cpu_model, cpu_extfamily, cpu_extmodel;␊ |
154 | ␉uint64_t␉msr, flex_ratio;␊ |
155 | ␉uint8_t␉␉maxcoef, maxdiv, currcoef, currdiv;␊ |
156 | ␉␊ |
157 | ␉do_cpuid(0, cpuid_reg);␊ |
158 | ␉cpu_vendor = cpuid_reg[1];␊ |
159 | ␉␊ |
160 | ␉do_cpuid(1, cpuid_reg);␊ |
161 | ␉cpu_model = bitfield(cpuid_reg[0], 7, 4);␊ |
162 | ␉cpu_family = bitfield(cpuid_reg[0], 11, 8);␊ |
163 | ␉cpu_extmodel = bitfield(cpuid_reg[0], 19, 16);␊ |
164 | ␉cpu_extfamily = bitfield(cpuid_reg[0], 27, 20);␊ |
165 | ␉␊ |
166 | ␉cpu_model += (cpu_extmodel << 4);␊ |
167 | ␊ |
168 | ␉DBG("\nCPU Model: %d - CPU Family: %d - CPU Ext. Family: %d\n", cpu_model, cpu_family, cpu_extfamily);␊ |
169 | ␉DBG("The booter will now attempt to read the CPU Multiplier (using RDMSR).\n");␊ |
170 | ␉DBG("Press any key to continue..\n\n");␊ |
171 | #if DEBUG_FREQ␊ |
172 | getc();␊ |
173 | #endif␊ |
174 | ␊ |
175 | ␉tscFrequency = measure_tsc_frequency();␊ |
176 | ␊ |
177 | ␉DBG("CPU Multiplier: ");␊ |
178 | ␊ |
179 | ␉if((cpu_vendor == 0x756E6547 /* Intel */) && ((cpu_family == 0x06) || (cpu_family == 0x0f)))␊ |
180 | ␉{␊ |
181 | ␉␉if ((cpu_family == 0x06 && cpu_model >= 0x0c) ||␊ |
182 | ␉␉␉(cpu_family == 0x0f && cpu_model >= 0x03))␊ |
183 | ␉␉{␊ |
184 | ␉␉␉/* Nehalem CPU model */␊ |
185 | ␉␉␉if (cpu_family == 0x06 && (cpu_model == 0x1a || cpu_model == 0x1e))␊ |
186 | ␉␉␉{␊ |
187 | ␉␉␉␉msr = rdmsr64(MSR_PLATFORM_INFO);␊ |
188 | ␉␉␉␉currcoef = (msr >> 8) & 0xff;␊ |
189 | ␉␉␉␉msr = rdmsr64(MSR_FLEX_RATIO);␊ |
190 | ␉␉␉␉if ((msr >> 16) & 0x01)␊ |
191 | ␉␉␉␉{␊ |
192 | ␉␉␉␉␉flex_ratio = (msr >> 8) & 0xff;␊ |
193 | ␉␉␉␉␉if (currcoef > flex_ratio)␊ |
194 | ␉␉␉␉␉␉currcoef = flex_ratio;␊ |
195 | ␉␉␉␉}␊ |
196 | ␊ |
197 | ␉␉␉␉if (currcoef)␊ |
198 | ␉␉␉␉{␊ |
199 | ␉␉␉␉␉DBG("%d\n", currcoef);␊ |
200 | ␉␉␉␉␉fsbFrequency = (tscFrequency / currcoef);␊ |
201 | ␉␉␉␉}␊ |
202 | ␉␉␉␉cpuFrequency = tscFrequency;␊ |
203 | ␉␉␉}␊ |
204 | ␉␉␉else␊ |
205 | ␉␉␉{␊ |
206 | ␉␉␉␉msr = rdmsr64(IA32_PERF_STATUS);␊ |
207 | ␉␉␉␉currcoef = (msr >> 8) & 0x1f;␊ |
208 | ␉␉␉␉/* Non-integer bus ratio for the max-multi*/␊ |
209 | ␉␉␉␉maxdiv = (msr >> 46) & 0x01;␊ |
210 | ␉␉␉␉/* Non-integer bus ratio for the current-multi (undocumented)*/␊ |
211 | ␉␉␉␉currdiv = (msr >> 14) & 0x01;␊ |
212 | ␊ |
213 | ␉␉␉␉if ((cpu_family == 0x06 && cpu_model >= 0x0e) ||␊ |
214 | ␉␉␉␉␉(cpu_family == 0x0f)) // This will always be model >= 3␊ |
215 | ␉␉␉␉{␊ |
216 | ␉␉␉␉␉/* On these models, maxcoef defines TSC freq */␊ |
217 | ␉␉␉␉␉maxcoef = (msr >> 40) & 0x1f;␊ |
218 | ␉␉␉␉}␊ |
219 | ␉␉␉␉else␊ |
220 | ␉␉␉␉{␊ |
221 | ␉␉␉␉␉/* On lower models, currcoef defines TSC freq */␊ |
222 | ␉␉␉␉␉/* XXX */␊ |
223 | ␉␉␉␉␉maxcoef = currcoef;␊ |
224 | ␉␉␉␉}␊ |
225 | ␊ |
226 | ␉␉␉␉if (maxcoef)␊ |
227 | ␉␉␉␉{␊ |
228 | ␉␉␉␉␉if (maxdiv)␊ |
229 | ␉␉␉␉␉␉fsbFrequency = ((tscFrequency * 2) / ((maxcoef * 2) + 1));␊ |
230 | ␉␉␉␉␉else␊ |
231 | ␉␉␉␉␉␉fsbFrequency = (tscFrequency / maxcoef);␊ |
232 | ␊ |
233 | ␉␉␉␉␉if (currdiv)␊ |
234 | ␉␉␉␉␉␉cpuFrequency = (fsbFrequency * ((currcoef * 2) + 1) / 2);␊ |
235 | ␉␉␉␉␉else␊ |
236 | ␉␉␉␉␉␉cpuFrequency = (fsbFrequency * currcoef);␊ |
237 | ␉␉␉␉␉DBG("max: %d%s current: %d%s\n", maxcoef, maxdiv ? ".5" : "",currcoef, currdiv ? ".5" : "");␊ |
238 | ␉␉␉␉}␊ |
239 | ␉␉␉}␊ |
240 | ␉␉}␊ |
241 | ␉}␊ |
242 | ␉else if((cpu_vendor == 0x68747541 /* AMD */) && (cpu_family == 0x0f))␊ |
243 | ␉{␊ |
244 | ␉␉if(cpu_extfamily == 0x00 /* K8 */)␊ |
245 | ␉␉{␊ |
246 | ␉␉␉msr = rdmsr64(K8_FIDVID_STATUS);␊ |
247 | ␉␉␉currcoef = (msr & 0x3f) / 2 + 4;␊ |
248 | ␉␉␉currdiv = (msr & 0x01) * 2;␊ |
249 | ␉␉}␊ |
250 | ␉␉else if(cpu_extfamily >= 0x01 /* K10+ */)␊ |
251 | ␉␉{␊ |
252 | ␉␉␉msr = rdmsr64(K10_COFVID_STATUS);␊ |
253 | ␉␉␉if(cpu_extfamily == 0x01 /* K10 */)␊ |
254 | ␉␉␉␉currcoef = (msr & 0x3f) + 0x10;␊ |
255 | ␉␉␉else /* K11+ */␊ |
256 | ␉␉␉␉currcoef = (msr & 0x3f) + 0x08;␊ |
257 | ␉␉␉currdiv = (2 << ((msr >> 6) & 0x07));␊ |
258 | ␉␉}␊ |
259 | ␊ |
260 | ␉␉if (currcoef)␊ |
261 | ␉␉{␊ |
262 | ␉␉␉if (currdiv)␊ |
263 | ␉␉␉{␊ |
264 | ␉␉␉␉fsbFrequency = ((tscFrequency * currdiv) / currcoef);␊ |
265 | ␉␉␉␉DBG("%d.%d\n", currcoef / currdiv, ((currcoef % currdiv) * 100) / currdiv);␊ |
266 | ␉␉␉}␊ |
267 | ␉␉␉else␊ |
268 | ␉␉␉{␊ |
269 | ␉␉␉␉fsbFrequency = (tscFrequency / currcoef);␊ |
270 | ␉␉␉␉DBG("%d\n", currcoef);␊ |
271 | ␉␉␉}␊ |
272 | ␉␉␉fsbFrequency = (tscFrequency / currcoef);␊ |
273 | ␉␉␉cpuFrequency = tscFrequency;␊ |
274 | ␉␉}␊ |
275 | ␉}␊ |
276 | ␊ |
277 | ␉if (!fsbFrequency)␊ |
278 | ␉{␊ |
279 | ␉␉fsbFrequency = (DEFAULT_FSB * 1000);␊ |
280 | ␉␉cpuFrequency = tscFrequency;␊ |
281 | ␉␉DBG("0 ! using the default value for FSB !\n");␊ |
282 | ␉}␊ |
283 | ␊ |
284 | ␉DBG("TSC Frequency: %dMHz\n", tscFrequency / 1000000);␊ |
285 | ␉DBG("CPU Frequency: %dMHz\n", cpuFrequency / 1000000);␊ |
286 | ␉DBG("FSB Frequency: %dMHz\n", fsbFrequency / 1000000);␊ |
287 | ␉DBG("Press [Enter] to continue..\n");␊ |
288 | #if DEBUG_FREQ␊ |
289 | ␉while (getc() != 0x0d) ;␊ |
290 | #endif␊ |
291 | }␊ |
292 | |