1 | /*␊ |
2 | * Copyright 2008 Islam Ahmed Zaid. All rights reserved. <azismed@gmail.com>␊ |
3 | * AsereBLN: 2009: cleanup and bugfix␊ |
4 | */␊ |
5 | ␊ |
6 | #ifndef __LIBSAIO_CPU_H␊ |
7 | #define __LIBSAIO_CPU_H␊ |
8 | ␊ |
9 | #include "platform.h"␊ |
10 | ␊ |
/*
 * Implemented outside this header. Takes a pointer to a PlatformInfo_t
 * (presumably declared in platform.h, and presumably filled in with the
 * detected CPU details -- confirm against the implementation).
 */
extern void scan_cpu(PlatformInfo_t *);
12 | ␊ |
/*
 * Bit helpers. All results are unsigned long long; bit positions must be
 * in the range 0..63.
 *
 *   bit(n)            value with only bit n set
 *   bitmask(h,l)      mask with bits h..l (inclusive, h >= l) set
 *   bitfield(x,h,l)   bits h..l of x, shifted down so bit l becomes bit 0
 *
 * Every argument is parenthesized in the expansion so that expressions such
 * as `cond ? 4 : 0` can be passed safely. Arguments are evaluated more than
 * once, so they must not have side effects.
 */
#define bit(n)			(1ULL << (n))
#define bitmask(h,l)		((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
#define bitfield(x,h,l)		(((x) & bitmask(h,l)) >> (l))
16 | ␊ |
/* Placeholder name string for a CPU that could not be identified
 * (the code that uses it is not visible in this header). */
#define CPU_STRING_UNKNOWN		"Unknown CPU Type"

//-- processor type -> p_type:
// Possible values of the p_type field, 0..3.
// NOTE(review): presumably the "processor type" bits reported by CPUID
// leaf 1 -- confirm against the code that fills p_type.
#define PT_OEM	0x00	// Intel Original OEM Processor;
#define PT_OD	0x01 	// Intel Over Drive Processor;
#define PT_DUAL	0x02	// Intel Dual Processor;
#define PT_RES	0x03	// Intel Reserved;
24 | ␊ |
/*
 * Known MSR registers.
 *
 * Each constant is a model-specific register address, i.e. a value for the
 * `msr` argument of rdmsr64() / wrmsr64() in this header.
 * Lines of the form "//MSR <address> <value>" appear to be sample register
 * dumps recorded by an earlier author; they are kept for reference only.
 */
#define MSR_IA32_PLATFORM_ID 0x0017
#define MSR_CORE_THREAD_COUNT 0x0035	 /* limited use - not for Penryn or older */
#define IA32_TSC_ADJUST 0x003B
#define MSR_IA32_BIOS_SIGN_ID 0x008B /* microcode version */
#define MSR_FSB_FREQ 0x00CD	 /* limited use - not for i7 */
#define	MSR_PLATFORM_INFO 0x00CE /* limited use - MinRatio for i7 but Max for Yonah */
/* turbo for penryn */
#define MSR_PKG_CST_CONFIG_CONTROL 0x00E2 /* sandy and ivy */
#define IA32_MPERF 0x00E7 /* TSC in C0 only */
#define IA32_APERF 0x00E8 /* actual clocks in C0 */
#define MSR_IA32_EXT_CONFIG 0x00EE	 /* limited use - not for i7 */
#define MSR_FLEX_RATIO 0x0194	 /* limited use - not for Penryn or older */
// see no value on most CPUs
#define	MSR_IA32_PERF_STATUS 0x0198
#define MSR_IA32_PERF_CONTROL 0x0199
#define MSR_IA32_CLOCK_MODULATION 0x019A
#define MSR_THERMAL_STATUS 0x019C
#define MSR_IA32_MISC_ENABLE 0x01A0
#define MSR_THERMAL_TARGET 0x01A2	 /* limited use - not for Penryn or older */
#define MSR_TURBO_RATIO_LIMIT 0x01AD	 /* limited use - not for Penryn or older */

#define IA32_ENERGY_PERF_BIAS		0x01B0
// MSR 000001B0 0000-0000-0000-0005
// MSR 000001B1 0000-0000-8838-0000
#define IA32_PLATFORM_DCA_CAP		0x01F8
// MSR 000001FC 0000-0000-0004-005F
52 | ␊ |
// Sandy Bridge & JakeTown specific 'Running Average Power Limit' MSRs.
// Lines of the form "// MSR <address> <value>" appear to be sample register
// dumps recorded by an earlier author.
#define MSR_RAPL_POWER_UNIT			0x606 /* R/O */
// MSR 00000606 0000-0000-000A-1003
#define MSR_PKGC3_IRTL 0x60A /* RW time limit to go C3 */
// bit 15 = 1 -- the value is valid for C-state power management
#define MSR_PKGC6_IRTL 0x60B /* RW time limit to go C6 */
// MSR 0000060B 0000-0000-0000-8854
// Decoding 0x8854: valid bit (15) set + time unit 010b = 1024 ns
// + limit 0x54 = 84 units (about 86 us)
#define MSR_PKGC7_IRTL 0x60C /* RW time limit to go C7 */
// MSR 0000060C 0000-0000-0000-8854
#define MSR_PKG_C2_RESIDENCY 0x60D /* same as TSC but in C2 only */

#define MSR_PKG_RAPL_POWER_LIMIT	0x610
// MSR 00000610 0000-A580-0000-8960
#define MSR_PKG_ENERGY_STATUS		0x611
// MSR 00000611 0000-0000-3212-A857
#define MSR_PKG_POWER_INFO			0x614
// MSR 00000614 0000-0000-01E0-02F8
// Sandy Bridge IA (Core) domain MSRs.
#define MSR_PP0_POWER_LIMIT			0x638
#define MSR_PP0_ENERGY_STATUS		0x639
#define MSR_PP0_POLICY 0x63A
#define MSR_PP0_PERF_STATUS			0x63B

// Sandy Bridge Uncore (IGPU) domain MSRs (not on JakeTown).
#define MSR_PP1_POWER_LIMIT			0x640
#define MSR_PP1_ENERGY_STATUS		0x641
// MSR 00000641 0000-0000-0000-0000
#define MSR_PP1_POLICY 0x642

// JakeTown-only memory MSRs.
#define MSR_PKG_PERF_STATUS			0x613
#define MSR_DRAM_POWER_LIMIT		0x618
#define MSR_DRAM_ENERGY_STATUS	0x619
#define MSR_DRAM_PERF_STATUS		0x61B
#define MSR_DRAM_POWER_INFO			0x61C

// Ivy Bridge: configurable TDP MSRs.
#define MSR_CONFIG_TDP_NOMINAL 0x648
#define MSR_CONFIG_TDP_LEVEL1 0x649
#define MSR_CONFIG_TDP_LEVEL2 0x64A
#define MSR_CONFIG_TDP_CONTROL 0x64B /* write once to lock */
#define MSR_TURBO_ACTIVATION_RATIO 0x64C
96 | ␊ |
// AMD model-specific register addresses (K8 / K10 name prefixes).
#define K8_FIDVID_STATUS 0xC0010042
#define K10_COFVID_LIMIT 0xC0010061
#define K10_PSTATE_STATUS 0xC0010064
#define K10_COFVID_STATUS 0xC0010071

// Same addresses as IA32_MPERF (0x00E7) and IA32_APERF (0x00E8) defined
// earlier in this header, under AMD-specific names.
#define MSR_AMD_MPERF 0x000000E7
#define MSR_AMD_APERF 0x000000E8
105 | ␊ |
/* Fallback front-side-bus frequency: 100000 is described as 100 MHz, so the
 * unit is presumably kHz -- confirm at the point of use. */
#define DEFAULT_FSB		100000 /* for now, hardcoding 100MHz for old CPUs */

// DFE: This constant comes from older xnu:
// NOTE(review): presumably the 8254 PIT input clock in Hz -- confirm.
#define CLKNUM			1193182		/* formerly 1193167 */

// DFE: These two constants come from Linux except CLOCK_TICK_RATE replaced with CLKNUM
// CALIBRATE_LATCH is the number of CLKNUM ticks in CALIBRATE_TIME_MSEC
// milliseconds, rounded to nearest by the "+ 1000/2" term; with the values
// here it evaluates to 35795.
#define CALIBRATE_TIME_MSEC	30		/* 30 msecs */
#define CALIBRATE_LATCH		((CLKNUM * CALIBRATE_TIME_MSEC + 1000/2)/1000)
114 | ␊ |
115 | // CPUID Values Reference␊ |
116 | /*␊ |
117 | #define CPUID_MODEL_PRESCOTT␉␉3 // 0x03 Celeron D, Pentium 4 (90nm)␊ |
118 | #define CPUID_MODEL_NOCONA␉␉4 // 0x04 Xeon Nocona, Irwindale (90nm)␊ |
119 | #define CPUID_MODEL_PRESLER␉␉6 // 0x06 Pentium 4, Pentium D (65nm)␊ |
120 | #define CPUID_MODEL_PENTIUM_M␉␉9 // 0x09␊ |
121 | #define CPUID_MODEL_DOTHAN␉␉13 // 0x0D Dothan␊ |
122 | #define CPUID_MODEL_YONAH␉␉14 // 0x0E Intel Mobile Core Solo, Duo␊ |
123 | #define CPUID_MODEL_MEROM␉␉15 // 0x0F Intel Mobile Core 2 Solo, Duo, Xeon 30xx, Xeon 51xx, Xeon X53xx, Xeon E53xx, Xeon X32xx␊ |
124 | #define CPUID_MODEL_CONROE␉␉15 // 0x0F␊ |
125 | #define CPUID_MODEL_CELERON␉␉22 // 0x16␊ |
126 | #define CPUID_MODEL_PENRYN␉␉23 // 0x17 Intel Core 2 Solo, Duo, Quad, Extreme, Xeon X54xx, Xeon X33xx␊ |
127 | #define CPUID_MODEL_WOLFDALE␉␉23 // 0x17␊ |
128 | #define CPUID_MODEL_NEHALEM␉␉26 // 0x1A Intel Core i7, Xeon W35xx, Xeon X55xx, Xeon E55xx LGA1366 (45nm)␊ |
129 | #define CPUID_MODEL_ATOM␉␉28 // 0x1C Intel Atom (45nm) Pineview, Silverthorne␊ |
130 | #define CPUID_MODEL_XEON_MP␉␉29 // 0x1D MP 7400␊ |
#define CPUID_MODEL_FIELDS		30 // 0x1E Intel Core i5, i7, Xeon X34xx LGA1156 (45nm),(Clarksfield, Lynnfield, Jasper Forest)
132 | #define CPUID_MODEL_DALES␉␉31 // 0x1F Havendale, Auburndale␊ |
133 | #define CPUID_MODEL_DALES_32NM␉␉37 // 0x25 Intel Core i3, i5 LGA1156 (32nm), (Arrandale, Clarksdale)␊ |
134 | #define CPUID_MODEL_ATOM_SAN␉␉38 // 0x26␊ |
135 | #define CPUID_MODEL_LINCROFT␉␉39 // 0x27 Intel Atom (45nm) Z6xx (single core)␊ |
136 | #define CPUID_MODEL_SANDYBRIDGE␉␉42 // 0x2A Intel Core i3, i5, i7 LGA1155 (32nm)␊ |
137 | #define CPUID_MODEL_WESTMERE␉␉44 // 0x2C Intel Core i7, Xeon X56xx, Xeon E56xx, Xeon W36xx LGA1366 (32nm) 6 Core␊ |
138 | #define CPUID_MODEL_JAKETOWN 45 // 0x2D Intel Xeon E5 LGA2011 (32nm), SandyBridge-E, SandyBridge-EN, SandyBridge-EP␊ |
#define CPUID_MODEL_NEHALEM_EX		46 // 0x2E Intel Xeon X75xx, Xeon X65xx, Xeon E75xx, Xeon E65xx
140 | #define CPUID_MODEL_WESTMERE_EX␉␉47 // 0x2F Intel Xeon E7␊ |
141 | #define CPUID_MODEL_ATOM_2000␉␉54 // 0x36 Intel Atom (32nm) Cedarview␊ |
142 | #define CPUID_MODEL_IVYBRIDGE␉␉58 // 0x3A Intel Core i5, i7 LGA1155 (22nm)␊ |
143 | #define CPUID_MODEL_HASWELL␉␉60 // 0x3C Desktop version␊ |
144 | #define CPUID_MODEL_IVYBRIDGE_XEON␉62 // 0x3E␊ |
145 | #define CPUID_MODEL_HASWELL_MB␉␉63 // 0x3F Mobile/Laptop version␊ |
146 | //#define CPUID_MODEL_HASWELL_H␉␉?? // 0x??␊ |
147 | #define CPUID_MODEL_HASWELL_ULT␉␉69 // 0x45␊ |
148 | #define CPUID_MODEL_CRYSTALWELL␉␉70 // 0x46␊ |
149 | */␊ |
150 | /* HASWELL-DT HASWELL-MB HASWELL-H HASWELL-ULT HASWELL ULX*/␊ |
151 | ␊ |
152 | //BROADWELL-ROCKWELL␊ |
153 | ␊ |
/*
 * Read the CPU time-stamp counter and return it as one 64-bit value.
 *
 * RDTSC delivers the counter split across EDX (high 32 bits) and EAX (low
 * 32 bits). The halves are captured through separate "=a" / "=d" outputs
 * rather than the "A" constraint: "A" names the EDX:EAX pair only on i386.
 * On x86_64 it lets the compiler choose either RAX or RDX, which loses the
 * high half and leaves the other register modified without the compiler
 * knowing.
 */
static inline uint64_t rdtsc64(void)
{
	uint32_t lo;
	uint32_t hi;

	__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}
160 | ␊ |
/*
 * Read the model-specific register whose address is `msr` and return its
 * 64-bit contents.
 *
 * RDMSR takes the register address in ECX and returns the value in
 * EDX:EAX. The halves are read through separate "=a" / "=d" outputs because
 * the "A" constraint means the EDX:EAX pair only on i386; on x86_64 it would
 * return just one register and leave the other clobbered unannounced.
 *
 * RDMSR is a privileged instruction; the caller must run at a privilege
 * level that allows it.
 */
static inline uint64_t rdmsr64(uint32_t msr)
{
	uint32_t lo;
	uint32_t hi;

	__asm__ volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
	return ((uint64_t)hi << 32) | lo;
}
167 | ␊ |
/*
 * Write the 64-bit value `val` to the model-specific register whose address
 * is `msr`.
 *
 * WRMSR takes the register address in ECX and the value in EDX:EAX. The
 * value is split explicitly into "a" (low 32 bits) and "d" (high 32 bits)
 * inputs because the "A" constraint means the EDX:EAX pair only on i386; on
 * x86_64 the whole value would land in a single register and the
 * instruction would write the wrong data.
 *
 * WRMSR is a privileged instruction; the caller must run at a privilege
 * level that allows it.
 */
static inline void wrmsr64(uint32_t msr, uint64_t val)
{
	uint32_t lo = (uint32_t)val;
	uint32_t hi = (uint32_t)(val >> 32);

	__asm__ volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi));
}
172 | ␊ |
173 | static inline void intel_waitforsts(void) {␊ |
174 | ␉uint32_t inline_timeout = 100000;␊ |
175 | ␉while (rdmsr64(MSR_IA32_PERF_STATUS) & (1 << 21)) { if (!inline_timeout--) break; }␊ |
176 | }␊ |
177 | ␊ |
/*
 * Execute CPUID for leaf `selector` and store the results in `data`:
 *   data[0] = EAX, data[1] = EBX, data[2] = ECX, data[3] = EDX.
 * `data` must point to at least four uint32_t elements.
 *
 * ECX is cleared before the instruction so that leaves which also take a
 * sub-leaf index in ECX return sub-leaf 0 rather than a result that depends
 * on whatever happened to be in the register.
 *
 * __asm__ is used instead of plain asm so the header also compiles in strict
 * ISO modes (-std=c99 / -std=c11).
 */
static inline void do_cpuid(uint32_t selector, uint32_t *data)
{
	__asm__ volatile ("cpuid"
				  : "=a" (data[0]),
				    "=b" (data[1]),
				    "=c" (data[2]),
				    "=d" (data[3])
				  : "a" (selector), "c" (0));
}
187 | ␊ |
/*
 * Execute CPUID for leaf `selector` with sub-leaf index `selector2` and
 * store the results in `data`:
 *   data[0] = EAX, data[1] = EBX, data[2] = ECX, data[3] = EDX.
 * `data` must point to at least four uint32_t elements.
 *
 * __asm__ is used instead of plain asm so the header also compiles in strict
 * ISO modes (-std=c99 / -std=c11).
 */
static inline void do_cpuid2(uint32_t selector, uint32_t selector2, uint32_t *data)
{
	__asm__ volatile ("cpuid"
				  : "=a" (data[0]),
				    "=b" (data[1]),
				    "=c" (data[2]),
				    "=d" (data[3])
				  : "a" (selector), "c" (selector2));
}
197 | ␊ |
198 | // DFE: enable_PIT2 and disable_PIT2 come from older xnu␊ |
199 | ␊ |
200 | /*␊ |
201 | * Enable or disable timer 2.␊ |
202 | * Port 0x61 controls timer 2:␊ |
203 | * bit 0 gates the clock,␊ |
204 | * bit 1 gates output to speaker.␊ |
205 | */␊ |
/*
 * Turn timer 2 on with the speaker muted: read port 0x61, clear bits 0 and 1,
 * set bit 0 (clock gate) again and write the byte back. All other bits of
 * the port are written back as they were read.
 */
static inline void enable_PIT2(void)
{
	unsigned char port61;

	/* Fetch the current control byte. */
	__asm__ volatile("inb $0x61,%0" : "=a" (port61));

	/* & ~0x03 drops gate and speaker bits; | 1 re-enables the gate only. */
	port61 = (unsigned char)((port61 & 0xFC) | 0x01);

	__asm__ volatile("outb %0,$0x61" : : "a" (port61));
}
216 | ␊ |
/*
 * Turn timer 2 off: read port 0x61, clear bit 0 (clock gate) and bit 1
 * (speaker output) and write the byte back. All other bits of the port are
 * written back as they were read.
 */
static inline void disable_PIT2(void)
{
	unsigned char port61;

	/* Fetch the current control byte. */
	__asm__ volatile("inb $0x61,%0" : "=a" (port61));

	/* & ~0x03: gate and speaker both off. */
	port61 = (unsigned char)(port61 & 0xFC);

	__asm__ volatile("outb %0,$0x61" : : "a" (port61));
}
226 | ␊ |
227 | // DFE: set_PIT2_mode0, poll_PIT2_gate, and measure_tsc_frequency are␊ |
228 | // roughly based on Linux code␊ |
229 | ␊ |
/*
 * Program 8254 channel 2 in mode 0 with the 16-bit start count `value`.
 *
 * Sequence: write control word 0xB0 to port 0x43 (per the 8254 control-word
 * format: counter 2, low byte then high byte, mode 0, binary), then write
 * the low and the high byte of `value` to port 0x42.
 *
 * In mode 0 the channel's output is low while counting and goes high when
 * the count expires; poll_PIT2_gate() waits for that. The counter only runs
 * while its gate input is high, so call enable_PIT2() before this function.
 *
 * AL is used as scratch for every port write and is therefore listed as
 * clobbered; without that the compiler may keep a live value in EAX across
 * the asm statement.
 */
static inline void set_PIT2_mode0(uint16_t value)
{
	__asm__ volatile(
		" movb $0xB0,%%al \n\t"	/* control word */
		" outb %%al,$0x43 \n\t"
		" movb %%dl,%%al  \n\t"	/* low byte of the count */
		" outb %%al,$0x42 \n\t"
		" movb %%dh,%%al  \n\t"	/* high byte of the count */
		" outb %%al,$0x42"
		: : "d"(value) : "%al");
}
246 | ␊ |
/*
 * Busy-wait until bit 5 (0x20) of port 0x61 is set, i.e. until PIT2 has
 * signaled. Returns how many times the port was read, including the read
 * that saw the bit set (so the result is always at least 1).
 */
static inline unsigned long poll_PIT2_gate(void)
{
	unsigned long reads = 0;
	unsigned char port61;

	for (;;) {
		reads++;
		__asm__ volatile("inb	$0x61,%0" : "=a" (port61));
		if ((port61 & 0x20) != 0) {
			break;
		}
	}
	return reads;
}
260 | ␊ |
/*
 * Load PIT channel 2 with `value` and wait until the counter reflects it.
 *
 * Only the low 16 bits of `value` are used (DL = low byte, DH = high byte).
 *
 * Steps:
 *  - write command 0xB8 to port 0x43, announcing a 16-bit write to
 *    counter 2 (the original comment calls this "one-shot mode"; per the
 *    8254 control-word format 0xB8 selects mode 4 -- confirm if it matters);
 *  - write the low, then the high byte of the count to port 0x42;
 *  - read port 0x42 twice per iteration (low byte, then high byte) and loop
 *    until the high byte read back equals the high byte written. The value
 *    is "realized" in the counter on the next tick.
 *
 * __asm__ is used instead of plain asm so the header also compiles in strict
 * ISO modes (-std=c99 / -std=c11), matching the other helpers in this file.
 */
static inline void
set_PIT2(int value)
{
	__asm__ volatile(
		" movb $0xB8,%%al \n\t"
		" outb %%al,$0x43 \n\t"
		" movb %%dl,%%al \n\t"
		" outb %%al,$0x42 \n\t"
		" movb %%dh,%%al \n\t"
		" outb %%al,$0x42 \n"
		"1: inb $0x42,%%al \n\t"	/* low byte, discarded */
		" inb $0x42,%%al \n\t"		/* high byte */
		" cmp %%al,%%dh \n\t"
		" jne 1b"
		: : "d"(value) : "%al");
}
284 | ␊ |
285 | ␊ |
/*
 * Latch PIT channel 2, sample the time-stamp counter, then read the latched
 * count.
 *
 * Steps, all inside one asm statement so their order is fixed:
 *  - write command 0x80 to port 0x43 to latch the current count;
 *  - execute RDTSC right away, so the returned time stamp is taken next to
 *    the latch and before the slower port reads;
 *  - read the latched count from port 0x42, low byte first, then high byte.
 *
 * On return *value holds the 16-bit count (upper bits zero) and the function
 * result is the 64-bit TSC value.
 *
 * The TSC halves come out through separate EDX and general-register outputs
 * instead of the "A" constraint, which names the EDX:EAX pair only on i386.
 * The low half is parked in a compiler-chosen register rather than pushed on
 * the stack, so the asm never touches the stack (a push inside inline asm
 * can overwrite the red zone on x86_64). The same code therefore serves both
 * i386 and x86_64.
 */
static inline uint64_t
get_PIT2(unsigned int *value)
{
	uint32_t tsc_lo;
	uint32_t tsc_hi;
	uint32_t eax_scratch;	/* EAX ends up holding the last port byte */
	unsigned int count;

	__asm__ volatile(
		" xorl %%ecx,%%ecx \n\t"	/* clear the count accumulator */
		" movb $0x80,%%al \n\t"
		" outb %%al,$0x43 \n\t"		/* latch counter 2 */
		" rdtsc \n\t"			/* EDX:EAX = TSC */
		" movl %%eax,%2 \n\t"		/* keep TSC low half; AL is reused below */
		" inb $0x42,%%al \n\t"
		" movb %%al,%%cl \n\t"		/* count, low byte */
		" inb $0x42,%%al \n\t"
		" movb %%al,%%ch "		/* count, high byte */
		: "=&a"(eax_scratch), "=&d"(tsc_hi), "=&r"(tsc_lo), "=&c"(count));

	(void)eax_scratch;
	*value = count;
	return ((uint64_t)tsc_hi << 32) | tsc_lo;
}
325 | ␊ |
326 | #endif /* !__LIBSAIO_CPU_H */␊ |
327 | |