time.c 29.9 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/*
 * Common time routines among all ppc machines.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
 * Paul Mackerras' version and mine for PReP and Pmac.
 * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
 * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
 *
 * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
 * to make clock more stable (2.4.0-test5). The only thing
 * that this code assumes is that the timebases have been synchronized
 * by firmware on SMP and are never stopped (never do sleep
 * on SMP then, nap and doze are OK).
 * 
 * Speeded up do_gettimeofday by getting rid of references to
 * xtime (which required locks for consistency). (mikejc@us.ibm.com)
 *
 * TODO (not necessarily in this file):
 * - improve precision and reproducibility of timebase frequency
 * measurement at boot time. (for iSeries, we calibrate the timebase
 * against the Titan chip's clock.)
 * - for astronomical applications: add a new function to get
 * non ambiguous timestamps even around leap seconds. This needs
 * a new timestamp format and a good name.
 *
 * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *             "A Kernel Model for Precision Timekeeping" by Dave Mills
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
#include <linux/security.h>
50
51
#include <linux/percpu.h>
#include <linux/rtc.h>
52
#include <linux/jiffies.h>
53
#include <linux/posix-timers.h>
54
#include <linux/irq.h>
55
#include <linux/delay.h>
56
#include <linux/irq_work.h>
57
#include <asm/trace.h>
Linus Torvalds's avatar
Linus Torvalds committed
58
59
60
61
62
63
64
65
66

#include <asm/io.h>
#include <asm/processor.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
#include <asm/uaccess.h>
#include <asm/time.h>
#include <asm/prom.h>
67
68
#include <asm/irq.h>
#include <asm/div64.h>
69
#include <asm/smp.h>
70
#include <asm/vdso_datapage.h>
71
#include <asm/firmware.h>
72
#include <asm/cputime.h>
73
#ifdef CONFIG_PPC_ISERIES
74
#include <asm/iseries/it_lp_queue.h>
75
#include <asm/iseries/hv_call_xm.h>
76
#endif
Linus Torvalds's avatar
Linus Torvalds committed
77

78
79
/* powerpc clocksource/clockevent code */

80
#include <linux/clockchips.h>
81
82
#include <linux/clocksource.h>

83
static cycle_t rtc_read(struct clocksource *);
84
85
86
87
88
89
90
91
92
93
/*
 * Clocksource whose read callback is rtc_read().  The multiplier is left
 * at zero here; clocksource_init() computes it from tb_ticks_per_sec and
 * .shift before registering whichever clocksource is in use.
 */
static struct clocksource clocksource_rtc = {
	.name         = "rtc",
	.rating       = 400,
	.flags        = CLOCK_SOURCE_IS_CONTINUOUS,
	.mask         = CLOCKSOURCE_MASK(64),
	.shift        = 22,
	.mult         = 0,	/* To be filled in */
	.read         = rtc_read,
};

94
static cycle_t timebase_read(struct clocksource *);
95
96
97
98
99
100
101
102
103
104
/*
 * Clocksource whose read callback is timebase_read().  The multiplier is
 * left at zero here; clocksource_init() computes it from tb_ticks_per_sec
 * and .shift.  update_vsyscall() only acts for this clocksource and its
 * scaling constant assumes .shift == 22, so keep the two in step.
 */
static struct clocksource clocksource_timebase = {
	.name         = "timebase",
	.rating       = 400,
	.flags        = CLOCK_SOURCE_IS_CONTINUOUS,
	.mask         = CLOCKSOURCE_MASK(64),
	.shift        = 22,
	.mult         = 0,	/* To be filled in */
	.read         = timebase_read,
};

105
106
107
108
109
110
111
112
113
114
#define DECREMENTER_MAX	0x7fffffff

static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev);
static void decrementer_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *dev);

static struct clock_event_device decrementer_clockevent = {
       .name           = "decrementer",
       .rating         = 200,
115
       .shift          = 0,	/* To be filled in */
116
117
118
119
120
121
122
       .mult           = 0,	/* To be filled in */
       .irq            = 0,
       .set_next_event = decrementer_set_next_event,
       .set_mode       = decrementer_set_mode,
       .features       = CLOCK_EVT_FEAT_ONESHOT,
};

123
124
125
126
127
128
/*
 * Per-cpu decrementer state: the clock event device registered for the
 * CPU plus the timebase (or RTC) value at which its next event is due.
 */
struct decrementer_clock {
	struct clock_event_device event;
	/*
	 * Deadline written by decrementer_set_next_event(); timer_interrupt()
	 * compares the current time against it and resets it to ~0 once the
	 * event handler has been invoked.
	 */
	u64 next_tb;
};

static DEFINE_PER_CPU(struct decrementer_clock, decrementers);
129

Linus Torvalds's avatar
Linus Torvalds committed
130
#ifdef CONFIG_PPC_ISERIES
131
132
static unsigned long __initdata iSeries_recal_titan;
static signed long __initdata iSeries_recal_tb;
133
134

/* Forward declaration is only needed for iSeries compiles */
135
static void __init clocksource_init(void);
Linus Torvalds's avatar
Linus Torvalds committed
136
137
138
139
#endif

#define XSEC_PER_SEC (1024*1024)

140
141
142
143
144
145
146
#ifdef CONFIG_PPC64
/*
 * 64-bit variant: compute (xsec * max) / XSEC_PER_SEC.  Both arguments are
 * parenthesized so that expressions such as "a + b" passed as max scale
 * as a whole instead of being split by operator precedence.
 */
#define SCALE_XSEC(xsec, max)	(((xsec) * (max)) / XSEC_PER_SEC)
#else
/* compute ((xsec << 12) * max) >> 32 */
#define SCALE_XSEC(xsec, max)	mulhwu((xsec) << 12, max)
#endif

Linus Torvalds's avatar
Linus Torvalds committed
147
148
149
150
unsigned long tb_ticks_per_jiffy;
unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
151
EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */
152

Linus Torvalds's avatar
Linus Torvalds committed
153
DEFINE_SPINLOCK(rtc_lock);
154
EXPORT_SYMBOL_GPL(rtc_lock);
Linus Torvalds's avatar
Linus Torvalds committed
155

156
157
static u64 tb_to_ns_scale __read_mostly;
static unsigned tb_to_ns_shift __read_mostly;
158
static u64 boot_tb __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
159
160

extern struct timezone sys_tz;
161
static long timezone_offset;
Linus Torvalds's avatar
Linus Torvalds committed
162

163
unsigned long ppc_proc_freq;
164
EXPORT_SYMBOL_GPL(ppc_proc_freq);
165
unsigned long ppc_tb_freq;
166
EXPORT_SYMBOL_GPL(ppc_tb_freq);
167

168
169
170
171
172
173
174
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
 * Factors for converting from cputime_t (timebase ticks) to
 * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
 * These are all stored as 0.64 fixed-point binary fractions.
 */
u64 __cputime_jiffies_factor;
175
EXPORT_SYMBOL(__cputime_jiffies_factor);
176
u64 __cputime_msec_factor;
177
EXPORT_SYMBOL(__cputime_msec_factor);
178
u64 __cputime_sec_factor;
179
EXPORT_SYMBOL(__cputime_sec_factor);
180
u64 __cputime_clockt_factor;
181
EXPORT_SYMBOL(__cputime_clockt_factor);
182
183
DEFINE_PER_CPU(unsigned long, cputime_last_delta);
DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
184

185
186
cputime_t cputime_one_jiffy;

187
188
void (*dtl_consumer)(struct dtl_entry *, u64);

189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
static void calc_cputime_factors(void)
{
	struct div_result res;

	div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
	__cputime_jiffies_factor = res.result_low;
	div128_by_32(1000, 0, tb_ticks_per_sec, &res);
	__cputime_msec_factor = res.result_low;
	div128_by_32(1, 0, tb_ticks_per_sec, &res);
	__cputime_sec_factor = res.result_low;
	div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
	__cputime_clockt_factor = res.result_low;
}

/*
204
205
 * Read the SPURR on systems that have it, otherwise the PURR,
 * or if that doesn't exist return the timebase value passed in.
206
 */
207
static u64 read_spurr(u64 tb)
208
{
209
210
	if (cpu_has_feature(CPU_FTR_SPURR))
		return mfspr(SPRN_SPURR);
211
212
	if (cpu_has_feature(CPU_FTR_PURR))
		return mfspr(SPRN_PURR);
213
	return tb;
214
215
}

216
217
#ifdef CONFIG_PPC_SPLPAR

218
/*
219
220
 * Scan the dispatch trace log and count up the stolen time.
 * Should be called with interrupts disabled.
221
 */
222
static u64 scan_dispatch_log(u64 stop_tb)
223
{
224
	u64 i = local_paca->dtl_ridx;
225
226
227
228
229
230
231
232
233
234
	struct dtl_entry *dtl = local_paca->dtl_curr;
	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
	struct lppaca *vpa = local_paca->lppaca_ptr;
	u64 tb_delta;
	u64 stolen = 0;
	u64 dtb;

	if (i == vpa->dtl_idx)
		return 0;
	while (i < vpa->dtl_idx) {
235
236
		if (dtl_consumer)
			dtl_consumer(dtl, i);
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
		dtb = dtl->timebase;
		tb_delta = dtl->enqueue_to_dispatch_time +
			dtl->ready_to_enqueue_time;
		barrier();
		if (i + N_DISPATCH_LOG < vpa->dtl_idx) {
			/* buffer has overflowed */
			i = vpa->dtl_idx - N_DISPATCH_LOG;
			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
			continue;
		}
		if (dtb > stop_tb)
			break;
		stolen += tb_delta;
		++i;
		++dtl;
		if (dtl == dtl_end)
			dtl = local_paca->dispatch_log;
	}
	local_paca->dtl_ridx = i;
	local_paca->dtl_curr = dtl;
	return stolen;
258
259
}

260
261
262
263
264
265
266
267
/*
 * Accumulate stolen time by scanning the dispatch trace log.
 * Called on entry from user mode.
 *
 * The log is scanned twice: entries up to starttime_user are subtracted
 * from the paca's system_time, entries up to starttime are subtracted
 * from its user_time, and the sum of both is added to stolen_time.
 */
void accumulate_stolen_time(void)
{
	u64 sys_stolen, user_stolen;
	u8 prev_soft = local_paca->soft_enabled;
	u8 prev_hard = local_paca->hard_enabled;

	/*
	 * This runs early in exception entry, before soft/hard_enabled
	 * have been brought in line with the state expected for the
	 * exception.  Interrupts are hard disabled, so make the PACA say
	 * so for the duration; otherwise debug checks would complain.
	 */
	local_paca->soft_enabled = 0;
	local_paca->hard_enabled = 0;

	sys_stolen = scan_dispatch_log(local_paca->starttime_user);
	user_stolen = scan_dispatch_log(local_paca->starttime);
	local_paca->system_time -= sys_stolen;
	local_paca->user_time -= user_stolen;
	local_paca->stolen_time += user_stolen + sys_stolen;

	/* Put back whatever enable state we found on entry. */
	local_paca->soft_enabled = prev_soft;
	local_paca->hard_enabled = prev_hard;
}

/*
 * Return the stolen time to account now: whatever a fresh scan of the
 * dispatch trace log (up to stop_tb) finds, which is also removed from
 * the paca's system_time, plus the stolen_time already banked in the
 * paca.  The banked value is reset to zero.
 */
static inline u64 calculate_stolen_time(u64 stop_tb)
{
	u64 scanned = 0;
	u64 banked;

	/* Only scan when the log holds entries we have not consumed yet. */
	if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) {
		scanned = scan_dispatch_log(stop_tb);
		get_paca()->system_time -= scanned;
	}

	banked = get_paca()->stolen_time;
	get_paca()->stolen_time = 0;
	return scanned + banked;
}

304
305
306
307
308
309
310
311
#else /* CONFIG_PPC_SPLPAR */
/*
 * Stub used when CONFIG_PPC_SPLPAR is not set: there is no dispatch
 * trace log to scan, so no time is ever reported as stolen.
 */
static inline u64 calculate_stolen_time(u64 stop_tb)
{
	return 0;
}

#endif /* CONFIG_PPC_SPLPAR */

312
313
314
315
316
317
/*
 * Account time for a transition between system, hard irq
 * or soft irq state.
 */
void account_system_vtime(struct task_struct *tsk)
{
318
	u64 now, nowscaled, delta, deltascaled;
319
	unsigned long flags;
320
	u64 stolen, udelta, sys_scaled, user_scaled;
321
322

	local_irq_save(flags);
323
	now = mftb();
324
	nowscaled = read_spurr(now);
325
326
	get_paca()->system_time += now - get_paca()->starttime;
	get_paca()->starttime = now;
327
328
	deltascaled = nowscaled - get_paca()->startspurr;
	get_paca()->startspurr = nowscaled;
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364

	stolen = calculate_stolen_time(now);

	delta = get_paca()->system_time;
	get_paca()->system_time = 0;
	udelta = get_paca()->user_time - get_paca()->utime_sspurr;
	get_paca()->utime_sspurr = get_paca()->user_time;

	/*
	 * Because we don't read the SPURR on every kernel entry/exit,
	 * deltascaled includes both user and system SPURR ticks.
	 * Apportion these ticks to system SPURR ticks and user
	 * SPURR ticks in the same ratio as the system time (delta)
	 * and user time (udelta) values obtained from the timebase
	 * over the same interval.  The system ticks get accounted here;
	 * the user ticks get saved up in paca->user_time_scaled to be
	 * used by account_process_tick.
	 */
	sys_scaled = delta;
	user_scaled = udelta;
	if (deltascaled != delta + udelta) {
		if (udelta) {
			sys_scaled = deltascaled * delta / (delta + udelta);
			user_scaled = deltascaled - sys_scaled;
		} else {
			sys_scaled = deltascaled;
		}
	}
	get_paca()->user_time_scaled += user_scaled;

	if (in_irq() || idle_task(smp_processor_id()) != tsk) {
		account_system_time(tsk, 0, delta, sys_scaled);
		if (stolen)
			account_steal_time(stolen);
	} else {
		account_idle_time(delta + stolen);
365
366
367
	}
	local_irq_restore(flags);
}
Alexander Graf's avatar
Alexander Graf committed
368
EXPORT_SYMBOL_GPL(account_system_vtime);
369
370
371
372
373
374

/*
 * Transfer the user and system times accumulated in the paca
 * by the exception entry and exit code to the generic process
 * user and system time records.
 * Must be called with interrupts disabled.
375
376
377
 * Assumes that account_system_vtime() has been called recently
 * (i.e. since the last entry from usermode) so that
 * get_paca()->user_time_scaled is up to date.
378
 */
379
void account_process_tick(struct task_struct *tsk, int user_tick)
380
{
381
	cputime_t utime, utimescaled;
382
383

	utime = get_paca()->user_time;
384
	utimescaled = get_paca()->user_time_scaled;
385
	get_paca()->user_time = 0;
386
387
	get_paca()->user_time_scaled = 0;
	get_paca()->utime_sspurr = 0;
388
	account_user_time(tsk, utime, utimescaled);
389
390
391
392
393
394
}

#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#define calc_cputime_factors()
#endif

395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
/*
 * Busy-wait until at least @loops ticks of the time source have elapsed.
 * The RTC low register is used when __USE_RTC() is true, otherwise the
 * low word of the timebase.
 */
void __delay(unsigned long loops)
{
	unsigned long base;
	int elapsed;

	if (!__USE_RTC()) {
		base = get_tbl();
		/* Spin at low thread priority until enough ticks pass. */
		while (get_tbl() - base < loops)
			HMT_low();
		HMT_medium();
		return;
	}

	base = get_rtcl();
	do {
		/* the RTCL register wraps at 1000000000 */
		elapsed = get_rtcl() - base;
		if (elapsed < 0)
			elapsed += 1000000000;
	} while (elapsed < loops);
}
EXPORT_SYMBOL(__delay);

/*
 * Busy-wait for @usecs microseconds by converting them to time source
 * ticks with tb_ticks_per_usec and handing the count to __delay().
 */
void udelay(unsigned long usecs)
{
	unsigned long ticks = tb_ticks_per_usec * usecs;

	__delay(ticks);
}
EXPORT_SYMBOL(udelay);

Linus Torvalds's avatar
Linus Torvalds committed
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
#ifdef CONFIG_SMP
/*
 * Return the program counter to attribute a profiling sample to.  When
 * the sampled pc is inside a lock function, the link register is
 * returned instead of the pc itself.
 */
unsigned long profile_pc(struct pt_regs *regs)
{
	unsigned long pc = instruction_pointer(regs);

	return in_lock_functions(pc) ? regs->link : pc;
}
EXPORT_SYMBOL(profile_pc);
#endif

#ifdef CONFIG_PPC_ISERIES

/* 
 * This function recalibrates the timebase based on the 49-bit time-of-day
 * value in the Titan chip.  The Titan is much more accurate than the value
 * returned by the service processor for the timebase frequency.  
 */

444
/*
 * Late initcall: compare the timebase and Titan time-of-day deltas since
 * the previous sample and, if the implied ticks-per-jiffy differs from the
 * current value by less than 1/25th, adopt the new calibration.  Always
 * records a fresh sample and then registers the clocksource.
 *
 * Returns -ENODEV when not running on iSeries firmware, 0 otherwise.
 */
static int __init iSeries_tb_recal(void)
{
	unsigned long titan, tb;

	/* Make sure we only run on iSeries */
	if (!firmware_has_feature(FW_FEATURE_ISERIES))
		return -ENODEV;

	tb = get_tb();
	titan = HvCallXm_loadTod();

	if (iSeries_recal_titan) {
		unsigned long tb_ticks = tb - iSeries_recal_tb;
		unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12;
		unsigned long new_tb_ticks_per_sec =
			(tb_ticks * USEC_PER_SEC) / titan_usec;
		unsigned long new_tb_ticks_per_jiffy =
			DIV_ROUND_CLOSEST(new_tb_ticks_per_sec, HZ);
		long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy;
		char sign = '+';

		/* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */
		new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ;

		/* Report the magnitude with an explicit sign character. */
		if (tick_diff < 0) {
			tick_diff = -tick_diff;
			sign = '-';
		}

		if (tick_diff && tick_diff < tb_ticks_per_jiffy / 25) {
			printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n",
					new_tb_ticks_per_jiffy, sign, tick_diff );
			tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
			tb_ticks_per_sec   = new_tb_ticks_per_sec;
			/* Propagate the new frequency to everything derived from it. */
			calc_cputime_factors();
			vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
			setup_cputime_one_jiffy();
		} else if (tick_diff) {
			printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
				"                   new tb_ticks_per_jiffy = %lu\n"
				"                   old tb_ticks_per_jiffy = %lu\n",
				new_tb_ticks_per_jiffy, tb_ticks_per_jiffy );
		}
	}

	/* Remember this sample as the baseline for any later comparison. */
	iSeries_recal_titan = titan;
	iSeries_recal_tb = tb;

	/* Called here as now we know accurate values for the timebase */
	clocksource_init();
	return 0;
}
494
495
496
497
498
499
500
501
502
late_initcall(iSeries_tb_recal);

/*
 * Called from platform early init.
 *
 * Records the current timebase and Titan time-of-day values as the
 * baseline sample that iSeries_tb_recal() later measures against.
 */
void __init iSeries_time_init_early(void)
{
	iSeries_recal_tb = get_tb();
	iSeries_recal_titan = HvCallXm_loadTod();
}
#endif /* CONFIG_PPC_ISERIES */
Linus Torvalds's avatar
Linus Torvalds committed
503

504
#ifdef CONFIG_IRQ_WORK
505

506
507
508
509
/*
 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
 */
#ifdef CONFIG_PPC64
510
static inline unsigned long test_irq_work_pending(void)
511
{
512
513
514
515
	unsigned long x;

	asm volatile("lbz %0,%1(13)"
		: "=r" (x)
516
		: "i" (offsetof(struct paca_struct, irq_work_pending)));
517
518
519
	return x;
}

520
static inline void set_irq_work_pending_flag(void)
521
522
523
{
	asm volatile("stb %0,%1(13)" : :
		"r" (1),
524
		"i" (offsetof(struct paca_struct, irq_work_pending)));
525
526
}

527
static inline void clear_irq_work_pending(void)
528
529
530
{
	asm volatile("stb %0,%1(13)" : :
		"r" (0),
531
		"i" (offsetof(struct paca_struct, irq_work_pending)));
532
533
}

534
535
#else /* 32-bit */

536
DEFINE_PER_CPU(u8, irq_work_pending);
537

538
539
540
#define set_irq_work_pending_flag()	__get_cpu_var(irq_work_pending) = 1
#define test_irq_work_pending()		__get_cpu_var(irq_work_pending)
#define clear_irq_work_pending()	__get_cpu_var(irq_work_pending) = 0
541

542
543
#endif /* 32 vs 64 bit */

544
/*
 * Mark irq work as pending on this CPU and set the decrementer to 1.
 * timer_interrupt() checks the pending flag and runs the queued work.
 * Preemption is disabled around the two steps.
 */
void set_irq_work_pending(void)
{
	preempt_disable();

	set_irq_work_pending_flag();
	set_dec(1);

	preempt_enable();
}

552
#else  /* CONFIG_IRQ_WORK */
553

554
555
#define test_irq_work_pending()	0
#define clear_irq_work_pending()
556

557
#endif /* CONFIG_IRQ_WORK */
558

Linus Torvalds's avatar
Linus Torvalds committed
559
560
561
562
563
564
565
566
567
568
569
570
571
572
/*
 * For iSeries shared processors, we have to let the hypervisor
 * set the hardware decrementer.  We set a virtual decrementer
 * in the lppaca and call the hypervisor if the virtual
 * decrementer is less than the current value in the hardware
 * decrementer. (almost always the new decrementer value will
 * be greater than the current hardware decrementer so the hypervisor
 * call will not be needed)
 */

/*
 * timer_interrupt - gets called when the decrementer overflows,
 * with interrupts disabled.
 */
573
void timer_interrupt(struct pt_regs * regs)
Linus Torvalds's avatar
Linus Torvalds committed
574
{
575
	struct pt_regs *old_regs;
576
577
	struct decrementer_clock *decrementer =  &__get_cpu_var(decrementers);
	struct clock_event_device *evt = &decrementer->event;
578
	u64 now;
579

580
581
	trace_timer_interrupt_entry(regs);

582
583
	__get_cpu_var(irq_stat).timer_irqs++;

584
585
586
	/* Ensure a positive value is written to the decrementer, or else
	 * some CPUs will continuue to take decrementer exceptions */
	set_dec(DECREMENTER_MAX);
587

588
#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
589
590
591
	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		do_IRQ(regs);
#endif
Linus Torvalds's avatar
Linus Torvalds committed
592

593
	old_regs = set_irq_regs(regs);
Linus Torvalds's avatar
Linus Torvalds committed
594
595
	irq_enter();

596
597
598
	if (test_irq_work_pending()) {
		clear_irq_work_pending();
		irq_work_run();
599
600
	}

601
#ifdef CONFIG_PPC_ISERIES
602
603
	if (firmware_has_feature(FW_FEATURE_ISERIES))
		get_lppaca()->int_dword.fields.decr_int = 0;
604
605
#endif

606
607
608
609
610
611
612
613
614
615
	now = get_tb_or_rtc();
	if (now >= decrementer->next_tb) {
		decrementer->next_tb = ~(u64)0;
		if (evt->event_handler)
			evt->event_handler(evt);
	} else {
		now = decrementer->next_tb - now;
		if (now <= DECREMENTER_MAX)
			set_dec((int)now);
	}
Linus Torvalds's avatar
Linus Torvalds committed
616
617

#ifdef CONFIG_PPC_ISERIES
618
	if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
619
		process_hvlpevents();
Linus Torvalds's avatar
Linus Torvalds committed
620
621
#endif

622
#ifdef CONFIG_PPC64
623
	/* collect purr register values often, for accurate calculations */
624
	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
Linus Torvalds's avatar
Linus Torvalds committed
625
626
627
		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
		cu->current_tb = mfspr(SPRN_PURR);
	}
628
#endif
Linus Torvalds's avatar
Linus Torvalds committed
629
630

	irq_exit();
631
	set_irq_regs(old_regs);
632
633

	trace_timer_interrupt_exit(regs);
Linus Torvalds's avatar
Linus Torvalds committed
634
635
}

636
#ifdef CONFIG_SUSPEND
637
static void generic_suspend_disable_irqs(void)
638
639
640
641
642
643
644
645
646
647
{
	/* Disable the decrementer, so that it doesn't interfere
	 * with suspending.
	 */

	set_dec(0x7fffffff);
	local_irq_disable();
	set_dec(0x7fffffff);
}

648
/*
 * Counterpart of generic_suspend_disable_irqs(): re-enable interrupts.
 */
static void generic_suspend_enable_irqs(void)
{
	local_irq_enable();
}

/*
 * Overrides the weak version in kernel/power/main.c.
 *
 * Runs the platform's suspend_disable_irqs hook first, when one is
 * provided, and then the generic decrementer/interrupt disable.
 */
void arch_suspend_disable_irqs(void)
{
	if (ppc_md.suspend_disable_irqs)
		ppc_md.suspend_disable_irqs();
	generic_suspend_disable_irqs();
}

/*
 * Overrides the weak version in kernel/power/main.c.
 *
 * Mirror image of arch_suspend_disable_irqs(): the generic interrupt
 * enable runs first, then the platform's suspend_enable_irqs hook when
 * one is provided.
 */
void arch_suspend_enable_irqs(void)
{
	generic_suspend_enable_irqs();
	if (ppc_md.suspend_enable_irqs)
		ppc_md.suspend_enable_irqs();
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
670
671
672
673
674
675
676
677
678
/*
 * Scheduler clock - returns current time in nanosec units.
 *
 * Note: mulhdu(a, b) (multiply high double unsigned) returns
 * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
 * are 64-bit unsigned numbers.
 */
unsigned long long sched_clock(void)
{
679
680
	if (__USE_RTC())
		return get_rtc();
681
	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
Linus Torvalds's avatar
Linus Torvalds committed
682
683
}

684
/*
 * Look up the property @name on the first device-tree node of type "cpu"
 * and, when present, store its value (read as @cells cells) in *@val.
 *
 * Returns 1 when the property was found, 0 otherwise; *@val is left
 * untouched when nothing is found.
 */
static int __init get_freq(char *name, int cells, unsigned long *val)
{
	struct device_node *node;
	const unsigned int *prop;
	int found = 0;

	/* The cpu node should have timebase and clock frequency properties */
	node = of_find_node_by_type(NULL, "cpu");
	if (!node)
		return 0;

	prop = of_get_property(node, name, NULL);
	if (prop) {
		*val = of_read_ulong(prop, cells);
		found = 1;
	}

	/* Drop the reference taken by the lookup. */
	of_node_put(node);
	return found;
}

706
707
708
709
710
711
712
713
714
715
716
717
/*
 * Start the decrementer on CPUs that need it enabled by software.
 *
 * On BookE and 40x this clears the timer status bits written below and
 * then enables the decrementer interrupt; on every other configuration
 * the function body is empty.
 *
 * Should become __cpuinit when secondary_cpu_time_init also is.
 */
void start_cpu_decrementer(void)
{
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
	/* Clear any pending timer interrupts */
	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);

	/* Enable decrementer interrupt */
	mtspr(SPRN_TCR, TCR_DIE);
#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
}

718
719
720
721
722
723
724
/*
 * Set ppc_tb_freq and ppc_proc_freq from the device tree.
 *
 * For each frequency the "ibm,extended-..." two-cell property is tried
 * first and the plain one-cell property second.  If neither exists the
 * hard-coded default stays in place and a warning is logged.
 */
void __init generic_calibrate_decr(void)
{
	/* Timebase / decrementer frequency. */
	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */
	if (!(get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) ||
	      get_freq("timebase-frequency", 1, &ppc_tb_freq))) {
		printk(KERN_ERR "WARNING: Estimating decrementer frequency "
				"(not found)\n");
	}

	/* Processor clock frequency. */
	ppc_proc_freq = DEFAULT_PROC_FREQ;	/* hardcoded default */
	if (!(get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) ||
	      get_freq("clock-frequency", 1, &ppc_proc_freq))) {
		printk(KERN_ERR "WARNING: Estimating processor frequency "
				"(not found)\n");
	}
}

739
int update_persistent_clock(struct timespec now)
740
741
742
{
	struct rtc_time tm;

743
744
745
746
747
748
749
750
751
752
	if (!ppc_md.set_rtc_time)
		return 0;

	to_tm(now.tv_sec + 1 + timezone_offset, &tm);
	tm.tm_year -= 1900;
	tm.tm_mon -= 1;

	return ppc_md.set_rtc_time(&tm);
}

753
static void __read_persistent_clock(struct timespec *ts)
754
755
756
757
{
	struct rtc_time tm;
	static int first = 1;

758
	ts->tv_nsec = 0;
759
760
761
762
763
764
765
	/* XXX this is a litle fragile but will work okay in the short term */
	if (first) {
		first = 0;
		if (ppc_md.time_init)
			timezone_offset = ppc_md.time_init();

		/* get_boot_time() isn't guaranteed to be safe to call late */
766
767
768
769
770
771
772
773
		if (ppc_md.get_boot_time) {
			ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
			return;
		}
	}
	if (!ppc_md.get_rtc_time) {
		ts->tv_sec = 0;
		return;
774
	}
775
	ppc_md.get_rtc_time(&tm);
776

777
778
	ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
			    tm.tm_hour, tm.tm_min, tm.tm_sec);
779
780
}

781
782
783
784
785
786
787
788
789
790
791
792
/*
 * Read the persistent clock into *@ts, clamping a negative seconds value
 * to zero.
 */
void read_persistent_clock(struct timespec *ts)
{
	__read_persistent_clock(ts);

	if (ts->tv_sec >= 0)
		return;

	/* Sanitize it in case real time clock is set below EPOCH */
	ts->tv_sec = 0;
	ts->tv_nsec = 0;
}

793
/* clocksource code */
794
static cycle_t rtc_read(struct clocksource *cs)
795
796
797
798
{
	return (cycle_t)get_rtc();
}

799
static cycle_t timebase_read(struct clocksource *cs)
800
801
802
803
{
	return (cycle_t)get_tb();
}

804
805
/*
 * Publish new timekeeping values to the vdso data page.
 *
 * Does nothing unless @clock is the timebase clocksource.  The update is
 * bracketed by two increments of vdso_data->tb_update_count: userspace
 * reads the counter, reads the values, then reads the counter again, and
 * retries unless both reads match and are even.
 */
void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
			struct clocksource *clock, u32 mult)
{
	u64 tb_to_xs, stamp_xsec;
	u32 sec_frac;

	if (clock != &clocksource_timebase)
		return;

	/* Make userspace gettimeofday spin until we're done. */
	++vdso_data->tb_update_count;
	smp_mb();

	/* XXX this assumes clock->shift == 22 */
	/* 4611686018 ~= 2^(20+64-22) / 1e9 */
	tb_to_xs = (u64) mult * 4611686018ULL;

	/* Wall time expressed in units of 1/XSEC_PER_SEC seconds. */
	stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
	do_div(stamp_xsec, 1000000000);
	stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;

	BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
	/* this is tv_nsec / 1e9 as a 0.32 fraction */
	sec_frac = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;

	vdso_data->tb_orig_stamp = clock->cycle_last;
	vdso_data->stamp_xsec = stamp_xsec;
	vdso_data->tb_to_xs = tb_to_xs;
	vdso_data->wtom_clock_sec = wtm->tv_sec;
	vdso_data->wtom_clock_nsec = wtm->tv_nsec;
	vdso_data->stamp_xtime = *wall_time;
	vdso_data->stamp_sec_fraction = sec_frac;

	/* Order the stores above before the closing counter increment. */
	smp_wmb();
	++(vdso_data->tb_update_count);
}

/*
 * Copy the timezone fields of sys_tz into the vdso data page.
 *
 * vdso_data->tb_update_count is incremented before and after the copy,
 * with full barriers on either side of the stores, so the counter is
 * changed once before the new values are written and once after.
 */
void update_vsyscall_tz(void)
{
	/* Make userspace gettimeofday spin until we're done. */
	++vdso_data->tb_update_count;
	smp_mb();
	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
	smp_mb();
	/* Second increment marks the update as finished. */
	++vdso_data->tb_update_count;
}

861
/*
 * Pick the RTC or timebase clocksource according to __USE_RTC(), compute
 * its multiplier from tb_ticks_per_sec and register it, logging the
 * outcome either way.
 */
static void __init clocksource_init(void)
{
	struct clocksource *clock =
		__USE_RTC() ? &clocksource_rtc : &clocksource_timebase;

	clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift);

	if (!clocksource_register(clock)) {
		printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
		       clock->name, clock->mult, clock->shift);
		return;
	}

	printk(KERN_ERR "clocksource: %s is already registered\n",
	       clock->name);
}

882
883
884
static int decrementer_set_next_event(unsigned long evt,
				      struct clock_event_device *dev)
{
885
	__get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt;
886
887
888
889
890
891
892
893
894
895
896
	set_dec(evt);
	return 0;
}

/*
 * Clock event callback for mode changes.  One-shot mode needs nothing
 * done here; for every other mode the decrementer is pushed out to its
 * maximum value.
 */
static void decrementer_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *dev)
{
	if (mode == CLOCK_EVT_MODE_ONESHOT)
		return;

	decrementer_set_next_event(DECREMENTER_MAX, dev);
}

897
898
899
900
901
902
903
904
905
static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec,
				int shift)
{
	uint64_t tmp = ((uint64_t)ticks) << shift;

	do_div(tmp, nsec);
	return tmp;
}

906
907
908
909
910
/*
 * Choose the shift/mult pair stored in decrementer_clockevent for a
 * device ticking at @hz.
 *
 * Starting from a shift of 32, the shift is lowered one step at a time
 * until (hz << shift) / NSEC_PER_SEC is non-zero and fits in 32 bits.
 */
static void __init setup_clockevent_multiplier(unsigned long hz)
{
	u64 mult, shift;

	for (shift = 32; ; shift--) {
		mult = div_sc64(hz, NSEC_PER_SEC, shift);
		if (mult && (mult >> 32UL) == 0UL)
			break;
	}

	decrementer_clockevent.shift = shift;
	decrementer_clockevent.mult = mult;
}

922
923
static void register_decrementer_clockevent(int cpu)
{
924
	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
925
926

	*dec = decrementer_clockevent;
927
	dec->cpumask = cpumask_of(cpu);
928

929
930
	printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
		    dec->name, dec->mult, dec->shift, cpu);
931
932
933
934

	clockevents_register_device(dec);
}

935
/*
 * Finish setting up the decrementer_clockevent template (multiplier,
 * shift and min/max programmable deltas) from ppc_tb_freq, then register
 * the clock event device for the CPU this runs on.
 */
static void __init init_decrementer_clockevent(void)
{
	int this_cpu = smp_processor_id();

	setup_clockevent_multiplier(ppc_tb_freq);

	/* Longest and shortest delays, in ns, that can be programmed. */
	decrementer_clockevent.max_delta_ns =
		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
	decrementer_clockevent.min_delta_ns =
		clockevent_delta2ns(2, &decrementer_clockevent);

	register_decrementer_clockevent(this_cpu);
}

/*
 * Timer setup for the CPU this runs on: start its decrementer and
 * register its clock event device.
 */
void secondary_cpu_time_init(void)
{
	/*
	 * Start the decrementer on CPUs that have manual control,
	 * such as BookE.
	 */
	start_cpu_decrementer();

	/*
	 * FIXME: Should make an unrelated change to move the
	 * snapshot_timebase call here!
	 */
	register_decrementer_clockevent(smp_processor_id());
}

960
/* This function is only called on the boot processor */
Linus Torvalds's avatar
Linus Torvalds committed
961
962
963
void __init time_init(void)
{
	struct div_result res;
964
	u64 scale;
965
966
	unsigned shift;

967
968
969
970
971
972
	if (__USE_RTC()) {
		/* 601 processor: dec counts down by 128 every 128ns */
		ppc_tb_freq = 1000000000;
	} else {
		/* Normal PowerPC with timebase register */
		ppc_md.calibrate_decr();
973
		printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
974
		       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
975
		printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",
976
977
		       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
	}
978
979

	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
980
	tb_ticks_per_sec = ppc_tb_freq;
981
	tb_ticks_per_usec = ppc_tb_freq / 1000000;
982
	calc_cputime_factors();
983
	setup_cputime_one_jiffy();
984

Linus Torvalds's avatar
Linus Torvalds committed
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
	/*
	 * Compute scale factor for sched_clock.
	 * The calibrate_decr() function has set tb_ticks_per_sec,
	 * which is the timebase frequency.
	 * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
	 * the 128-bit result as a 64.64 fixed-point number.
	 * We then shift that number right until it is less than 1.0,
	 * giving us the scale factor and shift count to use in
	 * sched_clock().
	 */
	div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
	scale = res.result_low;
	for (shift = 0; res.result_high != 0; ++shift) {
		scale = (scale >> 1) | (res.result_high << 63);
		res.result_high >>= 1;
	}
For faster browsing, not all history is shown. View entire blame