/*
 *	SGI UltraViolet TLB flush routines.
 *
 *	(c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 */
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include <asm/mmu_context.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>

/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
static int timeout_base_ns[] = {
		20,
		160,
		1280,
		10240,
		81920,
		655360,
		5242880,
		167772160
};

static int timeout_us;
static int nobau;
static int nobau_perm;
static cycles_t congested_cycles;

/* tunables: */
static int max_concurr		= MAX_BAU_CONCURRENT;
static int max_concurr_const	= MAX_BAU_CONCURRENT;
static int plugged_delay	= PLUGGED_DELAY;
static int plugsb4reset		= PLUGSB4RESET;
static int giveup_limit		= GIVEUP_LIMIT;
static int timeoutsb4reset	= TIMEOUTSB4RESET;
static int ipi_reset_limit	= IPI_RESET_LIMIT;
static int complete_threshold	= COMPLETE_THRESHOLD;
static int congested_respns_us	= CONGESTED_RESPONSE_US;
static int congested_reps	= CONGESTED_REPS;
static int disabled_period	= DISABLED_PERIOD;

static struct tunables tunables[] = {
	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
	{&plugged_delay, PLUGGED_DELAY},
	{&plugsb4reset, PLUGSB4RESET},
	{&timeoutsb4reset, TIMEOUTSB4RESET},
	{&ipi_reset_limit, IPI_RESET_LIMIT},
	{&complete_threshold, COMPLETE_THRESHOLD},
	{&congested_respns_us, CONGESTED_RESPONSE_US},
	{&congested_reps, CONGESTED_REPS},
	{&disabled_period, DISABLED_PERIOD},
	{&giveup_limit, GIVEUP_LIMIT}
};

static struct dentry *tunables_dir;
static struct dentry *tunables_file;

/* these correspond to the statistics printed by ptc_seq_show() */
static char *stat_description[] = {
	"sent:     number of shootdown messages sent",
	"stime:    time spent sending messages",
	"numuvhubs: number of hubs targeted with shootdown",
	"numuvhubs16: number times 16 or more hubs targeted",
	"numuvhubs8: number times 8 or more hubs targeted",
	"numuvhubs4: number times 4 or more hubs targeted",
	"numuvhubs2: number times 2 or more hubs targeted",
	"numuvhubs1: number times 1 hub targeted",
	"numcpus:  number of cpus targeted with shootdown",
	"dto:      number of destination timeouts",
	"retries:  destination timeout retries sent",
	"rok:   :  destination timeouts successfully retried",
	"resetp:   ipi-style resource resets for plugs",
	"resett:   ipi-style resource resets for timeouts",
	"giveup:   fall-backs to ipi-style shootdowns",
	"sto:      number of source timeouts",
	"bz:       number of stay-busy's",
	"throt:    number times spun in throttle",
	"swack:   image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
	"recv:     shootdown messages received",
	"rtime:    time spent processing messages",
	"all:      shootdown all-tlb messages",
	"one:      shootdown one-tlb messages",
	"mult:     interrupts that found multiple messages",
	"none:     interrupts that found no messages",
	"retry:    number of retry messages processed",
	"canc:     number messages canceled by retries",
	"nocan:    number retries that found nothing to cancel",
	"reset:    number of ipi-style reset requests processed",
	"rcan:     number messages canceled by reset requests",
	"disable:  number times use of the BAU was disabled",
	"enable:   number times use of the BAU was re-enabled"
};

static int __init
setup_nobau(char *arg)
{
	nobau = 1;
	return 0;
}
early_param("nobau", setup_nobau);
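
/*
 * A sketch of the intended use (assuming the usual early_param handling):
 * booting with "nobau" sets the global flag above, and later initialization
 * copies it into each cpu's bau_control so that TLB shootdowns fall back to
 * ordinary IPIs instead of the BAU.
 */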

/* base pnode in this partition */
static int uv_base_pnode __read_mostly;

static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);
static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);

static void
set_bau_on(void)
{
	int cpu;
	struct bau_control *bcp;

	if (nobau_perm) {
		pr_info("BAU not initialized; cannot be turned on\n");
		return;
	}
	nobau = 0;
	for_each_present_cpu(cpu) {
		bcp = &per_cpu(bau_control, cpu);
		bcp->nobau = 0;
	}
	pr_info("BAU turned on\n");
	return;
}

static void
set_bau_off(void)
{
	int cpu;
	struct bau_control *bcp;

	nobau = 1;
	for_each_present_cpu(cpu) {
		bcp = &per_cpu(bau_control, cpu);
		bcp->nobau = 1;
	}
	pr_info("BAU turned off\n");
	return;
}
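
/*
 * set_bau_on()/set_bau_off() flip the global and per-cpu nobau flags at run
 * time; they are presumably invoked from the ptc /proc write handler later
 * in this file ("on"/"off"), complementing the "nobau" boot option above.
 */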

/*
 * Determine the first node on a uvhub. 'Nodes' are used for kernel
 * memory allocation.
 */
static int __init uvhub_to_first_node(int uvhub)
{
	int node, b;

	for_each_online_node(node) {
		b = uv_node_to_blade_id(node);
		if (uvhub == b)
			return node;
	}
	return -1;
}

/*
 * Determine the apicid of the first cpu on a uvhub.
 */
static int __init uvhub_to_first_apicid(int uvhub)
{
	int cpu;

	for_each_present_cpu(cpu)
		if (uvhub == uv_cpu_to_blade_id(cpu))
			return per_cpu(x86_cpu_to_apicid, cpu);
	return -1;
}

/*
 * Free a software acknowledge hardware resource by clearing its Pending
 * bit. This will return a reply to the sender.
 * If the message has timed out, a reply has already been sent by the
 * hardware but the resource has not been released. In that case our
 * clear of the Timeout bit (as well) will free the resource. No reply will
 * be sent (the hardware will only do one reply per message).
 */
static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
						int do_acknowledge)
{
	unsigned long dw;
	struct bau_pq_entry *msg;

	msg = mdp->msg;
	if (!msg->canceled && do_acknowledge) {
		dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
		write_mmr_sw_ack(dw);
	}
	msg->replied_to = 1;
	msg->swack_vec = 0;
}
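
/*
 * Worked example (a sketch, assuming UV_SW_ACK_NPENDING is the bit offset of
 * the Timeout copies of the Pending bits, e.g. 8): for swack_vec == 0x04 the
 * value written is (0x04 << 8) | 0x04, clearing both the Timeout and Pending
 * bits of that one resource with a single MMR write.
 */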

/*
 * Process the receipt of a RETRY message
 */
static void bau_process_retry_msg(struct msg_desc *mdp,
					struct bau_control *bcp)
{
	int i;
	int cancel_count = 0;
	unsigned long msg_res;
	unsigned long mmr = 0;
	struct bau_pq_entry *msg = mdp->msg;
	struct bau_pq_entry *msg2;
	struct ptc_stats *stat = bcp->statp;

	stat->d_retries++;
	/*
	 * cancel any message from msg+1 to the retry itself
	 */
	for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
		if (msg2 > mdp->queue_last)
			msg2 = mdp->queue_first;
		if (msg2 == msg)
			break;

		/* same conditions for cancellation as do_reset */
		if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
		    (msg2->swack_vec) && ((msg2->swack_vec &
			msg->swack_vec) == 0) &&
		    (msg2->sending_cpu == msg->sending_cpu) &&
		    (msg2->msg_type != MSG_NOOP)) {
			mmr = read_mmr_sw_ack();
			msg_res = msg2->swack_vec;
			/*
			 * This is a message retry; clear the resources held
			 * by the previous message only if they timed out.
			 * If it has not timed out we have an unexpected
			 * situation to report.
			 */
			if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
				unsigned long mr;
				/*
				 * Is the resource timed out?
				 * Make everyone ignore the cancelled message.
				 */
				msg2->canceled = 1;
				stat->d_canceled++;
				cancel_count++;
				mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
				write_mmr_sw_ack(mr);
			}
		}
	}
	if (!cancel_count)
		stat->d_nocanceled++;
}
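
/*
 * Summary of the cancellation test above: a queued message is cancelled only
 * if it is unreplied and uncancelled, came from the same sending cpu as the
 * retry, holds swack resources that do not overlap the retry's, and is not a
 * NOOP; the scan wraps from queue_last back to queue_first because the
 * payload queue is circular.
 */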

/*
 * Do all the things a cpu should do for a TLB shootdown message.
 * Other cpu's may come here at the same time for this message.
 */
static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
						int do_acknowledge)
{
	short socket_ack_count = 0;
	short *sp;
	struct atomic_short *asp;
	struct ptc_stats *stat = bcp->statp;
	struct bau_pq_entry *msg = mdp->msg;
	struct bau_control *smaster = bcp->socket_master;

	/*
	 * This must be a normal message, or retry of a normal message
	 */
	if (msg->address == TLB_FLUSH_ALL) {
		local_flush_tlb();
		stat->d_alltlb++;
	} else {
		__flush_tlb_one(msg->address);
		stat->d_onetlb++;
	}
	stat->d_requestee++;

	/*
	 * One cpu on each uvhub has the additional job on a RETRY
	 * of releasing the resource held by the message that is
	 * being retried.  That message is identified by sending
	 * cpu number.
	 */
	if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
		bau_process_retry_msg(mdp, bcp);

	/*
	 * This is a swack message, so we have to reply to it.
	 * Count each responding cpu on the socket. This avoids
	 * pinging the count's cache line back and forth between
	 * the sockets.
	 */
	sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
	asp = (struct atomic_short *)sp;
	socket_ack_count = atom_asr(1, asp);
	if (socket_ack_count == bcp->cpus_in_socket) {
		int msg_ack_count;
		/*
		 * Both sockets dump their completed count total into
		 * the message's count.
		 */
		*sp = 0;
		asp = (struct atomic_short *)&msg->acknowledge_count;
		msg_ack_count = atom_asr(socket_ack_count, asp);

		if (msg_ack_count == bcp->cpus_in_uvhub) {
			/*
			 * All cpus in uvhub saw it; reply
			 * (unless we are in the UV2 workaround)
			 */
			reply_to_message(mdp, bcp, do_acknowledge);
		}
	}

	return;
}

/*
 * Determine the first cpu on a pnode.
 */
static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
{
	int cpu;
	struct hub_and_pnode *hpp;

	for_each_present_cpu(cpu) {
		hpp = &smaster->thp[cpu];
		if (pnode == hpp->pnode)
			return cpu;
	}
	return -1;
}

/*
 * Last resort when we get a large number of destination timeouts is
 * to clear resources held by a given cpu.
 * Do this with IPI so that all messages in the BAU message queue
 * can be identified by their nonzero swack_vec field.
 *
 * This is entered for a single cpu on the uvhub.
 * The sender wants this uvhub to free a specific message's
 * swack resources.
 */
static void do_reset(void *ptr)
{
	int i;
	struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
	struct reset_args *rap = (struct reset_args *)ptr;
	struct bau_pq_entry *msg;
	struct ptc_stats *stat = bcp->statp;

	stat->d_resets++;
	/*
	 * We're looking for the given sender, and
	 * will free its swack resource.
	 * If all cpu's finally responded after the timeout, its
	 * message 'replied_to' was set.
	 */
	for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
		unsigned long msg_res;
		/* do_reset: same conditions for cancellation as
		   bau_process_retry_msg() */
		if ((msg->replied_to == 0) &&
		    (msg->canceled == 0) &&
		    (msg->sending_cpu == rap->sender) &&
		    (msg->swack_vec) &&
		    (msg->msg_type != MSG_NOOP)) {
			unsigned long mmr;
			unsigned long mr;
			/*
			 * make everyone else ignore this message
			 */
			msg->canceled = 1;
			/*
			 * only reset the resource if it is still pending
			 */
			mmr = read_mmr_sw_ack();
			msg_res = msg->swack_vec;
			mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
			if (mmr & msg_res) {
				stat->d_rcanceled++;
				write_mmr_sw_ack(mr);
			}
		}
	}
	return;
}

/*
 * Use IPI to get all target uvhubs to release resources held by
 * a given sending cpu number.
 */
static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
{
	int pnode;
	int apnode;
	int maskbits;
	int sender = bcp->cpu;
	cpumask_t *mask = bcp->uvhub_master->cpumask;
	struct bau_control *smaster = bcp->socket_master;
	struct reset_args reset_args;

	reset_args.sender = sender;
	cpus_clear(*mask);
	/* find a single cpu for each uvhub in this distribution mask */
	maskbits = sizeof(struct pnmask) * BITSPERBYTE;
	/* each bit is a pnode relative to the partition base pnode */
	for (pnode = 0; pnode < maskbits; pnode++) {
		int cpu;
		if (!bau_uvhub_isset(pnode, distribution))
			continue;
		apnode = pnode + bcp->partition_base_pnode;
		cpu = pnode_to_first_cpu(apnode, smaster);
		cpu_set(cpu, *mask);
	}

	/* IPI all cpus; preemption is already disabled */
	smp_call_function_many(mask, do_reset, (void *)&reset_args, 1);
	return;
}

static inline unsigned long cycles_2_us(unsigned long long cyc)
{
	unsigned long long ns;
	unsigned long us;
	int cpu = smp_processor_id();

	ns =  (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR;
	us = ns / 1000;
	return us;
}
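
/*
 * Rough worked example (assuming per_cpu(cyc2ns) holds nanoseconds per cycle
 * scaled by 2^CYC2NS_SCALE_FACTOR): on a 2 GHz cpu cyc2ns represents about
 * 0.5 ns/cycle, so 2,000,000 cycles -> ~1,000,000 ns -> ~1000 us.
 */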

/*
 * wait for all cpus on this hub to finish their sends and go quiet
 * leaves uvhub_quiesce set so that no new broadcasts are started by
 * bau_flush_send_and_wait()
 */
static inline void quiesce_local_uvhub(struct bau_control *hmaster)
{
	atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
}

/*
 * mark this quiet-requestor as done
 */
static inline void end_uvhub_quiesce(struct bau_control *hmaster)
{
	atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
}

static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
{
	unsigned long descriptor_status;

	descriptor_status = uv_read_local_mmr(mmr_offset);
	descriptor_status >>= right_shift;
	descriptor_status &= UV_ACT_STATUS_MASK;
	return descriptor_status;
}

/*
 * Wait for completion of a broadcast software ack message
 * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP
 */
static int uv1_wait_completion(struct bau_desc *bau_desc,
				unsigned long mmr_offset, int right_shift,
				struct bau_control *bcp, long try)
{
	unsigned long descriptor_status;
	cycles_t ttm;
	struct ptc_stats *stat = bcp->statp;

	descriptor_status = uv1_read_status(mmr_offset, right_shift);
	/* spin on the status MMR, waiting for it to go idle */
	while ((descriptor_status != DS_IDLE)) {
		/*
		 * Our software ack messages may be blocked because
		 * there are no swack resources available.  As long
		 * as none of them has timed out hardware will NACK
		 * our message and its state will stay IDLE.
		 */
		if (descriptor_status == DS_SOURCE_TIMEOUT) {
			stat->s_stimeout++;
			return FLUSH_GIVEUP;
		} else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
			stat->s_dtimeout++;
			ttm = get_cycles();

			/*
			 * Our retries may be blocked by all destination
			 * swack resources being consumed, and a timeout
			 * pending.  In that case hardware returns the
			 * ERROR that looks like a destination timeout.
			 */
			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
				bcp->conseccompletes = 0;
				return FLUSH_RETRY_PLUGGED;
			}

			bcp->conseccompletes = 0;
			return FLUSH_RETRY_TIMEOUT;
		} else {
			/*
			 * descriptor_status is still BUSY
			 */
			cpu_relax();
		}
		descriptor_status = uv1_read_status(mmr_offset, right_shift);
	}
	bcp->conseccompletes++;
	return FLUSH_COMPLETE;
}

/*
 * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
 * But not currently used.
 */
static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
{
	unsigned long descriptor_status;

	descriptor_status =
		((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
	return descriptor_status;
}
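
/*
 * The 2-bit hardware status is shifted left one position so it can be
 * compared against the 3-bit UV2H_DESC_* values, whose low-order bit would
 * come from the (currently unused) ACTIVATION_STATUS_2 register mentioned
 * above.
 */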

/*
 * Return whether the status of the descriptor that is normally used for this
 * cpu (the one indexed by its hub-relative cpu number) is busy.
 * The status of the original 32 descriptors is always reflected in the 64
 * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
 * The bit provided by the activation_status_2 register is irrelevant to
 * the status if it is only being tested for busy or not busy.
 */
int normal_busy(struct bau_control *bcp)
{
	int cpu = bcp->uvhub_cpu;
	int mmr_offset;
	int right_shift;

	mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
	right_shift = cpu * UV_ACT_STATUS_SIZE;
	return (((((read_lmmr(mmr_offset) >> right_shift) &
				UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
}

/*
 * Entered when a bau descriptor has gone into a permanent busy wait because
 * of a hardware bug.
 * Workaround the bug.
 */
int handle_uv2_busy(struct bau_control *bcp)
{
	struct ptc_stats *stat = bcp->statp;

	stat->s_uv2_wars++;
	bcp->busy = 1;
	return FLUSH_GIVEUP;
}

static int uv2_wait_completion(struct bau_desc *bau_desc,
				unsigned long mmr_offset, int right_shift,
				struct bau_control *bcp, long try)
{
	unsigned long descriptor_stat;
	cycles_t ttm;
	int desc = bcp->uvhub_cpu;
	long busy_reps = 0;
	struct ptc_stats *stat = bcp->statp;

	descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);

	/* spin on the status MMR, waiting for it to go idle */
	while (descriptor_stat != UV2H_DESC_IDLE) {
		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
			/*
			 * A h/w bug on the destination side may
			 * have prevented the message being marked
			 * pending, thus it doesn't get replied to
			 * and gets continually nacked until it times
			 * out with a SOURCE_TIMEOUT.
			 */
			stat->s_stimeout++;
			return FLUSH_GIVEUP;
		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
			ttm = get_cycles();

			/*
			 * Our retries may be blocked by all destination
			 * swack resources being consumed, and a timeout
			 * pending.  In that case hardware returns the
			 * ERROR that looks like a destination timeout.
			 * Without using the extended status we have to
			 * deduce from the short time that this was a
			 * strong nack.
			 */
			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
				bcp->conseccompletes = 0;
				stat->s_plugged++;
				/* FLUSH_RETRY_PLUGGED causes hang on boot */
				return FLUSH_GIVEUP;
			}
			stat->s_dtimeout++;
			bcp->conseccompletes = 0;
			/* FLUSH_RETRY_TIMEOUT causes hang on boot */
			return FLUSH_GIVEUP;
		} else {
			busy_reps++;
			if (busy_reps > 1000000) {
				/* not to hammer on the clock */
				busy_reps = 0;
				ttm = get_cycles();
				if ((ttm - bcp->send_message) >
						bcp->timeout_interval)
					return handle_uv2_busy(bcp);
			}
			/*
			 * descriptor_stat is still BUSY
			 */
			cpu_relax();
		}
		descriptor_stat = uv2_read_status(mmr_offset, right_shift,
									desc);
	}
	bcp->conseccompletes++;
	return FLUSH_COMPLETE;
}

/*
 * There are 2 status registers; each an array[32] of 2 bits. Set up for
 * which register to read and position in that register based on cpu in
 * current hub.
 */
static int wait_completion(struct bau_desc *bau_desc,
				struct bau_control *bcp, long try)
{
	int right_shift;
	unsigned long mmr_offset;
	int desc = bcp->uvhub_cpu;

	if (desc < UV_CPUS_PER_AS) {
		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
		right_shift = desc * UV_ACT_STATUS_SIZE;
	} else {
		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
		right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
	}

	if (bcp->uvhub_version == 1)
		return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
								bcp, try);
	else
		return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
								bcp, try);
}
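
/*
 * Example of the selection above (a sketch, assuming UV_CPUS_PER_AS == 32
 * and UV_ACT_STATUS_SIZE == 2 bits): hub-relative cpu 40 reads
 * ACTIVATION_STATUS_1 with right_shift = (40 - 32) * 2 = 16.
 */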

static inline cycles_t sec_2_cycles(unsigned long sec)
{
	unsigned long ns;
	cycles_t cyc;

	ns = sec * 1000000000;
	cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
	return cyc;
}

/*
 * Our retries are blocked by all destination sw ack resources being
 * in use, and a timeout is pending. In that case hardware immediately
 * returns the ERROR that looks like a destination timeout.
 */
static void destination_plugged(struct bau_desc *bau_desc,
			struct bau_control *bcp,
			struct bau_control *hmaster, struct ptc_stats *stat)
{
	udelay(bcp->plugged_delay);
	bcp->plugged_tries++;

	if (bcp->plugged_tries >= bcp->plugsb4reset) {
		bcp->plugged_tries = 0;

		quiesce_local_uvhub(hmaster);

		spin_lock(&hmaster->queue_lock);
		reset_with_ipi(&bau_desc->distribution, bcp);
		spin_unlock(&hmaster->queue_lock);

		end_uvhub_quiesce(hmaster);

		bcp->ipi_attempts++;
		stat->s_resets_plug++;
	}
}

static void destination_timeout(struct bau_desc *bau_desc,
			struct bau_control *bcp, struct bau_control *hmaster,
			struct ptc_stats *stat)
{
	hmaster->max_concurr = 1;
	bcp->timeout_tries++;
	if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
		bcp->timeout_tries = 0;

		quiesce_local_uvhub(hmaster);

		spin_lock(&hmaster->queue_lock);
		reset_with_ipi(&bau_desc->distribution, bcp);
		spin_unlock(&hmaster->queue_lock);

		end_uvhub_quiesce(hmaster);

		bcp->ipi_attempts++;
		stat->s_resets_timeout++;
	}
}

/*
 * Stop all cpus on a uvhub from using the BAU for a period of time.
 * This is reversed by check_enable.
 */
static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
{
	int tcpu;
	struct bau_control *tbcp;
	struct bau_control *hmaster;
	cycles_t tm1;

	hmaster = bcp->uvhub_master;
	spin_lock(&hmaster->disable_lock);
	if (!bcp->baudisabled) {
		stat->s_bau_disabled++;
		tm1 = get_cycles();
		for_each_present_cpu(tcpu) {
			tbcp = &per_cpu(bau_control, tcpu);
			if (tbcp->uvhub_master == hmaster) {
				tbcp->baudisabled = 1;
				tbcp->set_bau_on_time =
					tm1 + bcp->disabled_period;
			}
		}
	}
	spin_unlock(&hmaster->disable_lock);
}

static void count_max_concurr(int stat, struct bau_control *bcp,
				struct bau_control *hmaster)
{
	bcp->plugged_tries = 0;
	bcp->timeout_tries = 0;
	if (stat != FLUSH_COMPLETE)
		return;
	if (bcp->conseccompletes <= bcp->complete_threshold)
		return;
	if (hmaster->max_concurr >= hmaster->max_concurr_const)
		return;
	hmaster->max_concurr++;
}

static void record_send_stats(cycles_t time1, cycles_t time2,
		struct bau_control *bcp, struct ptc_stats *stat,
		int completion_status, int try)
{
	cycles_t elapsed;

	if (time2 > time1) {
		elapsed = time2 - time1;
		stat->s_time += elapsed;

		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
			bcp->period_requests++;
			bcp->period_time += elapsed;
			if ((elapsed > congested_cycles) &&
			    (bcp->period_requests > bcp->cong_reps) &&
			    ((bcp->period_time / bcp->period_requests) >
							congested_cycles)) {
				stat->s_congested++;
				disable_for_period(bcp, stat);
			}
		}
	} else
		stat->s_requestor--;

	if (completion_status == FLUSH_COMPLETE && try > 1)
		stat->s_retriesok++;
	else if (completion_status == FLUSH_GIVEUP) {
		stat->s_giveup++;
		if (get_cycles() > bcp->period_end)
			bcp->period_giveups = 0;
		bcp->period_giveups++;
		if (bcp->period_giveups == 1)
			bcp->period_end = get_cycles() + bcp->disabled_period;
		if (bcp->period_giveups > bcp->giveup_limit) {
			disable_for_period(bcp, stat);
			stat->s_giveuplimit++;
		}
	}
}

/*
 * Because of a uv1 hardware bug only a limited number of concurrent
 * requests can be made.
 */
static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
{
	spinlock_t *lock = &hmaster->uvhub_lock;
	atomic_t *v;

	v = &hmaster->active_descriptor_count;
	if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
		stat->s_throttles++;
		do {
			cpu_relax();
		} while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
	}
}
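
/*
 * atomic_inc_unless_ge() (a helper presumably defined in uv_bau.h) takes the
 * hub lock and increments active_descriptor_count only while it is below
 * max_concurr, so senders spin here until a descriptor slot frees up.
 */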

/*
 * Handle the completion status of a message send.
 */
static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
			struct bau_control *bcp, struct bau_control *hmaster,
			struct ptc_stats *stat)
{
	if (completion_status == FLUSH_RETRY_PLUGGED)
		destination_plugged(bau_desc, bcp, hmaster, stat);
	else if (completion_status == FLUSH_RETRY_TIMEOUT)
		destination_timeout(bau_desc, bcp, hmaster, stat);
}

/*
 * Send a broadcast and wait for it to complete.
 *
 * The flush_mask contains the cpus the broadcast is to be sent to including
 * cpus that are on the local uvhub.
 *
 * Returns 0 if all flushing represented in the mask was done.
 * Returns 1 if it gives up entirely and the original cpu mask is to be
 * returned to the kernel.
 */
int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
	struct bau_desc *bau_desc)
{
	int seq_number = 0;
	int completion_stat = 0;
	int uv1 = 0;
	long try = 0;
	unsigned long index;
	cycles_t time1;
	cycles_t time2;
	struct ptc_stats *stat = bcp->statp;
	struct bau_control *hmaster = bcp->uvhub_master;
	struct uv1_bau_msg_header *uv1_hdr = NULL;
	struct uv2_bau_msg_header *uv2_hdr = NULL;

	if (bcp->uvhub_version == 1) {
		uv1 = 1;
		uv1_throttle(hmaster, stat);
	}

	while (hmaster->uvhub_quiesce)
		cpu_relax();

	time1 = get_cycles();
	if (uv1)
		uv1_hdr = &bau_desc->header.uv1_hdr;
	else
		uv2_hdr = &bau_desc->header.uv2_hdr;

	do {
		if (try == 0) {
			if (uv1)
				uv1_hdr->msg_type = MSG_REGULAR;
			else
				uv2_hdr->msg_type = MSG_REGULAR;
			seq_number = bcp->message_number++;
		} else {
			if (uv1)
				uv1_hdr->msg_type = MSG_RETRY;
			else
				uv2_hdr->msg_type = MSG_RETRY;
			stat->s_retry_messages++;
		}

		if (uv1)
			uv1_hdr->sequence = seq_number;
		else
			uv2_hdr->sequence = seq_number;
		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
		bcp->send_message = get_cycles();

		write_mmr_activation(index);

		try++;
		completion_stat = wait_completion(bau_desc, bcp, try);

		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);

		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
			bcp->ipi_attempts = 0;
			stat->s_overipilimit++;
			completion_stat = FLUSH_GIVEUP;
			break;
		}
		cpu_relax();
	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
		 (completion_stat == FLUSH_RETRY_TIMEOUT));

	time2 = get_cycles();

	count_max_concurr(completion_stat, bcp, hmaster);

	while (hmaster->uvhub_quiesce)
		cpu_relax();

	atomic_dec(&hmaster->active_descriptor_count);

	record_send_stats(time1, time2, bcp, stat, completion_stat, try);

	if (completion_stat == FLUSH_GIVEUP)
		/* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
		return 1;
	return 0;
}

/*
 * The BAU is disabled for this uvhub. When the disabled time period has
 * expired re-enable it.
 * Return 0 if it is re-enabled for all cpus on this uvhub.
 */
static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
{
	int tcpu;
	struct bau_control *tbcp;
	struct bau_control *hmaster;

	hmaster = bcp->uvhub_master;
	spin_lock(&hmaster->disable_lock);
	if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
		stat->s_bau_reenabled++;
		for_each_present_cpu(tcpu) {
			tbcp = &per_cpu(bau_control, tcpu);
			if (tbcp->uvhub_master == hmaster) {
				tbcp->baudisabled = 0;
				tbcp->period_requests = 0;
				tbcp->period_time = 0;
				tbcp->period_giveups = 0;
			}
		}
		spin_unlock(&hmaster->disable_lock);
		return 0;
	}
	spin_unlock(&hmaster->disable_lock);
	return -1;
}

static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
				int remotes, struct bau_desc *bau_desc)
{
	stat->s_requestor++;
	stat->s_ntargcpu += remotes + locals;
	stat->s_ntargremotes += remotes;
	stat->s_ntarglocals += locals;

	/* uvhub statistics */
	hubs = bau_uvhub_weight(&bau_desc->distribution);
	if (locals) {
		stat->s_ntarglocaluvhub++;
		stat->s_ntargremoteuvhub += (hubs - 1);
	} else
		stat->s_ntargremoteuvhub += hubs;

	stat->s_ntarguvhub += hubs;

	if (hubs >= 16)
		stat->s_ntarguvhub16++;
	else if (hubs >= 8)
		stat->s_ntarguvhub8++;
	else if (hubs >= 4)
		stat->s_ntarguvhub4++;
	else if (hubs >= 2)
		stat->s_ntarguvhub2++;
	else
		stat->s_ntarguvhub1++;
}

/*