/*
 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 *         Leo Duran <leo.duran@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/amba/bus.h>
#include <linux/platform_device.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/iommu-helper.h>
#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/dma-contiguous.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <linux/iova.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/hw_irq.h>
#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/dma.h>

#include "amd_iommu_proto.h"
#include "amd_iommu_types.h"
#include "irq_remapping.h"

#define AMD_IOMMU_MAPPING_ERROR	0

#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))

#define LOOP_TIMEOUT	100000

/* IO virtual address start page frame number */
#define IOVA_START_PFN		(1)
#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))

/* Reserved IOVA ranges */
#define MSI_RANGE_START		(0xfee00000)
#define MSI_RANGE_END		(0xfeefffff)
#define HT_RANGE_START		(0xfd00000000ULL)
#define HT_RANGE_END		(0xffffffffffULL)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * 512GB pages are not supported due to a hardware bug.
 */
#define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))
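/*
 * The mask above advertises 4kb and every larger power-of-two page size,
 * with only the 512GB size (2ULL << 38) masked out.
 */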

static DEFINE_RWLOCK(amd_iommu_devtable_lock);

/* List of all available dev_data structures */
static LIST_HEAD(dev_data_list);
static DEFINE_SPINLOCK(dev_data_list_lock);

LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);

/*
 * Domain for untranslated devices - only allocated
 * if iommu=pt passed on kernel cmd line.
 */
const struct iommu_ops amd_iommu_ops;

static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;

static const struct dma_map_ops amd_iommu_dma_ops;

/*
 * general struct to manage commands sent to an IOMMU
 */
struct iommu_cmd {
	u32 data[4];
};

struct kmem_cache *amd_iommu_irq_cache;

static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
static void iova_domain_flush_tlb(struct iova_domain *iovad);

/*
 * Data container for a dma_ops specific protection domain
 */
struct dma_ops_domain {
	/* generic protection domain information */
	struct protection_domain domain;

	/* IOVA RB-Tree */
	struct iova_domain iovad;
};

static struct iova_domain reserved_iova_ranges;
static struct lock_class_key reserved_rbtree_key;

/****************************************************************************
 *
 * Helper functions
 *
 ****************************************************************************/

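/*
 * Match the ACPI HID and UID of @dev against an acpihid_map entry. Returns
 * zero on a match; UIDs are only compared when both sides provide one.
 */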
static inline int match_hid_uid(struct device *dev,
				struct acpihid_map_entry *entry)
{
	const char *hid, *uid;

	hid = acpi_device_hid(ACPI_COMPANION(dev));
	uid = acpi_device_uid(ACPI_COMPANION(dev));

	if (!hid || !(*hid))
		return -ENODEV;

	if (!uid || !(*uid))
		return strcmp(hid, entry->hid);

	if (!(*entry->uid))
		return strcmp(hid, entry->hid);

	return (strcmp(hid, entry->hid) || strcmp(uid, entry->uid));
}

static inline u16 get_pci_device_id(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return PCI_DEVID(pdev->bus->number, pdev->devfn);
}

static inline int get_acpihid_device_id(struct device *dev,
					struct acpihid_map_entry **entry)
{
	struct acpihid_map_entry *p;

	list_for_each_entry(p, &acpihid_map, list) {
		if (!match_hid_uid(dev, p)) {
			if (entry)
				*entry = p;
			return p->devid;
		}
	}
	return -EINVAL;
}

static inline int get_device_id(struct device *dev)
{
	int devid;

	if (dev_is_pci(dev))
		devid = get_pci_device_id(dev);
	else
		devid = get_acpihid_device_id(dev, NULL);

	return devid;
}

static struct protection_domain *to_pdomain(struct iommu_domain *dom)
{
	return container_of(dom, struct protection_domain, domain);
}

static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain)
{
	BUG_ON(domain->flags != PD_DMA_OPS_MASK);
	return container_of(domain, struct dma_ops_domain, domain);
}

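/*
 * Allocate a new iommu_dev_data for @devid and add it to the global
 * dev_data_list.
 */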
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
	struct iommu_dev_data *dev_data;
	unsigned long flags;

	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
	if (!dev_data)
		return NULL;

	dev_data->devid = devid;

	spin_lock_irqsave(&dev_data_list_lock, flags);
	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
	spin_unlock_irqrestore(&dev_data_list_lock, flags);

	ratelimit_default_init(&dev_data->rs);

	return dev_data;
}

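/* Find an already allocated iommu_dev_data for @devid, or return NULL. */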
static struct iommu_dev_data *search_dev_data(u16 devid)
{
	struct iommu_dev_data *dev_data;
	unsigned long flags;

	spin_lock_irqsave(&dev_data_list_lock, flags);
	list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
		if (dev_data->devid == devid)
			goto out_unlock;
	}

	dev_data = NULL;

out_unlock:
	spin_unlock_irqrestore(&dev_data_list_lock, flags);

	return dev_data;
}

static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
{
	*(u16 *)data = alias;
	return 0;
}

static u16 get_alias(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	u16 devid, ivrs_alias, pci_alias;

	/* The callers make sure that get_device_id() does not fail here */
	devid = get_device_id(dev);

	/* For ACPI HID devices, we simply return the devid as such */
	if (!dev_is_pci(dev))
		return devid;

	ivrs_alias = amd_iommu_alias_table[devid];

	pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);

	if (ivrs_alias == pci_alias)
		return ivrs_alias;

	/*
	 * DMA alias showdown
	 *
	 * The IVRS is fairly reliable in telling us about aliases, but it
	 * can't know about every screwy device.  If we don't have an IVRS
	 * reported alias, use the PCI reported alias.  In that case we may
	 * still need to initialize the rlookup and dev_table entries if the
	 * alias is to a non-existent device.
	 */
	if (ivrs_alias == devid) {
		if (!amd_iommu_rlookup_table[pci_alias]) {
			amd_iommu_rlookup_table[pci_alias] =
				amd_iommu_rlookup_table[devid];
			memcpy(amd_iommu_dev_table[pci_alias].data,
			       amd_iommu_dev_table[devid].data,
			       sizeof(amd_iommu_dev_table[pci_alias].data));
		}

		return pci_alias;
	}

	pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
		"for device %s[%04x:%04x], kernel reported alias "
		"%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
		PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
		PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
		PCI_FUNC(pci_alias));

	/*
	 * If we don't have a PCI DMA alias and the IVRS alias is on the same
	 * bus, then the IVRS table may know about a quirk that we don't.
	 */
	if (pci_alias == devid &&
	    PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
		pci_add_dma_alias(pdev, ivrs_alias & 0xff);
		pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
			PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
			dev_name(dev));
	}

	return ivrs_alias;
}

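/*
 * Return the iommu_dev_data for @devid, allocating a new one if none
 * exists yet.
 */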
static struct iommu_dev_data *find_dev_data(u16 devid)
{
	struct iommu_dev_data *dev_data;
	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];

	dev_data = search_dev_data(devid);

	if (dev_data == NULL) {
		dev_data = alloc_dev_data(devid);
		if (!dev_data)
			return NULL;

		if (translation_pre_enabled(iommu))
			dev_data->defer_attach = true;
	}

	return dev_data;
}

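/* Return the iommu_dev_data attached to @dev, or NULL if there is none. */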
struct iommu_dev_data *get_dev_data(struct device *dev)
{
	return dev->archdata.iommu;
}
EXPORT_SYMBOL(get_dev_data);

/*
 * Find or create an IOMMU group for an acpihid device.
 */
static struct iommu_group *acpihid_device_group(struct device *dev)
{
	struct acpihid_map_entry *p, *entry = NULL;
	int devid;

	devid = get_acpihid_device_id(dev, &entry);
	if (devid < 0)
		return ERR_PTR(devid);

	list_for_each_entry(p, &acpihid_map, list) {
		if ((devid == p->devid) && p->group)
			entry->group = p->group;
	}

	if (!entry->group)
		entry->group = generic_device_group(dev);
	else
		iommu_group_ref_get(entry->group);

	return entry->group;
}

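/* True if @pdev exposes the ATS, PRI and PASID extended capabilities. */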
static bool pci_iommuv2_capable(struct pci_dev *pdev)
{
	static const int caps[] = {
		PCI_EXT_CAP_ID_ATS,
		PCI_EXT_CAP_ID_PRI,
		PCI_EXT_CAP_ID_PASID,
	};
	int i, pos;

	for (i = 0; i < 3; ++i) {
		pos = pci_find_ext_capability(pdev, caps[i]);
		if (pos == 0)
			return false;
	}

	return true;
}

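/* Check whether the given PRI erratum bit is set for this device. */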
static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
{
	struct iommu_dev_data *dev_data;

	dev_data = get_dev_data(&pdev->dev);

	return dev_data->errata & (1 << erratum) ? true : false;
}

/*
 * This function checks if the driver got a valid device from the caller to
 * avoid dereferencing invalid pointers.
 */
static bool check_device(struct device *dev)
{
	int devid;

	if (!dev || !dev->dma_mask)
		return false;

	devid = get_device_id(dev);
	if (devid < 0)
		return false;

	/* Out of our scope? */
	if (devid > amd_iommu_last_bdf)
		return false;

	if (amd_iommu_rlookup_table[devid] == NULL)
		return false;

	return true;
}

static void init_iommu_group(struct device *dev)
{
	struct iommu_group *group;

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group))
		return;

	iommu_group_put(group);
}

static int iommu_init_device(struct device *dev)
{
	struct iommu_dev_data *dev_data;
	struct amd_iommu *iommu;
	int devid;

	if (dev->archdata.iommu)
		return 0;

	devid = get_device_id(dev);
	if (devid < 0)
		return devid;

	iommu = amd_iommu_rlookup_table[devid];

	dev_data = find_dev_data(devid);
	if (!dev_data)
		return -ENOMEM;

	dev_data->alias = get_alias(dev);

	if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
		struct amd_iommu *iommu;

		iommu = amd_iommu_rlookup_table[dev_data->devid];
		dev_data->iommu_v2 = iommu->is_iommu_v2;
	}

	dev->archdata.iommu = dev_data;

	iommu_device_link(&iommu->iommu, dev);

	return 0;
}

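/*
 * Clear the device table and rlookup entries of a device (and its alias)
 * that should not be translated by the IOMMU.
 */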
static void iommu_ignore_device(struct device *dev)
{
	u16 alias;
	int devid;

	devid = get_device_id(dev);
	if (devid < 0)
		return;

	alias = get_alias(dev);

	memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
	memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));

	amd_iommu_rlookup_table[devid] = NULL;
	amd_iommu_rlookup_table[alias] = NULL;
}

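/*
 * Undo the per-device IOMMU setup: detach the device from its domain,
 * unlink it from the IOMMU and remove it from its group.
 */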
static void iommu_uninit_device(struct device *dev)
{
	struct iommu_dev_data *dev_data;
	struct amd_iommu *iommu;
	int devid;

	devid = get_device_id(dev);
	if (devid < 0)
		return;

	iommu = amd_iommu_rlookup_table[devid];

	dev_data = search_dev_data(devid);
	if (!dev_data)
		return;

	if (dev_data->domain)
		detach_device(dev);

	iommu_device_unlink(&iommu->iommu, dev);

	iommu_group_remove_device(dev);

	/* Remove dma-ops */
	dev->dma_ops = NULL;

	/*
	 * We keep dev_data around for unplugged devices and reuse it when the
	 * device is re-plugged - not doing so would introduce a ton of races.
	 */
}

/****************************************************************************
 *
 * Interrupt handling functions
 *
 ****************************************************************************/

static void dump_dte_entry(u16 devid)
{
	int i;

	for (i = 0; i < 4; ++i)
		pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
			amd_iommu_dev_table[devid].data[i]);
}

static void dump_command(unsigned long phys_addr)
{
	struct iommu_cmd *cmd = iommu_phys_to_virt(phys_addr);
	int i;

	for (i = 0; i < 4; ++i)
		pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
}

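/* Rate-limited reporting of an IO_PAGE_FAULT event for a single device. */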
static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
					u64 address, int flags)
{
	struct iommu_dev_data *dev_data = NULL;
	struct pci_dev *pdev;

	pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff);
	if (pdev)
		dev_data = get_dev_data(&pdev->dev);

	if (dev_data && __ratelimit(&dev_data->rs)) {
		dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
			domain_id, address, flags);
	} else if (printk_ratelimit()) {
		pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
			domain_id, address, flags);
	}

	if (pdev)
		pci_dev_put(pdev);
}

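/*
 * Decode and print one entry from the event log, then clear the entry in
 * the buffer.
 */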
static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
	int type, devid, domid, flags;
	volatile u32 *event = __evt;
	int count = 0;
	u64 address;

retry:
	type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
	domid   = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
	address = (u64)(((u64)event[3]) << 32) | event[2];

	if (type == 0) {
		/* Did we hit the erratum? */
		if (++count == LOOP_TIMEOUT) {
			pr_err("AMD-Vi: No event written to event log\n");
			return;
		}
		udelay(1);
		goto retry;
	}

	if (type == EVENT_TYPE_IO_FAULT) {
		amd_iommu_report_page_fault(devid, domid, address, flags);
		return;
	} else {
		printk(KERN_ERR "AMD-Vi: Event logged [");
	}

	switch (type) {
	case EVENT_TYPE_ILL_DEV:
		printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		dump_dte_entry(devid);
		break;
	case EVENT_TYPE_DEV_TAB_ERR:
		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		break;
	case EVENT_TYPE_PAGE_TAB_ERR:
		printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       domid, address, flags);
		break;
	case EVENT_TYPE_ILL_CMD:
		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
		dump_command(address);
		break;
	case EVENT_TYPE_CMD_HARD_ERR:
		printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
		       "flags=0x%04x]\n", address, flags);
		break;
	case EVENT_TYPE_IOTLB_INV_TO:
		printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
		       "address=0x%016llx]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address);
		break;
	case EVENT_TYPE_INV_DEV_REQ:
		printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		break;
	default:
		printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
	}

	memset(__evt, 0, 4 * sizeof(u32));
}

static void iommu_poll_events(struct amd_iommu *iommu)
{
	u32 head, tail;

	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

	while (head != tail) {
		iommu_print_event(iommu, iommu->evt_buf + head);
		head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
	}

	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
}

static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
{
	struct amd_iommu_fault fault;

	if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
		pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
		return;
	}

	fault.address   = raw[1];
	fault.pasid     = PPR_PASID(raw[0]);
	fault.device_id = PPR_DEVID(raw[0]);
	fault.tag       = PPR_TAG(raw[0]);
	fault.flags     = PPR_FLAGS(raw[0]);

	atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
}

static void iommu_poll_ppr_log(struct amd_iommu *iommu)
{
	u32 head, tail;

	if (iommu->ppr_log == NULL)
		return;

	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);

	while (head != tail) {
		volatile u64 *raw;
		u64 entry[2];
		int i;

		raw = (u64 *)(iommu->ppr_log + head);

		/*
		 * Hardware bug: Interrupt may arrive before the entry is
		 * written to memory. If this happens we need to wait for the
		 * entry to arrive.
		 */
		for (i = 0; i < LOOP_TIMEOUT; ++i) {
			if (PPR_REQ_TYPE(raw[0]) != 0)
				break;
			udelay(1);
		}

		/* Avoid memcpy function-call overhead */
		entry[0] = raw[0];
		entry[1] = raw[1];

		/*
		 * To detect the hardware bug we need to clear the entry
		 * back to zero.
		 */
		raw[0] = raw[1] = 0UL;

		/* Update head pointer of hardware ring-buffer */
		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
		writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);

		/* Handle PPR entry */
		iommu_handle_ppr_entry(iommu, entry);

		/* Refresh ring-buffer information */
		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
	}
}

#ifdef CONFIG_IRQ_REMAP
static int (*iommu_ga_log_notifier)(u32);

int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
{
	iommu_ga_log_notifier = notifier;

	return 0;
}
EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);

static void iommu_poll_ga_log(struct amd_iommu *iommu)
{
	u32 head, tail, cnt = 0;

	if (iommu->ga_log == NULL)
		return;

	head = readl(iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
	tail = readl(iommu->mmio_base + MMIO_GA_TAIL_OFFSET);

	while (head != tail) {
		volatile u64 *raw;
		u64 log_entry;

		raw = (u64 *)(iommu->ga_log + head);
		cnt++;

		/* Avoid memcpy function-call overhead */
		log_entry = *raw;

		/* Update head pointer of hardware ring-buffer */
		head = (head + GA_ENTRY_SIZE) % GA_LOG_SIZE;
		writel(head, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);

		/* Handle GA entry */
		switch (GA_REQ_TYPE(log_entry)) {
		case GA_GUEST_NR:
			if (!iommu_ga_log_notifier)
				break;

			pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n",
				 __func__, GA_DEVID(log_entry),
				 GA_TAG(log_entry));

			if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
				pr_err("AMD-Vi: GA log notifier failed.\n");
			break;
		default:
			break;
		}
	}
}
#endif /* CONFIG_IRQ_REMAP */

#define AMD_IOMMU_INT_MASK	\
	(MMIO_STATUS_EVT_INT_MASK | \
	 MMIO_STATUS_PPR_INT_MASK | \
	 MMIO_STATUS_GALOG_INT_MASK)

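/*
 * Threaded interrupt handler: drains the event, PPR and (with IRQ
 * remapping enabled) GA logs until the status register shows no more
 * pending work.
 */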
irqreturn_t amd_iommu_int_thread(int irq, void *data)
{
	struct amd_iommu *iommu = (struct amd_iommu *) data;
	u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);

	while (status & AMD_IOMMU_INT_MASK) {
		/* Enable EVT and PPR and GA interrupts again */
		writel(AMD_IOMMU_INT_MASK,
			iommu->mmio_base + MMIO_STATUS_OFFSET);

		if (status & MMIO_STATUS_EVT_INT_MASK) {
			pr_devel("AMD-Vi: Processing IOMMU Event Log\n");
			iommu_poll_events(iommu);
		}

		if (status & MMIO_STATUS_PPR_INT_MASK) {
			pr_devel("AMD-Vi: Processing IOMMU PPR Log\n");
			iommu_poll_ppr_log(iommu);
		}

#ifdef CONFIG_IRQ_REMAP
		if (status & MMIO_STATUS_GALOG_INT_MASK) {
			pr_devel("AMD-Vi: Processing IOMMU GA Log\n");
			iommu_poll_ga_log(iommu);
		}
#endif

		/*
		 * Hardware bug: ERBT1312
		 * When re-enabling the interrupt (by writing 1
		 * to clear the bit), the hardware might also try to set
		 * the interrupt bit in the event status register.
		 * In this scenario, the bit will be set and will disable
		 * subsequent interrupts.
		 *
		 * Workaround: The IOMMU driver should read back the
		 * status register and check if the interrupt bits are cleared.
		 * If not, the driver needs to go through the interrupt
		 * handler again and re-clear the bits.
		 */
		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
	}
	return IRQ_HANDLED;
}

irqreturn_t amd_iommu_int_handler(int irq, void *data)
{
	return IRQ_WAKE_THREAD;
}

/****************************************************************************
 *
 * IOMMU command queuing functions
 *
 ****************************************************************************/

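/* Busy-wait until the IOMMU writes the completion-wait semaphore. */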
static int wait_on_sem(volatile u64 *sem)
{
	int i = 0;

	while (*sem == 0 && i < LOOP_TIMEOUT) {
		udelay(1);
		i += 1;
	}

	if (i == LOOP_TIMEOUT) {
		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
		return -EIO;
	}

	return 0;
}

static void copy_cmd_to_buffer(struct amd_iommu *iommu,
			       struct iommu_cmd *cmd)
{
	u8 *target;

	target = iommu->cmd_buf + iommu->cmd_buf_tail;

	iommu->cmd_buf_tail += sizeof(*cmd);
	iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;

	/* Copy command to buffer */
	memcpy(target, cmd, sizeof(*cmd));

	/* Tell the IOMMU about it */
	writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
}

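/*
 * Build a COMPLETION_WAIT command which makes the IOMMU store a value to
 * the given semaphore address once all previous commands have completed.
 */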
static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
{
	u64 paddr = iommu_virt_to_phys((void *)address);

	WARN_ON(address & 0x7ULL);

	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
	cmd->data[1] = upper_32_bits(paddr);
	cmd->data[2] = 1;
	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
}

static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0] = devid;
	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
}

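/*
 * Build an INVALIDATE_IOMMU_PAGES command. Ranges spanning more than one
 * page are turned into a flush of all TLB entries of the domain.
 */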
static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
				  size_t size, u16 domid, int pde)
{
	u64 pages;
	bool s;

	pages = iommu_num_pages(address, size, PAGE_SIZE);
	s     = false;

	if (pages > 1) {
		/*
		 * If we have to flush more than one page, flush all
		 * TLB entries for this domain
		 */
		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
		s = true;
	}

	address &= PAGE_MASK;

	memset(cmd, 0, sizeof(*cmd));
	cmd->data[1] |= domid;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[3]  = upper_32_bits(address);
	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
	if (s) /* size bit - we flush more than one 4kb page */
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
}

static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
				  u64 address, size_t size)
{
	u64 pages;
	bool s;

	pages = iommu_num_pages(address, size, PAGE_SIZE);
	s     = false;

	if (pages > 1) {
		/*
		 * If we have to flush more than one page, flush all
		 * TLB entries for this domain
		 */
		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
		s = true;
	}

	address &= PAGE_MASK;

	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0]  = devid;
	cmd->data[0] |= (qdep & 0xff) << 24;
	cmd->data[1]  = devid;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[3]  = upper_32_bits(address);
	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
	if (s)
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}

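/*
 * Build an INVALIDATE_IOMMU_PAGES command that flushes the pages of a
 * specific PASID within @domid (guest-virtual invalidation).
 */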
static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
				  u64 address, bool size)
{
	memset(cmd, 0, sizeof(*cmd));

	address &= ~(0xfffULL);

	cmd->data[0]  = pasid;
	cmd->data[1]  = domid;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[3]  = upper_32_bits(address);
	cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
	if (size)
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
}

static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
				  int qdep, u64 address, bool size)
{
	memset(cmd, 0, sizeof(*cmd));

	address &= ~(0xfffULL);

	cmd->data[0]  = devid;
	cmd->data[0] |= ((pasid >> 8) & 0xff) << 16;
	cmd->data[0] |= (qdep  & 0xff) << 24;
	cmd->data[1]  = devid;
	cmd->data[1] |= (pasid & 0xff) << 16;
	cmd->data[2]  = lower_32_bits(address);
	cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
	cmd->data[3]  = upper_32_bits(address);
	if (size)
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
}

static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
			       int status, int tag, bool gn)
{
	memset(cmd, 0, sizeof(*cmd));

	cmd->data[0]  = devid;
	if (gn) {
		cmd->data[1]  = pasid;
		cmd->data[2]  = CMD_INV_IOMMU_PAGES_GN_MASK;
	}
	cmd->data[3]  = tag & 0x1ff;
	cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;

	CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
}

static void build_inv_all(struct iommu_cmd *cmd)
{
	memset(cmd, 0, sizeof(*cmd));
	CMD_SET_TYPE(cmd, CMD_INV_ALL);
}

static void build_inv_irt(struct iommu_cmd *cmd, u16 devid)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->data[0] = devid;
	CMD_SET_TYPE(cmd, CMD_INV_IRT);
}

/*
 * Writes the command to the IOMMU's command buffer and informs the
 * hardware about the new command.
 */
static int __iommu_queue_command_sync(struct amd_iommu *iommu,
				      struct iommu_cmd *cmd,
				      bool sync)
{
	unsigned int count = 0;
	u32 left, next_tail;

	next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
again:
	left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;

	if (left <= 0x20) {
		/* Skip udelay() the first time around */
		if (count++) {
			if (count == LOOP_TIMEOUT) {
				pr_err("AMD-Vi: Command buffer timeout\n");
				return -EIO;
			}

			udelay(1);
		}

		/* Update head and recheck remaining space */
		iommu->cmd_buf_head = readl(iommu->mmio_base +
					    MMIO_CMD_HEAD_OFFSET);

		goto again;
	}

	copy_cmd_to_buffer(iommu, cmd);

	/* Do we need to make sure all commands are processed? */
	iommu->need_sync = sync;

	return 0;
}

static int iommu_queue_command_sync(struct amd_iommu *iommu,
				    struct iommu_cmd *cmd,
				    bool sync)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&iommu->lock, flags);
	ret = __iommu_queue_command_sync(iommu, cmd, sync);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return ret;
}

static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
{
	return iommu_queue_command_sync(iommu, cmd, true);
}

/*
 * This function queues a completion wait command into the command
 * buffer of an IOMMU
 */
static int iommu_completion_wait(struct amd_iommu *iommu)
{
	struct iommu_cmd cmd;
	unsigned long flags;
	int ret;

	if (!iommu->need_sync)
		return 0;

	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);

	spin_lock_irqsave(&iommu->lock, flags);

	iommu->cmd_sem = 0;

	ret = __iommu_queue_command_sync(iommu, &cmd, false);
	if (ret)
		goto out_unlock;

	ret = wait_on_sem(&iommu->cmd_sem);

out_unlock:
	spin_unlock_irqrestore(&iommu->lock, flags);

	return ret;
}

static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
{
	struct iommu_cmd cmd;

	build_inv_dte(&cmd, devid);

	return iommu_queue_command(iommu, &cmd);
}

static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
{
	u32 devid;

	for (devid = 0; devid <= 0xffff; ++devid)
		iommu_flush_dte(iommu, devid);

	iommu_completion_wait(iommu);
}

/*
 * This function uses heavy locking and may disable irqs for some time. But
 * this is no issue because it is only called during resume.
 */
static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu)
{
	u32 dom_id;

	for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
		struct iommu_cmd cmd;
		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
				      dom_id, 1);
		iommu_queue_command(iommu, &cmd);
	}

	iommu_completion_wait(iommu);
}

static void amd_iommu_flush_all(struct amd_iommu *iommu)
{
	struct iommu_cmd cmd;

	build_inv_all(&cmd);

	iommu_queue_command(iommu, &cmd);
	iommu_completion_wait(iommu);
}

static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid)
{
	struct iommu_cmd cmd;

	build_inv_irt(&cmd, devid);

	iommu_queue_command(iommu, &cmd);
}

static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
{
	u32 devid;

	for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++)
		iommu_flush_irt(iommu, devid);

	iommu_completion_wait(iommu);
}

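/*
 * Flush everything the IOMMU caches: uses INVALIDATE_ALL when the hardware
 * supports it, otherwise flushes DTEs, interrupt tables and TLBs
 * individually.
 */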
void iommu_flush_all_caches(struct amd_iommu *iommu)
{
	if (iommu_feature(iommu, FEATURE_IA)) {
		amd_iommu_flush_all(iommu);
	} else {
		amd_iommu_flush_dte_all(iommu);
		amd_iommu_flush_irt_all(iommu);
		amd_iommu_flush_tlb_all(iommu);
	}
}

/*
 * Command send function for flushing on-device TLB
 */
static int device_flush_iotlb(struct iommu_dev_data *dev_data,
			      u64 address, size_t size)
{
	struct amd_iommu *iommu;
	struct iommu_cmd cmd;
	int qdep;

	qdep     = dev_data->ats.qdep;
	iommu    = amd_iommu_rlookup_table[dev_data->devid];

	build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size);

	return iommu_queue_command(iommu, &cmd);
}

/*
 * Command send function for invalidating a device table entry
 */
static int device_flush_dte(struct iommu_dev_data *dev_data)
{
	struct amd_iommu *iommu;
	u16 alias;
	int ret;

	iommu = amd_iommu_rlookup_table[dev_data->devid];
	alias = dev_data->alias;

	ret = iommu_flush_dte(iommu, dev_data->devid);
	if (!ret && alias != dev_data->devid)
		ret = iommu_flush_dte(iommu, alias);
	if (ret)
		return ret;

	if (dev_data->ats.enabled)
		ret = device_flush_iotlb(dev_data, 0, ~0UL);

	return ret;
}

/*
 * TLB invalidation function which is called from the mapping functions.
 * It invalidates a single PTE if the range to flush is within a single
 * page. Otherwise it flushes the whole TLB of the IOMMU.
 */
static void __domain_flush_pages(struct protection_domain *domain,
				 u64 address, size_t size, int pde)