Commit 13588209 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  x86, mm: Allow ZONE_DMA to be configurable
  x86, NUMA: Trim numa meminfo with max_pfn in a separate loop
  x86, NUMA: Rename setup_node_bootmem() to setup_node_data()
  x86, NUMA: Enable emulation on 32bit too
  x86, NUMA: Enable CONFIG_AMD_NUMA on 32bit too
  x86, NUMA: Rename amdtopology_64.c to amdtopology.c
  x86, NUMA: Make numa_init_array() static
  x86, NUMA: Make 32bit use common NUMA init path
  x86, NUMA: Initialize and use remap allocator from setup_node_bootmem()
  x86-32, NUMA: Add @start and @end to init_alloc_remap()
  x86, NUMA: Remove long 64bit assumption from numa.c
  x86, NUMA: Enable build of generic NUMA init code on 32bit
  x86, NUMA: Move NUMA init logic from numa_64.c to numa.c
  x86-32, NUMA: Update numaq to use new NUMA init protocol
  x86-32, NUMA: Replace srat_32.c with srat.c
  x86-32, NUMA: implement temporary NUMA init shims
  x86, NUMA: Move numa_nodes_parsed to numa.[hc]
  x86-32, NUMA: Move get_memcfg_numa() into numa_32.c
  x86, NUMA: make srat.c 32bit safe
  x86, NUMA: rename srat_64.c to srat.c
  ...
parents ac2941f5 dc382fd5
......@@ -112,7 +112,14 @@ config MMU
def_bool y
config ZONE_DMA
def_bool y
bool "DMA memory allocation support" if EXPERT
default y
help
DMA memory allocation support allows devices with less than 32-bit
addressing to allocate within the first 16MB of address space.
Disable if no such devices will be used.
If unsure, say Y.
config SBUS
bool
......@@ -1164,7 +1171,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
config AMD_NUMA
def_bool y
prompt "Old style AMD Opteron NUMA detection"
depends on X86_64 && NUMA && PCI
depends on NUMA && PCI
---help---
Enable AMD NUMA node topology detection. You should say Y here if
you have a multi processor AMD system. This uses an old method to
......@@ -1191,7 +1198,7 @@ config NODES_SPAN_OTHER_NODES
config NUMA_EMU
bool "NUMA emulation"
depends on X86_64 && NUMA
depends on NUMA
---help---
Enable NUMA emulation. A flat machine will be split
into virtual nodes when booted with "numa=fake=N", where N is the
......@@ -1213,6 +1220,10 @@ config HAVE_ARCH_BOOTMEM
def_bool y
depends on X86_32 && NUMA
config HAVE_ARCH_ALLOC_REMAP
def_bool y
depends on X86_32 && NUMA
config ARCH_HAVE_MEMORY_PRESENT
def_bool y
depends on X86_32 && DISCONTIGMEM
......@@ -1221,13 +1232,9 @@ config NEED_NODE_MEMMAP_SIZE
def_bool y
depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
config HAVE_ARCH_ALLOC_REMAP
def_bool y
depends on X86_32 && NUMA
config ARCH_FLATMEM_ENABLE
def_bool y
depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
depends on X86_32 && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
......@@ -1237,20 +1244,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
def_bool y
depends on NUMA && X86_32
config ARCH_PROC_KCORE_TEXT
def_bool y
depends on X86_64 && PROC_KCORE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on ARCH_SPARSEMEM_ENABLE
......@@ -1259,6 +1262,10 @@ config ARCH_MEMORY_PROBE
def_bool X86_64
depends on MEMORY_HOTPLUG
config ARCH_PROC_KCORE_TEXT
def_bool y
depends on X86_64 && PROC_KCORE
config ILLEGAL_POINTER_VALUE
hex
default 0 if X86_32
......@@ -1693,10 +1700,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
depends on MEMORY_HOTPLUG
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool X86_64
depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
def_bool y
depends on NUMA
......
......@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { }
#define ARCH_HAS_POWER_INIT 1
struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
extern int x86_acpi_numa_init(void);
......
......@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range {
extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
extern bool early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
......
......@@ -363,7 +363,12 @@ struct apic {
*/
int (*x86_32_early_logical_apicid)(int cpu);
/* determine CPU -> NUMA node mapping */
/*
* Optional method called from setup_local_APIC() after logical
* apicid is guaranteed to be known to initialize apicid -> node
* mapping if NUMA initialization hasn't done so already. Don't
* add new users.
*/
int (*x86_32_numa_cpu_node)(int cpu);
#endif
};
......@@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
extern int default_x86_32_numa_cpu_node(int cpu);
#endif
static inline unsigned int
......
......@@ -208,8 +208,7 @@ extern const char * const x86_power_flags[32];
#define test_cpu_cap(c, bit) \
test_bit(bit, (unsigned long *)((c)->x86_capability))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && \
#define REQUIRED_MASK_BIT_SET(bit) \
( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
......@@ -219,10 +218,16 @@ extern const char * const x86_power_flags[32];
(((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \
? 1 : \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
test_cpu_cap(c, bit))
#define this_cpu_has(bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
......
......@@ -69,22 +69,18 @@
#define MAX_DMA_CHANNELS 8
#ifdef CONFIG_X86_32
/* The maximum address that we can perform a DMA transfer to on this platform */
#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
#else
/* 16MB ISA DMA zone */
#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT)
/* 4GB broken PCI/AGP hardware bus master zone */
#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
#ifdef CONFIG_X86_32
/* The maximum address that we can perform a DMA transfer to on this platform */
#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
#else
/* Compat define for old dma zone */
#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
#endif
/* 8237 DMA controllers */
......
......@@ -13,31 +13,11 @@ extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[nid])
#include <asm/numaq.h>
/* summit or generic arch */
#include <asm/srat.h>
extern int get_memcfg_numa_flat(void);
/*
* This allows any one NUMA architecture to be compiled
* for, and still fall back to the flat function if it
* fails.
*/
static inline void get_memcfg_numa(void)
{
if (get_memcfg_numaq())
return;
if (get_memcfg_from_srat())
return;
get_memcfg_numa_flat();
}
extern void resume_map_numa_kva(pgd_t *pgd);
#else /* !CONFIG_NUMA */
#define get_memcfg_numa get_memcfg_numa_flat
static inline void resume_map_numa_kva(pgd_t *pgd) {}
#endif /* CONFIG_NUMA */
......
......@@ -4,36 +4,13 @@
#ifndef _ASM_X86_MMZONE_64_H
#define _ASM_X86_MMZONE_64_H
#ifdef CONFIG_NUMA
#include <linux/mmdebug.h>
#include <asm/smp.h>
/* Simple perfect hash to map physical addresses to node numbers */
struct memnode {
int shift;
unsigned int mapsize;
s16 *map;
s16 embedded_map[64 - 8];
} ____cacheline_aligned; /* total size = 128 bytes */
extern struct memnode memnode;
#define memnode_shift memnode.shift
#define memnodemap memnode.map
#define memnodemapsize memnode.mapsize
extern struct pglist_data *node_data[];
static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
{
unsigned nid;
VIRTUAL_BUG_ON(!memnodemap);
nid = memnodemap[addr >> memnode_shift];
VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
return nid;
}
#define NODE_DATA(nid) (node_data[nid])
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
......
#ifndef _ASM_X86_NUMA_H
#define _ASM_X86_NUMA_H
#include <linux/nodemask.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
#ifdef CONFIG_NUMA
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
/*
* Too small node sizes may confuse the VM badly. Usually they
* result from BIOS bugs. So dont recognize nodes as standalone
* NUMA entities that have less than this amount of RAM listed:
*/
#define NODE_MIN_SIZE (4*1024*1024)
extern int numa_off;
/*
* __apicid_to_node[] stores the raw mapping between physical apicid and
......@@ -17,15 +29,27 @@
* numa_cpu_node().
*/
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
extern void __init numa_set_distance(int from, int to, int distance);
static inline void set_apicid_to_node(int apicid, s16 node)
{
__apicid_to_node[apicid] = node;
}
extern int __cpuinit numa_cpu_node(int cpu);
#else /* CONFIG_NUMA */
static inline void set_apicid_to_node(int apicid, s16 node)
{
}
static inline int numa_cpu_node(int cpu)
{
return NUMA_NO_NODE;
}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_X86_32
......@@ -37,14 +61,12 @@ static inline void set_apicid_to_node(int apicid, s16 node)
#ifdef CONFIG_NUMA
extern void __cpuinit numa_set_node(int cpu, int node);
extern void __cpuinit numa_clear_node(int cpu);
extern void __init numa_init_array(void);
extern void __init init_cpu_to_node(void);
extern void __cpuinit numa_add_cpu(int cpu);
extern void __cpuinit numa_remove_cpu(int cpu);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void numa_init_array(void) { }
static inline void init_cpu_to_node(void) { }
static inline void numa_add_cpu(int cpu) { }
static inline void numa_remove_cpu(int cpu) { }
......@@ -54,4 +76,10 @@ static inline void numa_remove_cpu(int cpu) { }
void debug_cpumask_set_cpu(int cpu, int node, bool enable);
#endif
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#endif /* _ASM_X86_NUMA_H */
#ifndef _ASM_X86_NUMA_32_H
#define _ASM_X86_NUMA_32_H
extern int numa_off;
extern int pxm_to_nid(int pxm);
#ifdef CONFIG_NUMA
extern int __cpuinit numa_cpu_node(int cpu);
#else /* CONFIG_NUMA */
static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_HIGHMEM
extern void set_highmem_pages_init(void);
#else
......
#ifndef _ASM_X86_NUMA_64_H
#define _ASM_X86_NUMA_64_H
#include <linux/nodemask.h>
struct bootnode {
u64 start;
u64 end;
};
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
extern int numa_off;
extern unsigned long numa_free_all_bootmem(void);
extern void setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end);
#ifdef CONFIG_NUMA
/*
* Too small node sizes may confuse the VM badly. Usually they
* result from BIOS bugs. So dont recognize nodes as standalone
* NUMA entities that have less than this amount of RAM listed:
*/
#define NODE_MIN_SIZE (4*1024*1024)
extern nodemask_t numa_nodes_parsed __initdata;
extern int __cpuinit numa_cpu_node(int cpu);
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
extern void __init numa_set_distance(int from, int to, int distance);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#else
static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif
#endif /* _ASM_X86_NUMA_64_H */
......@@ -29,7 +29,7 @@
#ifdef CONFIG_X86_NUMAQ
extern int found_numaq;
extern int get_memcfg_numaq(void);
extern int numaq_numa_init(void);
extern int pci_numaq_init(void);
extern void *xquad_portio;
......@@ -166,11 +166,6 @@ struct sys_cfg_data {
void numaq_tsc_disable(void);
#else
static inline int get_memcfg_numaq(void)
{
return 0;
}
#endif /* CONFIG_X86_NUMAQ */
#endif /* _ASM_X86_NUMAQ_H */
......@@ -542,6 +542,33 @@ do { \
old__; \
})
static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
const unsigned long __percpu *addr)
{
unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
}
static inline int x86_this_cpu_variable_test_bit(int nr,
const unsigned long __percpu *addr)
{
int oldbit;
asm volatile("bt "__percpu_arg(2)",%1\n\t"
"sbb %0,%0"
: "=r" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
return oldbit;
}
#define x86_this_cpu_test_bit(nr, addr) \
(__builtin_constant_p((nr)) \
? x86_this_cpu_constant_test_bit((nr), (addr)) \
: x86_this_cpu_variable_test_bit((nr), (addr)))
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
......
/*
* Some of the code in this file has been gleaned from the 64 bit
* discontigmem support code base.
*
* Copyright (C) 2002, IBM Corp.
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to Pat Gaughen <gone@us.ibm.com>
*/
#ifndef _ASM_X86_SRAT_H
#define _ASM_X86_SRAT_H
#ifdef CONFIG_ACPI_NUMA
extern int get_memcfg_from_srat(void);
#else
static inline int get_memcfg_from_srat(void)
{
return 0;
}
#endif
#endif /* _ASM_X86_SRAT_H */
......@@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void);
#define pcibus_to_node(bus) __pcibus_to_node(bus)
#ifdef CONFIG_X86_32
extern unsigned long node_start_pfn[];
extern unsigned long node_end_pfn[];
extern unsigned long node_remap_size[];
#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
# define SD_CACHE_NICE_TRIES 1
# define SD_IDLE_IDX 1
#else
# define SD_CACHE_NICE_TRIES 2
# define SD_IDLE_IDX 2
#endif
/* sched_domains SD_NODE_INIT for NUMA machines */
......
......@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
if (this_cpu_has(X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
/* Make LAPIC timer preferrable over percpu HPET */
lapic_clockevent.rating = 150;
......@@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void)
/* always use the value from LDR */
early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
logical_smp_processor_id();
/*
* Some NUMA implementations (NUMAQ) don't initialize apicid to
* node mapping during NUMA init. Now that logical apicid is
* guaranteed to be known, give it another chance. This is already
* a bit too late - percpu allocation has already happened without
* proper NUMA affinity.
*/
if (apic->x86_32_numa_cpu_node)
set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
apic->x86_32_numa_cpu_node(cpu));
#endif
/*
......@@ -2014,21 +2025,6 @@ void default_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
#ifdef CONFIG_X86_32
int default_x86_32_numa_cpu_node(int cpu)
{
#ifdef CONFIG_NUMA
int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
if (apicid != BAD_APICID)
return __apicid_to_node[apicid];
return NUMA_NO_NODE;
#else
return 0;
#endif
}
#endif
/*
* Power management
*/
......
......@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(cpu_has_apic && !disable_apic);
}
#ifdef CONFIG_X86_32
static int noop_x86_32_numa_cpu_node(int cpu)
{
/* we're always on node 0 */
return 0;
}
#endif
struct apic apic_noop = {
.name = "noop",
.probe = noop_probe,
......@@ -195,6 +187,5 @@ struct apic apic_noop = {
#ifdef CONFIG_X86_32
.x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
.x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
#endif
};
......@@ -253,5 +253,4 @@ struct apic apic_bigsmp = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
.x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
......@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void)
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
}
static int es7000_numa_cpu_node(int cpu)
{
return 0;
}
static int es7000_cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
......@@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = es7000_early_logical_apicid,
.x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
struct apic __refdata apic_es7000 = {
......@@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = es7000_early_logical_apicid,
.x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
......@@ -48,8 +48,6 @@
#include <asm/e820.h>
#include <asm/ipi.h>
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))