Skip to content
  • Peter Zijlstra's avatar
    perf/x86: Fix local vs remote memory events for NHM/WSM · 87e24f4b
    Peter Zijlstra authored
    
    
    Verified using the proglet below. Before:
    
    [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
    remote write
    
     Performance counter stats for './numa 0':
    
             2,101,554 node-stores
             2,096,931 node-store-misses
    
           5.021546079 seconds time elapsed
    
    [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
    local write
    
     Performance counter stats for './numa 1':
    
               501,137 node-stores
                   199 node-store-misses
    
           5.124451068 seconds time elapsed
    
    After:
    
    [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
    remote write
    
     Performance counter stats for './numa 0':
    
             2,107,516 node-stores
             2,097,187 node-store-misses
    
           5.012755149 seconds time elapsed
    
    [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
    local write
    
     Performance counter stats for './numa 1':
    
             2,063,355 node-stores
                   165 node-store-misses
    
           5.082091494 seconds time elapsed
    
    #define _GNU_SOURCE
    
    #include <sched.h>
    #include <stdio.h>
    #include <errno.h>
    #include <sys/mman.h>
    #include <sys/types.h>
    #include <dirent.h>
    #include <signal.h>
    #include <unistd.h>
    #include <numaif.h>
    #include <stdlib.h>
    
    #define SIZE (32*1024*1024)
    
    /*
     * Set to 1 by sig_done() and polled by the access loops in main().
     * sig_atomic_t is the only integer type the C standard guarantees can be
     * written from a signal handler and read from the interrupted code.
     */
    volatile sig_atomic_t done;
    
    /* Signal handler: flag the main loop to stop. The signal number is unused. */
    void sig_done(int sig)
    {
    	(void)sig;
    	done = 1;
    }
    
    /*
     * NUMA access test program.
     *
     * Builds a CPU mask from the cpuN entries found under
     * /sys/devices/system/node/node0/ and pins the process either to those
     * CPUs ("local") or to the complement of that mask ("remote"). It then
     * maps SIZE bytes of anonymous memory, asks mbind() to place it on node 0,
     * and reads or writes every byte in a loop until the SIGALRM handler sets
     * 'done' (alarm is armed for 5 seconds).
     *
     * argv[1] selects the mode (0-3): bit0 - local/remote, bit1 - read/write.
     *
     * Returns 0 on a completed run; exits with status 1 on an invalid mode or
     * when a required resource (CPU sets, node directory, mapping) cannot be
     * obtained. Failures of sched_setaffinity() and mbind() are reported but
     * the run continues, as before.
     */
    int main(int argc, char **argv)
    {
    	cpu_set_t *mask, *mask2, *target;
    	size_t size;
    	int i, err;
    	int t = 0; /* sink for the read loop; must start defined */
    	int nrcpus = 1024;
    	char *mem;
    	unsigned long nodemask = 0x01; /* node 0 */
    	DIR *node;
    	struct dirent *de;
    	int do_read = 0;
    	int local = 0;
    
    	if (argc < 2) {
    		printf("usage: %s [0-3]\n", argv[0]);
    		printf("  bit0 - local/remote\n");
    		printf("  bit1 - read/write\n");
    		exit(0);
    	}
    
    	switch (atoi(argv[1])) {
    	case 0:
    		printf("remote write\n");
    		break;
    	case 1:
    		printf("local write\n");
    		local = 1;
    		break;
    	case 2:
    		printf("remote read\n");
    		do_read = 1;
    		break;
    	case 3:
    		printf("local read\n");
    		local = 1;
    		do_read = 1;
    		break;
    	default:
    		/* Do not silently run an unlabelled "remote write" test. */
    		fprintf(stderr, "invalid mode '%s', expected 0-3\n", argv[1]);
    		exit(1);
    	}
    
    	/* 'size' is the set size in bytes; 'nrcpus' is the number of CPU bits. */
    	size = CPU_ALLOC_SIZE(nrcpus);
    	mask = CPU_ALLOC(nrcpus);
    	mask2 = CPU_ALLOC(nrcpus);
    	if (!mask || !mask2) {
    		perror("CPU_ALLOC");
    		exit(1);
    	}
    	CPU_ZERO_S(size, mask);
    	CPU_ZERO_S(size, mask2);
    
    	/* mask := CPUs listed as cpuN entries in node0's sysfs directory. */
    	node = opendir("/sys/devices/system/node/node0/");
    	if (!node) {
    		/* readdir()/closedir() on NULL would crash; bail out instead. */
    		perror("opendir");
    		exit(1);
    	}
    	while ((de = readdir(node))) {
    		int cpu;
    
    		if (sscanf(de->d_name, "cpu%d", &cpu) == 1)
    			CPU_SET_S(cpu, size, mask);
    	}
    	closedir(node);
    
    	/*
    	 * mask2 := all CPUs XOR mask, i.e. every CPU not in node 0.
    	 * Iterate over CPU numbers (nrcpus), not over the byte size of the set.
    	 */
    	for (i = 0; i < nrcpus; i++)
    		CPU_SET_S(i, size, mask2);
    	CPU_XOR_S(size, mask2, mask2, mask); // invert
    
    	target = local ? mask : mask2;
    
    	err = sched_setaffinity(0, size, target);
    	if (err)
    		perror("sched_setaffinity");
    
    	mem = mmap(0, SIZE, PROT_READ|PROT_WRITE,
    			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    	if (mem == MAP_FAILED) {
    		perror("mmap");
    		exit(1);
    	}
    	err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE);
    	if (err)
    		perror("mbind");
    
    	signal(SIGALRM, sig_done);
    	alarm(5);
    
    	if (!do_read) {
    		while (!done) {
    			for (i = 0; i < SIZE; i++)
    				mem[i] = 0x01;
    		}
    	} else {
    		while (!done) {
    			/* volatile access keeps the loads from being optimized away */
    			for (i = 0; i < SIZE; i++)
    				t += *(volatile char *)(mem + i);
    		}
    	}
    	(void)t;
    
    	munmap(mem, SIZE);
    	CPU_FREE(mask);
    	CPU_FREE(mask2);
    
    	return 0;
    }
    
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Stephane Eranian <eranian@google.com>
    Cc: <stable@kernel.org>
    Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.org
    
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    87e24f4b