printk.c 81 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
/*
 *  linux/kernel/printk.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Modified to make sys_syslog() more flexible: added commands to
 * return the last 4k of kernel messages, regardless of whether
 * they've been read or not.  Added option to suppress kernel printk's
 * to the console.  Added hook for sending the console messages
 * elsewhere, in preparation for a serial line console (someday).
 * Ted Ts'o, 2/11/93.
 * Modified for sysctl support, 1/8/97, Chris Horn.
13
 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
14
 *     manfred@colorfullife.com
Linus Torvalds's avatar
Linus Torvalds committed
15
 * Rewrote bits to get rid of console_lock
16
 *	01Mar01 Andrew Morton
Linus Torvalds's avatar
Linus Torvalds committed
17
18
19
20
21
22
23
24
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/console.h>
#include <linux/init.h>
Randy Dunlap's avatar
Randy Dunlap committed
25
26
#include <linux/jiffies.h>
#include <linux/nmi.h>
Linus Torvalds's avatar
Linus Torvalds committed
27
#include <linux/module.h>
Jan Engelhardt's avatar
Jan Engelhardt committed
28
#include <linux/moduleparam.h>
Linus Torvalds's avatar
Linus Torvalds committed
29
30
31
32
33
#include <linux/interrupt.h>			/* For in_interrupt() */
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/bootmem.h>
34
#include <linux/memblock.h>
Linus Torvalds's avatar
Linus Torvalds committed
35
#include <linux/syscalls.h>
36
#include <linux/kexec.h>
37
#include <linux/kdb.h>
38
#include <linux/ratelimit.h>
39
#include <linux/kmsg_dump.h>
40
#include <linux/syslog.h>
41
42
#include <linux/cpu.h>
#include <linux/notifier.h>
43
#include <linux/rculist.h>
44
#include <linux/poll.h>
45
#include <linux/irq_work.h>
46
#include <linux/utsname.h>
Alex Elder's avatar
Alex Elder committed
47
#include <linux/ctype.h>
48
#include <linux/uio.h>
Linus Torvalds's avatar
Linus Torvalds committed
49
50

#include <asm/uaccess.h>
51
#include <asm-generic/sections.h>
Linus Torvalds's avatar
Linus Torvalds committed
52

53
54
55
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>

Joe Perches's avatar
Joe Perches committed
56
#include "console_cmdline.h"
57
#include "braille.h"
58
#include "internal.h"
Joe Perches's avatar
Joe Perches committed
59

Linus Torvalds's avatar
Linus Torvalds committed
60
int console_printk[4] = {
61
	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
62
	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
63
64
	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
Linus Torvalds's avatar
Linus Torvalds committed
65
66
67
};

/*
68
 * Low level drivers may need that to know if they can schedule in
Linus Torvalds's avatar
Linus Torvalds committed
69
70
71
72
73
74
75
76
77
78
 * their unblank() callback or not. So let's export it.
 */
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);

/*
 * console_sem protects the console_drivers list, and also
 * provides serialisation for access to the entire console
 * driver system.
 */
79
static DEFINE_SEMAPHORE(console_sem);
Linus Torvalds's avatar
Linus Torvalds committed
80
struct console *console_drivers;
Ingo Molnar's avatar
Ingo Molnar committed
81
82
EXPORT_SYMBOL_GPL(console_drivers);

83
84
85
86
87
88
#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_lock_dep_map = {
	.name = "console_lock"
};
#endif

89
90
91
92
93
94
95
96
97
98
99
100
/*
 * Number of registered extended console drivers.
 *
 * If extended consoles are present, in-kernel cont reassembly is disabled
 * and each fragment is stored as a separate log entry with proper
 * continuation flag so that every emitted message has full metadata.  This
 * doesn't change the result for regular consoles or /proc/kmsg.  For
 * /dev/kmsg, as long as the reader concatenates messages according to
 * consecutive continuation flags, the end result should be the same too.
 */
static int nr_ext_console_drivers;

101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
 * Helper macros to handle lockdep when locking/unlocking console_sem. We use
 * macros instead of functions so that _RET_IP_ contains useful information.
 */
#define down_console_sem() do { \
	down(&console_sem);\
	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
} while (0)

static int __down_trylock_console_sem(unsigned long ip)
{
	if (down_trylock(&console_sem))
		return 1;
	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
	return 0;
}
#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)

#define up_console_sem() do { \
	mutex_release(&console_lock_dep_map, 1, _RET_IP_);\
	up(&console_sem);\
} while (0)

Linus Torvalds's avatar
Linus Torvalds committed
124
125
126
127
/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held. It's
 * definitely not the perfect debug tool (we don't know if _WE_
Alex Elder's avatar
Alex Elder committed
128
129
130
 * hold it and are racing, but it helps tracking those weird code
 * paths in the console code where we end up in places I want
 * locked without the console sempahore held).
Linus Torvalds's avatar
Linus Torvalds committed
131
 */
132
static int console_locked, console_suspended;
Linus Torvalds's avatar
Linus Torvalds committed
133

134
135
136
137
138
/*
 * If exclusive_console is non-NULL then only this console is to be printed to.
 */
static struct console *exclusive_console;

Linus Torvalds's avatar
Linus Torvalds committed
139
140
141
142
143
144
145
/*
 *	Array of consoles built from command line options (console=)
 */

#define MAX_CMDLINECONSOLES 8

static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
Joe Perches's avatar
Joe Perches committed
146

Linus Torvalds's avatar
Linus Torvalds committed
147
148
static int selected_console = -1;
static int preferred_console = -1;
149
150
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);
Linus Torvalds's avatar
Linus Torvalds committed
151
152
153
154

/* Flag: console code may call schedule() */
static int console_may_schedule;

155
156
157
158
159
160
/*
 * The printk log buffer consists of a chain of concatenated variable
 * length records. Every record starts with a record header, containing
 * the overall length of the record.
 *
 * The heads to the first and last entry in the buffer, as well as the
Alex Elder's avatar
Alex Elder committed
161
162
 * sequence numbers of these entries are maintained when messages are
 * stored.
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
 *
 * If the heads indicate available messages, the length in the header
 * tells the start next message. A length == 0 for the next message
 * indicates a wrap-around to the beginning of the buffer.
 *
 * Every record carries the monotonic timestamp in microseconds, as well as
 * the standard userspace syslog level and syslog facility. The usual
 * kernel messages use LOG_KERN; userspace-injected messages always carry
 * a matching syslog facility, by default LOG_USER. The origin of every
 * message can be reliably determined that way.
 *
 * The human readable log message directly follows the message header. The
 * length of the message text is stored in the header, the stored message
 * is not terminated.
 *
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
 * Optionally, a message can carry a dictionary of properties (key/value pairs),
 * to provide userspace with a machine-readable message context.
 *
 * Examples for well-defined, commonly used property names are:
 *   DEVICE=b12:8               device identifier
 *                                b12:8         block dev_t
 *                                c127:3        char dev_t
 *                                n8            netdev ifindex
 *                                +sound:card0  subsystem:devname
 *   SUBSYSTEM=pci              driver-core subsystem name
 *
 * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value
 * follows directly after a '=' character. Every property is terminated by
 * a '\0' character. The last property is not terminated.
 *
 * Example of a message structure:
 *   0000  ff 8f 00 00 00 00 00 00      monotonic time in nsec
 *   0008  34 00                        record is 52 bytes long
 *   000a        0b 00                  text is 11 bytes long
 *   000c              1f 00            dictionary is 23 bytes long
 *   000e                    03 00      LOG_KERN (facility) LOG_ERR (level)
 *   0010  69 74 27 73 20 61 20 6c      "it's a l"
 *         69 6e 65                     "ine"
 *   001b           44 45 56 49 43      "DEVIC"
 *         45 3d 62 38 3a 32 00 44      "E=b8:2\0D"
 *         52 49 56 45 52 3d 62 75      "RIVER=bu"
 *         67                           "g"
 *   0032     00 00 00                  padding to next message header
 *
207
 * The 'struct printk_log' buffer header must never be directly exported to
208
209
210
211
 * userspace, it is a kernel-private implementation detail that might
 * need to be changed in the future, when the requirements change.
 *
 * /dev/kmsg exports the structured data in the following line format:
212
213
214
215
 *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
 *
 * Users of the export format should ignore possible additional values
 * separated by ',', and find the message after the ';' character.
216
217
218
219
 *
 * The optional key/value pairs are attached as continuation lines starting
 * with a space character and terminated by a newline. All possible
 * non-prinatable characters are escaped in the "\xff" notation.
220
221
 */

222
enum log_flags {
223
224
225
226
	LOG_NOCONS	= 1,	/* already flushed, do not print to console */
	LOG_NEWLINE	= 2,	/* text ended with a newline */
	LOG_PREFIX	= 4,	/* text started with a prefix */
	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
227
228
};

229
struct printk_log {
230
231
232
233
	u64 ts_nsec;		/* timestamp in nanoseconds */
	u16 len;		/* length of entire record */
	u16 text_len;		/* length of text buffer */
	u16 dict_len;		/* length of dictionary buffer */
234
235
236
	u8 facility;		/* syslog facility */
	u8 flags:5;		/* internal record flags */
	u8 level:3;		/* syslog level */
237
238
239
240
241
}
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
__packed __aligned(4)
#endif
;
242
243

/*
244
245
246
 * The logbuf_lock protects kmsg buffer, indices, counters.  This can be taken
 * within the scheduler's rq lock. It must be released before calling
 * console_unlock() or anything else that might wake up a process.
247
248
 */
static DEFINE_RAW_SPINLOCK(logbuf_lock);
249

250
#ifdef CONFIG_PRINTK
251
DECLARE_WAIT_QUEUE_HEAD(log_wait);
252
253
254
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static u32 syslog_idx;
255
static enum log_flags syslog_prev;
256
static size_t syslog_partial;
257
258
259
260
261
262
263
264
265

/* index and sequence number of the first record stored in the buffer */
static u64 log_first_seq;
static u32 log_first_idx;

/* index and sequence number of the next record to store in the buffer */
static u64 log_next_seq;
static u32 log_next_idx;

266
267
268
269
270
/* the next printk record to write to the console */
static u64 console_seq;
static u32 console_idx;
static enum log_flags console_prev;

271
272
273
274
/* the next printk record to read after the last 'clear' command */
static u64 clear_seq;
static u32 clear_idx;

275
#define PREFIX_MAX		32
Alex Elder's avatar
Alex Elder committed
276
#define LOG_LINE_MAX		(1024 - PREFIX_MAX)
277

278
279
280
#define LOG_LEVEL(v)		((v) & 0x07)
#define LOG_FACILITY(v)		((v) >> 3 & 0xff)

281
/* record buffer */
282
#define LOG_ALIGN __alignof__(struct printk_log)
283
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
284
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
285
286
287
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;

288
289
290
291
292
293
294
295
296
297
298
299
/* Return log buffer address */
char *log_buf_addr_get(void)
{
	return log_buf;
}

/* Return log buffer size */
u32 log_buf_len_get(void)
{
	return log_buf_len;
}

300
/* human readable text of the record */
301
static char *log_text(const struct printk_log *msg)
302
{
303
	return (char *)msg + sizeof(struct printk_log);
304
305
306
}

/* optional key/value pair dictionary attached to the record */
307
static char *log_dict(const struct printk_log *msg)
308
{
309
	return (char *)msg + sizeof(struct printk_log) + msg->text_len;
310
311
312
}

/* get record by index; idx must point to valid msg */
313
static struct printk_log *log_from_idx(u32 idx)
314
{
315
	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
316
317
318
319
320
321

	/*
	 * A length == 0 record is the end of buffer marker. Wrap around and
	 * read the message at the start of the buffer.
	 */
	if (!msg->len)
322
		return (struct printk_log *)log_buf;
323
324
325
326
327
328
	return msg;
}

/* get next record; idx must point to valid msg */
static u32 log_next(u32 idx)
{
329
	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
330
331
332
333
334
335
336
337

	/* length == 0 indicates the end of the buffer; wrap */
	/*
	 * A length == 0 record is the end of buffer marker. Wrap around and
	 * read the message at the start of the buffer as *this* one, and
	 * return the one after that.
	 */
	if (!msg->len) {
338
		msg = (struct printk_log *)log_buf;
339
340
341
342
343
		return msg->len;
	}
	return idx + msg->len;
}

Petr Mladek's avatar
Petr Mladek committed
344
345
346
347
348
349
350
351
352
353
/*
 * Check whether there is enough free space for the given message.
 *
 * The same values of first_idx and next_idx mean that the buffer
 * is either empty or full.
 *
 * If the buffer is empty, we must respect the position of the indexes.
 * They cannot be reset to the beginning of the buffer.
 */
static int logbuf_has_space(u32 msg_size, bool empty)
354
355
356
{
	u32 free;

Petr Mladek's avatar
Petr Mladek committed
357
	if (log_next_idx > log_first_idx || empty)
358
359
360
361
362
363
364
365
366
367
368
		free = max(log_buf_len - log_next_idx, log_first_idx);
	else
		free = log_first_idx - log_next_idx;

	/*
	 * We need space also for an empty header that signalizes wrapping
	 * of the buffer.
	 */
	return free >= msg_size + sizeof(struct printk_log);
}

Petr Mladek's avatar
Petr Mladek committed
369
static int log_make_free_space(u32 msg_size)
370
{
371
372
	while (log_first_seq < log_next_seq &&
	       !logbuf_has_space(msg_size, false)) {
Alex Elder's avatar
Alex Elder committed
373
		/* drop old messages until we have enough contiguous space */
374
375
376
		log_first_idx = log_next(log_first_idx);
		log_first_seq++;
	}
Petr Mladek's avatar
Petr Mladek committed
377

378
379
380
381
382
	if (clear_seq < log_first_seq) {
		clear_seq = log_first_seq;
		clear_idx = log_first_idx;
	}

Petr Mladek's avatar
Petr Mladek committed
383
	/* sequence numbers are equal, so the log buffer is empty */
384
	if (logbuf_has_space(msg_size, log_first_seq == log_next_seq))
Petr Mladek's avatar
Petr Mladek committed
385
386
387
		return 0;

	return -ENOMEM;
388
389
}

390
391
392
393
394
395
396
397
398
399
400
401
/* compute the message size including the padding bytes */
static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len)
{
	u32 size;

	size = sizeof(struct printk_log) + text_len + dict_len;
	*pad_len = (-size) & (LOG_ALIGN - 1);
	size += *pad_len;

	return size;
}

Petr Mladek's avatar
Petr Mladek committed
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
/*
 * Define how much of the log buffer we could take at maximum. The value
 * must be greater than two. Note that only half of the buffer is available
 * when the index points to the middle.
 */
#define MAX_LOG_TAKE_PART 4
static const char trunc_msg[] = "<truncated>";

static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
			u16 *dict_len, u32 *pad_len)
{
	/*
	 * The message should not take the whole buffer. Otherwise, it might
	 * get removed too soon.
	 */
	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
	if (*text_len > max_text_len)
		*text_len = max_text_len;
	/* enable the warning message */
	*trunc_msg_len = strlen(trunc_msg);
	/* disable the "dict" completely */
	*dict_len = 0;
	/* compute the size again, count also the warning message */
	return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
}

428
/* insert record into the buffer, discard old ones, update heads */
429
430
431
432
static int log_store(int facility, int level,
		     enum log_flags flags, u64 ts_nsec,
		     const char *dict, u16 dict_len,
		     const char *text, u16 text_len)
433
{
434
	struct printk_log *msg;
435
	u32 size, pad_len;
Petr Mladek's avatar
Petr Mladek committed
436
	u16 trunc_msg_len = 0;
437
438

	/* number of '\0' padding bytes to next message */
439
	size = msg_used_size(text_len, dict_len, &pad_len);
440

Petr Mladek's avatar
Petr Mladek committed
441
442
443
444
445
446
	if (log_make_free_space(size)) {
		/* truncate the message if it is too long for empty buffer */
		size = truncate_msg(&text_len, &trunc_msg_len,
				    &dict_len, &pad_len);
		/* survive when the log buffer is too small for trunc_msg */
		if (log_make_free_space(size))
447
			return 0;
Petr Mladek's avatar
Petr Mladek committed
448
	}
449

450
	if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) {
451
452
453
454
455
		/*
		 * This message + an additional empty header does not fit
		 * at the end of the buffer. Add an empty header with len == 0
		 * to signify a wrap around.
		 */
456
		memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
457
458
459
460
		log_next_idx = 0;
	}

	/* fill message */
461
	msg = (struct printk_log *)(log_buf + log_next_idx);
462
463
	memcpy(log_text(msg), text, text_len);
	msg->text_len = text_len;
Petr Mladek's avatar
Petr Mladek committed
464
465
466
467
	if (trunc_msg_len) {
		memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len);
		msg->text_len += trunc_msg_len;
	}
468
469
	memcpy(log_dict(msg), dict, dict_len);
	msg->dict_len = dict_len;
470
471
472
473
474
475
476
	msg->facility = facility;
	msg->level = level & 7;
	msg->flags = flags & 0x1f;
	if (ts_nsec > 0)
		msg->ts_nsec = ts_nsec;
	else
		msg->ts_nsec = local_clock();
477
	memset(log_dict(msg) + dict_len, 0, pad_len);
478
	msg->len = size;
479
480
481
482

	/* insert message */
	log_next_idx += msg->len;
	log_next_seq++;
483
484

	return msg->text_len;
485
}
486

Alex Elder's avatar
Alex Elder committed
487
int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
488
489
490
491
492
493
494
495
496
497
498
499
500

static int syslog_action_restricted(int type)
{
	if (dmesg_restrict)
		return 1;
	/*
	 * Unless restricted, we allow "read all" and "get buffer size"
	 * for everybody.
	 */
	return type != SYSLOG_ACTION_READ_ALL &&
	       type != SYSLOG_ACTION_SIZE_BUFFER;
}

501
int check_syslog_permissions(int type, int source)
502
503
504
505
506
{
	/*
	 * If this is from /proc/kmsg and we've already opened it, then we've
	 * already done the capabilities checks at open time.
	 */
507
	if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
508
		goto ok;
509
510
511

	if (syslog_action_restricted(type)) {
		if (capable(CAP_SYSLOG))
512
			goto ok;
513
514
515
516
517
518
519
520
521
		/*
		 * For historical reasons, accept CAP_SYS_ADMIN too, with
		 * a warning.
		 */
		if (capable(CAP_SYS_ADMIN)) {
			pr_warn_once("%s (%d): Attempt to access syslog with "
				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
				     "(deprecated).\n",
				 current->comm, task_pid_nr(current));
522
			goto ok;
523
524
525
		}
		return -EPERM;
	}
526
ok:
527
528
	return security_syslog(type);
}
Geliang Tang's avatar
Geliang Tang committed
529
EXPORT_SYMBOL_GPL(check_syslog_permissions);
530

531
532
533
534
535
static void append_char(char **pp, char *e, char c)
{
	if (*pp < e)
		*(*pp)++ = c;
}
536

537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
static ssize_t msg_print_ext_header(char *buf, size_t size,
				    struct printk_log *msg, u64 seq,
				    enum log_flags prev_flags)
{
	u64 ts_usec = msg->ts_nsec;
	char cont = '-';

	do_div(ts_usec, 1000);

	/*
	 * If we couldn't merge continuation line fragments during the print,
	 * export the stored flags to allow an optional external merge of the
	 * records. Merging the records isn't always neccessarily correct, like
	 * when we hit a race during printing. In most cases though, it produces
	 * better readable output. 'c' in the record flags mark the first
	 * fragment of a line, '+' the following.
	 */
	if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT))
		cont = 'c';
	else if ((msg->flags & LOG_CONT) ||
		 ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
		cont = '+';

	return scnprintf(buf, size, "%u,%llu,%llu,%c;",
		       (msg->facility << 3) | msg->level, seq, ts_usec, cont);
}

static ssize_t msg_print_ext_body(char *buf, size_t size,
				  char *dict, size_t dict_len,
				  char *text, size_t text_len)
{
	char *p = buf, *e = buf + size;
	size_t i;

	/* escape non-printable characters */
	for (i = 0; i < text_len; i++) {
		unsigned char c = text[i];

		if (c < ' ' || c >= 127 || c == '\\')
			p += scnprintf(p, e - p, "\\x%02x", c);
		else
			append_char(&p, e, c);
	}
	append_char(&p, e, '\n');

	if (dict_len) {
		bool line = true;

		for (i = 0; i < dict_len; i++) {
			unsigned char c = dict[i];

			if (line) {
				append_char(&p, e, ' ');
				line = false;
			}

			if (c == '\0') {
				append_char(&p, e, '\n');
				line = true;
				continue;
			}

			if (c < ' ' || c >= 127 || c == '\\') {
				p += scnprintf(p, e - p, "\\x%02x", c);
				continue;
			}

			append_char(&p, e, c);
		}
		append_char(&p, e, '\n');
	}

	return p - buf;
}

612
613
614
615
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
	u64 seq;
	u32 idx;
616
	enum log_flags prev;
617
	struct mutex lock;
618
	char buf[CONSOLE_EXT_LOG_MAX];
619
620
};

Al Viro's avatar
Al Viro committed
621
static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
622
623
624
625
{
	char *buf, *line;
	int level = default_message_loglevel;
	int facility = 1;	/* LOG_USER */
Christoph Hellwig's avatar
Christoph Hellwig committed
626
	size_t len = iov_iter_count(from);
627
628
629
630
631
632
633
634
	ssize_t ret = len;

	if (len > LOG_LINE_MAX)
		return -EINVAL;
	buf = kmalloc(len+1, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

Al Viro's avatar
Al Viro committed
635
636
637
638
	buf[len] = '\0';
	if (copy_from_iter(buf, len, from) != len) {
		kfree(buf);
		return -EFAULT;
639
640
641
642
643
644
645
646
647
648
649
650
651
652
	}

	/*
	 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
	 * the decimal value represents 32bit, the lower 3 bit are the log
	 * level, the rest are the log facility.
	 *
	 * If no prefix or no userspace facility is specified, we
	 * enforce LOG_USER, to be able to reliably distinguish
	 * kernel-generated messages from userspace-injected ones.
	 */
	line = buf;
	if (line[0] == '<') {
		char *endp = NULL;
653
		unsigned int u;
654

655
		u = simple_strtoul(line + 1, &endp, 10);
656
		if (endp && endp[0] == '>') {
657
658
659
			level = LOG_LEVEL(u);
			if (LOG_FACILITY(u) != 0)
				facility = LOG_FACILITY(u);
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
			endp++;
			len -= endp - line;
			line = endp;
		}
	}

	printk_emit(facility, level, NULL, 0, "%s", line);
	kfree(buf);
	return ret;
}

static ssize_t devkmsg_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct devkmsg_user *user = file->private_data;
675
	struct printk_log *msg;
676
677
678
679
680
681
	size_t len;
	ssize_t ret;

	if (!user)
		return -EBADF;

682
683
684
	ret = mutex_lock_interruptible(&user->lock);
	if (ret)
		return ret;
685
	raw_spin_lock_irq(&logbuf_lock);
686
687
688
	while (user->seq == log_next_seq) {
		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
689
			raw_spin_unlock_irq(&logbuf_lock);
690
691
692
			goto out;
		}

693
		raw_spin_unlock_irq(&logbuf_lock);
694
695
696
697
		ret = wait_event_interruptible(log_wait,
					       user->seq != log_next_seq);
		if (ret)
			goto out;
698
		raw_spin_lock_irq(&logbuf_lock);
699
700
701
702
703
704
705
	}

	if (user->seq < log_first_seq) {
		/* our last seen message is gone, return error and reset */
		user->idx = log_first_idx;
		user->seq = log_first_seq;
		ret = -EPIPE;
706
		raw_spin_unlock_irq(&logbuf_lock);
707
708
709
710
		goto out;
	}

	msg = log_from_idx(user->idx);
711
712
713
714
715
	len = msg_print_ext_header(user->buf, sizeof(user->buf),
				   msg, user->seq, user->prev);
	len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
				  log_dict(msg), msg->dict_len,
				  log_text(msg), msg->text_len);
716
717

	user->prev = msg->flags;
718
719
	user->idx = log_next(user->idx);
	user->seq++;
720
	raw_spin_unlock_irq(&logbuf_lock);
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746

	if (len > count) {
		ret = -EINVAL;
		goto out;
	}

	if (copy_to_user(buf, user->buf, len)) {
		ret = -EFAULT;
		goto out;
	}
	ret = len;
out:
	mutex_unlock(&user->lock);
	return ret;
}

static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
{
	struct devkmsg_user *user = file->private_data;
	loff_t ret = 0;

	if (!user)
		return -EBADF;
	if (offset)
		return -ESPIPE;

747
	raw_spin_lock_irq(&logbuf_lock);
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
	switch (whence) {
	case SEEK_SET:
		/* the first record */
		user->idx = log_first_idx;
		user->seq = log_first_seq;
		break;
	case SEEK_DATA:
		/*
		 * The first record after the last SYSLOG_ACTION_CLEAR,
		 * like issued by 'dmesg -c'. Reading /dev/kmsg itself
		 * changes no global state, and does not clear anything.
		 */
		user->idx = clear_idx;
		user->seq = clear_seq;
		break;
	case SEEK_END:
		/* after the last record */
		user->idx = log_next_idx;
		user->seq = log_next_seq;
		break;
	default:
		ret = -EINVAL;
	}
771
	raw_spin_unlock_irq(&logbuf_lock);
772
773
774
775
776
777
778
779
780
781
782
783
784
	return ret;
}

static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
{
	struct devkmsg_user *user = file->private_data;
	int ret = 0;

	if (!user)
		return POLLERR|POLLNVAL;

	poll_wait(file, &log_wait, wait);

785
	raw_spin_lock_irq(&logbuf_lock);
786
787
788
789
	if (user->seq < log_next_seq) {
		/* return error when data has vanished underneath us */
		if (user->seq < log_first_seq)
			ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
790
791
		else
			ret = POLLIN|POLLRDNORM;
792
	}
793
	raw_spin_unlock_irq(&logbuf_lock);
794
795
796
797
798
799
800
801
802
803
804
805
806

	return ret;
}

static int devkmsg_open(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user;
	int err;

	/* write-only does not need any file context */
	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		return 0;

807
808
	err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
				       SYSLOG_FROM_READER);
809
810
811
812
813
814
815
816
817
	if (err)
		return err;

	user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
	if (!user)
		return -ENOMEM;

	mutex_init(&user->lock);

818
	raw_spin_lock_irq(&logbuf_lock);
819
820
	user->idx = log_first_idx;
	user->seq = log_first_seq;
821
	raw_spin_unlock_irq(&logbuf_lock);
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841

	file->private_data = user;
	return 0;
}

static int devkmsg_release(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user = file->private_data;

	if (!user)
		return 0;

	mutex_destroy(&user->lock);
	kfree(user);
	return 0;
}

const struct file_operations kmsg_fops = {
	.open = devkmsg_open,
	.read = devkmsg_read,
Al Viro's avatar
Al Viro committed
842
	.write_iter = devkmsg_write,
843
844
845
846
847
	.llseek = devkmsg_llseek,
	.poll = devkmsg_poll,
	.release = devkmsg_release,
};

848
#ifdef CONFIG_KEXEC_CORE
849
/*
850
 * This appends the listed symbols to /proc/vmcore
851
 *
852
 * /proc/vmcore is used by various utilities, like crash and makedumpfile to
853
854
855
856
857
858
859
860
 * obtain access to symbols that are otherwise very difficult to locate.  These
 * symbols are specifically used so that utilities can access and extract the
 * dmesg log from a vmcore file after a crash.
 */
void log_buf_kexec_setup(void)
{
	VMCOREINFO_SYMBOL(log_buf);
	VMCOREINFO_SYMBOL(log_buf_len);
861
	VMCOREINFO_SYMBOL(log_first_idx);
862
	VMCOREINFO_SYMBOL(clear_idx);
863
	VMCOREINFO_SYMBOL(log_next_idx);
864
	/*
865
	 * Export struct printk_log size and field offsets. User space tools can
866
867
	 * parse it and detect any changes to structure down the line.
	 */
868
869
870
871
872
	VMCOREINFO_STRUCT_SIZE(printk_log);
	VMCOREINFO_OFFSET(printk_log, ts_nsec);
	VMCOREINFO_OFFSET(printk_log, len);
	VMCOREINFO_OFFSET(printk_log, text_len);
	VMCOREINFO_OFFSET(printk_log, dict_len);
873
874
875
}
#endif

876
877
878
/* requested log_buf_len from kernel cmdline */
static unsigned long __initdata new_log_buf_len;

879
880
/* we practice scaling the ring buffer by powers of 2 */
static void __init log_buf_len_update(unsigned size)
Linus Torvalds's avatar
Linus Torvalds committed
881
882
883
{
	if (size)
		size = roundup_pow_of_two(size);
884
885
	if (size > log_buf_len)
		new_log_buf_len = size;
886
887
888
889
890
891
892
893
}

/* save requested log_buf_len since it's too early to process it */
static int __init log_buf_len_setup(char *str)
{
	unsigned size = memparse(str, &str);

	log_buf_len_update(size);
894
895

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
896
}
897
898
early_param("log_buf_len", log_buf_len_setup);

899
900
901
#ifdef CONFIG_SMP
#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)

902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
static void __init log_buf_add_cpu(void)
{
	unsigned int cpu_extra;

	/*
	 * archs should set up cpu_possible_bits properly with
	 * set_cpu_possible() after setup_arch() but just in
	 * case lets ensure this is valid.
	 */
	if (num_possible_cpus() == 1)
		return;

	cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;

	/* by default this will only continue through for large > 64 CPUs */
	if (cpu_extra <= __LOG_BUF_LEN / 2)
		return;

	pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
		__LOG_CPU_MAX_BUF_LEN);
	pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
		cpu_extra);
	pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);

	log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
}
928
929
930
#else /* !CONFIG_SMP */
static inline void log_buf_add_cpu(void) {}
#endif /* CONFIG_SMP */
931

932
933
934
935
936
937
void __init setup_log_buf(int early)
{
	unsigned long flags;
	char *new_log_buf;
	int free;

938
939
940
941
942
943
	if (log_buf != __log_buf)
		return;

	if (!early && !new_log_buf_len)
		log_buf_add_cpu();

944
945
	if (!new_log_buf_len)
		return;
Linus Torvalds's avatar
Linus Torvalds committed
946

947
	if (early) {
948
		new_log_buf =
949
			memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
950
	} else {
951
952
		new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
							  LOG_ALIGN);
953
954
955
956
957
958
959
960
	}

	if (unlikely(!new_log_buf)) {
		pr_err("log_buf_len: %ld bytes not available\n",
			new_log_buf_len);
		return;
	}

961
	raw_spin_lock_irqsave(&logbuf_lock, flags);
962
963
964
	log_buf_len = new_log_buf_len;
	log_buf = new_log_buf;
	new_log_buf_len = 0;
965
966
	free = __LOG_BUF_LEN - log_next_idx;
	memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
967
	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
968

969
	pr_info("log_buf_len: %d bytes\n", log_buf_len);
970
971
972
	pr_info("early log buf free: %d(%d%%)\n",
		free, (free * 100) / __LOG_BUF_LEN);
}
Linus Torvalds's avatar
Linus Torvalds committed
973

974
975
976
977
static bool __read_mostly ignore_loglevel;

static int __init ignore_loglevel_setup(char *str)
{
978
	ignore_loglevel = true;
979
	pr_info("debug: ignoring loglevel setting.\n");
980
981
982
983
984
985

	return 0;
}

early_param("ignore_loglevel", ignore_loglevel_setup);
module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
986
987
MODULE_PARM_DESC(ignore_loglevel,
		 "ignore loglevel setting (prints all kernel messages to the console)");
988

Randy Dunlap's avatar
Randy Dunlap committed
989
990
#ifdef CONFIG_BOOT_PRINTK_DELAY

991
static int boot_delay; /* msecs delay after each printk during bootup */
992
static unsigned long long loops_per_msec;	/* based on boot_delay */
Randy Dunlap's avatar
Randy Dunlap committed
993
994
995
996
997
998
999
1000
1001
1002
1003
1004

static int __init boot_delay_setup(char *str)
{
	unsigned long lpj;

	lpj = preset_lpj ? preset_lpj : 1000000;	/* some guess */
	loops_per_msec = (unsigned long long)lpj / 1000 * HZ;

	get_option(&str, &boot_delay);
	if (boot_delay > 10 * 1000)
		boot_delay = 0;

1005
1006
1007
	pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
		"HZ: %d, loops_per_msec: %llu\n",
		boot_delay, preset_lpj, lpj, HZ, loops_per_msec);
1008
	return 0;
Randy Dunlap's avatar
Randy Dunlap committed
1009
}
1010
early_param("boot_delay", boot_delay_setup);
Randy Dunlap's avatar
Randy Dunlap committed
1011

1012
static void boot_delay_msec(int level)
Randy Dunlap's avatar
Randy Dunlap committed
1013
1014
1015
1016
{
	unsigned long long k;
	unsigned long timeout;

1017
1018
	if ((boot_delay == 0 || system_state != SYSTEM_BOOTING)
		|| (level >= console_loglevel && !ignore_loglevel)) {
Randy Dunlap's avatar
Randy Dunlap committed
1019
		return;
1020
	}
Randy Dunlap's avatar
Randy Dunlap committed
1021

1022
	k = (unsigned long long)loops_per_msec * boot_delay;
Randy Dunlap's avatar
Randy Dunlap committed
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038

	timeout = jiffies + msecs_to_jiffies(boot_delay);
	while (k) {
		k--;
		cpu_relax();
		/*
		 * use (volatile) jiffies to prevent
		 * compiler reduction; loop termination via jiffies
		 * is secondary and may or may not happen.
		 */
		if (time_after(jiffies, timeout))
			break;
		touch_nmi_watchdog();
	}
}
#else
1039
static inline void boot_delay_msec(int level)
Randy Dunlap's avatar
Randy Dunlap committed
1040
1041
1042
1043
{
}
#endif

Alex Elder's avatar
Alex Elder committed
1044
static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
1045
1046
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);

1047
1048
1049
1050
1051
1052
1053
static size_t print_time(u64 ts, char *buf)
{
	unsigned long rem_nsec;

	if (!printk_time)
		return 0;

1054
1055
	rem_nsec = do_div(ts, 1000000000);

1056
	if (!buf)
1057
		return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
1058
1059
1060
1061
1062

	return sprintf(buf, "[%5lu.%06lu] ",
		       (unsigned long)ts, rem_nsec / 1000);
}

1063
static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf)
1064
{
1065
	size_t len = 0;
1066
	unsigned int prefix = (msg->facility << 3) | msg->level;
1067

1068
1069
	if (syslog) {
		if (buf) {
1070
			len += sprintf(buf, "<%u>", prefix);
1071
1072
		} else {
			len += 3;
1073
1074
1075
1076
1077
			if (prefix > 999)
				len += 3;
			else if (prefix > 99)
				len += 2;
			else if (prefix > 9)
1078
1079
1080
				len++;
		}
	}
1081

1082
	len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
1083
	return len;
1084
1085
}

1086
static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
1087
			     bool syslog, char *buf, size_t size)
1088
{
1089
1090
	const char *text = log_text(msg);
	size_t text_size = msg->text_len;
1091
1092
	bool prefix = true;
	bool newline = true;
1093
1094
	size_t len = 0;

1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
	if ((prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))
		prefix = false;

	if (msg->flags & LOG_CONT) {
		if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
			prefix = false;

		if (!(msg->flags & LOG_NEWLINE))
			newline = false;
	}

1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
	do {
		const char *next = memchr(text, '\n', text_size);
		size_t text_len;

		if (next) {
			text_len = next - text;
			next++;
			text_size -= next - text;
		} else {
			text_len = text_size;
		}
1117

1118
1119
		if (buf) {
			if (print_prefix(msg, syslog, NULL) +
1120
			    text_len + 1 >= size - len)
1121
				break;
1122

1123
1124
			if (prefix)
				len += print_prefix(msg, syslog, buf + len);
1125
1126
			memcpy(buf + len, text, text_len);
			len += text_len;
1127
1128
			if (next || newline)
				buf[len++] = '\n';
1129
1130
		} else {
			/* SYSLOG_ACTION_* buffer size only calculation */
1131
1132
1133
1134
1135
			if (prefix)
				len += print_prefix(msg, syslog, NULL);
			len += text_len;
			if (next || newline)
				len++;
1136
		}
1137

1138
		prefix = true;
1139
1140
		text = next;
	} while (text);
1141
1142
1143
1144
1145
1146
1147

	return len;
}

static int syslog_print(char __user *buf, int size)
{
	char *text;
1148
	struct printk_log *msg;
1149
	int len = 0;
1150

1151
	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
1152
1153
1154
	if (!text)
		return -ENOMEM;

1155
1156
	while (size > 0) {
		size_t n;
1157
		size_t skip;
1158
1159
1160
1161
1162
1163

		raw_spin_lock_irq(&logbuf_lock);
		if (syslog_seq < log_first_seq) {
			/* messages are gone, move to first one */
			syslog_seq = log_first_seq;
			syslog_idx = log_first_idx;
1164
			syslog_prev = 0;
1165
			syslog_partial = 0;
1166
1167
1168
1169
1170
		}
		if (syslog_seq == log_next_seq) {
			raw_spin_unlock_irq(&logbuf_lock);
			break;
		}
1171
1172

		skip = syslog_partial;
1173
		msg = log_from_idx(syslog_idx);
1174
1175
		n = msg_print_text(msg, syslog_prev, true, text,
				   LOG_LINE_MAX + PREFIX_MAX);
1176
1177
		if (n - syslog_partial <= size) {
			/* message fits into buffer, move forward */
1178
1179
			syslog_idx = log_next(syslog_idx);
			syslog_seq++;
1180
			syslog_prev = msg->flags;
1181
1182
1183
1184
1185
1186
			n -= syslog_partial;
			syslog_partial = 0;
		} else if (!len){
			/* partial read(), remember position */
			n = size;
			syslog_partial += n;
1187
1188
1189
1190
1191
1192
1193
		} else
			n = 0;
		raw_spin_unlock_irq(&logbuf_lock);

		if (!n)
			break;

1194
		if (copy_to_user(buf, text + skip, n)) {
1195
1196
1197
1198
			if (!len)
				len = -EFAULT;
			break;
		}
1199
1200
1201
1202

		len += n;
		size -= n;
		buf += n;
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
	}

	kfree(text);
	return len;
}

static int syslog_print_all(char __user *buf, int size, bool clear)
{
	char *text;
	int len = 0;

1214
	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
1215
1216
1217
1218
1219
1220
1221
1222
	if (!text)
		return -ENOMEM;

	raw_spin_lock_irq(&logbuf_lock);
	if (buf) {
		u64 next_seq;
		u64 seq;
		u32 idx;
1223
		enum log_flags prev;
1224
1225
1226
1227

		/*
		 * Find first record that fits, including all following records,
		 * into the user-provided buffer for this dump.
1228
		 */
1229
1230
		seq = clear_seq;
		idx = clear_idx;
1231
		prev = 0;
1232
		while (seq < log_next_seq) {
1233
			struct printk_log *msg = log_from_idx(idx);
1234

1235
			len += msg_print_text(msg, prev, true, NULL, 0);
1236
			prev = msg->flags;
1237
1238
1239
			idx = log_next(idx);
			seq++;
		}
1240
1241

		/* move first record forward until length fits into the buffer */
1242
1243
		seq = clear_seq;
		idx = clear_idx;
1244
		prev = 0;
1245
		while (len > size && seq < log_next_seq) {
1246
			struct printk_log *msg = log_from_idx(idx);
1247

1248
			len -= msg_print_text(msg, prev, true, NULL, 0);
1249
			prev = msg->flags;
1250
1251
1252
1253
			idx = log_next(idx);
			seq++;
		}

1254
		/* last message fitting into this dump */
1255
1256
1257
1258
		next_seq = log_next_seq;

		len = 0;
		while (len >= 0 && seq < next_seq) {
1259
			struct printk_log *msg = log_from_idx(idx);
1260
1261
			int textlen;

1262
1263
			textlen = msg_print_text(msg, prev, true, text,
						 LOG_LINE_MAX + PREFIX_MAX);
1264
1265
1266
1267
1268
1269
			if (textlen < 0) {
				len = textlen;
				break;
			}
			idx = log_next(idx);
			seq++;
1270
			prev = msg->flags;
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282

			raw_spin_unlock_irq(&logbuf_lock);
			if (copy_to_user(buf + len, text, textlen))
				len = -EFAULT;
			else
				len += textlen;
			raw_spin_lock_irq(&logbuf_lock);

			if (seq < log_first_seq) {
				/* messages are gone, move to next one */
				seq = log_first_seq;
				idx = log_first_idx;
1283
				prev = 0;
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
			}
		}
	}

	if (clear) {
		clear_seq = log_next_seq;
		clear_idx = log_next_idx;
	}
	raw_spin_unlock_irq(&logbuf_lock);

	kfree(text);
	return len;
}

1298
int do_syslog(int type, char __user *buf, int len, int source)
Linus Torvalds's avatar
Linus Torvalds committed
1299
{
1300
	bool clear = false;
1301
	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
1302
	int error;
Linus Torvalds's avatar
Linus Torvalds committed
1303

1304
	error = check_syslog_permissions(type, source);
1305
1306
	if (error)
		goto out;
1307

Linus Torvalds's avatar
Linus Torvalds committed
1308
	switch (type) {
1309
	case SYSLOG_ACTION_CLOSE:	/* Close log */
Linus Torvalds's avatar
Linus Torvalds committed