printk.c 81.8 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
/*
 *  linux/kernel/printk.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Modified to make sys_syslog() more flexible: added commands to
 * return the last 4k of kernel messages, regardless of whether
 * they've been read or not.  Added option to suppress kernel printk's
 * to the console.  Added hook for sending the console messages
 * elsewhere, in preparation for a serial line console (someday).
 * Ted Ts'o, 2/11/93.
 * Modified for sysctl support, 1/8/97, Chris Horn.
13
 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
14
 *     manfred@colorfullife.com
Linus Torvalds's avatar
Linus Torvalds committed
15
 * Rewrote bits to get rid of console_lock
16
 *	01Mar01 Andrew Morton
Linus Torvalds's avatar
Linus Torvalds committed
17
18
19
20
21
22
23
24
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/console.h>
#include <linux/init.h>
Randy Dunlap's avatar
Randy Dunlap committed
25
26
#include <linux/jiffies.h>
#include <linux/nmi.h>
Linus Torvalds's avatar
Linus Torvalds committed
27
#include <linux/module.h>
Jan Engelhardt's avatar
Jan Engelhardt committed
28
#include <linux/moduleparam.h>
Linus Torvalds's avatar
Linus Torvalds committed
29
30
31
32
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/bootmem.h>
33
#include <linux/memblock.h>
Linus Torvalds's avatar
Linus Torvalds committed
34
#include <linux/syscalls.h>
35
#include <linux/kexec.h>
36
#include <linux/kdb.h>
37
#include <linux/ratelimit.h>
38
#include <linux/kmsg_dump.h>
39
#include <linux/syslog.h>
40
41
#include <linux/cpu.h>
#include <linux/notifier.h>
42
#include <linux/rculist.h>
43
#include <linux/poll.h>
44
#include <linux/irq_work.h>
45
#include <linux/utsname.h>
Alex Elder's avatar
Alex Elder committed
46
#include <linux/ctype.h>
47
#include <linux/uio.h>
Linus Torvalds's avatar
Linus Torvalds committed
48
49

#include <asm/uaccess.h>
50
#include <asm-generic/sections.h>
Linus Torvalds's avatar
Linus Torvalds committed
51

52
53
54
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>

Joe Perches's avatar
Joe Perches committed
55
#include "console_cmdline.h"
56
#include "braille.h"
57
#include "internal.h"
Joe Perches's avatar
Joe Perches committed
58

Linus Torvalds's avatar
Linus Torvalds committed
59
int console_printk[4] = {
60
	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
61
	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
62
63
	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
Linus Torvalds's avatar
Linus Torvalds committed
64
65
66
};

/*
67
 * Low level drivers may need that to know if they can schedule in
Linus Torvalds's avatar
Linus Torvalds committed
68
69
70
71
72
73
74
75
76
77
 * their unblank() callback or not. So let's export it.
 */
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);

/*
 * console_sem protects the console_drivers list, and also
 * provides serialisation for access to the entire console
 * driver system.
 */
78
static DEFINE_SEMAPHORE(console_sem);
Linus Torvalds's avatar
Linus Torvalds committed
79
struct console *console_drivers;
Ingo Molnar's avatar
Ingo Molnar committed
80
81
EXPORT_SYMBOL_GPL(console_drivers);

82
83
84
85
86
87
#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_lock_dep_map = {
	.name = "console_lock"
};
#endif

88
89
90
91
92
93
94
95
96
97
98
99
/*
 * Number of registered extended console drivers.
 *
 * If extended consoles are present, in-kernel cont reassembly is disabled
 * and each fragment is stored as a separate log entry with proper
 * continuation flag so that every emitted message has full metadata.  This
 * doesn't change the result for regular consoles or /proc/kmsg.  For
 * /dev/kmsg, as long as the reader concatenates messages according to
 * consecutive continuation flags, the end result should be the same too.
 */
static int nr_ext_console_drivers;

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * Helper macros to handle lockdep when locking/unlocking console_sem. We use
 * macros instead of functions so that _RET_IP_ contains useful information.
 */
#define down_console_sem() do { \
	down(&console_sem);\
	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
} while (0)

static int __down_trylock_console_sem(unsigned long ip)
{
	if (down_trylock(&console_sem))
		return 1;
	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
	return 0;
}
#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)

#define up_console_sem() do { \
	mutex_release(&console_lock_dep_map, 1, _RET_IP_);\
	up(&console_sem);\
} while (0)

Linus Torvalds's avatar
Linus Torvalds committed
123
124
125
126
/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held. It's
 * definitely not the perfect debug tool (we don't know if _WE_
Alex Elder's avatar
Alex Elder committed
127
128
129
 * hold it and are racing, but it helps tracking those weird code
 * paths in the console code where we end up in places I want
 * locked without the console sempahore held).
Linus Torvalds's avatar
Linus Torvalds committed
130
 */
131
static int console_locked, console_suspended;
Linus Torvalds's avatar
Linus Torvalds committed
132

133
134
135
136
137
/*
 * If exclusive_console is non-NULL then only this console is to be printed to.
 */
static struct console *exclusive_console;

Linus Torvalds's avatar
Linus Torvalds committed
138
139
140
141
142
143
144
/*
 *	Array of consoles built from command line options (console=)
 */

#define MAX_CMDLINECONSOLES 8

static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
Joe Perches's avatar
Joe Perches committed
145

Linus Torvalds's avatar
Linus Torvalds committed
146
147
static int selected_console = -1;
static int preferred_console = -1;
148
149
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);
Linus Torvalds's avatar
Linus Torvalds committed
150
151
152
153

/* Flag: console code may call schedule() */
static int console_may_schedule;

154
155
156
157
158
159
/*
 * The printk log buffer consists of a chain of concatenated variable
 * length records. Every record starts with a record header, containing
 * the overall length of the record.
 *
 * The heads to the first and last entry in the buffer, as well as the
Alex Elder's avatar
Alex Elder committed
160
161
 * sequence numbers of these entries are maintained when messages are
 * stored.
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
 *
 * If the heads indicate available messages, the length in the header
 * tells the start next message. A length == 0 for the next message
 * indicates a wrap-around to the beginning of the buffer.
 *
 * Every record carries the monotonic timestamp in microseconds, as well as
 * the standard userspace syslog level and syslog facility. The usual
 * kernel messages use LOG_KERN; userspace-injected messages always carry
 * a matching syslog facility, by default LOG_USER. The origin of every
 * message can be reliably determined that way.
 *
 * The human readable log message directly follows the message header. The
 * length of the message text is stored in the header, the stored message
 * is not terminated.
 *
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
 * Optionally, a message can carry a dictionary of properties (key/value pairs),
 * to provide userspace with a machine-readable message context.
 *
 * Examples for well-defined, commonly used property names are:
 *   DEVICE=b12:8               device identifier
 *                                b12:8         block dev_t
 *                                c127:3        char dev_t
 *                                n8            netdev ifindex
 *                                +sound:card0  subsystem:devname
 *   SUBSYSTEM=pci              driver-core subsystem name
 *
 * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value
 * follows directly after a '=' character. Every property is terminated by
 * a '\0' character. The last property is not terminated.
 *
 * Example of a message structure:
 *   0000  ff 8f 00 00 00 00 00 00      monotonic time in nsec
 *   0008  34 00                        record is 52 bytes long
 *   000a        0b 00                  text is 11 bytes long
 *   000c              1f 00            dictionary is 23 bytes long
 *   000e                    03 00      LOG_KERN (facility) LOG_ERR (level)
 *   0010  69 74 27 73 20 61 20 6c      "it's a l"
 *         69 6e 65                     "ine"
 *   001b           44 45 56 49 43      "DEVIC"
 *         45 3d 62 38 3a 32 00 44      "E=b8:2\0D"
 *         52 49 56 45 52 3d 62 75      "RIVER=bu"
 *         67                           "g"
 *   0032     00 00 00                  padding to next message header
 *
206
 * The 'struct printk_log' buffer header must never be directly exported to
207
208
209
210
 * userspace, it is a kernel-private implementation detail that might
 * need to be changed in the future, when the requirements change.
 *
 * /dev/kmsg exports the structured data in the following line format:
211
212
213
214
 *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
 *
 * Users of the export format should ignore possible additional values
 * separated by ',', and find the message after the ';' character.
215
216
217
218
 *
 * The optional key/value pairs are attached as continuation lines starting
 * with a space character and terminated by a newline. All possible
 * non-prinatable characters are escaped in the "\xff" notation.
219
220
 */

221
enum log_flags {
222
223
224
225
	LOG_NOCONS	= 1,	/* already flushed, do not print to console */
	LOG_NEWLINE	= 2,	/* text ended with a newline */
	LOG_PREFIX	= 4,	/* text started with a prefix */
	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
226
227
};

228
struct printk_log {
229
230
231
232
	u64 ts_nsec;		/* timestamp in nanoseconds */
	u16 len;		/* length of entire record */
	u16 text_len;		/* length of text buffer */
	u16 dict_len;		/* length of dictionary buffer */
233
234
235
	u8 facility;		/* syslog facility */
	u8 flags:5;		/* internal record flags */
	u8 level:3;		/* syslog level */
236
237
238
239
240
}
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
__packed __aligned(4)
#endif
;
241
242

/*
243
244
245
 * The logbuf_lock protects kmsg buffer, indices, counters.  This can be taken
 * within the scheduler's rq lock. It must be released before calling
 * console_unlock() or anything else that might wake up a process.
246
 */
247
DEFINE_RAW_SPINLOCK(logbuf_lock);
248

249
#ifdef CONFIG_PRINTK
250
DECLARE_WAIT_QUEUE_HEAD(log_wait);
251
252
253
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static u32 syslog_idx;
254
static enum log_flags syslog_prev;
255
static size_t syslog_partial;
256
257
258
259
260
261
262
263
264

/* index and sequence number of the first record stored in the buffer */
static u64 log_first_seq;
static u32 log_first_idx;

/* index and sequence number of the next record to store in the buffer */
static u64 log_next_seq;
static u32 log_next_idx;

265
266
267
268
269
/* the next printk record to write to the console */
static u64 console_seq;
static u32 console_idx;
static enum log_flags console_prev;

270
271
272
273
/* the next printk record to read after the last 'clear' command */
static u64 clear_seq;
static u32 clear_idx;

274
#define PREFIX_MAX		32
Alex Elder's avatar
Alex Elder committed
275
#define LOG_LINE_MAX		(1024 - PREFIX_MAX)
276

277
278
279
#define LOG_LEVEL(v)		((v) & 0x07)
#define LOG_FACILITY(v)		((v) >> 3 & 0xff)

280
/* record buffer */
281
#define LOG_ALIGN __alignof__(struct printk_log)
282
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
283
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
284
285
286
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;

287
288
289
290
291
292
293
294
295
296
297
298
/* Return log buffer address */
char *log_buf_addr_get(void)
{
	return log_buf;
}

/* Return log buffer size */
u32 log_buf_len_get(void)
{
	return log_buf_len;
}

299
/* human readable text of the record */
300
static char *log_text(const struct printk_log *msg)
301
{
302
	return (char *)msg + sizeof(struct printk_log);
303
304
305
}

/* optional key/value pair dictionary attached to the record */
306
static char *log_dict(const struct printk_log *msg)
307
{
308
	return (char *)msg + sizeof(struct printk_log) + msg->text_len;
309
310
311
}

/* get record by index; idx must point to valid msg */
312
static struct printk_log *log_from_idx(u32 idx)
313
{
314
	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
315
316
317
318
319
320

	/*
	 * A length == 0 record is the end of buffer marker. Wrap around and
	 * read the message at the start of the buffer.
	 */
	if (!msg->len)
321
		return (struct printk_log *)log_buf;
322
323
324
325
326
327
	return msg;
}

/* get next record; idx must point to valid msg */
static u32 log_next(u32 idx)
{
328
	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
329
330
331
332
333
334
335
336

	/* length == 0 indicates the end of the buffer; wrap */
	/*
	 * A length == 0 record is the end of buffer marker. Wrap around and
	 * read the message at the start of the buffer as *this* one, and
	 * return the one after that.
	 */
	if (!msg->len) {
337
		msg = (struct printk_log *)log_buf;
338
339
340
341
342
		return msg->len;
	}
	return idx + msg->len;
}

Petr Mladek's avatar
Petr Mladek committed
343
344
345
346
347
348
349
350
351
352
/*
 * Check whether there is enough free space for the given message.
 *
 * The same values of first_idx and next_idx mean that the buffer
 * is either empty or full.
 *
 * If the buffer is empty, we must respect the position of the indexes.
 * They cannot be reset to the beginning of the buffer.
 */
static int logbuf_has_space(u32 msg_size, bool empty)
353
354
355
{
	u32 free;

Petr Mladek's avatar
Petr Mladek committed
356
	if (log_next_idx > log_first_idx || empty)
357
358
359
360
361
362
363
364
365
366
367
		free = max(log_buf_len - log_next_idx, log_first_idx);
	else
		free = log_first_idx - log_next_idx;

	/*
	 * We need space also for an empty header that signalizes wrapping
	 * of the buffer.
	 */
	return free >= msg_size + sizeof(struct printk_log);
}

Petr Mladek's avatar
Petr Mladek committed
368
static int log_make_free_space(u32 msg_size)
369
{
370
371
	while (log_first_seq < log_next_seq &&
	       !logbuf_has_space(msg_size, false)) {
Alex Elder's avatar
Alex Elder committed
372
		/* drop old messages until we have enough contiguous space */
373
374
375
		log_first_idx = log_next(log_first_idx);
		log_first_seq++;
	}
Petr Mladek's avatar
Petr Mladek committed
376

377
378
379
380
381
	if (clear_seq < log_first_seq) {
		clear_seq = log_first_seq;
		clear_idx = log_first_idx;
	}

Petr Mladek's avatar
Petr Mladek committed
382
	/* sequence numbers are equal, so the log buffer is empty */
383
	if (logbuf_has_space(msg_size, log_first_seq == log_next_seq))
Petr Mladek's avatar
Petr Mladek committed
384
385
386
		return 0;

	return -ENOMEM;
387
388
}

389
390
391
392
393
394
395
396
397
398
399
400
/* compute the message size including the padding bytes */
static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len)
{
	u32 size;

	size = sizeof(struct printk_log) + text_len + dict_len;
	*pad_len = (-size) & (LOG_ALIGN - 1);
	size += *pad_len;

	return size;
}

Petr Mladek's avatar
Petr Mladek committed
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
/*
 * Define how much of the log buffer we could take at maximum. The value
 * must be greater than two. Note that only half of the buffer is available
 * when the index points to the middle.
 */
#define MAX_LOG_TAKE_PART 4
static const char trunc_msg[] = "<truncated>";

static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
			u16 *dict_len, u32 *pad_len)
{
	/*
	 * The message should not take the whole buffer. Otherwise, it might
	 * get removed too soon.
	 */
	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
	if (*text_len > max_text_len)
		*text_len = max_text_len;
	/* enable the warning message */
	*trunc_msg_len = strlen(trunc_msg);
	/* disable the "dict" completely */
	*dict_len = 0;
	/* compute the size again, count also the warning message */
	return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
}

427
/* insert record into the buffer, discard old ones, update heads */
428
429
430
431
static int log_store(int facility, int level,
		     enum log_flags flags, u64 ts_nsec,
		     const char *dict, u16 dict_len,
		     const char *text, u16 text_len)
432
{
433
	struct printk_log *msg;
434
	u32 size, pad_len;
Petr Mladek's avatar
Petr Mladek committed
435
	u16 trunc_msg_len = 0;
436
437

	/* number of '\0' padding bytes to next message */
438
	size = msg_used_size(text_len, dict_len, &pad_len);
439

Petr Mladek's avatar
Petr Mladek committed
440
441
442
443
444
445
	if (log_make_free_space(size)) {
		/* truncate the message if it is too long for empty buffer */
		size = truncate_msg(&text_len, &trunc_msg_len,
				    &dict_len, &pad_len);
		/* survive when the log buffer is too small for trunc_msg */
		if (log_make_free_space(size))
446
			return 0;
Petr Mladek's avatar
Petr Mladek committed
447
	}
448

449
	if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) {
450
451
452
453
454
		/*
		 * This message + an additional empty header does not fit
		 * at the end of the buffer. Add an empty header with len == 0
		 * to signify a wrap around.
		 */
455
		memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
456
457
458
459
		log_next_idx = 0;
	}

	/* fill message */
460
	msg = (struct printk_log *)(log_buf + log_next_idx);
461
462
	memcpy(log_text(msg), text, text_len);
	msg->text_len = text_len;
Petr Mladek's avatar
Petr Mladek committed
463
464
465
466
	if (trunc_msg_len) {
		memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len);
		msg->text_len += trunc_msg_len;
	}
467
468
	memcpy(log_dict(msg), dict, dict_len);
	msg->dict_len = dict_len;
469
470
471
472
473
474
475
	msg->facility = facility;
	msg->level = level & 7;
	msg->flags = flags & 0x1f;
	if (ts_nsec > 0)
		msg->ts_nsec = ts_nsec;
	else
		msg->ts_nsec = local_clock();
476
	memset(log_dict(msg) + dict_len, 0, pad_len);
477
	msg->len = size;
478
479
480
481

	/* insert message */
	log_next_idx += msg->len;
	log_next_seq++;
482
483

	return msg->text_len;
484
}
485

Alex Elder's avatar
Alex Elder committed
486
int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
487
488
489
490
491
492
493
494
495
496
497
498
499

static int syslog_action_restricted(int type)
{
	if (dmesg_restrict)
		return 1;
	/*
	 * Unless restricted, we allow "read all" and "get buffer size"
	 * for everybody.
	 */
	return type != SYSLOG_ACTION_READ_ALL &&
	       type != SYSLOG_ACTION_SIZE_BUFFER;
}

500
int check_syslog_permissions(int type, int source)
501
502
503
504
505
{
	/*
	 * If this is from /proc/kmsg and we've already opened it, then we've
	 * already done the capabilities checks at open time.
	 */
506
	if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
507
		goto ok;
508
509
510

	if (syslog_action_restricted(type)) {
		if (capable(CAP_SYSLOG))
511
			goto ok;
512
513
514
515
516
517
518
519
520
		/*
		 * For historical reasons, accept CAP_SYS_ADMIN too, with
		 * a warning.
		 */
		if (capable(CAP_SYS_ADMIN)) {
			pr_warn_once("%s (%d): Attempt to access syslog with "
				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
				     "(deprecated).\n",
				 current->comm, task_pid_nr(current));
521
			goto ok;
522
523
524
		}
		return -EPERM;
	}
525
ok:
526
527
	return security_syslog(type);
}
Geliang Tang's avatar
Geliang Tang committed
528
EXPORT_SYMBOL_GPL(check_syslog_permissions);
529

530
531
532
533
534
static void append_char(char **pp, char *e, char c)
{
	if (*pp < e)
		*(*pp)++ = c;
}
535

536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
static ssize_t msg_print_ext_header(char *buf, size_t size,
				    struct printk_log *msg, u64 seq,
				    enum log_flags prev_flags)
{
	u64 ts_usec = msg->ts_nsec;
	char cont = '-';

	do_div(ts_usec, 1000);

	/*
	 * If we couldn't merge continuation line fragments during the print,
	 * export the stored flags to allow an optional external merge of the
	 * records. Merging the records isn't always neccessarily correct, like
	 * when we hit a race during printing. In most cases though, it produces
	 * better readable output. 'c' in the record flags mark the first
	 * fragment of a line, '+' the following.
	 */
	if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT))
		cont = 'c';
	else if ((msg->flags & LOG_CONT) ||
		 ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
		cont = '+';

	return scnprintf(buf, size, "%u,%llu,%llu,%c;",
		       (msg->facility << 3) | msg->level, seq, ts_usec, cont);
}

static ssize_t msg_print_ext_body(char *buf, size_t size,
				  char *dict, size_t dict_len,
				  char *text, size_t text_len)
{
	char *p = buf, *e = buf + size;
	size_t i;

	/* escape non-printable characters */
	for (i = 0; i < text_len; i++) {
		unsigned char c = text[i];

		if (c < ' ' || c >= 127 || c == '\\')
			p += scnprintf(p, e - p, "\\x%02x", c);
		else
			append_char(&p, e, c);
	}
	append_char(&p, e, '\n');

	if (dict_len) {
		bool line = true;

		for (i = 0; i < dict_len; i++) {
			unsigned char c = dict[i];

			if (line) {
				append_char(&p, e, ' ');
				line = false;
			}

			if (c == '\0') {
				append_char(&p, e, '\n');
				line = true;
				continue;
			}

			if (c < ' ' || c >= 127 || c == '\\') {
				p += scnprintf(p, e - p, "\\x%02x", c);
				continue;
			}

			append_char(&p, e, c);
		}
		append_char(&p, e, '\n');
	}

	return p - buf;
}

611
612
613
614
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
	u64 seq;
	u32 idx;
615
	enum log_flags prev;
616
	struct mutex lock;
617
	char buf[CONSOLE_EXT_LOG_MAX];
618
619
};

Al Viro's avatar
Al Viro committed
620
static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
621
622
623
624
{
	char *buf, *line;
	int level = default_message_loglevel;
	int facility = 1;	/* LOG_USER */
Christoph Hellwig's avatar
Christoph Hellwig committed
625
	size_t len = iov_iter_count(from);
626
627
628
629
630
631
632
633
	ssize_t ret = len;

	if (len > LOG_LINE_MAX)
		return -EINVAL;
	buf = kmalloc(len+1, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

Al Viro's avatar
Al Viro committed
634
635
636
637
	buf[len] = '\0';
	if (copy_from_iter(buf, len, from) != len) {
		kfree(buf);
		return -EFAULT;
638
639
640
641
642
643
644
645
646
647
648
649
650
651
	}

	/*
	 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
	 * the decimal value represents 32bit, the lower 3 bit are the log
	 * level, the rest are the log facility.
	 *
	 * If no prefix or no userspace facility is specified, we
	 * enforce LOG_USER, to be able to reliably distinguish
	 * kernel-generated messages from userspace-injected ones.
	 */
	line = buf;
	if (line[0] == '<') {
		char *endp = NULL;
652
		unsigned int u;
653

654
		u = simple_strtoul(line + 1, &endp, 10);
655
		if (endp && endp[0] == '>') {
656
657
658
			level = LOG_LEVEL(u);
			if (LOG_FACILITY(u) != 0)
				facility = LOG_FACILITY(u);
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
			endp++;
			len -= endp - line;
			line = endp;
		}
	}

	printk_emit(facility, level, NULL, 0, "%s", line);
	kfree(buf);
	return ret;
}

static ssize_t devkmsg_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct devkmsg_user *user = file->private_data;
674
	struct printk_log *msg;
675
676
677
678
679
680
	size_t len;
	ssize_t ret;

	if (!user)
		return -EBADF;

681
682
683
	ret = mutex_lock_interruptible(&user->lock);
	if (ret)
		return ret;
684
	raw_spin_lock_irq(&logbuf_lock);
685
686
687
	while (user->seq == log_next_seq) {
		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
688
			raw_spin_unlock_irq(&logbuf_lock);
689
690
691
			goto out;
		}

692
		raw_spin_unlock_irq(&logbuf_lock);
693
694
695
696
		ret = wait_event_interruptible(log_wait,
					       user->seq != log_next_seq);
		if (ret)
			goto out;
697
		raw_spin_lock_irq(&logbuf_lock);
698
699
700
701
702
703
704
	}

	if (user->seq < log_first_seq) {
		/* our last seen message is gone, return error and reset */
		user->idx = log_first_idx;
		user->seq = log_first_seq;
		ret = -EPIPE;
705
		raw_spin_unlock_irq(&logbuf_lock);
706
707
708
709
		goto out;
	}

	msg = log_from_idx(user->idx);
710
711
712
713
714
	len = msg_print_ext_header(user->buf, sizeof(user->buf),
				   msg, user->seq, user->prev);
	len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
				  log_dict(msg), msg->dict_len,
				  log_text(msg), msg->text_len);
715
716

	user->prev = msg->flags;
717
718
	user->idx = log_next(user->idx);
	user->seq++;
719
	raw_spin_unlock_irq(&logbuf_lock);
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745

	if (len > count) {
		ret = -EINVAL;
		goto out;
	}

	if (copy_to_user(buf, user->buf, len)) {
		ret = -EFAULT;
		goto out;
	}
	ret = len;
out:
	mutex_unlock(&user->lock);
	return ret;
}

static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
{
	struct devkmsg_user *user = file->private_data;
	loff_t ret = 0;

	if (!user)
		return -EBADF;
	if (offset)
		return -ESPIPE;

746
	raw_spin_lock_irq(&logbuf_lock);
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
	switch (whence) {
	case SEEK_SET:
		/* the first record */
		user->idx = log_first_idx;
		user->seq = log_first_seq;
		break;
	case SEEK_DATA:
		/*
		 * The first record after the last SYSLOG_ACTION_CLEAR,
		 * like issued by 'dmesg -c'. Reading /dev/kmsg itself
		 * changes no global state, and does not clear anything.
		 */
		user->idx = clear_idx;
		user->seq = clear_seq;
		break;
	case SEEK_END:
		/* after the last record */
		user->idx = log_next_idx;
		user->seq = log_next_seq;
		break;
	default:
		ret = -EINVAL;
	}
770
	raw_spin_unlock_irq(&logbuf_lock);
771
772
773
774
775
776
777
778
779
780
781
782
783
	return ret;
}

static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
{
	struct devkmsg_user *user = file->private_data;
	int ret = 0;

	if (!user)
		return POLLERR|POLLNVAL;

	poll_wait(file, &log_wait, wait);

784
	raw_spin_lock_irq(&logbuf_lock);
785
786
787
788
	if (user->seq < log_next_seq) {
		/* return error when data has vanished underneath us */
		if (user->seq < log_first_seq)
			ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
789
790
		else
			ret = POLLIN|POLLRDNORM;
791
	}
792
	raw_spin_unlock_irq(&logbuf_lock);
793
794
795
796
797
798
799
800
801
802
803
804
805

	return ret;
}

static int devkmsg_open(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user;
	int err;

	/* write-only does not need any file context */
	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		return 0;

806
807
	err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
				       SYSLOG_FROM_READER);
808
809
810
811
812
813
814
815
816
	if (err)
		return err;

	user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
	if (!user)
		return -ENOMEM;

	mutex_init(&user->lock);

817
	raw_spin_lock_irq(&logbuf_lock);
818
819
	user->idx = log_first_idx;
	user->seq = log_first_seq;
820
	raw_spin_unlock_irq(&logbuf_lock);
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840

	file->private_data = user;
	return 0;
}

static int devkmsg_release(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user = file->private_data;

	if (!user)
		return 0;

	mutex_destroy(&user->lock);
	kfree(user);
	return 0;
}

const struct file_operations kmsg_fops = {
	.open = devkmsg_open,
	.read = devkmsg_read,
Al Viro's avatar
Al Viro committed
841
	.write_iter = devkmsg_write,
842
843
844
845
846
	.llseek = devkmsg_llseek,
	.poll = devkmsg_poll,
	.release = devkmsg_release,
};

847
#ifdef CONFIG_KEXEC_CORE
848
/*
849
 * This appends the listed symbols to /proc/vmcore
850
 *
851
 * /proc/vmcore is used by various utilities, like crash and makedumpfile to
852
853
854
855
856
857
858
859
 * obtain access to symbols that are otherwise very difficult to locate.  These
 * symbols are specifically used so that utilities can access and extract the
 * dmesg log from a vmcore file after a crash.
 */
void log_buf_kexec_setup(void)
{
	VMCOREINFO_SYMBOL(log_buf);
	VMCOREINFO_SYMBOL(log_buf_len);
860
	VMCOREINFO_SYMBOL(log_first_idx);
861
	VMCOREINFO_SYMBOL(clear_idx);
862
	VMCOREINFO_SYMBOL(log_next_idx);
863
	/*
864
	 * Export struct printk_log size and field offsets. User space tools can
865
866
	 * parse it and detect any changes to structure down the line.
	 */
867
868
869
870
871
	VMCOREINFO_STRUCT_SIZE(printk_log);
	VMCOREINFO_OFFSET(printk_log, ts_nsec);
	VMCOREINFO_OFFSET(printk_log, len);
	VMCOREINFO_OFFSET(printk_log, text_len);
	VMCOREINFO_OFFSET(printk_log, dict_len);
872
873
874
}
#endif

875
876
877
/* requested log_buf_len from kernel cmdline */
static unsigned long __initdata new_log_buf_len;

878
879
/* we practice scaling the ring buffer by powers of 2 */
static void __init log_buf_len_update(unsigned size)
Linus Torvalds's avatar
Linus Torvalds committed
880
881
882
{
	if (size)
		size = roundup_pow_of_two(size);
883
884
	if (size > log_buf_len)
		new_log_buf_len = size;
885
886
887
888
889
890
891
892
}

/* save requested log_buf_len since it's too early to process it */
static int __init log_buf_len_setup(char *str)
{
	unsigned size = memparse(str, &str);

	log_buf_len_update(size);
893
894

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
895
}
896
897
early_param("log_buf_len", log_buf_len_setup);

898
899
900
#ifdef CONFIG_SMP
#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)

901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
static void __init log_buf_add_cpu(void)
{
	unsigned int cpu_extra;

	/*
	 * archs should set up cpu_possible_bits properly with
	 * set_cpu_possible() after setup_arch() but just in
	 * case lets ensure this is valid.
	 */
	if (num_possible_cpus() == 1)
		return;

	cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;

	/* by default this will only continue through for large > 64 CPUs */
	if (cpu_extra <= __LOG_BUF_LEN / 2)
		return;

	pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
		__LOG_CPU_MAX_BUF_LEN);
	pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
		cpu_extra);
	pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);

	log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
}
927
928
929
#else /* !CONFIG_SMP */
static inline void log_buf_add_cpu(void) {}
#endif /* CONFIG_SMP */
930

931
932
933
934
935
936
void __init setup_log_buf(int early)
{
	unsigned long flags;
	char *new_log_buf;
	int free;

937
938
939
940
941
942
	if (log_buf != __log_buf)
		return;

	if (!early && !new_log_buf_len)
		log_buf_add_cpu();

943
944
	if (!new_log_buf_len)
		return;
Linus Torvalds's avatar
Linus Torvalds committed
945

946
	if (early) {
947
		new_log_buf =
948
			memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
949
	} else {
950
951
		new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
							  LOG_ALIGN);
952
953
954
955
956
957
958
959
	}

	if (unlikely(!new_log_buf)) {
		pr_err("log_buf_len: %ld bytes not available\n",
			new_log_buf_len);
		return;
	}

960
	raw_spin_lock_irqsave(&logbuf_lock, flags);
961
962
963
	log_buf_len = new_log_buf_len;
	log_buf = new_log_buf;
	new_log_buf_len = 0;
964
965
	free = __LOG_BUF_LEN - log_next_idx;
	memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
966
	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
967

968
	pr_info("log_buf_len: %d bytes\n", log_buf_len);
969
970
971
	pr_info("early log buf free: %d(%d%%)\n",
		free, (free * 100) / __LOG_BUF_LEN);
}
Linus Torvalds's avatar
Linus Torvalds committed
972

973
974
975
976
static bool __read_mostly ignore_loglevel;

static int __init ignore_loglevel_setup(char *str)
{
977
	ignore_loglevel = true;
978
	pr_info("debug: ignoring loglevel setting.\n");
979
980
981
982
983
984

	return 0;
}

early_param("ignore_loglevel", ignore_loglevel_setup);
module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
985
986
MODULE_PARM_DESC(ignore_loglevel,
		 "ignore loglevel setting (prints all kernel messages to the console)");
987

988
989
990
991
992
static bool suppress_message_printing(int level)
{
	return (level >= console_loglevel && !ignore_loglevel);
}

Randy Dunlap's avatar
Randy Dunlap committed
993
994
#ifdef CONFIG_BOOT_PRINTK_DELAY

995
static int boot_delay; /* msecs delay after each printk during bootup */
996
static unsigned long long loops_per_msec;	/* based on boot_delay */
Randy Dunlap's avatar
Randy Dunlap committed
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008

static int __init boot_delay_setup(char *str)
{
	unsigned long lpj;

	lpj = preset_lpj ? preset_lpj : 1000000;	/* some guess */
	loops_per_msec = (unsigned long long)lpj / 1000 * HZ;

	get_option(&str, &boot_delay);
	if (boot_delay > 10 * 1000)
		boot_delay = 0;

1009
1010
1011
	pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
		"HZ: %d, loops_per_msec: %llu\n",
		boot_delay, preset_lpj, lpj, HZ, loops_per_msec);
1012
	return 0;
Randy Dunlap's avatar
Randy Dunlap committed
1013
}
1014
early_param("boot_delay", boot_delay_setup);
Randy Dunlap's avatar
Randy Dunlap committed
1015

1016
static void boot_delay_msec(int level)
Randy Dunlap's avatar
Randy Dunlap committed
1017
1018
1019
1020
{
	unsigned long long k;
	unsigned long timeout;

1021
	if ((boot_delay == 0 || system_state != SYSTEM_BOOTING)
1022
		|| suppress_message_printing(level)) {
Randy Dunlap's avatar
Randy Dunlap committed
1023
		return;
1024
	}
Randy Dunlap's avatar
Randy Dunlap committed
1025

1026
	k = (unsigned long long)loops_per_msec * boot_delay;
Randy Dunlap's avatar
Randy Dunlap committed
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042

	timeout = jiffies + msecs_to_jiffies(boot_delay);
	while (k) {
		k--;
		cpu_relax();
		/*
		 * use (volatile) jiffies to prevent
		 * compiler reduction; loop termination via jiffies
		 * is secondary and may or may not happen.
		 */
		if (time_after(jiffies, timeout))
			break;
		touch_nmi_watchdog();
	}
}
#else
1043
static inline void boot_delay_msec(int level)
Randy Dunlap's avatar
Randy Dunlap committed
1044
1045
1046
1047
{
}
#endif

Alex Elder's avatar
Alex Elder committed
1048
static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
1049
1050
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);

1051
1052
1053
1054
1055
1056
1057
static size_t print_time(u64 ts, char *buf)
{
	unsigned long rem_nsec;

	if (!printk_time)
		return 0;

1058
1059
	rem_nsec = do_div(ts, 1000000000);

1060
	if (!buf)
1061
		return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
1062
1063
1064
1065
1066

	return sprintf(buf, "[%5lu.%06lu] ",
		       (unsigned long)ts, rem_nsec / 1000);
}

1067
static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf)
1068
{
1069
	size_t len = 0;
1070
	unsigned int prefix = (msg->facility << 3) | msg->level;
1071

1072
1073
	if (syslog) {
		if (buf) {
1074
			len += sprintf(buf, "<%u>", prefix);
1075
1076
		} else {
			len += 3;
1077
1078
1079
1080
1081
			if (prefix > 999)
				len += 3;
			else if (prefix > 99)
				len += 2;
			else if (prefix > 9)
1082
1083
1084
				len++;
		}
	}
1085

1086
	len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
1087
	return len;
1088
1089
}

1090
static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
1091
			     bool syslog, char *buf, size_t size)
1092
{
1093
1094
	const char *text = log_text(msg);
	size_t text_size = msg->text_len;
1095
1096
	bool prefix = true;
	bool newline = true;
1097
1098
	size_t len = 0;

1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
	if ((prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))
		prefix = false;

	if (msg->flags & LOG_CONT) {
		if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
			prefix = false;

		if (!(msg->flags & LOG_NEWLINE))
			newline = false;
	}

1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
	do {
		const char *next = memchr(text, '\n', text_size);
		size_t text_len;

		if (next) {
			text_len = next - text;
			next++;
			text_size -= next - text;
		} else {
			text_len = text_size;
		}
1121

1122
1123
		if (buf) {
			if (print_prefix(msg, syslog, NULL) +
1124
			    text_len + 1 >= size - len)
1125
				break;
1126

1127
1128
			if (prefix)
				len += print_prefix(msg, syslog, buf + len);
1129
1130
			memcpy(buf + len, text, text_len);
			len += text_len;
1131
1132
			if (next || newline)
				buf[len++] = '\n';
1133
1134
		} else {
			/* SYSLOG_ACTION_* buffer size only calculation */
1135
1136
1137
1138
1139
			if (prefix)
				len += print_prefix(msg, syslog, NULL);
			len += text_len;
			if (next || newline)
				len++;
1140
		}
1141

1142
		prefix = true;
1143
1144
		text = next;
	} while (text);
1145
1146
1147
1148
1149
1150
1151

	return len;
}

static int syslog_print(char __user *buf, int size)
{
	char *text;
1152
	struct printk_log *msg;
1153
	int len = 0;
1154

1155
	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
1156
1157
1158
	if (!text)
		return -ENOMEM;

1159
1160
	while (size > 0) {
		size_t n;
1161
		size_t skip;
1162
1163
1164
1165
1166
1167

		raw_spin_lock_irq(&logbuf_lock);
		if (syslog_seq < log_first_seq) {
			/* messages are gone, move to first one */
			syslog_seq = log_first_seq;
			syslog_idx = log_first_idx;
1168
			syslog_prev = 0;
1169
			syslog_partial = 0;
1170
1171
1172
1173
1174
		}
		if (syslog_seq == log_next_seq) {
			raw_spin_unlock_irq(&logbuf_lock);
			break;
		}
1175
1176

		skip = syslog_partial;
1177
		msg = log_from_idx(syslog_idx);
1178
1179
		n = msg_print_text(msg, syslog_prev, true, text,
				   LOG_LINE_MAX + PREFIX_MAX);
1180
1181
		if (n - syslog_partial <= size) {
			/* message fits into buffer, move forward */
1182
1183
			syslog_idx = log_next(syslog_idx);
			syslog_seq++;
1184
			syslog_prev = msg->flags;
1185
1186
1187
1188
1189
1190
			n -= syslog_partial;
			syslog_partial = 0;
		} else if (!len){
			/* partial read(), remember position */
			n = size;
			syslog_partial += n;
1191
1192
1193
1194
1195
1196
1197
		} else
			n = 0;
		raw_spin_unlock_irq(&logbuf_lock);

		if (!n)
			break;

1198
		if (copy_to_user(buf, text + skip, n)) {
1199
1200
1201
1202
			if (!len)
				len = -EFAULT;
			break;
		}
1203
1204
1205
1206

		len += n;
		size -= n;
		buf += n;
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
	}

	kfree(text);
	return len;
}

static int syslog_print_all(char __user *buf, int size, bool clear)
{
	char *text;
	int len = 0;

1218
	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
1219
1220
1221
1222
1223
1224
1225
1226
	if (!text)
		return -ENOMEM;

	raw_spin_lock_irq(&logbuf_lock);
	if (buf) {
		u64 next_seq;
		u64 seq;
		u32 idx;
1227
		enum log_flags prev;
1228
1229
1230
1231

		/*
		 * Find first record that fits, including all following records,
		 * into the user-provided buffer for this dump.
1232
		 */
1233
1234
		seq = clear_seq;
		idx = clear_idx;
1235
		prev = 0;
1236
		while (seq < log_next_seq) {
1237
			struct printk_log *msg = log_from_idx(idx);
1238

1239
			len += msg_print_text(msg, prev, true, NULL, 0);
1240
			prev = msg->flags;
1241
1242
1243
			idx = log_next(idx);
			seq++;
		}
1244
1245

		/* move first record forward until length fits into the buffer */
1246
1247
		seq = clear_seq;
		idx = clear_idx;
1248
		prev = 0;
1249
		while (len > size && seq < log_next_seq) {
1250
			struct printk_log *msg = log_from_idx(idx);
1251

1252
			len -= msg_print_text(msg, prev, true, NULL, 0);
1253
			prev = msg->flags;
1254
1255
1256
1257
			idx = log_next(idx);
			seq++;
		}

1258
		/* last message fitting into this dump */
1259
1260
1261
1262
		next_seq = log_next_seq;

		len = 0;
		while (len >= 0 && seq < next_seq) {
1263
			struct printk_log *msg = log_from_idx(idx);
1264
1265
			int textlen;

1266
1267
			textlen = msg_print_text(msg, prev, true, text,
						 LOG_LINE_MAX + PREFIX_MAX);
1268
1269
1270
1271
1272
1273
			if (textlen < 0) {
				len = textlen;
				break;
			}
			idx = log_next(idx);
			seq++;
1274
			prev = msg->flags;
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286

			raw_spin_unlock_irq(&logbuf_lock);
			if (copy_to_user(buf + len, text, textlen))
				len = -EFAULT;
			else
				len += textlen;
			raw_spin_lock_irq(&logbuf_lock);

			if (seq < log_first_seq) {
				/* messages are gone, move to next one */
				seq = log_first_seq;
				idx = log_first_idx;
1287
				prev = 0;
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
			}
		}
	}

	if (clear) {
		clear_seq = log_next_seq;
		clear_idx = log_next_idx;
	}
	raw_spin_unlock_irq(&logbuf_lock);

	kfree(text);
	return len;
}

1302
int do_syslog(int type, char __user *buf, int len, int source)
Linus Torvalds's avatar
Linus Torvalds committed
1303
{
1304
	bool clear = false;
1305
	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
1306
	int error;
Linus Torvalds's avatar
Linus Torvalds committed
1307

1308
	error = check_syslog_permissions(type, source);
1309
1310
	if (error)
		goto out;
Eric Paris's avatar