#!/usr/bin/perl -w
# (c) 2001, Dave Jones. (the file handling bit)
# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
# (c) 2008-2010 Andy Whitcroft <apw@canonical.com>
# Licensed under the terms of the GNU GPL License version 2

use strict;
use POSIX;

# Program name with any leading directory components stripped; used as the
# prefix of diagnostics.
my $P = $0;
$P =~ s@.*/@@g;

my $V = '0.32';

use Getopt::Long qw(:config no_auto_abbrev);

# Defaults for the command line options parsed by GetOptions() below.
my $quiet = 0;
my $tree = 1;
my $chk_signoff = 1;
my $chk_patch = 1;
my $tst_only;
my $emacs = 0;
my $terse = 0;
my $file = 0;
my $check = 0;
my $summary = 1;
my $mailback = 0;
my $summary_file = 0;
my $show_types = 0;
my $fix = 0;
my $root;
my %debug;
my %camelcase = ();
my %use_type = ();
my @use = ();
my %ignore_type = ();
my @ignore = ();
my $help = 0;
my $configuration_file = ".checkpatch.conf";
my $max_line_length = 80;
my $ignore_perl_version = 0;
my $minimum_perl_version = 5.10.0;
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

# Print the usage text to STDOUT and terminate the program.
#   $exitcode - status passed to exit().
sub help {
	my ($exitcode) = @_;

	print << "EOM";
Usage: $P [OPTION]... [FILE]...
Version: $V

Options:
  -q, --quiet                quiet
  --no-tree                  run without a kernel tree
  --no-signoff               do not check for 'Signed-off-by' line
  --patch                    treat FILE as patchfile (default)
  --emacs                    emacs compile window format
  --terse                    one line per report
  -f, --file                 treat FILE as regular source file
  --subjective, --strict     enable more subjective tests
  --types TYPE(,TYPE2...)    show only these comma separated message types
  --ignore TYPE(,TYPE2...)   ignore various comma separated message types
  --max-line-length=n        set the maximum line length, if exceeded, warn
  --show-types               show the message "types" in the output
  --root=PATH                PATH to the kernel tree root
  --no-summary               suppress the per-file summary
  --mailback                 only produce a report in case of warnings/errors
  --summary-file             include the filename in summary
  --debug KEY=[0|1]          turn on/off debugging of KEY, where KEY is one of
                             'values', 'possible', 'type', and 'attr' (default
                             is all off)
  --test-only=WORD           report only warnings/errors containing WORD
                             literally
  --fix                      EXPERIMENTAL - may create horrible results
                             If correctable single-line errors exist, create
                             "<inputfile>.EXPERIMENTAL-checkpatch-fixes"
                             with potential errors corrected to the preferred
                             checkpatch style
  --ignore-perl-version      override checking of perl version.  expect
                             runtime errors.
  -h, --help, --version      display this help and exit

When FILE is - read standard input.
EOM

	exit($exitcode);
}

89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Read extra command line words from the configuration file, if one is found,
# and prepend them to @ARGV so that real command line options override them.
my $conf = which_conf($configuration_file);
if (-f $conf) {
	my @conf_args;

	# Only read the file when it was actually opened; an unreadable file is
	# reported and otherwise ignored.
	if (open(my $conffile, '<', "$conf")) {
		while (<$conffile>) {
			my $line = $_;

			# Normalise whitespace: trim both ends, squeeze runs.
			$line =~ s/\s*\n?$//g;
			$line =~ s/^\s*//g;
			$line =~ s/\s+/ /g;

			next if ($line =~ m/^\s*#/);
			next if ($line =~ m/^\s*$/);

			my @words = split(" ", $line);
			foreach my $word (@words) {
				# A word starting with '#' begins a trailing comment.
				last if ($word =~ m/^#/);
				push (@conf_args, $word);
			}
		}
		close($conffile);
	} else {
		warn "$P: Can't find a readable $configuration_file file $!\n";
	}
	unshift(@ARGV, @conf_args) if @conf_args;
}

115
# Parse the command line (including any words injected from the configuration
# file).  Any parse error prints the usage text and exits with status 1.
GetOptions(
	'q|quiet+'	=> \$quiet,
	'tree!'		=> \$tree,
	'signoff!'	=> \$chk_signoff,
	'patch!'	=> \$chk_patch,
	'emacs!'	=> \$emacs,
	'terse!'	=> \$terse,
	'f|file!'	=> \$file,
	'subjective!'	=> \$check,
	'strict!'	=> \$check,
	'ignore=s'	=> \@ignore,
	'types=s'	=> \@use,
	'show-types!'	=> \$show_types,
	'max-line-length=i' => \$max_line_length,
	'root=s'	=> \$root,
	'summary!'	=> \$summary,
	'mailback!'	=> \$mailback,
	'summary-file!'	=> \$summary_file,
	'fix!'		=> \$fix,
	'ignore-perl-version!' => \$ignore_perl_version,
	'debug=s'	=> \%debug,
	'test-only=s'	=> \$tst_only,
	'h|help'	=> \$help,
	'version'	=> \$help
) or help(1);

help(0) if ($help);
142
143
144

# Exit status of the whole run; set to 1 by the file loop when process()
# reports a failure for any input.
my $exit = 0;

# Refuse to run on a perl older than $minimum_perl_version unless the user
# passed --ignore-perl-version (the message is printed either way).
if ($^V && $^V lt $minimum_perl_version) {
	printf "$P: requires at least perl version %vd\n", $minimum_perl_version;
	if (!$ignore_perl_version) {
		exit(1);
	}
}

# At least one FILE argument is required.
if ($#ARGV < 0) {
	print "$P: no input files\n";
	exit(1);
}

157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Record every word found in @$arrayRef as an upper-cased key of %$hashRef.
#   $hashRef  - hash whose per-word counters are incremented.
#   $arrayRef - list of strings, each of which may be a comma separated list.
# Words that are empty or start with '#' after trimming are ignored.
sub hash_save_array_words {
	my ($hashRef, $arrayRef) = @_;

	# Joining then splitting flattens "a,b" style elements into words.
	for my $entry (split(/,/, join(',', @$arrayRef))) {
		my $word = $entry;

		$word =~ s/\s*\n?$//g;
		$word =~ s/^\s*//g;
		$word =~ s/\s+/ /g;
		$word =~ tr/a-z/A-Z/;

		next if ($word =~ m/^\s*#/ || $word =~ m/^\s*$/);

		$hashRef->{$word}++;
	}
}
173

174
175
# Print a "NOTE: <prefix> message types: ..." line listing the sorted keys of
# %$hashRef.  Nothing is printed when $quiet is non-zero or the hash is empty.
sub hash_show_words {
	my ($hashRef, $prefix) = @_;

	if ($quiet == 0 && keys %$hashRef) {
		print "NOTE: $prefix message types:";
		foreach my $word (sort keys %$hashRef) {
			print " $word";
		}
		print "\n\n";
	}
}

186
187
188
hash_save_array_words(\%ignore_type, \@ignore);
hash_save_array_words(\%use_type, \@use);

# Debug switches settable with --debug KEY=VALUE.
my $dbg_values = 0;
my $dbg_possible = 0;
my $dbg_type = 0;
my $dbg_attr = 0;
{
	# Map each accepted KEY to the variable it controls.  A lookup table is
	# used instead of string eval so that text from the command line is never
	# executed as code.  The keys match those listed in the --debug help.
	my %dbg_flag_for = (
		'values'	=> \$dbg_values,
		'possible'	=> \$dbg_possible,
		'type'		=> \$dbg_type,
		'attr'		=> \$dbg_attr,
	);
	for my $key (keys %debug) {
		my $flag = $dbg_flag_for{$key}
		    or die "$P: unknown --debug key '$key'\n";
		${$flag} = $debug{$key};
	}
}

199
200
my $rpt_cleaners = 0;

# --terse implies the emacs output format and raises the quiet level.
if ($terse) {
	$emacs = 1;
	$quiet++;
}

# Unless --no-tree was given, locate the kernel tree root: an explicit --root
# must be valid; otherwise try the current directory, then the directory that
# contains the scripts/ directory this program was run from.
if ($tree) {
	if (defined $root) {
		if (!top_of_kernel_tree($root)) {
			die "$P: $root: --root does not point at a valid tree\n";
		}
	} else {
		if (top_of_kernel_tree('.')) {
			$root = '.';
		} elsif ($0 =~ m@(.*)/scripts/[^/]*$@ &&
						top_of_kernel_tree($1)) {
			$root = $1;
		}
	}

	if (!defined $root) {
		print "Must be run from the top-level dir. of a kernel tree\n";
		exit(2);
	}
}

my $emitted_corrupt = 0;

228
229
230
231
# Building-block patterns for C source text.

# An identifier, optionally pasted together with '##'.
our $Ident	= qr{
			[A-Za-z_][A-Za-z\d_]*
			(?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)*
		}x;
our $Storage	= qr{extern|static|asmlinkage};
our $Sparse	= qr{
			__user|
			__kernel|
			__force|
			__iomem|
			__must_check|
			__init_refok|
			__kprobes|
			__ref|
			__rcu
		}x;
our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)};
our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)};
our $InitAttributeConst = qr{$InitAttributePrefix(?:initconst\b)};
our $InitAttributeInit = qr{$InitAttributePrefix(?:init\b)};
our $InitAttribute = qr{$InitAttributeData|$InitAttributeConst|$InitAttributeInit};

# Notes to $Attribute:
# We need \b after 'init' otherwise 'initconst' will cause a false positive in a check
our $Attribute	= qr{
			const|
			__percpu|
			__nocast|
			__safe|
			__bitwise__|
			__packed__|
			__packed2__|
			__naked|
			__maybe_unused|
			__always_unused|
			__noreturn|
			__used|
			__cold|
			__noclone|
			__deprecated|
			__read_mostly|
			__kprobes|
			$InitAttribute|
			____cacheline_aligned|
			____cacheline_aligned_in_smp|
			____cacheline_internodealigned_in_smp|
			__weak
		  }x;
# Assigned by build_types().
our $Modifier;
our $Inline	= qr{inline|__always_inline|noinline};
our $Member	= qr{->$Ident|\.$Ident|\[[^]]*\]};
our $Lval	= qr{$Ident(?:$Member)*};

# Numeric constants.
our $Int_type	= qr{(?i)llu|ull|ll|lu|ul|l|u};
our $Binary	= qr{(?i)0b[01]+$Int_type?};
our $Hex	= qr{(?i)0x[0-9a-f]+$Int_type?};
our $Int	= qr{[0-9]+$Int_type?};
our $Float_hex	= qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?};
our $Float_dec	= qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?};
our $Float_int	= qr{(?i)[0-9]+e-?[0-9]+[fl]?};
our $Float	= qr{$Float_hex|$Float_dec|$Float_int};
our $Constant	= qr{$Float|$Binary|$Hex|$Int};

# Operators.
our $Assignment	= qr{\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=};
our $Compare    = qr{<=|>=|==|!=|<|>};
our $Arithmetic = qr{\+|-|\*|\/|%};
our $Operators	= qr{
			<=|>=|==|!=|
			=>|->|<<|>>|<|>|!|~|
			&&|\|\||,|\^|\+\+|--|&|\||$Arithmetic
		  }x;

299
# Type patterns; all four are assigned by build_types().
our $NonptrType;
our $NonptrTypeWithAttr;
our $Type;
our $Declare;

# One well-formed multi-byte UTF-8 sequence.
our $NON_ASCII_UTF8	= qr{
	[\xC2-\xDF][\x80-\xBF]               # non-overlong 2-byte
	|  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
	| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
	|  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
	|  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
	| [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
	|  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
}x;

# One character of valid UTF-8 text: printable ASCII, tab/LF/CR, or one of
# the multi-byte sequences above.
our $UTF8	= qr{
	[\x09\x0A\x0D\x20-\x7E]              # ASCII
	| $NON_ASCII_UTF8
}x;

319
# Typedef names treated as types: fixed-width integer names and atomic_t.
our $typeTypedefs = qr{(?x:
	(?:__)?(?:u|s|be|le)(?:8|16|32|64)|
	atomic_t
)};

# Names of logging-style functions.
our $logFunctions = qr{(?x:
	printk(?:_ratelimited|_once|)|
	(?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)|
	WARN(?:_RATELIMIT|_ONCE|)|
	panic|
	MODULE_[A-Z_]+|
	seq_vprintf|seq_printf|seq_puts
)};

# Patch signature tags, matched case-insensitively.
our $signature_tags = qr{(?xi:
	Signed-off-by:|
	Acked-by:|
	Tested-by:|
	Reviewed-by:|
	Reported-by:|
	Suggested-by:|
	To:|
	Cc:
)};

344
345
# Patterns for base type names; build_types() joins these into $NonptrType.
our @typeList = (
	qr{void},
	qr{(?:unsigned\s+)?char},
	qr{(?:unsigned\s+)?short},
	qr{(?:unsigned\s+)?int},
	qr{(?:unsigned\s+)?long},
	qr{(?:unsigned\s+)?long\s+int},
	qr{(?:unsigned\s+)?long\s+long},
	qr{(?:unsigned\s+)?long\s+long\s+int},
	qr{unsigned},
	qr{float},
	qr{double},
	qr{bool},
	qr{struct\s+$Ident},
	qr{union\s+$Ident},
	qr{enum\s+$Ident},
	qr{${Ident}_t},
	qr{${Ident}_handler},
	qr{${Ident}_handler_fn},
);
# As @typeList, plus struct/union names preceded by an init attribute;
# build_types() joins these into $NonptrTypeWithAttr.
our @typeListWithAttr = (
	@typeList,
	qr{struct\s+$InitAttribute\s+$Ident},
	qr{union\s+$InitAttribute\s+$Ident},
);

# Additional modifier keywords; build_types() folds these into $Modifier.
our @modifierList = (
	qr{fastcall},
);

our $allowed_asm_includes = qr{(?x:
	irq|
	memory
)};
# memory.h: ARM has a custom one

380
# (Re)build $Modifier, $NonptrType, $NonptrTypeWithAttr, $Type and $Declare
# from the current contents of @modifierList, @typeList and
# @typeListWithAttr.  Takes no arguments and returns nothing useful.
sub build_types {
	# Alternations of every known modifier / type pattern.
	my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
	my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
	my $allWithAttr = "(?x:  \n" . join("|\n  ", @typeListWithAttr) . "\n)";
	$Modifier	= qr{(?:$Attribute|$Sparse|$mods)};
	$NonptrType	= qr{
			(?:$Modifier\s+|const\s+)*
			(?:
				(?:typeof|__typeof__)\s*\([^\)]*\)|
				(?:$typeTypedefs\b)|
				(?:${all}\b)
			)
			(?:\s+$Modifier|\s+const)*
		  }x;
	$NonptrTypeWithAttr	= qr{
			(?:$Modifier\s+|const\s+)*
			(?:
				(?:typeof|__typeof__)\s*\([^\)]*\)|
				(?:$typeTypedefs\b)|
				(?:${allWithAttr}\b)
			)
			(?:\s+$Modifier|\s+const)*
		  }x;
	$Type	= qr{
			$NonptrType
			(?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
			(?:\s+$Inline|\s+$Modifier)*
		  }x;
	$Declare	= qr{(?:$Storage\s+)?$Type};
}
build_types();
411

412
# An optional parenthesised cast to a non-pointer type.
our $Typecast	= qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*};

# Using $balanced_parens, $LvalOrFunc, or $FuncArg
# requires at least perl version v5.10.0
# Any use must be runtime checked with $^V

our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
our $LvalOrFunc	= qr{($Lval)\s*($balanced_parens{0,1})\s*};
our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
421
422
423
424
425
426
427
428
429
430

# Strip one leading "(" and one trailing ")" (with surrounding whitespace)
# from $string and collapse every whitespace run to a single space.
# Returns "" when $string is undefined.
sub deparenthesize {
	my ($string) = @_;

	if (defined($string)) {
		# Alias $_ to $string so the substitutions edit it in place.
		for ($string) {
			s@^\s*\(\s*@@g;
			s@\s*\)\s*$@@g;
			s@\s+@ @g;
		}
		return $string;
	}

	return "";
}

431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
# Scan one file for CamelCase names and record them in %camelcase.
#   $file - path of the file to scan; silently ignored unless it is a plain
#           file.  An unreadable file produces a warning and is skipped.
# Names are collected from #define/typedef lines, declarations matching
# $Declare, and union/struct/enum definitions.
sub seed_camelcase_file {
	my ($file) = @_;

	return if (!(-f $file));

	# Slurp the whole file in one read.
	local $/;

	my $include_file;
	if (!open($include_file, '<', "$file")) {
		warn "$P: Can't read '$file' $!\n";
		return;
	}
	my $text = <$include_file>;
	close($include_file);

	# An empty file yields undef; nothing to scan.
	return if (!defined $text);

	my @lines = split('\n', $text);

	foreach my $line (@lines) {
		# Cheap pre-filter: skip lines with no lower/upper case boundary.
		next if ($line !~ /(?:[A-Z][a-z]|[a-z][A-Z])/);
		if ($line =~ /^[ \t]*(?:#[ \t]*define|typedef\s+$Type)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)/) {
			$camelcase{$1} = 1;
		} elsif ($line =~ /^\s*$Declare\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[\(\[,;]/) {
			$camelcase{$1} = 1;
		} elsif ($line =~ /^\s*(?:union|struct|enum)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[;\{]/) {
			$camelcase{$1} = 1;
		}
	}
}

# Set once seed_camelcase_includes() has run so the work is done only once.
my $camelcase_seeded = 0;

# Populate %camelcase from the headers under include/, using a cache file in
# the current directory when one matching the current tree state exists.
# The cache name embeds either the last git commit touching include/ (when
# ".git" is a directory) or the newest header modification time.
sub seed_camelcase_includes {
	return if ($camelcase_seeded);

	my $files;
	my $camelcase_cache = "";
	my @include_files = ();

	$camelcase_seeded = 1;

	if (-d ".git") {
		my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`;
		chomp $git_last_include_commit;
		$camelcase_cache = ".checkpatch-camelcase.git.$git_last_include_commit";
	} else {
		my $last_mod_date = 0;
		$files = `find $root/include -name "*.h"`;
		@include_files = split('\n', $files);
		foreach my $file (@include_files) {
			my $date = POSIX::strftime("%Y%m%d%H%M",
						   localtime((stat $file)[9]));
			$last_mod_date = $date if ($last_mod_date < $date);
		}
		$camelcase_cache = ".checkpatch-camelcase.date.$last_mod_date";
	}

	# Use the cache when it can be read; if it exists but cannot be opened,
	# warn and fall through to rebuild it from the headers.
	if ($camelcase_cache ne "" && -f $camelcase_cache) {
		if (open(my $camelcase_file, '<', "$camelcase_cache")) {
			while (<$camelcase_file>) {
				chomp;
				$camelcase{$_} = 1;
			}
			close($camelcase_file);

			return;
		}
		warn "$P: Can't read '$camelcase_cache' $!\n";
	}

	if (-d ".git") {
		$files = `git ls-files "include/*.h"`;
		@include_files = split('\n', $files);
	}

	foreach my $file (@include_files) {
		seed_camelcase_file($file);
	}

	# Replace any stale cache files with a fresh one.
	if ($camelcase_cache ne "") {
		unlink glob ".checkpatch-camelcase.*";

		if (open(my $camelcase_file, '>', "$camelcase_cache")) {
			foreach (sort { lc($a) cmp lc($b) } keys(%camelcase)) {
				print $camelcase_file ("$_\n");
			}
			close($camelcase_file);
		} else {
			warn "$P: Can't write '$camelcase_cache' $!\n";
		}
	}
}

515
516
# Source files (as opposed to patches) carry no sign-off to check.
$chk_signoff = 0 if ($file);

my @rawlines = ();
my @lines = ();
my @fixed = ();
my $vname;

# Read each input into @rawlines and hand it to process(); the exit status
# becomes 1 if any input fails.
for my $filename (@ARGV) {
	my $FILE;
	if ($file) {
		# Present a plain source file as a patch that adds every line.
		# List-form open runs diff directly, so the file name is never
		# interpreted by a shell.
		open($FILE, '-|', 'diff', '-u', '/dev/null', $filename) ||
			die "$P: $filename: diff failed - $!\n";
	} elsif ($filename eq '-') {
		open($FILE, '<&', \*STDIN) ||
			die "$P: $filename: open failed - $!\n";
	} else {
		open($FILE, '<', "$filename") ||
			die "$P: $filename: open failed - $!\n";
	}
	if ($filename eq '-') {
		$vname = 'Your patch';
	} else {
		$vname = $filename;
	}
	while (<$FILE>) {
		chomp;
		push(@rawlines, $_);
	}
	close($FILE);
	if (!process($filename)) {
		$exit = 1;
	}
	# Reset the per-file state before the next input.
	@rawlines = ();
	@lines = ();
	@fixed = ();
}

exit($exit);

# Decide whether $root looks like the top of a kernel source tree.
#   $root - directory to test.
# Returns 1 when every expected top-level entry exists under $root, else 0.
sub top_of_kernel_tree {
	my ($root) = @_;

	my @tree_check = (
		"COPYING", "CREDITS", "Kbuild", "MAINTAINERS", "Makefile",
		"README", "Documentation", "arch", "include", "drivers",
		"fs", "init", "ipc", "kernel", "lib", "scripts",
	);

	foreach my $entry (@tree_check) {
		if (! -e $root . '/' . $entry) {
			return 0;
		}
	}
	return 1;
}
568

569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
# Split a "Name <address> comment" style string into its parts.
#   $formatted_email - the text to parse.
# Returns the list ($name, $address, $comment).  The name is re-quoted when
# it contains characters other than word characters, space and '-'.  All
# three parts are empty when the text has a bare address preceded by a name.
sub parse_email {
	my ($formatted_email) = @_;

	my $name = "";
	my $address = "";
	my $comment = "";

	if ($formatted_email =~ /^(.*)<(\S+\@\S+)>(.*)$/) {
		$name = $1;
		$address = $2;
		$comment = $3 if defined $3;
	} elsif ($formatted_email =~ /^\s*<(\S+\@\S+)>(.*)$/) {
		$address = $1;
		$comment = $2 if defined $2;
	} elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
		$address = $1;
		$comment = $2 if defined $2;
		# The address is literal text, not a pattern: quote it so that
		# characters such as '+' or '.' are not treated as regex syntax.
		$formatted_email =~ s/\Q$address\E.*$//;
		$name = $formatted_email;
		$name = trim($name);
		$name =~ s/^\"|\"$//g;
		# If there's a name left after stripping spaces and
		# leading quotes, and the address doesn't have both
		# leading and trailing angle brackets, the address
		# is invalid. ie:
		#   "joe smith joe@smith.com" bad
		#   "joe smith <joe@smith.com" bad
		if ($name ne "" && $address !~ /^<[^>]+>$/) {
			$name = "";
			$address = "";
			$comment = "";
		}
	}

	$name = trim($name);
	$name =~ s/^\"|\"$//g;
	$address = trim($address);
	$address =~ s/^\<|\>$//g;

	if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
		$name =~ s/(?<!\\)"/\\"/g; ##escape quotes
		$name = "\"$name\"";
	}

	return ($name, $address, $comment);
}

# Build a display string from a name and an address.
#   $name    - display name; surrounding quotes are stripped, then the name is
#              re-quoted if it contains characters other than word
#              characters, space and '-'.
#   $address - e-mail address.
# Returns "$name <$address>", or just the address when the name is empty.
sub format_email {
	my ($name, $address) = @_;

	my $formatted_email;

	$name = trim($name);
	$name =~ s/^\"|\"$//g;
	$address = trim($address);

	if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
		$name =~ s/(?<!\\)"/\\"/g; ##escape quotes
		$name = "\"$name\"";
	}

	if ("$name" eq "") {
		$formatted_email = "$address";
	} else {
		$formatted_email = "$name <$address>";
	}

	return $formatted_email;
}

639
640
641
642
643
644
645
646
647
648
649
650
# Locate a configuration file.
#   $conf - file name to look for.
# Searches ".", then $ENV{HOME} (when set and non-empty), then ".scripts",
# and returns "<dir>/$conf" for the first directory containing it, or "" when
# it is not found anywhere.
sub which_conf {
	my ($conf) = @_;

	# Build the search list explicitly: an unset HOME must neither trigger
	# an "uninitialized" warning nor turn into a lookup of "/$conf".
	my @search_dirs = ('.');
	push(@search_dirs, $ENV{HOME})
	    if (defined $ENV{HOME} && $ENV{HOME} ne '');
	push(@search_dirs, '.scripts');

	foreach my $path (@search_dirs) {
		if (-e "$path/$conf") {
			return "$path/$conf";
		}
	}

	return "";
}

651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
# Return $str with every tab replaced by spaces up to the next multiple of
# eight columns.
sub expand_tabs {
	my ($str) = @_;

	my $expanded = '';
	my $column = 0;
	foreach my $char (split(//, $str)) {
		if ($char ne "\t") {
			$expanded .= $char;
			$column++;
		} else {
			# A tab always advances by at least one column.
			my $pad = 8 - ($column % 8);
			$expanded .= ' ' x $pad;
			$column += $pad;
		}
	}

	return $expanded;
}
671
# Return a copy of the argument in which every character other than a tab
# has been replaced by a space, preserving its layout.
sub copy_spacing {
	# tr with /c complements the search list: everything but "\t".
	(my $res = shift) =~ tr/\t/ /c;
	return $res;
}
675

676
677
678
679
680
681
682
683
684
685
686
687
688
# Measure a diff line.
#   $line - the line, including its leading diff marker character.
# Returns (length, indent): the length of the tab-expanded text after the
# marker and the length of its leading whitespace.
sub line_stats {
	my ($line) = @_;

	# Remove the diff marker, then expand tabs to spaces.
	(my $body = $line) =~ s/^.//;
	$body = expand_tabs($body);

	# The indent is whatever whitespace starts the line.
	my $indent = '';
	if ($body =~ /^(\s*)/) {
		$indent = $1;
	}

	return (length($body), length($indent));
}

689
690
691
692
693
694
695
696
697
698
699
my $sanitise_quote = '';

# Reset the quoting state carried between sanitise_line() calls.
#   $in_comment - true to start as if inside a block comment, false to start
#                 outside any comment or string.
sub sanitise_line_reset {
	my ($in_comment) = @_;

	$sanitise_quote = $in_comment ? '*/' : '';
}
700
701
702
703
704
705
# Return a copy of $line, same length, with comment and string contents
# masked so later pattern checks do not match inside them.
#   $line - one diff line; its first character (the diff marker) is copied
#           unchanged.
# Comment text becomes the value of $; and string contents become 'X'; tabs
# are kept.  The quoting state lives in $sanitise_quote and carries over to
# the next call, except that a '//' comment ends with the line.
sub sanitise_line {
	my ($line) = @_;

	my $res = '';
	my $off = 0;
	my $c;

	# Always copy over the diff marker.
	$res = substr($line, 0, 1);

	for ($off = 1; $off < length($line); $off++) {
		$c = substr($line, $off, 1);

		# Comments we are whacking completely including the begin
		# and end, all to $;.
		if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
			$sanitise_quote = '*/';

			substr($res, $off, 2, "$;$;");
			$off++;
			next;
		}
		if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') {
			$sanitise_quote = '';
			substr($res, $off, 2, "$;$;");
			$off++;
			next;
		}
		if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') {
			$sanitise_quote = '//';

			# The '//' introducer itself is kept verbatim.
			substr($res, $off, 2, $sanitise_quote);
			$off++;
			next;
		}

		# A \ in a string means ignore the next character.
		if (($sanitise_quote eq "'" || $sanitise_quote eq '"') &&
		    $c eq "\\") {
			substr($res, $off, 2, 'XX');
			$off++;
			next;
		}
		# Regular quotes.
		if ($c eq "'" || $c eq '"') {
			if ($sanitise_quote eq '') {
				$sanitise_quote = $c;

				substr($res, $off, 1, $c);
				next;
			} elsif ($sanitise_quote eq $c) {
				# Closing quote: falls through and is copied below.
				$sanitise_quote = '';
			}
		}

		#print "c<$c> SQ<$sanitise_quote>\n";
		if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
			substr($res, $off, 1, $;);
		} elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") {
			substr($res, $off, 1, $;);
		} elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
			substr($res, $off, 1, 'X');
		} else {
			substr($res, $off, 1, $c);
		}
	}

	if ($sanitise_quote eq '//') {
		$sanitise_quote = '';
	}

	# The pathname on a #include may be surrounded by '<' and '>'.
	if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) {
		my $clean = 'X' x length($1);
		$res =~ s@\<.*\>@<$clean>@;

	# The whole of a #error is a string.
	} elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) {
		my $clean = 'X' x length($1);
		$res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@;
	}

	return $res;
}

788
789
790
791
792
793
794
# Return the text of the first string literal on a line, quotes included.
#   $line    - line in which string contents appear as runs of 'X'.
#   $rawline - the text from which the same character span is extracted.
# Returns "" when $line contains no quoted run of 'X' characters.
sub get_quoted_string {
	my ($line, $rawline) = @_;

	if ($line =~ m/(\"[X]+\")/g) {
		# @- and @+ hold the start and end offsets of the match.
		my ($start, $end) = ($-[0], $+[0]);
		return substr($rawline, $start, $end - $start);
	}

	return "";
}

795
796
797
798
799
800
# Extract one statement (or block) and its leading condition from @lines.
#   $linenr - 1-based line number at which to start.
#   $remain - number of non-removed lines that may still be consumed.
#   $off    - character offset within the starting line.
# Lines starting with '-' are skipped.  Scanning stops at a ';' or closing
# '}' at nesting level 0, at the end of a #define, or when no context is left.
# Returns ($statement, $condition, $line, $remain, $off, $level), where the
# middle three give the position from which a following call can continue
# and $level is the nesting level at which the scan stopped.
sub ctx_statement_block {
	my ($linenr, $remain, $off) = @_;
	my $line = $linenr - 1;
	my $blk = '';
	my $soff = $off;
	my $coff = $off - 1;
	my $coff_set = 0;

	my $loff = 0;

	my $type = '';
	my $level = 0;
	my @stack = ();
	my $p;
	my $c;
	my $len = 0;

	my $remainder;
	while (1) {
		@stack = (['', 0]) if ($#stack == -1);

		#warn "CSB: blk<$blk> remain<$remain>\n";
		# If we are about to drop off the end, pull in more
		# context.
		if ($off >= $len) {
			for (; $remain > 0; $line++) {
				last if (!defined $lines[$line]);
				next if ($lines[$line] =~ /^-/);
				$remain--;
				$loff = $len;
				$blk .= $lines[$line] . "\n";
				$len = length($blk);
				$line++;
				last;
			}
			# Bail if there is no further context.
			#warn "CSB: blk<$blk> off<$off> len<$len>\n";
			if ($off >= $len) {
				last;
			}
			if ($level == 0 && substr($blk, $off) =~ /^.\s*#\s*define/) {
				$level++;
				$type = '#';
			}
		}
		$p = $c;
		$c = substr($blk, $off, 1);
		$remainder = substr($blk, $off);

		#warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n";

		# Handle nested #if/#else.
		if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) {
			push(@stack, [ $type, $level ]);
		} elsif ($remainder =~ /^#\s*(?:else|elif)\b/) {
			($type, $level) = @{$stack[$#stack - 1]};
		} elsif ($remainder =~ /^#\s*endif\b/) {
			($type, $level) = @{pop(@stack)};
		}

		# Statement ends at the ';' or a close '}' at the
		# outermost level.
		if ($level == 0 && $c eq ';') {
			last;
		}

		# An else is really a conditional as long as its not else if
		# (the brace is escaped: an unescaped literal '{' in a pattern
		# is deprecated in newer perls).
		if ($level == 0 && $coff_set == 0 &&
				(!defined($p) || $p =~ /(?:\s|\}|\+)/) &&
				$remainder =~ /^(else)(?:\s|\{)/ &&
				$remainder !~ /^else\s+if\b/) {
			$coff = $off + length($1) - 1;
			$coff_set = 1;
			#warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n";
			#warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n";
		}

		if (($type eq '' || $type eq '(') && $c eq '(') {
			$level++;
			$type = '(';
		}
		if ($type eq '(' && $c eq ')') {
			$level--;
			$type = ($level != 0)? '(' : '';

			# The first balanced ')' at level 0 ends the condition.
			if ($level == 0 && $coff < $soff) {
				$coff = $off;
				$coff_set = 1;
				#warn "CSB: mark coff<$coff>\n";
			}
		}
		if (($type eq '' || $type eq '{') && $c eq '{') {
			$level++;
			$type = '{';
		}
		if ($type eq '{' && $c eq '}') {
			$level--;
			$type = ($level != 0)? '{' : '';

			if ($level == 0) {
				# Swallow a ';' directly after the closing brace.
				if (substr($blk, $off + 1, 1) eq ';') {
					$off++;
				}
				last;
			}
		}
		# Preprocessor commands end at the newline unless escaped.
		if ($type eq '#' && $c eq "\n" && $p ne "\\") {
			$level--;
			$type = '';
			$off++;
			last;
		}
		$off++;
	}
	# We are truly at the end, so shuffle to the next line.
	if ($off == $len) {
		$loff = $len + 1;
		$line++;
		$remain--;
	}

	my $statement = substr($blk, $soff, $off - $soff + 1);
	my $condition = substr($blk, $soff, $coff - $soff + 1);

	#warn "STATEMENT<$statement>\n";
	#warn "CONDITION<$condition>\n";

	#print "coff<$coff> soff<$off> loff<$loff>\n";

	return ($statement, $condition,
			$line, $remain + 1, $off - $loff + 1, $level);
}

929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
# Count the lines of a statement taken from a diff.
#   $stmt - statement text whose lines each begin with a diff marker.
# The marker of every line and any leading/trailing whitespace are removed
# first; returns the number of newlines left, plus one.
sub statement_lines {
	my ($stmt) = @_;

	# Strip the diff line prefixes and rip blank lines at start and end.
	for ($stmt) {
		s/(^|\n)./$1/g;
		s/^\s*//;
		s/\s*$//;
	}

	# List assignment in scalar context yields the number of matches.
	my $newlines = () = ($stmt =~ /\n/g);

	return $newlines + 1;
}

# Return the number of newline characters in $stmt, plus one.  No stripping
# is done.
sub statement_rawlines {
	my ($stmt) = @_;

	# tr with an empty replacement counts characters without changing them.
	my $newlines = ($stmt =~ tr/\n//);

	return $newlines + 1;
}

# Estimate the size of a braced block taken from a diff.
#   $stmt - block text whose lines each begin with a diff marker.
# After removing the markers, the enclosing braces and surrounding
# whitespace, returns the larger of the line count and the number of ';'.
sub statement_block_size {
	my ($stmt) = @_;

	$stmt =~ s/(^|\n)./$1/g;
	# Literal braces are escaped: an unescaped '{' in a pattern is
	# deprecated in newer perls.
	$stmt =~ s/^\s*\{//;
	$stmt =~ s/\}\s*$//;
	$stmt =~ s/^\s*//;
	$stmt =~ s/\s*$//;

	my @stmt_lines = ($stmt =~ /\n/g);
	my @stmt_statements = ($stmt =~ /;/g);

	my $stmt_lines = $#stmt_lines + 2;
	my $stmt_statements = $#stmt_statements + 1;

	if ($stmt_lines > $stmt_statements) {
		return $stmt_lines;
	} else {
		return $stmt_statements;
	}
}

972
973
974
975
976
977
# Collect a whole if/else/do construct as a list of condition/statement
# pairs.
#   $linenr, $remain, $off - start position, as for ctx_statement_block().
# Returns ($level, $linenr, @chunks), where each chunk is an array ref
# [ $condition, $statement ] and the first two values come from the last
# ctx_statement_block() call made.
sub ctx_statement_full {
	my ($linenr, $remain, $off) = @_;
	my ($statement, $condition, $level);

	my (@chunks);

	# Grab the first conditional/block pair.
	($statement, $condition, $linenr, $remain, $off, $level) =
				ctx_statement_block($linenr, $remain, $off);
	#print "F: c<$condition> s<$statement> remain<$remain>\n";
	push(@chunks, [ $condition, $statement ]);
	if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) {
		return ($level, $linenr, @chunks);
	}

	# Pull in the following conditional/block pairs and see if they
	# could continue the statement.
	for (;;) {
		($statement, $condition, $linenr, $remain, $off, $level) =
				ctx_statement_block($linenr, $remain, $off);
		#print "C: c<$condition> s<$statement> remain<$remain>\n";
		last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s));
		#print "C: push\n";
		push(@chunks, [ $condition, $statement ]);
	}

	return ($level, $linenr, @chunks);
}

1001
# Collect the raw lines of a bracketed region.
#   $linenr - 1-based line number at which to start.
#   $remain - number of non-removed lines that may be consumed.
#   $outer  - when true, keep only lines that end at nesting level <= 1.
#   $open   - opening delimiter character, e.g. '{' or '('.
#   $close  - matching closing delimiter character.
#   $off    - number of leading characters to skip before counting
#             delimiters.
# Delimiters are counted in @lines while the returned text comes from
# @rawlines.  Lines starting with '-' are skipped.  Scanning stops once the
# nesting level returns to zero or $remain is exhausted.
# Returns ($level, @lines_collected).
sub ctx_block_get {
	my ($linenr, $remain, $outer, $open, $close, $off) = @_;
	my $line;
	my $start = $linenr - 1;
	my @res = ();

	my $level = 0;
	my @stack = ($level);
	for ($line = $start; $remain > 0; $line++) {
		next if ($rawlines[$line] =~ /^-/);
		$remain--;

		# Handle nested #if/#else.
		if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
			push(@stack, $level);
		} elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
			$level = $stack[$#stack - 1];
		} elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) {
			$level = pop(@stack);
		}

		foreach my $c (split(//, $lines[$line])) {
			##print "C<$c>L<$level><$open$close>O<$off>\n";
			if ($off > 0) {
				$off--;
				next;
			}

			if ($c eq $close && $level > 0) {
				$level--;
				last if ($level == 0);
			} elsif ($c eq $open) {
				$level++;
			}
		}

		if (!$outer || $level <= 1) {
			push(@res, $rawlines[$line]);
		}

		last if ($level == 0);
	}

	return ($level, @res);
}
# Return the outer-level raw lines of the '{' ... '}' block starting at
# $linenr (ctx_block_get() with $outer set), without the nesting level.
sub ctx_block_outer {
	my ($linenr, $remain) = @_;

	my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0);
	return @r;
}
# Return all raw lines of the '{' ... '}' block starting at $linenr, without
# the nesting level.
sub ctx_block {
	my ($linenr, $remain) = @_;

	my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0);
	return @r;
}
# Return the raw lines of the '(' ... ')' group starting at $linenr, skipping
# the first $off characters, without the nesting level.
sub ctx_statement {
	my ($linenr, $remain, $off) = @_;

	my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off);
	return @r;
}
# As ctx_block(), but returns ctx_block_get()'s full result: the final
# nesting level followed by the collected lines.
sub ctx_block_level {
	my ($linenr, $remain) = @_;

	return ctx_block_get($linenr, $remain, 0, '{', '}', 0);
}
1074
1075
1076
1077
1078
# As ctx_statement(), but returns ctx_block_get()'s full result: the final
# nesting level followed by the collected lines.
sub ctx_statement_level {
	my ($linenr, $remain, $off) = @_;

	# Not an outer-only scan; delimiters are round brackets.
	my @scan_args = ($linenr, $remain, 0, '(', ')', $off);

	return ctx_block_get(@scan_args);
}
1079
1080
1081
1082
1083

# Find the comment associated with a line.
#   $first_line - 1-based number of the first context line to scan.
#   $end_line   - 1-based number of the line of interest.
# Returns a /* ... */ comment ending line $end_line itself if there is one;
# otherwise the comment text accumulated over lines $first_line up to (not
# including) $end_line, or '' when none was found.
sub ctx_locate_comment {
	my ($first_line, $end_line) = @_;

	# Catch a comment on the end of the line itself.
	my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@);
	return $current_comment if (defined $current_comment);

	# Look through the context and try and figure out if there is a
	# comment.
	my $in_comment = 0;
	$current_comment = '';
	for (my $linenr = $first_line; $linenr < $end_line; $linenr++) {
		my $line = $rawlines[$linenr - 1];
		#warn "           $line\n";
		# A first line starting with '*' is taken to be the middle of a
		# comment that began before the context.
		if ($linenr == $first_line and $line =~ m@^.\s*\*@) {
			$in_comment = 1;
		}
		if ($line =~ m@/\*@) {
			$in_comment = 1;
		}
		# Code between comments discards whatever was collected so far.
		if (!$in_comment && $current_comment ne '') {
			$current_comment = '';
		}
		$current_comment .= $line . "\n" if ($in_comment);
		if ($line =~ m@\*/@) {
			$in_comment = 0;
		}
	}

	chomp($current_comment);
	return($current_comment);
}
# Return true when ctx_locate_comment() finds a non-empty comment for the
# given line range.
sub ctx_has_comment {
	my ($first_line, $end_line) = @_;
	my $cmt = ctx_locate_comment($first_line, $end_line);

	##print "LINE: $rawlines[$end_line - 1 ]\n";
	##print "CMMT: $cmt\n";

	return ($cmt ne '');
}

1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
# Fetch a raw line relative to a starting line, ignoring removed lines.
#   $linenr - 1-based number of the starting line.
#   $cnt    - how many further non-removed lines to step forward.
# Lines starting with '-' are not counted.  Returns the line reached, which
# is undef if the scan runs past the end of @rawlines.
sub raw_line {
	my ($linenr, $cnt) = @_;

	my $index = $linenr - 1;
	# The starting line itself counts as one step.
	my $wanted = $cnt + 1;

	my $line;
	while ($wanted) {
		$line = $rawlines[$index++];
		$wanted-- unless (defined($line) && $line =~ /^-/);
	}

	return $line;
}

1138
1139
1140
# Return a printable rendering of $vet: each control character is shown in
# caret notation ("^I" for tab) and a '$' is appended to mark the end of the
# text.
sub cat_vet {
	my ($vet) = @_;
	my ($res, $coded);

	$res = '';
	# Each match is a run of non-control characters followed by either one
	# control character or the end of the string.
	while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) {
		$res .= $1;
		if ($2 ne '') {
			$coded = sprintf("^%c", unpack('C', $2) + 64);
			$res .= $coded;
		}
	}
	$res =~ s/$/\$/;

	return $res;
}

1155
# State shared by the annotate_* routines across calls.
my $av_preprocessor = 0;
my $av_pending;
my @av_paren_type;
my $av_pend_colon;

# Restore the annotation state to its initial values.
sub annotate_reset {
	$av_preprocessor = 0;
	$av_pending = '_';
	@av_paren_type = ('E');
	$av_pend_colon = 'O';
}

1167
1168
sub annotate_values {
	my ($stream, $type) = @_;
1169

1170
	my $res;
1171
	my $var = '_' x length($stream);
1172
1173
	my $cur = $stream;

1174
	print "$stream\n" if ($dbg_values > 1);
1175
1176

	while (length($cur)) {
1177
		@av_paren_type = ('E') if ($#av_paren_type < 0);
1178
		print " <" . join('', @av_paren_type) .
1179
				"> <$type> <$av_pending>" if ($dbg_values > 1);
1180
		if ($cur =~ /^(\s+)/o) {
1181
1182
			print "WS($1)\n" if ($dbg_values > 1);
			if ($1 =~ /\n/ && $av_preprocessor) {
1183
				$type = pop(@av_paren_type);
1184
				$av_preprocessor = 0;
1185
1186
			}

1187
		} elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') {
1188
1189
			print "CAST($1)\n" if ($dbg_values > 1);
			push(@av_paren_type, $type);
1190
			$type = 'c';
1191

1192
		} elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) {
1193
			print "DECLARE($1)\n" if ($dbg_values > 1);
1194
1195
			$type = 'T';