+
+set_perl_main () {
+ adjust_rx='
+s,\\(,\\(?:,g;
+s,\\\([{(|)}?+]\),\1,g;
+'
+
+ # Add $ before arguments
+ set `echo "$@" | sed 's,\(^\|= *\),&$,g'`
+
+ cat >> "$scriptname" <<\EOF
+#! /usr/bin/perl
+
+use strict;
+use warnings;
+
+# Should we replace blobs and false positives with replacement?
+my $replace_blob = 0;
+my $replace_falsepos = 0;
+my $replacement = '/*(DEBLOBBED)*/';
+
+# Should we print lines containing blobs, false positives, and neither?
+my $print_blob = 0;
+my $with_context = 0;
+my $print_falsepos = 0;
+my $print_nomatch = 0;
+
+# Should we print the input stack and exit if we find blobs or false positives?
+my $list_blob = 0;
+my $list_falsepos = 0;
+
+# Should we forget everything we know about false positives?
+my $falsepos;
+my $no_falsepos = 0;
+
+EOF
+
+ cat >> "$scriptname" <<EOF
+my \$verbose = $vp;
+
+# Which of the defaults above should we override?
+$@ = 1;
+
+EOF
+
+ ${SED-sed} -n 's,^[+],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" -e 's,\^,,' \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; '"\
+s,^\\(.*\\)\$,\$falsepos = qr'(?<falsepos>\\1)'ms;,;\
+"' p;}' >> "$scriptname"
+
+ ${SED-sed} -n 's,^[-],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; '"\
+s,^\\(.*\\)\$,my \$blob = qr'(?<blob>\\1)'ms;,;\
+"' p;}' >> "$scriptname"
+
+ echo "\\($initblob\\|$defineblob\\|$asmblob\\)" |
+ ${SED-sed} -e "$adjust_rx" \
+ -e "s,^\\(.*\\)\$,my \$cblob = qr'(?<cblob>\\1)'ms if \$with_context;," >> "$scriptname"
+
+ cat >> "$scriptname" <<\EOF
+
+$falsepos = qr/(?<falsepos>(?!))/ if $no_falsepos || ! defined $falsepos;
+
+my $rx = qr/^$falsepos|$blob/ms;
+
+$rx = qr/$rx|^$cblob/ms if $with_context;
+
+my @filenames;
+my $nfilenames = 0;
+my $nextnfilenames;
+
+my $s = '';
+
+while (<STDIN>) {
+ # Read into s all lines between begin and end. An empty line, without
+ # even the '\n', flags the end of the input.
+ if (m:^[;][/][*](begin|end) (.*)[*][/][;]$:) {
+ if ($1 eq 'begin') {
+ print "entering $2\n" if $verbose;
+ $filenames[$nfilenames] = $2;
+ $nextnfilenames = $nfilenames + 1;
+ if ($s eq '') {
+ $nfilenames = $nextnfilenames;
+ next;
+ }
+ } else {
+ $nextnfilenames = $nfilenames - 1;
+ print "processing $filenames[$nextnfilenames]\n" if $verbose;
+ }
+ } else {
+ $s .= $_;
+ next;
+ }
+
+ if ($verbose) {
+ print "looking for matches in\n";
+ for (my $i = $nfilenames; --$i > 0; ) {
+ print $filenames[$i], " within\n";
+ }
+ print $filenames[0], "\n";
+ }
+
+ $s =~ s/[\n]$//;
+
+ my $pp = my $p = 0;
+
+ my $matchfound = substr ($s, $p) =~ /$rx/o;
+ while ($matchfound) {
+ print "found first match\n" if $verbose;
+ my $firstmatchstart = $-[0] + $p;
+ my $blobs = my $falses = 0;
+ my $matchstart = $-[0] + $p;
+ my $pend = -1;
+ my $blob_p;
+ do {{
+ my $matchend = $+[0] + $p;
+ print "found match $matchstart..$matchend\n" if $verbose;
+ print "$&" if $verbose > 1;
+
+ if (defined $+{'cblob'}) {
+ print "match is a blob context\n" if ($verbose);
+ $pend = index ($s, "\n", $matchend) + 1;
+ $pend = length $s if !$pend;
+ }
+
+ if (defined $+{'falsepos'}) {
+ print "match is a false positive\n" if ($verbose);
+ # $matchend -= $+[0] - $+[1];
+ $blob_p = 0;
+ $falses++;
+ } elsif (defined $+{'blob'}) {
+ $blob_p = 1;
+ $blobs++;
+ print "match is a blob at $matchstart\n" if ($verbose);
+ } else {
+ $blob_p = 2;
+ $p = $matchstart;
+ print "searching up to $pend\n" if $verbose;
+ next;
+ }
+
+ if ($blob_p ? $replace_blob : $replace_falsepos) {
+ substr ($s, $matchstart, $matchend - $matchstart,
+ $replacement);
+ $p = $matchstart + length $replacement;
+ $pend += $p - $matchend if $pend >= $matchstart;
+ } else {
+ $p = $matchend;
+ }
+
+ $pend = index ($s, "\n", $p) + 1 if $p >= $pend;
+ $pend = length $s if !$pend;
+ print "searching up to $pend\n" if $verbose;
+ $p--;
+ }} while (($matchfound = (substr ($s, $p) =~ /(?<=.)$rx/mso))
+ && ($matchstart = $-[0] + $p) < $pend
+ && !($blob_p
+ ? (!$print_blob && !$falses)
+ : (!$print_falsepos && !$blobs)));
+
+ print "last match before $pend\n" if $verbose;
+
+ if ($print_nomatch) {
+ print substr ($s, $pp, $firstmatchstart - $pp);
+ $pp = $firstmatchstart;
+ } elsif (($print_blob || $print_falsepos) && $firstmatchstart > 0) {
+ $pp = rindex ($s, "\n", $firstmatchstart - 1) + 1;
+ }
+
+ if (($print_blob && $blobs) || ($print_falsepos && $falses)) {
+ if (!$print_nomatch) {
+ for (my $i = $nfilenames; $i-- > 0;) {
+ print "::: ", $filenames[$i], " :::\n";
+ }
+ }
+
+ print substr ($s, $pp, $pend - $pp);
+ $pp = $pend;
+ }
+
+ if (($list_blob && $blobs) || ($list_falsepos && $falses)) {
+ for (my $i = $nfilenames; --$i > 0;) {
+ print $filenames[$i], " within ";
+ }
+ print $filenames[0], "\n";
+ exit (1);
+ }
+ }
+
+ print substr ($s, $pp) if $print_nomatch;
+
+ print "no further matches\n" if $verbose;
+
+ $s = '';
+ $nfilenames = $nextnfilenames;
+}
+
+exit (0);
+EOF
+
+ scriptcmd="${PERL-perl} "'"$scriptname"'
+}
+
+set_awk_main () {
+ adjust_rx='
+s,[$]$,([\\n]|$),;
+s,\[^\],[^\\],g;
+s,\\\([{(|)}?+]\),\1,g;
+'
+
+ case " = $@ = " in
+ *" = no_falsepos = "*) falsepos='$.^';;
+ *) falsepos=`
+ ${SED-sed} -n 's,^[+],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" -e 's,\^,,' \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; p;}'
+ `
+ case $falsepos in "") falsepos='$.^';; esac;;
+ esac
+
+ blob=`
+ ${SED-sed} -n 's,^[-],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; p;}'`
+
+ case " = $@ = " in
+ *" = with_context = "*) cblob=`
+ $echo "\\($initblob\\|$defineblob\\|$asmblob\\)" |
+ ${SED-sed} -e "$adjust_rx"
+ `;;
+ *) cblob='$.^';;
+ esac
+
+ xrs= nrs="# " eor="RT" eormatch='RT ~ ' eornl='[\n]' eornlsz=1
+ # Uncomment the line below to disable the use of a regular
+ # expression for the awk Record Separator, a GNU awk extension.
+ # Using this extension appears to save a lot of memory for long
+ # deblob-check runs.
+ # xrs="# " nrs= eor='$0' eormatch='' eornl= eornlsz=0
+
+ cat >> "$scriptname" <<EOF
+#! /bin/gawk --re-interval -f
+
+BEGIN {
+ # Should we replace blobs and false positives with replacement?
+ replace_blob = 0;
+ replace_falsepos = 0;
+ replacement = "/*(DEBLOBBED)*/";
+
+ # Should we print lines containing blobs, false positives, and neither?
+ print_blob = 0;
+ with_context = 0;
+ print_falsepos = 0;
+ print_nomatch = 0;
+
+ # Should we print the input stack and exit if we find blobs or
+ # false positives?
+ list_blob = 0;
+ list_falsepos = 0;
+
+ # Should we forget everything we know about false positives?
+ no_falsepos = 0;
+
+ verbose = $vp;
+
+ nfilenames = 0;
+ s = "\n";
+
+ # Which of the defaults above should we override?
+ $@ = 1;
+
+ # requires GNU awk RS extension:
+$xrs RS = "[;][/][*](begin|end) [^\n]*[*][/][;][\n]";
+}
+# requires GNU awk RS extension:
+$xrs { s = s \$0; }
+# does not require GNU awk RS extension:
+$nrs !/^[;][/][*].*[*][/][;]$/ {
+$nrs s = s \$0 "\n";
+$nrs next;
+$nrs }
+$eormatch /^[;][/][*]begin .*[*][/][;]$eornl$/ {
+ filenames[nfilenames] = substr($eor, 10, length ($eor) - 12 - $eornlsz);
+ if (verbose) print "entering " nfilenames ": " filenames[nfilenames];
+ nextnfilenames = nfilenames + 1;
+ if (s == "\n") {
+ nfilenames = nextnfilenames;
+ next;
+ }
+}
+$eormatch /^[;][/][*]end .*[*][/][;]$eornl$/ {
+ nextnfilenames = nfilenames - 1;
+ if (verbose)
+ print "got to the end of " nextnfilenames ": " filenames[nextnfilenames];
+}
+{
+ if (verbose) {
+ print "looking for matches";
+ for (i = nfilenames; --i > 0;)
+ print filenames[i] " within";
+ print filenames[0]
+ }
+
+ s = substr (s, 1, length (s) - 1)
+
+ pp = 2;
+ p = pend = 1;
+ if (verbose > 1) print "searching starting at", substr (s, p, 10)
+ matchfound = match (substr (s, p),
+ /[\n]($falsepos)|[\n]($cblob)|.($blob)/);
+ while (matchfound) {
+ blobs = falses = 0;
+ firstmatchstart = RSTART + p;
+ for (;;) {
+ matchstart = RSTART + p - 1;
+ matchlen = RLENGTH;
+ if (verbose) {
+ print "found match", matchstart, matchlen;
+ if (verbose > 1)
+ print substr (s, matchstart + 1, matchlen - 1);
+ }
+
+ if (match (substr (s, matchstart, matchlen), /^[\n]($falsepos)/) == 1) {
+ matchlen = RLENGTH;
+ if (verbose) print "match is a false positive of length", matchlen;
+ blob_p = 0;
+ falses++;
+ } else if (match (substr (s, matchstart, matchlen), /^.($blob)/) == 1) {
+ matchlen = RLENGTH;
+ if (verbose) print "match is a blob of length", matchlen;
+ blob_p = 1;
+ blobs++;
+ } else if (match (substr (s, matchstart, matchlen), /^[\n]($cblob)$/) == 1) {
+ if (verbose) print "match is a blob context";
+ pend = index (substr (s, matchstart + matchlen), "\n");
+ if (pend)
+ pend += matchstart + matchlen;
+ else
+ pend = length (s);
+ p = matchstart + 1;
+ blob_p = 2;
+ if (verbose > 1) print "range is:", substr (s, p, pend - p);
+ }
+
+ if (blob_p < 2) {
+ if (blob_p ? replace_blob : replace_falsepos) {
+ s = substr (s, 1, matchstart) \\
+ replacement \\
+ substr (s, matchstart + matchlen);
+ p = matchstart + length (replacement) - 1;
+ pend += (p + 1 - matchstart - matchlen);
+ } else
+ p = matchstart + matchlen - 1;
+
+ if (p >= pend) {
+ i = index (substr (s, p + 1), "\n");
+ if (i)
+ pend = p + 1 + i;
+ else
+ pend = length (s)
+ }
+ }
+
+ if (verbose) print "search until", pend;
+
+ if (!(matchfound = match (substr (s, p),
+ /[\n]($falsepos)|[\n]($cblob)|.($blob)/)) ||
+ p + RSTART >= pend ||
+ (blob_p ?
+ (!print_blob && !falses) :
+ (!print_falsepos && !blobs)))
+ break;
+ }
+
+ if (print_nomatch)
+ printf "%s", substr (s, pp, firstmatchstart - pp);
+ else if (print_blob || print_falsepos) {
+ lastline = substr (s, pp, firstmatchstart - pp);
+ sub (/.*[\n]/, "", lastline);
+ if (verbose) print "lastline: " lastline "\\\\n"
+ firstmatchstart -= length (lastline);
+ }
+ pp = firstmatchstart;
+
+ if (verbose) print "match set range:", pp, pend
+
+ if ((print_blob && blobs) || (print_falsepos && falses)) {
+ if (!print_nomatch)
+ for (i = nfilenames; i-- > 0;)
+ print "::: " filenames[i] " :::";
+ printf "%s", substr (s, pp, pend - pp);
+ pp = pend;
+ }
+
+ if ((list_blob && blobs) || (list_falsepos && falses)) {
+ for (i = nfilenames; --i > 0;)
+ print filenames[i] " within";
+ print filenames[0];
+ exit (1);
+ }
+ }
+
+ if (print_nomatch)
+ printf "%s", substr (s, pp)
+
+ if (verbose)
+ print "no further matches";
+
+ s = "\n";
+ nfilenames = nextnfilenames;
+ next;
+}
+EOF
+
+ scriptcmd="${AWK-gawk} --re-interval -f "'"$scriptname"'
+}
+
+set_flex_main () {
+ adjust_rx='