+" > "$scriptname"
+
+ scriptcmd='${SED-sed} -n -f "$scriptname"'
+
+ case $vp in
+ [01]) xv= ;;
+ 2) xv='# ';;
+ esac
+
+ sedunbreak='
+: restart
+/[/][*](DEBLOB-$/ {
+ N
+ /[/][*](DEBLOB-[\n]ERROR)[*][/]/{q 1;}'"
+$xv"'s,[/][*](DEBLOB-[\n]BED)[*][/],,
+ b restart
+}
+p
+'
+ scriptcmd2='${SED-sed} -n -e "$sedunbreak"'
+}
+
+set_flex_main () {
+ adjust_rx='
+s,\\\([{(|)}?+]\),\1,g
+s,^\([-+]\)\(\^\?\)\(.*\)\(\$\?\)$,\2(?s:\3)\4\1,g
+s,[+]$, { falsepos (); },
+s,[-]$, { blob (); },
+'
+
+ echo '%%' > "$scriptname"
+ ${SED-sed} "$adjust_rx" < "$regex_name" >> "$scriptname"
+ echo '\n|. { unmatched (); }
+%%
+int falsepos () {}
+int blob () {}
+int unmatched () {}
+' >> "$scriptname"
+
+ scriptcmd=false
+}
+
+set_python_main () {
+ adjust_rx='
+s,\\(,\\(?:,g;
+s,\\\([{(|)}?+]\),\1,g;
+'
+
+ cat >> "$scriptname" <<EOF
+#! /usr/bin/python
+
+import sys
+import re
+
+# Should we replace blobs and false positives with replacement?
+replace_blob = 0
+replace_falsepos = 0
+replacement = '/*(DEBLOBBED)*/'
+
+# Should we print lines containing blobs, false positives, and neither?
+print_blob = 0
+with_context = 0
+print_falsepos = 0
+print_nomatch = 0
+
+# Should we print the input stack if we find blobs or false positives?
+list_blob = 0
+list_falsepos = 0
+
+# Should we forget everything we know about false positives?
+falsepos = None
+no_falsepos = 0
+
+verbose = $vp
+
+# Which of the defaults above should we override?
+$@ = 1
+
+EOF
+
+ if test "X$DEBLOB_CHECK_PYTHON_REGEX" = Xdebug; then
+ ${SED-sed} -e 's,^[+-],,' -e "$adjust_rx" \
+ -e "s,.*,re.compile (r'&'),g" \
+ < "$regex_name" >> "$scriptname"
+ fi
+
+ ${SED-sed} -n 's,^[+],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" -e 's,\^,,' \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; '"\
+s,^\\(.*\\)\$,falsepos = r'(?P<falsepos>\\1)',;\
+"' p;}' >> "$scriptname"
+
+ ${SED-sed} -n 's,^[-],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; '"\
+s,^\\(.*\\)\$,blob = r'(?P<blob>\\1)',;\
+"' p;}' >> "$scriptname"
+
+ echo "\\($initblob\\|$defineblob\\|$asmblob\\)" |
+ ${SED-sed} -e "$adjust_rx" \
+ -e "s,^\\(.*\\)\$,cblob = r'(?P<cblob>\\1)'," >> "$scriptname"
+
+ cat >> "$scriptname" <<\EOF
+
+if no_falsepos or falsepos is None:
+ falsepos = r'(?!)'
+
+rx = '^%s|%s' % (falsepos, blob)
+
+if with_context:
+ rx += '|^' + cblob
+
+rxc = re.compile('(?<=.)(?:%s)' % rx, re.M | re.S)
+
+filenames = None
+
+s = '\n'
+
+for line in sys.stdin:
+ # Read into s all lines between begin and end. An empty line, without
+ # even the '\n', flags the end of the input.
+ if line[:3] == ';/*' and line[-4:] == '*/;\n':
+ if line[3:9] == 'begin ':
+ nextfilenames = (line[9:-4], filenames)
+ if s == '\n':
+ filenames = nextfilenames
+ del nextfilenames
+ continue
+ elif line[3:7] == 'end ':
+ #if print_blob and not print_nomatch:
+ # from time import time
+ # sys.stderr.write('%i %i %s\n' % (time(), len(s), filenames[0]))
+ assert line[7:-4] == filenames[0]
+ nextfilenames = filenames[1]
+ else:
+ assert filenames != None
+ s += line
+ continue
+ else:
+ assert filenames != None
+ s += line
+ continue
+
+ if verbose:
+ print 'looking for matches'
+ sfilenames = filenames
+ while filenames != None:
+ if filenames[1] is None:
+ print filenames[0]
+ else:
+ print filenames[0] + ' within'
+ filenames = filenames[1]
+ filenames = sfilenames
+
+ if s[-1] == '\n':
+ s = s[:-1]
+
+ pp = 1
+ p = pend = 0
+ match = rxc.search (s, p)
+ while match != None:
+ firstmatch = match
+ blobs = falses = 0
+ while 1:
+ if verbose:
+ print 'found match'
+ what = match.lastgroup
+
+ if what == 'cblob':
+ if verbose: print 'match is a blob context'
+ pend = s.find ('\n', match.end()) + 1
+ if pend == 0:
+ pend = len(s)
+ p = match.start() + 1
+ blob_p = 2
+ else:
+ blob_p = what == 'blob'
+ assert blob_p or what == 'falsepos'
+
+ if blob_p:
+ if verbose: print 'match is a blob'
+ blobs += 1
+ else:
+ if verbose: print 'match is a false positive'
+ falses += 1
+
+ if blob_p and replace_blob or not blob_p and replace_falsepos:
+ s = s[:match.start(what)] + replacement + s[match.end(what):]
+ p = match.start(what) + len(replacement)
+ if pend > match.start(what):
+ pend += p - match.end(what)
+ else:
+ p = match.end(what)
+
+ if p > pend:
+ pend = s.find ('\n', p) + 1
+ if (pend == 0):
+ pend = len(s)
+
+ match = rxc.search (s, p)
+ if match is None or match.start () >= pend or \
+ (blob_p and not print_blob and not falses) or \
+ (not blob_p and not print_falsepos and not blobs):
+ break
+
+ if print_nomatch:
+ sys.stdout.write (s[pp:firstmatch.start() + 1])
+ pp = firstmatch.start() + 1
+ else:
+ pp = s.rfind ('\n', 0, firstmatch.start () + 1) + 1
+
+ if print_blob and blobs or print_falsepos and falses:
+ if not print_nomatch:
+ sfilenames = filenames
+ while filenames != None:
+ print '::: ' + filenames[0] + ' :::'
+ filenames = filenames[1]
+ filenames = sfilenames
+ sys.stdout.write (s[pp:pend])
+ pp = pend
+
+ if list_blob and blobs or list_falsepos and falses:
+ while filenames != None:
+ if filenames[1] is None:
+ print filenames[0]
+ else:
+ print filenames[0] + ' within'
+ filenames = filenames[1]
+ exit (1)
+
+ if print_nomatch:
+ sys.stdout.write(s[pp:])
+
+ if verbose:
+ print 'no further matches'
+
+ s = '\n'
+ filenames = nextfilenames
+ del nextfilenames
+
+assert filenames is None
+
+exit (0)
+EOF
+
+ scriptcmd="${PYTHON-python} "'"$scriptname"'
+}
+
+set_perl_main () {
+ adjust_rx='
+s,\\(,\\(?:,g;
+s,\\\([{(|)}?+]\),\1,g;
+'
+
+ # Add $ before arguments
+ set `echo "$@" | sed 's,\(^\|= *\),&$,g'`
+
+ cat >> "$scriptname" <<\EOF
+#! /usr/bin/perl
+
+use strict;
+use warnings;
+
+# Should we replace blobs and false positives with replacement?
+my $replace_blob = 0;
+my $replace_falsepos = 0;
+my $replacement = '/*(DEBLOBBED)*/';
+
+# Should we print lines containing blobs, false positives, and neither?
+my $print_blob = 0;
+my $with_context = 0;
+my $print_falsepos = 0;
+my $print_nomatch = 0;
+
+# Should we print the input stack and exit if we find blobs or false positives?
+my $list_blob = 0;
+my $list_falsepos = 0;
+
+# Should we forget everything we know about false positives?
+my $falsepos;
+my $no_falsepos = 0;
+
+EOF
+
+ cat >> "$scriptname" <<EOF
+my \$verbose = $vp;
+
+# Which of the defaults above should we override?
+$@ = 1;
+
+EOF
+
+ ${SED-sed} -n 's,^[+],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" -e 's,\^,,' \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; '"\
+s,^\\(.*\\)\$,\$falsepos = qr'(?<falsepos>\\1)'ms;,;\
+"' p;}' >> "$scriptname"
+
+ ${SED-sed} -n 's,^[-],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; '"\
+s,^\\(.*\\)\$,my \$blob = qr'(?<blob>\\1)'ms;,;\
+"' p;}' >> "$scriptname"
+
+ echo "\\($initblob\\|$defineblob\\|$asmblob\\)" |
+ ${SED-sed} -e "$adjust_rx" \
+ -e "s,^\\(.*\\)\$,my \$cblob = qr'(?<cblob>\\1)'ms if \$with_context;," >> "$scriptname"
+
+ cat >> "$scriptname" <<\EOF
+
+$falsepos = qr/(?<falsepos>(?!))/ if $no_falsepos || ! defined $falsepos;
+
+my $rx = qr/^$falsepos|$blob/ms;
+
+$rx = qr/$rx|^$cblob/ms if $with_context;
+
+my @filenames;
+my $nfilenames = 0;
+my $nextnfilenames;
+
+my $s = '';
+
+while (<STDIN>) {
+ # Read into s all lines between begin and end. An empty line, without
+ # even the '\n', flags the end of the input.
+ if (m:^[;][/][*](begin|end) (.*)[*][/][;]$:) {
+ if ($1 eq 'begin') {
+ print "entering $2\n" if $verbose;
+ $filenames[$nfilenames] = $2;
+ $nextnfilenames = $nfilenames + 1;
+ if ($s eq '') {
+ $nfilenames = $nextnfilenames;
+ next;
+ }
+ } else {
+ $nextnfilenames = $nfilenames - 1;
+ print "processing $filenames[$nextnfilenames]\n" if $verbose;
+ }
+ } else {
+ $s .= $_;
+ next;
+ }
+
+ if ($verbose) {
+ print "looking for matches in\n";
+ for (my $i = $nfilenames; --$i > 0; ) {
+ print $filenames[$i], " within\n";
+ }
+ print $filenames[0], "\n";
+ }
+
+ $s =~ s/[\n]$//;
+
+ my $pp = my $p = 0;
+
+ my $matchfound = substr ($s, $p) =~ /$rx/o;
+ while ($matchfound) {
+ print "found first match\n" if $verbose;
+ my $firstmatchstart = $-[0] + $p;
+ my $blobs = my $falses = 0;
+ my $matchstart = $-[0] + $p;
+ my $pend = -1;
+ my $blob_p;
+ do {{
+ my $matchend = $+[0] + $p;
+ print "found match $matchstart..$matchend\n" if $verbose;
+ print "$&" if $verbose > 1;
+
+ if (defined $+{'cblob'}) {
+ print "match is a blob context\n" if ($verbose);
+ $pend = index ($s, "\n", $matchend) + 1;
+ $pend = length $s if !$pend;
+ }
+
+ if (defined $+{'falsepos'}) {
+ print "match is a false positive\n" if ($verbose);
+ # $matchend -= $+[0] - $+[1];
+ $blob_p = 0;
+ $falses++;
+ } elsif (defined $+{'blob'}) {
+ $blob_p = 1;
+ $blobs++;
+ print "match is a blob at $matchstart\n" if ($verbose);
+ } else {
+ $blob_p = 2;
+ $p = $matchstart;
+ print "searching up to $pend\n" if $verbose;
+ next;
+ }
+
+ if ($blob_p ? $replace_blob : $replace_falsepos) {
+ substr ($s, $matchstart, $matchend - $matchstart,
+ $replacement);
+ $p = $matchstart + length $replacement;
+ $pend += $p - $matchend if $pend >= $matchstart;
+ } else {
+ $p = $matchend;
+ }
+
+ $pend = index ($s, "\n", $p) + 1 if $p >= $pend;
+ $pend = length $s if !$pend;
+ print "searching up to $pend\n" if $verbose;
+ $p--;
+ }} while (($matchfound = (substr ($s, $p) =~ /(?<=.)$rx/mso))
+ && ($matchstart = $-[0] + $p) < $pend
+ && !($blob_p
+ ? (!$print_blob && !$falses)
+ : (!$print_falsepos && !$blobs)));
+
+ print "last match before $pend\n" if $verbose;
+
+ if ($print_nomatch) {
+ print substr ($s, $pp, $firstmatchstart - $pp);
+ $pp = $firstmatchstart;
+ } elsif (($print_blob || $print_falsepos) && $firstmatchstart > 0) {
+ $pp = rindex ($s, "\n", $firstmatchstart - 1) + 1;
+ }
+
+ if (($print_blob && $blobs) || ($print_falsepos && $falses)) {
+ if (!$print_nomatch) {
+ for (my $i = $nfilenames; $i-- > 0;) {
+ print "::: ", $filenames[$i], " :::\n";
+ }
+ }
+
+ print substr ($s, $pp, $pend - $pp);
+ $pp = $pend;
+ }
+
+ if (($list_blob && $blobs) || ($list_falsepos && $falses)) {
+ for (my $i = $nfilenames; --$i > 0;) {
+ print $filenames[$i], " within ";
+ }
+ print $filenames[0], "\n";
+ exit (1);
+ }
+ }
+
+ print substr ($s, $pp) if $print_nomatch;
+
+ print "no further matches\n" if $verbose;
+
+ $s = '';
+ $nfilenames = $nextnfilenames;
+}
+
+exit (0);
+EOF
+
+ scriptcmd="${PERL-perl} "'"$scriptname"'
+}
+
+set_awk_main () {
+ adjust_rx='
+s,[$]$,([\\n]|$),;
+s,\[^\],[^\\],g;
+s,\\\([{(|)}?+]\),\1,g;
+'
+
+ case " = $@ = " in
+ *" = no_falsepos = "*) falsepos='$.^';;
+ *) falsepos=`
+ ${SED-sed} -n 's,^[+],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" -e 's,\^,,' \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; p;}'
+ `
+ case $falsepos in "") falsepos='$.^';; esac;;
+ esac
+
+ blob=`
+ ${SED-sed} -n 's,^[-],,p' < "$regex_name" |
+ ${SED-sed} -n -e "$adjust_rx" \
+ -e '1h; 1!H; $ { g; s,[\n],|,g; p;}'`
+
+ case " = $@ = " in
+ *" = with_context = "*) cblob=`
+ $echo "\\($initblob\\|$defineblob\\|$asmblob\\)" |
+ ${SED-sed} -e "$adjust_rx"
+ `;;
+ *) cblob='$.^';;
+ esac
+
+ xrs= nrs="# " eor="RT" eormatch='RT ~ ' eornl='[\n]' eornlsz=1
+ # Uncomment the line below to disable the use of a regular
+ # expression for the awk Record Separator, a GNU awk extension.
+ # Using this extension appears to save a lot of memory for long
+ # deblob-check runs.
+ # xrs="# " nrs= eor='$0' eormatch='' eornl= eornlsz=0
+
+ cat >> "$scriptname" <<EOF
+#! /bin/gawk --re-interval -f
+
+BEGIN {
+ # Should we replace blobs and false positives with replacement?
+ replace_blob = 0;
+ replace_falsepos = 0;
+ replacement = "/*(DEBLOBBED)*/";
+
+ # Should we print lines containing blobs, false positives, and neither?
+ print_blob = 0;
+ with_context = 0;
+ print_falsepos = 0;
+ print_nomatch = 0;
+
+ # Should we print the input stack and exit if we find blobs or
+ # false positives?
+ list_blob = 0;
+ list_falsepos = 0;
+
+ # Should we forget everything we know about false positives?
+ no_falsepos = 0;
+
+ verbose = $vp;
+
+ nfilenames = 0;
+ s = "\n";
+
+ # Which of the defaults above should we override?
+ $@ = 1;
+
+ # requires GNU awk RS extension:
+$xrs RS = "[;][/][*](begin|end) [^\n]*[*][/][;][\n]";
+}
+# requires GNU awk RS extension:
+$xrs { s = s \$0; }
+# does not require GNU awk RS extension:
+$nrs !/^[;][/][*].*[*][/][;]$/ {
+$nrs s = s \$0 "\n";
+$nrs next;
+$nrs }
+$eormatch /^[;][/][*]begin .*[*][/][;]$eornl$/ {
+ filenames[nfilenames] = substr($eor, 10, length ($eor) - 12 - $eornlsz);
+ if (verbose) print "entering " nfilenames ": " filenames[nfilenames];
+ nextnfilenames = nfilenames + 1;
+ if (s == "\n") {
+ nfilenames = nextnfilenames;
+ next;
+ }
+}
+$eormatch /^[;][/][*]end .*[*][/][;]$eornl$/ {
+ nextnfilenames = nfilenames - 1;
+ if (verbose)
+ print "got to the end of " nextnfilenames ": " filenames[nextnfilenames];
+}
+{
+ if (verbose) {
+ print "looking for matches";
+ for (i = nfilenames; --i > 0;)
+ print filenames[i] " within";
+ print filenames[0]
+ }
+
+ s = substr (s, 1, length (s) - 1)
+
+ pp = 2;
+ p = pend = 1;
+ if (verbose > 1) print "searching starting at", substr (s, p, 10)
+ matchfound = match (substr (s, p),
+ /[\n]($falsepos)|[\n]($cblob)|.($blob)/);
+ while (matchfound) {
+ blobs = falses = 0;
+ firstmatchstart = RSTART + p;
+ for (;;) {
+ matchstart = RSTART + p - 1;
+ matchlen = RLENGTH;
+ if (verbose) {
+ print "found match", matchstart, matchlen;
+ if (verbose > 1)
+ print substr (s, matchstart + 1, matchlen - 1);
+ }
+
+ if (match (substr (s, matchstart, matchlen), /^[\n]($falsepos)/) == 1) {
+ matchlen = RLENGTH;
+ if (verbose) print "match is a false positive of length", matchlen;
+ blob_p = 0;
+ falses++;
+ } else if (match (substr (s, matchstart, matchlen), /^.($blob)/) == 1) {
+ matchlen = RLENGTH;
+ if (verbose) print "match is a blob of length", matchlen;
+ blob_p = 1;
+ blobs++;
+ } else if (match (substr (s, matchstart, matchlen), /^[\n]($cblob)$/) == 1) {
+ if (verbose) print "match is a blob context";
+ pend = index (substr (s, matchstart + matchlen), "\n");
+ if (pend)
+ pend += matchstart + matchlen;
+ else
+ pend = length (s);
+ p = matchstart + 1;
+ blob_p = 2;
+ if (verbose > 1) print "range is:", substr (s, p, pend - p);
+ }
+
+ if (blob_p < 2) {
+ if (blob_p ? replace_blob : replace_falsepos) {
+ s = substr (s, 1, matchstart) \\
+ replacement \\
+ substr (s, matchstart + matchlen);
+ p = matchstart + length (replacement) - 1;
+ pend += (p + 1 - matchstart - matchlen);
+ } else
+ p = matchstart + matchlen - 1;
+
+ if (p >= pend) {
+ i = index (substr (s, p + 1), "\n");
+ if (i)
+ pend = p + 1 + i;
+ else
+ pend = length (s)
+ }
+ }
+
+ if (verbose) print "search until", pend;
+
+ if (!(matchfound = match (substr (s, p),
+ /[\n]($falsepos)|[\n]($cblob)|.($blob)/)) ||
+ p + RSTART >= pend ||
+ (blob_p ?
+ (!print_blob && !falses) :
+ (!print_falsepos && !blobs)))
+ break;
+ }
+
+ if (print_nomatch)
+ printf "%s", substr (s, pp, firstmatchstart - pp);
+ else if (print_blob || print_falsepos) {
+ lastline = substr (s, pp, firstmatchstart - pp);
+ sub (/.*[\n]/, "", lastline);
+ if (verbose) print "lastline: " lastline "\\\\n"
+ firstmatchstart -= length (lastline);
+ }
+ pp = firstmatchstart;
+
+ if (verbose) print "match set range:", pp, pend
+
+ if ((print_blob && blobs) || (print_falsepos && falses)) {
+ if (!print_nomatch)
+ for (i = nfilenames; i-- > 0;)
+ print "::: " filenames[i] " :::";
+ printf "%s", substr (s, pp, pend - pp);
+ pp = pend;
+ }
+
+ if ((list_blob && blobs) || (list_falsepos && falses)) {
+ for (i = nfilenames; --i > 0;)
+ print filenames[i] " within";
+ print filenames[0];
+ exit (1);
+ }
+ }
+
+ if (print_nomatch)
+ printf "%s", substr (s, pp)
+
+ if (verbose)
+ print "no further matches";
+
+ s = "\n";
+ nfilenames = nextnfilenames;
+ next;
+}
+EOF
+
+ scriptcmd="${AWK-gawk} --re-interval -f "'"$scriptname"'
+}
+
+set_flex_main () {
+ adjust_rx='
+s,\\\([{(|)}?+]\),\1,g
+s,^\([-+]\)\(\^\?\)\(.*\)\(\$\?\)$,\2(?s:\3)\4\1,g
+s,[+]$, { falsepos (); },
+s,[-]$, { blob (); },
+'
+
+ echo '%%' > "$scriptname"
+ ${SED-sed} "$adjust_rx" < "$regex_name" >> "$scriptname"
+ echo '\n|. { unmatched (); }
+%%
+int falsepos () {}
+int blob () {}
+int unmatched () {}
+' >> "$scriptname"
+
+ scriptcmd=false
+}
+
+set_save_script_input_main () {
+ savename=`mktemp -t deblob-check-input-XXXXXX`
+ scriptcmd="{ echo saving input in $savename && cat > $savename && echo done; }"