From 4620c384181532572dc29081eae5e04648951bfb Mon Sep 17 00:00:00 2001 From: lxoliva Date: Fri, 22 Jan 2010 22:57:10 +0000 Subject: [PATCH] Add --save-script-input. Move flex-related functions. Adjust sed hashbang line: add -n, remove line before. Use gawk instead of awk. Use begin/end as RS in gawk by default, to further reduce memory use. git-svn-id: http://www.fsfla.org/svn/fsfla/software/linux-libre/scripts@5874 559672b5-ba27-0410-b829-e8f1faed8b1b --- deblob-check-awk | 91 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 24 deletions(-) diff --git a/deblob-check-awk b/deblob-check-awk index 180935a06b9..fe0059e4fe8 100755 --- a/deblob-check-awk +++ b/deblob-check-awk @@ -338,10 +338,6 @@ test_mode=false name=deblob-check -set_flex_cmd () { - set_flex_main -} - set_eqscript_main () { $set_main_cmd "$@" } @@ -359,13 +355,22 @@ ERROR)*/ q 1" } +set_flex_cmd () { + set_flex_main +} + +set_save_script_input_cmd () { + set_save_script_input_main +} + set_cmd=set_eqscript_cmd # GNU awk works fine, but it requires --re-interval to accept regexp # ranges, which we rely on to match blobs. We could expand the blob # on our own, but, yuck. -if (${AWK-awk} --re-interval --version) > /dev/null 2>&1; then +if (${AWK-gawk} --re-interval --version) > /dev/null 2>&1; then set_main_cmd=set_awk_main # Don't choose python by default, it exhibits exponential behavior +# (see http://swtch.com/~rsc/regexp/regexp1.html for details) # processing lines containing /* such as this: # Documentation/sysctl/*, swap/mm readaround # Try it: deblob-check --use-python linux-2.6.32/CREDITS @@ -375,6 +380,7 @@ elif (${PYTHON-false} --version) > /dev/null 2>&1; then set_main_cmd=set_python_main # Sed takes GBs of RAM to compile all the huge regexps in the sed # script we generate with all known false positives and blobs in Linux. +# However, it is somewhat faster than GNU awk for long runs. # Try it: deblob-check --use-sed -i linux-2.6.32 /dev/null else set_cmd=set_sed_cmd @@ -395,12 +401,17 @@ case $1 in --use-sed) shift; - set_cmd=set_sed_cmd + set_cmd=set_sed_cmd; ;; --gen-flex) shift; - set_cmd=set_flex_cmd + set_cmd=set_flex_cmd; + ;; + +--save-script-input) + shift; + set_cmd=set_save_script_input_cmd; ;; esac @@ -2640,8 +2651,7 @@ g check_false_positives= fi - $echo " -#! /bin/sed -f + $echo "#! /bin/sed -nf /^$/N /^[\\n]\\?;[/][*]\\(end .*\\)\\?[*][/];$/{ @@ -3457,8 +3467,14 @@ s,\\\([{(|)}?+]\),\1,g; *) cblob='$.^';; esac + if ${DONT_USE_GAWK_EXTENSIONS-false}; then + xrs="# " nrs= eor='$0' eormatch='' eornl= eornlsz=0 + else + xrs= nrs="# " eor="RT" eormatch='RT ~ ' eornl='[\n]' eornlsz=1 + fi + cat >> "$scriptname" <= 0;) print filenames[i] " within"; print filenames[0] } @@ -3602,7 +3620,7 @@ BEGIN { } if ((list_blob && blobs) || (list_falsepos && falses)) { - for (i = nfilenames; --i;) + for (i = nfilenames; --i >= 0;) print filenames[i] " within"; print filenames[0]; exit (1); @@ -3621,7 +3639,32 @@ BEGIN { } EOF - scriptcmd="${AWK-awk} --re-interval -f "'"$scriptname"' + scriptcmd="${AWK-gawk} --re-interval -f "'"$scriptname"' +} + +set_flex_main () { + adjust_rx=' +s,\\\([{(|)}?+]\),\1,g +s,^\([-+]\)\(\^\?\)\(.*\)\(\$\?\)$,\2(?s:\3)\4\1,g +s,[+]$, { falsepos (); }, +s,[-]$, { blob (); }, +' + + echo '%%' > "$scriptname" + sed "$adjust_rx" < "$regex_name" >> "$scriptname" + echo '\n|. { unmatched (); } +%% +int falsepos () {} +int blob () {} +int unmatched () {} +' >> "$scriptname" + + scriptcmd=false +} + +set_save_script_input_main () { + savename=`mktemp -t deblob-check-input-XXXXXX` + scriptcmd="{ echo saving input in $savename && cat > $savename && echo done; }" } # Process an input file named in $1 and run it through the blob -- 2.31.1