Linux: awk Tips and Tricks

From Define Wiki
Revision as of 17:15, 11 February 2013 by David (talk | contribs) (Created page with "NUMBERING AND CALCULATIONS: # precede each line by its line number FOR THAT FILE (left alignment). # Using a tab (\t) instead of space will preserve margins. awk '{prin...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

NUMBERING AND CALCULATIONS:

 # precede each line by its line number FOR THAT FILE (left alignment).
 # Using a tab (\t) instead of space will preserve margins.
 awk '{print FNR "\t" $0}' files*
 # precede each line by its line number FOR ALL FILES TOGETHER, with tab.
 awk '{print NR "\t" $0}' files*
 # number each line of a file (number on left, right-aligned)
 # Double the percent signs if typing from the DOS command prompt.
 awk '{printf("%5d : %s\n", NR,$0)}'
 # number each line of file, but only print numbers if line is not blank
 # Remember caveats about Unix treatment of \r (mentioned above)
 awk 'NF{$0=++a " :" $0};{print}'
 awk '{print (NF? ++a " :" :"") $0}'
 # count lines (emulates "wc -l")
 awk 'END{print NR}'
 # print the sums of the fields of every line
 awk '{s=0; for (i=1; i<=NF; i++) s=s+$i; print s}'
 # add all fields in all lines and print the sum
 awk '{for (i=1; i<=NF; i++) s=s+$i}; END{print s}'
 # print every line after replacing each field with its absolute value
 awk '{for (i=1; i<=NF; i++) if ($i < 0) $i = -$i; print }'
awk '{for (i=1; i<=NF; i++) $i = ($i < 0) ? -$i : $i; print }'
# print the total number of fields ("words") in all lines
awk '{ total = total + NF }; END {print total}' file
# print the total number of lines that contain "Beth"
awk '/Beth/{n++}; END {print n+0}' file
# print the largest first field and the line that contains it
# Intended for finding the longest string in field #1
awk '$1 > max {max=$1; maxline=$0}; END{ print max, maxline}'
# print the number of fields in each line, followed by the line
awk '{ print NF ":" $0 } '
# print the last field of each line
awk '{ print $NF }'
# print the last field of the last line
awk '{ field = $NF }; END{ print field }'
# print every line with more than 4 fields
awk 'NF > 4'
# print every line where the value of the last field is > 4
awk '$NF > 4'


TEXT CONVERSION AND SUBSTITUTION:

# IN UNIX ENVIRONMENT: convert DOS newlines (CR/LF) to Unix format
awk '{sub(/\r$/,"");print}'   # assumes EACH line ends with Ctrl-M
# IN UNIX ENVIRONMENT: convert Unix newlines (LF) to DOS format
awk '{sub(/$/,"\r");print}
# IN DOS ENVIRONMENT: convert Unix newlines (LF) to DOS format
awk 1
# IN DOS ENVIRONMENT: convert DOS newlines (CR/LF) to Unix format
# Cannot be done with DOS versions of awk, other than gawk:
gawk -v BINMODE="w" '1' infile >outfile
# Use "tr" instead.
tr -d \r <infile >outfile            # GNU tr version 1.22 or higher
# delete leading whitespace (spaces, tabs) from front of each line
# aligns all text flush left
awk '{sub(/^[ \t]+/, ""); print}'
# delete trailing whitespace (spaces, tabs) from end of each line
awk '{sub(/[ \t]+$/, "");print}'
# delete BOTH leading and trailing whitespace from each line
awk '{gsub(/^[ \t]+|[ \t]+$/,"");print}'
awk '{$1=$1;print}'           # also removes extra space between fields
# insert 5 blank spaces at beginning of each line (make page offset)
awk '{sub(/^/, "     ");print}'
# align all text flush right on a 79-column width
awk '{printf "%79s\n", $0}' file*
# center all text on a 79-character width
awk '{l=length();s=int((79-l)/2); printf "%"(s+l)"s\n",$0}' file*
# substitute (find and replace) "foo" with "bar" on each line
awk '{sub(/foo/,"bar");print}'           # replaces only 1st instance
gawk '{$0=gensub(/foo/,"bar",4);print}'  # replaces only 4th instance
awk '{gsub(/foo/,"bar");print}'          # replaces ALL instances in a line
# substitute "foo" with "bar" ONLY for lines which contain "baz"
awk '/baz/{gsub(/foo/, "bar")};{print}'
# substitute "foo" with "bar" EXCEPT for lines which contain "baz"
awk '!/baz/{gsub(/foo/, "bar")};{print}'
# change "scarlet" or "ruby" or "puce" to "red"
awk '{gsub(/scarlet|ruby|puce/, "red"); print}'
# reverse order of lines (emulates "tac")
awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' file*
# if a line ends with a backslash, append the next line to it
# (fails if there are multiple lines ending with backslash...)
awk '/\\$/ {sub(/\\$/,""); getline t; print $0 t; next}; 1' file*
# print and sort the login names of all users
awk -F ":" '{ print $1 | "sort" }' /etc/passwd
# print the first 2 fields, in opposite order, of every line
awk '{print $2, $1}' file
# switch the first 2 fields of every line
awk '{temp = $1; $1 = $2; $2 = temp}' file
# print every line, deleting the second field of that line
awk '{ $2 = ""; print }'
# print in reverse order the fields of every line
awk '{for (i=NF; i>0; i--) printf("%s ",i);printf ("\n")}' file
# remove duplicate, consecutive lines (emulates "uniq")
awk 'a !~ $0; {a=$0}'
# remove duplicate, nonconsecutive lines
awk '! a[$0]++'                     # most concise script
awk '!($0 in a) {a[$0];print}'      # most efficient script
# concatenate every 5 lines of input, using a comma separator
# between fields
awk 'ORS=%NR%5?",":"\n"' file


SELECTIVE PRINTING OF CERTAIN LINES:

# print first 10 lines of file (emulates behavior of "head")
awk 'NR < 11'
# print first line of file (emulates "head -1")
awk 'NR>1{exit};1'
 # print the last 2 lines of a file (emulates "tail -2")
awk '{y=x "\n" $0; x=$0};END{print y}'
# print the last line of a file (emulates "tail -1")
awk 'END{print}'
# print only lines which match regular expression (emulates "grep")
awk '/regex/'
# print only lines which do NOT match regex (emulates "grep -v")
awk '!/regex/'
# print the line immediately before a regex, but not the line
# containing the regex
awk '/regex/{print x};{x=$0}'
awk '/regex/{print (x=="" ? "match on line 1" : x)};{x=$0}'
# print the line immediately after a regex, but not the line
# containing the regex
awk '/regex/{getline;print}'
# grep for AAA and BBB and CCC (in any order)
awk '/AAA/; /BBB/; /CCC/'
# grep for AAA and BBB and CCC (in that order)
awk '/AAA.*BBB.*CCC/'
# print only lines of 65 characters or longer
awk 'length > 64'
# print only lines of less than 65 characters
awk 'length < 64'
# print section of file from regular expression to end of file
awk '/regex/,0'
awk '/regex/,EOF'
# print section of file based on line numbers (lines 8-12, inclusive)
awk 'NR==8,NR==12'
# print line number 52
awk 'NR==52'
awk 'NR==52 {print;exit}'          # more efficient on large files
# print section of file between two regular expressions (inclusive)
awk '/Iowa/,/Montana/'             # case sensitive


SELECTIVE DELETION OF CERTAIN LINES:

# delete ALL blank lines from a file (same as "grep '.' ")
awk NF
awk '/./'