Skip to content

Awk

Chapter 22: awk - Pattern Scanning and Processing

Section titled “Chapter 22: awk - Pattern Scanning and Processing”

awk is a powerful text processing language designed for pattern scanning and reporting. It processes files line by line, splitting each line into fields, making it ideal for log analysis, data extraction, and reporting in DevOps environments.


┌────────────────────────────────────────────────────────────────┐
│ awk Processing Flow │
├────────────────────────────────────────────────────────────────┤
│ │
│ Input File │
│ ───────── │
│ │
│ line 1: field1 field2 field3 field4 │
│ ↓ ↓ ↓ ↓ │
│ $1 $2 $3 $4 ... $NF (number of fields) │
│ │
│ ┌─────────────────────────────────────────┐ │
│ │ BEGIN { } - Run once before │ │
│ │ processing starts │ │
│ │ │ │
│ │ /pattern/ { } - Process matching │ │
│ │ lines │ │
│ │ │ │
│ │ { } - Process all lines │ │
│ │ │ │
│ │ END { } - Run once after │ │
│ │ processing ends │ │
│ └─────────────────────────────────────────┘ │
│ │
│ Output │
│ ────── │
│ │
└────────────────────────────────────────────────────────────────┘

Terminal window
# Basic syntax
awk 'pattern { action }' file.txt
# Using -F for field separator
awk -F',' '{ print $1 }' file.csv
# Using -v for variables
awk -v name=value '{ print name, $1 }' file.txt
# Using -f for program file
awk -f script.awk file.txt

Terminal window
# $0 - entire line
echo "hello world" | awk '{print $0}'
# hello world
# $1 - first field
echo "hello world" | awk '{print $1}'
# hello
# $2 - second field
echo "hello world" | awk '{print $2}'
# world
# $NF - last field
echo "one two three four" | awk '{print $NF}'
# four
# $(NF-1) - second to last
echo "one two three four" | awk '{print $(NF-1)}'
# three
Terminal window
# Print number of fields
echo "a b c d e" | awk '{print NF}'
# 5
# Print last field
echo "a b c d e" | awk '{print $NF}'
# e

Terminal window
# Default is whitespace
echo "one two three" | awk '{print $1, $2}'
# one two
# Multiple spaces handled correctly (awk collapses runs of whitespace into one separator)
echo "one    two   three" | awk '{print $1, $2}'
# one two
Terminal window
# Using -F
awk -F',' '{print $1, $2}' file.csv
# Using FS in BEGIN
awk 'BEGIN {FS=","} {print $1, $2}' file.csv
# Regex separator
awk 'BEGIN {FS="[:,|]"} {print $1, $2}' file.txt
# Output separator
awk 'BEGIN {OFS=" - "} {print $1, $2}' file.txt

Terminal window
# Lines where $1 equals "error"
awk '$1 == "error" {print}' logfile
# Lines where $3 > 100
awk '$3 > 100 {print}' data.txt
# Lines where $1 contains "warning"
awk '$1 ~ /warning/ {print}' logfile
# Lines where $1 does NOT contain "debug"
awk '$1 !~ /debug/ {print}' logfile
Terminal window
# Match lines containing "ERROR"
awk '/ERROR/ {print}' logfile
# Match lines starting with "2024"
awk '/^2024/ {print}' logfile
# Match lines ending with "failed"
awk '/failed$/ {print}' logfile
# Complex patterns
awk '/ERROR/ || /WARN/ {print}' logfile

Variable   Description
FS         Input field separator
OFS        Output field separator
RS         Input record separator
ORS        Output record separator
NF         Number of fields in current record
NR         Current record number
FNR        Record number in current file
FILENAME   Current filename
ARGC       Number of arguments
ARGV       Array of arguments
Terminal window
# Print line numbers
awk '{print NR, $0}' file.txt
# Print with custom field separator
awk 'BEGIN {FS=","; OFS=" | "} {print $1, $2}' file.csv
# Different separators for input/output
awk 'BEGIN {RS="\n\n"; ORS="\n\n"} {print}' file.txt

Terminal window
# Print entire line
awk '{print}' file.txt
# Print specific fields
awk '{print $1, $3}' file.txt
# Print with text
awk '{print "User:", $1, "Status:", $2}' file.txt
# Print without newline
awk '{printf "%s ", $1}' file.txt
Terminal window
# Formatted output
awk '{printf "%-10s %5d\n", $1, $2}' file.txt
# Numbers
awk '{printf "%.2f\n", $3}' file.txt
# Align columns
awk '{printf "%-20s %10s %10s\n", $1, $2, $3}' file.txt

Terminal window
# Addition
awk '{print $1 + $2}' file.txt
# Multiplication
awk '{print $1 * $2}' file.txt
# Division
awk '{print $1 / $2}' file.txt
# Modulo
awk '{print $1 % $2}' file.txt
Terminal window
# Concatenation
awk '{print $1 "-" $2}' file.txt
# String length
awk '{print length($1)}' file.txt
# Substring
awk '{print substr($1, 1, 5)}' file.txt

Terminal window
awk '{
if ($2 > 50) {
print "High:", $0
} else {
print "Low:", $0
}
}' file.txt
# One-liner
awk '{if ($2 > 50) print "High"; else print "Low"}' file.txt
Terminal window
# For loop
awk '{
for (i=1; i<=NF; i++) {
print $i
}
}' file.txt
# While loop
awk '{
i=1
while (i<=NF) {
print $i
i++
}
}' file.txt

Terminal window
# Count occurrences
awk '{count[$1]++} END {for (word in count) print word, count[word]}' file.txt
# Sum values
awk '{sum+=$1} END {print sum}' file.txt
# Average
awk '{sum+=$1; count++} END {print sum/count}' file.txt
Terminal window
# Using string keys
awk '{ip_count[$1]++} END {for (ip in ip_count) print ip, ip_count[ip]}' access.log
# Multiple keys - arr[k1,k2] joins the keys with SUBSEP (default "\034")
awk '{count[$1,$2]++} END {for (k in count) print k, count[k]}' file.txt

Terminal window
# String functions
awk '{print toupper($1)}' file.txt
awk '{print tolower($1)}' file.txt
awk '{print length($0)}' file.txt
awk '{print substr($1, 2, 5)}' file.txt
awk '{print index($1, "test")}' file.txt
awk '{print gsub(/old/, "new", $1)}' file.txt
awk '{print split($1, arr, "-")}' file.txt
# Math functions
awk '{print sqrt($1)}' file.txt
awk '{print int($1)}' file.txt
awk '{print sin($1)}' file.txt
awk '{print rand()}'
Terminal window
awk 'function max(a, b) {
return (a > b) ? a : b
}
{print max($1, $2)}' file.txt

Terminal window
# Count HTTP status codes
awk '{print $9}' access.log | sort | uniq -c | sort -rn
# Find 5xx errors
awk '$9 ~ /^[56][0-9][0-9]/ {print}' access.log
# Extract top IPs
awk '{print $1}' access.log | sort | uniq -c | sort -rn | head -10
# Response time analysis
awk '{sum+=$NF; count++} END {print "Average:", sum/count}' access.log
Terminal window
# Extract failed login attempts
awk '/Failed password/ {print $1, $2, $3, $11}' /var/log/auth.log
# Count errors by hour
awk '/ERROR/ {print $2}' app.log | cut -d: -f1 | sort | uniq -c
# System resource alerts
awk '$5 > 90 {print "ALERT:", $0}' system.log
Terminal window
# Print specific columns
awk -F',' '{print $1, $3, $5}' data.csv
# Skip header
awk -F',' 'NR>1 {print}' data.csv
# Sum column
awk -F',' 'NR>1 {sum+=$4} END {print sum}' data.csv
# Filter by column value
awk -F',' '$3 > 1000 {print}' data.csv
Terminal window
# Extract JSON fields
kubectl logs app | awk '{for(i=1;i<=NF;i++) if($i~/^level=/) print $i}'
# Parse timestamp
kubectl logs app | awk '{print $1, $2}' | sort | uniq -c
# Error aggregation
kubectl logs app | awk '/ERROR/ {errors[$NF]++} END {for (e in errors) print e, errors[e]}'
Terminal window
# Parse docker stats output
docker stats --no-stream | awk '{print $2, $3, $4}'
# Container resource usage
docker stats --format "{{.Name}} {{.CPUPerc}} {{.MemUsage}}" | \
awk '{if ($2 ~ /[0-9]+\.[0-9]+%/) print}'
Terminal window
# Parse EC2 instances
aws ec2 describe-instances | \
jq -r '.Reservations[].Instances[] | "\(.InstanceId) \(.State.Name) \(.PublicIpAddress)"'
# Using awk after jq
aws ec2 describe-instances | \
jq -r '.Reservations[].Instances[] | "\(.Tags[] | select(.Key=="Name").Value) \(.InstanceType)"' | \
awk '{print $1, $2}'
Terminal window
# Parse netstat
netstat -ant | awk '{print $6}' | sort | uniq -c
# Parse ss output
ss -tunap | awk 'NR>1 {print $1, $5, $6}'
# Firewall logs
iptables -L -v -n | awk 'NR>2 {print $2, $3, $9}'
Terminal window
# Find large files
du -sh /* 2>/dev/null | awk '{print $1, $2}' | sort -h
# Disk usage by directory
du -h /var | awk '{print $1, $2}' | sort -h
# Percentage usage
df -h | awk 'NR>1 {print $1, $5}' | awk '{gsub(/%/,""); print $1, $2}'

Terminal window
# BEGIN - runs once before processing
awk 'BEGIN {print "Starting..."} {print} END {print "Done"}' file.txt
# Initialize variables
awk 'BEGIN {sum=0; count=0} {sum+=$1; count++} END {print "Average:", sum/count}' file.txt
# Custom headers
awk 'BEGIN {print "Name\tScore\tGrade"} {print} END {print "---"}' file.txt
Terminal window
# Group data
awk 'BEGIN {print "=== Report ==="}
/ERROR/ {errors++}
/WARN/ {warnings++}
END {print "Errors:", errors, "Warnings:", warnings}' logfile

#!/usr/bin/env bash
# Generate a quick system health report: top memory consumers,
# per-filesystem disk usage, and network connection states.
echo "=== System Report ==="
echo "Date: $(date)"
echo
echo "Top Processes by Memory:"
# ps aux reports RSS in column 6 in KB, so divide by 1024 for MB.
# NR>1 skips the header row; NR<=6 keeps the top 5 after --sort=-%mem.
# NOTE(review): --sort is a GNU procps option — not available on BSD/macOS ps.
ps aux --sort=-%mem | awk 'NR>1 && NR<=6 {printf "%-30s %9.1fMB %9s%%\n", $11, $6/1024, $3}'
echo
echo "Disk Usage:"
# df -h: $1=filesystem, $3=used, $5=use%; NR>1 skips the header row
df -h | awk 'NR>1 {printf "%-20s %10s %10s\n", $1, $3, $5}'
echo
echo "Network Connections:"
# Column 6 of netstat -ant is the TCP state (ESTABLISHED, LISTEN, ...)
netstat -ant | awk '{print $6}' | sort | uniq -c | sort -rn
#!/usr/bin/env bash
# Aggregate application logs into a single plain-text summary file.
log_dir="/var/log/myapp"
summary="/tmp/log_summary.txt"

# Build the whole report inside one redirected group instead of
# appending to the output file line by line.
{
  echo "Log Summary - $(date)"
  echo "==================="
  # Count messages by level: pull the text after "level=", trim at the
  # first comma, then tally and rank.
  echo ""
  echo "Messages by Level:"
  grep -h "level=" "$log_dir"/*.log 2>/dev/null \
    | awk -F'level=' '{print $2}' | cut -d, -f1 | sort | uniq -c | sort -rn
  # Top errors: tally the last field of each ERROR line and keep the 10
  # most frequent.
  echo ""
  echo "Top 10 Errors:"
  grep -h "ERROR" "$log_dir"/*.log 2>/dev/null \
    | awk '{print $NF}' | sort | uniq -c | sort -rn | head -10
} > "$summary"

cat "$summary"

Terminal window
# Use specific patterns (reduces processing)
awk '/ERROR/ {print}' largefile.log
# Process only needed fields
awk '{print $1, $2}' largefile.log
# Use while loops carefully - make sure the loop variable advances so it terminates
awk '{i=1; while (i<=NF) {print $i; i++}}' largefile.log # Can be slow on wide records
Terminal window
# Set FS in BEGIN (faster than -F)
awk 'BEGIN {FS=","} {print $1}' file.csv
# Close files when done - close() flushes and frees the file descriptor
awk '{print > "output.txt"} END {close("output.txt")}' file.txt
# Use next to skip records
awk 'NR==1 {next} {print}' file.txt

In this chapter, you learned:

  • ✅ How awk processes text (fields and records)
  • ✅ Field variables ($0, $1, $NF, etc.)
  • ✅ Field separators (FS, OFS)
  • ✅ Patterns and pattern matching
  • ✅ Built-in variables (NR, NF, etc.)
  • ✅ Actions and expressions
  • ✅ Control flow (if-else, loops)
  • ✅ Arrays and associative arrays
  • ✅ Built-in and user-defined functions
  • ✅ BEGIN and END blocks
  • ✅ Practical DevOps examples

Continue to the next chapter to learn about Process Management in Bash.


Previous Chapter: sed - Stream Editor Next Chapter: Process Management