Skip to content

Regex

Regular Expressions (regex) are patterns used to match character combinations in strings. For DevOps engineers and system administrators, regex is essential for log parsing, data extraction, input validation, and automation scripts.


A regular expression is a sequence of characters that defines a search pattern. Think of it as a powerful version of wildcard matching.

┌────────────────────────────────────────────────────────────────┐
│ Regex Characters │
├────────────────────────────────────────────────────────────────┤
│ │
│ LITERAL CHARACTERS (match themselves) │
│ ───────────────────────────────────── │
│ a, b, 1, @ (and . when escaped as \. or placed inside []) │
│ │
│ METACHARACTERS (special meaning) │
│ ────────────────────────────── │
│ . - Any single character │
│ ^ - Start of string/line │
│ $ - End of string/line │
│ * - Zero or more of previous │
│ + - One or more of previous │
│ ? - Zero or one of previous │
│ [] - Character class │
│ {} - Repetition count: {n}, {n,}, {n,m} │
│ | - Alternation (OR) │
│ () - Grouping │
│ \ - Escape │
│ │
└────────────────────────────────────────────────────────────────┘

Terminal window
# "." matches any one character; -o prints every match on its own line
grep -o '.' <<< "abc"
# a
# b
# c
# A bracket expression matches exactly one of the characters it lists
grep -o '[cbh]at' <<< "cat bat hat"
# cat
# bat
# hat
# A leading ^ inside the brackets negates the set
grep -o '[^cb]at' <<< "cat bat hat mat"
# hat
# mat
Terminal window
# Shorthand classes and their bracket-expression equivalents:
#   \d  digit               [0-9]
#   \D  non-digit           [^0-9]
#   \w  word character      [a-zA-Z0-9_]
#   \W  non-word character
#   \s  whitespace          [ \t\n\r\f]
#   \S  non-whitespace
# POSIX named classes, tested with bash's =~ operator
if [[ "abc123" =~ [[:digit:]]+ ]]; then
  echo "Contains digits"
fi
if [[ "abc" =~ [[:alpha:]]+ ]]; then
  echo "Contains letters"
fi
if [[ "abc_123" =~ [[:alnum:]]+ ]]; then
  echo "Contains alphanumeric"
fi
if [[ " " =~ [[:space:]]+ ]]; then
  echo "Contains whitespace"
fi

Terminal window
# ^ pins the match to the beginning of the line
grep '^hello' <<< "hello world"
# hello world
# $ pins the match to the end of the line
grep 'world$' <<< "hello world"
# hello world
# With both anchors the entire line has to match
grep '^hello world$' <<< "hello world"
# hello world
# Anchored dotted-quad shape check: four groups of 1-3 digits
ip="192.168.1.1"
if [[ "$ip" =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
  echo "Valid IP"
fi

Terminal window
# * - Zero or more (greedy); available in basic regex (BRE)
echo "aaa" | grep -o 'a*'
# aaa
# + - One or more (greedy); needs extended regex (-E) because a bare + is a
#     literal plus sign in BRE
echo "aaa" | grep -oE 'a+'
# aaa
# ? - Zero or one; also needs -E (a bare ? is a literal question mark in BRE)
echo "color colour" | grep -oE 'colou?r'
# color
# colour
# {n} - Exactly n times (BRE spells the braces \{ \}; with -E write {3})
echo "12345" | grep -o '[0-9]\{3\}'
# 123
# {n,} - n or more times
echo "12345" | grep -o '[0-9]\{2,\}'
# 12345
# {n,m} - Between n and m times
echo "123456" | grep -o '[0-9]\{2,4\}'
# 1234
# 56
Terminal window
# Greedy (default) - matches as much as possible
echo "<div>content</div>" | grep -o '<.*>'
# <div>content</div>
# Non-greedy - matches as little as possible.
# POSIX BRE/ERE have no lazy quantifiers, so the *? form requires
# Perl-compatible regex (-P, GNU grep built with PCRE support)
echo "<div>content</div>" | grep -oP '<.*?>'
# <div>
# </div>
# Portable equivalent: exclude the closing delimiter instead of being lazy
echo "<div>content</div>" | grep -o '<[^>]*>'
# <div>
# </div>

Terminal window
# Parentheses group sub-patterns; each digit run below is its own group
grep -oE '([0-9]{3})-([0-9]{3})-([0-9]{4})' <<< "123-456-7890"
# 123-456-7890
# After a =~ match, bash exposes the whole match and each group in BASH_REMATCH
text="2024-02-22"
[[ $text =~ ([0-9]{4})-([0-9]{2})-([0-9]{2}) ]]
printf '%s\n' "${BASH_REMATCH[0]}" # Full match
printf '%s\n' "${BASH_REMATCH[1]}" # Year
printf '%s\n' "${BASH_REMATCH[2]}" # Month
printf '%s\n' "${BASH_REMATCH[3]}" # Day
Terminal window
# | - OR operator (ERE, hence -E).
# -o prints each match on its own line; without it grep prints the whole
# matching input line ("cat dog bird").
echo "cat dog bird" | grep -oE 'cat|dog'
# cat
# dog
# With grouping
echo "color colour" | grep -oE '(colou?r)'
# color
# colour

Terminal window
# A backslash turns the dot into a literal "."
grep 'file\.txt' <<< "file.txt"
# file.txt
# Same for the asterisk
grep 'a\*b' <<< "a*b"
# a*b
# A literal backslash is written as two backslashes in the pattern
grep 'path\\to\\file' <<< 'path\to\file'
# path\to\file
# Square brackets must be escaped to be matched literally
grep '\[test\]' <<< "[test]"
# [test]

#!/usr/bin/env bash
# Check whether a string has the shape of an e-mail address.
# Arguments: $1 - candidate address
# Returns:   0 when $1 matches the pattern, 1 otherwise
validate_email() {
  local email="$1"
  # Simplified RFC 5322-style pattern: local part, "@", domain, and a
  # final label of at least two letters
  local pattern='^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
  [[ "$email" =~ $pattern ]] || return 1
  return 0
}
# Test
if validate_email "admin@example.com"; then
  echo "Valid"
else
  echo "Invalid"
fi
if validate_email "invalid-email"; then
  echo "Valid"
else
  echo "Invalid"
fi
#!/usr/bin/env bash
# Pull individual fields out of a single log line with capture groups
log_line="2024-02-22 10:30:45 ERROR: Connection failed from 192.168.1.100"
# Date: four digits, two digits, two digits, dash-separated
[[ $log_line =~ ([0-9]{4}-[0-9]{2}-[0-9]{2}) ]]
date=${BASH_REMATCH[1]}
printf 'Date: %s\n' "$date" # 2024-02-22
# Time: three colon-separated two-digit groups
[[ $log_line =~ ([0-9]{2}:[0-9]{2}:[0-9]{2}) ]]
time=${BASH_REMATCH[1]}
printf 'Time: %s\n' "$time" # 10:30:45
# Log level: one of the four known keywords
[[ $log_line =~ (ERROR|WARN|INFO|DEBUG) ]]
level=${BASH_REMATCH[1]}
printf 'Level: %s\n' "$level" # ERROR
# IP address: four dot-separated groups of 1-3 digits
[[ $log_line =~ ([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}) ]]
ip=${BASH_REMATCH[1]}
printf 'IP: %s\n' "$ip" # 192.168.1.100
#!/usr/bin/env bash
# Parse Nginx combined log format
# 127.0.0.1 - - [22/Feb/2024:10:30:45 +0000] "GET /api/users HTTP/1.1" 200 2326 "https://example.com" "Mozilla/5.0"
log='127.0.0.1 - - [22/Feb/2024:10:30:45 +0000] "GET /api/users HTTP/1.1" 200 2326 "https://example.com" "Mozilla/5.0"'
# Patterns live in variables so no shell escaping is needed inside [[ =~ ]]
# Extract IP: the dotted quad at the very start of the line
ip_re='^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)'
if [[ "$log" =~ $ip_re ]]; then
  client_ip="${BASH_REMATCH[1]}"
  echo "IP: $client_ip"
fi
# Extract timestamp: everything between the first "[" and the next "]"
timestamp_re='\[([^]]+)\]'
if [[ "$log" =~ $timestamp_re ]]; then
  timestamp="${BASH_REMATCH[1]}"
  echo "Timestamp: $timestamp"
fi
# Extract request: the method and path that follow the opening quote
request_re='"([A-Z]+) ([^" ]+) '
if [[ "$log" =~ $request_re ]]; then
  method="${BASH_REMATCH[1]}"
  request_path="${BASH_REMATCH[2]}"
  echo "Method: $method"
  echo "Path: $request_path"
fi
# Extract status code and bytes: the two numbers right after the closing
# quote of the request. Anchoring on that quote matters: a bare
# "three digits then space" pattern would first hit "000 " inside the
# "+0000" timezone offset.
status_re='" ([0-9]{3}) ([0-9]+)'
if [[ "$log" =~ $status_re ]]; then
  status="${BASH_REMATCH[1]}"
  bytes="${BASH_REMATCH[2]}"
  echo "Status: $status"
  echo "Bytes: $bytes"
fi
#!/usr/bin/env bash
# Find the ID of the container whose `docker ps` row mentions "my-app":
# print the first column of every matching "ID NAME" row
container_id=$(docker ps --format '{{.ID}} {{.Names}}' | awk '/my-app/ {print $1}')
# Alternative using regex directly: keep the 12 hex characters that start
# the matching row of the default `docker ps` output
container_id=$(
  docker ps \
    | grep "my-app" \
    | grep -oE '^[a-f0-9]{12}'
)
#!/usr/bin/env bash
# Regex-based field extraction from JSON (suitable for simple cases only)
json='{"status": "success", "data": {"user": "john", "id": 123}}'
# Each pattern is kept in a variable so the quotes need no backslashes
# status: a quoted string value
field_re='"status": "([^"]+)"'
[[ $json =~ $field_re ]]
printf 'Status: %s\n' "${BASH_REMATCH[1]}"
# user: a quoted string value
field_re='"user": "([^"]+)"'
[[ $json =~ $field_re ]]
printf 'User: %s\n' "${BASH_REMATCH[1]}"
# id: a bare run of digits
field_re='"id": ([0-9]+)'
[[ $json =~ $field_re ]]
printf 'ID: %s\n' "${BASH_REMATCH[1]}"
#!/usr/bin/env bash
# Validate a dotted-quad IPv4 address.
# Arguments: $1 - candidate address
# Returns:   0 when $1 is four dot-separated groups of 1-3 digits, each with
#            a decimal value of at most 255; 1 otherwise
validate_ip() {
  local ip="$1"
  local octet
  local -a octets
  # Check format
  if ! [[ "$ip" =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
    return 1
  fi
  # Check each octet
  IFS='.' read -ra octets <<< "$ip"
  for octet in "${octets[@]}"; do
    # Force base 10: a leading zero would otherwise make (( )) read the
    # value as octal, and "08"/"09" would raise an arithmetic error
    if (( 10#$octet > 255 )); then
      return 1
    fi
  done
  return 0
}
# Test
if validate_ip "192.168.1.1"; then echo "Valid"; else echo "Invalid"; fi
# Valid
if validate_ip "256.1.1.1"; then echo "Valid"; else echo "Invalid"; fi
# Invalid
if validate_ip "192.168.1"; then echo "Valid"; else echo "Invalid"; fi
# Invalid
#!/usr/bin/env bash
text="Visit https://example.com or http://test.org or ftp://files.net"
# Extract all URLs
echo "$text" | grep -oE 'https?://[^ ]+|ftp://[^ ]+'
# https://example.com
# http://test.org
# ftp://files.net
# Extract domain: a capture group keeps only the host, leaving out the
# scheme and stopping at ":" (port) or "/" (path)
url="https://api.example.com:8080/path"
domain_re='^https?://([^:/]+)'
if [[ "$url" =~ $domain_re ]]; then
  echo "${BASH_REMATCH[1]}"
fi
# api.example.com
#!/usr/bin/env bash
# Parse "*/5 * * * *" cron expression
cron="*/5 * * * * /path/to/script.sh"
# Extract fields: five space-separated schedule fields at the start of the
# line. The fields are only read after a successful match, so stale
# BASH_REMATCH data is never used.
cron_re='^([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)'
if [[ "$cron" =~ $cron_re ]]; then
  minute="${BASH_REMATCH[1]}"
  hour="${BASH_REMATCH[2]}"
  day="${BASH_REMATCH[3]}"
  month="${BASH_REMATCH[4]}"
  weekday="${BASH_REMATCH[5]}"
  echo "Minute: $minute" # */5
  echo "Hour: $hour" # *
  echo "Day: $day" # *
  echo "Month: $month" # *
  echo "Weekday: $weekday" # *
else
  echo "Not a cron expression: $cron" >&2
fi

Terminal window
# Basic grep - uses basic regex (BRE)
grep 'a.*b' file.txt
# Extended grep - uses extended regex (ERE): + ? () | work without backslashes
grep -E 'a.*b' file.txt
grep -E 'a+' file.txt
grep -E 'a?' file.txt
grep -E '(ab)+' file.txt
# egrep is the legacy spelling of grep -E. It is obsolescent (recent GNU grep
# releases print a warning), so write grep -E in new scripts.
grep -E 'a+' file.txt

Terminal window
# Perl-compatible regex (-P)
grep -P '\d+' file.txt
grep -P '\w+' file.txt
# Lookarounds are zero-width: they constrain the match without being part of
# it. Pair them with -o so only the matched text is printed; without -o grep
# prints the whole line and the assertion has no visible effect.
grep -oP '(?<=@)[^ ]+' file.txt # Lookbehind: the text that follows an "@"
grep -oP '[^ ]+(?=@)' file.txt # Lookahead: the text that precedes an "@"

#!/usr/bin/env bash
string="Date: 2024-02-22, Time: 10:30:45"
# Group 1 captures the date, group 2 the time; index 0 is the whole match.
# The pattern is held in a variable so its spaces need no backslashes.
stamp_re='Date: ([0-9-]+), Time: ([0-9:]+)'
if [[ $string =~ $stamp_re ]]; then
  printf 'Full match: %s\n' "${BASH_REMATCH[0]}"
  printf 'Date: %s\n' "${BASH_REMATCH[1]}"
  printf 'Time: %s\n' "${BASH_REMATCH[2]}"
fi

Terminal window
# Reusable ERE patterns (for grep -E or bash's =~). None of them is anchored;
# wrap a pattern in ^...$ to validate a whole string.
# IPv4 Address (shape only: does not check that each octet is <= 255)
ipv4_pattern='[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}'
# IPv6 Address (simplified: full eight-group form only, no "::" shorthand)
ipv6_pattern='([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}'
# MAC Address (colon-separated)
mac_pattern='([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}'
# Email
email_pattern='[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
# URL (plain group: the (?:...) non-capturing form is PCRE-only and is not
# valid ERE, unlike every other pattern in this list)
url_pattern='https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./_-]*)?'
# Date (YYYY-MM-DD)
date_pattern='[0-9]{4}-[0-9]{2}-[0-9]{2}'
# Time (HH:MM:SS)
time_pattern='[0-9]{2}:[0-9]{2}:[0-9]{2}'
# Hex color (six-digit form)
hex_color_pattern='#[0-9a-fA-F]{6}'
# UUID (lowercase hex)
uuid_pattern='[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'
# Port number (1-5 digits; does not enforce the 0-65535 range)
port_pattern='[0-9]{1,5}'

In this chapter, you learned:

  • ✅ Character classes and how they work
  • ✅ Anchors for start/end matching
  • ✅ Quantifiers for repetition
  • ✅ Grouping and alternation
  • ✅ Escaping special characters
  • ✅ Practical DevOps examples
  • ✅ Using BASH_REMATCH for captures
  • ✅ Common regex patterns for DevOps

Continue to the next chapter to learn about sed - the stream editor for text transformation.


Previous Chapter: Here Documents Next Chapter: sed - Stream Editor