Sysadmin_scripts

Chapter 36: System Administration Scripts

Overview

This chapter covers practical system administration scripts that DevOps engineers use daily. These scripts help manage users, services, system monitoring, and common administrative tasks.

User Management Scripts

Create User with Home Directory

#!/usr/bin/env bash
# create_user.sh - Create system user with home directory
#
# Usage: create_user.sh <username> [shell]
#   username  login name of the account to create (required)
#   shell     login shell for the account (default: /bin/bash)
#
# Exits 1 when the username is missing or invalid, or when the account
# already exists. Uses sudo for useradd and passwd.

set -euo pipefail

USAGE="Usage: $0 <username> [shell]"

username="${1:-}"
shell="${2:-/bin/bash}"

# Validate input; diagnostics go to stderr so stdout stays clean for callers
if [[ -z "$username" ]]; then
    echo "$USAGE" >&2
    exit 1
fi

# A leading dash would be read as an option by id/useradd/passwd
if [[ "$username" == -* ]]; then
    echo "Error: Username must not start with '-'" >&2
    exit 1
fi

# Check if user exists
if id "$username" &>/dev/null; then
    echo "Error: User $username already exists" >&2
    exit 1
fi

# Create user with home directory and shell
sudo useradd -m -s "$shell" "$username"

# Set password (passwd asks for it interactively)
echo "Enter password for $username:"
sudo passwd "$username"

echo "User $username created successfully"

#!/usr/bin/env bash
# list_users_by_shell.sh - Show every account whose login shell matches
#
# Usage: list_users_by_shell.sh [shell]   (shell defaults to /bin/bash)

set -euo pipefail

wanted_shell="${1:-/bin/bash}"

printf '%s\n' "Users using shell: $wanted_shell" "======================"

# In /etc/passwd, field 7 is compared with the requested shell;
# fields 1 and 6 are printed as user name and home directory.
awk -F: -v wanted="$wanted_shell" \
    '$7 == wanted { printf "User: %s, Home: %s\n", $1, $6 }' /etc/passwd

Service Management Scripts

Service Health Monitor

#!/usr/bin/env bash
# monitor_services.sh - Monitor critical services

set -euo pipefail

# Units whose state is checked on every run
SERVICES=(nginx postgresql redis docker)

# Print a message prefixed with the current date in square brackets.
log() {
    printf '[%s] %s\n' "$(date)" "$*"
}

# Report whether a single systemd unit is active.
# Arguments: $1 - unit name
# Outputs:   one log line stating the result
# Returns:   0 when `systemctl is-active` succeeds, 1 otherwise
check_service() {
    local unit="$1"

    if ! systemctl is-active --quiet "$unit"; then
        log "✗ $unit is NOT running"
        return 1
    fi

    log "✓ $unit is running"
    return 0
}

# Check every unit listed in SERVICES and print a summary.
# Globals: SERVICES (read)
# Outputs: one log line per unit (via check_service) plus a summary line
# Exits 1 when at least one unit is not running; returns 0 otherwise.
main() {
    local failed=0
    local service

    log "Checking services..."

    for service in "${SERVICES[@]}"; do
        if ! check_service "$service"; then
            # Plain assignment on purpose: ((failed++)) evaluates to 0 on the
            # first failure, yields status 1, and aborts the run under set -e.
            failed=$((failed + 1))
        fi
    done

    if [[ $failed -gt 0 ]]; then
        log "WARNING: $failed service(s) not running"
        exit 1
    fi

    log "All services running"
}

main

Restart Service with Health Check

#!/usr/bin/env bash
# restart_service.sh - Restart service with health verification
#
# Usage: restart_service.sh <service> [health-url]
#   service     systemd unit to restart (required)
#   health-url  URL polled after the restart; falls back to the HEALTH_URL
#               environment variable, then http://localhost:8080/health
#
# Exit status: 0 once the health URL answers successfully, 1 on a usage
# error or when the URL has not answered after about MAX_WAIT seconds.

set -euo pipefail

SERVICE="${1:-}"
# The positional argument wins over the environment variable so that the
# [health-url] argument shown in the usage text is actually honored.
HEALTH_URL="${2:-${HEALTH_URL:-http://localhost:8080/health}}"
MAX_WAIT=60
POLL_INTERVAL=2

log() { echo "[$(date)] $*"; }

usage() {
    echo "Usage: $0 <service> [health-url]" >&2
    echo "Example: $0 nginx" >&2
    exit 1
}

# Validate arguments
if [[ -z "$SERVICE" ]]; then
    usage
fi

log "Restarting service: $SERVICE"

# Stop service
sudo systemctl stop "$SERVICE"
log "Service stopped"

# Start service
sudo systemctl start "$SERVICE"
log "Service started"

# Wait for health check
log "Waiting for health check..."
elapsed=0

while [[ $elapsed -lt $MAX_WAIT ]]; do
    # --max-time keeps a hung request from blocking the loop indefinitely
    if curl -sf --max-time "$POLL_INTERVAL" "$HEALTH_URL" &>/dev/null; then
        log "Service is healthy"
        exit 0
    fi
    sleep "$POLL_INTERVAL"
    elapsed=$((elapsed + POLL_INTERVAL))
done

log "Health check failed after ${MAX_WAIT}s"
# A non-zero status from `systemctl status` must not replace our exit code
# under set -e, hence the explicit || true.
sudo systemctl status --no-pager "$SERVICE" || true
exit 1

System Monitoring Scripts

System Resource Report

#!/usr/bin/env bash
# system_report.sh - Generate comprehensive system report
#
# Usage: system_report.sh [report-file]
#   report-file  file the report is written to
#                (default: /tmp/system_report.txt)
# Every report line is printed to stdout and appended to the file.

set -euo pipefail

REPORT_FILE="${1:-/tmp/system_report.txt}"

# Print one line to stdout and append it to REPORT_FILE.
log() { echo "$*" | tee -a "$REPORT_FILE"; }

# Build the report: truncate REPORT_FILE, then append each section.
# Globals: REPORT_FILE (read; the file is overwritten)
# Outputs: the full report on stdout and in REPORT_FILE
#
# The script runs with `set -e` and `pipefail`, so filters whose non-zero
# status is harmless (grep with no match, a producer cut off by head) are
# replaced by awk filters that read all input and always succeed.
generate_report() {
    local os_name=""

    : > "$REPORT_FILE"

    log "=== System Report ==="
    log "Generated: $(date)"
    log ""

    # System info
    if command -v lsb_release >/dev/null 2>&1; then
        os_name=$(lsb_release -d 2>/dev/null | cut -f2) || os_name=""
    fi
    if [[ -z "$os_name" && -r /etc/os-release ]]; then
        # Fallback: PRETTY_NAME from os-release, without its quotes
        os_name=$(awk '/^PRETTY_NAME=/ { sub(/^PRETTY_NAME=/, ""); gsub(/"/, ""); print; exit }' /etc/os-release)
    fi

    log "--- System Information ---"
    log "Hostname: $(hostname)"
    log "Kernel: $(uname -r)"
    log "OS: $os_name"
    log "Uptime: $(uptime -p)"
    log ""

    # CPU info
    log "--- CPU Information ---"
    log "Model: $(grep 'model name' /proc/cpuinfo | head -1 | cut -d: -f2 | xargs)"
    log "Cores: $(nproc)"
    log "Load Average: $(uptime | awk -F'load average:' '{print $2}')"
    log ""

    # Memory info
    log "--- Memory Information ---"
    free -h | tee -a "$REPORT_FILE"
    log ""

    # Disk info: only filesystems backed by a /dev device; printing nothing
    # is fine when none match
    log "--- Disk Usage ---"
    df -h | awk '/^\/dev/' | tee -a "$REPORT_FILE"
    log ""

    # Top processes: header line plus five entries
    log "--- Top 5 Processes by Memory ---"
    ps aux --sort=-%mem | awk 'NR <= 6' | tee -a "$REPORT_FILE"
    log ""

    # Network connections
    # Listening sockets are only listed when ss is given -l; every line
    # after the header of that listing is counted.
    log "--- Network Connections ---"
    log "Established: $(ss -tun | awk '/ESTAB/ { n++ } END { print n + 0 }')"
    log "Listening: $(ss -tuln | awk 'NR > 1 { n++ } END { print n + 0 }')"

    log ""
    log "Report saved to: $REPORT_FILE"
}

generate_report

Disk Space Alert

#!/usr/bin/env bash
# disk_alert.sh - Alert when disk space is low
#
# Usage: disk_alert.sh [threshold]
#   threshold  usage percentage of / that triggers the alert (default: 90)

set -euo pipefail

THRESHOLD="${1:-90}"

log() { echo "[$(date)] $*"; }

# Compare the usage of the root filesystem with THRESHOLD.
# Globals: THRESHOLD (read)
# Outputs: a status line; on alert also `df -h` and the ten largest du entries
# Returns: 0 below the threshold, 1 at or above it,
#          2 when THRESHOLD or the df output is not a whole number
check_disk() {
    local usage

    if [[ ! "$THRESHOLD" =~ ^[0-9]+$ ]]; then
        log "ERROR: threshold must be a whole number, got '$THRESHOLD'" >&2
        return 2
    fi

    # -P keeps each filesystem on one line, so line 2 / field 5 is the
    # percentage even when the device name is long
    usage=$(df -P / | awk 'NR==2 {print $5}' | tr -d '%')

    if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
        log "ERROR: could not read disk usage for /" >&2
        return 2
    fi

    # 10# forces base 10 so a value such as 09 is not read as octal
    if (( 10#$usage >= 10#$THRESHOLD )); then
        log "ALERT: Disk usage at ${usage}%"

        # Show detailed info
        df -h

        # Find large directories
        # Best effort: du fails on unreadable paths, and awk (unlike head)
        # drains sort's output, so pipefail cannot turn this into an abort.
        log "Large directories:"
        du -ahx / 2>/dev/null | sort -rh | awk 'NR <= 10' || true

        return 1
    fi

    log "Disk usage OK: ${usage}%"
    return 0
}

check_disk

Network Administration Scripts

Network Connectivity Check

#!/usr/bin/env bash
# network_check.sh - Check network connectivity

set -euo pipefail

# Targets probed on every run
HOSTS=(8.8.8.8 google.com cloudflare.com)

# Print a message prefixed with the current date in square brackets.
log() {
    printf '[%s] %s\n' "$(date)" "$*"
}

# Probe one host with a single ping (-c 1, reply timeout -W 2).
# Arguments: $1 - host name or address
# Outputs:   one log line stating the result
# Returns:   0 when ping succeeds, 1 otherwise
check_ping() {
    local target="$1"

    if ! ping -c 1 -W 2 "$target" &>/dev/null; then
        log "✗ $target is NOT reachable"
        return 1
    fi

    log "✓ $target is reachable"
    return 0
}

# Probe every host listed in HOSTS and print a summary.
# Globals: HOSTS (read)
# Outputs: one log line per host (via check_ping) plus a summary line
# Exits 1 when at least one host is unreachable; returns 0 otherwise.
main() {
    local failed=0
    local host

    log "Checking network connectivity..."

    for host in "${HOSTS[@]}"; do
        if ! check_ping "$host"; then
            # Plain assignment on purpose: ((failed++)) evaluates to 0 on the
            # first failure, yields status 1, and aborts the run under set -e.
            failed=$((failed + 1))
        fi
    done

    if [[ $failed -gt 0 ]]; then
        log "WARNING: $failed host(s) unreachable"
        exit 1
    fi

    log "All hosts reachable"
}

main

Port Scanner

#!/usr/bin/env bash
# port_scan.sh - Simple port scanner
#
# Usage: port_scan.sh [host] [start-port] [end-port]
#   Defaults: localhost 1 1024
# Environment:
#   CONNECT_TIMEOUT  seconds to wait per port when the `timeout` command
#                    is available (default: 1)

set -euo pipefail

HOST="${1:-localhost}"
START_PORT="${2:-1}"
END_PORT="${3:-1024}"

# Print the scan banner (takes no arguments).
log() { echo "Scanning $HOST ports $START_PORT-$END_PORT..."; }

# Try a TCP connection to every port in START_PORT..END_PORT on HOST.
# Globals: HOST, START_PORT, END_PORT, CONNECT_TIMEOUT (read)
# Outputs: "Port N: OPEN" for each port that accepts a connection
# Returns: 0 after a completed scan (closed ports are not an error),
#          2 when a port bound is not a whole number
scan_ports() {
    local port
    # The probe runs in a child bash so it can be wrapped in `timeout`;
    # opening /dev/tcp/<host>/<port> succeeds only if the connection does.
    local -a probe=(bash -c ': >"/dev/tcp/$1/$2"' _)

    if [[ ! "$START_PORT" =~ ^[0-9]+$ || ! "$END_PORT" =~ ^[0-9]+$ ]]; then
        echo "Error: ports must be whole numbers" >&2
        return 2
    fi

    # Without a limit, a port that silently drops packets stalls the scan
    if command -v timeout >/dev/null 2>&1; then
        probe=(timeout "${CONNECT_TIMEOUT:-1}" "${probe[@]}")
    fi

    for ((port = 10#$START_PORT; port <= 10#$END_PORT; port++)); do
        if "${probe[@]}" "$HOST" "$port" 2>/dev/null; then
            echo "Port $port: OPEN"
        fi
    done

    # Explicit success: the status of the last probe must not leak out and
    # make the script exit 1 under set -e when the final port is closed.
    return 0
}

log
scan_ports

Log Management Scripts

Log Rotation

#!/usr/bin/env bash
# rotate_logs.sh - Rotate application logs
#
# Environment:
#   LOG_DIR   directory searched for logs (default: /var/log/myapp)
#   MAX_DAYS  age in days after which *.log files are compressed; *.log.gz
#             files older than twice this value are deleted (default: 30)
#   MAX_SIZE  find(1) size expression; larger *.log files are compressed
#             regardless of age (default: 100M)

set -euo pipefail

LOG_DIR="${LOG_DIR:-/var/log/myapp}"
MAX_DAYS="${MAX_DAYS:-30}"
MAX_SIZE="${MAX_SIZE:-100M}"

log() { echo "[$(date)] $*"; }

# Compress old or oversized logs and delete very old archives.
# Globals: LOG_DIR, MAX_DAYS, MAX_SIZE (read)
# Returns: 0 on success, 1 when LOG_DIR is missing or MAX_DAYS is not a
#          whole number
rotate_logs() {
    log "Starting log rotation"
    log "Directory: $LOG_DIR"
    log "Max days: $MAX_DAYS"

    if [[ ! -d "$LOG_DIR" ]]; then
        log "ERROR: log directory not found: $LOG_DIR" >&2
        return 1
    fi

    if [[ ! "$MAX_DAYS" =~ ^[0-9]+$ ]]; then
        log "ERROR: MAX_DAYS must be a whole number, got '$MAX_DAYS'" >&2
        return 1
    fi

    # Compress logs that are old or large in a single pass. gzip runs once
    # per file (\;) so one failure, e.g. an existing .gz, does not stop the
    # rest.
    find "$LOG_DIR" -name '*.log' -type f \
        \( -mtime "+$MAX_DAYS" -o -size "+$MAX_SIZE" \) \
        -exec gzip -- {} \;

    # Remove very old compressed logs (10# keeps 030 from being read as octal)
    find "$LOG_DIR" -name '*.log.gz' -type f \
        -mtime "+$((10#$MAX_DAYS * 2))" -delete

    log "Log rotation complete"
}

rotate_logs

Package Management Scripts

Update System Script

#!/usr/bin/env bash
# update_system.sh - Update Arch Linux system
#
# Must be run as root. Syncs the package databases, upgrades all packages,
# runs the cache clean-up and removes orphaned packages.

set -euo pipefail

log() { echo "[$(date)] $*"; }

error() { echo "[$(date)] ERROR: $*" >&2; }

# Check for root
if [[ $EUID -ne 0 ]]; then
    error "This script must be run as root"
    exit 1
fi

log "Starting system update..."

# Sync databases
log "Syncing databases..."
pacman -Sy

# Upgrade system
log "Upgrading packages..."
pacman -Syu --noconfirm

# Clean up
log "Cleaning up..."
pacman -Scc --noconfirm

# Check for orphans
# The list is read into an array so each package name is passed as its own
# argument. `|| true` absorbs the non-zero status of the query; an empty
# list then simply skips the removal.
mapfile -t orphans < <(pacman -Qdtq || true)
if (( ${#orphans[@]} > 0 )); then
    log "Removing orphan packages..."
    pacman -Rns --noconfirm "${orphans[@]}"
fi

log "System update complete"

Backup and Sync Scripts

Directory Backup

#!/usr/bin/env bash
# backup_dir.sh - Backup directories
#
# Usage: backup_dir.sh <source_directory> [backup_directory]
#   source_directory  directory to archive (required)
#   backup_directory  where archives are stored (default: /backup)
#
# Creates <name>_<YYYYmmdd_HHMMSS>.tar.gz and keeps only the newest KEEP
# archives of the same source directory.

set -euo pipefail

SOURCE_DIR="${1:-}"
BACKUP_DIR="${2:-/backup}"
KEEP=7

log() { echo "[$(date)] $*"; }

usage() {
    echo "Usage: $0 <source_directory> [backup_directory]" >&2
    exit 1
}

if [[ -z "$SOURCE_DIR" ]]; then
    usage
fi

if [[ ! -d "$SOURCE_DIR" ]]; then
    echo "Error: source directory not found: $SOURCE_DIR" >&2
    exit 1
fi

# Create timestamped backup
timestamp=$(date +%Y%m%d_%H%M%S)
source_name=$(basename -- "$SOURCE_DIR")
backup_name="${source_name}_${timestamp}.tar.gz"
backup_path="$BACKUP_DIR/$backup_name"

log "Creating backup: $backup_path"

mkdir -p -- "$BACKUP_DIR"

# Create compressed archive; -C makes the stored paths relative to the parent
tar -czf "$backup_path" -C "$(dirname -- "$SOURCE_DIR")" "$source_name"

# Show backup size
log "Backup size: $(du -h "$backup_path" | cut -f1)"

# Cleanup old backups (keep last $KEEP) of this source only.
# The timestamp embedded in the name sorts chronologically, so the glob
# result is oldest-first; this avoids parsing ls output and never touches
# archives that belong to other sources.
shopt -s nullglob
backups=("$BACKUP_DIR/${source_name}_"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9][0-9][0-9].tar.gz)
shopt -u nullglob

excess=$(( ${#backups[@]} - KEEP ))
if (( excess > 0 )); then
    rm -f -- "${backups[@]:0:excess}"
fi

log "Backup complete: $backup_path"

Summary

In this chapter, you learned:

✅ User management scripts
✅ Service monitoring and management
✅ System resource monitoring
✅ Disk space alerts
✅ Network connectivity checks
✅ Port scanning
✅ Log rotation
✅ System updates
✅ Directory backups

Next Steps

Continue to the next chapter to learn about Backup and Restore Scripts.

Previous Chapter: Code Style and Organization | Next Chapter: Backup and Restore Scripts