Skip to content

Group_by


Aggregation functions perform calculations on sets of rows and return a single result.

Aggregation Process
========================================================================
┌─────────────────────────────────────────────────────────────────────┐
│ GROUP BY Processing │
│ │
│ Original Data: │
│ ┌──────┬────────────┬───────┐ │
│ │ user │ category │ amount│ │
│ ├──────┼────────────┼───────┤ │
│ │ A │ Electronics│ 100 │ │
│ │ A │ Electronics│ 50 │────► GROUP BY user, category │
│ │ A │ Books │ 30 │────► Aggregates per group │
│ │ B │ Electronics│ 80 │────► │
│ │ B │ Books │ 40 │────► Results: │
│ └──────┴────────────┴───────┘ ┌──────┬────────────┬────────┐ │
│ │ user │ category │ SUM() │ │
│ ├──────┼────────────┼────────┤ │
│ │ A │ Electronics│ 150 │ │
│ │ A │ Books │ 30 │ │
│ │ B │ Electronics│ 80 │ │
│ │ B │ Books │ 40 │ │
│ └──────┴────────────┴────────┘ │
└─────────────────────────────────────────────────────────────────────┘

-- Sample data
CREATE TABLE sales (
id SERIAL,
product VARCHAR(100),
category VARCHAR(50),
amount DECIMAL(10,2),
sale_date DATE,
region VARCHAR(20)
);
INSERT INTO sales (product, category, amount, sale_date, region) VALUES
('Laptop', 'Electronics', 999.99, '2024-01-15', 'North'),
('Mouse', 'Electronics', 29.99, '2024-01-16', 'North'),
('Book', 'Books', 19.99, '2024-01-17', 'South'),
('Laptop', 'Electronics', 1199.99, '2024-01-18', 'South'),
('Shirt', 'Clothing', 39.99, '2024-01-19', 'North'),
('Book', 'Books', 24.99, '2024-01-20', 'North');
-- COUNT - count rows
SELECT COUNT(*) FROM sales; -- Total rows
SELECT COUNT(DISTINCT product) FROM sales; -- Unique products
-- SUM - total sum
SELECT SUM(amount) FROM sales;
SELECT SUM(amount) FROM sales WHERE category = 'Electronics';
-- AVG - average
SELECT AVG(amount) FROM sales;
SELECT category, AVG(amount) FROM sales GROUP BY category;
-- MIN and MAX
SELECT MIN(amount), MAX(amount) FROM sales;
SELECT
category,
MIN(amount) AS min_sale,
MAX(amount) AS max_sale,
AVG(amount) AS avg_sale
FROM sales
GROUP BY category;
-- Multiple aggregations
SELECT
COUNT(*) AS total_sales,
SUM(amount) AS total_revenue,
AVG(amount) AS avg_sale,
MIN(amount) AS min_sale,
MAX(amount) AS max_sale
FROM sales;

-- Basic GROUP BY
SELECT category, SUM(amount) FROM sales GROUP BY category;
-- GROUP BY multiple columns
SELECT category, region, SUM(amount)
FROM sales
GROUP BY category, region;
-- GROUP BY with multiple aggregations
SELECT
category,
region,
COUNT(*) AS transaction_count,
SUM(amount) AS total_sales,
AVG(amount) AS avg_sale,
MIN(amount) AS min_sale,
MAX(amount) AS max_sale
FROM sales
GROUP BY category, region
ORDER BY category, region;
-- GROUP BY with string functions
SELECT
EXTRACT(YEAR FROM sale_date) AS year,
EXTRACT(MONTH FROM sale_date) AS month,
SUM(amount) AS total_sales
FROM sales
GROUP BY EXTRACT(YEAR FROM sale_date), EXTRACT(MONTH FROM sale_date)
ORDER BY year, month;
-- GROUP BY with filtering before aggregation
SELECT region, SUM(amount)
FROM sales
WHERE sale_date >= '2024-01-18'
GROUP BY region;

The HAVING clause filters groups after aggregation.

-- HAVING vs WHERE
-- WHERE filters rows BEFORE aggregation
-- HAVING filters groups AFTER aggregation
-- Find categories with total sales > 100
SELECT
category,
SUM(amount) AS total
FROM sales
GROUP BY category
HAVING SUM(amount) > 100;
-- Find products sold more than once
SELECT
product,
COUNT(*) AS times_sold
FROM sales
GROUP BY product
HAVING COUNT(*) > 1;
-- Complex HAVING with multiple conditions
SELECT
region,
category,
SUM(amount) AS total,
COUNT(*) AS transactions
FROM sales
GROUP BY region, category
HAVING SUM(amount) > 50 AND COUNT(*) >= 2
ORDER BY total DESC;
-- HAVING with subquery
SELECT
category,
SUM(amount) AS total
FROM sales
GROUP BY category
HAVING SUM(amount) > (
SELECT AVG(total)
FROM (SELECT SUM(amount) AS total FROM sales GROUP BY category) sub
);

Generate multiple grouping levels in one query.

-- GROUP BY ROLLUP - hierarchical subtotals
SELECT
region,
category,
SUM(amount) AS total
FROM sales
GROUP BY ROLLUP (region, category);
-- Equivalent to:
-- GROUP BY region, category
-- UNION ALL
-- GROUP BY region
-- UNION ALL
-- GROUP BY () (grand total)
-- GROUP BY CUBE - all combinations
SELECT
region,
category,
SUM(amount) AS total
FROM sales
GROUP BY CUBE (region, category);
-- Equivalent to all combinations:
-- region, category
-- region, NULL
-- NULL, category
-- NULL, NULL (grand total)
-- GROUPING() function to identify grouped rows
SELECT
GROUPING(region) AS is_region_grouped,
GROUPING(category) AS is_category_grouped,
region,
category,
SUM(amount) AS total
FROM sales
GROUP BY ROLLUP (region, category);
-- 0 = grouped by this column
-- 1 = not grouped by this column (subtotal row)

Use FILTER to aggregate only certain rows.

-- Using FILTER instead of CASE
SELECT
category,
SUM(amount) AS total,
SUM(amount) FILTER (WHERE region = 'North') AS north_sales,
SUM(amount) FILTER (WHERE region = 'South') AS south_sales,
COUNT(*) AS total_transactions,
COUNT(*) FILTER (WHERE amount > 100) AS high_value_transactions
FROM sales
GROUP BY category;
-- Multiple FILTER conditions
SELECT
category,
SUM(amount) FILTER (WHERE sale_date >= '2024-01-18') AS recent_sales,
COUNT(*) FILTER (WHERE amount > 50) AS premium_sales
FROM sales
GROUP BY category;
-- FILTER with other aggregates
SELECT
region,
AVG(amount) AS avg_amount,
AVG(amount) FILTER (WHERE category = 'Electronics') AS avg_electronics,
AVG(amount) FILTER (WHERE category = 'Books') AS avg_books
FROM sales
GROUP BY region;

-- COUNT DISTINCT
SELECT
COUNT(*) AS total_rows,
COUNT(DISTINCT product) AS unique_products,
COUNT(DISTINCT category) AS unique_categories,
COUNT(DISTINCT region) AS unique_regions
FROM sales;
-- SUM DISTINCT
SELECT
SUM(DISTINCT amount) AS sum_unique_amounts
FROM sales;
-- Multiple distinct aggregates
SELECT
COUNT(DISTINCT product) AS products,
COUNT(DISTINCT region) AS regions,
COUNT(DISTINCT category) AS categories
FROM sales
WHERE amount > 20;
-- DISTINCT vs GROUP BY
-- These are equivalent:
SELECT category, SUM(amount) FROM sales GROUP BY category;
SELECT SUM(amount) FILTER (WHERE category = 'Books') FROM sales;

-- Aggregate of aggregates
SELECT
category,
SUM(amount) AS total,
AVG(SUM(amount)) OVER () AS grand_avg
FROM sales
GROUP BY category;
-- Running total with window function (for comparison)
SELECT
sale_date,
amount,
SUM(amount) OVER (ORDER BY sale_date) AS running_total,
SUM(amount) OVER () AS grand_total
FROM sales;
-- Percentage of total
SELECT
category,
SUM(amount) AS amount,
ROUND(
SUM(amount)::numeric / SUM(SUM(amount)) OVER() * 100,
2
) AS percentage
FROM sales
GROUP BY category
ORDER BY percentage DESC;
-- Aggregates in subqueries
SELECT
category,
(SELECT SUM(amount) FROM sales s2 WHERE s2.category = s1.category) AS total
FROM (SELECT DISTINCT category FROM sales) s1;

-- Monthly sales summary
SELECT
TO_CHAR(sale_date, 'YYYY-MM') AS month,
COUNT(*) AS transactions,
SUM(amount) AS revenue,
AVG(amount) AS avg_transaction,
MIN(amount) AS min_sale,
MAX(amount) AS max_sale
FROM sales
GROUP BY TO_CHAR(sale_date, 'YYYY-MM')
ORDER BY month;
-- Top products by category
SELECT
category,
product,
SUM(amount) AS total
FROM sales
GROUP BY category, product
QUALIFY ROW_NUMBER() OVER (PARTITION BY category ORDER BY SUM(amount) DESC) <= 2
ORDER BY category, total DESC;
-- Customer purchase analysis
WITH customer_sales AS (
SELECT
customer_id,
SUM(amount) AS total_spent,
COUNT(*) AS order_count,
MAX(created_at) AS last_order
FROM orders
GROUP BY customer_id
)
SELECT
CASE
WHEN total_spent > 1000 THEN 'VIP'
WHEN total_spent > 500 THEN 'Premium'
ELSE 'Regular'
END AS customer_tier,
COUNT(*) AS customers,
SUM(total_spent) AS revenue,
AVG(order_count) AS avg_orders
FROM customer_sales
GROUP BY
CASE
WHEN total_spent > 1000 THEN 'VIP'
WHEN total_spent > 500 THEN 'Premium'
ELSE 'Regular'
END;

Aggregation Best Practices
========================================================================
1. Use FILTER Instead of CASE
┌──────────────────────────────────────────────────────────────┐
│ ✓ SELECT SUM(amount) FILTER (WHERE status = 'completed') │
│ ✗ SELECT SUM(CASE WHEN status = 'completed' THEN amount) │
└──────────────────────────────────────────────────────────────┘
2. Use GROUP BY ROLLUP for Hierarchical Summaries
┌──────────────────────────────────────────────────────────────┐
│ • Generate subtotals automatically │
│ • One query instead of multiple UNION ALL │
└──────────────────────────────────────────────────────────────┘
3. Always Include Non-Aggregated Columns in GROUP BY
┌──────────────────────────────────────────────────────────────┐
│ • PostgreSQL requires this │
│ • Ensures correct grouping │
└──────────────────────────────────────────────────────────────┘
4. Filter Before Grouping When Possible
┌──────────────────────────────────────────────────────────────┐
│ • Use WHERE to filter rows before aggregation │
│ • More efficient than HAVING │
└──────────────────────────────────────────────────────────────┘
5. Use COALESCE for NULL Handling
┌──────────────────────────────────────────────────────────────┐
│ • COALESCE(SUM(amount), 0) prevents NULL in results │
└──────────────────────────────────────────────────────────────┘
========================================================================

Chapter 20: Window Functions


Last Updated: February 2026