Skip to content

Subqueries


A subquery is a query nested inside another SQL statement. They provide powerful ways to perform complex data operations.

Subquery Types
========================================================================
┌─────────────────────────────────────────────────────────────────────┐
│ Subquery Types │
│ │
│ ┌──────────────────┐ ┌──────────────────┐ │
│ │ Scalar │ │ Table │ │
│ │ Subquery │ │ Subquery │ │
│ │ │ │ │ │
│ │ Returns single │ │ Returns multiple │ │
│ │ value │ │ rows/columns │ │
│ │ │ │ (used as table) │ │
│ └──────────────────┘ └──────────────────┘ │
│ │
│ ┌──────────────────┐ ┌──────────────────┐ │
│ │ Correlated │ │ EXISTS │ │
│ │ Subquery │ │ Subquery │ │
│ │ │ │ │ │
│ │ References outer │ │ Checks for row │ │
│ │ query columns │ │ existence │ │
│ └──────────────────┘ └──────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
Where Subqueries Can Be Used:
============================
• SELECT clause (scalar subquery)
• FROM clause (derived table)
• WHERE clause (scalar or IN/ANY/ALL)
• HAVING clause
• INSERT/UPDATE/DELETE VALUES
• CREATE TABLE AS

Return a single value, used in expressions.

-- Sample tables
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER,
total DECIMAL(10,2),
status VARCHAR(20),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50),
email VARCHAR(255)
);
INSERT INTO users (username, email) VALUES
('john', 'john@example.com'),
('jane', 'jane@example.com'),
('bob', 'bob@example.com');
INSERT INTO orders (user_id, total, status) VALUES
(1, 150.00, 'completed'),
(1, 75.00, 'pending'),
(2, 200.00, 'completed'),
(2, 50.00, 'completed'),
(3, 100.00, 'cancelled');
-- Scalar subquery in SELECT
SELECT
username,
(SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count
FROM users;
-- Scalar subquery for comparison
SELECT
username,
total AS order_total,
(SELECT AVG(total) FROM orders) AS avg_order
FROM users u
JOIN orders o ON u.id = o.user_id;
-- Scalar subquery in WHERE
SELECT * FROM orders
WHERE total > (SELECT AVG(total) FROM orders);
-- Multiple scalar subqueries
SELECT
u.username,
(SELECT MAX(total) FROM orders WHERE user_id = u.id) AS max_order,
(SELECT MIN(total) FROM orders WHERE user_id = u.id) AS min_order,
(SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count
FROM users u;

Return a result set used as a temporary table.

-- Basic derived table
SELECT
sub.category,
sub.order_count,
sub.total
FROM (
SELECT
u.username AS category,
COUNT(o.id) AS order_count,
SUM(o.total) AS total
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY u.username
) sub
WHERE sub.order_count > 1;
-- Derived table with multiple columns
SELECT
category,
MAX(order_count) as max_orders,
AVG(total) as avg_total
FROM (
SELECT
u.username AS category,
u.email,
COUNT(o.id) AS order_count,
AVG(o.total) AS total
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.username, u.email
) sub
GROUP BY category;
-- Complex derived table
SELECT
product_category,
product_count,
total_value,
CASE
WHEN product_count > 5 THEN 'High Volume'
WHEN product_count > 2 THEN 'Medium Volume'
ELSE 'Low Volume'
END AS category_type
FROM (
SELECT
'Products' AS product_category,
COUNT(*) AS product_count,
SUM(price) AS total_value
FROM products
WHERE price > 50
) sub;

Filter rows based on membership in a result set.

-- IN with subquery
SELECT * FROM orders
WHERE user_id IN (
SELECT id FROM users WHERE username IN ('john', 'jane')
);
-- NOT IN with subquery
SELECT * FROM orders
WHERE user_id NOT IN (
SELECT id FROM users WHERE username = 'bob'
);
-- IN with multiple columns
SELECT * FROM orders
WHERE (user_id, status) IN (
SELECT id, 'completed' FROM users WHERE email LIKE '%@example.com'
);
-- IN with correlated subquery
SELECT * FROM users u
WHERE id IN (
SELECT user_id FROM orders WHERE total > 100
);
-- Performance consideration: IN vs EXISTS
-- Both are often equivalent in PostgreSQL, but test with EXPLAIN

Compare to any or all values returned by subquery.

-- ANY: TRUE if comparison is TRUE for ANY value
SELECT * FROM orders
WHERE total > ANY (
SELECT total FROM orders WHERE user_id = 1
);
-- Same as:
SELECT * FROM orders
WHERE total > (
SELECT MIN(total) FROM orders WHERE user_id = 1
);
-- ALL: TRUE if comparison is TRUE for ALL values
SELECT * FROM orders
WHERE total > ALL (
SELECT total FROM orders WHERE user_id = 1
);
-- Same as:
SELECT * FROM orders
WHERE total > (
SELECT MAX(total) FROM orders WHERE user_id = 1
);
-- Practical example: Products more expensive than any Electronics
SELECT * FROM products
WHERE price > ANY (
SELECT price FROM products WHERE category = 'Electronics'
);
-- Products more expensive than all Electronics products
SELECT * FROM products
WHERE price > ALL (
SELECT price FROM products WHERE category = 'Electronics'
);

Check if subquery returns any rows.

-- EXISTS: Find users with orders
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders WHERE user_id = u.id
);
-- NOT EXISTS: Find users without orders
SELECT * FROM users u
WHERE NOT EXISTS (
SELECT 1 FROM orders WHERE user_id = u.id
);
-- EXISTS with multiple conditions
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders
WHERE user_id = u.id
AND status = 'completed'
);
-- NOT EXISTS for finding "latest" records
SELECT * FROM orders o1
WHERE NOT EXISTS (
SELECT 1 FROM orders o2
WHERE o2.user_id = o1.user_id
AND o2.created_at > o1.created_at
);
-- EXISTS vs IN: When to use each
-- EXISTS: Better when checking existence (often faster)
-- IN: Better when comparing to small, static list

-- Nested derived tables
SELECT
category,
COUNT(*) AS count,
SUM(total) AS total
FROM (
SELECT
u.username AS category,
o.total
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
) sub
GROUP BY category;
-- Multiple derived tables
SELECT
sub1.category,
sub1.total AS completed_total,
sub2.total AS cancelled_total
FROM (
SELECT
u.username AS category,
SUM(o.total) AS total
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed'
GROUP BY u.username
) sub1
JOIN (
SELECT
u.username AS category,
SUM(o.total) AS total
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.status = 'cancelled'
GROUP BY u.username
) sub2 ON sub1.category = sub2.category;
-- LATERAL subquery (PostgreSQL 9.3+)
-- Allows subquery to reference columns from preceding tables
SELECT
u.username,
o.*
FROM users u
LEFT JOIN LATERAL (
SELECT * FROM orders
WHERE user_id = u.id
ORDER BY created_at DESC
LIMIT 3
) o ON TRUE;

-- Subquery in WHERE
SELECT * FROM products
WHERE category_id = (
SELECT id FROM categories WHERE name = 'Electronics'
);
-- Subquery in HAVING
SELECT
user_id,
COUNT(*) AS order_count
FROM orders
GROUP BY user_id
HAVING COUNT(*) > (
SELECT AVG(order_count)
FROM (SELECT COUNT(*) AS order_count FROM orders GROUP BY user_id) sub
);
-- Subquery with aggregate in HAVING
SELECT
status,
AVG(total) AS avg_total
FROM orders
GROUP BY status
HAVING AVG(total) > (
SELECT AVG(total) FROM orders WHERE status = 'pending'
);
-- Complex HAVING with subquery
SELECT
u.username,
COUNT(o.id) AS total_orders,
SUM(o.total) AS total_spent
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY u.username
HAVING SUM(o.total) > (
SELECT AVG(total) FROM orders WHERE status = 'completed'
);

-- INSERT with subquery
INSERT INTO order_stats (user_id, total_orders, total_spent)
SELECT
user_id,
COUNT(*) AS total_orders,
SUM(total) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY user_id;
-- UPDATE with subquery
UPDATE products p SET
price = price * 1.1
WHERE p.category_id IN (
SELECT id FROM categories WHERE name IN ('Electronics', 'Toys')
);
-- DELETE with subquery
DELETE FROM orders
WHERE user_id IN (
SELECT id FROM users WHERE username = 'inactive_user'
);
-- UPDATE with correlated subquery
UPDATE products p SET
price = (
SELECT AVG(price) FROM products WHERE category_id = p.category_id
)
WHERE p.price > (
SELECT AVG(price) * 1.5 FROM products WHERE category_id = p.category_id
);

-- Always EXPLAIN your queries with subqueries
EXPLAIN ANALYZE
SELECT * FROM orders
WHERE user_id IN (SELECT id FROM users WHERE is_active = TRUE);
-- Convert IN to EXISTS often better
EXPLAIN ANALYZE
SELECT * FROM orders o
WHERE EXISTS (SELECT 1 FROM users u WHERE u.id = o.user_id AND u.is_active);
-- Use LATERAL for better performance in some cases
EXPLAIN ANALYZE
SELECT u.username, o.*
FROM users u
LEFT JOIN LATERAL (
SELECT * FROM orders WHERE user_id = u.id LIMIT 1
) o ON TRUE;
-- Consider materialization
-- PostgreSQL may materialize subqueries for performance
-- Check query plan for materialize nodes

Subquery Best Practices
========================================================================
1. Use EXISTS for Existence Checks
┌──────────────────────────────────────────────────────────────┐
│ ✓ SELECT * FROM users u │
│ WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u│
│ │
│ ✗ SELECT * FROM users u │
│ WHERE u.id IN (SELECT user_id FROM orders) │
└──────────────────────────────────────────────────────────────┘
2. Avoid Unnecessary Nested Subqueries
┌──────────────────────────────────────────────────────────────┐
│ ✗ Complex nested subqueries can be slow │
│ ✓ Use CTEs (WITH clause) for readability │
└──────────────────────────────────────────────────────────────┘
3. Index Subquery Conditions
┌──────────────────────────────────────────────────────────────┐
│ • Add indexes on columns used in subquery WHERE clauses │
│ • This dramatically improves performance │
└──────────────────────────────────────────────────────────────┘
4. Consider JOIN Instead of Subquery
┌──────────────────────────────────────────────────────────────┐
│ • Sometimes JOIN is more efficient than subquery │
│ • Test with EXPLAIN ANALYZE │
└──────────────────────────────────────────────────────────────┘
5. Use LATERAL for Complex Requirements
┌──────────────────────────────────────────────────────────────┐
│ • LATERAL allows subqueries to reference outer query cols │
│ • Useful for top-N per group and similar patterns │
└──────────────────────────────────────────────────────────────┘
========================================================================

Chapter 17: Common Table Expressions (CTEs)


Last Updated: February 2026