Skip to content

Correlated_subqueries


A correlated subquery is a subquery that references columns from the outer query. Unlike regular subqueries, correlated subqueries are executed once for each row processed by the outer query.

Correlated Subquery Flow
========================================================================
┌─────────────────────────────────────────────────────────────────────┐
│ Correlated Subquery Processing │
│ │
│ Outer Query │
│ ┌─────────────────────────────────────┐ │
│ │ SELECT * FROM employees │ │
│ │ WHERE salary > (SELECT AVG(salary) │ ──┐ │
│ │ FROM employees │ │ Subquery references │
│ │ WHERE department = │ │ outer query's │
│ │ employees.department) │ ──┘ department column │
│ └─────────────────────────────────────┘ │
│ │
│ Execution Flow: │
│ ============== │
│ │
│ 1. Fetch first row from employees │
│ → department = 'Engineering', salary = 95000 │
│ 2. Run subquery with department = 'Engineering' │
│ → AVG(salary) for Engineering = 92500 │
│ 3. Compare: 95000 > 92500? → YES → Include row │
│ │
│ 4. Fetch second row from employees │
│ → department = 'Sales', salary = 85000 │
│ 5. Run subquery with department = 'Sales' │
│ → AVG(salary) for Sales = 97500 │
│ 6. Compare: 85000 > 97500? → NO → Exclude row │
│ │
│ ... repeats for each row ... │
│ │
└─────────────────────────────────────────────────────────────────────┘
-- Create sample tables
CREATE TABLE employees (
employee_id SERIAL PRIMARY KEY,
first_name VARCHAR(50),
last_name VARCHAR(50),
department VARCHAR(50),
salary DECIMAL(10,2),
hire_date DATE
);
CREATE TABLE departments (
department_id SERIAL PRIMARY KEY,
department_name VARCHAR(50),
budget DECIMAL(12,2)
);
INSERT INTO employees (first_name, last_name, department, salary, hire_date) VALUES
('Alice', 'Johnson', 'Engineering', 95000, '2020-01-15'),
('Bob', 'Smith', 'Engineering', 90000, '2021-03-20'),
('Charlie', 'Brown', 'Sales', 85000, '2019-07-10'),
('Diana', 'Wilson', 'Sales', 110000, '2018-05-01'),
('Eve', 'Davis', 'Engineering', 92000, '2022-02-14');
-- Find employees earning more than their department's average
SELECT
employee_id,
first_name || ' ' || last_name as name,
department,
salary,
(
SELECT AVG(salary)
FROM employees e2
WHERE e2.department = e1.department
) as dept_avg_salary
FROM employees e1
WHERE salary > (
SELECT AVG(salary)
FROM employees e2
WHERE e2.department = e1.department
)
ORDER BY department, salary DESC;

EXISTS checks if any rows satisfy the condition - very powerful with correlated subqueries.

-- Find departments that have employees earning over 100000
SELECT DISTINCT department
FROM employees e1
WHERE EXISTS (
SELECT 1
FROM employees e2
WHERE e2.department = e1.department
AND e2.salary > 100000
);
-- Find customers who have placed at least one order
CREATE TABLE customers (
customer_id SERIAL PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(255)
);
CREATE TABLE orders (
order_id SERIAL PRIMARY KEY,
customer_id INTEGER REFERENCES customers(customer_id),
total DECIMAL(10,2),
order_date DATE
);
INSERT INTO customers (name, email) VALUES
('Acme Corp', 'acme@example.com'),
('Global Inc', 'global@example.com'),
('Small Biz', 'small@example.com');
INSERT INTO orders (customer_id, total) VALUES
(1, 500), (1, 750), (2, 1000);
-- Customers with orders (using EXISTS)
SELECT c.*
FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o WHERE o.customer_id = c.customer_id
);
-- Customers without orders (using NOT EXISTS)
SELECT c.*
FROM customers c
WHERE NOT EXISTS (
SELECT 1 FROM orders o WHERE o.customer_id = c.customer_id
);

Use correlated subqueries to calculate per-row metrics.

-- Calculate rank within department
SELECT
employee_id,
first_name || ' ' || last_name as name,
department,
salary,
(
SELECT COUNT(*) + 1
FROM employees e2
WHERE e2.department = e1.department
AND e2.salary > e1.salary
) as dept_rank
FROM employees e1
ORDER BY department, dept_rank;
-- Calculate percentage of department total
SELECT
employee_id,
name,
department,
salary,
(
SELECT SUM(salary)
FROM employees e2
WHERE e2.department = e1.department
) as dept_total,
ROUND(
salary * 100.0 / (
SELECT SUM(salary)
FROM employees e2
WHERE e2.department = e1.department
), 2
) as pct_of_dept
FROM employees e1;
-- Compare to company average
SELECT
first_name,
last_name,
salary,
salary - (SELECT AVG(salary) FROM employees) as vs_company_avg
FROM employees;

16.4 Correlated Subqueries in FROM (Derived Tables)

Section titled “16.4 Correlated Subqueries in FROM (Derived Tables)”
-- Using correlated subquery result as derived table
SELECT
dept_stats.department,
dept_stats.avg_salary,
dept_stats.max_salary,
e.first_name || ' ' || e.last_name as highest_paid
FROM employees e
INNER JOIN (
SELECT
department,
AVG(salary) as avg_salary,
MAX(salary) as max_salary,
COUNT(*) as emp_count
FROM employees
GROUP BY department
) dept_stats ON e.department = dept_stats.department
WHERE e.salary = dept_stats.max_salary;

16.5 Comparison Operators with Correlated Subqueries

Section titled “16.5 Comparison Operators with Correlated Subqueries”
-- Find employees in departments with high average salary
SELECT *
FROM employees
WHERE department IN (
SELECT department
FROM employees
GROUP BY department
HAVING AVG(salary) > 90000
);
-- Find employees whose salary is higher than ANY manager's salary
-- (technically, this compares to minimum manager salary)
SELECT *
FROM employees
WHERE salary > ANY (
SELECT salary
FROM employees
WHERE manager_id IS NOT NULL
);
-- Find employees whose salary is higher than ALL managers
-- (this compares to maximum manager salary)
SELECT *
FROM employees
WHERE salary > ALL (
SELECT salary
FROM employees
WHERE manager_id IS NOT NULL
);

16.6 UPDATE and DELETE with Correlated Subqueries

Section titled “16.6 UPDATE and DELETE with Correlated Subqueries”
-- Update using correlated subquery
CREATE TABLE employee_stats (
emp_id INTEGER,
department VARCHAR(50),
salary DECIMAL(10,2),
dept_avg DECIMAL(10,2)
);
INSERT INTO employee_stats (emp_id, department, salary)
SELECT employee_id, department, salary FROM employees;
-- Add department average to each row
UPDATE employee_stats es
SET dept_avg = (
SELECT AVG(salary)
FROM employees e
WHERE e.department = es.department
);
-- Delete rows using correlated subquery
CREATE TABLE orders_archive AS SELECT * FROM orders;
-- Delete orders for customers with no recent activity
DELETE FROM orders_archive oa
WHERE NOT EXISTS (
SELECT 1
FROM orders o2
WHERE o2.customer_id = oa.customer_id
AND o2.order_date > oa.order_date - INTERVAL '30 days'
);

Correlated Subquery Performance
========================================================================
⚠️ Important: Correlated subqueries can be expensive!
Why They Can Be Slow:
─────────────────────
• Executed once for each outer row (N+1 problem)
• No index on correlated columns = full table scans
• Complex correlation can prevent optimization
Optimization Strategies:
────────────────────────
✓ Use JOINs instead when possible
✓ Ensure indexes on correlated columns
✓ Consider using LATERAL joins (PostgreSQL 9.3+)
✓ Use window functions for ranking/aggregates
✓ Pre-compute aggregations in a CTE or temp table
When Correlated Subqueries Excel:
──────────────────────────────────
• Checking existence (EXISTS/NOT EXISTS)
• Per-row calculations that can't be aggregated
• Complex conditional logic per row
-- Correlated subquery (can be slow)
SELECT e.*
FROM employees e
WHERE salary > (
SELECT AVG(salary)
FROM employees e2
WHERE e2.department = e.department
);
-- Equivalent JOIN (usually faster)
SELECT e.*
FROM employees e
INNER JOIN (
SELECT department, AVG(salary) as avg_sal
FROM employees
GROUP BY department
) dept_avg ON e.department = dept_avg.department
WHERE e.salary > dept_avg.avg_sal;

-- Create table with sequence gaps
CREATE TABLE invoice_numbers (
invoice_id SERIAL PRIMARY KEY,
invoice_number INTEGER,
created_at DATE DEFAULT CURRENT_DATE
);
INSERT INTO invoice_numbers (invoice_number) VALUES
(1001), (1002), (1003), (1005), (1006), (1009), (1010);
-- Find missing invoice numbers
SELECT
(invoice_number + 1) as missing_from,
(
SELECT MIN(invoice_number) - 1
FROM invoice_numbers i2
WHERE i2.invoice_number > i1.invoice_number
) as missing_to
FROM invoice_numbers i1
WHERE NOT EXISTS (
SELECT 1
FROM invoice_numbers i2
WHERE i2.invoice_number = i1.invoice_number + 1
)
AND invoice_number < (
SELECT MAX(invoice_number) FROM invoice_numbers
);
-- Calculate running total using correlated subquery
SELECT
employee_id,
salary,
(
SELECT SUM(salary)
FROM employees e2
WHERE e2.employee_id <= e1.employee_id
) as running_total
FROM employees e1
ORDER BY employee_id;

FeatureDescription
CorrelationSubquery references outer query columns
ExecutionRuns once per outer row
EXISTSChecks for row existence
PerformanceCan be slow; consider JOINs

Next: Chapter 17: Common Table Expressions (CTEs)