Skip to content

Self_cross_joins


A Self JOIN occurs when a table is joined to itself. This is useful for comparing rows within the same table or representing hierarchical data.

Self JOIN Concept
========================================================================
┌─────────────────────────────────────────────────────────────────────┐
│ Same Table, Multiple Aliases │
│ │
│ employees table │
│ ┌─────────────┬──────────────┬────────────┐ │
│ │ employee_id │ name │ manager_id │ │
│ ├─────────────┼──────────────┼────────────┤ │
│ │ 1 │ Alice (CEO) │ NULL │ │
│ │ 2 │ Bob (Manager) │ 1 │ │
│ │ 3 │ Charlie (Dev)│ 2 │ │
│ │ 4 │ Diana (Dev) │ 2 │ │
│ └─────────────┴──────────────┴────────────┘ │
│ │
│ Self JOIN: employee e1 JOIN employees e2 │
│ on e1.manager_id = e2.employee_id │
│ │
│ Result: │
│ ┌────────────┬───────────┬─────────────┐ │
│ │ Employee │ Manager │ Manager ID │ │
│ ├────────────┼───────────┼─────────────┤ │
│ │ Bob │ Alice │ 1 │ │
│ │ Charlie │ Bob │ 2 │ │
│ │ Diana │ Bob │ 2 │ │
│ └────────────┴───────────┴─────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
-- Create employee table for self-join examples
CREATE TABLE employees (
employee_id SERIAL PRIMARY KEY,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(255),
manager_id INTEGER REFERENCES employees(employee_id),
department VARCHAR(50),
salary DECIMAL(10,2)
);
-- Insert sample data with hierarchical structure
INSERT INTO employees (first_name, last_name, email, manager_id, department, salary) VALUES
('Alice', 'Johnson', 'alice@company.com', NULL, 'Executive', 150000),
('Bob', 'Smith', 'bob@company.com', 1, 'Engineering', 120000),
('Charlie', 'Brown', 'charlie@company.com', 1, 'Sales', 110000),
('Diana', 'Wilson', 'diana@company.com', 2, 'Engineering', 95000),
('Eve', 'Davis', 'eve@company.com', 2, 'Engineering', 90000),
('Frank', 'Miller', 'frank@company.com', 3, 'Sales', 85000);
-- Self JOIN: Find employees and their managers
SELECT
e.employee_id,
e.first_name || ' ' || e.last_name as employee_name,
m.first_name || ' ' || m.last_name as manager_name,
e.department
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.employee_id
ORDER BY e.employee_id;

Self JOINs are essential for working with hierarchical/tree-structured data.

-- Find all direct reports of a manager
SELECT
m.first_name || ' ' || m.last_name as manager,
e.first_name || ' ' || e.last_name as direct_report
FROM employees e
INNER JOIN employees m ON e.manager_id = m.employee_id
WHERE m.employee_id = 2; -- Bob's direct reports
-- Find the management chain (path to CEO)
WITH RECURSIVE management_chain AS (
-- Base case: top-level employee
SELECT
employee_id,
first_name || ' ' || last_name as name,
manager_id,
1 as level,
ARRAY[first_name || ' ' || last_name] as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: employees with managers
SELECT
e.employee_id,
e.first_name || ' ' || e.last_name,
e.manager_id,
mc.level + 1,
mc.path || (e.first_name || ' ' || e.last_name)
FROM employees e
INNER JOIN management_chain mc ON e.manager_id = mc.employee_id
)
SELECT * FROM management_chain
ORDER BY level, employee_id;
-- Find employees at each level
SELECT level, COUNT(*) as count
FROM management_chain
GROUP BY level
ORDER BY level;

Self JOINs allow comparing rows within the same table.

-- Find employees in the same department
SELECT
e1.first_name || ' ' || e1.last_name as employee1,
e2.first_name || ' ' || e2.last_name as employee2,
e1.department
FROM employees e1
INNER JOIN employees e2
ON e1.department = e2.department
AND e1.employee_id < e2.employee_id -- Avoid duplicates
ORDER BY e1.department, employee1;
-- Find salary comparisons within department
SELECT
e1.first_name || ' ' || e1.last_name as employee1,
e1.salary as salary1,
e2.first_name || ' ' || e2.last_name as employee2,
e2.salary as salary2,
e1.department,
e1.salary - e2.salary as salary_diff
FROM employees e1
INNER JOIN employees e2
ON e1.department = e2.department
AND e1.employee_id < e2.employee_id
ORDER BY e1.department, salary_diff DESC;
-- Find highest paid employee in each department
SELECT e1.*
FROM employees e1
WHERE NOT EXISTS (
SELECT 1 FROM employees e2
WHERE e2.department = e1.department
AND e2.salary > e1.salary
);

CROSS JOIN produces the Cartesian product of two tables - every row from the first table is combined with every row from the second table.

CROSS JOIN Flow
========================================================================
┌─────────────────┐ ┌─────────────────┐
│ Table A │ │ Table B │
│ (sizes) │ │ (colors) │
│ │ │ │
│ ┌─────────────┐ │ │ ┌─────────────┐ │
│ │ size │ │ │ │ color │ │
│ ├─────────────┤ │ │ ├─────────────┤ │
│ │ S │ │ │ │ Red │ │
│ │ M │ │ │ │ Blue │ │
│ │ L │ │ │ │ Green │ │
│ └─────────────┘ │ │ └─────────────┘ │
└────────┬────────┘ └────────┬────────┘
│ │
└───────────┬───────────────┘
┌─────────────────────────────────────────────────────────┐
│ CROSS JOIN Result │
│ (3 sizes × 3 colors = 9 combinations) │
│ │
│ size │ color │
│ ─────┼──────── │
│ S │ Red │
│ S │ Blue │
│ S │ Green │
│ M │ Red │
│ M │ Blue │
│ M │ Green │
│ L │ Red │
│ L │ Blue │
│ L │ Green │
│ │
└─────────────────────────────────────────────────────────┘
-- Create tables for CROSS JOIN demonstration
CREATE TABLE sizes (
size_id SERIAL PRIMARY KEY,
size_name VARCHAR(10)
);
CREATE TABLE colors (
color_id SERIAL PRIMARY KEY,
color_name VARCHAR(20)
);
CREATE TABLE products (
product_id SERIAL PRIMARY KEY,
product_name VARCHAR(100)
);
INSERT INTO sizes (size_name) VALUES ('S'), ('M'), ('L'), ('XL');
INSERT INTO colors (color_name) VALUES ('Red'), ('Blue'), ('Green'), ('Black');
-- Implicit CROSS JOIN (comma-separated)
SELECT s.size_name, c.color_name
FROM sizes s, colors c;
-- Explicit CROSS JOIN
SELECT s.size_name, c.color_name
FROM sizes s
CROSS JOIN colors c;
-- Generate all possible size/color combinations
SELECT
s.size_name || '-' || c.color_name as variant,
s.size_name,
c.color_name
FROM sizes s
CROSS JOIN colors c;

-- Generate date ranges
SELECT
generate_series(
'2024-01-01'::date,
'2024-01-31'::date,
'1 day'::interval
) as date;
-- Generate all combinations for reporting
CREATE TABLE months (
month_num INTEGER,
month_name VARCHAR(20)
);
CREATE TABLE years (
year_num INTEGER
);
INSERT INTO months VALUES
(1, 'January'), (2, 'February'), (3, 'March'),
(4, 'April'), (5, 'May'), (6, 'June'),
(7, 'July'), (8, 'August'), (9, 'September'),
(10, 'October'), (11, 'November'), (12, 'December');
INSERT INTO years VALUES (2022), (2023), (2024);
-- Generate calendar matrix
SELECT y.year_num, m.month_num, m.month_name
FROM years y
CROSS JOIN months m
ORDER BY y.year_num, m.month_num;
-- Create sales data
CREATE TABLE sales (
sale_id SERIAL PRIMARY KEY,
product_name VARCHAR(100),
quarter INTEGER,
amount DECIMAL(10,2)
);
INSERT INTO sales (product_name, quarter, amount) VALUES
('Product A', 1, 1000), ('Product A', 2, 1500),
('Product B', 1, 800), ('Product B', 3, 1200);
-- Create quarter reference
CREATE TABLE quarters (quarter INTEGER);
INSERT INTO quarters VALUES (1), (2), (3), (4);
-- Show all products with all quarters (including zeros)
SELECT
s.product_name,
q.quarter,
COALESCE(sale.amount, 0) as sales_amount
FROM (SELECT DISTINCT product_name FROM sales) s
CROSS JOIN quarters q
LEFT JOIN sales sale
ON s.product_name = sale.product_name
AND q.quarter = sale.quarter
ORDER BY s.product_name, q.quarter;

-- Create temperature readings table
CREATE TABLE temperature_readings (
reading_id SERIAL PRIMARY KEY,
reading_date DATE,
temperature DECIMAL(5,2)
);
INSERT INTO temperature_readings (reading_date, temperature) VALUES
('2024-01-01', 20.5), ('2024-01-02', 21.0),
('2024-01-03', 19.5), ('2024-01-04', 22.0),
('2024-01-05', 23.5);
-- Compare each day's temperature with previous day
SELECT
current.reading_date as current_date,
current.temperature as current_temp,
previous.temperature as previous_temp,
current.temperature - previous.temperature as change
FROM temperature_readings current
LEFT JOIN temperature_readings previous
ON previous.reading_id = current.reading_id - 1
ORDER BY current.reading_date;
-- Running total using self-join
SELECT
t1.reading_id,
t1.temperature,
SUM(t2.temperature) as running_total
FROM temperature_readings t1
INNER JOIN temperature_readings t2
ON t2.reading_id <= t1.reading_id
GROUP BY t1.reading_id, t1.temperature
ORDER BY t1.reading_id;

Self JOIN & CROSS JOIN Performance
========================================================================
⚠️ WARNING: Both can produce large result sets!
Self JOIN Tips:
─────────────────
✓ Always use aliases to distinguish columns
✓ Add conditions to avoid duplicate pairs (e.g., a.id < b.id)
✓ Ensure indexes exist on join columns
✓ Consider using CTEs for complex hierarchies
CROSS JOIN Tips:
─────────────────
✓ Be careful - can explode data size!
✓ Use LIMIT if testing
✓ Consider if you really need all combinations
✓ Use WHERE clause to filter after cross join

-- Build complete org chart with levels
WITH RECURSIVE org_chart AS (
SELECT
employee_id,
first_name || ' ' || last_name as name,
manager_id,
department,
1 as level,
first_name || ' ' || last_name as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
SELECT
e.employee_id,
e.first_name || ' ' || e.last_name,
e.manager_id,
e.department,
oc.level + 1,
oc.path || ' -> ' || e.first_name || ' ' || e.last_name
FROM employees e
INNER JOIN org_chart oc ON e.manager_id = oc.employee_id
)
SELECT
level,
REPEAT(' ', level - 1) || name as formatted_name,
department,
path
FROM org_chart
ORDER BY level, employee_id;

Join TypeUse Case
Self JOINHierarchical data, comparing rows in same table
CROSS JOINGenerate combinations, test data, matrix reports

Next: Chapter 15: Subqueries