Skip to content

Indexes


Indexes are data structures that improve the speed of data retrieval operations on database tables.

How Indexes Work
========================================================================
Without Index (Full Table Scan):
================================
Table: users (1 million rows)
SELECT * FROM users WHERE email = 'john@example.com';
┌─────┬──────────┬─────────────────────┐
│ id │ name │ email │
├─────┼──────────┼─────────────────────┤
│ 1 │ Alice │ alice@example.com │ ← Check
│ 2 │ Bob │ bob@example.com │ ← Check
│ ... │ ... │ ... │ ← Check
│99999│ John │ john@example.com │ ← Found!
└─────┴──────────┴─────────────────────┘
→ Must check every row in worst case
→ O(n) complexity
With Index:
==========
Index: idx_users_email (B-tree)
┌────────────────────────┐
│ B-Tree Structure │
└────────────────────────┘
/ \
... ...
/ \ / \
(a-d) (e-h) (i-l) (m-z)
| | | |
┌─────────┐ ┌─────────┐
│email │ │email │
│pointer │ │pointer │
└─────────┘ └─────────┘
SELECT * FROM users WHERE email = 'john@example.com';
→ Navigate tree directly to email
→ O(log n) complexity
→ Very fast lookup

-- Sample table
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) NOT NULL,
email VARCHAR(255) NOT NULL,
phone VARCHAR(20),
status VARCHAR(20) DEFAULT 'active',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Basic B-tree index (default)
CREATE INDEX idx_users_email ON users(email);
-- Index with specific index type
CREATE INDEX idx_users_email_hash ON users USING HASH (email);
-- Composite index (multiple columns)
CREATE INDEX idx_users_status_created ON users(status, created_at DESC);
-- Unique index
CREATE UNIQUE INDEX idx_users_username ON users(username);
-- Index with WHERE clause (partial index)
CREATE INDEX idx_users_active ON users(email) WHERE status = 'active';
-- Index on expression
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Index on JSON field
CREATE INDEX idx_users_data ON users USING GIN(data);
-- Index with custom collation
CREATE INDEX idx_users_name_collate ON users(name COLLATE "en_US");

-- B-tree Index (default, most common)
-- Best for: =, <, >, <=, >=, BETWEEN, LIKE
CREATE INDEX idx_users_email ON users(email); -- email = 'x'
CREATE INDEX idx_users_id ON users(id); -- id < 100
CREATE INDEX idx_users_created ON users(created_at); -- date range
-- Hash Index
-- Best for: Simple equality (=) only
CREATE INDEX idx_users_phone_hash ON users USING HASH (phone);
-- GiST Index
-- Best for: Geometric data, full-text search, range types
CREATE INDEX idx_locations_coords ON locations USING GIST (coordinates);
CREATE INDEX idx_events_range ON events USING GIST (time_range);
-- GIN Index
-- Best for: Arrays, JSONB, full-text search
CREATE INDEX idx_products_tags ON products USING GIN (tags);
CREATE INDEX idx_documents_content ON documents USING GIN (content gin_trgm_ops);
-- BRIN Index
-- Best for: Large tables with sequential data
CREATE INDEX idx_logs_date ON logs USING BRIN (created_at);
-- SP-GiST Index
-- Best for: Non-balanced data structures
CREATE INDEX idx_customers_region ON customers USING SPGIST (region);

-- Check if index exists
\d users
-- Shows: indexes, constraints, triggers
-- Get index information
SELECT
indexname,
indexdef
FROM pg_indexes
WHERE tablename = 'users';
-- Check index size
SELECT
pg_size_pretty(pg_relation_size('idx_users_email'));
-- Explain query to see index usage
EXPLAIN
SELECT * FROM users WHERE email = 'john@example.com';
EXPLAIN ANALYZE
SELECT * FROM users WHERE email = 'john@example.com';
-- Find unused indexes
SELECT
schemaname,
tablename,
indexname,
idx_tup_read,
idx_tup_fetch,
idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND indexname NOT LIKE '%pkey%';
-- Index usage statistics
SELECT
schemaname || '.' || tablename AS table_name,
indexname,
idx_blks_read,
idx_blks_hit,
ROUND(
100.0 * idx_blks_hit / NULLIF(idx_blks_read + idx_blks_hit, 0)
) AS hit_rate
FROM pg_statio_user_indexes
ORDER BY idx_blks_read DESC;

-- Reindex to rebuild fragmented index
REINDEX INDEX idx_users_email;
-- Reindex entire table
REINDEX TABLE users;
-- Concurrent index creation (no locks)
-- Useful for production systems
CREATE INDEX CONCURRENTLY idx_users_phone ON users(phone);
-- Rebuild index with CONCURRENTLY (PostgreSQL 12+)
REINDEX INDEX CONCURRENTLY idx_users_email;
-- Drop index
DROP INDEX idx_users_old;
-- Rename index
ALTER INDEX idx_users_email RENAME TO idx_users_email_unique;
-- Disable index (for bulk loads)
ALTER INDEX idx_users_email DISABLE;
-- Enable index
ALTER INDEX idx_users_email ENABLE;

-- Create composite index
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
-- Column order matters!
-- For: WHERE user_id = 1 AND status = 'completed'
-- Index (user_id, status) is optimal
-- For: WHERE status = 'completed'
-- Partial index might be better
CREATE INDEX idx_orders_status ON orders(status)
WHERE status = 'completed';
-- Covering index (INCLUDE)
CREATE INDEX idx_users_email ON users(email) INCLUDE (username, phone);
-- Allows index-only scans without fetching from table
-- Index selectivity
-- High cardinality (many unique values): B-tree
-- Low cardinality (few unique values): Consider partial index

-- Index only active users
CREATE INDEX idx_users_active_email ON users(email)
WHERE status = 'active';
-- Index high-value orders
CREATE INDEX idx_orders_high_value ON orders(user_id)
WHERE total > 1000;
-- Index recent records
CREATE INDEX idx_orders_2024 ON orders(created_at)
WHERE created_at >= '2024-01-01';
-- Multiple partial indexes
CREATE INDEX idx_orders_pending ON orders(user_id)
WHERE status = 'pending';
CREATE INDEX idx_orders_completed ON orders(user_id)
WHERE status = 'completed';
-- Partial index for NULL values
CREATE INDEX idx_users_phone ON users(phone)
WHERE phone IS NOT NULL;

-- Index on function result
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Query using expression index
SELECT * FROM users WHERE LOWER(email) = 'john@example.com';
-- Index on JSON field
CREATE INDEX idx_users_data ON users USING GIN (data);
-- JSONB path index
CREATE INDEX idx_users_name ON users ((data->>'name'));
-- Index on type cast
CREATE INDEX idx_orders_date ON orders ((created_at::date));
-- Composite with expression
CREATE INDEX idx_users_name_email ON users (LOWER(username), email);

When to Create Indexes
========================================================================
✓ Index columns used in WHERE clauses
✓ Index foreign key columns
✓ Index columns used in JOIN conditions
✓ Index columns used in ORDER BY
✓ Index columns used in GROUP BY
✓ Index high-cardinality columns
✗ Don't index low-cardinality columns (e.g., boolean)
✗ Don't index frequently updated columns
✗ Don't index small tables (full scan is faster)
✗ Don't over-index (too many indexes slow writes)
Index Order Guidelines:
=======================
1. Equality conditions first (column = value)
2. Range conditions last (column > value)
3. High-cardinality columns first
Example:
WHERE status = 'active' AND created_at > '2024-01-01'
Best: INDEX (status, created_at)
```
---
## 21.10 Best Practices
Index Best Practices
========================================================================
1. Analyze Query Patterns First
┌──────────────────────────────────────────────────────────────┐
│ • Use EXPLAIN ANALYZE to identify slow queries │
│ • Check for sequential scans │
│ • Look for missing indexes │
└──────────────────────────────────────────────────────────────┘
2. Choose Right Index Type
┌──────────────────────────────────────────────────────────────┐
│ • B-tree: Most queries (default) │
│ • GIN: Arrays, JSONB, full-text │
│ • BRIN: Time-series, append-only data │
│ • Hash: Simple equality only │
└──────────────────────────────────────────────────────────────┘
3. Consider Composite Index Column Order
┌──────────────────────────────────────────────────────────────┐
│ • Put equality columns first │
│ • Put range/sort columns last │
│ • Consider query patterns │
└──────────────────────────────────────────────────────────────┘
4. Use Partial Indexes
┌──────────────────────────────────────────────────────────────┐
│ • Smaller and faster than full indexes │
│ • Perfect for common filters │
└──────────────────────────────────────────────────────────────┘
5. Monitor and Maintain
┌──────────────────────────────────────────────────────────────┐
│ • Check for unused indexes │
│ • Reindex periodically to reduce bloat │
│ • Consider index size vs. query improvement │
└──────────────────────────────────────────────────────────────┘
========================================================================
---
## Next Chapter
[Chapter 23: Query Execution Plans](./10_performance/23_explain_analyze.md)
---
*Last Updated: February 2026*