Index_types

Chapter 22: Index Types & When to Use

Choosing the Right Index Type

22.1 Index Type Overview

                    PostgreSQL Index Types
    ========================================================================

    ┌─────────────────────────────────────────────────────────────────────┐
    │  Index Type    │  Best For                                          │
    ├────────────────┼────────────────────────────────────────────────────┤
    │  B-tree       │  Equality, range queries (=, <, >, BETWEEN)       │
    ├────────────────┼────────────────────────────────────────────────────┤
    │  Hash         │  Simple equality only (=)                         │
    ├────────────────┼────────────────────────────────────────────────────┤
    │  GiST         │  Geometric, full-text, GIS, range types            │
    ├────────────────┼────────────────────────────────────────────────────┤
    │  GIN          │  Arrays, JSONB, full-text search                   │
    ├────────────────┼────────────────────────────────────────────────────┤
    │  BRIN         │  Very large tables with sequential data            │
    ├────────────────┼────────────────────────────────────────────────────┤
    │  SP-GiST      │  Non-balanced structures (quadtrees, kd-trees)   │
    └────────────────┴────────────────────────────────────────────────────┘

22.2 B-tree Index

-- B-tree is the default and most versatile

-- When to use B-tree:
-- ===================
-- Equality queries: WHERE column = value
-- Range queries: WHERE column > value OR column BETWEEN a AND b
-- Sort operations: ORDER BY column
-- Partial matching: LIKE 'prefix%'

-- Create B-tree index (default)
CREATE INDEX idx_users_email ON users(email);

-- Composite B-tree
CREATE INDEX idx_orders_user_status ON orders(user_id, status);

-- B-tree with specific options
CREATE INDEX idx_users_email ON users(email)
    WITH (fillfactor = 80)
    USING BTREE;

-- Queries that use B-tree:
SELECT * FROM users WHERE email = 'john@example.com';
SELECT * FROM orders WHERE created_at > '2024-01-01';
SELECT * FROM products WHERE price BETWEEN 10 AND 100;
SELECT * FROM users WHERE username LIKE 'john%';

22.3 Hash Index

-- Hash indexes are optimized for equality comparisons

-- When to use Hash:
-- =================
-- Only for equality (=) operations
-- Smaller than B-tree
-- Cannot be used for sorting

-- Create Hash index
CREATE INDEX idx_users_phone ON users USING HASH (phone);

-- When NOT to use Hash:
-- ====================
-- Range queries
-- Sorting
-- Pattern matching (LIKE)
-- Partial matches

-- Warning: Hash indexes are not WAL-logged by default
-- Enable for durability:
ALTER SYSTEM SET hash_mem_multiplier = 1.0;
-- In postgresql.conf:
-- wal_level = replica (or higher)
-- For hash indexes to be durable:
-- Use pg_dump/pg_restore carefully

-- Consider B-tree for most cases
-- Hash is useful for very large, read-only data

22.4 GIN (Generalized Inverted Index)

-- GIN is ideal for composite values

-- When to use GIN:
-- ================
-- Array columns: INTEGER[], TEXT[]
-- JSONB columns
-- Full-text search
-- hstore columns

-- Array indexing
CREATE INDEX idx_products_tags ON products USING GIN (tags);

-- Query using array operators
SELECT * FROM products WHERE tags @> '{"electronics"}';
SELECT * FROM products WHERE tags && '{"sale", "new"}';

-- JSONB indexing
CREATE INDEX idx_users_data ON users USING GIN (data);

-- Query JSONB
SELECT * FROM users WHERE data @> '{"role": "admin"}';
SELECT * FROM users WHERE data ? 'role';
SELECT * FROM users WHERE data @@ '$.role == "admin"';

-- GIN with custom operator class
-- gin_trgm_ops for text pattern matching
CREATE INDEX idx_users_name_gin ON users USING GIN (name gin_trgm_ops);

-- Partial GIN for arrays
CREATE INDEX idx_products_tags_gin ON products USING GIN (tags)
    WITH (gin_fuzzy_search_limit = 100);

22.5 GiST (Generalized Search Tree)

-- GiST is flexible for custom data types

-- When to use GiST:
-- ================
-- Geometric data (PostGIS)
-- Range types
-- Full-text search
-- Custom data types

-- Range type indexing
CREATE INDEX idx_events_time ON events USING GIST (time_range);

-- Query ranges
SELECT * FROM events WHERE time_range && '[2024-01-01,2024-01-02)'::tsrange;

-- Geometric data (requires PostGIS)
-- CREATE INDEX idx_locations_geom ON locations USING GIST (geom);

-- Full-text search with GiST
CREATE INDEX idx_documents_content ON documents USING GIST (to_tsvector('english', content));

-- GiST for exclusion constraints
CREATE TABLE reservations (
    id SERIAL PRIMARY KEY,
    room_number INTEGER,
    reservation_range TSRANGE,
    EXCLUDE USING GIST (
        room_number WITH =,
        reservation_range WITH &&
    )
);

-- GiST vs GIN for text search
-- GiST: Updates are faster, searches slower
-- GIN: Updates slower, searches faster

22.6 BRIN (Block Range Index)

-- BRIN is designed for very large, naturally ordered data

-- When to use BRIN:
-- ================
-- Very large tables (millions of rows)
-- Data is sequentially ordered (e.g., time-series)
-- Read-heavy workloads
-- Append-only data

-- Create BRIN index
CREATE INDEX idx_logs_created ON logs USING BRIN (created_at);

-- BRIN with specific page range
CREATE INDEX idx_logs_created ON logs USING BRIN (created_at)
    WITH (pages_per_range = 128);

-- Best use cases for BRIN:
-- =======================
-- Time-series data: logs, events, metrics
-- Sequential inserts: auto-increment IDs
-- Ordered data: clustered tables

-- BRIN vs B-tree:
-- ==============
-- B-tree: 1 index per 1GB table (approx)
-- BRIN: 1 index per 128MB (default)
-- BRIN much smaller, slightly slower lookups

-- Example: Time-series data
CREATE TABLE metrics (
    id BIGSERIAL,
    sensor_id INTEGER,
    value NUMERIC,
    recorded_at TIMESTAMP DEFAULT NOW()
);

-- Perfect for BRIN
CREATE INDEX idx_metrics_recorded ON metrics USING BRIN (recorded_at);

-- Composite BRIN
CREATE INDEX idx_metrics_sensor_time ON metrics USING BRIN (sensor_id, recorded_at);

22.7 SP-GiST

-- Space-Partitioned GiST for non-balanced trees

-- When to use SP-GiST:
-- ====================
-- Quad-trees (2D points)
-- Kd-trees (multi-dimensional)
-- Radix trees (prefix-based)
-- Phone number routing

-- SP-GiST for IP addresses (built-in)
CREATE INDEX idx_connections_ip ON connections USING SPGIST (ip_address inet_spgist_ops);

-- Custom SP-GiST for specific use cases
-- Often used with PostGIS for large datasets

-- Example: Prefix searches
CREATE INDEX idx_phonebook_region ON phonebook USING SPGIST (phone_prefix text_spgist_ops);

22.8 Choosing the Right Index

                    Index Selection Guide
    ========================================================================

    ┌─────────────────────────────────────────────────────────────────────┐
    │  Data Type/Query                     │  Recommended Index          │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  Single column equality             │  B-tree                     │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  Single column range               │  B-tree                     │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  Arrays containing values           │  GIN                        │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  JSONB documents                   │  GIN                        │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  Full-text search                 │  GIN or GiST                │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  Geometric/GIS data               │  GiST (PostGIS)             │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  Time-series / sequential          │  BRIN                       │
    ├─────────────────────────────────────┼─────────────────────────────┤
    │  Simple equality (memory-constrained│  Hash                       │
    └─────────────────────────────────────┴─────────────────────────────┘

    Performance Considerations:
    ==========================

    Index Type  │  Write Speed │  Read Speed │  Size
    ────────────┼──────────────┼─────────────┼────────
    B-tree      │    Medium    │   Fast     │  Medium
    Hash        │    Fast      │   Fast     │  Small
    GIN         │    Slow      │   Fast     │  Large
    GiST        │    Fast      │   Medium   │  Medium
    BRIN        │    Fast      │   Medium   │  Small
    SP-GiST     │    Fast      │   Medium   │  Medium
    ────────────┴──────────────┴─────────────┴────────

22.9 Best Practices

                    Index Type Best Practices
    ========================================================================

    1. Default to B-tree
       ┌──────────────────────────────────────────────────────────────┐
       │  • B-tree handles most use cases efficiently                │
       │  • Only switch when you have specific requirements         │
       └──────────────────────────────────────────────────────────────┘

    2. Use GIN for JSON/Arrays
       ┌──────────────────────────────────────────────────────────────┐
       │  • GIN provides excellent query performance               │
       │  • Accept slower write performance                         │
       └──────────────────────────────────────────────────────────────┘

    3. Use BRIN for Time-Series
       ┌──────────────────────────────────────────────────────────────┐
       │  • Extremely small index size                             │
       │  • Perfect for sequentially inserted data                │
       └──────────────────────────────────────────────────────────────┘

    4. Test Performance
       ┌──────────────────────────────────────────────────────────────┐
       │  • Use EXPLAIN ANALYZE to compare index types              │
       │  • Consider both query time and write overhead           │
       └──────────────────────────────────────────────────────────────┘
    ========================================================================

Next Chapter

Chapter 23: Query Execution Plans

Last Updated: February 2026