Schema_design

Chapter 7: Schema Design Best Practices

Designing Efficient and Scalable Database Schemas

7.1 Schema Design Principles

                    Schema Design Process
    ========================================================================

                              Start: Requirements
                                        |
                                        v
                           +----------------------+
                           | Identify Entities   |
                           | (nouns: user, order│
                           +----------------------+
                                        |
                                        v
                           +----------------------+
                           | Define Relationships|
                           | (1:1, 1:N, N:M)    |
                           +----------------------+
                                        |
                                        v
                           +----------------------+
                           | Normalize Design    |
                           | (1NF, 2NF, 3NF)    |
                           +----------------------+
                                        |
                                        v
                           +----------------------+
                           | Denormalize Where   |
                           | Needed for Performance|
                           +----------------------+
                                        |
                                        v
                           +----------------------+
                           | Define Constraints  |
                           +----------------------+
    ========================================================================

7.2 Normalization

-- First Normal Form (1NF)
-- Eliminate repeating groups, each cell has single value

-- NOT normalized (repeating groups):
-- Table: orders (order_id, item1, item2, item3, item4)

-- 1NF: Separate rows
CREATE TABLE order_items (
    id SERIAL PRIMARY KEY,
    order_id INTEGER REFERENCES orders(id),
    product_id INTEGER REFERENCES products(id),
    quantity INTEGER
);

-- Second Normal Form (2NF)
-- No partial dependencies (non-key attrs depend on whole key)

-- NOT 2NF: partial dependency
-- Table: order_items(order_id, product_id, product_name, quantity)
-- product_name depends on product_id, not the full key

-- 2NF: Separate products
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200),
    price DECIMAL(10,2)
);

CREATE TABLE order_items (
    order_id INTEGER,
    product_id INTEGER,
    quantity INTEGER,
    PRIMARY KEY (order_id, product_id)
);

-- Third Normal Form (3NF)
-- No transitive dependencies (non-key attrs depend only on key)

-- NOT 3NF: transitive dependency
-- Table: users(user_id, username, department, department_head)
-- department_head depends on department, not user_id

-- 3NF: Separate departments
CREATE TABLE departments (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    head_id INTEGER REFERENCES employees(id)
);

CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50),
    department_id INTEGER REFERENCES departments(id)
);

7.3 Denormalization Strategies

-- When to Denormalize
-- ===================
-- 1. Read-heavy workloads
-- 2. Complex reporting queries
-- 3. Aggregated data that's frequently accessed
-- 4. Eliminate frequent joins

-- Example: Pre-computed totals
CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    user_id INTEGER REFERENCES users(id),
    total_amount DECIMAL(10,2),
    item_count INTEGER DEFAULT 0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Instead of:
-- SELECT SUM(amount), COUNT(*) FROM order_items WHERE order_id = ?

-- We can query directly:
SELECT total_amount, item_count FROM orders WHERE id = ?;

-- Update triggers to maintain denormalized data
CREATE OR REPLACE FUNCTION update_order_totals()
RETURNS TRIGGER AS $$
BEGIN
    UPDATE orders
    SET
        total_amount = (SELECT SUM(price * quantity) FROM order_items WHERE order_id = NEW.order_id),
        item_count = (SELECT COUNT(*) FROM order_items WHERE order_id = NEW.order_id)
    WHERE id = NEW.order_id;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trigger_update_order_totals
AFTER INSERT OR UPDATE OR DELETE ON order_items
FOR EACH ROW EXECUTE FUNCTION update_order_totals();

7.4 Relationship Design

-- One-to-One Relationship
-- Use same primary key or foreign key with unique constraint

CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50)
);

CREATE TABLE user_profiles (
    user_id INTEGER PRIMARY KEY REFERENCES users(id),
    bio TEXT,
    avatar_url VARCHAR(255)
);

-- One-to-Many Relationship
-- Foreign key in the "many" side

CREATE TABLE departments (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100)
);

CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    department_id INTEGER REFERENCES departments(id)
);

-- Many-to-Many Relationship
-- Junction/bridge table

CREATE TABLE students (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100)
);

CREATE TABLE courses (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100)
);

CREATE TABLE enrollments (
    student_id INTEGER REFERENCES students(id),
    course_id INTEGER REFERENCES courses(id),
    enrolled_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (student_id, course_id)
);

7.5 Schema Design Patterns

-- Audit Trail Pattern
CREATE TABLE audit_log (
    id SERIAL PRIMARY KEY,
    table_name VARCHAR(100) NOT NULL,
    record_id INTEGER NOT NULL,
    action VARCHAR(10) NOT NULL,
    old_data JSONB,
    new_data JSONB,
    changed_by INTEGER,
    changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Soft Delete Pattern
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200),
    is_deleted BOOLEAN DEFAULT FALSE,
    deleted_at TIMESTAMP,
    deleted_by INTEGER
);

-- Polymorphic Association
-- Instead of single column reference multiple tables
CREATE TABLE comments (
    id SERIAL PRIMARY KEY,
    content TEXT,
    commentable_type VARCHAR(50),  -- 'post', 'product', 'order'
    commentable_id INTEGER         -- ID in the referenced table
);

-- Better: Separate foreign keys (PostgreSQL exclusive)
CREATE TABLE comments (
    id SERIAL PRIMARY KEY,
    content TEXT,
    post_id INTEGER REFERENCES posts(id),
    product_id INTEGER REFERENCES products(id),

    CONSTRAINT exactly_one_reference CHECK (
        (post_id IS NOT NULL AND product_id IS NULL) OR
        (post_id IS NULL AND product_id IS NOT NULL)
    )
);

-- EAV (Entity-Attribute-Value) - Use Sparingly!
-- Only when attributes vary greatly per entity
CREATE TABLE product_attributes (
    product_id INTEGER REFERENCES products(id),
    attribute_name VARCHAR(50),
    attribute_value VARCHAR(255),
    PRIMARY KEY (product_id, attribute_name)
);

7.6 Naming Conventions

-- Tables: plural, snake_case
users, order_items, product_categories

-- Primary Keys
id (or table_name_id for join tables)
-- Examples: user_id, order_item_id

-- Foreign Keys
referenced_table_id
-- Examples: user_id, category_id, order_id

-- Indexes
idx_tablename_column
-- Examples: idx_users_email, idx_orders_user_status

-- Constraints
pk_tablename (primary key)
fk_tablename_referenced (foreign key)
uk_tablename_column (unique key)
chk_tablename_condition (check constraint)

-- Columns
snake_case
-- Examples: created_at, is_active, order_total

-- Views
v_tablename or view_tablename
-- Examples: v_active_users, view_order_summary

-- Functions
verb_noun or verb_noun
-- Examples: get_user_by_id, calculate_total

7.7 Schema Design for Performance

-- Appropriate Data Types
-- Use smallest data type that fits

-- Instead of:
CREATE TABLE example_bad (
    id BIGINT,        -- unnecessary for small tables
    amount NUMERIC(100,2),  -- too much precision
    status VARCHAR(1000)   -- too long
);

-- Use:
CREATE TABLE example_good (
    id SERIAL,        -- sufficient for most cases
    amount DECIMAL(10,2),  -- appropriate precision
    status VARCHAR(20)    -- sufficient length
);

-- Avoid NULL when possible
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50) NOT NULL,
    email VARCHAR(255) NOT NULL,  -- Required fields
    phone VARCHAR(20),           -- Optional
    bio TEXT                     -- Optional
);

-- Default values for frequently used values
CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    status VARCHAR(20) DEFAULT 'pending',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

7.8 Best Practices

                    Schema Design Best Practices
    ========================================================================

    1. Start with Normalization
       ┌──────────────────────────────────────────────────────────────┐
       │  • Normalize to at least 3NF for transactional systems     │
       │  • Denormalize strategically for read performance          │
       └──────────────────────────────────────────────────────────────┘

    2. Choose Appropriate Data Types
       ┌──────────────────────────────────────────────────────────────┐
       │  • Use smallest type that fits                             │
       │  • Use proper types for the data (DECIMAL for money)      │
       │  • Avoid VARCHAR(n) when TEXT is acceptable                │
       └──────────────────────────────────────────────────────────────┘

    3. Define Constraints Early
       ┌──────────────────────────────────────────────────────────────┐
       │  • Primary keys for every table                            │
       │  • Foreign keys for relationships                          │
       │  • NOT NULL for required fields                           │
       │  • CHECK for business rules                               │
       └──────────────────────────────────────────────────────────────┘

    4. Plan for Growth
       ┌──────────────────────────────────────────────────────────────┐
       │  • Consider partitioning for large tables                 │
       │  • Use appropriate index types                           │
       │  • Think about archiving strategy                        │
       └──────────────────────────────────────────────────────────────┘

    5. Document Your Schema
       ┌──────────────────────────────────────────────────────────────┐
       │  • Add comments to tables and columns                      │
       │  • Maintain ER diagram                                   │
       │  • Document assumptions and business rules                │
       └──────────────────────────────────────────────────────────────┘
    ========================================================================

Next Chapter

Chapter 8: SELECT - Querying Data

Last Updated: February 2026