Skip to content

Schema_design

Designing Efficient and Scalable Database Schemas

Section titled “Designing Efficient and Scalable Database Schemas”

Schema Design Process
========================================================================
Start: Requirements
|
v
+----------------------+
| Identify Entities |
| (nouns: user, order│
+----------------------+
|
v
+----------------------+
| Define Relationships|
| (1:1, 1:N, N:M) |
+----------------------+
|
v
+----------------------+
| Normalize Design |
| (1NF, 2NF, 3NF) |
+----------------------+
|
v
+----------------------+
| Denormalize Where |
| Needed for Performance|
+----------------------+
|
v
+----------------------+
| Define Constraints |
+----------------------+
========================================================================

-- First Normal Form (1NF)
-- Eliminate repeating groups, each cell has single value
-- NOT normalized (repeating groups):
-- Table: orders (order_id, item1, item2, item3, item4)
-- 1NF: Separate rows
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id),
product_id INTEGER REFERENCES products(id),
quantity INTEGER
);
-- Second Normal Form (2NF)
-- No partial dependencies (non-key attrs depend on whole key)
-- NOT 2NF: partial dependency
-- Table: order_items(order_id, product_id, product_name, quantity)
-- product_name depends on product_id, not the full key
-- 2NF: Separate products
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200),
price DECIMAL(10,2)
);
CREATE TABLE order_items (
order_id INTEGER,
product_id INTEGER,
quantity INTEGER,
PRIMARY KEY (order_id, product_id)
);
-- Third Normal Form (3NF)
-- No transitive dependencies (non-key attrs depend only on key)
-- NOT 3NF: transitive dependency
-- Table: users(user_id, username, department, department_head)
-- department_head depends on department, not user_id
-- 3NF: Separate departments
CREATE TABLE departments (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
head_id INTEGER REFERENCES employees(id)
);
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50),
department_id INTEGER REFERENCES departments(id)
);

-- When to Denormalize
-- ===================
-- 1. Read-heavy workloads
-- 2. Complex reporting queries
-- 3. Aggregated data that's frequently accessed
-- 4. Eliminate frequent joins
-- Example: Pre-computed totals
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
total_amount DECIMAL(10,2),
item_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Instead of:
-- SELECT SUM(amount), COUNT(*) FROM order_items WHERE order_id = ?
-- We can query directly:
SELECT total_amount, item_count FROM orders WHERE id = ?;
-- Update triggers to maintain denormalized data
CREATE OR REPLACE FUNCTION update_order_totals()
RETURNS TRIGGER AS $$
BEGIN
UPDATE orders
SET
total_amount = (SELECT SUM(price * quantity) FROM order_items WHERE order_id = NEW.order_id),
item_count = (SELECT COUNT(*) FROM order_items WHERE order_id = NEW.order_id)
WHERE id = NEW.order_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trigger_update_order_totals
AFTER INSERT OR UPDATE OR DELETE ON order_items
FOR EACH ROW EXECUTE FUNCTION update_order_totals();

-- One-to-One Relationship
-- Use same primary key or foreign key with unique constraint
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50)
);
CREATE TABLE user_profiles (
user_id INTEGER PRIMARY KEY REFERENCES users(id),
bio TEXT,
avatar_url VARCHAR(255)
);
-- One-to-Many Relationship
-- Foreign key in the "many" side
CREATE TABLE departments (
id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
department_id INTEGER REFERENCES departments(id)
);
-- Many-to-Many Relationship
-- Junction/bridge table
CREATE TABLE students (
id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE courses (
id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE enrollments (
student_id INTEGER REFERENCES students(id),
course_id INTEGER REFERENCES courses(id),
enrolled_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (student_id, course_id)
);

-- Audit Trail Pattern
CREATE TABLE audit_log (
id SERIAL PRIMARY KEY,
table_name VARCHAR(100) NOT NULL,
record_id INTEGER NOT NULL,
action VARCHAR(10) NOT NULL,
old_data JSONB,
new_data JSONB,
changed_by INTEGER,
changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Soft Delete Pattern
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200),
is_deleted BOOLEAN DEFAULT FALSE,
deleted_at TIMESTAMP,
deleted_by INTEGER
);
-- Polymorphic Association
-- Instead of single column reference multiple tables
CREATE TABLE comments (
id SERIAL PRIMARY KEY,
content TEXT,
commentable_type VARCHAR(50), -- 'post', 'product', 'order'
commentable_id INTEGER -- ID in the referenced table
);
-- Better: Separate foreign keys (PostgreSQL exclusive)
CREATE TABLE comments (
id SERIAL PRIMARY KEY,
content TEXT,
post_id INTEGER REFERENCES posts(id),
product_id INTEGER REFERENCES products(id),
CONSTRAINT exactly_one_reference CHECK (
(post_id IS NOT NULL AND product_id IS NULL) OR
(post_id IS NULL AND product_id IS NOT NULL)
)
);
-- EAV (Entity-Attribute-Value) - Use Sparingly!
-- Only when attributes vary greatly per entity
CREATE TABLE product_attributes (
product_id INTEGER REFERENCES products(id),
attribute_name VARCHAR(50),
attribute_value VARCHAR(255),
PRIMARY KEY (product_id, attribute_name)
);

-- Tables: plural, snake_case
users, order_items, product_categories
-- Primary Keys
id (or table_name_id for join tables)
-- Examples: user_id, order_item_id
-- Foreign Keys
referenced_table_id
-- Examples: user_id, category_id, order_id
-- Indexes
idx_tablename_column
-- Examples: idx_users_email, idx_orders_user_status
-- Constraints
pk_tablename (primary key)
fk_tablename_referenced (foreign key)
uk_tablename_column (unique key)
chk_tablename_condition (check constraint)
-- Columns
snake_case
-- Examples: created_at, is_active, order_total
-- Views
v_tablename or view_tablename
-- Examples: v_active_users, view_order_summary
-- Functions
verb_noun or verb_noun
-- Examples: get_user_by_id, calculate_total

-- Appropriate Data Types
-- Use smallest data type that fits
-- Instead of:
CREATE TABLE example_bad (
id BIGINT, -- unnecessary for small tables
amount NUMERIC(100,2), -- too much precision
status VARCHAR(1000) -- too long
);
-- Use:
CREATE TABLE example_good (
id SERIAL, -- sufficient for most cases
amount DECIMAL(10,2), -- appropriate precision
status VARCHAR(20) -- sufficient length
);
-- Avoid NULL when possible
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) NOT NULL,
email VARCHAR(255) NOT NULL, -- Required fields
phone VARCHAR(20), -- Optional
bio TEXT -- Optional
);
-- Default values for frequently used values
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
status VARCHAR(20) DEFAULT 'pending',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

Schema Design Best Practices
========================================================================
1. Start with Normalization
┌──────────────────────────────────────────────────────────────┐
│ • Normalize to at least 3NF for transactional systems │
│ • Denormalize strategically for read performance │
└──────────────────────────────────────────────────────────────┘
2. Choose Appropriate Data Types
┌──────────────────────────────────────────────────────────────┐
│ • Use smallest type that fits │
│ • Use proper types for the data (DECIMAL for money) │
│ • Avoid VARCHAR(n) when TEXT is acceptable │
└──────────────────────────────────────────────────────────────┘
3. Define Constraints Early
┌──────────────────────────────────────────────────────────────┐
│ • Primary keys for every table │
│ • Foreign keys for relationships │
│ • NOT NULL for required fields │
│ • CHECK for business rules │
└──────────────────────────────────────────────────────────────┘
4. Plan for Growth
┌──────────────────────────────────────────────────────────────┐
│ • Consider partitioning for large tables │
│ • Use appropriate index types │
│ • Think about archiving strategy │
└──────────────────────────────────────────────────────────────┘
5. Document Your Schema
┌──────────────────────────────────────────────────────────────┐
│ • Add comments to tables and columns │
│ • Maintain ER diagram │
│ • Document assumptions and business rules │
└──────────────────────────────────────────────────────────────┘
========================================================================

Chapter 8: SELECT - Querying Data


Last Updated: February 2026