# Atomic Operations

Atomics provide lock-free thread synchronization for primitive types. They are fundamental building blocks for high-performance concurrent code.
## Introduction

Atomic operations ensure that reads and writes to shared variables happen indivisibly, without race conditions. Unlike mutexes, atomics don't block; they rely on hardware-level instructions.
## std::atomic Basics

```cpp
#include <atomic>
#include <iostream>
#include <thread>

// Basic atomic counter
std::atomic<int> counter(0);

// Increment the counter atomically
void increment() {
    for (int i = 0; i < 1000; i++) {
        counter.fetch_add(1); // Atomic add, returns the old value
        // Or simply: counter++;
    }
}

int main() {
    std::thread t1(increment);
    std::thread t2(increment);

    t1.join();
    t2.join();

    std::cout << "Counter: " << counter.load() << std::endl;
    // Output: Counter: 2000
    return 0;
}
```
## Available Operations

### Arithmetic Operations
```cpp
std::atomic<int> value(10);

// fetch_add: returns the OLD value
int old = value.fetch_add(5); // old=10, value=15

// fetch_sub: returns the OLD value
int old2 = value.fetch_sub(3); // old2=15, value=12

// fetch_and, fetch_or, fetch_xor (for integral types)
std::atomic<int> flags(0b1100);
int old3 = flags.fetch_and(0b1010); // old3=12, flags=8 (0b1000)
```
### Comparison and Exchange

```cpp
std::atomic<int> data(100);

// compare_exchange_strong: atomic compare-and-swap
int expected = 100;
int desired = 200;
// If data == expected, set data = desired and return true;
// otherwise, load the current value into expected and return false
bool success = data.compare_exchange_strong(expected, desired);

// compare_exchange_weak: may fail spuriously, so use it in a loop
while (!data.compare_exchange_weak(expected, desired)) {
    // Keep trying with a recomputed value
    desired = expected + 100;
}

// exchange: atomically replace and return the old value
int old = data.exchange(500); // old holds the previous value, data=500
```
### Simple Operations

```cpp
std::atomic<int> value(42);

// store: write a value
value.store(100);

// load: read the value
int x = value.load();

// operator shorthand
value = 200;   // store
int y = value; // load
```
## Memory Order

Memory ordering determines how memory operations are ordered relative to other threads. C++ provides several models:
### memory_order_relaxed

No synchronization or ordering constraints; only atomicity is guaranteed.

```cpp
std::atomic<int> counter{0};

void relaxed() {
    // Only atomicity guaranteed, no ordering
    counter.fetch_add(1, std::memory_order_relaxed);
}
```
### memory_order_consume

Intended to order only the reads that carry a data dependency on the loaded value. In practice, compilers implement it as `memory_order_acquire`, and its use is discouraged (C++17 temporarily deprecates it); prefer acquire.

```cpp
std::atomic<int*> ptr{nullptr};
int data = 0;

void producer() {
    data = 42;
    ptr.store(&data, std::memory_order_release);
}

void consumer() {
    int* p = ptr.load(std::memory_order_consume);
    if (p) {
        // Reads through p are ordered after the release store,
        // so p is guaranteed to see data == 42
        int value = *p; // Safe
    }
}
```
### memory_order_acquire

An acquire load synchronizes with a release store to the same atomic: all reads after the load see the writes made before the store in the releasing thread.

```cpp
std::atomic<bool> ready{false};
int result = 0;

void producer() {
    result = 42;
    ready.store(true, std::memory_order_release);
}

void consumer() {
    while (!ready.load(std::memory_order_acquire)) {
        std::this_thread::yield();
    }
    // result is guaranteed to be 42 here
    std::cout << result << std::endl;
}
```
### memory_order_seq_cst (Default)

Full sequential consistency: all seq_cst operations appear in a single total order. Most intuitive but potentially slower.

```cpp
// Default - used when no memory order is specified
std::atomic<int> x(0), y(0);

void thread1() {
    x.store(1, std::memory_order_seq_cst);
    y.store(1, std::memory_order_seq_cst);
}

void thread2() {
    while (y.load(std::memory_order_seq_cst) != 1) {}
    std::cout << x.load(std::memory_order_seq_cst) << std::endl; // Always 1
}
```
### memory_order_release

A release store makes all prior writes visible to any thread that acquire-loads the same atomic.

```cpp
std::atomic<int*> data_ptr{nullptr};

void set_data(int* p) {
    // Writes through p made before this store are published
    // to any thread that acquire-loads data_ptr
    data_ptr.store(p, std::memory_order_release);
}

int* get_data() {
    return data_ptr.load(std::memory_order_acquire);
}
```
## Common Patterns

### Spinlock Using Atomics

```cpp
class Spinlock {
    std::atomic<bool> flag{false};

public:
    void lock() {
        while (flag.exchange(true, std::memory_order_acquire)) {
            std::this_thread::yield();
        }
    }

    void unlock() {
        flag.store(false, std::memory_order_release);
    }
};

// RAII wrapper
class SpinlockGuard {
    Spinlock& lock_;
public:
    explicit SpinlockGuard(Spinlock& lock) : lock_(lock) { lock_.lock(); }
    ~SpinlockGuard() { lock_.unlock(); }
};
```
### Lock-Free Counter

```cpp
class LockFreeCounter {
    std::atomic<long long> count_{0};

public:
    void increment() {
        count_.fetch_add(1, std::memory_order_relaxed);
    }

    long long get() const {
        return count_.load(std::memory_order_relaxed);
    }

    // Increment and return the new value
    long long increment_and_get() {
        return count_.fetch_add(1) + 1;
    }
};
```
### Single-Producer Single-Consumer Queue

```cpp
#include <array>
#include <atomic>

template<typename T>
class SPSCQueue {
    static constexpr size_t SIZE = 1024;
    std::array<T, SIZE> buffer_;
    std::atomic<size_t> write_pos_{0};
    std::atomic<size_t> read_pos_{0};

public:
    bool push(T&& value) {
        size_t write = write_pos_.load(std::memory_order_relaxed);
        size_t next_write = (write + 1) % SIZE;

        if (next_write == read_pos_.load(std::memory_order_acquire)) {
            return false; // Full
        }

        buffer_[write] = std::move(value);
        write_pos_.store(next_write, std::memory_order_release);
        return true;
    }

    bool pop(T& value) {
        size_t read = read_pos_.load(std::memory_order_relaxed);

        if (read == write_pos_.load(std::memory_order_acquire)) {
            return false; // Empty
        }

        value = std::move(buffer_[read]);
        read_pos_.store((read + 1) % SIZE, std::memory_order_release);
        return true;
    }
};
```
### Thread-Safe Flag

```cpp
class NotificationFlag {
    std::atomic<bool> flag_{false};

public:
    void set() {
        flag_.store(true, std::memory_order_release);
    }

    void wait() {
        while (!flag_.load(std::memory_order_acquire)) {
            std::this_thread::yield();
        }
    }

    void reset() {
        flag_.store(false, std::memory_order_relaxed);
    }
};
```
## Built-in Atomic Types

### For Integral Types
```cpp
#include <atomic>

// Integer atomics
std::atomic<int> ai;
std::atomic<unsigned int> aui;
std::atomic<long> al;
std::atomic<long long> all;

// Boolean
std::atomic<bool> flag;

// Pointer
std::atomic<int*> ptr;

// Character
std::atomic<char> c;
```
### For Custom Types

```cpp
// Use std::atomic with trivially copyable types
struct Data {
    int id;
    double value;
    char name[32];
};

static_assert(std::is_trivially_copyable_v<Data>,
              "Must be trivially copyable");

std::atomic<Data> atomic_data;

// For complex types, use a mutex or lock-free structures
```
## Performance Considerations

### When to Use Atomics
Section titled “When to Use Atomics”- Simple counters: Faster than mutexes
- Flags and status indicators: Very efficient
- Lock-free data structures: Maximum performance
### When to Avoid Atomics

- Complex operations: Use mutexes
- Multiple related variables: Use mutexes
- When ordering matters: Use mutexes or proper memory ordering
### Benchmark

```cpp
#include <atomic>
#include <chrono>
#include <mutex>
#include <thread>

// Atomic counter
std::atomic<int> atomic_count{0};

// Mutex-protected counter
std::mutex mutex_count;
int mutex_count_val = 0;

void atomic_increment() {
    for (int i = 0; i < 1000000; i++) {
        atomic_count.fetch_add(1);
    }
}

void mutex_increment() {
    for (int i = 0; i < 1000000; i++) {
        std::lock_guard<std::mutex> lock(mutex_count);
        mutex_count_val++;
    }
}

// Typically, atomics are 10-100x faster for simple operations
```
## Memory Model and Fences

### Memory Fences
```cpp
#include <atomic>

// Full memory fence
std::atomic_thread_fence(std::memory_order_seq_cst);

// Release fence
std::atomic_thread_fence(std::memory_order_release);

// Acquire fence
std::atomic_thread_fence(std::memory_order_acquire);
```
### When Fences Are Useful

Fences let you order the surrounding operations without tying the ordering to a single atomic access. Note that the shared flag must still be `std::atomic`: a fence does not make a plain `bool` data-race free. The standard pattern pairs relaxed atomic accesses with fences:

```cpp
std::atomic<bool> ready{false};
int data = 0;

void producer() {
    data = 42;
    std::atomic_thread_fence(std::memory_order_release);
    ready.store(true, std::memory_order_relaxed);
}

void consumer() {
    while (!ready.load(std::memory_order_relaxed)) {}
    std::atomic_thread_fence(std::memory_order_acquire);
    std::cout << data << std::endl; // Guaranteed to see 42
}
```
## Best Practices

- Prefer defaults: Use `memory_order_seq_cst` unless you have specific needs
- Know your ordering: Understand the difference between acquire/release
- Avoid over-optimization: Don’t sacrifice correctness for minor gains
- Test thoroughly: Race conditions can be hard to reproduce
- Use appropriate tools: ThreadSanitizer helps find races

```shell
# Compile with ThreadSanitizer
g++ -fsanitize=thread -o program program.cpp
```
## Key Takeaways

- Atomics provide lock-free thread synchronization
- Use `std::atomic<T>` for primitive types
- Memory ordering controls visibility between threads
- `memory_order_seq_cst` is the default and safest
- Use acquire/release for better performance when safe
- For complex types, use mutexes or design lock-free structures carefully