Caching Strategies for High-Performance Systems

August 8, 2022

Caching is often the difference between a system that scales and one that falls over. But caching is also a source of subtle bugs: stale data, cache stampedes, and inconsistency. Good caching requires understanding the patterns and their trade-offs.

Here’s how to cache effectively.

Cache Patterns

Cache-Aside (Lazy Loading)

cache_aside:
  pattern: Application manages cache
  flow:
    read:
      1. Check cache
      2. If miss, read from database
      3. Store in cache
      4. Return data
    write:
      1. Update database
      2. Invalidate or update cache

def get_user(user_id):
    # Check cache first
    cache_key = f"user:{user_id}"
    cached = redis.get(cache_key)
    if cached:
        return json.loads(cached)

    # Cache miss - load from database
    user = db.query("SELECT * FROM users WHERE id = %s", user_id)
    if user:
        redis.setex(cache_key, 3600, json.dumps(user))  # 1 hour TTL

    return user

def update_user(user_id, data):
    # Update database
    db.execute("UPDATE users SET ... WHERE id = %s", data, user_id)
    # Invalidate cache
    redis.delete(f"user:{user_id}")

Write-Through

write_through:
  pattern: Cache updated synchronously with database
  flow:
    write:
      1. Write to cache
      2. Cache writes to database
      3. Return to client

  pros:
    - Cache always consistent
    - Simplified read path
  cons:
    - Write latency increased
    - Cache may hold rarely-read data
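
A minimal write-through sketch under the same assumptions as the snippets above (a redis client, a db handle, json); save_user is an illustrative name, not part of any standard API. The key property is that the write path touches both cache and database before returning, so the cache stays warm and consistent for written keys.

# Write-through sketch: cache and database are updated in one synchronous
# write path (illustrative; reuses the redis/db handles from above)
def save_user(user_id, data):
    cache_key = f"user:{user_id}"
    # 1. Write to the cache
    redis.setex(cache_key, 3600, json.dumps(data))
    try:
        # 2. Synchronously write through to the database
        db.execute("UPDATE users SET ... WHERE id = %s", data, user_id)
    except Exception:
        # If the database write fails, drop the cached value so readers
        # don't see data that was never persisted
        redis.delete(cache_key)
        raise
    # 3. Return to the client only after both writes succeed
    return data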

Write-Behind (Write-Back)

write_behind:
  pattern: Cache writes async to database
  flow:
    write:
      1. Write to cache
      2. Return to client immediately
      3. Async: Cache writes to database

  pros:
    - Fast writes
    - Batching possible
  cons:
    - Data loss risk
    - Complexity
    - Consistency challenges
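
A write-behind sketch under the same assumptions; the "dirty_users" list and the flush_dirty_users worker are illustrative names, and in practice this logic usually lives in the caching layer or a message queue rather than hand-rolled application code.

# Write-behind sketch: the write returns as soon as the cache is updated;
# a background worker batches the database writes later
def save_user(user_id, data):
    redis.setex(f"user:{user_id}", 3600, json.dumps(data))
    redis.rpush("dirty_users", user_id)  # remember which keys need persisting
    return data                          # return before the database write

def flush_dirty_users(batch_size=100):
    # Runs periodically in a background worker; drains pending writes
    # (assumes a redis client created with decode_responses=True)
    for _ in range(batch_size):
        user_id = redis.lpop("dirty_users")
        if user_id is None:
            break
        cached = redis.get(f"user:{user_id}")
        if cached:
            db.execute("UPDATE users SET ... WHERE id = %s",
                       json.loads(cached), user_id)
    # If the process dies before this runs, those writes are lost
    # (the data-loss risk listed above)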

Invalidation Strategies

TTL-Based

# Simple TTL expiration
redis.setex("user:123", 3600, user_data)  # Expires in 1 hour

# Variable TTL based on data characteristics
def get_cache_ttl(data_type):
    ttl_config = {
        'user_profile': 3600,      # 1 hour
        'product_catalog': 86400,   # 24 hours
        'session': 900,             # 15 minutes
        'search_results': 300,      # 5 minutes
    }
    return ttl_config.get(data_type, 1800)

Event-Based Invalidation

# Invalidate on related events
def handle_user_update(event):
    user_id = event['user_id']

    # Invalidate directly related caches
    redis.delete(f"user:{user_id}")
    redis.delete(f"user_profile:{user_id}")

    # Invalidate dependent caches
    order_ids = get_user_orders(user_id)
    for order_id in order_ids:
        redis.delete(f"order:{order_id}")

# Pattern: Publish invalidation events
def update_user(user_id, data):
    db.execute("UPDATE users SET ...", data, user_id)
    event_bus.publish("user.updated", {"user_id": user_id})

Versioned Keys

# Version-based invalidation
def get_cache_key(user_id):
    version = redis.get(f"user_version:{user_id}") or 0
    return f"user:{user_id}:v{version}"

def invalidate_user_cache(user_id):
    redis.incr(f"user_version:{user_id}")
    # Old versioned keys naturally become orphaned and expire
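
A possible read path on top of these helpers (illustrative; reuses the redis/db/json handles from earlier snippets). The point is that invalidation never deletes anything: bumping the version redirects reads to a new key, and the abandoned old keys simply age out via their TTL.

def get_user(user_id):
    key = get_cache_key(user_id)          # e.g. "user:123:v7"
    cached = redis.get(key)
    if cached:
        return json.loads(cached)

    user = db.query("SELECT * FROM users WHERE id = %s", user_id)
    if user:
        # TTL guarantees orphaned old-version keys eventually disappear
        redis.setex(key, 3600, json.dumps(user))
    return user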

Common Problems

Cache Stampede

cache_stampede:
  problem: Many requests miss cache simultaneously, all hit database
  scenario:
    - Popular item's cache expires
    - 1000 concurrent requests
    - All hit database
    - Database overwhelmed

  solutions:
    - Locking
    - Early refresh
    - Probabilistic expiration

# Solution 1: Locking (single flight)
def get_with_lock(key, fetch_func, ttl):
    cached = redis.get(key)
    if cached:
        return json.loads(cached)

    lock_key = f"lock:{key}"
    acquired = redis.set(lock_key, "1", nx=True, ex=5)

    if acquired:
        try:
            data = fetch_func()
            redis.setex(key, ttl, json.dumps(data))
            return data
        finally:
            redis.delete(lock_key)
    else:
        # Another request is already rebuilding this key; wait briefly
        # and retry (bounded in practice by the 5-second lock TTL above)
        time.sleep(0.1)
        return get_with_lock(key, fetch_func, ttl)

# Solution 2: Probabilistic early expiration
def get_with_early_refresh(key, fetch_func, ttl):
    cached = redis.get(key)
    ttl_remaining = redis.ttl(key)  # seconds left; negative if missing/no expiry

    if cached:
        # Probabilistic early refresh: a small fraction of requests rebuild
        # the entry before it expires, so expiry never hits everyone at once
        if 0 <= ttl_remaining < ttl * 0.1:  # < 10% of TTL remaining
            if random.random() < 0.1:       # 10% chance to refresh
                refresh_async(key, fetch_func, ttl)  # rebuild in background
        return json.loads(cached)

    return fetch_and_cache(key, fetch_func, ttl)

Cache Penetration

cache_penetration:
  problem: Requests for non-existent data always hit database
  scenario:
    - Attacker requests invalid IDs
    - Cache always misses
    - Every request hits database

  solutions:
    - Cache null results
    - Bloom filter (see the sketch below)
    - Rate limiting

# Solution: Cache null results
def get_user(user_id):
    cache_key = f"user:{user_id}"
    cached = redis.get(cache_key)

    if cached == "NULL":
        # Cached negative result (assumes a redis client created with
        # decode_responses=True, as in the other snippets)
        return None

    if cached:
        return json.loads(cached)

    user = db.query("SELECT * FROM users WHERE id = %s", user_id)

    if user:
        redis.setex(cache_key, 3600, json.dumps(user))
    else:
        redis.setex(cache_key, 300, "NULL")  # Cache null, shorter TTL

    return user
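
The Bloom-filter option from the list above can be sketched as well: seed a filter with every valid ID and reject obviously bogus lookups before they reach the cache or database. The class below is illustrative and hand-rolled for clarity; a production system would size the bit array and hash count from the expected number of keys and the acceptable false-positive rate, or use an existing library.

import hashlib

class BloomFilter:
    def __init__(self, size_bits=1_000_000, num_hashes=5):
        self.size = size_bits
        self.num_hashes = num_hashes
        self.bits = bytearray(size_bits // 8 + 1)

    def _positions(self, item):
        # Derive num_hashes independent bit positions from SHA-256
        for i in range(self.num_hashes):
            digest = hashlib.sha256(f"{i}:{item}".encode()).digest()
            yield int.from_bytes(digest[:8], "big") % self.size

    def add(self, item):
        for pos in self._positions(item):
            self.bits[pos // 8] |= 1 << (pos % 8)

    def might_contain(self, item):
        # False positives are possible; false negatives are not
        return all(self.bits[pos // 8] & (1 << (pos % 8))
                   for pos in self._positions(item))

# Seed with known-valid IDs at startup, then gate lookups:
#   if not user_id_filter.might_contain(user_id):
#       return None           # definitely not in the database
#   return get_user(user_id)  # falls through to cache / database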

Cache Inconsistency

cache_inconsistency:
  problem: Cache and database have different data
  causes:
    - Failed invalidation
    - Race conditions
    - Replication lag

  mitigations:
    - Short TTL as safety net
    - Read-your-writes consistency
    - Eventual consistency acceptance

# Read-your-writes pattern
def update_user(user_id, data, session):
    db.execute("UPDATE users SET ...", data, user_id)

    # Update cache immediately
    user = db.query("SELECT * FROM users WHERE id = %s", user_id)
    redis.setex(f"user:{user_id}", 3600, json.dumps(user))

    # Mark session as having seen the latest version
    # (updated_at assumed to be a numeric epoch timestamp so it compares
    # cleanly against the 0 default below)
    session['user_version'] = user['updated_at']

def get_user(user_id, session):
    cached = redis.get(f"user:{user_id}")
    if cached:
        user = json.loads(cached)
        # Serve from cache only if it is at least as new as what
        # this session has already seen
        if user['updated_at'] >= session.get('user_version', 0):
            return user

    # Bypass the cache if this session expects newer data
    # (the fresh row could also be written back to the cache here)
    return db.query("SELECT * FROM users WHERE id = %s", user_id)

Multi-Level Caching

cache_hierarchy:
  L1_local:
    - In-process memory
    - Fastest
    - No network
    - Limited size, per-instance

  L2_distributed:
    - Redis/Memcached
    - Shared across instances
    - Network latency
    - Larger capacity

  L3_cdn:
    - Edge caching
    - Geographic distribution
    - Public content only

# Multi-level cache implementation
class MultiLevelCache:
    def __init__(self):
        # Plain dict as a stand-in; in practice use a bounded cache with
        # LRU eviction and TTL (e.g. cachetools.TTLCache)
        self.local_cache = {}
        self.redis = redis.Redis()

    def get(self, key):
        # L1: Local memory
        if key in self.local_cache:
            return self.local_cache[key]

        # L2: Redis
        value = self.redis.get(key)
        if value:
            self.local_cache[key] = value  # Populate L1
            return value

        return None

    def set(self, key, value, ttl):
        self.local_cache[key] = value
        self.redis.setex(key, ttl, value)

    def invalidate(self, key):
        self.local_cache.pop(key, None)
        self.redis.delete(key)
        # Note: Other instances' L1 still have stale data
        # Use pub/sub for distributed invalidation
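
One way to close that gap is to broadcast invalidations over Redis pub/sub so every instance drops its local copy. The sketch below is illustrative: the subclass, the background listener thread, and the "cache_invalidation" channel name are assumptions, not a standard API.

import threading

class MultiLevelCacheWithPubSub(MultiLevelCache):
    CHANNEL = "cache_invalidation"

    def __init__(self):
        super().__init__()
        # Each instance listens for invalidation messages in the background
        threading.Thread(target=self._listen, daemon=True).start()

    def invalidate(self, key):
        self.local_cache.pop(key, None)
        self.redis.delete(key)
        # Tell every other instance to drop its L1 copy too
        self.redis.publish(self.CHANNEL, key)

    def _listen(self):
        pubsub = self.redis.pubsub(ignore_subscribe_messages=True)
        pubsub.subscribe(self.CHANNEL)
        for message in pubsub.listen():
            data = message["data"]
            key = data.decode() if isinstance(data, bytes) else data
            self.local_cache.pop(key, None)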

Key Takeaways

Start with cache-aside and a TTL as a safety net, pick an invalidation strategy you can actually enforce, and design for stampedes, penetration, and inconsistency before they show up in production. And remember Phil Karlton's line: there are only two hard things in computer science, cache invalidation and naming things.