Examples

This page provides practical examples of using the libCacheSim Python bindings for various cache simulation scenarios.

Basic Cache Simulation

Simple LRU Cache Example

import libcachesim as lcs

# Create an LRU cache with 1MB capacity
cache = lcs.LRU(cache_size=1024*1024)

# Generate synthetic Zipf trace
reader = lcs.SyntheticReader(
    num_of_req=10000,
    obj_size=1024,
    dist="zipf",
    alpha=1.0,
    num_objects=1000,
    seed=42
)

# Simulate cache behavior
hits = 0
total = 0

for req in reader:
    if cache.get(req):
        hits += 1
    total += 1

print(f"Hit ratio: {hits/total:.4f}")
print(f"Total requests: {total}")

Comparing Multiple Cache Algorithms

import libcachesim as lcs

def compare_algorithms(cache_size):
    """Compare hit ratios of different cache algorithms on the same synthetic Zipf trace."""

    algorithms = {
        "LRU": lcs.LRU,
        "LFU": lcs.LFU, 
        "FIFO": lcs.FIFO,
        "Clock": lcs.Clock,
        "ARC": lcs.ARC,
        "S3FIFO": lcs.S3FIFO
    }

    results = {}

    for name, cache_class in algorithms.items():
        # Create fresh reader for each algorithm
        reader = lcs.SyntheticReader(
            num_of_req=10000,
            obj_size=1024,
            dist="zipf", 
            alpha=1.0,
            seed=42  # Same seed for fair comparison
        )

        cache = cache_class(cache_size=cache_size)
        hits = 0

        for req in reader:
            if cache.get(req):
                hits += 1

        hit_ratio = hits / reader.get_num_of_req()
        results[name] = hit_ratio
        print(f"{name:8}: {hit_ratio:.4f}")

    return results

# Compare with a 64KB cache
results = compare_algorithms(64*1024)

Working with Real Traces

Reading CSV Traces

import libcachesim as lcs

def simulate_csv_trace(csv_file):
    """Simulate cache behavior on CSV trace."""

    # Configure CSV reader
    reader_params = lcs.ReaderInitParam(
        has_header=True,
        delimiter=",",
        obj_id_is_num=True
    )

    # Set field mappings (1-indexed)
    reader_params.time_field = 1
    reader_params.obj_id_field = 2
    reader_params.obj_size_field = 3
    reader_params.op_field = 4

    reader = lcs.TraceReader(
        trace=csv_file,
        trace_type=lcs.TraceType.CSV_TRACE,
        reader_init_params=reader_params
    )

    print(f"Loaded trace with {reader.get_num_of_req()} requests")

    # Test different cache sizes
    cache_sizes = [1024*1024*i for i in [1, 2, 4, 8, 16]]  # 1MB to 16MB

    for size in cache_sizes:
        cache = lcs.LRU(cache_size=size)
        reader.reset()  # Reset to beginning

        hits = 0
        for req in reader:
            if cache.get(req):
                hits += 1

        hit_ratio = hits / reader.get_num_of_req()
        print(f"Cache size: {size//1024//1024}MB, Hit ratio: {hit_ratio:.4f}")

# Usage
simulate_csv_trace("workload.csv")

Handling Large Traces with Sampling

import libcachesim as lcs

def analyze_large_trace(trace_file, sample_ratio=0.1):
    """Analyze large trace using sampling."""

    # Create sampler
    sampler = lcs.Sampler(
        sample_ratio=sample_ratio,
        type=lcs.SamplerType.SPATIAL_SAMPLER
    )

    reader_params = lcs.ReaderInitParam(
        has_header=True,
        delimiter=",",
        obj_id_is_num=True
    )
    reader_params.sampler = sampler

    reader = lcs.TraceReader(
        trace=trace_file,
        trace_type=lcs.TraceType.CSV_TRACE,
        reader_init_params=reader_params
    )

    print(f"Sampling {sample_ratio*100}% of trace")
    print(f"Sampled requests: {reader.get_num_of_req()}")

    # Run simulation on sampled trace
    cache = lcs.LRU(cache_size=10*1024*1024)  # 10MB
    hits = 0

    for req in reader:
        if cache.get(req):
            hits += 1

    hit_ratio = hits / reader.get_num_of_req()
    print(f"Hit ratio on sampled trace: {hit_ratio:.4f}")

# Sample 5% of a large trace
analyze_large_trace("large_trace.csv", sample_ratio=0.05)

Advanced Analysis

Comprehensive Trace Analysis

import libcachesim as lcs
import os

def comprehensive_analysis(trace_file, output_dir="analysis_results"):
    """Run comprehensive trace analysis."""

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Load trace
    reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE)

    # Run trace analysis
    analyzer = lcs.TraceAnalyzer(reader, f"{output_dir}/trace_analysis")
    print("Running trace analysis...")
    analyzer.run()

    print(f"Analysis complete. Results saved to {output_dir}/")
    print("Generated files:")
    for file in os.listdir(output_dir):
        print(f"  - {file}")

# Run analysis
comprehensive_analysis("workload.csv")

Hit Ratio Curves

import libcachesim as lcs
import matplotlib.pyplot as plt

def plot_hit_ratio_curve(algorithms=None):
    """Plot hit ratio curves for different algorithms on a synthetic Zipf trace."""

    if algorithms is None:
        algorithms = ["LRU", "LFU", "FIFO", "ARC"]

    # Cache sizes from 1MB to 96MB in 5MB steps
    cache_sizes = [1024*1024*i for i in range(1, 101, 5)]

    plt.figure(figsize=(10, 6))

    for algo_name in algorithms:
        hit_ratios = []

        for cache_size in cache_sizes:
            reader = lcs.SyntheticReader(
                num_of_req=5000,
                obj_size=1024,
                dist="zipf",
                alpha=1.0,
                seed=42
            )

            cache = getattr(lcs, algo_name)(cache_size=cache_size)
            hits = 0

            for req in reader:
                if cache.get(req):
                    hits += 1

            hit_ratio = hits / reader.get_num_of_req()
            hit_ratios.append(hit_ratio)

        # Convert to MB for plotting
        sizes_mb = [size // 1024 // 1024 for size in cache_sizes]
        plt.plot(sizes_mb, hit_ratios, label=algo_name, marker='o')

    plt.xlabel('Cache Size (MB)')
    plt.ylabel('Hit Ratio')
    plt.title('Hit Ratio vs Cache Size')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

# Generate hit ratio curves
plot_hit_ratio_curve()

Custom Cache Policies

Implementing a Custom LRU with Python Hooks

import libcachesim as lcs
from collections import OrderedDict

def create_python_lru(cache_size):
    """Create a custom LRU cache using Python hooks."""

    def init_hook(size):
        """Initialize cache data structure."""
        return {
            'data': OrderedDict(),
            'size': 0,
            'capacity': size
        }

    def hit_hook(cache_dict, obj_id, obj_size):
        """Handle cache hit."""
        # Move to end (most recently used)
        cache_dict['data'].move_to_end(obj_id)

    def miss_hook(cache_dict, obj_id, obj_size):
        """Handle cache miss."""
        # Add new item
        cache_dict['data'][obj_id] = obj_size
        cache_dict['size'] += obj_size

    def eviction_hook(cache_dict, obj_id, obj_size):
        """Handle eviction when cache is full."""
        # Remove least recently used items
        while cache_dict['size'] + obj_size > cache_dict['capacity']:
            if not cache_dict['data']:
                break
            lru_id, lru_size = cache_dict['data'].popitem(last=False)
            cache_dict['size'] -= lru_size

    return lcs.PythonHookCache(
        cache_size=cache_size,
        init_hook=init_hook,
        hit_hook=hit_hook,
        miss_hook=miss_hook,
        eviction_hook=eviction_hook
    )

# Test custom LRU
custom_cache = create_python_lru(1024*1024)
reader = lcs.SyntheticReader(num_of_req=1000, obj_size=1024)

hits = 0
for req in reader:
    if custom_cache.get(req):
        hits += 1

print(f"Custom LRU hit ratio: {hits/1000:.4f}")

Time-based Cache with TTL

import libcachesim as lcs
import time

def create_ttl_cache(cache_size, ttl_seconds=300):
    """Create a cache with time-to-live (TTL) expiration."""

    def init_hook(size):
        return {
            'data': {},
            'timestamps': {},
            'size': 0,
            'capacity': size,
            'ttl': ttl_seconds
        }

    def is_expired(cache_dict, obj_id):
        """Check if object has expired."""
        if obj_id not in cache_dict['timestamps']:
            return True
        return time.time() - cache_dict['timestamps'][obj_id] > cache_dict['ttl']

    def hit_hook(cache_dict, obj_id, obj_size):
        """Handle cache hit; treat expired entries as misses."""
        if is_expired(cache_dict, obj_id):
            # Expired: drop the stale entry and report a miss
            if obj_id in cache_dict['data']:
                cache_dict['size'] -= cache_dict['data'][obj_id]
                del cache_dict['data'][obj_id]
                del cache_dict['timestamps'][obj_id]
            return False
        return True

    def miss_hook(cache_dict, obj_id, obj_size):
        """Handle cache miss."""
        current_time = time.time()
        cache_dict['data'][obj_id] = obj_size
        cache_dict['timestamps'][obj_id] = current_time
        cache_dict['size'] += obj_size

    def eviction_hook(cache_dict, obj_id, obj_size):
        """Handle eviction."""
        # First try to evict expired items
        current_time = time.time()
        expired_items = []

        for oid, timestamp in cache_dict['timestamps'].items():
            if current_time - timestamp > cache_dict['ttl']:
                expired_items.append(oid)

        for oid in expired_items:
            if oid in cache_dict['data']:
                cache_dict['size'] -= cache_dict['data'][oid]
                del cache_dict['data'][oid]
                del cache_dict['timestamps'][oid]

        # If still need space, evict oldest items
        while cache_dict['size'] + obj_size > cache_dict['capacity']:
            if not cache_dict['data']:
                break
            # Find oldest item
            oldest_id = min(cache_dict['timestamps'],
                            key=cache_dict['timestamps'].get)
            cache_dict['size'] -= cache_dict['data'][oldest_id]
            del cache_dict['data'][oldest_id]
            del cache_dict['timestamps'][oldest_id]

    return lcs.PythonHookCache(
        cache_size=cache_size,
        init_hook=init_hook,
        hit_hook=hit_hook,
        miss_hook=miss_hook,
        eviction_hook=eviction_hook
    )

# Test TTL cache
ttl_cache = create_ttl_cache(1024*1024, ttl_seconds=60)
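
The line above only constructs the cache. A short run over a synthetic trace, mirroring the earlier examples, exercises it end to end; note that with a 60-second TTL and a trace that replays in well under a minute, few entries will actually expire:

reader = lcs.SyntheticReader(num_of_req=1000, obj_size=1024, seed=42)

hits = 0
for req in reader:
    if ttl_cache.get(req):
        hits += 1

print(f"TTL cache hit ratio: {hits/reader.get_num_of_req():.4f}")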

Performance Optimization

Batch Processing for Large Workloads

import libcachesim as lcs

def batch_simulation(trace_file, batch_size=10000):
    """Process a large trace in fixed-size batches, reporting progress per batch."""

    reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE)
    cache = lcs.LRU(cache_size=10*1024*1024)

    total_requests = 0
    total_hits = 0
    batch_count = 0

    while True:
        batch_hits = 0
        batch_requests = 0

        # Process a batch of requests
        for _ in range(batch_size):
            try:
                req = reader.read_one_req()
            except Exception:
                break  # Reader exhausted or read error
            if req is None or not req.valid:
                break  # End of trace
            if cache.get(req):
                batch_hits += 1
            batch_requests += 1

        if batch_requests == 0:
            break

        total_hits += batch_hits
        total_requests += batch_requests
        batch_count += 1

        # Print progress
        hit_ratio = batch_hits / batch_requests
        print(f"Batch {batch_count}: {batch_requests} requests, "
              f"hit ratio: {hit_ratio:.4f}")

    overall_hit_ratio = total_hits / total_requests
    print(f"Overall: {total_requests} requests, hit ratio: {overall_hit_ratio:.4f}")

# Process in batches
batch_simulation("large_trace.csv", batch_size=50000)

Multi-threaded Analysis

import libcachesim as lcs
import concurrent.futures

def parallel_cache_comparison(trace_file, algorithms, cache_size):
    """Compare cache algorithms in parallel."""

    def simulate_algorithm(algo_name):
        """Simulate single algorithm."""
        reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE)
        cache = getattr(lcs, algo_name)(cache_size=cache_size)

        hits = 0
        total = 0

        for req in reader:
            if cache.get(req):
                hits += 1
            total += 1

        hit_ratio = hits / total if total > 0 else 0
        return algo_name, hit_ratio

    # Run simulations in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        futures = {executor.submit(simulate_algorithm, algo): algo 
                  for algo in algorithms}

        results = {}
        for future in concurrent.futures.as_completed(futures):
            algo_name, hit_ratio = future.result()
            results[algo_name] = hit_ratio
            print(f"{algo_name}: {hit_ratio:.4f}")

    return results

# Compare algorithms in parallel
algorithms = ["LRU", "LFU", "FIFO", "ARC", "S3FIFO"]
results = parallel_cache_comparison("trace.csv", algorithms, 1024*1024)
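
One caveat: CPython's global interpreter lock (GIL) means the thread pool above only overlaps work done in native code; Python-level bookkeeping still serializes. If threads don't scale for your workload, a process pool is a drop-in alternative. A minimal sketch, assuming the worker is a top-level (picklable) function:

import concurrent.futures

import libcachesim as lcs

def simulate_one(args):
    """Top-level worker so ProcessPoolExecutor can pickle it."""
    trace_file, algo_name, cache_size = args
    reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE)
    cache = getattr(lcs, algo_name)(cache_size=cache_size)

    hits = 0
    total = 0
    for req in reader:
        if cache.get(req):
            hits += 1
        total += 1
    return algo_name, (hits / total if total > 0 else 0.0)

def parallel_comparison_processes(trace_file, algorithms, cache_size):
    """Same comparison as above, but with one process per algorithm."""
    jobs = [(trace_file, algo, cache_size) for algo in algorithms]
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        return dict(executor.map(simulate_one, jobs))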

These examples show how the libCacheSim Python bindings support cache simulation, trace analysis, and research workflows. Modify and extend them to fit your own use cases.