@catid
Created August 17, 2024 18:18
Monte Carlo simulation evaluating, in tokens, the policy of when to start using Anthropic Claude prompt caching
CMakeLists.txt
cmake_minimum_required(VERSION 3.10)
project(MonteCarloCasino)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Set optimization flags
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native")
# Find threads package
find_package(Threads REQUIRED)
# Add the executable
add_executable(monte_carlo_simulation monte_carlo_simulation.cpp)
# Link against threads
target_link_libraries(monte_carlo_simulation PRIVATE Threads::Threads)
# Enable link time optimization
include(CheckIPOSupported)
check_ipo_supported(RESULT supported OUTPUT error)
if(supported)
    set_property(TARGET monte_carlo_simulation PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
    message(WARNING "IPO is not supported: ${error}")
endif()
This simulation is for agentic workflows of indeterminate length: a 5% probability of ending after each round, up to some maximum number of rounds.
If you always cache, it costs about 2x as much as the ideal policy (a broken policy).
If you never cache, it also costs about 2x as much as the ideal policy.
In between there's a very broad basin: writing to the cache roughly every 3000 input tokens is pretty good.
The results are pretty similar if the probability of early stopping varies.
If you pick a non-ideal threshold you'll still be within about 30% of the ideal choice.
So don't stress about it!
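For intuition, here is a minimal standalone sketch (not part of the gist's build) that plugs the same per-MTok prices into the simulation's per-round cost model and compares one round's input cost with and without a cache write, at a hypothetical point in a trajectory (6000 tokens already cached, 3000 uncached since the last write):

#include <cstdio>

int main() {
    // Same prices as the simulation, in cents per million tokens.
    const double WRITE_CACHE = 375.0; // cache write
    const double WRITE_INPUT = 300.0; // regular input
    const double READ_CACHE  = 30.0;  // cache read

    // Hypothetical mid-trajectory state.
    double cachedTokens = 6000;         // tokens already in the cache
    double tokensSinceLastCache = 3000; // uncached tokens since the last write

    // This round's input cost under the simulation's model
    // (in cents-per-MTok units, i.e. not divided by 1e6).
    double totalTokens = cachedTokens + tokensSinceLastCache;
    double costIfWrite = totalTokens * WRITE_CACHE;          // 9000 * 375 = 3,375,000
    double costIfSkip  = cachedTokens * READ_CACHE
                       + tokensSinceLastCache * WRITE_INPUT; // 180,000 + 900,000 = 1,080,000

    std::printf("write cache: %.0f   skip: %.0f\n", costIfWrite, costIfSkip);
    return 0;
}

The write is more expensive for that single round; it only pays off across later rounds that re-read the larger cached prefix at the cheap cache-read rate, which is why the sweep below lands on a threshold of a few thousand tokens rather than caching every round.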
monte_carlo_simulation.cpp
#include <iostream>
#include <vector>
#include <random>
#include <thread>
#include <algorithm>
#include <iomanip>
#include <cstdint>
// Constants
const int NUM_THREADS = 24;
const int P_TERM = 5;             // 5% probability of termination at each step (after MIN_ROUNDS)
const int MAX_ROUNDS = 100;       // Maximum number of rounds
const int MIN_ROUNDS = 10;        // Minimum number of rounds before early termination is possible
// Prices in cents per million tokens (Anthropic Claude per-MTok pricing)
const int WRITE_CACHE_COST = 375; // $3.75 cache write
const int WRITE_INPUT_COST = 300; // $3.00 regular input
const int READ_CACHE_COST = 30;   // $0.30 cache read
const int OUT_TOKEN_COST = 1500;  // $15.00 output
// Thread-local random number generators
thread_local std::mt19937 gen(std::random_device{}());
thread_local std::uniform_int_distribution<> uniformDist(0, 99);
thread_local std::normal_distribution<> inputDist(300, 50);  // ~300 input tokens per round
thread_local std::normal_distribution<> outputDist(300, 50); // ~300 output tokens per round
struct PolicyParams
{
    int TokensSinceLastCacheThresh = 5000;
};

// Simulation function: returns the total cost of one trajectory
// (in cents-per-MTok units, i.e. token counts are not divided by 1e6)
int32_t runSimulation(const PolicyParams& params) {
    int rounds = 0;
    int totalCents = 0;
    int totalTokens = 0;
    int cachedTokens = 0;
    int tokensSinceLastCache = 0;

    while (rounds < MAX_ROUNDS) {
        int in_count = static_cast<int>(inputDist(gen));

        /*
            Policy Function:
            We have seen all rounds to date and the latest input as well.
        */
        bool write_cache = tokensSinceLastCache > params.TokensSinceLastCacheThresh;

        totalTokens += in_count;

        if (write_cache) {
            // Write the entire context to the cache at the cache-write rate.
            cachedTokens = totalTokens;
            tokensSinceLastCache = 0;
            totalCents += totalTokens * WRITE_CACHE_COST;
        } else {
            // Read the cached prefix cheaply; pay full input price for the rest.
            tokensSinceLastCache += in_count;
            totalCents += cachedTokens * READ_CACHE_COST + tokensSinceLastCache * WRITE_INPUT_COST;
        }

        /*
            Generate output
        */
        int out_count = static_cast<int>(outputDist(gen));
        totalTokens += out_count;
        tokensSinceLastCache += out_count;
        totalCents += out_count * OUT_TOKEN_COST;

        ++rounds;
        if (rounds >= MIN_ROUNDS && uniformDist(gen) < P_TERM) {
            break;
        }
    }
    return totalCents;
}
// Function to run multiple simulations and collect raw data
std::vector<int32_t> runMultipleSimulations(int numSimulations, const PolicyParams& params) {
    std::vector<int32_t> results;
    results.reserve(numSimulations);
    for (int i = 0; i < numSimulations; ++i) {
        results.push_back(runSimulation(params));
    }
    return results;
}
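// Illustrative usage of the helper above (hypothetical values; main() below
// drives its own per-thread loops instead of calling this function):
//   PolicyParams p;
//   p.TokensSinceLastCacheThresh = 3000;
//   std::vector<int32_t> costs = runMultipleSimulations(1000, p);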
int main() {
    const int numSimulationsPerThread = 1000;
    PolicyParams params;

    // Sweep the policy threshold N (tokens accumulated since the last cache write)
    for (int N = 0; N <= 50000; N += 1000) {
        params.TokensSinceLastCacheThresh = N;

        std::vector<std::thread> threads;
        std::vector<int32_t> threadResults[NUM_THREADS];

        for (int thread_id = 0; thread_id < NUM_THREADS; ++thread_id) {
            threads.emplace_back([thread_id, params, &threadResults]() {
                for (int i = 0; i < numSimulationsPerThread; ++i) {
                    int32_t cost = runSimulation(params);
                    threadResults[thread_id].push_back(cost);
                }
            });
        }
        for (auto& thread : threads) {
            thread.join();
        }

        std::vector<int32_t> allResults;
        allResults.reserve(NUM_THREADS * numSimulationsPerThread);
        for (const auto& result : threadResults) {
            allResults.insert(allResults.end(), result.begin(), result.end());
        }

        // Calculate 80th percentile (80% confidence max score)
        size_t n = allResults.size() * 0.8;
        std::nth_element(allResults.begin(), allResults.begin() + n, allResults.end());
        int32_t percentile80 = allResults[n];

        // Note: token counts are never divided by 1e6, so the printed dollar
        // figures are in per-MTok units rather than absolute dollars.
        std::cout << "N = " << N << ": 80% confidence max score = $"
                  << std::fixed << std::setprecision(2) << (percentile80 / 100.0) << std::endl;
    }
    return 0;
}
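The policy in runSimulation keys only on tokensSinceLastCache, but as its comment notes, everything observed so far is available at that decision point. A hypothetical sketch (not in the gist) of a more general policy hook could look like:

// Hypothetical generalization: give the policy the full observed state
// rather than just tokensSinceLastCache.
struct PolicyState {
    int rounds;               // rounds completed so far
    int totalTokens;          // all tokens in the context
    int cachedTokens;         // tokens currently covered by the cache
    int tokensSinceLastCache; // tokens accumulated since the last write
    int latestInputTokens;    // size of the input just received
};

bool shouldWriteCache(const PolicyState& s, const PolicyParams& params) {
    // Same threshold rule the simulation uses, expressed against the richer state.
    return s.tokensSinceLastCache > params.TokensSinceLastCacheThresh;
}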
(ct) ➜ build git:(main) ✗ ./monte_carlo_simulation
N = 0: 80% confidence max score = $2129534.25
N = 1000: 80% confidence max score = $1000279.80
N = 2000: 80% confidence max score = $844556.10
N = 3000: 80% confidence max score = $769363.50
N = 4000: 80% confidence max score = $787035.00
N = 5000: 80% confidence max score = $789797.10
N = 6000: 80% confidence max score = $787786.20
N = 7000: 80% confidence max score = $857179.50
N = 8000: 80% confidence max score = $841608.00
N = 9000: 80% confidence max score = $886655.40
N = 10000: 80% confidence max score = $944212.20
N = 11000: 80% confidence max score = $1037086.05
N = 12000: 80% confidence max score = $1036189.35
N = 13000: 80% confidence max score = $1047584.85
N = 14000: 80% confidence max score = $1062933.60
N = 15000: 80% confidence max score = $1102255.50
N = 16000: 80% confidence max score = $1125764.40
N = 17000: 80% confidence max score = $1159761.15
N = 18000: 80% confidence max score = $1227036.00
N = 19000: 80% confidence max score = $1294861.35
N = 20000: 80% confidence max score = $1367140.05
N = 21000: 80% confidence max score = $1454967.75
N = 22000: 80% confidence max score = $1549312.50
N = 23000: 80% confidence max score = $1623869.25
N = 24000: 80% confidence max score = $1674751.50
N = 25000: 80% confidence max score = $1664562.00
N = 26000: 80% confidence max score = $1690857.00
N = 27000: 80% confidence max score = $1691220.00
N = 28000: 80% confidence max score = $1681605.00
N = 29000: 80% confidence max score = $1675842.00
N = 30000: 80% confidence max score = $1704783.00
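As a rough check of the "broad basin" claim, here is a small standalone sketch (not part of the gist) that hard-codes a few of the (N, 80th-percentile) pairs from the run above, finds the cheapest threshold, and lists every threshold within 30% of it:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
    // (threshold N, 80th-percentile cost) pairs copied from the run above.
    std::vector<std::pair<int, double>> results = {
        {0, 2129534.25}, {1000, 1000279.80}, {2000, 844556.10},
        {3000, 769363.50}, {4000, 787035.00}, {5000, 789797.10},
        {6000, 787786.20}, {7000, 857179.50}, {8000, 841608.00},
        {9000, 886655.40}, {10000, 944212.20},
    };

    // Find the cheapest threshold's cost.
    double best = results[0].second;
    for (const auto& r : results) {
        best = std::min(best, r.second);
    }

    // List every threshold whose cost is within 30% of the best.
    for (const auto& r : results) {
        if (r.second <= best * 1.3) {
            std::printf("N = %5d is within 30%% of the best (%.2f vs %.2f)\n",
                        r.first, r.second, best);
        }
    }
    return 0;
}

With these values, every threshold from 2000 to 10000 tokens lands inside the 30% band, which matches the "don't stress about it" conclusion.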