Created
August 17, 2024 18:18
-
-
Save catid/98f1aa0f088bf8d7eafb2317db37b93d to your computer and use it in GitHub Desktop.
Monte Carlo simulation evaluating policies for when to start using Anthropic Claude prompt caching, measured in tokens
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cmake_minimum_required(VERSION 3.10)
project(MonteCarloCasino)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default to a Release build when the user did not pick a build type.
# Without this, single-config generators (Makefiles, Ninja) configure with an
# empty build type and the Release optimization flags below are never applied.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING
      "Build type (Debug, Release, RelWithDebInfo, MinSizeRel)" FORCE)
endif()

# Set optimization flags (used by Release builds only)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native")

# Find threads package
find_package(Threads REQUIRED)

# Add the executable
add_executable(monte_carlo_simulation monte_carlo_simulation.cpp)

# Link against threads
target_link_libraries(monte_carlo_simulation PRIVATE Threads::Threads)

# Enable link time optimization when the toolchain supports it
include(CheckIPOSupported)
check_ipo_supported(RESULT supported OUTPUT error)
if(supported)
  set_property(TARGET monte_carlo_simulation PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
  message(WARNING "IPO is not supported: ${error}")
endif()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This simulation models agentic workflows with an indeterminate end: after a minimum number of rounds, each round has a 5% probability of being the last, up to a fixed maximum number of rounds.
If you always cache, it will cost about 2x as much as the ideal policy (a broken policy).
If you never cache, it will also cost about 2x as much as the ideal policy.
Between those two extremes there is a very broad basin of good choices. Writing to the cache about every 3000 new input tokens is pretty good.
The results are pretty similar if the probability of early stopping varies. | |
If you pick a non-ideal policy you'll still be within about 30% of the ideal choice. | |
So don't stress about it! |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#include <random> | |
#include <thread> | |
#include <algorithm> | |
#include <iomanip> | |
// Simulation constants
constexpr int NUM_THREADS = 24;  // Worker threads launched for each policy setting
constexpr int P_TERM = 5;        // Early-stop chance per round, in percent: compared against a draw from [0, 99], so 5%
constexpr int MAX_ROUNDS = 100;  // Hard cap on the number of rounds in one simulated run
constexpr int MIN_ROUNDS = 10;   // Rounds that must complete before early termination can happen

// Prices, written as a dollar amount expressed in cents. The simulation
// multiplies them directly by token counts.
// NOTE(review): these look like per-million-token prices - confirm.
constexpr int WRITE_CACHE_COST = 375;  // $3.75 - writing tokens into the cache
constexpr int WRITE_INPUT_COST = 300;  // $3.00 - sending uncached input tokens
constexpr int READ_CACHE_COST = 30;    // $0.30 - reading tokens back from the cache
constexpr int OUT_TOKEN_COST = 1500;   // $15.00 - generated output tokens
// Per-thread random sources. Every thread owns its own engine (seeded from
// std::random_device) and its own distribution objects, so concurrent
// simulations do not share generator state.
thread_local std::mt19937 gen{std::random_device{}()};
// Integer in [0, 99]; used for percentage-style checks.
thread_local std::uniform_int_distribution<int> uniformDist{0, 99};
// Token counts per round: mean 300, standard deviation 50.
thread_local std::normal_distribution<double> inputDist{300.0, 50.0};
thread_local std::normal_distribution<double> outputDist{300.0, 50.0};
// Tunable knobs of the caching policy being evaluated.
struct PolicyParams
{
    // Number of tokens that may accumulate since the last cache write before
    // the policy decides to write the cache again. Defaults to 5000.
    int TokensSinceLastCacheThresh{5000};
};
// Simulation function | |
int32_t runSimulation(const PolicyParams& params) { | |
int rounds = 0; | |
int totalCents = 0; | |
int totalTokens = 0; | |
int cachedTokens = 0; | |
int tokensSinceLastCache = 0; | |
while (rounds < MAX_ROUNDS) { | |
int in_count = inputDist(gen); | |
/* | |
Policy Function: | |
We have seen all rounds to date and the latest input as well. | |
*/ | |
bool write_cache = tokensSinceLastCache > params.TokensSinceLastCacheThresh; | |
totalTokens += in_count; | |
if (write_cache) { | |
cachedTokens = totalTokens; | |
tokensSinceLastCache = 0; | |
totalCents += totalTokens * WRITE_CACHE_COST; | |
} else { | |
tokensSinceLastCache += in_count; | |
totalCents += cachedTokens * READ_CACHE_COST + tokensSinceLastCache * WRITE_INPUT_COST; | |
} | |
/* | |
Generate output | |
*/ | |
int out_count = outputDist(gen); | |
totalTokens += out_count; | |
tokensSinceLastCache += out_count; | |
totalCents += out_count * OUT_TOKEN_COST; | |
++rounds; | |
if (rounds >= MIN_ROUNDS && uniformDist(gen) < P_TERM) { | |
break; | |
} | |
} | |
return totalCents; | |
} | |
// Function to run multiple simulations and collect raw data | |
std::vector<int32_t> runMultipleSimulations(int numSimulations, const PolicyParams& params) { | |
std::vector<int32_t> results; | |
results.reserve(numSimulations); | |
return results; | |
} | |
int main() { | |
const int numSimulationsPerThread = 1000; | |
PolicyParams params; | |
for (int N = 0; N <= 50000; N += 1000) { | |
params.TokensSinceLastCacheThresh = N; | |
std::vector<std::thread> threads; | |
std::vector<int32_t> threadResults[NUM_THREADS]; | |
for (int thread_id = 0; thread_id < NUM_THREADS; ++thread_id) { | |
threads.emplace_back([thread_id, params, &threadResults]() { | |
for (int i = 0; i < numSimulationsPerThread; ++i) { | |
int32_t cost = runSimulation(params); | |
threadResults[thread_id].push_back(cost); | |
} | |
}); | |
} | |
for (auto& thread : threads) { | |
thread.join(); | |
} | |
std::vector<int32_t> allResults; | |
allResults.reserve(NUM_THREADS * numSimulationsPerThread); | |
for (const auto& result : threadResults) { | |
allResults.insert(allResults.end(), result.begin(), result.end()); | |
} | |
// Calculate 80th percentile (80% confidence max score) | |
size_t n = allResults.size() * 0.8; | |
std::nth_element(allResults.begin(), allResults.begin() + n, allResults.end()); | |
int32_t percentile80 = allResults[n]; | |
std::cout << "N = " << N << ": 80% confidence max score = $" | |
<< std::fixed << std::setprecision(2) << (percentile80 / 100.0) << std::endl; | |
} | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ct) ➜ build git:(main) ✗ ./monte_carlo_simulation | |
N = 0: 80% confidence max score = $2129534.25 | |
N = 1000: 80% confidence max score = $1000279.80 | |
N = 2000: 80% confidence max score = $844556.10 | |
N = 3000: 80% confidence max score = $769363.50 | |
N = 4000: 80% confidence max score = $787035.00 | |
N = 5000: 80% confidence max score = $789797.10 | |
N = 6000: 80% confidence max score = $787786.20 | |
N = 7000: 80% confidence max score = $857179.50 | |
N = 8000: 80% confidence max score = $841608.00 | |
N = 9000: 80% confidence max score = $886655.40 | |
N = 10000: 80% confidence max score = $944212.20 | |
N = 11000: 80% confidence max score = $1037086.05 | |
N = 12000: 80% confidence max score = $1036189.35 | |
N = 13000: 80% confidence max score = $1047584.85 | |
N = 14000: 80% confidence max score = $1062933.60 | |
N = 15000: 80% confidence max score = $1102255.50 | |
N = 16000: 80% confidence max score = $1125764.40 | |
N = 17000: 80% confidence max score = $1159761.15 | |
N = 18000: 80% confidence max score = $1227036.00 | |
N = 19000: 80% confidence max score = $1294861.35 | |
N = 20000: 80% confidence max score = $1367140.05 | |
N = 21000: 80% confidence max score = $1454967.75 | |
N = 22000: 80% confidence max score = $1549312.50 | |
N = 23000: 80% confidence max score = $1623869.25 | |
N = 24000: 80% confidence max score = $1674751.50 | |
N = 25000: 80% confidence max score = $1664562.00 | |
N = 26000: 80% confidence max score = $1690857.00 | |
N = 27000: 80% confidence max score = $1691220.00 | |
N = 28000: 80% confidence max score = $1681605.00 | |
N = 29000: 80% confidence max score = $1675842.00 | |
N = 30000: 80% confidence max score = $1704783.00 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment