olafgeibig · August 4, 2025 19:49 · arvit1 · Aug 4, 2025
diff --git a/cc-proxy.sh b/cc-proxy.sh
 #!/bin/bash
 # cc-proxy.sh - start the LiteLLM proxy on port 4000 using cc-proxy.yaml.
 #
 # Required environment (export before running, or prefix the command):
 #   WANDB_API_KEY  - <your key>
 #   WANDB_PROJECT  - <org/project>
 #
 # The placeholder text must stay inside quotes: unquoted, bash treats `<`
 # and `>` as redirection operators and rejects the line as a syntax error.
 # ${VAR:?msg} stops the script with msg when VAR is unset or empty, so an
 # unconfigured run fails here instead of starting the proxy without
 # credentials.
 export WANDB_API_KEY="${WANDB_API_KEY:?set WANDB_API_KEY to <your key>}"
 export WANDB_PROJECT="${WANDB_PROJECT:?set WANDB_PROJECT to <org/project>}"

 litellm --port 4000 --debug --config cc-proxy.yaml
diff --git a/cc-proxy.yaml b/cc-proxy.yaml
 # Global settings section of the LiteLLM proxy config.
 litellm_settings:
  # NOTE(review): presumably makes LiteLLM drop request parameters the
  # upstream does not accept instead of failing - confirm against LiteLLM docs.
  drop_params: true
  # Caching is switched on; cache_params selects the "local" cache type.
  cache: True
  cache_params:
    type: local
  enable_preview_features: True

 # List of model entries. Each entry pairs a model_name with the
 # litellm_params used to reach the backend.
 # NOTE(review): the trailing "*" in model_name looks like a wildcard
 # pattern - confirm how LiteLLM matches it.
 model_list:

  # anthropic/claude-sonnet-* -> Qwen3-Coder-480B-A35B-Instruct at
  # api.inference.wandb.ai.
  - model_name: anthropic/claude-sonnet-*
    litellm_params:
      model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct 
      # "os.environ/NAME" refers to an environment variable; cc-proxy.sh
      # exports WANDB_API_KEY and WANDB_PROJECT before starting litellm.
      api_key: "os.environ/WANDB_API_KEY"
      api_base: https://api.inference.wandb.ai/v1
      headers: 
        OpenAI-Project: "os.environ/WANDB_PROJECT"
      # Generation parameters set for this entry.
      max_tokens: 65536
      repetition_penalty: 1.05
      temperature: 0.7
      top_k: 20
      top_p: 0.8
    model_info:
      # 0.000001 per input token = 1.0 per million tokens;
      # 0.0000015 per output token = 1.5 per million tokens.
      input_cost_per_token: 0.000001
      output_cost_per_token: 0.0000015

  # anthropic/claude-opus-* -> Qwen3-235B-A22B-Thinking-2507 at
  # api.inference.wandb.ai.
  - model_name: anthropic/claude-opus-*
    litellm_params:
      model: openai/Qwen/Qwen3-235B-A22B-Thinking-2507
      # Credentials and project header come from environment variables.
      api_key: "os.environ/WANDB_API_KEY"
      api_base: https://api.inference.wandb.ai/v1
      headers: 
        OpenAI-Project: "os.environ/WANDB_PROJECT"
      # Generation parameters set for this entry (temperature, top_k and
      # top_p differ from the sonnet entry).
      max_tokens: 65536
      repetition_penalty: 1.05
      temperature: 0.6
      top_k: 40
      top_p: 0.95
    model_info:
      # 0.0000001 per token = 0.1 per million tokens, for input and output.
      input_cost_per_token: 0.0000001
      output_cost_per_token: 0.0000001

  # anthropic/claude-3-5-haiku-* -> Qwen3-235B-A22B-Instruct-2507 at
  # api.inference.wandb.ai.
  - model_name: anthropic/claude-3-5-haiku-*
    litellm_params:
      model: openai/Qwen/Qwen3-235B-A22B-Instruct-2507
      # Credentials come from an environment variable.
      api_key: "os.environ/WANDB_API_KEY"
      api_base: https://api.inference.wandb.ai/v1
      # Generation parameters set for this entry.
      max_tokens: 65536
      repetition_penalty: 1.05
      temperature: 0.7
      top_k: 20
      top_p: 0.8
      headers: 
        OpenAI-Project: "os.environ/WANDB_PROJECT"
      # NOTE(review): in this entry the cost keys sit directly under
      # litellm_params; the sonnet and opus entries place them under a
      # model_info: key instead. Confirm whether a model_info: line is
      # missing here or the placement is intentional.
      # 0.0000001 per token = 0.1 per million tokens, for input and output.
      input_cost_per_token: 0.0000001
      output_cost_per_token: 0.0000001
diff --git a/code-wandb.sh b/code-wandb.sh
 #!/bin/bash
 # code-wandb.sh - export the Anthropic client settings, then launch VS Code
 # so it inherits them. The base URL targets localhost:4000.
 # NOTE(review): sk-1234 looks like a dummy token for the local proxy -
 # confirm it matches what the proxy expects.
 export \
   ANTHROPIC_AUTH_TOKEN=sk-1234 \
   ANTHROPIC_BASE_URL=http://localhost:4000 \
   CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1

 # Starting VS Code in the background, but could also run claude here
 code &
	#!/bin/bash
	# Start the LiteLLM proxy on port 4000 using cc-proxy.yaml.
	#
	# Required environment (export before running, or prefix the command):
	#   WANDB_API_KEY  - <your key>
	#   WANDB_PROJECT  - <org/project>
	#
	# The placeholder text must stay inside quotes: unquoted, bash treats `<`
	# and `>` as redirection operators and rejects the line as a syntax error.
	# ${VAR:?msg} stops the script with msg when VAR is unset or empty.
	export WANDB_API_KEY="${WANDB_API_KEY:?set WANDB_API_KEY to <your key>}"
	export WANDB_PROJECT="${WANDB_PROJECT:?set WANDB_PROJECT to <org/project>}"

	litellm --port 4000 --debug --config cc-proxy.yaml
	# LiteLLM proxy configuration. Nesting matters: keys under litellm_params
	# and model_info belong to the list entry that precedes them.
	litellm_settings:
	  drop_params: true
	  # Caching is switched on; cache_params selects the "local" cache type.
	  cache: True
	  cache_params:
	    type: local
	  enable_preview_features: True

	model_list:

	  # anthropic/claude-sonnet-* -> Qwen3-Coder-480B-A35B-Instruct
	  - model_name: anthropic/claude-sonnet-*
	    litellm_params:
	      model: openai/Qwen/Qwen3-Coder-480B-A35B-Instruct
	      api_key: "os.environ/WANDB_API_KEY"
	      api_base: https://api.inference.wandb.ai/v1
	      headers:
	        OpenAI-Project: "os.environ/WANDB_PROJECT"
	      max_tokens: 65536
	      repetition_penalty: 1.05
	      temperature: 0.7
	      top_k: 20
	      top_p: 0.8
	    model_info:
	      input_cost_per_token: 0.000001
	      output_cost_per_token: 0.0000015

	  # anthropic/claude-opus-* -> Qwen3-235B-A22B-Thinking-2507
	  - model_name: anthropic/claude-opus-*
	    litellm_params:
	      model: openai/Qwen/Qwen3-235B-A22B-Thinking-2507
	      api_key: "os.environ/WANDB_API_KEY"
	      api_base: https://api.inference.wandb.ai/v1
	      headers:
	        OpenAI-Project: "os.environ/WANDB_PROJECT"
	      max_tokens: 65536
	      repetition_penalty: 1.05
	      temperature: 0.6
	      top_k: 40
	      top_p: 0.95
	    model_info:
	      input_cost_per_token: 0.0000001
	      output_cost_per_token: 0.0000001

	  # anthropic/claude-3-5-haiku-* -> Qwen3-235B-A22B-Instruct-2507
	  - model_name: anthropic/claude-3-5-haiku-*
	    litellm_params:
	      model: openai/Qwen/Qwen3-235B-A22B-Instruct-2507
	      api_key: "os.environ/WANDB_API_KEY"
	      api_base: https://api.inference.wandb.ai/v1
	      max_tokens: 65536
	      repetition_penalty: 1.05
	      temperature: 0.7
	      top_k: 20
	      top_p: 0.8
	      headers:
	        OpenAI-Project: "os.environ/WANDB_PROJECT"
	      # NOTE(review): cost keys are placed under litellm_params here,
	      # while the two entries above use model_info - confirm intent.
	      input_cost_per_token: 0.0000001
	      output_cost_per_token: 0.0000001
	#!/bin/bash
	# Export the Anthropic client settings, then launch VS Code so it
	# inherits them. The base URL targets localhost:4000.
	# NOTE(review): sk-1234 looks like a dummy token for the local proxy -
	# confirm it matches what the proxy expects.
	export \
	  ANTHROPIC_AUTH_TOKEN=sk-1234 \
	  ANTHROPIC_BASE_URL=http://localhost:4000 \
	  CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1

	# Starting VS Code in the background, but could also run claude here
	code &