g023 · April 21, 2026 17:47
diff --git a/_inc_deepseek.py b/_inc_deepseek.py
 import requests
 import json
 from typing import List, Dict, Optional, Generator, Union
 import os
 import time
 import random

 # ==============================================================================
 # DeepSeek API configuration (module-level defaults; replaces the Ollama globals)
 # ==============================================================================

 # Size of the substring to check for repeats.
 G_LOOP_SIZE = 75

 # Suffix appended to the last user message by chat_with_deepseek. Empty here:
 # thinking is switched through the request's "thinking" object, not a prompt.
 G_APPEND_PROMPT = ""

 # Default API base URL (see _resolve_host for how it can be overridden).
 G_HOST = "https://api.deepseek.com"

 # ------------------------------------------------------------------------------
 # Model selection
 # "deepseek-chat"     -> standard chat with optional thinking
 # "deepseek-reasoner" -> dedicated reasoning model (currently active)
 # ------------------------------------------------------------------------------
 # G_MODEL = "deepseek-chat"
 G_MODEL = "deepseek-reasoner"

 # ------------------------------------------------------------------------------
 # Thinking mode
 # Only sent to the API for "deepseek-chat"; per the original author's note,
 # "deepseek-reasoner" always thinks.
 # ------------------------------------------------------------------------------
 G_THINKING = True

 # ------------------------------------------------------------------------------
 # API limits & sampling controls
 # ------------------------------------------------------------------------------
 # Must be a positive integer. The original note gives 64K as the ceiling for
 # deepseek-reasoner -- verify against the current API documentation.
 G_MAX_OUTPUT_TOKENS = 256
 G_TEMP = 0.0

 # Default request options merged into every payload. Ollama-only options
 # ('num_ctx', 'repeat_penalty', 'mirostat') were dropped. Per the original note,
 # temperature/top_p/presence_penalty/frequency_penalty have no effect on
 # 'deepseek-reasoner'.
 G_OPTIONS = dict(
    max_tokens=G_MAX_OUTPUT_TOKENS,
    temperature=G_TEMP,
    top_p=0.9,
    frequency_penalty=1.0,
    presence_penalty=0.5,
 )

 # ------------------------------------------------------------------------------
 # Credentials
 # ------------------------------------------------------------------------------
 # Read from the DEEPSEEK_API_KEY environment variable; the fallback below is a
 # placeholder, not a working key.
 G_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "sk-your_key_here")


 # Unique sentinel returned by _parse_stream_line for the end-of-stream marker.
 _STREAM_DONE = object()

 def _resolve_host(host: Optional[str]) -> str:
    """Pick the API base URL and strip any trailing slashes.

    Precedence: the explicit ``host`` argument, then the ``DEEPSEEK_HOST``
    environment variable, then the module default ``G_HOST``. Empty values are
    treated the same as missing ones.
    """
    chosen = host
    if not chosen:
        chosen = os.getenv("DEEPSEEK_HOST")
    if not chosen:
        chosen = G_HOST
    return chosen.rstrip('/')

 def _parse_stream_line(raw_line: Union[bytes, str]) -> Optional[Union[Dict, object]]:
    """Parse one line of a streaming response into a chunk.

    Both SSE ``data:`` lines and bare JSON lines are accepted.

    Args:
        raw_line: One line of the response body. Bytes are decoded as UTF-8
            with undecodable bytes replaced.

    Returns:
        ``None`` when the line carries no chunk (blank line, ``:`` comment,
        empty ``data:`` field, invalid JSON, or JSON that is not an object);
        the ``_STREAM_DONE`` sentinel for ``data: [DONE]``; otherwise the
        decoded JSON object.
    """
    if isinstance(raw_line, bytes):
        text = raw_line.decode('utf-8', errors='replace')
    else:
        text = raw_line

    text = text.strip()
    if not text or text.startswith(':'):
        return None

    if text.startswith('data:'):
        body = text[5:].strip()
        if not body:
            # An empty data field carries no event; it must not be mistaken
            # for the end-of-stream marker.
            return None
        if body == '[DONE]':
            return _STREAM_DONE
    else:
        body = text

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as exc:
        print(f"Failed to decode chunk: {raw_line!r}, error: {exc}")
        return None

    # Consumers call .get() on every chunk, so only JSON objects are passed on.
    if not isinstance(parsed, dict):
        return None
    return parsed

 def chat_with_deepseek(
    messages: List[Dict[str, str]],
    model: str = G_MODEL,
    host: Optional[str] = None,
    stream: bool = False,
    options: Optional[Dict] = None,  # Will use G_OPTIONS if None
    thinking: bool = G_THINKING,
    **kwargs
 ) -> Union[Dict, Generator[Dict, None, None]]:
    """
    Send a conversation to DeepSeek's chat-completions endpoint.

    The signature mirrors ``chat_with_ollama`` so this can be used as a
    drop-in replacement.

    Args:
        messages: Conversation as a list of role/content dicts. The list and
            its dicts are never modified; when ``G_APPEND_PROMPT`` is set, the
            suffix is applied to a copy of the last user message.
        model: Model name placed in the request body.
        host: Optional base-URL override, resolved by ``_resolve_host``.
        stream: When True, return a generator of parsed chunks instead of the
            decoded response.
        options: Extra request-body fields (``max_tokens``, ``temperature``,
            ...). A copy of ``G_OPTIONS`` is used when None. These keys are
            merged last, so they win over ``model``/``messages``/``stream``.
        thinking: Sent as the ``thinking`` object only when ``model`` is
            ``"deepseek-chat"``.
        **kwargs: Accepted for signature compatibility and ignored.

    Returns:
        The decoded JSON response, or a generator of chunk dicts when
        ``stream`` is True.

    Raises:
        ConnectionError: (builtin) the server could not be reached.
        requests.exceptions.Timeout: the request timed out.
        requests.exceptions.RequestException: any other request failure,
            including HTTP error statuses.

    Note:
        With ``stream=True`` the request is only issued once the returned
        generator is iterated, so failures surface from the generator as raw
        ``requests`` exceptions and are not translated here.
    """
    if options is None:
        options = G_OPTIONS.copy()

    # Legacy suffix support. Build a new list with a copied last message so
    # repeated calls with the same conversation do not stack the suffix.
    if G_APPEND_PROMPT and messages and messages[-1].get("role") == "user":
        tail = dict(messages[-1])
        tail["content"] = f"{tail.get('content') or ''}{G_APPEND_PROMPT}"
        messages = [*messages[:-1], tail]

    # --- Build DeepSeek API Payload ---
    payload = {
        "model": model,
        "messages": messages,
        "stream": stream,
        **options  # Unpack core options like max_tokens, temperature, etc.
    }

    # 'deepseek-reasoner' always thinks, so the switch is only sent for deepseek-chat.
    if model == "deepseek-chat":
        payload["thinking"] = {"type": "enabled" if thinking else "disabled"}

    # --- Resolve Host & Endpoint ---
    effective_host = _resolve_host(host)
    endpoint = f"{effective_host}/chat/completions"

    headers = {
        "Authorization": f"Bearer {G_API_KEY}",
        "Content-Type": "application/json"
    }

    # Print configuration for debugging (preserves original behavior)
    print(f"Using model: {model}")
    print(f"Temperature: {payload.get('temperature', 'N/A')}")
    print(f"Max Tokens: {payload.get('max_tokens', 'N/A')}")

    # --- Make the Request ---
    if stream:
        # Lazy: nothing is sent until the generator is consumed.
        return _stream_response(endpoint, headers, payload)

    try:
        response = requests.post(endpoint, headers=headers, json=payload, timeout=600)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.ConnectionError as e:
        raise ConnectionError(f"Could not connect to DeepSeek at {effective_host}.") from e
    except requests.exceptions.Timeout as e:
        raise requests.exceptions.Timeout("Request timed out.") from e
    # Any other RequestException propagates unchanged.

 def _stream_response(endpoint: str, headers: Dict, payload: Dict) -> Generator[Dict, None, None]:
    """Yield parsed chunks from a streaming chat-completions request.

    Args:
        endpoint: Full URL of the chat-completions endpoint.
        headers: HTTP headers for the request.
        payload: Request body; sent with ``"stream": True`` without modifying
            the caller's dict.

    Yields:
        Each chunk produced by ``_parse_stream_line``, until the ``[DONE]``
        marker, a chunk with a truthy ``"done"`` key, or the end of the body.

    Raises:
        requests.exceptions.RequestException: on connection problems or an
            HTTP error status, raised when iteration starts.
    """
    body = {**payload, "stream": True}
    with requests.post(endpoint, headers=headers, json=body, stream=True, timeout=600) as response:
        response.raise_for_status()
        # Iterate raw bytes and let _parse_stream_line decode them as UTF-8.
        # Text-mode iteration would use requests' guessed charset (ISO-8859-1
        # for text/* without a charset) and split on non-ASCII line separators
        # that may legally occur inside JSON strings.
        for raw in response.iter_lines():
            chunk = _parse_stream_line(raw)
            if chunk is None:
                continue
            if chunk is _STREAM_DONE:
                break
            yield chunk

            # Do not stop on finish_reason: a usage-only chunk may still
            # follow it. The [DONE] marker or end of body ends the stream.
            if isinstance(chunk, dict) and chunk.get("done"):
                break

 def llm_nonstream(conv=None, thinking=True, options=None):
    """Run one blocking chat completion and return a flattened result.

    Args:
        conv: Conversation as a list of role/content dicts. ``None`` means an
            empty conversation.
        thinking: Forwarded to ``chat_with_deepseek``.
        options: Request options; a copy of ``G_OPTIONS`` is used when None.

    Returns:
        A dict with keys ``reasoning``, ``content``, ``usage`` (with
        ``reasoning_tokens``, ``content_tokens``, ``total_tokens``) and
        ``time_taken`` in seconds. This function does not raise: on any
        failure the error is printed and the fields keep their empty initial
        values.
    """
    if conv is None:
        conv = []
    if options is None:
        options = G_OPTIONS.copy()

    ret_dict = {
        "reasoning": "",
        "content": "",
        "usage": {},
        "time_taken": 0,
    }

    print("\n--- (Non-Streaming) ---")
    try:
        time_start = time.time()

        response = chat_with_deepseek(
            messages=conv,
            model=G_MODEL,
            options=options,
            thinking=thinking
        )

        message = response['choices'][0]['message']
        ret_dict["time_taken"] = time.time() - time_start

        # Either field may be present but null; normalise to strings.
        reasoning = message.get('reasoning_content') or ""
        content = message.get('content') or ""

        # Fallback for models that embed thinking in content with <think> tags.
        if not reasoning and "</think>" in content:
            head, tail = content.rsplit("</think>", 1)
            head = head.strip()
            if head.startswith("<think>"):
                head = head[len("<think>"):].strip()
            reasoning, content = head, tail.strip()

        ret_dict["reasoning"] = reasoning
        ret_dict["content"] = content

        # Token counts as reported by the API; the reasoning share lives under
        # completion_tokens_details when the model reports it.
        usage = response.get('usage') or {}
        details = usage.get('completion_tokens_details') or {}
        ret_dict["usage"] = {
            "reasoning_tokens": details.get('reasoning_tokens', 0),
            "content_tokens": usage.get('completion_tokens', 0),
            "total_tokens": usage.get('total_tokens', 0)
        }

    except Exception as e:
        # Deliberate best-effort boundary: report and return what we have.
        print(f"Error: {e}")

    return ret_dict

 def _usage_summary(usage):
    """Flatten an API ``usage`` object into the three counters reported here."""
    usage = usage or {}
    details = usage.get('completion_tokens_details') or {}
    return {
        "reasoning_tokens": details.get('reasoning_tokens', usage.get('reasoning_tokens', 0)),
        "content_tokens": usage.get('completion_tokens', 0),
        "total_tokens": usage.get('total_tokens', 0)
    }


 def _has_repeated_output(text):
    """Return True when ``text`` looks stuck in a loop (heuristics unchanged)."""
    # 1. Line-level: some block of 1-5 consecutive non-empty lines occurs >= 4 times.
    lines = [ln.strip() for ln in text.split('\n') if ln.strip()]
    for block_size in range(1, 6):
        if len(lines) < block_size * 3:
            continue
        for i in range(len(lines) - block_size * 3 + 1):
            block = lines[i:i + block_size]
            matches = sum(1 for j in range(len(lines) - block_size + 1) if lines[j:j + block_size] == block)
            if matches >= 4:
                return True

    # 2. Substring-level: some 75/100/150-character window occurs >= 4 times.
    if len(text) >= 100:
        for chunk_size in (75, 100, 150):
            if len(text) < chunk_size * 4:
                continue
            for i in range(0, len(text) - chunk_size, chunk_size // 2):
                if text.count(text[i:i + chunk_size]) >= 4:
                    return True

    return False


 def llm_stream(conv=None, thinking=True, options=None, retry_on_repeat=False, the_model=G_MODEL, max_retries=5):
    """Stream one chat completion, echoing it to stdout, and return the result.

    Args:
        conv: Conversation as a list of role/content dicts. ``None`` means an
            empty conversation.
        thinking: Forwarded to ``chat_with_deepseek``.
        options: Request options; a copy of ``G_OPTIONS`` is used when None.
            The caller's dict is never modified.
        retry_on_repeat: When True, re-run the request with a random
            temperature whenever the output looks like a repetition loop.
        the_model: Model name for the request.
        max_retries: Upper bound on such re-runs; ``None`` means unlimited.
            After the last allowed retry the most recent result is returned.

    Returns:
        A dict with keys ``reasoning``, ``content``, ``usage`` and
        ``time_taken`` in seconds. This function does not raise: if the
        request or stream fails, the error is printed and the text fields are
        returned empty.
    """
    if conv is None:
        conv = []
    # Private copy: the retry path below rewrites the temperature.
    options = G_OPTIONS.copy() if options is None else dict(options)

    print("\n--- Streaming Example ---")
    print(f"Using model: {the_model}")
    print(f"Temperature: {options.get('temperature', G_TEMP)}")

    retries = 0
    while True:
        ret_dict = {
            "reasoning": "",
            "content": "",
            "usage": {},
            "time_taken": 0,
        }

        try:
            stream = chat_with_deepseek(
                messages=conv,
                model=the_model,
                stream=True,
                thinking=thinking,
                options=options,
            )

            print("Streaming response: ", end="")
            time_start = time.time()
            reason_str = ""
            response_str = ""
            in_reasoning = True

            for chunk in stream:
                if not isinstance(chunk, dict):
                    continue

                choices = chunk.get('choices') or []
                first = choices[0] if choices else {}
                delta = first.get('delta') or {}

                # Dedicated reasoning_content field
                piece = delta.get('reasoning_content')
                if piece:
                    reason_str += piece
                    print(piece, end="", flush=True)

                # Main content field
                piece = delta.get('content')
                if piece:
                    if in_reasoning:
                        print("\n--- End of Reasoning, Start of Content ---")
                        in_reasoning = False
                    response_str += piece
                    print(piece, end="", flush=True)

                # A chunk may carry a complete message instead of deltas. Use
                # it only for fields that were not streamed, and store it in
                # the accumulators so it reaches the final result.
                final_message = chunk.get('message') or first.get('message')
                if final_message:
                    if not response_str:
                        response_str = final_message.get('content') or ''
                        print(response_str, end="", flush=True)
                    if not reason_str:
                        reason_str = final_message.get('reasoning_content') or ''
                        print(reason_str, end="", flush=True)

                # "usage" may be present but null on intermediate chunks.
                if chunk.get('usage'):
                    ret_dict["usage"] = _usage_summary(chunk['usage'])

            print()  # Newline after stream

            # Fallback for models that embed thinking in content with <think> tags.
            if not reason_str and "</think>" in response_str:
                head, tail = response_str.rsplit("</think>", 1)
                head = head.strip()
                if head.startswith("<think>"):
                    head = head[len("<think>"):].strip()
                reason_str, response_str = head, tail.strip()

            ret_dict["reasoning"] = reason_str
            ret_dict["content"] = response_str
            ret_dict["time_taken"] = time.time() - time_start

        except Exception as e:
            # Deliberate best-effort boundary: report and return what we have.
            print(f"Error: {e}")
            return ret_dict

        if not retry_on_repeat or not _has_repeated_output(response_str):
            break

        if max_retries is not None and retries >= max_retries:
            print(f"\n[!] Repeat detected, giving up after {max_retries} retries.")
            break
        retries += 1

        # Retry with varied temperature
        options['temperature'] = round(random.uniform(0.6, 1.5), 2)
        print(f"\n[!] Repeat detected, retrying with temperature {options['temperature']}...")

    return ret_dict


 # --- Example usage: stream one short conversation and print a summary ---
 if __name__ == "__main__":
    # Set the API key first, e.g.:  export DEEPSEEK_API_KEY="your_key_here"
    conversation = [
        {"role": "system", "content": "You are a helpful, concise assistant."},
        {"role": "user", "content": "Say hello in an alien language:"},
    ]

    ret_dict = llm_stream(conversation, thinking=G_THINKING)

    print("\n--- Reasoning ---\n")
    print(ret_dict["reasoning"])
    print("\n--- Content ---\n")
    print(ret_dict["content"])

    print("\n--- Token Counts and Timing Info ---\n")
    usage = ret_dict['usage']  # empty dict when the request failed
    print(f"Estimated Reasoning Tokens: {usage.get('reasoning_tokens', 'N/A')}")
    print(f"Estimated Content Tokens: {usage.get('content_tokens', 'N/A')}")
    print(f"Estimated Total Tokens: {usage.get('total_tokens', 'N/A')}")
    print(f"Total Time: {ret_dict.get('time_taken', 'N/A'):.2f} seconds")

    # Throughput is only meaningful when some time was actually measured.
    elapsed = ret_dict.get('time_taken', 0)
    if elapsed > 0:
        print(f"Average Speed: {usage.get('total_tokens', 0) / ret_dict['time_taken']:.2f} tokens/second")
	import requests
	import json
	from typing import List, Dict, Optional, Generator, Union
	import os
	import time
	import random

	# ==============================================================================
	# DeepSeek API configuration (module-level defaults; replaces the Ollama globals)
	# ==============================================================================

	# Size of the substring to check for repeats.
	G_LOOP_SIZE = 75

	# Suffix appended to the last user message by chat_with_deepseek. Empty here:
	# thinking is switched through the request's "thinking" object, not a prompt.
	G_APPEND_PROMPT = ""

	# Default API base URL (see _resolve_host for how it can be overridden).
	G_HOST = "https://api.deepseek.com"

	# ------------------------------------------------------------------------------
	# Model selection
	# "deepseek-chat"     -> standard chat with optional thinking
	# "deepseek-reasoner" -> dedicated reasoning model (currently active)
	# ------------------------------------------------------------------------------
	# G_MODEL = "deepseek-chat"
	G_MODEL = "deepseek-reasoner"

	# ------------------------------------------------------------------------------
	# Thinking mode
	# Only sent to the API for "deepseek-chat"; per the original author's note,
	# "deepseek-reasoner" always thinks.
	# ------------------------------------------------------------------------------
	G_THINKING = True

	# ------------------------------------------------------------------------------
	# API limits & sampling controls
	# ------------------------------------------------------------------------------
	# Must be a positive integer. The original note gives 64K as the ceiling for
	# deepseek-reasoner -- verify against the current API documentation.
	G_MAX_OUTPUT_TOKENS = 256
	G_TEMP = 0.0

	# Default request options merged into every payload. Ollama-only options
	# ('num_ctx', 'repeat_penalty', 'mirostat') were dropped. Per the original note,
	# temperature/top_p/presence_penalty/frequency_penalty have no effect on
	# 'deepseek-reasoner'.
	G_OPTIONS = dict(
	    max_tokens=G_MAX_OUTPUT_TOKENS,
	    temperature=G_TEMP,
	    top_p=0.9,
	    frequency_penalty=1.0,
	    presence_penalty=0.5,
	)

	# ------------------------------------------------------------------------------
	# Credentials
	# ------------------------------------------------------------------------------
	# Read from the DEEPSEEK_API_KEY environment variable; the fallback below is a
	# placeholder, not a working key.
	G_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "sk-your_key_here")


	# Unique sentinel returned by _parse_stream_line for the end-of-stream marker.
	_STREAM_DONE = object()

	def _resolve_host(host: Optional[str]) -> str:
	    """Pick the API base URL and strip any trailing slashes.

	    Precedence: the explicit ``host`` argument, then the ``DEEPSEEK_HOST``
	    environment variable, then the module default ``G_HOST``. Empty values
	    are treated the same as missing ones.
	    """
	    # Body indentation restored: as pasted, the function had no indented
	    # body and could not be compiled.
	    return (host or os.getenv("DEEPSEEK_HOST") or G_HOST).rstrip('/')

	def _parse_stream_line(raw_line: Union[bytes, str]) -> Optional[Union[Dict, object]]:
	    """Parse one line of a streaming response into a chunk.

	    Both SSE ``data:`` lines and bare JSON lines are accepted.

	    Args:
	        raw_line: One line of the response body. Bytes are decoded as UTF-8
	            with undecodable bytes replaced.

	    Returns:
	        ``None`` when the line carries no chunk (blank line, ``:`` comment,
	        empty ``data:`` field, invalid JSON, or JSON that is not an object);
	        the ``_STREAM_DONE`` sentinel for ``data: [DONE]``; otherwise the
	        decoded JSON object.
	    """
	    if isinstance(raw_line, bytes):
	        text = raw_line.decode('utf-8', errors='replace')
	    else:
	        text = raw_line

	    text = text.strip()
	    if not text or text.startswith(':'):
	        return None

	    if text.startswith('data:'):
	        body = text[5:].strip()
	        if not body:
	            # An empty data field carries no event; it must not be mistaken
	            # for the end-of-stream marker.
	            return None
	        if body == '[DONE]':
	            return _STREAM_DONE
	    else:
	        body = text

	    try:
	        parsed = json.loads(body)
	    except json.JSONDecodeError as exc:
	        print(f"Failed to decode chunk: {raw_line!r}, error: {exc}")
	        return None

	    # Consumers call .get() on every chunk, so only JSON objects are passed on.
	    if not isinstance(parsed, dict):
	        return None
	    return parsed

	def chat_with_deepseek(
	    messages: List[Dict[str, str]],
	    model: str = G_MODEL,
	    host: Optional[str] = None,
	    stream: bool = False,
	    options: Optional[Dict] = None,  # Will use G_OPTIONS if None
	    thinking: bool = G_THINKING,
	    **kwargs
	) -> Union[Dict, Generator[Dict, None, None]]:
	    """
	    Send a conversation to DeepSeek's chat-completions endpoint.

	    The signature mirrors ``chat_with_ollama`` so this can be used as a
	    drop-in replacement.

	    Args:
	        messages: Conversation as a list of role/content dicts. The list and
	            its dicts are never modified; when ``G_APPEND_PROMPT`` is set,
	            the suffix is applied to a copy of the last user message.
	        model: Model name placed in the request body.
	        host: Optional base-URL override, resolved by ``_resolve_host``.
	        stream: When True, return a generator of parsed chunks instead of
	            the decoded response.
	        options: Extra request-body fields (``max_tokens``, ``temperature``,
	            ...). A copy of ``G_OPTIONS`` is used when None. These keys are
	            merged last, so they win over ``model``/``messages``/``stream``.
	        thinking: Sent as the ``thinking`` object only when ``model`` is
	            ``"deepseek-chat"``.
	        **kwargs: Accepted for signature compatibility and ignored.

	    Returns:
	        The decoded JSON response, or a generator of chunk dicts when
	        ``stream`` is True.

	    Raises:
	        ConnectionError: (builtin) the server could not be reached.
	        requests.exceptions.Timeout: the request timed out.
	        requests.exceptions.RequestException: any other request failure,
	            including HTTP error statuses.

	    Note:
	        With ``stream=True`` the request is only issued once the returned
	        generator is iterated, so failures surface from the generator as raw
	        ``requests`` exceptions and are not translated here.
	    """
	    if options is None:
	        options = G_OPTIONS.copy()

	    # Legacy suffix support. Build a new list with a copied last message so
	    # repeated calls with the same conversation do not stack the suffix.
	    if G_APPEND_PROMPT and messages and messages[-1].get("role") == "user":
	        tail = dict(messages[-1])
	        tail["content"] = f"{tail.get('content') or ''}{G_APPEND_PROMPT}"
	        messages = [*messages[:-1], tail]

	    # --- Build DeepSeek API Payload ---
	    payload = {
	        "model": model,
	        "messages": messages,
	        "stream": stream,
	        **options  # Unpack core options like max_tokens, temperature, etc.
	    }

	    # 'deepseek-reasoner' always thinks, so the switch is only sent for deepseek-chat.
	    if model == "deepseek-chat":
	        payload["thinking"] = {"type": "enabled" if thinking else "disabled"}

	    # --- Resolve Host & Endpoint ---
	    effective_host = _resolve_host(host)
	    endpoint = f"{effective_host}/chat/completions"

	    headers = {
	        "Authorization": f"Bearer {G_API_KEY}",
	        "Content-Type": "application/json"
	    }

	    # Print configuration for debugging (preserves original behavior)
	    print(f"Using model: {model}")
	    print(f"Temperature: {payload.get('temperature', 'N/A')}")
	    print(f"Max Tokens: {payload.get('max_tokens', 'N/A')}")

	    # --- Make the Request ---
	    if stream:
	        # Lazy: nothing is sent until the generator is consumed.
	        return _stream_response(endpoint, headers, payload)

	    try:
	        response = requests.post(endpoint, headers=headers, json=payload, timeout=600)
	        response.raise_for_status()
	        return response.json()
	    except requests.exceptions.ConnectionError as e:
	        raise ConnectionError(f"Could not connect to DeepSeek at {effective_host}.") from e
	    except requests.exceptions.Timeout as e:
	        raise requests.exceptions.Timeout("Request timed out.") from e
	    # Any other RequestException propagates unchanged.

	def _stream_response(endpoint: str, headers: Dict, payload: Dict) -> Generator[Dict, None, None]:
	    """Yield parsed chunks from a streaming chat-completions request.

	    Args:
	        endpoint: Full URL of the chat-completions endpoint.
	        headers: HTTP headers for the request.
	        payload: Request body; sent with ``"stream": True`` without
	            modifying the caller's dict.

	    Yields:
	        Each chunk produced by ``_parse_stream_line``, until the ``[DONE]``
	        marker, a chunk with a truthy ``"done"`` key, or the end of the body.

	    Raises:
	        requests.exceptions.RequestException: on connection problems or an
	            HTTP error status, raised when iteration starts.
	    """
	    body = {**payload, "stream": True}
	    with requests.post(endpoint, headers=headers, json=body, stream=True, timeout=600) as response:
	        response.raise_for_status()
	        # Iterate raw bytes and let _parse_stream_line decode them as UTF-8.
	        # Text-mode iteration would use requests' guessed charset (ISO-8859-1
	        # for text/* without a charset) and split on non-ASCII line
	        # separators that may legally occur inside JSON strings.
	        for raw in response.iter_lines():
	            chunk = _parse_stream_line(raw)
	            if chunk is None:
	                continue
	            if chunk is _STREAM_DONE:
	                break
	            yield chunk

	            # Do not stop on finish_reason: a usage-only chunk may still
	            # follow it. The [DONE] marker or end of body ends the stream.
	            if isinstance(chunk, dict) and chunk.get("done"):
	                break

	def llm_nonstream(conv=None, thinking=True, options=None):
	    """Run one blocking chat completion and return a flattened result.

	    Args:
	        conv: Conversation as a list of role/content dicts. ``None`` means
	            an empty conversation.
	        thinking: Forwarded to ``chat_with_deepseek``.
	        options: Request options; a copy of ``G_OPTIONS`` is used when None.

	    Returns:
	        A dict with keys ``reasoning``, ``content``, ``usage`` (with
	        ``reasoning_tokens``, ``content_tokens``, ``total_tokens``) and
	        ``time_taken`` in seconds. This function does not raise: on any
	        failure the error is printed and the fields keep their empty
	        initial values.
	    """
	    if conv is None:
	        conv = []
	    if options is None:
	        options = G_OPTIONS.copy()

	    ret_dict = {
	        "reasoning": "",
	        "content": "",
	        "usage": {},
	        "time_taken": 0,
	    }

	    print("\n--- (Non-Streaming) ---")
	    try:
	        time_start = time.time()

	        response = chat_with_deepseek(
	            messages=conv,
	            model=G_MODEL,
	            options=options,
	            thinking=thinking
	        )

	        message = response['choices'][0]['message']
	        ret_dict["time_taken"] = time.time() - time_start

	        # Either field may be present but null; normalise to strings.
	        reasoning = message.get('reasoning_content') or ""
	        content = message.get('content') or ""

	        # Fallback for models that embed thinking in content with <think> tags.
	        if not reasoning and "</think>" in content:
	            head, tail = content.rsplit("</think>", 1)
	            head = head.strip()
	            if head.startswith("<think>"):
	                head = head[len("<think>"):].strip()
	            reasoning, content = head, tail.strip()

	        ret_dict["reasoning"] = reasoning
	        ret_dict["content"] = content

	        # Token counts as reported by the API; the reasoning share lives
	        # under completion_tokens_details when the model reports it.
	        usage = response.get('usage') or {}
	        details = usage.get('completion_tokens_details') or {}
	        ret_dict["usage"] = {
	            "reasoning_tokens": details.get('reasoning_tokens', 0),
	            "content_tokens": usage.get('completion_tokens', 0),
	            "total_tokens": usage.get('total_tokens', 0)
	        }

	    except Exception as e:
	        # Deliberate best-effort boundary: report and return what we have.
	        print(f"Error: {e}")

	    return ret_dict

	def _usage_summary(usage):
	    """Flatten an API ``usage`` object into the three counters reported here."""
	    usage = usage or {}
	    details = usage.get('completion_tokens_details') or {}
	    return {
	        "reasoning_tokens": details.get('reasoning_tokens', usage.get('reasoning_tokens', 0)),
	        "content_tokens": usage.get('completion_tokens', 0),
	        "total_tokens": usage.get('total_tokens', 0)
	    }


	def _has_repeated_output(text):
	    """Return True when ``text`` looks stuck in a loop (heuristics unchanged)."""
	    # 1. Line-level: some block of 1-5 consecutive non-empty lines occurs >= 4 times.
	    lines = [ln.strip() for ln in text.split('\n') if ln.strip()]
	    for block_size in range(1, 6):
	        if len(lines) < block_size * 3:
	            continue
	        for i in range(len(lines) - block_size * 3 + 1):
	            block = lines[i:i + block_size]
	            matches = sum(1 for j in range(len(lines) - block_size + 1) if lines[j:j + block_size] == block)
	            if matches >= 4:
	                return True

	    # 2. Substring-level: some 75/100/150-character window occurs >= 4 times.
	    if len(text) >= 100:
	        for chunk_size in (75, 100, 150):
	            if len(text) < chunk_size * 4:
	                continue
	            for i in range(0, len(text) - chunk_size, chunk_size // 2):
	                if text.count(text[i:i + chunk_size]) >= 4:
	                    return True

	    return False


	def llm_stream(conv=None, thinking=True, options=None, retry_on_repeat=False, the_model=G_MODEL, max_retries=5):
	    """Stream one chat completion, echoing it to stdout, and return the result.

	    Args:
	        conv: Conversation as a list of role/content dicts. ``None`` means
	            an empty conversation.
	        thinking: Forwarded to ``chat_with_deepseek``.
	        options: Request options; a copy of ``G_OPTIONS`` is used when None.
	            The caller's dict is never modified.
	        retry_on_repeat: When True, re-run the request with a random
	            temperature whenever the output looks like a repetition loop.
	        the_model: Model name for the request.
	        max_retries: Upper bound on such re-runs; ``None`` means unlimited.
	            After the last allowed retry the most recent result is returned.

	    Returns:
	        A dict with keys ``reasoning``, ``content``, ``usage`` and
	        ``time_taken`` in seconds. This function does not raise: if the
	        request or stream fails, the error is printed and the text fields
	        are returned empty.
	    """
	    if conv is None:
	        conv = []
	    # Private copy: the retry path below rewrites the temperature.
	    options = G_OPTIONS.copy() if options is None else dict(options)

	    print("\n--- Streaming Example ---")
	    print(f"Using model: {the_model}")
	    print(f"Temperature: {options.get('temperature', G_TEMP)}")

	    retries = 0
	    while True:
	        ret_dict = {
	            "reasoning": "",
	            "content": "",
	            "usage": {},
	            "time_taken": 0,
	        }

	        try:
	            stream = chat_with_deepseek(
	                messages=conv,
	                model=the_model,
	                stream=True,
	                thinking=thinking,
	                options=options,
	            )

	            print("Streaming response: ", end="")
	            time_start = time.time()
	            reason_str = ""
	            response_str = ""
	            in_reasoning = True

	            for chunk in stream:
	                if not isinstance(chunk, dict):
	                    continue

	                choices = chunk.get('choices') or []
	                first = choices[0] if choices else {}
	                delta = first.get('delta') or {}

	                # Dedicated reasoning_content field
	                piece = delta.get('reasoning_content')
	                if piece:
	                    reason_str += piece
	                    print(piece, end="", flush=True)

	                # Main content field
	                piece = delta.get('content')
	                if piece:
	                    if in_reasoning:
	                        print("\n--- End of Reasoning, Start of Content ---")
	                        in_reasoning = False
	                    response_str += piece
	                    print(piece, end="", flush=True)

	                # A chunk may carry a complete message instead of deltas.
	                # Use it only for fields that were not streamed, and store
	                # it in the accumulators so it reaches the final result.
	                final_message = chunk.get('message') or first.get('message')
	                if final_message:
	                    if not response_str:
	                        response_str = final_message.get('content') or ''
	                        print(response_str, end="", flush=True)
	                    if not reason_str:
	                        reason_str = final_message.get('reasoning_content') or ''
	                        print(reason_str, end="", flush=True)

	                # "usage" may be present but null on intermediate chunks.
	                if chunk.get('usage'):
	                    ret_dict["usage"] = _usage_summary(chunk['usage'])

	            print()  # Newline after stream

	            # Fallback for models that embed thinking in content with <think> tags.
	            if not reason_str and "</think>" in response_str:
	                head, tail = response_str.rsplit("</think>", 1)
	                head = head.strip()
	                if head.startswith("<think>"):
	                    head = head[len("<think>"):].strip()
	                reason_str, response_str = head, tail.strip()

	            ret_dict["reasoning"] = reason_str
	            ret_dict["content"] = response_str
	            ret_dict["time_taken"] = time.time() - time_start

	        except Exception as e:
	            # Deliberate best-effort boundary: report and return what we have.
	            print(f"Error: {e}")
	            return ret_dict

	        if not retry_on_repeat or not _has_repeated_output(response_str):
	            break

	        if max_retries is not None and retries >= max_retries:
	            print(f"\n[!] Repeat detected, giving up after {max_retries} retries.")
	            break
	        retries += 1

	        # Retry with varied temperature
	        options['temperature'] = round(random.uniform(0.6, 1.5), 2)
	        print(f"\n[!] Repeat detected, retrying with temperature {options['temperature']}...")

	    return ret_dict


	# --- Example usage: stream one short conversation and print a summary ---
	# Block structure restored: as pasted, the body of this `if` (and of the
	# nested `if`) was not indented, which is a syntax error.
	if __name__ == "__main__":
	    # Set the API key first, e.g.:  export DEEPSEEK_API_KEY="your_key_here"
	    conversation = [
	        {"role": "system", "content": "You are a helpful, concise assistant."},
	        {"role": "user", "content": "Say hello in an alien language:"}
	    ]

	    ret_dict = llm_stream(conversation, thinking=G_THINKING)

	    print(f"\n--- Reasoning ---\n")
	    print(ret_dict["reasoning"])
	    print(f"\n--- Content ---\n")
	    print(ret_dict["content"])

	    print(f"\n--- Token Counts and Timing Info ---\n")
	    print(f"Estimated Reasoning Tokens: {ret_dict['usage'].get('reasoning_tokens', 'N/A')}")
	    print(f"Estimated Content Tokens: {ret_dict['usage'].get('content_tokens', 'N/A')}")
	    print(f"Estimated Total Tokens: {ret_dict['usage'].get('total_tokens', 'N/A')}")
	    print(f"Total Time: {ret_dict.get('time_taken', 'N/A'):.2f} seconds")
	    # Throughput is only meaningful when some time was actually measured.
	    if ret_dict.get('time_taken', 0) > 0:
	        print(f"Average Speed: {ret_dict['usage'].get('total_tokens', 0) / ret_dict['time_taken']:.2f} tokens/second")
No results found