#!/bin/bash

# Requires code2prompt to be installed: https://github.com/mufeedvh/code2prompt
# Requires the fd or fdfind command to be installed. Under Ubuntu: sudo apt install fd-find

## ---------------------------------------------------------------------------------
## INITIALIZATION
## ---------------------------------------------------------------------------------

# Announce the run and make sure the folder that receives the generated
# prompts is present (mkdir -p succeeds silently when it already exists).
printf '%s\n' "Updating prompts"
printf '%s\n' "Create output directory if it doesn't exist"
mkdir -p "./50 Resources/57 Prompts/"

# Global associative array used as a lookup table for file paths
declare -A file_map

## ---------------------------------------------------------------------------------
## FUNCTIONS
## ---------------------------------------------------------------------------------

# Function to build a map of all markdown files in specified folders
#
# Fills the global associative array file_map with one entry per markdown
# file found below the vault folders listed in this function:
#   key   = file name without its directory and without the ".md" suffix
#   value = path of the file, prefixed with "./" unless it already starts
#           with "./" or "/"
# When several files share the same name, the one listed last wins.
#
# Globals:  file_map (cleared, then written; must already be declared with
#           "declare -A" by the caller)
# Outputs:  progress messages on stdout
# Returns:  1 when none of the folders exists or the temporary file cannot
#           be created, 0 otherwise
build_file_map() {
  echo "Building file map for efficient lookups..."
  local start_time
  start_time=$(date +%s)

  # Clear the map first
  file_map=()

  # Define the folders to search in
  local folders=(
    "./10 Meta"
    "./20 Projects"
    "./30 Areas"
    "./40 Journal"
    "./50 Resources"
    "./60 Archives"
  )

  # Keep only the folders that actually exist
  local existing_folders=()
  local folder
  for folder in "${folders[@]}"; do
    if [ -d "$folder" ]; then
      existing_folders+=("$folder")
    else
      echo "Warning: Folder $folder does not exist, skipping"
    fi
  done

  if [ ${#existing_folders[@]} -eq 0 ]; then
    echo "Error: No valid folders to scan"
    return 1
  fi

  echo "Scanning ${#existing_folders[@]} folders..."

  # Temporary file that receives the list of markdown files, one per line
  local temp_file
  temp_file=$(mktemp) || {
    echo "Error: Could not create temporary file"
    return 1
  }

  # Try to use fd if available (much faster than find)
  if command -v fd &> /dev/null; then
    fd --type f --extension md . "${existing_folders[@]}" > "$temp_file"
  elif command -v fdfind &> /dev/null; then
    fdfind --type f --extension md . "${existing_folders[@]}" > "$temp_file"
  else
    echo "Using find for scanning since the fd and fdfind commands could not be found..."
    # -print lists the paths without starting one process per file;
    # the error output of find is discarded.
    find "${existing_folders[@]}" -type f -name "*.md" -print 2>/dev/null > "$temp_file"
  fi

  echo "Processing found files..."

  # Fill the map directly from the list. Only parameter expansion is used
  # inside the loop (no process is started per file), and because no shell
  # code is generated and sourced, characters such as $, ` or " in file
  # names are stored literally instead of being interpreted by the shell.
  local path filename
  while IFS= read -r path; do
    [[ -n "$path" ]] || continue

    filename=${path##*/}      # Remove path
    filename=${filename%.md}  # Remove .md extension

    # An empty string is not a valid associative array key in bash
    [[ -n "$filename" ]] || continue

    # Ensure path starts with ./ if needed
    if [[ "$path" != ./* && "$path" != /* ]]; then
      path="./$path"
    fi

    file_map["$filename"]=$path
  done < "$temp_file"

  # Clean up
  rm -f -- "$temp_file"

  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))
  echo "File map built with ${#file_map[@]} files in $duration seconds"
}

# Function to extract links from a Map of Content file
#
# Prints the target of every [[wiki link]] found in the file, one per line,
# in order of appearance. Each target is reduced to the bare note name:
#   [[Foo|Bar]]         -> Foo   (alias after the pipe removed)
#   [[Foo#Heading]]     -> Foo   (heading / block anchor removed)
#   [[Foo#Heading|Bar]] -> Foo
# Links whose target is empty after this reduction ([[]] or [[#Heading]])
# are skipped, since they do not name a file.
#
# Arguments: $1 - path of the Map of Content file to scan
# Outputs:   one link target per line on stdout
# Returns:   exit status of awk
extract_links() {
  local moc_file="$1"
  # Single awk pass; "rest" always holds the not yet scanned tail of the line
  awk '
    {
      rest = $0
      while (match(rest, /\[\[([^]]*)\]\]/)) {
        # Text between the opening [[ and the closing ]]
        link = substr(rest, RSTART + 2, RLENGTH - 4)
        rest = substr(rest, RSTART + RLENGTH)
        # Handle aliased links by keeping only part before pipe
        sub(/\|.*/, "", link)
        # Drop a heading or block anchor so that only the note name remains
        sub(/#.*/, "", link)
        if (link != "") {
          print link
        }
      }
    }
  ' "$moc_file"
}

# Function to find a file in the vault based on its title using the file map
#
# Globals:   file_map (read)
# Arguments: $1 - title to look up
# Outputs:   the stored path on stdout, or an empty line when there is none
# Returns:   0 when a non-empty entry exists for the title, 1 otherwise
find_file_by_title() {
  local wanted="$1"
  local hit="${file_map[$wanted]}"

  # Nothing stored for this title: emit an empty line and signal failure
  if [[ -z "$hit" ]]; then
    echo ""
    return 1
  fi

  echo "$hit"
  return 0
}

# Function to generate a mega prompt from a Map of Content
#
# Resolves every link of the Map of Content through file_map, copies the
# resolved files into a temporary directory and lets code2prompt turn that
# directory into a single prompt file.
#
# Globals:   file_map (read)
# Arguments: $1 - path of the Map of Content file
#            $2 - path of the prompt file to (re)create
# Outputs:   progress messages on stdout
# Returns:   0 on success; 1 when no link could be resolved (a one-line
#            placeholder is then written to $2), when the temporary
#            directory cannot be created, or when code2prompt fails
generate_moc_mega_prompt() {
  local moc_file="$1"
  local output_file="$2"

  # Remove the target file if it already exists
  if [ -f "$output_file" ]; then
    echo "Removing existing file: $output_file"
    rm "$output_file"
  fi

  echo "Extracting links from $moc_file"
  local link_array=()
  mapfile -t link_array < <(extract_links "$moc_file")
  local total_count=${#link_array[@]}

  echo "Processing $total_count links from $moc_file"

  # Resolve each link title with a direct lookup in the file map
  local found_paths=()
  local title
  for title in "${link_array[@]}"; do
    # The emptiness check comes first: an empty string is not a valid
    # associative array key in bash.
    if [[ -n "$title" && -n "${file_map[$title]}" ]]; then
      found_paths+=("${file_map[$title]}")
      echo "✓ Found file for '$title': ${file_map[$title]}"
    else
      echo "✗ Could not find file for '$title' - ignoring"
    fi
  done
  local found_count=${#found_paths[@]}

  echo "Found $found_count out of $total_count linked files"

  if [ "$found_count" -eq 0 ]; then
    echo "Warning: No files were found for the links in $moc_file"
    echo "No files found for links in $moc_file" > "$output_file"
    return 1
  fi

  # Without this check a failed mktemp would leave temp_dir empty and the
  # copies below would be written to "/".
  local temp_dir
  temp_dir=$(mktemp -d) || {
    echo "Error: Could not create temporary directory"
    return 1
  }

  echo "Copying files to temporary directory"
  local file_path
  for file_path in "${found_paths[@]}"; do
    # Make sure the file exists, retrying with an added .md extension
    if [[ -f "$file_path" ]]; then
      cp -- "$file_path" "$temp_dir/"
      echo "Copied: $file_path"
    elif [[ -f "${file_path}.md" ]]; then
      cp -- "${file_path}.md" "$temp_dir/"
      echo "Copied with added extension: ${file_path}.md"
    else
      echo "Warning: Could not find file: $file_path"
    fi
  done

  echo "Generating mega prompt from collected files"
  local status=0
  code2prompt "$temp_dir" --include "*.md" --tokens --output="$output_file" || status=$?

  echo "Cleaning up temporary directory"
  rm -rf -- "$temp_dir"

  # Report success only when code2prompt actually succeeded
  if [ "$status" -ne 0 ]; then
    echo "Error: code2prompt failed with exit status $status while generating $output_file"
    return 1
  fi

  echo "Successfully generated prompt at $output_file with content from $found_count files"
}

# Function to combine multiple mega prompt files into a new one
#
# Copies the existing input files into a temporary directory and lets
# code2prompt merge that directory into the output file. Input files that
# do not exist are reported and skipped.
#
# Arguments: $1     - path of the combined prompt file to (re)create
#            $2...  - between 1 and 10 input prompt files
# Outputs:   progress messages on stdout
# Returns:   0 on success; 1 when the number of input files is out of
#            range, when none of them exists, when the temporary directory
#            cannot be created, or when code2prompt fails
combine_mega_prompts() {
  local output_file="$1"
  shift  # Remove the first argument (output_file) from the argument list
  local input_files=("$@")  # Remaining arguments are input files

  # Check if we have between 1 and 10 input files
  if [ ${#input_files[@]} -lt 1 ] || [ ${#input_files[@]} -gt 10 ]; then
    echo "Error: combine_mega_prompts requires between 1 and 10 input files"
    return 1
  fi

  # Remove the target file if it already exists
  if [ -f "$output_file" ]; then
    echo "Removing existing file: $output_file"
    rm "$output_file"
  fi

  echo "Combining ${#input_files[@]} mega prompt files into $output_file"

  # Keep only the input files that exist
  local valid_inputs=()
  local input_file
  for input_file in "${input_files[@]}"; do
    if [ -f "$input_file" ]; then
      echo "Processing: $input_file"
      valid_inputs+=("$input_file")
    else
      echo "Warning: Input file not found: $input_file"
    fi
  done

  if [ ${#valid_inputs[@]} -eq 0 ]; then
    echo "Error: No valid input files found"
    return 1
  fi

  # Without this check a failed mktemp would leave temp_dir empty and the
  # copies below would be written to "/".
  local temp_dir
  temp_dir=$(mktemp -d) || {
    echo "Error: Could not create temporary directory"
    return 1
  }

  echo "Copying files to temporary directory"
  local file_path
  for file_path in "${valid_inputs[@]}"; do
    if cp -- "$file_path" "$temp_dir/"; then
      echo "Copied: $file_path"
    else
      echo "Warning: Could not copy file: $file_path"
    fi
  done

  # Use code2prompt to combine all files in the temp directory
  local status=0
  code2prompt "$temp_dir" --include "*.md" --tokens --output="$output_file" || status=$?

  # Clean up
  rm -rf -- "$temp_dir"

  # Report success only when code2prompt actually succeeded
  if [ "$status" -ne 0 ]; then
    echo "Error: code2prompt failed with exit status $status while combining into $output_file"
    return 1
  fi

  echo "Successfully combined mega prompts into $output_file"
}

## ---------------------------------------------------------------------------------
## PROMPTS GENERATION
## ---------------------------------------------------------------------------------

# Index the markdown files of the vault once; the Map of Content prompts
# below resolve their links through this index.
build_file_map

# Prompts generated directly from whole folders
printf '%s\n' "Generate mega prompt from literature notes"
code2prompt "./30 Areas/32 Literature notes" \
  --include "*.md" \
  --tokens \
  --output="./50 Resources/57 Prompts/Literature notes.md"

printf '%s\n' "Generate mega prompt from permanent notes"
code2prompt "./30 Areas/33 Permanent notes" \
  --include "*.md" \
  --tokens \
  --output="./50 Resources/57 Prompts/Permanent notes.md"

printf '%s\n' "Generate Projects mega prompt from project notes"
code2prompt "./20 Projects" \
  --include "*.md" \
  --tokens \
  --output="./50 Resources/57 Prompts/Projects.md"

# Prompts generated from the notes linked by a Map of Content
printf '%s\n' "Generate mega prompt from PKM Map of Content"
generate_moc_mega_prompt \
  "./30 Areas/34 MOCs/PKM (MoC).md" \
  "./50 Resources/57 Prompts/PKM notes.md"

printf '%s\n' "Generate mega prompt from Obsidian Map of Content"
generate_moc_mega_prompt \
  "./30 Areas/34 MOCs/Obsidian (MoC).md" \
  "./50 Resources/57 Prompts/Obsidian notes.md"

# Combine relevant mega prompts into a Knowledge Management mega prompt
printf '%s\n' "Generating combined Knowledge Management mega prompt"
combine_mega_prompts \
  "./50 Resources/57 Prompts/Knowledge Management.md" \
  "./50 Resources/57 Prompts/Obsidian notes.md" \
  "./50 Resources/57 Prompts/PKM notes.md"