Last active
July 7, 2025 20:02
-
-
Save johnnymo87/4701b6671730768ba95f19a5ee29a177 to your computer and use it in GitHub Desktop.
Concatenates code files from directories and their subdirectories into a single output file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
: <<'END'
Script Name: code_concatenator.bash
Purpose:
This script is designed to concatenate all code files within specified directories
and their subdirectories, or specified individual files, into a single output file.
The output file will contain the file paths and contents of each code file,
separated by a delimiter (```). This script is particularly useful for preparing
code files for analysis or processing by other tools or services that require a
single file input.
Usage:
./code_concatenator.bash <PATH_1> [<PATH_2> ...]
Arguments:
- PATH...: One or more paths to files or directories to be concatenated.
Features:
- Respects the .gitignore file if the current working directory is a Git repository.
- Recursively traverses specified directories and their subdirectories.
- Can handle a mix of file and directory paths as input.
- Supports various file extensions (e.g., .js, .py, .java, .cpp, etc.).
- Handles files with unbalanced backticks or other special characters.
- Outputs the concatenated file contents to the console (can be redirected to a file).
Background:
The script uses a combination of Bash built-in commands and utilities to achieve
its functionality. It checks if the current working directory is a Git repository
and uses the `git ls-files` command to list files, respecting the .gitignore file.
If the current working directory is not a Git repository, the script falls back to
the recursive traversal method. The script handles potential issues with the
use of the ``` delimiter appearing in the source code by using a custom
intermediate delimiter (###) before adjusting it to ``` at the very end.
Dependencies:
- Bash: The script is written for Bash shell environments found in Linux and macOS
systems.
- Git: The script requires Git to be installed if the current working directory is
a Git repository.
Note:
While this script is designed to handle a wide range of code files, it may not work
as expected for files with extremely large sizes or specific encoding issues. It's
recommended to review the output and adjust the script as needed for your specific
use case.
Note:
This script was originally written with the assistance of the
"claude-3-opus-20240229" model developed by Anthropic.
Author: Jonathan Mohrbacher (github.com/johnnymo87)
Date: 2024-04-13
END
# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
#######################################
# Append one file to the output as a labelled, delimited section:
#
#   File: `<path>`
#   ###
#   <file contents>
#   ###
#   <blank line>
#
# Arguments:
#   $1 - path of the file whose contents are appended
#   $2 - path of the output file (appended to, never truncated)
# Returns:
#   Status of the last command in the group; if the source file cannot be
#   read, cat reports the error on stderr.
#######################################
add_file() {
  local file_path="$1"
  local output_file="$2"
  # Intermediate delimiter written around every file body.
  local delimiter='###'

  {
    printf 'File: `%s`\n' "$file_path"
    printf '%s\n' "$delimiter"
    cat -- "$file_path"
    # A non-empty file whose last byte is not a newline would otherwise get
    # the closing delimiter glued onto its final line; terminate that line so
    # the delimiter always sits on a line of its own.
    if [[ -s "$file_path" ]] && (( $(tail -c 1 -- "$file_path" | wc -l) == 0 )); then
      printf '\n'
    fi
    # Closing delimiter followed by a blank separator line.
    printf '%s\n\n' "$delimiter"
  } >> "$output_file"
}
#######################################
# Append every file found under a path to the output via add_file.
#
# When the current working directory is inside a Git work tree, the files are
# taken from `git ls-files` for the given path; otherwise a directory is
# walked recursively with a plain glob (which does not match dotfiles) and a
# regular file is added directly.
#
# Arguments:
#   $1 - file or directory to process
#   $2 - output file handed through to add_file
#######################################
concatenate_files() {
  local path_arg="$1"
  local output_file="$2"
  local repo_root file item

  if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    repo_root=$(git rev-parse --show-toplevel)
    # --full-name makes the listed paths relative to the repository root, so
    # prefixing them with $repo_root is correct even when the script is run
    # from a subdirectory. -z emits raw NUL-terminated names, so unusual
    # characters and surrounding whitespace in file names survive intact.
    git ls-files -z --full-name -- "$path_arg" |
      while IFS= read -r -d '' file; do
        # Skip index entries with no regular file on disk (e.g. files deleted
        # from the work tree, or submodule entries).
        [[ -f "$repo_root/$file" ]] || continue
        add_file "$repo_root/$file" "$output_file"
      done
  elif [[ -d "$path_arg" ]]; then
    # Not a Git work tree: walk the directory recursively.
    for item in "$path_arg"/*; do
      if [[ -d "$item" ]]; then
        concatenate_files "$item" "$output_file"
      elif [[ -f "$item" ]]; then
        add_file "$item" "$output_file"
      fi
    done
  elif [[ -f "$path_arg" ]]; then
    add_file "$path_arg" "$output_file"
  fi
}
# Require at least one path; usage errors are diagnostics, so they go to stderr.
if [[ $# -eq 0 ]]; then
  echo "Usage: $0 <PATH_1> [<PATH_2> ...]" >&2
  exit 1
fi

# Collect all sections in a temporary file. The EXIT trap removes it on every
# exit path, including aborts triggered by `set -e`.
output_file=$(mktemp)
trap 'rm -f -- "$output_file"' EXIT

# Process each path provided as an argument.
for path_arg in "$@"; do
  if [[ ! -e "$path_arg" ]]; then
    echo "Error: Path '$path_arg' not found." >&2
    exit 1
  fi

  # Resolve the argument to an absolute path. CDPATH is cleared for the cd so
  # it cannot print the directory name into the captured output.
  abs_path=""
  if [[ -d "$path_arg" ]]; then
    abs_path="$(CDPATH= cd -- "$path_arg" && pwd)"
  elif [[ -f "$path_arg" ]]; then
    file_dir=$(dirname -- "$path_arg")
    file_name=$(basename -- "$path_arg")
    abs_path="$(CDPATH= cd -- "$file_dir" && pwd)/$file_name"
  else
    echo "Warning: '$path_arg' is not a regular file or directory. Skipping." >&2
    continue
  fi

  concatenate_files "$abs_path" "$output_file"
done

# Print the collected output, turning the intermediate ### delimiter into ```.
# NOTE: any source line that itself ends in ### is rewritten as well.
LC_ALL=C sed 's/###$/```/g' "$output_file"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment