stuaxo · September 10, 2024 16:31 · amigax · May 21, 2024 · stuaxo · May 21, 2024
diff --git a/dirtollm.py b/dirtollm.py
 #!/usr/bin/env python3

 # Usage: python dirtollm.py [files or glob patterns...] [options]
 # Example: python dirtollm.py "*.py" "*.txt" /path/to/specific/file.py --exclude "*.pyc" --copy --verbose -x --binaries

 import argparse
 import pathlib
 import fnmatch
 import sys
 import os
 from typing import List, Tuple, Optional

 # pyperclip is an optional third-party module; record whether it imported.
 try:
     import pyperclip
 except ImportError:
     # Bind the name anyway so later references resolve (keeps linters happy).
     pyperclip = None
     PYPERCLIP_AVAILABLE = False
 else:
     PYPERCLIP_AVAILABLE = True

 class FileProcessingError(Exception):
     """Raised to abort processing when a file fails and exit-on-error is set."""

 def get_file_content(
     path: pathlib.Path,
     errors: str,
     verbose: bool,
     include_binaries: bool,
     include_empty: bool,
     encoding: str = "utf-8",
 ) -> Tuple[Optional[str], Optional[Exception]]:
     """Read one file as text and decide whether it belongs in the output.

     Args:
         path: File to read.
         errors: Decoding error handler passed to ``Path.read_text``
             (for example ``"strict"`` or ``"replace"``).
         verbose: Append the exception text to the message returned for a
             file that could not be read or decoded.
         include_binaries: Keep files that contain NUL characters and report
             files that fail to decode; when false both are skipped.
         include_empty: Keep files whose content is empty or whitespace-only.
         encoding: Text encoding used to decode the file. Defaults to UTF-8
             rather than the platform locale so the same file decodes the
             same way on every machine.

     Returns:
         ``(content, None)`` for a file that was read and kept,
         ``(None, None)`` for a file that is skipped, or
         ``(message, exception)`` when reading failed and is reported.
     """
     try:
         content = path.read_text(encoding=encoding, errors=errors)
     except UnicodeDecodeError as ude:
         # Undecodable bytes are treated as a sign of a binary file.
         if not include_binaries:
             return None, None
         error_msg = f"#:{path}: Binary file\n"
         if verbose:
             error_msg += f"UnicodeDecodeError details: {ude}\n"
         return f"{error_msg}\n", ude
     except Exception as ex:
         # Deliberately broad: any read failure is reported, not raised.
         error_msg = f"#:{path}: Read error\n"
         if verbose:
             error_msg += f"Error details: {ex}\n"
         return f"{error_msg}\n", ex

     if not include_empty and not content.strip():
         return None, None
     # A NUL character in decoded text is used as the binary-file heuristic.
     if not include_binaries and "\0" in content:
         return None, None
     return content, None

 def fn_matches_multiple(file: str, patterns: List[str]) -> bool:
     """Return True if ``file`` matches at least one fnmatch-style pattern."""
     for candidate in patterns:
         if fnmatch.fnmatch(file, candidate):
             return True
     return False

 def process_path(
     path: pathlib.Path,
     globs: List[str],
     excludes: List[str],
     listing: bool,
     errors: str,
     verbose: bool,
     exit_on_error: bool,
     include_binaries: bool,
     include_empty: bool,
 ) -> Tuple[str, int]:
     """Build the output for ``path``, descending into directories.

     A file is used when its name matches at least one entry of ``globs``
     (or ``globs`` is empty) and matches no entry of ``excludes``.
     Directories are walked recursively in sorted order so the output is
     reproducible. A path that is neither a file nor a directory
     contributes nothing.

     Args:
         path: File or directory to process.
         globs: fnmatch patterns a file name must match; empty means all.
         excludes: fnmatch patterns that rule a file name out.
         listing: Emit only the matching paths, one per line.
         errors: Decoding error handler forwarded to ``get_file_content``.
         verbose: Forwarded to ``get_file_content``.
         exit_on_error: Raise instead of continuing when a file errors.
         include_binaries: Forwarded to ``get_file_content``.
         include_empty: Forwarded to ``get_file_content``.

     Returns:
         ``(text, count)`` where ``count`` is the number of files that
         contributed to ``text``.

     Raises:
         FileProcessingError: A file produced an error and
             ``exit_on_error`` is true.
     """
     if path.is_dir():
         pieces = []
         total = 0
         # iterdir() makes no ordering promise; sort so runs are reproducible.
         for child in sorted(path.iterdir()):
             child_output, child_count = process_path(
                 child, globs, excludes, listing, errors, verbose,
                 exit_on_error, include_binaries, include_empty
             )
             pieces.append(child_output)
             total += child_count
         return "".join(pieces), total

     if not path.is_file():
         return "", 0
     if globs and not fn_matches_multiple(path.name, globs):
         return "", 0
     if fn_matches_multiple(path.name, excludes):
         return "", 0

     if listing:
         return f"{path}\n", 1

     file_output, error = get_file_content(
         path, errors, verbose, include_binaries, include_empty
     )
     if error is not None and exit_on_error:
         raise FileProcessingError(f"Exiting due to error in file: {path}") from error
     if file_output is None:
         return "", 0

     body = file_output.rstrip("\n") + "\n\n"
     if error is not None:
         # The failure message already starts with its own "#:<path>:" line,
         # so adding the usual header would print the path twice.
         return body, 1
     return f"#:{path}:\n{body}", 1

 def dirtollm(
     paths: List[pathlib.Path],
     globs: List[str],
     excludes: List[str],
     listing: bool = False,
     errors: str = "replace",
     verbose: bool = False,
     exit_on_error: bool = False,
     include_binaries: bool = False,
     include_empty: bool = False,
 ) -> Tuple[str, int]:
     """Run ``process_path`` over every entry of ``paths`` and merge the results.

     Returns the concatenated text of all entries, in the order given,
     together with the summed file count.
     """
     results = [
         process_path(
             entry, globs, excludes, listing, errors, verbose,
             exit_on_error, include_binaries, include_empty
         )
         for entry in paths
     ]
     combined = "".join(text for text, _ in results)
     return combined, sum(count for _, count in results)

 def main():
     """Command-line entry point: parse options, gather output, emit it.

     Each positional argument that names an existing path is resolved and
     processed directly. Any other positional argument is recorded as a
     glob pattern and the current working directory is added to the
     locations to walk. With no positional arguments the current directory
     is walked with the pattern ``*``.

     Depending on the flags, the result is summarised (``--count``), copied
     to the clipboard (``--copy``) or printed. Exits with status 1 when
     processing stops with ``FileProcessingError``.
     """
     parser = argparse.ArgumentParser(
         description="Process files based on specified paths or glob patterns.",
         epilog='Example: python dirtollm.py "*.py" "*.txt" /path/to/specific/file.py --exclude "*.pyc" --copy --verbose -x --binaries',
     )
     parser.add_argument("paths", nargs="*", help="Files, directories, or glob patterns to process")
     parser.add_argument(
         "--exclude", nargs="+", help="Glob patterns to exclude", default=[]
     )
     parser.add_argument(
         "--prompt",
         nargs="?",
         const="File contents:",
         help="Specify prompt text to output before the files",
     )
     parser.add_argument(
         "--count",
         action="store_true",
         help="Display the count of files, bytes, and tokens processed",
     )
     parser.add_argument(
         "--copy",
         action="store_true",
         help="Copy output to the clipboard instead of printing to stdout",
     )
     parser.add_argument(
         "--list",
         action="store_true",
         help="List all files that match the patterns without showing their contents",
     )
     parser.add_argument(
         "--errors",
         choices=["strict", "ignore", "replace", "backslashreplace"],
         default="replace",
         help="Specify how encoding errors are handled (default: replace)",
     )
     parser.add_argument(
         "--verbose", "-v", action="store_true", help="Enable verbose output for errors"
     )
     parser.add_argument(
         "-x",
         "--exit-on-error",
         action="store_true",
         help="Exit on first error encountered",
     )
     parser.add_argument(
         "--binaries", action="store_true", help="Include non unicode files"
     )
     parser.add_argument("--empty", action="store_true", help="Include empty files")

     args = parser.parse_args()

     paths = []
     globs = []

     if not args.paths:
         paths = [pathlib.Path(".")]
         globs = ["*"]
     else:
         seen = set()
         for path_or_glob in args.paths:
             candidate = pathlib.Path(path_or_glob)
             if candidate.exists():
                 target = candidate.resolve()
             else:
                 # Not an existing path: keep it as a glob and walk the cwd.
                 globs.append(path_or_glob)
                 target = pathlib.Path.cwd()
             # Walk each location only once. Appending the cwd for every
             # glob made matching files appear once per glob in the output.
             if target not in seen:
                 seen.add(target)
                 paths.append(target)

     try:
         output, file_count = dirtollm(
             paths,
             globs,
             args.exclude,
             listing=args.list,
             errors=args.errors,
             verbose=args.verbose,
             exit_on_error=args.exit_on_error,
             include_binaries=args.binaries,
             include_empty=args.empty,
         )
     except FileProcessingError as fpe:
         print(f"Error: {fpe}", file=sys.stderr)
         sys.exit(1)

     if args.prompt:
         output = f"{args.prompt}\n\n{output}"

     output = output.rstrip("\n")
     byte_count = len(output.encode("utf-8"))
     # Whitespace-separated words serve as a rough token estimate.
     token_count = len(output.split())

     if args.count:
         print(
             f"Processed {file_count} files, {byte_count} bytes, ~{token_count} tokens."
         )
     elif args.copy:
         if PYPERCLIP_AVAILABLE:
             pyperclip.copy(output)
             print(
                 f"Copied to clipboard: {file_count} files, {byte_count} bytes, ~{token_count} tokens."
             )
         else:
             print(
                 "Error: --copy requires pyperclip module. Falling back to stdout.",
                 file=sys.stderr,
             )
             print(output)
     else:
         print(output)

 # Run the CLI only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3

	# Usage: python dirtollm.py [files or glob patterns...] [options]
	# Example: python dirtollm.py "*.py" "*.txt" /path/to/specific/file.py --exclude "*.pyc" --copy --verbose -x --binaries

	import argparse
	import pathlib
	import fnmatch
	import sys
	import os
	from typing import List, Tuple, Optional

	# pyperclip is an optional third-party module; record whether it imported.
	try:
	    import pyperclip
	except ImportError:
	    # Bind the name anyway so later references resolve (keeps linters happy).
	    pyperclip = None
	    PYPERCLIP_AVAILABLE = False
	else:
	    PYPERCLIP_AVAILABLE = True

	class FileProcessingError(Exception):
	    """Raised to abort processing when a file fails and exit-on-error is set."""

	def get_file_content(
	    path: pathlib.Path,
	    errors: str,
	    verbose: bool,
	    include_binaries: bool,
	    include_empty: bool,
	    encoding: str = "utf-8",
	) -> Tuple[Optional[str], Optional[Exception]]:
	    """Read one file as text and decide whether it belongs in the output.

	    Args:
	        path: File to read.
	        errors: Decoding error handler passed to ``Path.read_text``
	            (for example ``"strict"`` or ``"replace"``).
	        verbose: Append the exception text to the message returned for a
	            file that could not be read or decoded.
	        include_binaries: Keep files that contain NUL characters and report
	            files that fail to decode; when false both are skipped.
	        include_empty: Keep files whose content is empty or whitespace-only.
	        encoding: Text encoding used to decode the file. Defaults to UTF-8
	            rather than the platform locale so the same file decodes the
	            same way on every machine.

	    Returns:
	        ``(content, None)`` for a file that was read and kept,
	        ``(None, None)`` for a file that is skipped, or
	        ``(message, exception)`` when reading failed and is reported.
	    """
	    try:
	        content = path.read_text(encoding=encoding, errors=errors)
	    except UnicodeDecodeError as ude:
	        # Undecodable bytes are treated as a sign of a binary file.
	        if not include_binaries:
	            return None, None
	        error_msg = f"#:{path}: Binary file\n"
	        if verbose:
	            error_msg += f"UnicodeDecodeError details: {ude}\n"
	        return f"{error_msg}\n", ude
	    except Exception as ex:
	        # Deliberately broad: any read failure is reported, not raised.
	        error_msg = f"#:{path}: Read error\n"
	        if verbose:
	            error_msg += f"Error details: {ex}\n"
	        return f"{error_msg}\n", ex

	    if not include_empty and not content.strip():
	        return None, None
	    # A NUL character in decoded text is used as the binary-file heuristic.
	    if not include_binaries and "\0" in content:
	        return None, None
	    return content, None

	def fn_matches_multiple(file: str, patterns: List[str]) -> bool:
	    """Return True if ``file`` matches at least one fnmatch-style pattern."""
	    return any(fnmatch.fnmatch(file, pattern) for pattern in patterns)

	def process_path(
	    path: pathlib.Path,
	    globs: List[str],
	    excludes: List[str],
	    listing: bool,
	    errors: str,
	    verbose: bool,
	    exit_on_error: bool,
	    include_binaries: bool,
	    include_empty: bool,
	) -> Tuple[str, int]:
	    """Build the output for ``path``, descending into directories.

	    A file is used when its name matches at least one entry of ``globs``
	    (or ``globs`` is empty) and matches no entry of ``excludes``.
	    Directories are walked recursively in sorted order so the output is
	    reproducible. A path that is neither a file nor a directory
	    contributes nothing.

	    Args:
	        path: File or directory to process.
	        globs: fnmatch patterns a file name must match; empty means all.
	        excludes: fnmatch patterns that rule a file name out.
	        listing: Emit only the matching paths, one per line.
	        errors: Decoding error handler forwarded to ``get_file_content``.
	        verbose: Forwarded to ``get_file_content``.
	        exit_on_error: Raise instead of continuing when a file errors.
	        include_binaries: Forwarded to ``get_file_content``.
	        include_empty: Forwarded to ``get_file_content``.

	    Returns:
	        ``(text, count)`` where ``count`` is the number of files that
	        contributed to ``text``.

	    Raises:
	        FileProcessingError: A file produced an error and
	            ``exit_on_error`` is true.
	    """
	    if path.is_dir():
	        pieces = []
	        total = 0
	        # iterdir() makes no ordering promise; sort so runs are reproducible.
	        for child in sorted(path.iterdir()):
	            child_output, child_count = process_path(
	                child, globs, excludes, listing, errors, verbose,
	                exit_on_error, include_binaries, include_empty
	            )
	            pieces.append(child_output)
	            total += child_count
	        return "".join(pieces), total

	    if not path.is_file():
	        return "", 0
	    if globs and not fn_matches_multiple(path.name, globs):
	        return "", 0
	    if fn_matches_multiple(path.name, excludes):
	        return "", 0

	    if listing:
	        return f"{path}\n", 1

	    file_output, error = get_file_content(
	        path, errors, verbose, include_binaries, include_empty
	    )
	    if error is not None and exit_on_error:
	        raise FileProcessingError(f"Exiting due to error in file: {path}") from error
	    if file_output is None:
	        return "", 0

	    body = file_output.rstrip("\n") + "\n\n"
	    if error is not None:
	        # The failure message already starts with its own "#:<path>:" line,
	        # so adding the usual header would print the path twice.
	        return body, 1
	    return f"#:{path}:\n{body}", 1

	def dirtollm(
	    paths: List[pathlib.Path],
	    globs: List[str],
	    excludes: List[str],
	    listing: bool = False,
	    errors: str = "replace",
	    verbose: bool = False,
	    exit_on_error: bool = False,
	    include_binaries: bool = False,
	    include_empty: bool = False,
	) -> Tuple[str, int]:
	    """Run ``process_path`` over every entry of ``paths`` and merge the results.

	    Returns the concatenated text of all entries, in the order given,
	    together with the summed file count.
	    """
	    pieces = []
	    total_file_count = 0

	    for path in paths:
	        path_output, file_count = process_path(
	            path, globs, excludes, listing, errors, verbose,
	            exit_on_error, include_binaries, include_empty
	        )
	        pieces.append(path_output)
	        total_file_count += file_count

	    # Join once at the end instead of growing a string inside the loop.
	    return "".join(pieces), total_file_count

	def main():
	    """Command-line entry point: parse options, gather output, emit it.

	    Each positional argument that names an existing path is resolved and
	    processed directly. Any other positional argument is recorded as a
	    glob pattern and the current working directory is added to the
	    locations to walk. With no positional arguments the current directory
	    is walked with the pattern ``*``.

	    Depending on the flags, the result is summarised (``--count``), copied
	    to the clipboard (``--copy``) or printed. Exits with status 1 when
	    processing stops with ``FileProcessingError``.
	    """
	    parser = argparse.ArgumentParser(
	        description="Process files based on specified paths or glob patterns.",
	        # The glob examples need their "*" wildcards to be meaningful.
	        epilog='Example: python dirtollm.py "*.py" "*.txt" /path/to/specific/file.py --exclude "*.pyc" --copy --verbose -x --binaries',
	    )
	    parser.add_argument("paths", nargs="*", help="Files, directories, or glob patterns to process")
	    parser.add_argument(
	        "--exclude", nargs="+", help="Glob patterns to exclude", default=[]
	    )
	    parser.add_argument(
	        "--prompt",
	        nargs="?",
	        const="File contents:",
	        help="Specify prompt text to output before the files",
	    )
	    parser.add_argument(
	        "--count",
	        action="store_true",
	        help="Display the count of files, bytes, and tokens processed",
	    )
	    parser.add_argument(
	        "--copy",
	        action="store_true",
	        help="Copy output to the clipboard instead of printing to stdout",
	    )
	    parser.add_argument(
	        "--list",
	        action="store_true",
	        help="List all files that match the patterns without showing their contents",
	    )
	    parser.add_argument(
	        "--errors",
	        choices=["strict", "ignore", "replace", "backslashreplace"],
	        default="replace",
	        help="Specify how encoding errors are handled (default: replace)",
	    )
	    parser.add_argument(
	        "--verbose", "-v", action="store_true", help="Enable verbose output for errors"
	    )
	    parser.add_argument(
	        "-x",
	        "--exit-on-error",
	        action="store_true",
	        help="Exit on first error encountered",
	    )
	    parser.add_argument(
	        "--binaries", action="store_true", help="Include non unicode files"
	    )
	    parser.add_argument("--empty", action="store_true", help="Include empty files")

	    args = parser.parse_args()

	    paths = []
	    globs = []

	    if not args.paths:
	        paths = [pathlib.Path(".")]
	        globs = ["*"]
	    else:
	        seen = set()
	        for path_or_glob in args.paths:
	            candidate = pathlib.Path(path_or_glob)
	            if candidate.exists():
	                target = candidate.resolve()
	            else:
	                # Not an existing path: keep it as a glob and walk the cwd.
	                globs.append(path_or_glob)
	                target = pathlib.Path.cwd()
	            # Walk each location only once. Appending the cwd for every
	            # glob made matching files appear once per glob in the output.
	            if target not in seen:
	                seen.add(target)
	                paths.append(target)

	    try:
	        output, file_count = dirtollm(
	            paths,
	            globs,
	            args.exclude,
	            listing=args.list,
	            errors=args.errors,
	            verbose=args.verbose,
	            exit_on_error=args.exit_on_error,
	            include_binaries=args.binaries,
	            include_empty=args.empty,
	        )
	    except FileProcessingError as fpe:
	        print(f"Error: {fpe}", file=sys.stderr)
	        sys.exit(1)

	    if args.prompt:
	        output = f"{args.prompt}\n\n{output}"

	    output = output.rstrip("\n")
	    byte_count = len(output.encode("utf-8"))
	    # Whitespace-separated words serve as a rough token estimate.
	    token_count = len(output.split())

	    if args.count:
	        print(
	            f"Processed {file_count} files, {byte_count} bytes, ~{token_count} tokens."
	        )
	    elif args.copy:
	        if PYPERCLIP_AVAILABLE:
	            pyperclip.copy(output)
	            print(
	                f"Copied to clipboard: {file_count} files, {byte_count} bytes, ~{token_count} tokens."
	            )
	        else:
	            print(
	                "Error: --copy requires pyperclip module. Falling back to stdout.",
	                file=sys.stderr,
	            )
	            print(output)
	    else:
	        print(output)

	# Run the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()