stuaxo · September 7, 2025 14:09 · stuaxo · May 21, 2024 · stuaxo · Sep 10, 2024
diff --git a/dirtollm.py b/dirtollm.py
 #!/usr/bin/env python3
 # Usage: python dirtollm.py [paths...] [--dir /path/to/directory] [--glob *.py] [--exclude *.pyc] [--exclude-regex ".*\.log"] [--include-dotfiles] [--no-gitignore] [--copy | --copy-append]
 import argparse
 import pathlib
 import fnmatch
 import os
 import re
 import subprocess

 def is_wayland():
    """Report whether the current desktop session looks like Wayland.

    The session counts as Wayland when ``WAYLAND_DISPLAY`` is present in the
    environment, or when ``XDG_SESSION_TYPE`` is exactly ``'wayland'``.
    """
    if 'WAYLAND_DISPLAY' in os.environ:
        return True
    session_type = os.environ.get('XDG_SESSION_TYPE')
    return session_type == 'wayland'

 class WLCopy:
    """Minimal pyperclip stand-in that shells out to the wl-clipboard tools.

    Exposes the two calls this script makes on a clipboard backend:
    ``copy(text)`` and ``paste()``. Both are best effort and never raise when
    the underlying tool is missing or slow.
    """

    @staticmethod
    def copy(text: str):
        """Send ``text`` to the clipboard through ``wl-copy``.

        Returns ``None``. If ``wl-copy`` is not installed, or does not exit
        within the timeout, the call returns quietly.
        """
        try:
            proc = subprocess.Popen(['wl-copy'], stdin=subprocess.PIPE)
            # Feed the caller's text (not a module-level variable) and stop
            # waiting after a short timeout so a lingering wl-copy cannot
            # block the script.
            proc.communicate(input=text.encode('utf-8'), timeout=0.5)
        except (subprocess.TimeoutExpired, FileNotFoundError):
            pass

    @staticmethod
    def paste() -> str:
        """Return the clipboard text read through ``wl-paste``.

        Returns ``""`` when ``wl-paste`` is not installed, times out, or
        exits with a non-zero status.
        """
        try:
            proc = subprocess.Popen(
                ['wl-paste', '--no-newline'],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
            )
        except FileNotFoundError:
            return ""
        try:
            data, _ = proc.communicate(timeout=0.5)
        except subprocess.TimeoutExpired:
            # Reading must not hang the script; reap the child and give up.
            proc.kill()
            proc.communicate()
            return ""
        if proc.returncode != 0:
            return ""
        return data.decode('utf-8', errors='replace')

 def get_pyperclip():
    """Pick the clipboard backend for this session.

    Returns ``WLCopy`` on Wayland, otherwise the ``pyperclip`` module when it
    can be imported, otherwise ``None``.
    """
    if not is_wayland():
        try:
            import pyperclip
        except ImportError:
            return None
        return pyperclip
    return WLCopy

 def get_gitignore_regex(path):
    """Translate a ``.gitignore`` file into compiled exclusion regexes.

    Each returned pattern is meant to be applied with ``fullmatch`` to an
    absolute path string located under the directory that holds the file.

    This is a simplified converter covering this subset of gitignore syntax:
      * blank lines and ``#`` comments are skipped;
      * ``!negation`` lines are skipped (an exclude-only list cannot
        express re-inclusion);
      * ``*`` and ``?`` match within one path component, ``**`` across
        components;
      * a pattern with a ``/`` at its start or middle is anchored to the
        ``.gitignore`` directory, any other pattern matches at any depth;
      * every pattern also covers everything below a matching directory.
    Character classes and backslash escapes are treated literally, and a
    trailing ``/`` does not restrict the match to directories.

    Args:
        path: Location of the ``.gitignore`` file (``str`` or ``Path``).

    Returns:
        A list of compiled patterns; empty when ``path`` is not a file.
    """
    p = pathlib.Path(path)
    if not p.is_file():
        return []

    # Escape the directory so regex metacharacters in it match literally.
    base_prefix = re.escape(str(p.parent.resolve()).rstrip('/')) + '/'
    glob_tokens = {'**/': '(?:.*/)?', '**': '.*', '*': '[^/]*', '?': '[^/]'}
    splitter = re.compile(r'(\*\*/|\*\*|\*|\?)')

    regexes = []
    with p.open(encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith(('#', '!')):
                continue

            pattern = line.rstrip('/')
            if not pattern:
                continue
            # A slash anywhere but the end ties the pattern to this directory.
            anchored = '/' in pattern
            pattern = pattern.lstrip('/')

            # Wildcards become regex fragments; everything else is literal.
            body = ''.join(
                glob_tokens.get(token, re.escape(token))
                for token in splitter.split(pattern)
            )
            depth_prefix = '' if anchored else '(?:.*/)?'
            full_pattern = base_prefix + depth_prefix + body + '(?:/.*)?'

            try:
                regexes.append(re.compile(full_pattern))
            except re.error as e:
                print(f"Warning: Could not compile regex for gitignore pattern '{line}': {e}")

    return regexes

 def append_file_content(output, path):
    """Return ``output`` extended with a ``#:<path>:`` header and the file text.

    The file is read as UTF-8. A file that cannot be decoded, or that is
    empty, leaves ``output`` unchanged. Any other read error is reported in
    place of the content.
    """
    try:
        text = path.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        # Not UTF-8 text: contribute nothing.
        return output
    except Exception as ex:
        text = f"Skipped (error reading file: {ex})\n\n"

    if not text:
        return output
    return output + f"#:{path}:\n{text}\n\n"

 def dir_to_llm(output, directory, glob, exclude_globs, exclude_regexes, parse_gitignore, include_dotfiles, list_only, _seen=None):
    """Append the files under ``directory`` that match ``glob`` to ``output``.

    Sub-directories are processed first (recursively, with the same
    ``glob``), then the matches of ``glob`` inside ``directory`` itself.
    Entries are visited in sorted order so the result is reproducible.

    A path is left out when it, or any path component between ``directory``
    and it, is
      * a dot-entry while ``include_dotfiles`` is false,
      * matched by an ``exclude_globs`` pattern (tested with ``fnmatch``
        against both the resolved absolute path and the bare name), or
      * fully matched by an ``exclude_regexes`` pattern (resolved path).

    Args:
        output: Text accumulated so far; new text is appended to it.
        directory: Directory to scan (``str`` or ``pathlib.Path``).
        glob: Pattern passed to ``Path.glob`` in every visited directory.
        exclude_globs: ``fnmatch`` patterns to exclude.
        exclude_regexes: Compiled regexes to exclude.
        parse_gitignore: When true, rules from ``directory/.gitignore`` are
            added for this directory and everything below it.
        include_dotfiles: When false, names starting with ``.`` are skipped.
        list_only: When true, append only each file's path, one per line.
        _seen: Internal. Set of resolved paths already handled during this
            walk; shared with recursive calls so that a recursive glob such
            as ``**/*.py`` or a symlink cycle cannot visit an entry twice.

    Returns:
        ``(output, file_count)``: the extended text and the number of files
        that added text to it in this call, recursive calls included.
    """
    file_count = 0
    p = pathlib.Path(directory)
    seen = set() if _seen is None else _seen
    seen.add(str(p.resolve()))

    current_exclude_globs = list(exclude_globs)
    current_exclude_regexes = list(exclude_regexes)
    if parse_gitignore:
        current_exclude_regexes.extend(get_gitignore_regex(p / ".gitignore"))

    def is_skipped(candidate):
        """Check ``candidate`` and each component leading to it from ``p``."""
        node = p
        for part in candidate.relative_to(p).parts:
            node = node / part
            if part == '..':
                continue
            if not include_dotfiles and part.startswith('.'):
                return True
            node_str = str(node.resolve())
            if any(fnmatch.fnmatch(node_str, pattern) or fnmatch.fnmatch(part, pattern)
                   for pattern in current_exclude_globs):
                return True
            if any(r.fullmatch(node_str) for r in current_exclude_regexes):
                return True
        return False

    # Recurse into subdirectories
    for child in sorted(p.iterdir()):
        if not child.is_dir() or is_skipped(child):
            continue
        if str(child.resolve()) in seen:
            # Already walked (e.g. reached again through a symlink).
            continue
        output, sub_file_count = dir_to_llm(
            output=output,
            directory=child,
            glob=glob,
            exclude_globs=current_exclude_globs,
            exclude_regexes=current_exclude_regexes,
            parse_gitignore=parse_gitignore,
            include_dotfiles=include_dotfiles,
            list_only=list_only,
            _seen=seen,
        )
        file_count += sub_file_count

    # Process files
    for child in sorted(p.glob(glob)):
        if not child.is_file():
            continue
        key = str(child.resolve())
        if key in seen:
            continue
        # Record the decision even for skipped files: the deepest directory
        # sees the most .gitignore rules, and its verdict must not be
        # overturned when an ancestor's recursive glob meets the file again.
        seen.add(key)
        if is_skipped(child):
            continue

        if list_only:
            output += f"{child}\n"
        else:
            extended = append_file_content(output, child)
            if len(extended) == len(output):
                # Nothing was added (empty or undecodable file): not counted.
                continue
            output = extended
        file_count += 1

    return output, file_count

 if __name__ == "__main__":
    # Command-line entry point: collect the requested files into one text
    # blob and either print it or place it on the clipboard.
    parser = argparse.ArgumentParser(
        description='Concatenate directory contents for Large Language Models.',
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument('paths', nargs='*', help='Directories to process or glob patterns to match')
    parser.add_argument('--dir', type=str, help='Directory to process', default=".")
    parser.add_argument('--exclude', type=str, nargs='+', help='Glob patterns to exclude (e.g., *.pyc */__pycache__/*)', default=[])
    parser.add_argument('--exclude-regex', type=str, nargs='+', help='Regular expression patterns to exclude', default=[])
    parser.add_argument('--glob', type=str, help='Glob pattern to match files', default="*")
    parser.add_argument('--prompt', nargs='?', const="Filenames followed by file content-:", default=None, help='Display a prompt before the files')
    parser.add_argument('--list', action='store_true', help='List file paths only, without content')
    parser.add_argument('--include-dotfiles', action='store_true', help='Include dotfiles and dot directories (e.g., .git, .venv)')
    parser.add_argument('--no-gitignore', action='store_true', help='Do not parse .gitignore files for exclusion patterns')
    copy_group = parser.add_mutually_exclusive_group()
    copy_group.add_argument('--copy', action='store_true', help='Copy output to the clipboard, replacing its content.')
    copy_group.add_argument('--copy-append', action='store_true', help='Append output to the clipboard.')

    args = parser.parse_args()

    # Sort positional arguments into directories to walk, glob patterns to
    # apply, and files that were named explicitly.
    directories = []
    glob_patterns = []
    explicit_files = []
    for path_arg in args.paths:
        path_obj = pathlib.Path(path_arg)
        if path_obj.is_dir():
            directories.append(path_arg)
        elif any(ch in path_arg for ch in '*?['):
            glob_patterns.append(path_arg)
        elif path_obj.is_file():
            # Taken as-is. Turning it into (parent dir, name pattern) would
            # pair it with every other directory and pattern given.
            explicit_files.append(path_obj)
        else:
            glob_patterns.append(path_arg)

    # Walk a tree unless only explicit files were named; fall back to the
    # --dir / --glob values for whichever half was not given.
    if directories or glob_patterns or not explicit_files:
        directories = directories or [args.dir]
        glob_patterns = glob_patterns or [args.glob]

    # Compile user-provided regexes for efficiency
    compiled_regexes = []
    for pattern in args.exclude_regex:
        try:
            compiled_regexes.append(re.compile(pattern))
        except re.error as e:
            print(f"Warning: Invalid regex pattern '{pattern}' skipped: {e}")

    output = ""
    if args.prompt is not None:
        output += args.prompt + "\n\n"

    total_file_count = 0
    for file_path in explicit_files:
        if args.list:
            output += f"{file_path}\n"
            total_file_count += 1
        else:
            length_before = len(output)
            output = append_file_content(output, file_path)
            if len(output) != length_before:
                total_file_count += 1

    for directory in directories:
        for glob_pattern in glob_patterns:
            dir_output, file_count = dir_to_llm(
                output="",
                directory=directory,
                glob=glob_pattern,
                exclude_globs=args.exclude,
                exclude_regexes=compiled_regexes,
                parse_gitignore=not args.no_gitignore,
                include_dotfiles=args.include_dotfiles,
                list_only=args.list
            )
            output += dir_output
            total_file_count += file_count

    # Whitespace-separated words serve as a rough token estimate.
    token_count = len(output.split())
    # Report real bytes, not the number of characters.
    byte_count = len(output.encode('utf-8', errors='replace'))

    if args.copy or args.copy_append:
        pyperclip = get_pyperclip()
        if not pyperclip:
            print("The --copy or --copy-append options require 'pyperclip'. Please install it.")
        elif args.copy_append:
            # Not every backend can read the clipboard; treat that as empty.
            paste = getattr(pyperclip, 'paste', None)
            original_content = paste() if callable(paste) else ""
            if original_content and isinstance(original_content, str):
                new_content = original_content + "\n\n" + output
            else:
                new_content = output

            pyperclip.copy(new_content)
            print(f"Appended {total_file_count} files, {byte_count} bytes, and approximately {token_count} tokens to the clipboard.")
        else:
            pyperclip.copy(output)
            print(f"Copied {total_file_count} files, {byte_count} bytes, and approximately {token_count} tokens to the clipboard.")
    else:
        print(output)
	#!/usr/bin/env python3
	# Usage: python dirtollm.py [paths...] [--dir /path/to/directory] [--glob *.py] [--exclude *.pyc] [--exclude-regex ".*\.log"] [--include-dotfiles] [--no-gitignore] [--copy | --copy-append]
	import argparse
	import pathlib
	import fnmatch
	import os
	import re
	import subprocess

	def is_wayland():
	    """Report whether the current desktop session looks like Wayland.

	    The session counts as Wayland when ``WAYLAND_DISPLAY`` is present in
	    the environment, or when ``XDG_SESSION_TYPE`` is exactly ``'wayland'``.
	    """
	    if 'WAYLAND_DISPLAY' in os.environ:
	        return True
	    session_type = os.environ.get('XDG_SESSION_TYPE')
	    return session_type == 'wayland'

	class WLCopy:
	    """Minimal pyperclip stand-in that shells out to the wl-clipboard tools.

	    Exposes the two calls this script makes on a clipboard backend:
	    ``copy(text)`` and ``paste()``. Both are best effort and never raise
	    when the underlying tool is missing or slow.
	    """

	    @staticmethod
	    def copy(text: str):
	        """Send ``text`` to the clipboard through ``wl-copy``.

	        Returns ``None``. If ``wl-copy`` is not installed, or does not
	        exit within the timeout, the call returns quietly.
	        """
	        try:
	            proc = subprocess.Popen(['wl-copy'], stdin=subprocess.PIPE)
	            # Feed the caller's text (not a module-level variable) and
	            # stop waiting after a short timeout so a lingering wl-copy
	            # cannot block the script.
	            proc.communicate(input=text.encode('utf-8'), timeout=0.5)
	        except (subprocess.TimeoutExpired, FileNotFoundError):
	            pass

	    @staticmethod
	    def paste() -> str:
	        """Return the clipboard text read through ``wl-paste``.

	        Returns ``""`` when ``wl-paste`` is not installed, times out, or
	        exits with a non-zero status.
	        """
	        try:
	            proc = subprocess.Popen(
	                ['wl-paste', '--no-newline'],
	                stdout=subprocess.PIPE,
	                stderr=subprocess.DEVNULL,
	            )
	        except FileNotFoundError:
	            return ""
	        try:
	            data, _ = proc.communicate(timeout=0.5)
	        except subprocess.TimeoutExpired:
	            # Reading must not hang the script; reap the child and give up.
	            proc.kill()
	            proc.communicate()
	            return ""
	        if proc.returncode != 0:
	            return ""
	        return data.decode('utf-8', errors='replace')

	def get_pyperclip():
	    """Pick the clipboard backend for this session.

	    Returns ``WLCopy`` on Wayland, otherwise the ``pyperclip`` module when
	    it can be imported, otherwise ``None``.
	    """
	    if is_wayland():
	        return WLCopy
	    try:
	        import pyperclip
	    except ImportError:
	        return None
	    return pyperclip

	def get_gitignore_regex(path):
	    """Translate a ``.gitignore`` file into compiled exclusion regexes.

	    Each returned pattern is meant to be applied with ``fullmatch`` to an
	    absolute path string located under the directory that holds the file.

	    This is a simplified converter covering this subset of gitignore syntax:
	      * blank lines and ``#`` comments are skipped;
	      * ``!negation`` lines are skipped (an exclude-only list cannot
	        express re-inclusion);
	      * ``*`` and ``?`` match within one path component, ``**`` across
	        components;
	      * a pattern with a ``/`` at its start or middle is anchored to the
	        ``.gitignore`` directory, any other pattern matches at any depth;
	      * every pattern also covers everything below a matching directory.
	    Character classes and backslash escapes are treated literally, and a
	    trailing ``/`` does not restrict the match to directories.

	    Args:
	        path: Location of the ``.gitignore`` file (``str`` or ``Path``).

	    Returns:
	        A list of compiled patterns; empty when ``path`` is not a file.
	    """
	    p = pathlib.Path(path)
	    if not p.is_file():
	        return []

	    # Escape the directory so regex metacharacters in it match literally.
	    base_prefix = re.escape(str(p.parent.resolve()).rstrip('/')) + '/'
	    glob_tokens = {'**/': '(?:.*/)?', '**': '.*', '*': '[^/]*', '?': '[^/]'}
	    splitter = re.compile(r'(\*\*/|\*\*|\*|\?)')

	    regexes = []
	    with p.open(encoding='utf-8') as f:
	        for raw_line in f:
	            line = raw_line.strip()
	            if not line or line.startswith(('#', '!')):
	                continue

	            pattern = line.rstrip('/')
	            if not pattern:
	                continue
	            # A slash anywhere but the end ties the pattern to this directory.
	            anchored = '/' in pattern
	            pattern = pattern.lstrip('/')

	            # Wildcards become regex fragments; everything else is literal.
	            body = ''.join(
	                glob_tokens.get(token, re.escape(token))
	                for token in splitter.split(pattern)
	            )
	            depth_prefix = '' if anchored else '(?:.*/)?'
	            full_pattern = base_prefix + depth_prefix + body + '(?:/.*)?'

	            try:
	                regexes.append(re.compile(full_pattern))
	            except re.error as e:
	                print(f"Warning: Could not compile regex for gitignore pattern '{line}': {e}")

	    return regexes

	def append_file_content(output, path):
	    """Return ``output`` extended with a ``#:<path>:`` header and the file text.

	    The file is read as UTF-8. A file that cannot be decoded, or that is
	    empty, leaves ``output`` unchanged. Any other read error is reported
	    in place of the content.
	    """
	    try:
	        content = path.read_text(encoding='utf-8')
	    except UnicodeDecodeError:
	        # Not UTF-8 text: contribute nothing.
	        content = ""
	    except Exception as ex:
	        content = f"Skipped (error reading file: {ex})\n\n"
	    if content:
	        output += f"#:{path}:\n{content}\n\n"
	    return output

	def dir_to_llm(output, directory, glob, exclude_globs, exclude_regexes, parse_gitignore, include_dotfiles, list_only, _seen=None):
	    """Append the files under ``directory`` that match ``glob`` to ``output``.

	    Sub-directories are processed first (recursively, with the same
	    ``glob``), then the matches of ``glob`` inside ``directory`` itself.
	    Entries are visited in sorted order so the result is reproducible.

	    A path is left out when it, or any path component between
	    ``directory`` and it, is
	      * a dot-entry while ``include_dotfiles`` is false,
	      * matched by an ``exclude_globs`` pattern (tested with ``fnmatch``
	        against both the resolved absolute path and the bare name), or
	      * fully matched by an ``exclude_regexes`` pattern (resolved path).

	    Args:
	        output: Text accumulated so far; new text is appended to it.
	        directory: Directory to scan (``str`` or ``pathlib.Path``).
	        glob: Pattern passed to ``Path.glob`` in every visited directory.
	        exclude_globs: ``fnmatch`` patterns to exclude.
	        exclude_regexes: Compiled regexes to exclude.
	        parse_gitignore: When true, rules from ``directory/.gitignore``
	            are added for this directory and everything below it.
	        include_dotfiles: When false, names starting with ``.`` are skipped.
	        list_only: When true, append only each file's path, one per line.
	        _seen: Internal. Set of resolved paths already handled during
	            this walk; shared with recursive calls so that a recursive
	            glob such as ``**/*.py`` or a symlink cycle cannot visit an
	            entry twice.

	    Returns:
	        ``(output, file_count)``: the extended text and the number of
	        files that added text to it in this call, recursive calls included.
	    """
	    file_count = 0
	    p = pathlib.Path(directory)
	    seen = set() if _seen is None else _seen
	    seen.add(str(p.resolve()))

	    current_exclude_globs = list(exclude_globs)
	    current_exclude_regexes = list(exclude_regexes)
	    if parse_gitignore:
	        current_exclude_regexes.extend(get_gitignore_regex(p / ".gitignore"))

	    def is_skipped(candidate):
	        """Check ``candidate`` and each component leading to it from ``p``."""
	        node = p
	        for part in candidate.relative_to(p).parts:
	            node = node / part
	            if part == '..':
	                continue
	            if not include_dotfiles and part.startswith('.'):
	                return True
	            node_str = str(node.resolve())
	            if any(fnmatch.fnmatch(node_str, pattern) or fnmatch.fnmatch(part, pattern)
	                   for pattern in current_exclude_globs):
	                return True
	            if any(r.fullmatch(node_str) for r in current_exclude_regexes):
	                return True
	        return False

	    # Recurse into subdirectories
	    for child in sorted(p.iterdir()):
	        if not child.is_dir() or is_skipped(child):
	            continue
	        if str(child.resolve()) in seen:
	            # Already walked (e.g. reached again through a symlink).
	            continue
	        output, sub_file_count = dir_to_llm(
	            output=output,
	            directory=child,
	            glob=glob,
	            exclude_globs=current_exclude_globs,
	            exclude_regexes=current_exclude_regexes,
	            parse_gitignore=parse_gitignore,
	            include_dotfiles=include_dotfiles,
	            list_only=list_only,
	            _seen=seen,
	        )
	        file_count += sub_file_count

	    # Process files
	    for child in sorted(p.glob(glob)):
	        if not child.is_file():
	            continue
	        key = str(child.resolve())
	        if key in seen:
	            continue
	        # Record the decision even for skipped files: the deepest directory
	        # sees the most .gitignore rules, and its verdict must not be
	        # overturned when an ancestor's recursive glob meets the file again.
	        seen.add(key)
	        if is_skipped(child):
	            continue

	        if list_only:
	            output += f"{child}\n"
	        else:
	            extended = append_file_content(output, child)
	            if len(extended) == len(output):
	                # Nothing was added (empty or undecodable file): not counted.
	                continue
	            output = extended
	        file_count += 1

	    return output, file_count

	if __name__ == "__main__":
	    # Command-line entry point: collect the requested files into one text
	    # blob and either print it or place it on the clipboard.
	    parser = argparse.ArgumentParser(
	        description='Concatenate directory contents for Large Language Models.',
	        formatter_class=argparse.RawTextHelpFormatter
	    )
	    parser.add_argument('paths', nargs='*', help='Directories to process or glob patterns to match')
	    parser.add_argument('--dir', type=str, help='Directory to process', default=".")
	    parser.add_argument('--exclude', type=str, nargs='+', help='Glob patterns to exclude (e.g., *.pyc */__pycache__/*)', default=[])
	    parser.add_argument('--exclude-regex', type=str, nargs='+', help='Regular expression patterns to exclude', default=[])
	    parser.add_argument('--glob', type=str, help='Glob pattern to match files', default="*")
	    parser.add_argument('--prompt', nargs='?', const="Filenames followed by file content-:", default=None, help='Display a prompt before the files')
	    parser.add_argument('--list', action='store_true', help='List file paths only, without content')
	    parser.add_argument('--include-dotfiles', action='store_true', help='Include dotfiles and dot directories (e.g., .git, .venv)')
	    parser.add_argument('--no-gitignore', action='store_true', help='Do not parse .gitignore files for exclusion patterns')
	    copy_group = parser.add_mutually_exclusive_group()
	    copy_group.add_argument('--copy', action='store_true', help='Copy output to the clipboard, replacing its content.')
	    copy_group.add_argument('--copy-append', action='store_true', help='Append output to the clipboard.')

	    args = parser.parse_args()

	    # Sort positional arguments into directories to walk, glob patterns to
	    # apply, and files that were named explicitly.
	    directories = []
	    glob_patterns = []
	    explicit_files = []
	    for path_arg in args.paths:
	        path_obj = pathlib.Path(path_arg)
	        if path_obj.is_dir():
	            directories.append(path_arg)
	        elif any(ch in path_arg for ch in '*?['):
	            glob_patterns.append(path_arg)
	        elif path_obj.is_file():
	            # Taken as-is. Turning it into (parent dir, name pattern) would
	            # pair it with every other directory and pattern given.
	            explicit_files.append(path_obj)
	        else:
	            glob_patterns.append(path_arg)

	    # Walk a tree unless only explicit files were named; fall back to the
	    # --dir / --glob values for whichever half was not given.
	    if directories or glob_patterns or not explicit_files:
	        directories = directories or [args.dir]
	        glob_patterns = glob_patterns or [args.glob]

	    # Compile user-provided regexes for efficiency
	    compiled_regexes = []
	    for pattern in args.exclude_regex:
	        try:
	            compiled_regexes.append(re.compile(pattern))
	        except re.error as e:
	            print(f"Warning: Invalid regex pattern '{pattern}' skipped: {e}")

	    output = ""
	    if args.prompt is not None:
	        output += args.prompt + "\n\n"

	    total_file_count = 0
	    for file_path in explicit_files:
	        if args.list:
	            output += f"{file_path}\n"
	            total_file_count += 1
	        else:
	            length_before = len(output)
	            output = append_file_content(output, file_path)
	            if len(output) != length_before:
	                total_file_count += 1

	    for directory in directories:
	        for glob_pattern in glob_patterns:
	            dir_output, file_count = dir_to_llm(
	                output="",
	                directory=directory,
	                glob=glob_pattern,
	                exclude_globs=args.exclude,
	                exclude_regexes=compiled_regexes,
	                parse_gitignore=not args.no_gitignore,
	                include_dotfiles=args.include_dotfiles,
	                list_only=args.list
	            )
	            output += dir_output
	            total_file_count += file_count

	    # Whitespace-separated words serve as a rough token estimate.
	    token_count = len(output.split())
	    # Report real bytes, not the number of characters.
	    byte_count = len(output.encode('utf-8', errors='replace'))

	    if args.copy or args.copy_append:
	        pyperclip = get_pyperclip()
	        if not pyperclip:
	            print("The --copy or --copy-append options require 'pyperclip'. Please install it.")
	        elif args.copy_append:
	            # Not every backend can read the clipboard; treat that as empty.
	            paste = getattr(pyperclip, 'paste', None)
	            original_content = paste() if callable(paste) else ""
	            if original_content and isinstance(original_content, str):
	                new_content = original_content + "\n\n" + output
	            else:
	                new_content = output

	            pyperclip.copy(new_content)
	            print(f"Appended {total_file_count} files, {byte_count} bytes, and approximately {token_count} tokens to the clipboard.")
	        else:
	            pyperclip.copy(output)
	            print(f"Copied {total_file_count} files, {byte_count} bytes, and approximately {token_count} tokens to the clipboard.")
	    else:
	        print(output)