brozkeff · September 7, 2024 14:28
diff --git a/convert_zpevnik_xml_to_single_txt.py b/convert_zpevnik_xml_to_single_txt.py
 """
 This Python script processes an XML file that contains multiple songs with lyrics and chords, 
 extracting song information and converting each song into separate `.txt` files. The output files 
 are named based on the `author` and `title` fields found in the XML and placed in a `txt` 
 subfolder. 

 Key Features:
 - Extracts the `author`, `title`, `step`, and `songtext` fields from each song in the XML.
 - Writes the following content to each text file:
    - First line: "author - title"
    - Second line (if `step` exists): "ladění = VALUE" (where VALUE is the content of the `step` tag)
    - Empty line
    - Full song lyrics from the `songtext` field
 - Automatically generates unique filenames by adding a numeric suffix (e.g., `-2`, `-3`) 
  if a file with the same name already exists, to prevent overwriting.
 - The XML file path is given through the `xml_file` argument of `process_songs_from_xml`, which defaults to `export.xml` in the current working directory.
 - The script only processes relevant fields and ignores other fields in the XML file.

 Functions:
 - create_output_filename(author, title, folder_path): 
    Generates a unique filename based on the `author` and `title` values.
    If a file with the same name already exists, it appends a number to the filename.
    
 - process_songs_from_xml(xml_file="export.xml"): 
    Main function that parses the XML file, processes each song, and writes the song data to a 
    corresponding `.txt` file in the `txt` subfolder.

 Example usage:
    You can simply run the script in the same directory as the XML file and it will 
    generate `.txt` files for each song in a `txt` folder.
 """

 import os
 import xml.etree.ElementTree as ET
 import re
 import unicodedata

 def slugify(text):
    """
    Return an ASCII-only, lowercase slug of `text` suitable for use in a filename.

    Accented letters are reduced to their base letter where Unicode defines a
    decomposition, characters with no ASCII form are dropped, remaining
    punctuation is removed, and every run of whitespace and/or dashes is
    collapsed into a single dash.
    """
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then discards the marks and anything else non-ASCII.
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore').decode('ascii')

    # Drop everything except word characters, whitespace and dashes.
    kept = re.sub(r'[^\w\s-]', '', ascii_only)
    trimmed = kept.strip().lower()

    # Collapse each run of dashes/whitespace into one dash.
    return re.sub(r'[-\s]+', '-', trimmed)

 def create_output_filename(author, title, folder_path):
    """
    Return a path inside `folder_path` that does not exist yet for this song.

    The name is built from the slugified author and title as
    `<author> - <title>.txt`. When that path already exists, `-2`, `-3`, ...
    is appended after the title until an unused path is found.
    """
    stem = f"{slugify(author)} - {slugify(title)}"

    candidate = os.path.join(folder_path, f"{stem}.txt")
    attempt = 1
    # The first duplicate gets the suffix -2, so bump before formatting.
    while os.path.exists(candidate):
        attempt += 1
        candidate = os.path.join(folder_path, f"{stem}-{attempt}.txt")
    return candidate

 def _child_text(parent, tag, namespace, default=""):
    """Return the stripped text of child `tag`, or `default` if it is missing or blank."""
    node = parent.find(tag, namespace)
    if node is None:
        return default
    # Element.text is None for an empty element such as <step/>, so guard
    # before stripping instead of calling .strip() on None.
    return (node.text or "").strip() or default


 def process_songs_from_xml(xml_file="export.xml"):
    """
    Convert every song in `xml_file` into its own text file in the `txt` folder.

    For each `song` element directly under the XML root (in the
    `http://zpevnik.net/InetSongDb.xsd` namespace) a file is written whose
    name comes from `create_output_filename(author, title, "txt")` and whose
    content is:

        author - title
        ladění = STEP      (only when the song has a non-blank `step`)
        <empty line>
        songtext

    A missing or blank `author` / `title` is replaced by "Unknown Author" /
    "Unknown Title"; a missing or blank `songtext` yields an empty body.
    The `txt` folder is created in the current working directory if needed.

    Args:
        xml_file: Path of the XML export to read.

    Raises:
        xml.etree.ElementTree.ParseError: If `xml_file` is not well-formed XML.
        OSError: If the XML file cannot be read or an output file cannot be written.
    """
    root = ET.parse(xml_file).getroot()
    namespace = {'ns': 'http://zpevnik.net/InetSongDb.xsd'}

    # exist_ok avoids the separate "does it exist?" check.
    output_folder = "txt"
    os.makedirs(output_folder, exist_ok=True)

    for song in root.findall('ns:song', namespace):
        author = _child_text(song, 'ns:author', namespace, "Unknown Author")
        title = _child_text(song, 'ns:title', namespace, "Unknown Title")
        songtext = _child_text(song, 'ns:songtext', namespace)
        step = _child_text(song, 'ns:step', namespace)

        # Header line, optional tuning line, blank separator, then the lyrics.
        parts = [f"{author} - {title}\n"]
        if step:
            parts.append(f"ladění = {step}\n")
        parts.append("\n" + songtext)

        file_path = create_output_filename(author, title, output_folder)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write("".join(parts))

    print(f"Songs have been processed and saved in '{output_folder}' folder.")

 if __name__ == "__main__":
    import sys

    # Optional first command-line argument: path of the XML export to convert.
    # Without it the script reads "export.xml" from the current directory.
    process_songs_from_xml(sys.argv[1] if len(sys.argv) > 1 else "export.xml")
	"""
	This Python script processes an XML file that contains multiple songs with lyrics and chords,
	extracting song information and converting each song into separate `.txt` files. The output files
	are named based on the `author` and `title` fields found in the XML and placed in a `txt`
	subfolder.

	Key Features:
	- Extracts the `author`, `title`, `step`, and `songtext` fields from each song in the XML.
	- Writes the following content to each text file:
	- First line: "author - title"
	- Second line (if `step` exists): "ladění = VALUE" (where VALUE is the content of the `step` tag)
	- Empty line
	- Full song lyrics from the `songtext` field
	- Automatically generates unique filenames by adding a numeric suffix (e.g., `-2`, `-3`)
	if a file with the same name already exists, to prevent overwriting.
	- The XML file path is given through the `xml_file` argument of `process_songs_from_xml`, which defaults to `export.xml` in the current working directory.
	- The script only processes relevant fields and ignores other fields in the XML file.

	Functions:
	- create_output_filename(author, title, folder_path):
	Generates a unique filename based on the `author` and `title` values.
	If a file with the same name already exists, it appends a number to the filename.

	- process_songs_from_xml(xml_file="export.xml"):
	Main function that parses the XML file, processes each song, and writes the song data to a
	corresponding `.txt` file in the `txt` subfolder.

	Example usage:
	You can simply run the script in the same directory as the XML file and it will
	generate `.txt` files for each song in a `txt` folder.
	"""

	import os
	import xml.etree.ElementTree as ET
	import re
	import unicodedata

	def slugify(text):
		"""
		Return an ASCII-only, lowercase slug of `text` suitable for use in a filename.

		Accented letters are reduced to their base letter where Unicode defines a
		decomposition, characters with no ASCII form are dropped, remaining
		punctuation is removed, and every run of whitespace and/or dashes is
		collapsed into a single dash.
		"""
		# Normalize the text to split off accents, then keep only the ASCII part.
		text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')

		# Remove every character that is not a word character, whitespace or a
		# dash, then trim surrounding whitespace and lowercase the result.
		text = re.sub(r'[^\w\s-]', '', text).strip().lower()

		# Collapse each run of dashes/whitespace into one dash.
		return re.sub(r'[-\s]+', '-', text)

	def create_output_filename(author, title, folder_path):
		"""
		Return a path inside `folder_path` that does not exist yet for this song.

		The name is built from the slugified author and title as
		`<author> - <title>.txt`. When that path already exists, `-2`, `-3`, ...
		is appended after the title until an unused path is found.
		"""
		# Slugify the author and title to ensure they are safe for filenames.
		safe_author = slugify(author)
		safe_title = slugify(title)
		base_name = f"{safe_author} - {safe_title}.txt"

		file_path = os.path.join(folder_path, base_name)
		if not os.path.exists(file_path):
			return file_path

		# The plain name is taken: try -2, -3, ... until a free one is found.
		counter = 2
		while True:
			new_name = f"{safe_author} - {safe_title}-{counter}.txt"
			new_file_path = os.path.join(folder_path, new_name)
			if not os.path.exists(new_file_path):
				return new_file_path
			counter += 1

	def _child_text(parent, tag, namespace, default=""):
		"""Return the stripped text of child `tag`, or `default` if it is missing or blank."""
		node = parent.find(tag, namespace)
		if node is None:
			return default
		# Element.text is None for an empty element such as <step/>, so guard
		# before stripping instead of calling .strip() on None.
		return (node.text or "").strip() or default


	def process_songs_from_xml(xml_file="export.xml"):
		"""
		Convert every song in `xml_file` into its own text file in the `txt` folder.

		For each `song` element directly under the XML root (in the
		`http://zpevnik.net/InetSongDb.xsd` namespace) a file is written whose
		name comes from `create_output_filename(author, title, "txt")` and whose
		content is:

			author - title
			ladění = STEP      (only when the song has a non-blank `step`)
			<empty line>
			songtext

		A missing or blank `author` / `title` is replaced by "Unknown Author" /
		"Unknown Title"; a missing or blank `songtext` yields an empty body.
		The `txt` folder is created in the current working directory if needed.

		Args:
			xml_file: Path of the XML export to read.

		Raises:
			xml.etree.ElementTree.ParseError: If `xml_file` is not well-formed XML.
			OSError: If the XML file cannot be read or an output file cannot be written.
		"""
		root = ET.parse(xml_file).getroot()
		namespace = {'ns': 'http://zpevnik.net/InetSongDb.xsd'}

		# exist_ok avoids the separate "does it exist?" check.
		output_folder = "txt"
		os.makedirs(output_folder, exist_ok=True)

		for song in root.findall('ns:song', namespace):
			author = _child_text(song, 'ns:author', namespace, "Unknown Author")
			title = _child_text(song, 'ns:title', namespace, "Unknown Title")
			songtext = _child_text(song, 'ns:songtext', namespace)
			step = _child_text(song, 'ns:step', namespace)

			# Header line, optional tuning line, blank separator, then the lyrics.
			parts = [f"{author} - {title}\n"]
			if step:
				parts.append(f"ladění = {step}\n")
			parts.append("\n" + songtext)

			file_path = create_output_filename(author, title, output_folder)
			with open(file_path, 'w', encoding='utf-8') as f:
				f.write("".join(parts))

		print(f"Songs have been processed and saved in '{output_folder}' folder.")

	if __name__ == "__main__":
		import sys

		# Optional first command-line argument: path of the XML export to convert.
		# Without it the script reads "export.xml" from the current directory.
		process_songs_from_xml(sys.argv[1] if len(sys.argv) > 1 else "export.xml")