Skip to content

Instantly share code, notes, and snippets.

@brozkeff
Created September 7, 2024 14:28
Show Gist options
  • Save brozkeff/e3501f13743f517ec233cd1e4bb7e5a4 to your computer and use it in GitHub Desktop.
Save brozkeff/e3501f13743f517ec233cd1e4bb7e5a4 to your computer and use it in GitHub Desktop.
Převod export.xml Zpěvníku od Karla Hovorky (https://dev.zpevnik.eu/) do samostatných TXT souborů
"""
This Python script processes an XML file that contains multiple songs with lyrics and chords,
extracting song information and converting each song into a separate `.txt` file. The output files
are named based on the `author` and `title` fields found in the XML and placed in a `txt`
subfolder.

Key Features:
- Extracts the `author`, `title`, `step`, and `songtext` fields from each song in the XML.
- Writes the following content to each text file:
    - First line: "author - title"
    - Second line (if `step` exists): "ladění = VALUE" (where VALUE is the content of the `step` tag)
    - Empty line
    - Full song lyrics from the `songtext` field
- Automatically generates unique filenames by adding a numeric suffix (e.g., `-2`, `-3`)
  if a file with the same name already exists, to prevent overwriting.
- The XML file can be passed as a parameter, or it defaults to `export.xml` in the current
  working directory.
- The script only processes the relevant fields and ignores all other fields in the XML file.

Functions:
- slugify(text):
    Converts a string into a lowercase, ASCII-only slug that is safe to use in a filename.
- create_output_filename(author, title, folder_path):
    Generates a unique filename based on the `author` and `title` values.
    If a file with the same name already exists, it appends a number to the filename.
- process_songs_from_xml(xml_file="export.xml"):
    Main function that parses the XML file, processes each song, and writes the song data to a
    corresponding `.txt` file in the `txt` subfolder.

Example usage:
    Run the script in the same directory as the XML file and it will
    generate a `.txt` file for each song in a `txt` folder.
"""
import os
import xml.etree.ElementTree as ET
import re
import unicodedata
def slugify(text):
    """
    Convert a string into a lowercase, ASCII-only slug usable in a filename.

    Accented characters are reduced to their ASCII base letters, every
    character other than word characters, whitespace and dashes is removed,
    and runs of whitespace/dashes are collapsed into a single dash. Leading
    and trailing dashes are trimmed so the slug never starts or ends with a
    separator.

    Args:
        text: The string to convert.

    Returns:
        The slug. It may be an empty string when no character of `text`
        survives the ASCII conversion.
    """
    # NFKD splits an accented letter into base letter + combining mark; the
    # ASCII encode with 'ignore' then drops the marks and any other
    # non-ASCII character.
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    # Remove (not replace) everything except word characters, whitespace and dashes
    text = re.sub(r'[^\w\s-]', '', text).lower()
    # Collapse whitespace/dash runs into one dash, then trim dashes at the edges
    return re.sub(r'[-\s]+', '-', text).strip('-')
def create_output_filename(author, title, folder_path):
    """
    Build a path for a song's `.txt` file that does not exist yet.

    The name has the form "<author-slug> - <title-slug>.txt". If that path is
    already taken, a numeric suffix ("-2", "-3", ...) is appended to the title
    part until a free path is found.

    Args:
        author: Song author; slugified before use.
        title: Song title; slugified before use.
        folder_path: Directory the file is going to be written to.

    Returns:
        A path inside `folder_path` for which `os.path.exists` was false at
        the time of the check.
    """
    # A slug can come back empty (e.g. text with no ASCII-representable
    # characters); fall back to a placeholder so the filename stays readable.
    safe_author = slugify(author) or "unknown-author"
    safe_title = slugify(title) or "unknown-title"
    stem = f"{safe_author} - {safe_title}"

    file_path = os.path.join(folder_path, f"{stem}.txt")
    counter = 2
    # Keep bumping the suffix until the candidate path is unused
    while os.path.exists(file_path):
        file_path = os.path.join(folder_path, f"{stem}-{counter}.txt")
        counter += 1
    return file_path
def _element_text(parent, tag, namespace, default=""):
    """Return the stripped text of child `tag` of `parent`, or `default` when the child is missing or blank."""
    element = parent.find(tag, namespace)
    # An element that exists but is empty (e.g. <step/>) has text == None
    if element is None or element.text is None:
        return default
    return element.text.strip() or default


def process_songs_from_xml(xml_file="export.xml"):
    """
    Parse the XML file and write every song into its own `.txt` file.

    Each output file starts with "author - title", optionally followed by a
    "ladění = VALUE" line when the song has a non-empty `step` element, then
    an empty line and the song text. Files are saved in a `txt` subfolder of
    the current working directory, which is created if needed.

    Args:
        xml_file: Path of the XML export to read.

    Raises:
        Whatever `ET.parse` raises for an unreadable or malformed file, and
        whatever `open` raises if an output file cannot be written.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()
    namespace = {'ns': 'http://zpevnik.net/InetSongDb.xsd'}

    # Create output folder
    output_folder = "txt"
    os.makedirs(output_folder, exist_ok=True)

    for song in root.findall('ns:song', namespace):
        author = _element_text(song, 'ns:author', namespace, "Unknown Author")
        title = _element_text(song, 'ns:title', namespace, "Unknown Title")
        songtext = _element_text(song, 'ns:songtext', namespace)
        step = _element_text(song, 'ns:step', namespace)

        # Prepare file content
        file_content = f"{author} - {title}\n"
        if step:
            file_content += f"ladění = {step}\n"
        file_content += "\n" + songtext

        # Generate unique filename and save content
        file_path = create_output_filename(author, title, output_folder)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(file_content)

    print(f"Songs have been processed and saved in '{output_folder}' folder.")
if __name__ == "__main__":
    import sys

    # Use the first command-line argument as the XML path; fall back to
    # "export.xml" in the current working directory when none is given.
    process_songs_from_xml(sys.argv[1] if len(sys.argv) > 1 else "export.xml")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment