ndandanov · April 15, 2025 21:05
diff --git a/convert_huawei_notes.py b/convert_huawei_notes.py
 import hashlib
 import json
 import logging
 import shutil
 import traceback
 from datetime import datetime
 from pathlib import Path

 from lxml import html
 from tqdm import tqdm

 # This script aims to help you convert your exported notes from Huawei Notes to Markdown
 # or Joplin RAW directory for importing into a Joplin instance (https://joplinapp.org/).
 # It is heavily inspired by the gists of spersico and titanismo:
 # https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7
 # It builds upon https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7?permalink_comment_id=5387480#gistcomment-5387480
 # by dedicating more care for exporting the title, checklist items, attachments, and metadata of notes.

 # IMPORTANT: In order to precisely extract the aforementioned note attributes, the lxml python package is used
 # (https://lxml.de/installation.html), and the tqdm package is used to display a progress bar.
 # Please install both with:
 # pip install lxml tqdm

 # NOTE: It is always recommended to install new packages in a separate virtual environment.
 # Consult, e.g.:
 # https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/
 # https://docs.astral.sh/uv/
 # https://mamba.readthedocs.io/en/latest/user_guide/mamba.html
 # https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html


 # Steps to export your notes from Huawei Notes (courtesy of https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7):
 # 1. Log in to your Huawei Account on the phone.
 # 2. On your phone, activate Notes syncing, which is found under Huawei Account > Cloud.
 # 3. Log in to https://cloud.huawei.com
 # 4. Go to https://cloud.huawei.com/home#/account/gdpr, and click on Download Notes
 # 5. This will give you a zip file with a password. Download the file and copy the password.
 # 6. Extract the zip file using this password into a directory.
 # 7. You'll need Python installed for this: https://www.python.org/downloads/.
 # 8. Using a text editor, edit the variables in the next IMPORTANT section accordingly.
 # 9. Open a console, terminal, or your favourite IDE (Visual Studio Code, PyCharm, etc.)
 # 10. Run this script with `python convert_huawei_notes.py` (modify if needed when executing inside IDE).
 # 11. Your notes should be in the CONVERTED_NOTES_PATH. Congratulations!

 # IMPORTANT: Before running this script, please edit these variables accordingly.
 # Directory into which the Huawei export was extracted. Inside this directory, there should be
 # multiple directories, each for a single note, e.g., 20191129071631742, 20200203095535253, 20210622205504108
 EXPORTED_NOTES_PATH = '/path/to/Exported/Huawei/Notes/'
 # Select the directory in which you wish to store the converted notes (it is created if missing)
 CONVERTED_NOTES_PATH = '/path/to/Converted/Huawei/Notes/'
 # False: write plain Markdown files. True: write a Joplin RAW directory (notes plus metadata).
 EXPORT_TO_JOPLIN_FORMAT = False
 # If you would like to import the notes in Joplin, uncomment the next line. Otherwise, keep as is
 # EXPORT_TO_JOPLIN_FORMAT = True


 def format_note(title, description):
    """Return the note text: the title and the description separated by one blank line."""
    blank_line = "\n\n"
    parts = (title, description)
    return blank_line.join(parts)


 def process_bullet(text):
    """Convert the text of a bullet element into a Markdown checklist item.

    The first character is treated as the completion flag: '1' marks a checked
    item and '0' an unchecked one; the remaining characters are the item text.

    Args:
        text: Raw text content of the bullet element (may be empty).

    Returns:
        '- [x] <content>' or '- [ ] <content>' when the text starts with a flag,
        otherwise the text unchanged.
    """
    # Slice instead of index: text[0] raised IndexError for an empty bullet,
    # which aborted the conversion of the whole note.
    flag = text[:1]
    if flag == '1':
        return '- [x] ' + text[1:]
    if flag == '0':
        return '- [ ] ' + text[1:]
    return text


 # When a new note is created, it is automatically assigned a new unique ID so normally you do not need to set the ID.
 # However, if for some reason you want to set it, you can supply it as the id property.
 # It needs to be a 32 characters long string in hexadecimal. Make sure it is unique, for example by generating it
 # using whatever GUID function is available in your programming language.
 def add_joplin_info(note_data, parsed_note):
    """Append the Joplin note metadata block to an already rendered note.

    Reads 'created' and 'modified' from note_data (each divided by 1e3 before
    being handed to datetime.fromtimestamp) and 'prefix_uuid' (with every '$'
    removed, used as the note id). Returns parsed_note, a blank line, and the
    metadata block.
    """
    # Same evaluation order as the key lookups in the template: created, modified, prefix_uuid
    created_iso, modified_iso = (
        datetime.fromtimestamp(note_data[key] / 1e3).isoformat()
        for key in ('created', 'modified')
    )
    note_id = note_data['prefix_uuid'].replace('$', '')
    metadata = f"""id: {note_id}
 parent_id: 
 created_time: {created_iso}
 updated_time: {modified_iso}
 is_conflict: 0
 latitude: 
 longitude: 
 altitude: 
 author: 
 source_url: 
 is_todo: 0
 todo_due: 0
 todo_completed: 0
 source: huawei-notes
 source_application: com.example.android.notepad
 application_data: 
 order: 0
 user_created_time: {created_iso}
 user_updated_time: {modified_iso}
 encryption_cipher_text: 
 encryption_applied: 0
 markup_language: 1
 is_shared: 0
 share_id: 
 conflict_original_id: 
 master_key_id: 
 user_data: 
 deleted_time: 0
 type_: 1"""
    sections = (parsed_note, metadata)
    return '\n\n'.join(sections)


 # Preferred MIME types for the extensions most commonly found in exported notes.
 mimetypes_map = {
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
    'png': 'image/png',
    'gif': 'image/gif',
    'svg': 'image/svg+xml',
    'pdf': 'application/pdf',
 }


 def add_joplin_attachment(attachment_uuid, path_to_attachment, note_data):
    """Build the Joplin resource metadata text for one attachment.

    Args:
        attachment_uuid: Identifier written to the ``id`` field.
        path_to_attachment: Path of the attachment file; its name, suffix and
            size on disk are read.
        note_data: Note dictionary; 'created' and 'modified' are divided by 1e3
            and passed to datetime.fromtimestamp, and 'modified' is also written
            unchanged as ``blob_updated_time``.

    Returns:
        The attachment name, a blank line, and the metadata block.

    Raises:
        KeyError: If 'created' or 'modified' is missing from note_data.
        OSError: If the attachment file cannot be inspected.
    """
    import mimetypes  # function-scope import: only the fallback below needs it

    original_name = path_to_attachment.name
    created_dt = datetime.fromtimestamp(note_data['created'] / 1e3).isoformat()
    modified_dt = datetime.fromtimestamp(note_data['modified'] / 1e3).isoformat()
    suffix = path_to_attachment.suffix.lstrip('.').lower()
    # An extension missing from the table used to raise KeyError, which made the
    # caller drop the whole note. Fall back to the stdlib guess, then to a
    # generic binary type.
    mimetype = (
        mimetypes_map.get(suffix)
        or mimetypes.guess_type(original_name)[0]
        or 'application/octet-stream'
    )
    # stat() yields the size without loading the whole file into memory
    size = path_to_attachment.stat().st_size
    return f"""{original_name}

 id: {attachment_uuid}
 mime: {mimetype}
 filename: 
 created_time: {created_dt}
 updated_time: {modified_dt}
 user_created_time: {created_dt}
 user_updated_time: {modified_dt}
 file_extension: {suffix}
 encryption_cipher_text: 
 encryption_applied: 0
 encryption_blob_encrypted: 0
 size: {size}
 is_shared: 0
 share_id: 
 master_key_id: 
 user_data: 
 blob_updated_time: {note_data['modified']}
 ocr_text: 
 ocr_details: 
 ocr_status: 0
 ocr_error: 
 type_: 4"""


 def convert_single_note(note_path, out_path, attachments_path, joplin_format):
    """Convert one exported note directory into a Markdown (or Joplin RAW) file.

    Reads ``json.js`` from note_path, walks the ``element`` children of the note
    HTML, turns bullets into checklist items and attachments into embedded
    files, and writes ``<note uuid>.md`` into out_path. Every exception is
    logged instead of raised, so one broken note does not stop the export.

    Args:
        note_path: Directory of a single exported note; must contain ``json.js``
            and, for notes with attachments, an ``attachment`` sub-directory.
        out_path: Directory receiving the converted note and, in Joplin mode,
            one metadata file per attachment.
        attachments_path: Directory receiving the copied attachment files.
        joplin_format: If true, embed attachments as Joplin resources and append
            the Joplin metadata block to the note.
    """
    json_file_path = note_path / 'json.js'
    try:
        # Decode explicitly instead of relying on the platform default encoding;
        # assumes the export is UTF-8 (optionally with a BOM) -- TODO confirm.
        json_text_content = json_file_path.read_text(encoding='utf-8-sig')
        # Remove the JavaScript variable assignment and parse JSON
        note_data = json.loads(json_text_content.replace('var data = ', ''))['content']
        root = html.fromstring(note_data['html_content'])
        note_uuid = note_data['prefix_uuid'].replace('$', '')
        text_elements = []
        first_is_bullet_or_attachment = False
        for el in root.iterchildren():
            if el.tag != 'element':
                continue
            text = el.text_content()
            element_type = el.attrib.get('type')
            if element_type == 'Bullet':
                if not text_elements:
                    first_is_bullet_or_attachment = True
                text = process_bullet(text)
            elif element_type == "Attachment":
                if not text_elements:
                    first_is_bullet_or_attachment = True
                path_to_attachment = Path(text)
                path_to_exported_attachment = note_path / "attachment" / path_to_attachment.name
                logging.info(f'[{note_path.name} {note_uuid}] Found attachment.')
                # The previous test (`not exists() and is_file()`) could never be true, so a
                # missing attachment made shutil.copy raise and the whole note was skipped.
                if not path_to_exported_attachment.is_file():
                    text = f'Missing attachment: "{path_to_exported_attachment}"!'
                else:
                    attachment_uuid = hashlib.md5(path_to_attachment.name.encode('utf-8')).hexdigest()
                    attachment_out_path = (attachments_path / attachment_uuid).with_suffix(
                        path_to_exported_attachment.suffix)
                    shutil.copy(path_to_exported_attachment, attachment_out_path)
                    # Actually embed image
                    if joplin_format:
                        text = f"![](:/{attachment_uuid})"
                        attachment_metadata = add_joplin_attachment(
                            attachment_uuid, path_to_exported_attachment, note_data
                        )
                        attachment_metadata_out_path = (out_path / attachment_uuid).with_suffix('.md')
                        attachment_metadata_out_path.write_text(attachment_metadata, encoding='utf-8')
                    else:
                        # Markdown links need forward slashes, also on Windows
                        text = f"![]({attachment_out_path.relative_to(out_path).as_posix()})"
                    logging.info(f'[{note_path.name} {note_uuid}] Embedded attachment successfully.')
            text_elements.append(text)
        if not text_elements:
            raise ValueError("No text elements found!")
        elif len(text_elements) == 1:
            # The HTML holds only the body, so take the title from the note metadata.
            # splitlines() is empty for an empty title; fall back to "" rather than IndexError.
            title_lines = note_data['title'].splitlines()
            truncated_title = title_lines[0] if title_lines else ""
            note = format_note(truncated_title, *text_elements)
        else:
            if first_is_bullet_or_attachment:  # This note has no title
                title = ""
                description = '\n'.join(text_elements)
            else:
                title = text_elements[0]
                description = '\n'.join(text_elements[1:])
            note = format_note(title, description)
            if len(text_elements) > 2:
                logging.info(
                    f'[{note_path.name} {note_uuid}] Found {len(text_elements)} text elements with '
                    f'{"no" if first_is_bullet_or_attachment else ""} title elements in note HTML, combining them.'
                )
        note_out_path = (out_path / note_uuid).with_suffix('.md')
        if joplin_format:
            note = add_joplin_info(note_data, note)
        note_out_path.write_text(note, encoding='utf-8')
    except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
        logging.error(f"JSON parsing error when processing note {note_path.name}: {e}")
        logging.error(traceback.format_exc())
    except Exception as e:
        # Last-resort boundary: log and move on so the remaining notes still convert
        logging.error(f"Unexpected error when processing note {note_path.name}: {e}")
        logging.error(traceback.format_exc())


 def convert_notes(notes_path: Path, out_path: Path, joplin_format=False):
    """Convert every note directory found directly inside notes_path.

    Each sub-directory of notes_path is treated as one exported note and handed
    to convert_single_note. out_path and its 'resources' sub-directory (used for
    attachments) are created when missing.
    """
    target_format = 'Joplin RAW Directory' if joplin_format else 'Markdown'
    logging.info(f"Exporting Huawei Notes from HTML+JS to {target_format}")
    logging.info(f"Reading Directory with exported notes: {notes_path}")
    # Only directories count as notes; plain files in the export root are ignored
    note_dirs = []
    for entry in notes_path.iterdir():
        if entry.is_dir():
            note_dirs.append(entry)
    logging.info(f"Found {len(note_dirs)} notes.")
    attachments_path = out_path / 'resources'
    for directory in (out_path, attachments_path):
        directory.mkdir(exist_ok=True, parents=True)
    for note_dir in tqdm(note_dirs):
        convert_single_note(note_dir, out_path, attachments_path, joplin_format)


 if __name__ == "__main__":
    # Every message in this script goes through the module-level logging.*
    # functions (the root logger), so no named logger object is created here.
    logging.basicConfig(level=logging.INFO)
    convert_notes(Path(EXPORTED_NOTES_PATH), Path(CONVERTED_NOTES_PATH), joplin_format=EXPORT_TO_JOPLIN_FORMAT)
	import hashlib
	import json
	import logging
	import shutil
	import traceback
	from datetime import datetime
	from pathlib import Path

	from lxml import html
	from tqdm import tqdm

	# This script aims to help you convert your exported notes from Huawei Notes to Markdown
	# or Joplin RAW directory for importing into a Joplin instance (https://joplinapp.org/).
	# It is heavily inspired by the gists of spersico and titanismo:
	# https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7
	# It builds upon https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7?permalink_comment_id=5387480#gistcomment-5387480
	# by dedicating more care for exporting the title, checklist items, attachments, and metadata of notes.

	# IMPORTANT: In order to precisely extract the aforementioned note attributes, the lxml python package is used
	# (https://lxml.de/installation.html), and the tqdm package is used to display a progress bar.
	# Please install both with:
	# pip install lxml tqdm

	# NOTE: It is always recommended to install new packages in a separate virtual environment.
	# Consult, e.g.:
	# https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/
	# https://docs.astral.sh/uv/
	# https://mamba.readthedocs.io/en/latest/user_guide/mamba.html
	# https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html


	# Steps to export your notes from Huawei Notes (courtesy of https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7):
	# 1. Log in to your Huawei Account on the phone.
	# 2. On your phone, activate Notes syncing, which is found under Huawei Account > Cloud.
	# 3. Log in to https://cloud.huawei.com
	# 4. Go to https://cloud.huawei.com/home#/account/gdpr, and click on Download Notes
	# 5. This will give you a zip file with a password. Download the file and copy the password.
	# 6. Extract the zip file using this password into a directory.
	# 7. You'll need Python installed for this: https://www.python.org/downloads/.
	# 8. Using a text editor, edit the variables in the next IMPORTANT section accordingly.
	# 9. Open a console, terminal, or your favourite IDE (Visual Studio Code, PyCharm, etc.)
	# 10. Run this script with `python convert_huawei_notes.py` (modify if needed when executing inside IDE).
	# 11. Your notes should be in the CONVERTED_NOTES_PATH. Congratulations!

	# IMPORTANT: Before running this script, please edit these variables accordingly.
	# Directory into which the Huawei export was extracted. Inside this directory, there should be
	# multiple directories, each for a single note, e.g., 20191129071631742, 20200203095535253, 20210622205504108
	EXPORTED_NOTES_PATH = '/path/to/Exported/Huawei/Notes/'
	# Select the directory in which you wish to store the converted notes (it is created if missing)
	CONVERTED_NOTES_PATH = '/path/to/Converted/Huawei/Notes/'
	# False: write plain Markdown files. True: write a Joplin RAW directory (notes plus metadata).
	EXPORT_TO_JOPLIN_FORMAT = False
	# If you would like to import the notes in Joplin, uncomment the next line. Otherwise, keep as is
	# EXPORT_TO_JOPLIN_FORMAT = True


	def format_note(title, description):
	    """Return the note text: the title and the description separated by one blank line."""
	    # The body was not indented under the def, so the function did not parse
	    return "\n\n".join([title, description])


	def process_bullet(text):
	    """Convert the text of a bullet element into a Markdown checklist item.

	    The first character is treated as the completion flag: '1' marks a checked
	    item and '0' an unchecked one; the remaining characters are the item text.

	    Args:
	        text: Raw text content of the bullet element (may be empty).

	    Returns:
	        '- [x] <content>' or '- [ ] <content>' when the text starts with a flag,
	        otherwise the text unchanged.
	    """
	    # Slice instead of index: text[0] raised IndexError for an empty bullet,
	    # which aborted the conversion of the whole note.
	    flag = text[:1]
	    if flag == '1':
	        return '- [x] ' + text[1:]
	    if flag == '0':
	        return '- [ ] ' + text[1:]
	    return text


	# When a new note is created, it is automatically assigned a new unique ID so normally you do not need to set the ID.
	# However, if for some reason you want to set it, you can supply it as the id property.
	# It needs to be a 32 characters long string in hexadecimal. Make sure it is unique, for example by generating it
	# using whatever GUID function is available in your programming language.
	def add_joplin_info(note_data, parsed_note):
	    """Append the Joplin note metadata block to an already rendered note.

	    Reads 'created' and 'modified' from note_data (each divided by 1e3 before
	    being handed to datetime.fromtimestamp) and 'prefix_uuid' (with every '$'
	    removed, used as the note id). Returns parsed_note, a blank line, and the
	    metadata block.
	    """
	    # The statements below were not indented under the def, so the function did not parse
	    created_dt = datetime.fromtimestamp(note_data['created'] / 1e3).isoformat()
	    modified_dt = datetime.fromtimestamp(note_data['modified'] / 1e3).isoformat()
	    joplin_info = f"""id: {note_data['prefix_uuid'].replace('$', '')}
	parent_id:
	created_time: {created_dt}
	updated_time: {modified_dt}
	is_conflict: 0
	latitude:
	longitude:
	altitude:
	author:
	source_url:
	is_todo: 0
	todo_due: 0
	todo_completed: 0
	source: huawei-notes
	source_application: com.example.android.notepad
	application_data:
	order: 0
	user_created_time: {created_dt}
	user_updated_time: {modified_dt}
	encryption_cipher_text:
	encryption_applied: 0
	markup_language: 1
	is_shared: 0
	share_id:
	conflict_original_id:
	master_key_id:
	user_data:
	deleted_time: 0
	type_: 1"""
	    return '\n\n'.join([parsed_note, joplin_info])


	# Preferred MIME types for the extensions most commonly found in exported notes.
	mimetypes_map = {
	    'jpeg': 'image/jpeg',
	    'jpg': 'image/jpeg',
	    'png': 'image/png',
	    'gif': 'image/gif',
	    'svg': 'image/svg+xml',
	    'pdf': 'application/pdf',
	}


	def add_joplin_attachment(attachment_uuid, path_to_attachment, note_data):
	    """Build the Joplin resource metadata text for one attachment.

	    Args:
	        attachment_uuid: Identifier written to the ``id`` field.
	        path_to_attachment: Path of the attachment file; its name, suffix and
	            size on disk are read.
	        note_data: Note dictionary; 'created' and 'modified' are divided by 1e3
	            and passed to datetime.fromtimestamp, and 'modified' is also written
	            unchanged as ``blob_updated_time``.

	    Returns:
	        The attachment name, a blank line, and the metadata block.

	    Raises:
	        KeyError: If 'created' or 'modified' is missing from note_data.
	        OSError: If the attachment file cannot be inspected.
	    """
	    import mimetypes  # function-scope import: only the fallback below needs it

	    original_name = path_to_attachment.name
	    created_dt = datetime.fromtimestamp(note_data['created'] / 1e3).isoformat()
	    modified_dt = datetime.fromtimestamp(note_data['modified'] / 1e3).isoformat()
	    suffix = path_to_attachment.suffix.lstrip('.').lower()
	    # An extension missing from the table used to raise KeyError, which made the
	    # caller drop the whole note. Fall back to the stdlib guess, then to a
	    # generic binary type.
	    mimetype = (
	        mimetypes_map.get(suffix)
	        or mimetypes.guess_type(original_name)[0]
	        or 'application/octet-stream'
	    )
	    # stat() yields the size without loading the whole file into memory
	    size = path_to_attachment.stat().st_size
	    return f"""{original_name}

	id: {attachment_uuid}
	mime: {mimetype}
	filename:
	created_time: {created_dt}
	updated_time: {modified_dt}
	user_created_time: {created_dt}
	user_updated_time: {modified_dt}
	file_extension: {suffix}
	encryption_cipher_text:
	encryption_applied: 0
	encryption_blob_encrypted: 0
	size: {size}
	is_shared: 0
	share_id:
	master_key_id:
	user_data:
	blob_updated_time: {note_data['modified']}
	ocr_text:
	ocr_details:
	ocr_status: 0
	ocr_error:
	type_: 4"""


	def convert_single_note(note_path, out_path, attachments_path, joplin_format):
	    """Convert one exported note directory into a Markdown (or Joplin RAW) file.

	    Reads ``json.js`` from note_path, walks the ``element`` children of the note
	    HTML, turns bullets into checklist items and attachments into embedded
	    files, and writes ``<note uuid>.md`` into out_path. Every exception is
	    logged instead of raised, so one broken note does not stop the export.

	    Args:
	        note_path: Directory of a single exported note; must contain ``json.js``
	            and, for notes with attachments, an ``attachment`` sub-directory.
	        out_path: Directory receiving the converted note and, in Joplin mode,
	            one metadata file per attachment.
	        attachments_path: Directory receiving the copied attachment files.
	        joplin_format: If true, embed attachments as Joplin resources and append
	            the Joplin metadata block to the note.
	    """
	    json_file_path = note_path / 'json.js'
	    try:
	        # Decode explicitly instead of relying on the platform default encoding;
	        # assumes the export is UTF-8 (optionally with a BOM) -- TODO confirm.
	        json_text_content = json_file_path.read_text(encoding='utf-8-sig')
	        # Remove the JavaScript variable assignment and parse JSON
	        note_data = json.loads(json_text_content.replace('var data = ', ''))['content']
	        root = html.fromstring(note_data['html_content'])
	        note_uuid = note_data['prefix_uuid'].replace('$', '')
	        text_elements = []
	        first_is_bullet_or_attachment = False
	        for el in root.iterchildren():
	            if el.tag != 'element':
	                continue
	            text = el.text_content()
	            element_type = el.attrib.get('type')
	            if element_type == 'Bullet':
	                if not text_elements:
	                    first_is_bullet_or_attachment = True
	                text = process_bullet(text)
	            elif element_type == "Attachment":
	                if not text_elements:
	                    first_is_bullet_or_attachment = True
	                path_to_attachment = Path(text)
	                path_to_exported_attachment = note_path / "attachment" / path_to_attachment.name
	                logging.info(f'[{note_path.name} {note_uuid}] Found attachment.')
	                # The previous test (`not exists() and is_file()`) could never be true, so a
	                # missing attachment made shutil.copy raise and the whole note was skipped.
	                if not path_to_exported_attachment.is_file():
	                    text = f'Missing attachment: "{path_to_exported_attachment}"!'
	                else:
	                    attachment_uuid = hashlib.md5(path_to_attachment.name.encode('utf-8')).hexdigest()
	                    attachment_out_path = (attachments_path / attachment_uuid).with_suffix(
	                        path_to_exported_attachment.suffix)
	                    shutil.copy(path_to_exported_attachment, attachment_out_path)
	                    # Actually embed image
	                    if joplin_format:
	                        text = f"![](:/{attachment_uuid})"
	                        attachment_metadata = add_joplin_attachment(
	                            attachment_uuid, path_to_exported_attachment, note_data
	                        )
	                        attachment_metadata_out_path = (out_path / attachment_uuid).with_suffix('.md')
	                        attachment_metadata_out_path.write_text(attachment_metadata, encoding='utf-8')
	                    else:
	                        # Markdown links need forward slashes, also on Windows
	                        text = f"![]({attachment_out_path.relative_to(out_path).as_posix()})"
	                    logging.info(f'[{note_path.name} {note_uuid}] Embedded attachment successfully.')
	            text_elements.append(text)
	        if not text_elements:
	            raise ValueError("No text elements found!")
	        elif len(text_elements) == 1:
	            # The HTML holds only the body, so take the title from the note metadata.
	            # splitlines() is empty for an empty title; fall back to "" rather than IndexError.
	            title_lines = note_data['title'].splitlines()
	            truncated_title = title_lines[0] if title_lines else ""
	            note = format_note(truncated_title, *text_elements)
	        else:
	            if first_is_bullet_or_attachment:  # This note has no title
	                title = ""
	                description = '\n'.join(text_elements)
	            else:
	                title = text_elements[0]
	                description = '\n'.join(text_elements[1:])
	            note = format_note(title, description)
	            if len(text_elements) > 2:
	                logging.info(
	                    f'[{note_path.name} {note_uuid}] Found {len(text_elements)} text elements with '
	                    f'{"no" if first_is_bullet_or_attachment else ""} title elements in note HTML, combining them.'
	                )
	        note_out_path = (out_path / note_uuid).with_suffix('.md')
	        if joplin_format:
	            note = add_joplin_info(note_data, note)
	        note_out_path.write_text(note, encoding='utf-8')
	    except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
	        logging.error(f"JSON parsing error when processing note {note_path.name}: {e}")
	        logging.error(traceback.format_exc())
	    except Exception as e:
	        # Last-resort boundary: log and move on so the remaining notes still convert
	        logging.error(f"Unexpected error when processing note {note_path.name}: {e}")
	        logging.error(traceback.format_exc())


	def convert_notes(notes_path: Path, out_path: Path, joplin_format=False):
	    """Convert every note directory found directly inside notes_path.

	    Each sub-directory of notes_path is treated as one exported note and handed
	    to convert_single_note. out_path and its 'resources' sub-directory (used for
	    attachments) are created when missing.
	    """
	    # The statements below were not indented under the def, so the function did not parse
	    logging.info(f"Exporting Huawei Notes from HTML+JS to {'Joplin RAW Directory' if joplin_format else 'Markdown'}")
	    logging.info(f"Reading Directory with exported notes: {notes_path}")
	    # List all subdirectories
	    note_dirs = [e for e in notes_path.iterdir() if e.is_dir()]
	    logging.info(f"Found {len(note_dirs)} notes.")
	    out_path.mkdir(exist_ok=True, parents=True)
	    attachments_path = out_path / 'resources'
	    attachments_path.mkdir(exist_ok=True, parents=True)
	    for note_path in tqdm(note_dirs):
	        convert_single_note(note_path, out_path, attachments_path, joplin_format)


	if __name__ == "__main__":
	    # Every message in this script goes through the module-level logging.*
	    # functions (the root logger), so no named logger object is created here.
	    logging.basicConfig(level=logging.INFO)
	    convert_notes(Path(EXPORTED_NOTES_PATH), Path(CONVERTED_NOTES_PATH), joplin_format=EXPORT_TO_JOPLIN_FORMAT)