Skip to content

Instantly share code, notes, and snippets.

@ndandanov
Created April 15, 2025 21:05
Show Gist options
  • Save ndandanov/31d820108d44779cfb973e4ac56a959a to your computer and use it in GitHub Desktop.
Save ndandanov/31d820108d44779cfb973e4ac56a959a to your computer and use it in GitHub Desktop.
Convert exported Huawei Notes to Markdown or Joplin
import hashlib
import json
import logging
import shutil
import traceback
from datetime import datetime
from pathlib import Path
from lxml import html
from tqdm import tqdm
# This script aims to help you convert your exported notes from Huawei Notes to Markdown
# or Joplin RAW directory for importing into a Joplin instance (https://joplinapp.org/).
# It is heavily inspired by the gists of spersico and titanismo:
# https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7
# It builds upon https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7?permalink_comment_id=5387480#gistcomment-5387480
# by taking more care when exporting the title, checklist items, attachments, and metadata of notes.
# IMPORTANT: In order to precisely extract the aforementioned note attributes, the lxml python package is used
# (https://lxml.de/installation.html); the tqdm package is used to display a progress bar.
# Please install both with:
# pip install lxml tqdm
# NOTE: It is always recommended to install new packages in a separate virtual environment.
# Consult, e.g.:
# https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/
# https://docs.astral.sh/uv/
# https://mamba.readthedocs.io/en/latest/user_guide/mamba.html
# https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html
# Steps to export your notes from Huawei Notes (courtesy of https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7):
# 1. Log in to a Huawei Account on the phone.
# 2. On your phone, activate Notes syncing, which is found under Huawei Account > Cloud.
# 3. Log in to https://cloud.huawei.com
# 4. Go to https://cloud.huawei.com/home#/account/gdpr, and click on Download Notes
# 5. This will give you a zip file with a password. Download the file and copy the password.
# 6. Extract the zip file using this password into a directory.
# 7. You'll need Python installed for this: https://www.python.org/downloads/.
# 8. Using a text editor, edit the variables in the next IMPORTANT section accordingly.
# 9. Open a console, terminal, or your favourite IDE (Visual Studio Code, PyCharm, etc.)
# 10. Run this script with `python convert_huawei_notes.py` (modify if needed when executing inside IDE).
# 11. Your notes should be in the CONVERTED_NOTES_PATH. Congratulations!
# IMPORTANT: Before running this script, please edit these variables accordingly.
# Directory holding the extracted Huawei export. It should contain multiple directories,
# each for a single note, e.g., 20191129071631742, 20200203095535253, 20210622205504108
EXPORTED_NOTES_PATH = '/path/to/Exported/Huawei/Notes/'
# Directory in which the converted notes are stored (it is created if it does not exist yet)
CONVERTED_NOTES_PATH = '/path/to/Converted/Huawei/Notes/'
# False exports plain Markdown; True exports a Joplin RAW directory instead
EXPORT_TO_JOPLIN_FORMAT = False
# If you would like to import the notes in Joplin, uncomment the next line. Otherwise, keep as is
# EXPORT_TO_JOPLIN_FORMAT = True
def format_note(title, description):
    """Return the note text: the title, one blank line, then the description."""
    blank_line = "\n\n"
    sections = (title, description)
    return blank_line.join(sections)
def process_bullet(text):
    """Convert the text of a checklist item into a Markdown task-list line.

    The first character is read as the completion flag: ``'1'`` yields a
    checked item (``- [x] ``) and ``'0'`` an unchecked one (``- [ ] ``); the
    remainder of the text becomes the item's content.

    Args:
        text: Text content of a ``Bullet`` element.

    Returns:
        The Markdown task-list line, or ``text`` unchanged when it is empty or
        does not start with a ``'0'``/``'1'`` flag.
    """
    # Slicing (instead of indexing) keeps an empty bullet from raising IndexError.
    flag = text[:1]
    if flag == '1':
        complete_prefix = '- [x] '
    elif flag == '0':
        complete_prefix = '- [ ] '
    else:
        return text
    return complete_prefix + text[1:]
# When a new note is created, it is automatically assigned a new unique ID so normally you do not need to set the ID.
# However, if for some reason you want to set it, you can supply it as the id property.
# It needs to be a 32 characters long string in hexadecimal. Make sure it is unique, for example by generating it
# using whatever GUID function is available in your programming language.
def add_joplin_info(note_data, parsed_note):
    """Append the Joplin metadata block to an already formatted note.

    Args:
        note_data: Mapping with the keys ``created``, ``modified`` and
            ``prefix_uuid``. The two timestamps are divided by 1e3 before
            being handed to ``datetime.fromtimestamp`` (presumably they are
            epoch milliseconds -- verify against the export).
        parsed_note: The note text (title and body) to place before the metadata.

    Returns:
        ``parsed_note``, a blank line, and the metadata lines joined by newlines.
    """
    created_iso = datetime.fromtimestamp(note_data['created'] / 1e3).isoformat()
    modified_iso = datetime.fromtimestamp(note_data['modified'] / 1e3).isoformat()
    # The note id is the Huawei prefix_uuid with every '$' removed.
    note_id = note_data['prefix_uuid'].replace('$', '')
    metadata_lines = [
        f"id: {note_id}",
        "parent_id:",
        f"created_time: {created_iso}",
        f"updated_time: {modified_iso}",
        "is_conflict: 0",
        "latitude:",
        "longitude:",
        "altitude:",
        "author:",
        "source_url:",
        "is_todo: 0",
        "todo_due: 0",
        "todo_completed: 0",
        "source: huawei-notes",
        "source_application: com.example.android.notepad",
        "application_data:",
        "order: 0",
        f"user_created_time: {created_iso}",
        f"user_updated_time: {modified_iso}",
        "encryption_cipher_text:",
        "encryption_applied: 0",
        "markup_language: 1",
        "is_shared: 0",
        "share_id:",
        "conflict_original_id:",
        "master_key_id:",
        "user_data:",
        "deleted_time: 0",
        "type_: 1",
    ]
    joplin_info = '\n'.join(metadata_lines)
    return '\n\n'.join((parsed_note, joplin_info))
# MIME types written for the attachment extensions handled explicitly
# (keys are lower-case extensions without the leading dot).
mimetypes_map = {
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
    'png': 'image/png',
    'gif': 'image/gif',
    'svg': 'image/svg+xml',
    'pdf': 'application/pdf',
}


def add_joplin_attachment(attachment_uuid, path_to_attachment, note_data):
    """Build the Joplin resource metadata text for one attachment file.

    Args:
        attachment_uuid: Identifier written to the ``id`` field.
        path_to_attachment: ``pathlib.Path`` of the attachment on disk; its
            name, extension and size are recorded.
        note_data: Mapping with the keys ``created`` and ``modified``. Both are
            divided by 1e3 before being handed to ``datetime.fromtimestamp``
            (presumably epoch milliseconds -- verify against the export).

    Returns:
        The metadata text: the file name as title, a blank line, then one
        ``key: value`` property per line.

    Raises:
        KeyError: If ``note_data`` lacks ``created`` or ``modified``.
        OSError: If the attachment's size cannot be read.
    """
    import mimetypes  # local import: only needed for the fallback lookup below

    original_name = path_to_attachment.name
    created_dt = datetime.fromtimestamp(note_data['created'] / 1e3).isoformat()
    modified_dt = datetime.fromtimestamp(note_data['modified'] / 1e3).isoformat()
    suffix = path_to_attachment.suffix.lstrip('.').lower()
    # An extension missing from mimetypes_map must not raise KeyError (that would
    # abort the conversion of the whole note): ask the standard library next and
    # finally fall back to a generic binary type.
    mimetype = (
        mimetypes_map.get(suffix)
        or mimetypes.guess_type(original_name)[0]
        or 'application/octet-stream'
    )
    # stat() reports the size without loading the whole file into memory.
    size = path_to_attachment.stat().st_size
    # The blank line after the title separates it from the property block, the
    # same way add_joplin_info separates the note text from its properties.
    return f"""{original_name}

id: {attachment_uuid}
mime: {mimetype}
filename:
created_time: {created_dt}
updated_time: {modified_dt}
user_created_time: {created_dt}
user_updated_time: {modified_dt}
file_extension: {suffix}
encryption_cipher_text:
encryption_applied: 0
encryption_blob_encrypted: 0
size: {size}
is_shared: 0
share_id:
master_key_id:
user_data:
blob_updated_time: {note_data['modified']}
ocr_text:
ocr_details:
ocr_status: 0
ocr_error:
type_: 4"""
def convert_single_note(note_path, out_path, attachments_path, joplin_format):
    """Convert one exported note directory into a Markdown (or Joplin RAW) file.

    Reads ``json.js`` inside ``note_path``, walks the ``element`` children of
    the note's ``html_content`` and writes ``<note uuid>.md`` into ``out_path``.
    ``Bullet`` elements are turned into Markdown task-list lines via
    ``process_bullet``; ``Attachment`` elements are copied into
    ``attachments_path`` and embedded as images.

    Args:
        note_path: Directory of a single exported note.
        out_path: Directory that receives the converted note and, in Joplin
            mode, one metadata file per attachment.
        attachments_path: Directory that receives the copied attachment files.
        joplin_format: When true, attachments are referenced by resource id and
            the Joplin metadata block is appended to the note.

    Returns:
        None. Every error is logged together with its traceback and is not
        re-raised.
    """
    json_file_path = note_path / 'json.js'
    try:
        # Decode explicitly instead of relying on the platform default encoding
        # ('utf-8-sig' also tolerates a leading byte-order mark).
        json_text_content = json_file_path.read_text(encoding='utf-8-sig')
        # Remove the JavaScript variable assignment and parse JSON. Only the
        # first occurrence is dropped so the same text inside a note survives.
        note_data = json.loads(json_text_content.replace('var data = ', '', 1))['content']
        root = html.fromstring(note_data['html_content'])
        note_uuid = note_data['prefix_uuid'].replace('$', '')

        text_elements = []
        first_is_bullet_or_attachment = False
        for el in root.iterchildren():
            if el.tag != 'element':
                continue
            text = el.text_content()
            el_type = el.attrib.get('type')
            if el_type == 'Bullet':
                if not text_elements:
                    first_is_bullet_or_attachment = True
                text = process_bullet(text)
            elif el_type == "Attachment":
                if not text_elements:
                    first_is_bullet_or_attachment = True
                path_to_attachment = Path(text)
                path_to_exported_attachment = note_path / "attachment" / path_to_attachment.name
                logging.info(f'[{note_path.name} {note_uuid}] Found attachment.')
                # The previous test (`not exists() and is_file()`) could never be
                # true, so a missing file crashed shutil.copy and lost the note.
                if not path_to_exported_attachment.is_file():
                    text = f'Missing attachment: "{path_to_exported_attachment}"!'
                    logging.warning(f'[{note_path.name} {note_uuid}] {text}')
                else:
                    # The id is derived from the attachment's file name only.
                    attachment_uuid = hashlib.md5(path_to_attachment.name.encode('utf-8')).hexdigest()
                    attachment_out_path = (attachments_path / attachment_uuid).with_suffix(
                        path_to_exported_attachment.suffix)
                    shutil.copy(path_to_exported_attachment, attachment_out_path)
                    # Actually embed image
                    if joplin_format:
                        text = f"![](:/{attachment_uuid})"
                        attachment_metadata = add_joplin_attachment(
                            attachment_uuid, path_to_exported_attachment, note_data
                        )
                        attachment_metadata_out_path = (out_path / attachment_uuid).with_suffix('.md')
                        attachment_metadata_out_path.write_text(attachment_metadata, encoding='utf-8')
                    else:
                        text = f"![]({attachment_out_path.relative_to(out_path)})"
                    logging.info(f'[{note_path.name} {note_uuid}] Embedded attachment successfully.')
            text_elements.append(text)

        if not text_elements:
            raise ValueError("No text elements found!")
        elif len(text_elements) == 1:
            # A single element is used as the body; the title comes from the
            # first line of the note's 'title' field (empty if there is none).
            title_lines = note_data['title'].splitlines()
            truncated_title = title_lines[0] if title_lines else ""
            note = format_note(truncated_title, text_elements[0])
        else:
            if first_is_bullet_or_attachment:  # This note has no title
                title = ""
                description = '\n'.join(text_elements)
            else:
                title = text_elements[0]
                description = '\n'.join(text_elements[1:])
            note = format_note(title, description)
            if len(text_elements) > 2:
                logging.info(
                    f'[{note_path.name} {note_uuid}] Found {len(text_elements)} text elements with '
                    f'{"no" if first_is_bullet_or_attachment else ""} title elements in note HTML, combining them.'
                )

        note_out_path = (out_path / note_uuid).with_suffix('.md')
        if joplin_format:
            note = add_joplin_info(note_data, note)
        # Write UTF-8 explicitly so non-ASCII notes do not fail on platforms
        # whose default encoding is narrower.
        note_out_path.write_text(note, encoding='utf-8')
    except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
        logging.error(f"JSON parsing error when processing note {note_path.name}: {e}")
        logging.error(traceback.format_exc())
    except Exception as e:
        logging.error(f"Unexpected error when processing note {note_path.name}: {e}")
        logging.error(traceback.format_exc())
def convert_notes(notes_path: Path, out_path: Path, joplin_format=False):
    """Convert every note directory found directly inside ``notes_path``.

    Args:
        notes_path: Directory whose sub-directories are each treated as one note.
        out_path: Directory for the converted notes; it and its ``resources``
            sub-directory are created when missing.
        joplin_format: Passed through to ``convert_single_note``; also selects
            the format name shown in the first log message.
    """
    target_format = 'Joplin RAW Directory' if joplin_format else 'Markdown'
    logging.info(f"Exporting Huawei Notes from HTML+JS to {target_format}")
    logging.info(f"Reading Directory with exported notes: {notes_path}")

    # Only sub-directories count as notes; plain files are ignored.
    note_dirs = []
    for entry in notes_path.iterdir():
        if entry.is_dir():
            note_dirs.append(entry)
    logging.info(f"Found {len(note_dirs)} notes.")

    attachments_path = out_path / 'resources'
    for directory in (out_path, attachments_path):
        directory.mkdir(exist_ok=True, parents=True)

    for note_path in tqdm(note_dirs):
        convert_single_note(note_path, out_path, attachments_path, joplin_format)
if __name__ == "__main__":
    # Script entry point: set up INFO-level logging, then convert the notes
    # using the paths and format flag configured at the top of the file.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    convert_notes(
        Path(EXPORTED_NOTES_PATH),
        Path(CONVERTED_NOTES_PATH),
        joplin_format=EXPORT_TO_JOPLIN_FORMAT,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment