Created
April 15, 2025 21:05
-
-
Save ndandanov/31d820108d44779cfb973e4ac56a959a to your computer and use it in GitHub Desktop.
Convert exported Huawei Notes to Markdown or Joplin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
import json | |
import logging | |
import shutil | |
import traceback | |
from datetime import datetime | |
from pathlib import Path | |
from lxml import html | |
from tqdm import tqdm | |
# This script aims to help you convert your exported notes from Huawei Notes to Markdown | |
# or Joplin RAW directory for importing into a Joplin instance (https://joplinapp.org/). | |
# It is heavily inspired by the gists of spersico and titanismo: | |
# https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7 | |
# It builds upon https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7?permalink_comment_id=5387480#gistcomment-5387480 | |
# by taking more care when exporting the title, checklist items, attachments, and metadata of notes.
# IMPORTANT: In order to precisely extract the aforementioned note attributes, the lxml python package is used: | |
# https://lxml.de/installation.html | |
# Please install it with: | |
# pip install lxml | |
# NOTE: It is always recommended to install new packages in a separate virtual environment. | |
# Consult, e.g.: | |
# https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/ | |
# https://docs.astral.sh/uv/ | |
# https://mamba.readthedocs.io/en/latest/user_guide/mamba.html | |
# https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html | |
# Steps to export your notes from Huawei Notes (courtesy of https://gist.github.com/spersico/19b92f2c37f01118c19f2ef9c113f0d7): | |
# 1. Log in to your Huawei Account on the phone.
# 2. On the phone, enable Notes syncing, which is found under Huawei Account > Cloud.
# 3. Log in to https://cloud.huawei.com
# 4. Go to https://cloud.huawei.com/home#/account/gdpr, and click on Download Notes | |
# 5. This will give you a zip file with a password. Download the file and copy the password. | |
# 6. Extract the zip file using this password into a directory. | |
# 7. You'll need Python installed for this: https://www.python.org/downloads/. | |
# 8. Using a text editor, edit the variables in the next IMPORTANT section accordingly. | |
# 9. Open a console, terminal, or your favourite IDE (Visual Studio Code, PyCharm, etc.) | |
# 10. Run this script with `python convert_huawei_notes.py` (modify if needed when executing inside IDE). | |
# 11. Your notes should be in the CONVERTED_NOTES_PATH. Congratulations! | |
# IMPORTANT: Before running this script, please edit these variables accordingly.

# Directory holding the extracted Huawei Notes export. It should contain one
# sub-directory per note, e.g., 20191129071631742, 20200203095535253, 20210622205504108
EXPORTED_NOTES_PATH = '/path/to/Exported/Huawei/Notes/'

# Directory in which the converted notes are stored (it is created if it does not exist).
CONVERTED_NOTES_PATH = '/path/to/Converted/Huawei/Notes/'

# False: write plain Markdown files. True: write a Joplin RAW directory.
EXPORT_TO_JOPLIN_FORMAT = False
# If you would like to import the notes into Joplin, uncomment the next line. Otherwise, keep as is.
# EXPORT_TO_JOPLIN_FORMAT = True
def format_note(title, description):
    """Return the note text: the title, one blank line, then the description."""
    separator = "\n\n"
    return title + separator + description
def process_bullet(text):
    """Turn the text of a bullet element into a Markdown checklist item.

    The first character of ``text`` is treated as a completion flag:
    ``'1'`` yields a checked item (``- [x] ``) and ``'0'`` an unchecked one
    (``- [ ] ``); the flag itself is dropped from the output.

    Any other text, including the empty string, is returned unchanged.
    """
    checkbox_by_flag = {'1': '- [x] ', '0': '- [ ] '}
    # Slicing (instead of indexing) keeps an empty bullet from raising IndexError.
    checkbox = checkbox_by_flag.get(text[:1])
    if checkbox is None:
        return text
    return checkbox + text[1:]
# When a new note is created, it is automatically assigned a new unique ID so normally you do not need to set the ID. | |
# However, if for some reason you want to set it, you can supply it as the id property. | |
# It needs to be a 32 characters long string in hexadecimal. Make sure it is unique, for example by generating it | |
# using whatever GUID function is available in your programming language. | |
def add_joplin_info(note_data, parsed_note):
    """Append a Joplin property block to an already converted note.

    Args:
        note_data: Mapping with the note's ``created`` and ``modified``
            timestamps (handled as milliseconds since the epoch) and its
            ``prefix_uuid``.
        parsed_note: The Markdown text of the note.

    Returns:
        ``parsed_note``, a blank line, and one ``key: value`` line per
        property.

    Raises:
        KeyError: If ``note_data`` lacks one of the keys named above.
    """
    def to_iso(milliseconds):
        # datetime.fromtimestamp expects seconds; without a tz argument the
        # result is a naive datetime in the local timezone.
        return datetime.fromtimestamp(milliseconds / 1e3).isoformat()

    created_dt = to_iso(note_data['created'])
    modified_dt = to_iso(note_data['modified'])
    # The '$' characters are stripped from the exported identifier before it is used as the id.
    note_id = note_data['prefix_uuid'].replace('$', '')

    property_lines = [
        f"id: {note_id}",
        "parent_id:",
        f"created_time: {created_dt}",
        f"updated_time: {modified_dt}",
        "is_conflict: 0",
        "latitude:",
        "longitude:",
        "altitude:",
        "author:",
        "source_url:",
        "is_todo: 0",
        "todo_due: 0",
        "todo_completed: 0",
        "source: huawei-notes",
        "source_application: com.example.android.notepad",
        "application_data:",
        "order: 0",
        f"user_created_time: {created_dt}",
        f"user_updated_time: {modified_dt}",
        "encryption_cipher_text:",
        "encryption_applied: 0",
        "markup_language: 1",
        "is_shared: 0",
        "share_id:",
        "conflict_original_id:",
        "master_key_id:",
        "user_data:",
        "deleted_time: 0",
        "type_: 1",
    ]
    return parsed_note + '\n\n' + '\n'.join(property_lines)
# MIME types of the attachment extensions that are known by name.
# Extensions that are not listed here are exported with a generic binary type.
mimetypes_map = {
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
    'png': 'image/png',
    'gif': 'image/gif',
    'svg': 'image/svg+xml',
    'pdf': 'application/pdf',
}


def add_joplin_attachment(attachment_uuid, path_to_attachment, note_data):
    """Build the Joplin resource description for one attachment file.

    Args:
        attachment_uuid: Identifier written to the ``id`` property.
        path_to_attachment: ``pathlib.Path`` of the attachment on disk; its
            name, extension and size are read.
        note_data: Mapping with the owning note's ``created`` and ``modified``
            timestamps (handled as milliseconds since the epoch).

    Returns:
        The file name, a blank line, and one ``key: value`` line per property.

    Raises:
        KeyError: If ``note_data`` lacks ``created`` or ``modified``.
        OSError: If the attachment file cannot be inspected.
    """
    original_name = path_to_attachment.name
    created_dt = datetime.fromtimestamp(note_data['created'] / 1e3).isoformat()
    modified_dt = datetime.fromtimestamp(note_data['modified'] / 1e3).isoformat()
    suffix = path_to_attachment.suffix.lstrip('.').lower()
    # An unknown extension must not abort the conversion of the whole note,
    # so fall back to the generic binary MIME type instead of raising KeyError.
    mimetype = mimetypes_map.get(suffix, 'application/octet-stream')
    # Ask the file system for the size rather than loading the file into memory.
    size = path_to_attachment.stat().st_size
    # The blank line after the name separates the title from the property block.
    return f"""{original_name}

id: {attachment_uuid}
mime: {mimetype}
filename:
created_time: {created_dt}
updated_time: {modified_dt}
user_created_time: {created_dt}
user_updated_time: {modified_dt}
file_extension: {suffix}
encryption_cipher_text:
encryption_applied: 0
encryption_blob_encrypted: 0
size: {size}
is_shared: 0
share_id:
master_key_id:
user_data:
blob_updated_time: {note_data['modified']}
ocr_text:
ocr_details:
ocr_status: 0
ocr_error:
type_: 4"""
def convert_single_note(note_path, out_path, attachments_path, joplin_format):
    """Convert one exported note directory into a Markdown (or Joplin RAW) file.

    The note is read from ``note_path / 'json.js'``. Every ``<element>`` child
    of the note's HTML content becomes one text element: bullets are passed
    through ``process_bullet``, attachments are copied to ``attachments_path``
    and replaced by an image link, and anything else is kept as plain text.
    The result is written to ``out_path / '<note uuid>.md'``.

    Args:
        note_path: Directory of a single exported note.
        out_path: Directory that receives the converted note (and, in Joplin
            format, the attachment description files).
        attachments_path: Directory that receives the copied attachments.
        joplin_format: When true, Joplin resource links and property blocks
            are emitted instead of plain Markdown.

    Errors are logged instead of raised, so one broken note does not stop the
    conversion of the remaining ones.
    """
    json_file_path = note_path / 'json.js'
    try:
        # Read with an explicit encoding so the result does not depend on the
        # platform default. NOTE(review): assumes the export is UTF-8 - confirm.
        json_text_content = json_file_path.read_text(encoding='utf-8')
        # Remove the JavaScript variable assignment and parse JSON
        note_data = json.loads(json_text_content.replace('var data = ', ''))['content']
        root = html.fromstring(note_data['html_content'])
        note_uuid = note_data['prefix_uuid'].replace('$', '')

        text_elements = []
        first_is_bullet_or_attachment = False
        for el in root.iterchildren():
            if el.tag != 'element':
                continue
            text = el.text_content()
            element_type = el.attrib.get('type')

            # A note that opens with a bullet or an attachment has no title line.
            if element_type in ('Bullet', 'Attachment') and not text_elements:
                first_is_bullet_or_attachment = True

            if element_type == 'Bullet':
                # An empty bullet carries no completion flag to interpret.
                if text:
                    text = process_bullet(text)
            elif element_type == 'Attachment':
                path_to_attachment = Path(text)
                path_to_exported_attachment = note_path / "attachment" / path_to_attachment.name
                logging.info(f'[{note_path.name} {note_uuid}] Found attachment.')
                # Anything that is not an existing regular file counts as missing;
                # the note is still exported, with a placeholder for the attachment.
                if not path_to_exported_attachment.is_file():
                    text = f'Missing attachment: "{path_to_exported_attachment}"!'
                else:
                    attachment_uuid = hashlib.md5(path_to_attachment.name.encode('utf-8')).hexdigest()
                    attachment_out_path = (attachments_path / attachment_uuid).with_suffix(
                        path_to_exported_attachment.suffix)
                    shutil.copy(path_to_exported_attachment, attachment_out_path)
                    # Actually embed image
                    if joplin_format:
                        # Joplin refers to resources by id using the ":/<id>" link form.
                        text = f"![{path_to_attachment.name}](:/{attachment_uuid})"
                        attachment_metadata = add_joplin_attachment(
                            attachment_uuid, path_to_exported_attachment, note_data
                        )
                        attachment_metadata_out_path = (out_path / attachment_uuid).with_suffix('.md')
                        attachment_metadata_out_path.write_text(attachment_metadata, encoding='utf-8')
                    else:
                        # Link relative to the note so the output directory can be moved;
                        # forward slashes keep the link valid Markdown on every platform.
                        try:
                            link_target = attachment_out_path.relative_to(out_path).as_posix()
                        except ValueError:
                            # The attachments directory is not inside out_path.
                            link_target = attachment_out_path.as_posix()
                        text = f"![{path_to_attachment.name}]({link_target})"
                    logging.info(f'[{note_path.name} {note_uuid}] Embedded attachment successfully.')
            text_elements.append(text)

        if not text_elements:
            raise ValueError("No text elements found!")
        elif len(text_elements) == 1:
            # A single element is the body; the title comes from the note metadata.
            title_lines = note_data['title'].splitlines()
            # An empty title has no first line; use an empty title instead of failing.
            truncated_title = title_lines[0] if title_lines else ''
            note = format_note(truncated_title, text_elements[0])
        else:
            if first_is_bullet_or_attachment:  # This note has no title
                title = ""
                description = '\n'.join(text_elements)
            else:
                title = text_elements[0]
                description = '\n'.join(text_elements[1:])
            note = format_note(title, description)
            if len(text_elements) > 2:
                logging.info(
                    f'[{note_path.name} {note_uuid}] Found {len(text_elements)} text elements with '
                    f'{"no" if first_is_bullet_or_attachment else ""} title elements in note HTML, combining them.'
                )

        note_out_path = (out_path / note_uuid).with_suffix('.md')
        if joplin_format:
            note = add_joplin_info(note_data, note)
        note_out_path.write_text(note, encoding='utf-8')
    except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
        logging.error(f"JSON parsing error when processing note {note_path.name}: {e}")
        logging.error(traceback.format_exc())
    except Exception as e:
        logging.error(f"Unexpected error when processing note {note_path.name}: {e}")
        logging.error(traceback.format_exc())
def convert_notes(notes_path: Path, out_path: Path, joplin_format=False):
    """Convert every note directory found directly inside ``notes_path``.

    Args:
        notes_path: Directory whose sub-directories each hold one exported note.
        out_path: Directory that receives the converted notes; it and its
            ``resources`` sub-directory are created when missing.
        joplin_format: Passed on to ``convert_single_note`` to select the
            Joplin RAW output instead of plain Markdown.
    """
    target_format = 'Joplin RAW Directory' if joplin_format else 'Markdown'
    logging.info(f"Exporting Huawei Notes from HTML+JS to {target_format}")
    logging.info(f"Reading Directory with exported notes: {notes_path}")

    # Only sub-directories are notes; plain files next to them are ignored.
    note_dirs = list(filter(Path.is_dir, notes_path.iterdir()))
    logging.info(f"Found {len(note_dirs)} notes.")

    attachments_path = out_path / 'resources'
    for directory in (out_path, attachments_path):
        directory.mkdir(exist_ok=True, parents=True)

    for note_path in tqdm(note_dirs):
        convert_single_note(note_path, out_path, attachments_path, joplin_format)
if __name__ == "__main__":
    # All messages in this script go through the root logger (logging.info / logging.error),
    # so configuring it is sufficient; no separate named logger is needed.
    logging.basicConfig(level=logging.INFO)
    convert_notes(
        Path(EXPORTED_NOTES_PATH),
        Path(CONVERTED_NOTES_PATH),
        joplin_format=EXPORT_TO_JOPLIN_FORMAT,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment