Skip to content

Instantly share code, notes, and snippets.

@mrozo
Created May 22, 2025 22:08
Show Gist options
  • Save mrozo/c70f2ef5814a70b6cacec2d8148c8fc0 to your computer and use it in GitHub Desktop.
Save mrozo/c70f2ef5814a70b6cacec2d8148c8fc0 to your computer and use it in GitHub Desktop.
Parse a Markdown task list
#!/usr/bin/env python3
# Sample Markdown-like document used as demo input for parse() at the bottom
# of the file.  It is split into a list of lines; the first element is an
# empty string because the literal starts with a newline.  The sample covers
# a header, plain text, "-"/"*" list items, "[]" task lines carrying
# "!" priority, "//date" and "@user" tokens, and a fenced code block.
x="""
# header 1
textblock 1 line1
textblock 1 line2
textblock 1 line3
textblock 2 line 1
textblock 3 line 1
textblock 3 line 2
- p1
- p2
- p2.1
\t- p2.2
\t- p2.3
\t - p2.3.1
- p2.3.2
[] t2.3.3
[] @user1 task
[] ! important task
[] //25.01.2025 dated task
[] !! //26.02.2024 @user1 @user2 full task
```language
code line1
code line with spaces
code line 3
code line 4
```
* p 3
""".splitlines()
# Sample task text (priority marker, //date, @user, then free text) used as
# demo input for parse_text() at the bottom of the file.
text=" ! //25.10.2025 @user1 zrobić cos"
def lcount_chars(line, chars):
    """Count the leading characters of ``line`` that belong to ``chars``.

    Args:
        line: String to inspect.
        chars: Container of the characters that count as part of the prefix
            (for example space and tab for indentation, or ``"#"`` for
            header markers).

    Returns:
        Length of the longest prefix of ``line`` made only of characters
        from ``chars``.  A string made entirely of such characters yields
        ``len(line)``; an empty string yields ``0``.
    """
    for count, char in enumerate(line):
        if char not in chars:
            return count
    # Every character matched, so the whole string is the prefix.  Returning
    # 0 here would make "!!!" look like it has no "!" prefix at all.
    return len(line)
def parse_text(text):
    """Yield the metadata tokens found at the start of a task's text.

    Tokens are read left to right, skipping spaces and tabs between them,
    until the first word that is not a recognised token or the end of the
    string is reached.  A token ends at the next space (or at the end of
    the string).

    Recognised tokens and what is yielded for each:

    * ``//<date>``     -> ``["Date", (start, end), "<date>"]``
    * ``@<user>``      -> ``["User", (start, end), "<user>"]``
    * ``!``, ``!!``... -> ``["Priority", (start, end), <number of "!">]``

    ``start``/``end`` are offsets into ``text``.  For Date and User they
    delimit the value without its ``//`` / ``@`` marker; for Priority they
    delimit the run of ``!`` characters.

    Args:
        text: Task text, e.g. ``"!! //26.02.2024 @user1 do something"``.

    Yields:
        One three-item list per token, in order of appearance.
    """
    offset = 0
    while True:
        rest = text[offset:].lstrip(" \t")
        offset = len(text) - len(rest)
        if not rest:
            # Text exhausted: nothing left to index into.
            break

        if rest.startswith("//"):
            kind, marker_len = "Date", 2
        elif rest.startswith("@"):
            kind, marker_len = "User", 1
        elif rest.startswith("!"):
            bangs = len(rest) - len(rest.lstrip("!"))
            yield ["Priority", (offset, offset + bangs), bangs]
            offset += bangs
            continue
        else:
            # First ordinary word: the token prefix is over.
            break

        end = rest.find(" ")
        if end == -1:
            # Token runs to the end of the string; without this the last
            # character would be cut off and the offset would move backwards.
            end = len(rest)
        yield [kind, (offset + marker_len, offset + end), rest[marker_len:end]]
        offset += end
def _parse(lines):
    """Classify the lines of a Markdown-like document into nodes.

    Every yielded node is a list that starts with
    ``[kind, indentation, (first_line, line_count)]`` where ``indentation``
    is the leading space/tab count reported by ``lcount_chars`` and
    ``first_line`` is the 0-based index into ``lines``.  The remaining
    items depend on ``kind``:

    * ``"Empty"``  - no extra items (blank or whitespace-only line)
    * ``"Header"`` - level (number of leading ``#``), title text
    * ``"List"``   - item text (line starts with ``-`` or ``*``)
    * ``"Task"``   - status (text between ``[`` and the first ``]``),
      task text, list of tokens produced by ``parse_text``
    * ``"Code"``   - language tag after the opening fence, code text
      (each code line followed by a newline)
    * ``"Text"``   - the raw, unstripped line

    A fenced code block spans from its opening fence line to the first
    following line that ends with a fence, or to the end of the input when
    the fence is never closed.  Any text before the closing fence on that
    line is kept as the last code line.

    Args:
        lines: Iterable of lines without trailing newlines.

    Yields:
        One node list per line, or per whole fenced code block.
    """
    line_gen = enumerate(lines)
    for line_n, line in line_gen:
        stripped_line = line.strip()
        indentation = lcount_chars(line, " \t")
        if not stripped_line:
            yield ["Empty", indentation, (line_n, 1)]
            continue

        first_char = stripped_line[0]
        if first_char == "#":
            # Measured with lstrip so a line consisting only of "#"
            # characters still reports its full level.
            title = stripped_line.lstrip("#")
            level = len(stripped_line) - len(title)
            yield ["Header", indentation, (line_n, 1), level, title.strip()]
        elif first_char in "-*":
            yield ["List", indentation, (line_n, 1), stripped_line[1:].strip()]
        elif first_char == "[" and "]" in stripped_line:
            # A "[" line without a closing bracket is not a task; it falls
            # through to the branches below instead of being sliced with -1.
            status, _, task_text = stripped_line[1:].partition("]")
            task_text = task_text.strip()
            yield [
                "Task",
                indentation,
                (line_n, 1),
                status.strip(),
                task_text,
                list(parse_text(task_text)),
            ]
        elif stripped_line.startswith("```"):
            lang = stripped_line[3:].strip()
            start_line = end_line = line_n
            code_lines = []
            # Consume lines from the shared iterator so the outer loop
            # resumes after the closing fence.
            for end_line, code_line in line_gen:
                trimmed = code_line.rstrip()
                if trimmed.endswith("```"):
                    code_lines.append(trimmed[:-3])
                    break
                code_lines.append(code_line)
            code = "".join(code_line + "\n" for code_line in code_lines)
            # Span is (first line, number of lines), fences included, to
            # match the (line, count) convention of every other node.
            yield [
                "Code",
                indentation,
                (start_line, end_line - start_line + 1),
                lang,
                code,
            ]
        else:
            yield ["Text", indentation, (line_n, 1), line]
def parse(lines):
    """Parse ``lines`` into nodes, merging consecutive text lines.

    Wraps ``_parse`` and post-processes its node stream:

    * adjacent ``"Text"`` nodes are merged into a single node whose text is
      the lines joined with newlines and whose span ``(first_line,
      line_count)`` grows by one for every merged line; the merged node
      keeps the indentation of its first line;
    * ``"Empty"`` nodes are dropped from the output, but still separate
      text blocks because they interrupt the run of ``"Text"`` nodes.

    Args:
        lines: Iterable of lines without trailing newlines.

    Yields:
        Node lists in document order (see ``_parse`` for their layout).
        Nothing is yielded for empty input.
    """
    prev_node = None
    for node in _parse(lines):
        if prev_node and prev_node[0] == "Text" and node[0] == "Text":
            first_line, line_count = prev_node[2]
            prev_node[2] = (first_line, line_count + 1)
            prev_node[3] += "\n" + node[3]
            continue
        if prev_node and prev_node[0] != "Empty":
            yield prev_node
        prev_node = node
    # Flush the pending node.  It must be prev_node, not the loop variable:
    # when the input ends inside a merged text block the loop variable holds
    # only the last raw line, and for empty input it is not bound at all.
    if prev_node and prev_node[0] != "Empty":
        yield prev_node
# Demo run: print every node parsed from the sample document, then every
# token parsed from the sample task text, one per line.
for node_stream in (parse(x), parse_text(text)):
    for parsed_item in node_stream:
        print(parsed_item)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment