Created
November 18, 2020 13:52
-
-
Save gsamat/04f4d31e8399f8d4956d7dcaeff59a42 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires Python 3.
# pip3 install beautifulsoup4
# pip3 install markdown
# Usage: copy the Markdown text to the clipboard, then run
#   pbpaste | python3 unmarkdown.py | pbcopy
# The clipboard now holds plain text suitable for posting to Facebook.
from bs4 import BeautifulSoup | |
from markdown import markdown | |
from sys import stdin | |
def get_plain_text(soup):
    """Return the text of every ``<p>`` element in *soup*, blank-line separated.

    Each paragraph's text is the concatenation of all string nodes found
    inside it, so inline markup (emphasis, code spans, ...) is flattened
    away. Only ``<p>`` elements are collected; anything that is not inside
    a paragraph is not part of the result.

    Args:
        soup: A parsed Beautiful Soup document (or tag) to search.

    Returns:
        The paragraph texts joined with one empty line between them; an
        empty string when *soup* contains no ``<p>`` elements.
    """
    # ``find_all`` / ``string=`` are the current spellings of the deprecated
    # ``findAll`` / ``text=`` aliases and match the ``find_all`` call used
    # elsewhere in this file.
    paragraphs = [
        ''.join(p.find_all(string=True))
        for p in soup.find_all('p')
    ]
    return '\n\n'.join(paragraphs)
# Script body: convert Markdown read from standard input into plain text in
# which every hyperlink becomes "link text [N]", followed by a numbered list
# of the link targets.

# Render the Markdown to HTML so that links and paragraphs can be located
# through the parsed tree. Reading via ``stdin.read()`` leaves sys.stdin open.
read_data = stdin.read()
html = markdown(read_data)
soup = BeautifulSoup(html, features="html.parser")

# Replace each link with its text plus a 1-based reference number and
# remember the target; the number is the target's position in ``refs``.
refs = []
for link in soup.find_all('a'):
    href = link.get('href')
    if href is None:
        # An anchor without a target (e.g. raw ``<a name=...>`` HTML) has
        # nothing to reference; leave it alone so that its text is kept
        # without a dangling "[N]" marker and a "N. None" entry.
        continue
    refs.append(href)
    link.replace_with(f"{link.text} [{len(refs)}]")

text = get_plain_text(soup)

# One "N. target" line per collected link, each terminated by a newline.
refs_string = ''.join(
    f"{number}. {href}\n" for number, href in enumerate(refs, start=1)
)
print(text + '\n\n' + refs_string)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment