Last active
April 1, 2021 10:42
-
-
Save JakubOrsula/bd4f6370ca4efa61dee30dcd94566883 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re

from bs4 import BeautifulSoup, NavigableString
# Root directory whose entries are listed and scanned for HTML submissions
# by the loop at the bottom of this script (each entry is treated as a repo).
GIT_DIR = '/home/jakub/github/pb138/'
# Grid class patterns. BeautifulSoup tests a ``class_`` regex against each
# individual CSS class token, so these match ``row`` and ``col`` / ``col-*``.
_ROW_CLASS = re.compile(r'^row$')
_COL_CLASS = re.compile(r'^col(-|$)')


def validate(url):
    """Check one HTML file against the submission rules and print the findings.

    Nothing is returned or raised for rule violations; every problem is
    reported on stdout with a ``[ FAIL ]`` / ``[ WARN ]`` style prefix.

    Checks performed, in order:
      * no forbidden tags (tables, headings, ``strong``, ``br``, ``style``...);
      * exactly one ``<body>`` (validation stops here if that is not the case);
      * no ``style`` / ``width`` / ``height`` attributes on the direct
        children of ``<body>``;
      * every ``<img>`` has an ``alt`` attribute longer than 3 characters;
      * every row (``<row>`` tag or ``<div class="row">``) contains at least
        one column (``<col>`` tag or ``<div class="col...">``);
      * every ``<input>`` has exactly one ``<form>`` ancestor.

    Args:
        url: Filesystem path of the HTML file to check (it is passed to
            ``open``, so despite the name it is not a network URL).
    """
    # Read raw bytes and let BeautifulSoup detect the encoding; the context
    # manager guarantees the handle is closed even if parsing fails.
    with open(url, 'rb') as page:
        soup = BeautifulSoup(page.read(), 'html.parser')

    forbidden_tags = soup.find_all([
        'table', 'td', 'tr', 'strong', 'small',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7',
        'svg', 'br', 'style',
    ])
    if forbidden_tags:
        print('[ FAIL ] submission contains forbidden tags', forbidden_tags)

    # The attribute check below dereferences soup.body, so bail out early
    # when the document does not have exactly one body.
    if len(soup.find_all('body')) != 1:
        print('[ FAIL] I expected to find exactly one body tag')
        return

    forbidden_attrs = ['style', 'width', 'height']
    # NOTE(review): only the direct children of <body> are inspected, as in
    # the original code; nested elements are not checked -- confirm whether
    # all descendants were intended.
    for tag in soup.body.children:
        if isinstance(tag, NavigableString):
            # Text nodes and comments carry no attributes.
            continue
        for attr in forbidden_attrs:
            if tag.has_attr(attr):
                print(f'[ FAIL ] {attr} attrs are forbidden: {tag}')

    for img in soup.find_all('img'):
        if not img.has_attr('alt'):
            print('[ FAIL ]Imgs must have alt text', img)
        elif len(img['alt']) <= 3:
            print('[ WARN ] ALt tag should be something meaningful', img)

    # The original passed a set ({'class', regex}) instead of a dict and used
    # a pattern requiring a literal '=' in the class value, so real
    # class="row" / class="col-..." elements were never matched.
    rows = soup.find_all('row') + soup.find_all('div', class_=_ROW_CLASS)
    for row in rows:
        cols = row.find_all('col') + row.find_all('div', class_=_COL_CLASS)
        if not cols:
            print('[ WARN ]There are rows without cols!', row)

    for inp in soup.find_all('input'):
        if len(inp.find_parents('form')) != 1:
            print('Every input should be wrapped in one form', inp)
# Walk every entry of GIT_DIR, validate the first .html file found in it and
# report any further .html files as duplicates. Entries are processed in
# sorted order so the report is stable between runs.
for repo in sorted(os.listdir(GIT_DIR)):
    html_found = False
    for root, _dirs, files in os.walk(os.path.join(GIT_DIR, repo)):
        for filename in files:
            if not filename.endswith('.html'):
                continue
            html_path = os.path.join(root, filename)
            if html_found:
                # Only the first HTML file per repo is validated.
                print('Duplicate html file', html_path)
                continue
            html_found = True
            print('~~~~~~')
            print(repo)
            print('~~~~~~')
            validate(html_path)
    if not html_found:
        print('~~~~~\nHtml file for ' + repo + ' not found!\n~~~~~~')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment