Last active
June 30, 2018 17:53
-
-
Save 0x4E0x650x6F/870a83e4068425b97b61664d6acfce30 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import unittest | |
class LogParser(object):
    """
    Parse access-log lines into a dictionary of named fields.

    The expected line layout, as encoded by ``LOG_RE``, is::

        <ipv4> - - [<date>] "<METHOD> /<host>[:port][/path][?query] HTTP/1.x" <status> <bytes>

    Some of the regular expressions are 'based' on Django's validators.
    The pattern is compiled without flags, so matching is case-sensitive
    (host names must be lower case, methods upper case).
    """

    # Dotted-quad IPv4 address; every octet is limited to 0-255.
    IPV4_RE = (r"(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)"
               r"(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}"
               )
    # Bracketed IPv6 literal (characters are only loosely validated).
    IPV6_RE = r"\[[0-9a-f:\.]+\]"
    # Host name: first label, optional middle labels, then a TLD.
    # The middle-label class is limited to letters, digits and hyphens; a
    # stray "' + ul + r'" fragment used to sit inside that class and made
    # it accept quotes, spaces and plus signs as part of a host name.
    DOMAIN_RE = (r"[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?"
                 r"(?:\.(?!-)[a-z0-9-]{1,63}(?<!-))*"
                 r"\."
                 r"(?!-)"
                 r"(?:[a-z-]{2,63}"
                 r"|xn--[a-z0-9]{1,59})"
                 r"(?<!-)"
                 r"\.?"
                 )
    SERVER_RE = r"(?P<host>" + IPV4_RE + "|" + IPV6_RE + "|" + DOMAIN_RE + ")"
    # host[:port][/path][?query | #fragment]
    # The path and the query are independent optional parts, so a request
    # with a path but no query string still matches.  The path stops at the
    # first "?" or "#", which is where the query capture begins.
    URL_RE = (SERVER_RE +
              r"(?::\d{2,5})?"
              r"(?P<path>/[^\s?#]*)?"
              r"(?:[?#](?P<query>[^\s]*))?"
              )
    # NOTE(review): inside a character class "|" is a literal pipe, so the
    # separators below also accept "|" between fields.  Kept unchanged so
    # that the set of accepted separators does not shrink.
    SEPARATOR_RE = r"[\s|\t]{1,}"
    DATE_RE = r"[a-zA-Z0-9/\s\t:+]{1,}"
    LOG_RE = (r"^(?P<ip>" + IPV4_RE + r")"
              r"[\s|\t][-][\s|\t][-][\s|\t]"
              r"\[(?P<date>" + DATE_RE + r")\]"
              + SEPARATOR_RE +
              r"\"(?P<method>[A-Z]{3,7})"
              + SEPARATOR_RE +
              r"/(?P<url>" + URL_RE + r")"
              + SEPARATOR_RE +
              r"(?P<code>HTTP/1\.[0-1])\""
              + SEPARATOR_RE +
              r"(?P<status>[0-9]{3})"
              + SEPARATOR_RE +
              r"(?P<bytes>[0-9]{1,})"
              )

    def __init__(self):
        """Compile ``LOG_RE`` once so every ``parse`` call reuses it."""
        self.log_pattern = re.compile(LogParser.LOG_RE)

    def parse(self, line):
        """
        Parse a single log line.

        :param line: the raw log line.
        :returns: a dict holding every named group of ``LOG_RE`` (``ip``,
            ``date``, ``method``, ``url``, ``host``, ``path``, ``query``,
            ``code``, ``status``, ``bytes``; groups that did not take part
            in the match are ``None``), plus ``paths`` (list of the
            non-empty path segments) and ``raw`` (the unmodified input).
        :raises ValueError: if the line does not match ``LOG_RE``.
        """
        match = self.log_pattern.match(line)
        if match is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "line does not match the expected log format: %r" % (line,))

        found = match.groupdict()
        path = found.get("path")
        # Build a real list: filter() would be a lazy iterator on Python 3.
        if path:
            paths = [segment for segment in path.split("/") if segment]
        else:
            paths = []
        found.update(paths=paths)
        found.update(raw=line)
        return found
class TestStringParsing(unittest.TestCase):
    """Checks LogParser.parse against two sample access-log lines."""

    def test_parsing(self):
        """
        Parse one line whose URL has a domain host with a path and a query,
        and one whose URL is a bare IPv4 host with only a query.

        The parsed fields are still printed (single-argument print calls
        behave the same on Python 2 and 3), and are now also asserted so
        the test can actually fail.
        """
        log_parser = LogParser()

        raw = '11.22.33.44 - - [27/Jun/2018:01:19:44 +0000] "POST /api-lb.babel.photobox.com/babel/a/a?d=f&a=42 HTTP/1.1" 200 1160'
        log = log_parser.parse(raw)
        print("=============== TEST 1 ================\n")
        for name, value in log.items():
            print("%s\t:%s" % (name, value))
        self.assertEqual(log["ip"], "11.22.33.44")
        self.assertEqual(log["date"], "27/Jun/2018:01:19:44 +0000")
        self.assertEqual(log["method"], "POST")
        self.assertEqual(log["url"], "api-lb.babel.photobox.com/babel/a/a?d=f&a=42")
        self.assertEqual(log["host"], "api-lb.babel.photobox.com")
        self.assertEqual(log["path"], "/babel/a/a")
        # list() keeps the comparison valid whether paths is a list or a
        # lazy iterable.
        self.assertEqual(list(log["paths"]), ["babel", "a", "a"])
        self.assertEqual(log["query"], "d=f&a=42")
        self.assertEqual(log["code"], "HTTP/1.1")
        self.assertEqual(log["status"], "200")
        self.assertEqual(log["bytes"], "1160")
        self.assertEqual(log["raw"], raw)

        print("\n=============== TEST 2 ================\n")
        raw = '11.22.33.44 - - [27/Jun/2018:01:19:44 +0000] "POST /11.22.33.44?d=f&a=42 HTTP/1.1" 401 1160'
        log = log_parser.parse(raw)
        for name, value in log.items():
            print("%s\t:%s" % (name, value))
        self.assertEqual(log["ip"], "11.22.33.44")
        self.assertEqual(log["url"], "11.22.33.44?d=f&a=42")
        self.assertEqual(log["host"], "11.22.33.44")
        self.assertIsNone(log["path"])
        self.assertEqual(list(log["paths"]), [])
        self.assertEqual(log["query"], "d=f&a=42")
        self.assertEqual(log["status"], "401")
        self.assertEqual(log["bytes"], "1160")
        self.assertEqual(log["raw"], raw)
# Run the unittest test runner only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
output:
=============== TEST 1 ================
status :200
paths :['babel', 'a', 'a']
code :HTTP/1.1
url :api-lb.babel.photobox.com/babel/a/a?d=f&a=42
ip :11.22.33.44
bytes :1160
raw :11.22.33.44 - - [27/Jun/2018:01:19:44 +0000] "POST /api-lb.babel.photobox.com/babel/a/a?d=f&a=42 HTTP/1.1" 200 1160
host :api-lb.babel.photobox.com
path :/babel/a/a
date :27/Jun/2018:01:19:44 +0000
query :d=f&a=42
method :POST
=============== TEST 2 ================
status :401
paths :[]
code :HTTP/1.1
url :11.22.33.44?d=f&a=42
ip :11.22.33.44
bytes :1160
raw :11.22.33.44 - - [27/Jun/2018:01:19:44 +0000] "POST /11.22.33.44?d=f&a=42 HTTP/1.1" 401 1160
host :11.22.33.44
path :None
date :27/Jun/2018:01:19:44 +0000
query :d=f&a=42
method :POST