Skip to content

Instantly share code, notes, and snippets.

@valzav
Created November 25, 2014 15:35
Show Gist options
  • Save valzav/5958437dc5cd707c8767 to your computer and use it in GitHub Desktop.
Save valzav/5958437dc5cd707c8767 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import sys
import re
import pygeoip
# NOTE: the GeoLite City database must be in place before this script runs.
# Download it from
#   http://geolite.maxmind.com/download/geoip/database/GeoLiteCity.dat.gz
# and extract it so that /tmp/GeoLiteCity.dat exists.
# The handle below is created once, at module load, and is the object that
# ipquery() queries for every lookup.
geoip_rawdata = pygeoip.GeoIP('/tmp/GeoLiteCity.dat')
def ipquery(ip):
    """Return the country name the GeoIP database reports for *ip*.

    *ip* is passed unchanged to ``geoip_rawdata.record_by_name``.  When the
    lookup yields no record, or the record carries no country name, the
    string "Unknown" is returned instead of raising, so that a single
    unresolvable client address does not abort the whole report.
    """
    data = geoip_rawdata.record_by_name(ip)
    # pygeoip hands back None for an address it has no record for; indexing
    # that result would raise TypeError.
    if not data:
        return "Unknown"
    return data.get('country_name') or "Unknown"
from datetime import *
# ---------------------------------------------------------------------------
# Unique-uuid report for "GET /manifest.json" requests in an access log.
#
# Usage: <script> ACCESS_LOG
#
# Prints the number of distinct uuids per day, per block of seven days and
# overall, followed by a breakdown by client version and by country (the
# country comes from ipquery()).  Days are detected by a change of date
# between consecutive matching lines, so the log is expected to be in
# chronological order.
#
# The code is written so that it runs unchanged on Python 2 and Python 3:
# every print() call takes exactly one argument.
# ---------------------------------------------------------------------------

# Log line of interest.  Groups: (1) client IP, (2) timestamp, (3) uuid,
# (4) version.  Only timestamps with a "+0000" offset are matched; any other
# line is ignored.  Compiled once instead of on every line.
manifest_request_re = re.compile(
    r'^([\d\.]+).+\[(.+) \+0000\] '
    r'"GET \/manifest\.json\?uuid=([\d\w\-]+)&version=v([\d\.]+)')

one_day_uuids = set()     # uuids seen on the day currently being read
seven_day_uuids = set()   # uuids seen in the current block of seven days
alltime_uuids = set()     # every uuid seen in the whole log
version_uuids = {}        # version string -> set of uuids
by_country = {}           # country name   -> set of uuids
current_date = ""         # "dd/mm/yy" of the day being read; "" before any
days = 0                  # number of days already completed and printed

if len(sys.argv) < 2:
    # Fail with a readable message rather than an IndexError traceback.
    sys.exit("usage: %s ACCESS_LOG" % sys.argv[0])

print("%32s" % "number of unique uuids")
with open(sys.argv[1]) as f:
    for line in f:
        m = manifest_request_re.search(line)
        if not m:
            continue
        client_ip, timestamp, uuid, version = m.groups()
        day = datetime.strptime(
            timestamp, "%d/%b/%Y:%H:%M:%S").strftime("%d/%m/%y")

        if current_date != day:
            # A new day starts: report the one that just ended (if any).
            if current_date != "":
                days += 1
                print("%32s %5d" % (current_date, len(one_day_uuids)))
            # Every seventh completed day closes a block; this branch also
            # runs on the very first line (days == 0), where it only resets.
            if days % 7 == 0:
                if current_date != "":
                    print("%32s %5d"
                          % ("7 days total", len(seven_day_uuids)))
                seven_day_uuids.clear()
            one_day_uuids.clear()
            current_date = day

        one_day_uuids.add(uuid)
        seven_day_uuids.add(uuid)
        alltime_uuids.add(uuid)
        version_uuids.setdefault(version, set()).add(uuid)
        by_country.setdefault(ipquery(client_ip), set()).add(uuid)

# Report the last day, which the loop never closes.  Skipped when no line
# matched, so no row with an empty date is printed.
if current_date != "":
    print("%32s %5d" % (current_date, len(one_day_uuids)))
    # `days` counts completed days only, so the open block spans
    # (days % 7) + 1 days.  A block holding just the final day is not
    # summarised: its count equals the daily line printed above.
    if days % 7:
        print("%32s %5d" % (str((days % 7) + 1) + " days unique",
                            len(seven_day_uuids)))

total_unique = len(alltime_uuids)
print("%32s %5d" % ("total unique", total_unique))

# Both breakdowns share one layout: blank line, title, then one row per key
# with its uuid count and share of all uuids.  total_unique cannot be zero
# inside the inner loop, because a bucket only exists once a uuid was seen.
for title, buckets in (("total by version", version_uuids),
                       ("total by country", by_country)):
    print("")
    print("%32s" % title)
    for name, members in buckets.items():
        count = len(members)
        print("%32s %6d %6.2f%%"
              % (name, count, count * 100.0 / total_unique))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment