-
-
Save prakhar-goel/50f302d12549f43bb849 to your computer and use it in GitHub Desktop.
Google Vision API - Examples and Python utilities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from utils_google import get_vision_api | |
from utils_image import (read_image, read_image_base64, save_image, draw_face, draw_box, draw_text) | |
# Paths used by main(): the image that is sent for annotation and the file
# the annotated copy is written to.
inputfile, outputfile = "input.jpg", "output.jpg"
def main():
    """Annotate ``inputfile`` with the Vision API and save the drawn result.

    Builds the API client, sends one annotate request for ``inputfile`` and
    hands the response to ``show_results``, which writes ``outputfile``.
    """
    service = get_vision_api()
    request_body = make_request(inputfile)
    annotations = service.images().annotate(body=request_body).execute()
    show_results(inputfile, annotations, outputfile)
def make_request(inputfile):
    """Create a request batch (one file at a time).

    Args:
        inputfile: Path of the image; its content is embedded through
            ``read_image_base64``.

    Returns:
        A dict with a single entry under ``"requests"`` asking for label,
        text and face detection on the image.
    """
    # (feature type, maximum number of results) for each requested detector.
    wanted = (
        ("LABEL_DETECTION", 10),
        ("TEXT_DETECTION", 10),
        ("FACE_DETECTION", 20),
    )
    image = {"content": read_image_base64(inputfile)}
    features = [{"type": kind, "maxResults": limit} for kind, limit in wanted]
    return {"requests": [{"image": image, "features": features}]}
def show_results(inputfile, data, outputfile):
    """Draw the annotations found in ``data`` onto the image and save it.

    Args:
        inputfile: Path of the original image to draw on.
        data: Annotate response; every item of ``data['responses']`` may
            carry ``faceAnnotations``, ``labelAnnotations``,
            ``textAnnotations`` and ``logoAnnotations``.
        outputfile: Path the annotated image is written to.
    """
    # Read the original file.
    im = read_image(inputfile)

    # Draw faces, label text and boxes for each response.
    for r in data['responses']:
        if 'faceAnnotations' in r:
            draw_face(im, r['faceAnnotations'])

        if 'labelAnnotations' in r:
            labels = ", ".join(a['description'] for a in r['labelAnnotations'])
            captioned = draw_text(im, labels)
            # Guard against draw_text yielding None so the later drawing and
            # saving steps always have an image to work with.
            if captioned is not None:
                im = captioned

        for field in ('textAnnotations', 'logoAnnotations'):
            for a in r.get(field, ()):
                draw_box(im, a['boundingPoly']['vertices'])

    # Save to the output file.
    save_image(outputfile, im)
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import httplib2 | |
import sys | |
from googleapiclient import discovery | |
from oauth2client import tools, file, client | |
# Limited preview only (sorry!): the API is described by a local discovery
# document, which get_vision_api() opens by this name.
API_DISCOVERY_FILE = "vision_discovery_v1alpha1.json"
""" Google Authentication Utilities """ | |
def get_vision_api():
    """Build the Vision API client from the local discovery document.

    Returns:
        The service object created by ``discovery.build_from_document`` for
        the document stored in ``API_DISCOVERY_FILE``.
    """
    credentials = get_api_credentials('https://www.googleapis.com/auth/cloud-platform')
    with open(API_DISCOVERY_FILE, 'r') as f:
        doc = f.read()
    # Hand over a single, already-authorized http object: newer
    # google-api-python-client releases reject receiving both ``http`` and
    # ``credentials`` in the same call.
    http = credentials.authorize(httplib2.Http())
    return discovery.build_from_document(doc, http=http)
def get_api_credentials(scope, service_account=True):
    """Return OAuth2 credentials for ``scope``, reusing cached ones if valid.

    Credentials are looked up in the local ``oAuth2.json`` store first. Only
    when none are stored, or the stored ones are invalid, is a new flow run.

    Args:
        scope: OAuth2 scope to request.
        service_account: When true (default), use the server-to-server flow
            with the key in ``service_account.json``; otherwise run the
            interactive flow based on ``client_secrets.json`` located next to
            this module.

    Returns:
        The credentials object.
    """
    # Imported here because this module's import block does not provide them.
    import argparse
    import json
    import os

    storage = file.Storage('oAuth2.json')  # local storage of oAuth tokens
    credentials = storage.get()
    if credentials is not None and not credentials.invalid:
        return credentials  # cached credentials are still usable

    if service_account:  # server 2 server flow
        with open('service_account.json') as f:
            account = json.load(f)
        email = account['client_email']
        key = account['private_key']
        # NOTE(review): SignedJwtAssertionCredentials appears to exist only in
        # older oauth2client releases -- confirm against the installed version.
        credentials = client.SignedJwtAssertionCredentials(email, key, scope=scope)
        storage.put(credentials)
    else:  # normal oAuth2 flow
        client_secrets = os.path.join(os.path.dirname(__file__), 'client_secrets.json')
        flow = client.flow_from_clientsecrets(client_secrets, scope=scope)
        parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.RawDescriptionHelpFormatter,
            parents=[tools.argparser],
        )
        flags = parser.parse_args(sys.argv[1:])
        credentials = tools.run_flow(flow, storage, flags)
    return credentials
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
from base64 import b64encode | |
""" read/write utilities """ | |
def read_image(filename):
    """Load ``filename`` with ``cv2.imread`` and return whatever it yields."""
    image = cv2.imread(filename)
    return image
def save_image(filename, im):
    """Write the image ``im`` to ``filename`` with ``cv2.imwrite``.

    The result of ``cv2.imwrite`` is discarded; the function returns None.
    """
    cv2.imwrite(filename, im)
    return None
def read_image_base64(filename):
    """Return the content of ``filename`` as base64 text.

    The file is read in binary mode. The encoded value is decoded to ASCII
    text because ``b64encode`` yields ``bytes`` on Python 3, which cannot be
    embedded in a JSON request body.

    Args:
        filename: Path of the file to encode.

    Returns:
        The base64 representation of the file content as a text string.
    """
    with open(filename, 'rb') as f:
        encoded = b64encode(f.read())
    return encoded.decode('ascii')
""" OpenCV drawing utilities """ | |
def draw_face(im, annotations):
    """Draw boxes, a pan/tilt arrow and landmark points for each face.

    Args:
        im: Image to draw on (modified in place by the drawing helpers).
        annotations: Iterable of face annotations providing ``boundingPoly``,
            ``fdBoundingPoly``, ``panAngle``, ``tiltAngle`` and ``landmarks``.
    """
    for face in annotations:
        draw_box(im, face['boundingPoly']['vertices'])
        fd_top_left, fd_bottom_right = draw_box(im, face['fdBoundingPoly']['vertices'])
        # The arrow starts at the top-left corner of the fd box and is scaled
        # by that box's width.
        fd_width = fd_bottom_right[0] - fd_top_left[0]
        draw_angle(im, face['panAngle'], face['tiltAngle'], pt=fd_top_left, size=fd_width)
        for landmark in face['landmarks']:
            draw_point(im, landmark['position'])
def draw_angle(im, pan, tilt, pt, size):
    """Draw a green arrow from ``pt`` whose offset reflects pan and tilt.

    Each angle is mapped linearly from the range [-180, 180] onto
    [-size, size]; pan gives the horizontal offset and tilt the vertical one.

    Args:
        im: Image to draw on.
        pan: Pan angle, mapped to the x offset.
        tilt: Tilt angle, mapped to the y offset.
        pt: ``(x, y)`` start point of the arrow.
        size: Offset that corresponds to an angle of 180.
    """
    # Imported here because this module's import block does not provide ``np``.
    import numpy as np

    angle_range = [-180, 180]
    offset_range = [-size, size]
    x_delta = np.interp(pan, angle_range, offset_range)
    y_delta = np.interp(tilt, angle_range, offset_range)
    tip = (pt[0] + int(x_delta), pt[1] + int(y_delta))
    cv2.arrowedLine(im, pt, tip, (0, 255, 0))
def extract_vertices(vertices):
    """Return the two opposite corners of the box enclosing ``vertices``.

    A vertex that lacks ``'x'`` or ``'y'`` is treated as having 0 for that
    coordinate, the same default ``draw_box`` applies. The corners are
    computed from the coordinate extremes, so the result is also defined for
    polygons that are not axis-aligned rectangles (no single vertex needs to
    hold both minima or both maxima).

    Args:
        vertices: Non-empty sequence of dicts with optional ``'x'``/``'y'``.

    Returns:
        A tuple ``(v1, v2)`` of dicts: ``v1`` holds the minimum x and y,
        ``v2`` the maximum x and y.

    Raises:
        ValueError: If ``vertices`` is empty.
    """
    if not vertices:
        raise ValueError("extract_vertices() requires at least one vertex")
    xs = [v.get('x', 0) for v in vertices]
    ys = [v.get('y', 0) for v in vertices]
    v1 = {'x': min(xs), 'y': min(ys)}
    v2 = {'x': max(xs), 'y': max(ys)}
    return v1, v2
def draw_box(im, vertices):
    """Draw a red rectangle spanning the corners extracted from ``vertices``.

    Args:
        im: Image to draw on.
        vertices: Vertex dicts passed on to ``extract_vertices``.

    Returns:
        The two ``(x, y)`` corner points used for the rectangle; a missing
        coordinate defaults to 0.
    """
    top_left, bottom_right = [
        (corner.get('x', 0), corner.get('y', 0))
        for corner in extract_vertices(vertices)
    ]
    cv2.rectangle(im, top_left, bottom_right, (0, 0, 255))
    return top_left, bottom_right
def draw_point(im, position):
    """Draw a red circle of radius 3 at ``position`` and return its centre.

    Args:
        im: Image to draw on.
        position: Dict with optional ``'x'``/``'y'``; a missing coordinate
            defaults to 0 and values are converted with ``int``.

    Returns:
        The ``(x, y)`` centre as a tuple of ints.
    """
    x = int(position.get('x', 0))
    y = int(position.get('y', 0))
    centre = (x, y)
    cv2.circle(im, centre, 3, (0, 0, 255))
    return centre
def draw_text(im, text):
    """Return a copy of ``im`` extended at the bottom with ``text`` on it.

    Font scales are tried from large to small and the first one whose
    rendered width fits the image width is used. If none fits, the smallest
    scale tried is used anyway, so an image is always returned.

    Args:
        im: Source image; it is not modified.
        text: Text to write in white on the added bottom border.

    Returns:
        A new image: ``im`` plus a bottom border containing ``text``.
    """
    # Imported here because this module's import block does not provide ``np``.
    import numpy as np

    font_face = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 1
    for scale in np.arange(2, 0, -0.2):
        (w, _h), baseline = cv2.getTextSize(text, font_face, scale, thickness)
        if w <= im.shape[1]:
            break
    # When the loop ends without a break, ``scale`` and ``baseline`` hold the
    # values of the last (smallest) scale, which serves as the fallback.
    new_img = cv2.copyMakeBorder(im, 0, baseline * 4, 0, 0, cv2.BORDER_CONSTANT, value=0)
    origin = (baseline * 2, new_img.shape[0] - baseline)
    cv2.putText(new_img, text, origin, font_face, scale, (255, 255, 255), thickness)
    return new_img
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment