Last active
February 29, 2016 15:14
-
-
Save parkeristyping/1de0b38c4383d29f10d6 to your computer and use it in GitHub Desktop.
Autoload CSVs from s3 into Redshift
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import json | |
import urllib | |
import boto3 | |
import psycopg2 | |
import re | |
s3 = boto3.client('s3') | |
def lambda_handler(event, context = {}): | |
""" | |
When you upload `my_csv 2015-01-21.csv` to the s3 bucket your | |
lambda is listening to, this copies its contents into `s3.my_csv` | |
table in specified Redshift db. | |
You have to have already made the table in the Redshift db e.g. | |
> create schema if not exists s3; | |
> create table my_csv (my_column varchar(255)); | |
And to use psycopg2 in a Lambda you have to follow instructions like | |
those found in the following link: | |
https://github.com/jkehler/awslambda-psycopg2 | |
""" | |
try: | |
bucket = event['Records'][0]['s3']['bucket']['name'] | |
key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']).decode('utf8') | |
table = re.sub(r'(input/|.csv)',"",key).split(" ")[0] | |
print("Okay so we're gonna try to copy {0}/{1} to {2}".format(bucket, key, table)) | |
conn = make_conn() | |
print("Connected to Redshift") | |
query = """ | |
COPY s3.{0} | |
FROM 's3://{1}/{2}' | |
CREDENTIALS 'aws_access_key_id=SDFKnotmycredsDSKFJD;aws_secret_access_key=DSFHKD5hjnotmycreds34jjk434343' | |
DELIMITER ',' | |
REMOVEQUOTES | |
IGNOREHEADER AS 1; | |
""".format(table, bucket, key) | |
print("Running query:\n{0}".format(query)) | |
conn.cursor().execute(query) | |
print("Query complete...") | |
conn.commit() | |
print("... and committed") | |
return True | |
except Exception as e: | |
print(e) | |
raise e | |
def make_conn(): | |
conn_string = """ | |
host='name.dfdnotreal343.us-east-1.redshift.amazonaws.com' | |
dbname='name' | |
user='name' | |
password='password' | |
port='5439"' | |
""" | |
return psycopg2.connect(conn_string) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment