Last active
January 8, 2021 08:47
-
-
Save jens-andersson-2-wcar/a41ba0afd5e6b89cede7f7588eb8d6da to your computer and use it in GitHub Desktop.
SageMaker Autopilot models used for batch prediction (preprocess + predict)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Batch prediction with the models produced by a SageMaker Autopilot experiment.
#
# The script chains two models into one inference pipeline and runs a batch
# transform job over a CSV file in S3:
#   1. the data-processing (feature transform) model of the winning trial
#   2. the XGBoost prediction model of the winning trial
#
# All YOURxyz references need to be replaced with your own information.

from sagemaker.pipeline import PipelineModel
from sagemaker.model import Model
from sagemaker.transformer import Transformer
import sagemaker

# The location of the input dataset, WITHOUT the column to predict (and WITHOUT headers)
batch_input = 's3://YOURBUCKET/YOURFOLDER/predictmeplease.csv'

# The location to store the results of the batch transform job
# (will be stored under this folder, with a single value per line)
batch_output = 's3://YOURBUCKET/YOURFOLDER/predictedcolumn'

# IAM role ARN passed as `role` to both models and to the pipeline model.
# Kept in one place so the three usages cannot drift apart.
role_arn = 'arn:aws:iam::YOURACCOUNT:role/YOURROLE'

# Preprocessing model.
# image_uri based on https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
# model_data for the preprocessing model of the winning trial run
preprocess_model = Model(
    model_data='s3://YOURBUCKET/YOURFOLDER/YOUREXPERIMENT/data-processor-models/somethingsomething-a-dpp1-1-6250fb936ade44a586e58b1f39e5e42b998bdc72321d4/output/model.tar.gz',
    image_uri='141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-sklearn-automl:0.2-1-cpu-py3',
    role=role_arn,
    # Environment for the preprocessing container. Going by the variable
    # names, this selects feature-transform mode and makes recordio-protobuf
    # the default response format handed to the next container
    # (NOTE(review): confirm against the Autopilot container documentation).
    env={
        'AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF': '1',
        'AUTOML_TRANSFORM_MODE': 'feature-transform',
        'SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT': 'application/x-recordio-protobuf',
        'SAGEMAKER_PROGRAM': 'sagemaker_serve',
        'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code',
    },
)

# Prediction model.
# image_uri based on https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
# model_data for the prediction model of the winning trial run
xgb_model = Model(
    model_data='s3://YOURBUCKET/YOURFOLDER/YOUREXPERIMENT/tuning/somethingsomething-a-dpp1-xgb/tuning-job-1-73f824135c5f424091-026-66495d8c/output/model.tar.gz',
    image_uri='141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',
    role=role_arn,
)
# you can retrieve container references like this too if you do not want to hardcode as above:
#container = sagemaker.image_uris.retrieve("xgboost", region, "1.0-1-cpu-py3")

sagemaker_session = sagemaker.Session()

# Order matters: records go through the preprocessing model first, and its
# output is fed to the XGBoost model.
pipelinemodel = PipelineModel(
    role=role_arn,
    sagemaker_session=sagemaker_session,
    models=[
        preprocess_model,
        xgb_model,
    ],
)

# Run the batch transform: one CSV record per line in, results written under batch_output.
transformer = pipelinemodel.transformer(
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=batch_output,
)
transformer.transform(
    data=batch_input,
    data_type='S3Prefix',
    content_type='text/csv',
    split_type='Line',
)
# Block until the transform job has finished.
transformer.wait()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment