Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jens-andersson-2-wcar/a41ba0afd5e6b89cede7f7588eb8d6da to your computer and use it in GitHub Desktop.
Save jens-andersson-2-wcar/a41ba0afd5e6b89cede7f7588eb8d6da to your computer and use it in GitHub Desktop.
SageMaker Autopilot models used for batch prediction (preprocess + predict)
from sagemaker.pipeline import PipelineModel
from sagemaker.model import Model
from sagemaker.transformer import Transformer
import sagemaker

# Batch prediction with a SageMaker Autopilot winning trial: chain the trial's
# two artifacts -- the feature-preprocessing model and the XGBoost predictor --
# into a single inference PipelineModel, then run one batch transform job.
# (all YOURxyz references need to be replaced with your own information, of course)

# IAM role assumed by the models and the pipeline (replace with your own).
ROLE_ARN = 'arn:aws:iam::YOURACCOUNT:role/YOURROLE'

# The location of the input dataset, WITHOUT the column to predict (and WITHOUT headers)
batch_input = 's3://YOURBUCKET/YOURFOLDER/predictmeplease.csv'
# The location to store the results of the batch transform job (will be stored
# under this folder, with a single value per line)
batch_output = 's3://YOURBUCKET/YOURFOLDER/predictedcolumn'

# Stage 1: the data-preprocessing model of the winning trial run.
# image_uri based on https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
preprocess_model = Model(
    model_data='s3://YOURBUCKET/YOURFOLDER/YOUREXPERIMENT/data-processor-models/somethingsomething-a-dpp1-1-6250fb936ade44a586e58b1f39e5e42b998bdc72321d4/output/model.tar.gz',
    image_uri='141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-sklearn-automl:0.2-1-cpu-py3',
    role=ROLE_ARN,
    env={
        # Emit sparse recordio-protobuf features so the XGBoost stage can
        # consume the preprocessed output directly.
        'AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF': '1',
        'AUTOML_TRANSFORM_MODE': 'feature-transform',
        'SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT': 'application/x-recordio-protobuf',
        'SAGEMAKER_PROGRAM': 'sagemaker_serve',
        'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code',
    })

# Stage 2: the prediction model of the winning trial run.
# image_uri based on https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
# NOTE(review): original said 's3://YOURBUCKET/YOUR-OLDER/...' -- assumed to be
# a typo for YOURFOLDER, matching every other placeholder path in this script.
xgb_model = Model(
    model_data='s3://YOURBUCKET/YOURFOLDER/YOUREXPERIMENT/tuning/somethingsomething-a-dpp1-xgb/tuning-job-1-73f824135c5f424091-026-66495d8c/output/model.tar.gz',
    image_uri='141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',
    role=ROLE_ARN)
# you can retrieve container references like this too if you do not want to hardcode as above:
#container = sagemaker.image_uris.retrieve("xgboost", region, "1.0-1-cpu-py3")

sagemaker_session = sagemaker.Session()

# Chain preprocessing + prediction so a single transform job runs both stages.
pipelinemodel = PipelineModel(
    role=ROLE_ARN,
    sagemaker_session=sagemaker_session,
    models=[
        preprocess_model,
        xgb_model,
    ])

# Run the batch transform: CSV records in (one per line), predictions out.
transformer = pipelinemodel.transformer(
    instance_count=1, instance_type='ml.m4.xlarge', output_path=batch_output)
transformer.transform(
    data=batch_input, data_type='S3Prefix', content_type='text/csv', split_type='Line')
transformer.wait()  # block until the batch transform job completes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment