Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jens-andersson-2-wcar/a41ba0afd5e6b89cede7f7588eb8d6da to your computer and use it in GitHub Desktop.
Save jens-andersson-2-wcar/a41ba0afd5e6b89cede7f7588eb8d6da to your computer and use it in GitHub Desktop.
SageMaker Autopilot models used for batch prediction (preprocess + predict)
from sagemaker.pipeline import PipelineModel
from sagemaker.model import Model
from sagemaker.transformer import Transformer
import sagemaker

# Batch prediction with a SageMaker Autopilot winning trial: chain the trial's
# two artifacts -- the feature-preprocessing model and the XGBoost predictor --
# into a single inference PipelineModel, then run one batch transform job.
# (all YOURxyz references need to be replaced with your own information, of course)

# IAM role assumed by the models and the pipeline (replace with your own).
ROLE_ARN = 'arn:aws:iam::YOURACCOUNT:role/YOURROLE'

# The location of the input dataset, WITHOUT the column to predict (and WITHOUT headers)
batch_input = 's3://YOURBUCKET/YOURFOLDER/predictmeplease.csv'
# The location to store the results of the batch transform job (will be stored
# under this folder, with a single value per line)
batch_output = 's3://YOURBUCKET/YOURFOLDER/predictedcolumn'

# Stage 1: the data-preprocessing model of the winning trial run.
# image_uri based on https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
preprocess_model = Model(
    model_data='s3://YOURBUCKET/YOURFOLDER/YOUREXPERIMENT/data-processor-models/somethingsomething-a-dpp1-1-6250fb936ade44a586e58b1f39e5e42b998bdc72321d4/output/model.tar.gz',
    image_uri='141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-sklearn-automl:0.2-1-cpu-py3',
    role=ROLE_ARN,
    env={
        # Emit sparse recordio-protobuf features so the XGBoost stage can
        # consume the preprocessed output directly.
        'AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF': '1',
        'AUTOML_TRANSFORM_MODE': 'feature-transform',
        'SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT': 'application/x-recordio-protobuf',
        'SAGEMAKER_PROGRAM': 'sagemaker_serve',
        'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code',
    })

# Stage 2: the prediction model of the winning trial run.
# image_uri based on https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
# NOTE(review): original said 's3://YOURBUCKET/YOUR-OLDER/...' -- assumed to be
# a typo for YOURFOLDER, matching every other placeholder path in this script.
xgb_model = Model(
    model_data='s3://YOURBUCKET/YOURFOLDER/YOUREXPERIMENT/tuning/somethingsomething-a-dpp1-xgb/tuning-job-1-73f824135c5f424091-026-66495d8c/output/model.tar.gz',
    image_uri='141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',
    role=ROLE_ARN)
# you can retrieve container references like this too if you do not want to hardcode as above:
#container = sagemaker.image_uris.retrieve("xgboost", region, "1.0-1-cpu-py3")

sagemaker_session = sagemaker.Session()

# Chain preprocessing + prediction so a single transform job runs both stages.
pipelinemodel = PipelineModel(
    role=ROLE_ARN,
    sagemaker_session=sagemaker_session,
    models=[
        preprocess_model,
        xgb_model,
    ])

# Run the batch transform: CSV records in (one per line), predictions out.
transformer = pipelinemodel.transformer(
    instance_count=1, instance_type='ml.m4.xlarge', output_path=batch_output)
transformer.transform(
    data=batch_input, data_type='S3Prefix', content_type='text/csv', split_type='Line')
transformer.wait()  # block until the batch transform job completes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment