Skip to content

Instantly share code, notes, and snippets.

@athena15
Created April 8, 2019 17:31
Show Gist options
  • Save athena15/6f4ffe886aee603f69bcc3fe6b3c53de to your computer and use it in GitHub Desktop.
Save athena15/6f4ffe886aee603f69bcc3fe6b3c53de to your computer and use it in GitHub Desktop.
import os
import random
import _ucrdtw
import mlflow
import numpy as np
import pandas as pd
from mlflow.pyfunc import PythonModel
# Reference production-capacity curve (52 points) that product sales are compared to
pattern = [
    12.5, 13, 13.5, 14, 17, 10, 15, 16, 17, 16, 19, 14, 14,
    15, 11, 20, 18, 17, 19, 21, 19, 15, 17, 17, 16, 15,
    19, 19, 20, 21, 19, 18, 17, 16, 20, 19, 19, 18, 17,
    18, 26, 25, 17, 23, 21, 19, 28, 18, 19, 16, 21, 23,
]

# Synthetic rows for the model to score: a product label followed by a random
# permutation of the integers 1..106
new_prod_1 = ['New Product 1', *random.sample(range(1, 107), 106)]
new_prod_2 = ['New Product 2', *random.sample(range(1, 107), 106)]
new_sales_units = pd.DataFrame([new_prod_1, new_prod_2])

# Absolute path of the folder that will hold the saved model
# (Conda environment, MLmodel, and pickled model files)
MODEL_FOLDER = os.path.abspath('./model_folder')
# Define the model class, which inherits from PythonModel
class PyModel(PythonModel):
    """
    An object used to save a Python function as an MLflow model.

    Accepts keyword arguments for state variables; each one is stored as an
    instance attribute. ``predict`` reads two of them:

    * ``pattern`` - the query sequence every input series is compared to.
    * ``stretch_factor`` - forwarded to ``_ucrdtw.ucrdtw`` as its third
      argument (presumably the warping-window ratio; confirm against the
      ucrdtw documentation).
    """

    # Number of leading observations of each series handed to the DTW search.
    # This was a bare ``52`` in the original expression.
    SERIES_LENGTH = 52

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    # Override the .predict() method of PythonModel.
    def predict(self, context, input_data):
        """
        Compute the DTW distance between each input row and ``self.pattern``.

        Replace the body below with your custom function.
        Your function must accept a pandas DataFrame, and return a DataFrame
        or NumPy array. Be sure to include 'self.' before any state variables
        that need to be referenced.

        Args:
            context: Supplied by MLflow; not used by this implementation.
            input_data: pandas DataFrame with one series per row. Column 0
                holds the row key (product code); the remaining columns hold
                the observations.

        Returns:
            pandas DataFrame with one row per input row and two columns:
            ``pcode`` (the row key) and ``dtw_dist`` (float).
        """
        scored = []
        # Plain row iteration replaces np.apply_along_axis: the per-row result
        # is a ragged (key, series) pair, which NumPy cannot coerce into an
        # array (ValueError on NumPy >= 1.24), and coercing (key, distance)
        # would turn the distance into a string.
        for row in input_data.values:
            key, series = row[0], row[1:]
            # ucrdtw returns a pair; its second element is used as the distance.
            match = _ucrdtw.ucrdtw(
                list(series[:self.SERIES_LENGTH]),
                self.pattern,
                self.stretch_factor,
                True,
            )
            scored.append((key, match[1]))

        # Passing the column names explicitly keeps the schema for empty input.
        distances = pd.DataFrame(scored, columns=['pcode', 'dtw_dist'])
        distances['dtw_dist'] = distances['dtw_dist'].astype('float')
        return distances
# Set conda environment up
# This dict is handed to mlflow.pyfunc.save_model as `conda_env`, describing
# the environment the saved model should run in.
CONDA_ENV = {
    'name': 'mlflow-env',
    'channels': ['defaults'],
    'dependencies': [
        'python=3.7.0',
        'pandas',
        'numpy',
        # pip itself has to be a conda dependency for the nested pip section
        'pip',
        # enter any packages that need to be pip-installed below
        {
            'pip': [
                # MLflow's docs ask a custom python_model environment to
                # include at least the default pyfunc dependencies.
                'mlflow',
                'cloudpickle',
                # ucrdtw is published on PyPI; listing it as a plain conda
                # package relies on it existing in the 'defaults' channel,
                # which it presumably does not (confirm).
                'ucrdtw',
            ],
        },
    ],
}
# Construct and save the model
python_model = PyModel(stretch_factor=0.5, pattern=pattern)
# The destination is passed positionally because its keyword name differs
# between MLflow releases (`dst_path` before 1.0, `path` from 1.0 onwards);
# the keyword form `dst_path=` raises TypeError on current releases.
# NOTE(review): save_model may refuse to write into an existing model folder,
# so a second run can fail until MODEL_FOLDER is removed - confirm for the
# MLflow version in use.
mlflow.pyfunc.save_model(MODEL_FOLDER, python_model=python_model, conda_env=CONDA_ENV)
# Load the model in 'python_function' format.
# Prefer load_model (the replacement for the deprecated load_pyfunc) and fall
# back to load_pyfunc on MLflow releases that predate it.
_load_pyfunc_model = getattr(mlflow.pyfunc, 'load_model', None) or mlflow.pyfunc.load_pyfunc
loaded_model = _load_pyfunc_model(MODEL_FOLDER)
# Use the new model to evaluate new data using .predict()
output = loaded_model.predict(new_sales_units)
print(f'Output: {output}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment