# Created: April 8, 2019 17:31
# Source: GitHub gist athena15/6f4ffe886aee603f69bcc3fe6b3c53de
# Note: GitHub flagged this file as containing hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
import os
import random

import _ucrdtw
import mlflow
import numpy as np
import pandas as pd
from mlflow.pyfunc import PythonModel
# Production capacity pattern that product sales are compared against.
# The reference series has 52 points; the model below receives it as its
# `pattern` state variable.
pattern = [
    12.5, 13, 13.5, 14, 17, 10, 15, 16, 17, 16, 19, 14, 14,
    15, 11, 20, 18, 17, 19, 21, 19, 15, 17, 17, 16, 15,
    19, 19, 20, 21, 19, 18, 17, 16, 20, 19, 19, 18, 17,
    18, 26, 25, 17, 23, 21, 19, 28, 18, 19, 16, 21, 23,
]
# Create new product data for the model to evaluate.
# Each row is a product label followed by the integers 1..106 in random order
# (random.sample draws without replacement), giving a 2 x 107 frame whose
# first column is the label.
new_prod_1 = ['New Product 1'] + random.sample(range(1, 107), 106)
new_prod_2 = ['New Product 2'] + random.sample(range(1, 107), 106)
new_sales_units = pd.DataFrame([new_prod_1, new_prod_2])
# Absolute path of the folder that will hold the saved model
# (Conda environment, MLmodel, and pickled model files).
# Resolved against the current working directory at import time.
MODEL_FOLDER = os.path.abspath('./model_folder')
# Define the model class, which inherits from PythonModel
class PyModel(PythonModel):
    """
    An object used to save a Python function as an MLFlow model.

    Accepts keyword arguments for state variables; each keyword becomes an
    instance attribute of the same name. ``predict`` reads ``self.pattern``
    and ``self.stretch_factor``, so both must be supplied at construction.
    """

    def __init__(self, **kwargs):
        # Store every keyword argument as instance state.
        for key, value in kwargs.items():
            setattr(self, key, value)

    # Override the .predict() method of PythonModel.
    def predict(self, context, input_data):
        """
        Compute the DTW distance between each input row and ``self.pattern``.

        Replace the body below with your custom function. It must accept a
        pandas DataFrame and return a DataFrame or NumPy array. Be sure to
        include 'self.' before any state variables that need to be referenced.

        Args:
            context: Supplied by MLflow; not used here.
            input_data: DataFrame whose first column is the row key (product
                code) and whose remaining columns are the time-series values.

        Returns:
            DataFrame with columns ``pcode`` (the row key) and ``dtw_dist``
            (float), one row per input row.
        """
        # Only the leading len(pattern) points of each series are compared.
        # The pattern used in this file has 52 points, matching the window
        # that was previously hard-coded.
        window = len(self.pattern)

        # Iterate rows directly. Feeding a (key, values) tuple through
        # np.apply_along_axis builds a ragged array, which NumPy >= 1.24
        # rejects with a ValueError.
        records = []
        for row in input_data.values:
            pcode, series = row[0], row[1:]
            result = _ucrdtw.ucrdtw(
                list(series[:window]), self.pattern, self.stretch_factor, True
            )
            # Element [1] of the ucrdtw result is used as the distance.
            records.append((pcode, result[1]))

        distances = pd.DataFrame(records, columns=['pcode', 'dtw_dist'])
        distances['dtw_dist'] = distances['dtw_dist'].astype('float')
        return distances
# Conda environment specification that is saved alongside the model.
# NOTE(review): 'ucrdtw' is listed as a plain conda dependency, while the
# original comment refers to pip-installed packages. If it is only available
# from PyPI it may need to go in a {'pip': [...]} entry - confirm before
# deploying.
CONDA_ENV = {
    'name': 'mlflow-env',
    'channels': ['defaults'],
    # enter any packages that need to be pip-installed below
    'dependencies': [
        'python=3.7.0',
        'pandas',
        'numpy',
        'ucrdtw',
    ],
}
# Construct the model with its state variables and save it to MODEL_FOLDER.
# NOTE(review): mlflow's save_model is expected to refuse an existing
# destination path, so re-running this script may require removing
# MODEL_FOLDER first - confirm against the installed mlflow version.
python_model = PyModel(stretch_factor=0.5, pattern=pattern)
mlflow.pyfunc.save_model(dst_path=MODEL_FOLDER, python_model=python_model, conda_env=CONDA_ENV)

# Load the model back in 'python_function' format.
# NOTE(review): newer mlflow releases expose this as mlflow.pyfunc.load_model;
# load_pyfunc is kept here to match the mlflow version this script targets.
loaded_model = mlflow.pyfunc.load_pyfunc(MODEL_FOLDER)

# Use the loaded model to evaluate the new data using .predict()
output = loaded_model.predict(new_sales_units)
print(f'Output: {output}')
# (GitHub page footer: sign up for free, or sign in if you already have an account,
# to join the conversation and comment on this gist on GitHub.)