An HTTPS endpoint that:
Types of online endpoints:
To deploy a model to a managed online endpoint, you need:
Blue/green deployment allows multiple models to be deployed to an endpoint. Traffic can be distributed between deployments for testing and transitioning:
When deploying MLflow models:
from azure.ai.ml.entities import Model, ManagedOnlineDeployment
from azure.ai.ml.constants import AssetTypes
# Describe the MLflow model stored in the local ./model folder.
model = Model(
    type=AssetTypes.MLFLOW_MODEL,
    path="./model",
    description="MLFlow model example",
)

# Configure the "blue" deployment that serves this model on the
# "endpoint-example" endpoint.
blue_deployment = ManagedOnlineDeployment(
    endpoint_name="endpoint-example",
    name="blue",
    model=model,
    instance_count=1,
    instance_type="Standard_F4s_v2",
)

# Submit the deployment and wait for the operation's result.
ml_client.online_deployments.begin_create_or_update(blue_deployment).result()
Direct traffic to specific deployments:
# Allocate 100% of the endpoint's traffic to the "blue" deployment,
# then submit the updated endpoint and wait for the result.
endpoint.traffic = {"blue": 100}
ml_client.begin_create_or_update(endpoint).result()
Switch between deployments:
# Allocate 25% of the traffic to the "blue" deployment and 75% to the
# "green" deployment, then submit the updated endpoint and wait for the result.
endpoint.traffic = {"blue": 25, "green": 75}
ml_client.begin_create_or_update(endpoint).result()
# Separate step: start deleting the "endpoint-example" online endpoint.
ml_client.online_endpoints.begin_delete(name="endpoint-example")
import json
import joblib
import numpy as np
import os
def init():
    """Load the model into the module-level ``model`` global.

    The model directory is read from the ``AZUREML_MODEL_DIR`` environment
    variable and ``model.pkl`` inside it is loaded with joblib, so that
    ``run`` can use it for scoring.

    Raises:
        KeyError: If ``AZUREML_MODEL_DIR`` is not set.
    """
    global model
    # Index os.environ instead of calling os.getenv(): a missing variable
    # then fails with a KeyError naming it, rather than os.path.join(None, ...)
    # raising an opaque TypeError.
    model_dir = os.environ["AZUREML_MODEL_DIR"]
    model = joblib.load(os.path.join(model_dir, "model.pkl"))
def run(raw_data):
    """Score one request and return the predictions.

    Args:
        raw_data: JSON text of an object whose ``"data"`` key holds the
            input to score.

    Returns:
        The model's predictions converted to a plain Python list.
    """
    payload = json.loads(raw_data)
    data = np.array(payload["data"])
    predictions = model.predict(data)
    # tolist() converts the prediction array into plain lists/scalars.
    return predictions.tolist()
Using a Conda YAML file:
# Conda environment definition: packages are resolved from the conda-forge channel.
name: basic-env-cpu
channels:
- conda-forge
dependencies:
# NOTE(review): Python 3.7 has reached end-of-life; confirm whether a newer
# version can be pinned without breaking the model.
- python=3.7
- scikit-learn
- pandas
- numpy
- matplotlib
from azure.ai.ml.entities import Environment
# Define the environment from a base Docker image plus the Conda file.
env = Environment(
    name="deployment-environment",
    description="Custom environment with Conda dependencies",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="./src/conda.yml",
)

# Create (or update) the environment through the ML client.
ml_client.environments.create_or_update(env)
from azure.ai.ml.entities import ManagedOnlineDeployment, CodeConfiguration
# Configure the "blue" deployment with a custom scoring script (score.py in
# ./src) and the "deployment-environment" environment.
blue_deployment = ManagedOnlineDeployment(
    endpoint_name="endpoint-example",
    name="blue",
    model=model,
    code_configuration=CodeConfiguration(
        code="./src",
        scoring_script="score.py",
    ),
    environment="deployment-environment",
    instance_count=1,
    instance_type="Standard_DS2_v2",
)

# Submit the deployment and wait for the operation's result.
ml_client.online_deployments.begin_create_or_update(blue_deployment).result()
from azure.ai.ml.entities import BatchEndpoint
# Define the batch endpoint.
endpoint = BatchEndpoint(
    name="endpoint-example",
    description="Batch endpoint example",
)

# begin_create_or_update() only starts the operation. Wait for its result,
# as the online-deployment examples do, so that later steps which reference
# the endpoint do not run before it has been created.
ml_client.batch_endpoints.begin_create_or_update(endpoint).result()
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes
# Register the local ./model folder as an MLflow model named "mlflow-model"
# and keep the object returned by the ML client.
model = ml_client.models.create_or_update(
    Model(
        type=AssetTypes.MLFLOW_MODEL,
        path="./model",
        name="mlflow-model",
    )
)
from azure.ai.ml.entities import BatchDeployment, BatchRetrySettings
from azure.ai.ml.constants import BatchDeploymentOutputAction
# Configure the batch deployment of the MLflow model on the "aml-cluster"
# compute target.
deployment = BatchDeployment(
    name="forecast-mlflow",
    endpoint_name=endpoint.name,
    model=model,
    compute="aml-cluster",
    instance_count=2,
    max_concurrency_per_instance=2,
    mini_batch_size=2,
    # Append each result row to a single output file.
    output_action=BatchDeploymentOutputAction.APPEND_ROW,
    output_file_name="predictions.csv",
    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),
)

# begin_create_or_update() only starts the operation. Wait for its result,
# as the online-deployment examples do, so failures surface here instead of
# being silently dropped.
ml_client.batch_deployments.begin_create_or_update(deployment).result()
To deploy a custom model, you must create:
The scoring script must include two functions:
The run method should return a pandas DataFrame or an array/list.
import os
import mlflow
import pandas as pd
def init():
    """Load the registered MLflow model into the module-level ``model``.

    The model directory is read from the ``AZUREML_MODEL_DIR`` environment
    variable; the ``model`` folder inside it is loaded with
    ``mlflow.pyfunc.load_model``.

    Raises:
        KeyError: If ``AZUREML_MODEL_DIR`` is not set.
    """
    global model  # Make assets available for scoring

    # Get the path to the registered model file and load it
    model_path = os.path.join(os.environ["AZUREML_MODEL_DIR"], "model")
    model = mlflow.pyfunc.load_model(model_path)
def run(mini_batch):
    """Score every CSV file in a mini-batch.

    Args:
        mini_batch: Sized collection of paths to CSV files.

    Returns:
        A list with one entry per predicted row; each entry holds the
        prediction and the base name of the file it came from.
    """
    print(f"run method start: {__file__}, run({len(mini_batch)} files)")

    result_list = []
    for file_path in mini_batch:
        data = pd.read_csv(file_path)
        pred = model.predict(data)

        df = pd.DataFrame(pred, columns=["predictions"])
        # Tag every prediction with the file it was computed from.
        df["file"] = os.path.basename(file_path)
        result_list.extend(df.values)

    return result_list
You can create an environment using a Docker image with Conda dependencies or a Dockerfile.
Conda YML File Example:
# Conda environment definition for the batch scoring environment.
name: basic-env-cpu
channels:
  - conda-forge
dependencies:
  - python=3.8
  - pandas
  - pip
  # Packages to be installed with pip must be nested under the "pip:" key;
  # at the same level as "- pip:" they are read as conda packages instead.
  - pip:
      - azureml-core
      - mlflow
from azure.ai.ml.entities import Environment
# Define the environment from a base Docker image plus the Conda file.
env = Environment(
    name="deployment-environment",
    description="Environment created from a Docker image plus Conda environment.",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="./src/conda-env.yml",
)

# Create (or update) the environment through the ML client.
ml_client.environments.create_or_update(env)
from azure.ai.ml.entities import BatchDeployment, BatchRetrySettings
from azure.ai.ml.constants import BatchDeploymentOutputAction
# Configure the batch deployment of a custom model: scoring code comes from
# ./code/score.py and runs in the environment defined by `env`.
deployment = BatchDeployment(
    name="forecast-mlflow",
    description="A sales forecaster",
    endpoint_name=endpoint.name,
    model=model,
    compute="aml-cluster",
    code_path="./code",
    scoring_script="score.py",
    environment=env,
    instance_count=2,
    max_concurrency_per_instance=2,
    mini_batch_size=2,
    # Append each result row to a single output file.
    output_action=BatchDeploymentOutputAction.APPEND_ROW,
    output_file_name="predictions.csv",
    retry_settings=BatchRetrySettings(max_retries=3, timeout=300),
    logging_level="info",
)

# begin_create_or_update() only starts the operation. Wait for its result,
# as the online-deployment examples do, so failures surface here instead of
# being silently dropped.
ml_client.batch_deployments.begin_create_or_update(deployment).result()
This structured approach enables you to deploy and consume models efficiently using Azure ML. Whether you need real-time predictions or large-scale batch processing, Azure ML has you covered.