{"cells":[{"cell_type":"code","source":["import os\nimport sys\nimport warnings"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"69ab7caa-ba94-45a5-a965-bd0ed5d0c308"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":0},{"cell_type":"code","source":["import pandas as pd\nimport numpy as np\nfrom itertools import cycle\nimport matplotlib.pyplot as plt\nfrom sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\nfrom sklearn import metrics\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import ElasticNet\nfrom sklearn.linear_model import lasso_path, enet_path\nfrom sklearn import datasets\nfrom sklearn.tree import DecisionTreeClassifier\n# Import mlflow\nimport mlflow\nimport mlflow.sklearn"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"6732926b-fea5-4b1e-a7dc-74a7bb49f0eb"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":0},{"cell_type":"code","source":["# Load Diabetes datasets\ndiabetes = datasets.load_diabetes()\nX = diabetes.data\ny = diabetes.target"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"36e09d95-2544-49df-8f7b-d78a9243255b"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":0},{"cell_type":"code","source":["# Create pandas DataFrame for sklearn ElasticNet linear_model\nY = np.array([y]).transpose()\nd = np.concatenate((X, Y), axis=1)\ncols = diabetes.feature_names + [\"progression\"]\ndata = pd.DataFrame(d, columns=cols)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"41bd1d3a-5b82-4a3f-8525-fe914cf0ee11"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":0},{"cell_type":"code","source":["# Evaluate metrics\ndef eval_metrics(actual, pred):\n rmse = np.sqrt(mean_squared_error(actual, pred))\n mae = mean_absolute_error(actual, pred)\n r2 = r2_score(actual, pred)\n return rmse, mae, r2"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e2c83d83-6276-4e17-8afd-d11f0bdd1900"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n"]}}],"execution_count":0},{"cell_type":"code","source":["if __name__ == \"__main__\":\n warnings.filterwarnings(\"ignore\")\n np.random.seed(40)\n # Split the data into training and test sets. (0.75, 0.25) split.\n train, test = train_test_split(data)\n\n # The predicted column is \"progression\" which is a quantitative measure of disease progression one year after baseline\n train_x = train.drop([\"progression\"], axis=1)\n test_x = test.drop([\"progression\"], axis=1)\n train_y = train[[\"progression\"]]\n test_y = test[[\"progression\"]]\n\n alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.05\n l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.05\n\n # Run ElasticNet\n lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)\n lr.fit(train_x, train_y)\n predicted_qualities = lr.predict(test_x)\n (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)\n\n # Print out ElasticNet model metrics\n print(\"Elasticnet model (alpha=%f, l1_ratio=%f):\" % (alpha, l1_ratio))\n print(\" RMSE: %s\" % rmse)\n print(\" MAE: %s\" % mae)\n print(\" R2: %s\" % r2)\n \n # Log mlflow attributes for mlflow UI\n mlflow.log_param(\"alpha\", alpha)\n mlflow.log_param(\"l1_ratio\", l1_ratio)\n mlflow.log_metric(\"rmse\", rmse)\n mlflow.log_metric(\"r2\", r2)\n mlflow.log_metric(\"mae\", mae)\n mlflow.sklearn.log_model(lr, \"model\")\n mlflow.end_run()"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"b49093d0-7fbc-4a34-9031-521683d45ee7"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"