{ "cells": [ { "cell_type": "markdown", "id": "6c4d4faf-ab84-4a72-a80e-535b211747cd", "metadata": { "tags": [] }, "source": [ "# H2O Sonar Demo of Driverless AI MOJO Models" ] }, { "cell_type": "code", "execution_count": 15, "id": "69f414e3-bc88-478b-bed5-890352b1041a", "metadata": {}, "outputs": [], "source": [ "import os\n", "import logging\n", "\n", "import datatable\n", "import daimojo\n", "import webbrowser\n", "\n", "from h2o_sonar import interpret\n", "from h2o_sonar.lib.api import commons\n", "from h2o_sonar.lib.api import explainers\n", "from h2o_sonar.explainers import fi_naive_shapley_explainer as explainer\n", "from h2o_sonar.lib.api.models import ModelApi" ] }, { "cell_type": "code", "execution_count": 16, "id": "bbe0ca51", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'id': 'h2o_sonar.explainers.fi_naive_shapley_explainer.NaiveShapleyMojoFeatureImportanceExplainer',\n", " 'name': 'NaiveShapleyMojoFeatureImportanceExplainer',\n", " 'display_name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',\n", " 'tagline': 'NaiveShapleyMojoFeatureImportanceExplainer.',\n", " 'description': 'Shapley values for original features of (Driverless AI) MOJO models are approximated from the accompanying Shapley values for transformed features with the Naive Shapley method. This method makes the assumption that input features to a transformer are independent. 
For example, if the transformed feature, feature1_feature2, has a Shapley value of 0.5, then the Shapley value of the original features feature1 and feature2 will be 0.25 each.',\n", " 'brief_description': 'NaiveShapleyMojoFeatureImportanceExplainer.',\n", " 'model_types': ['iid'],\n", " 'can_explain': ['regression', 'binomial', 'multinomial'],\n", " 'explanation_scopes': ['global_scope', 'local_scope'],\n", " 'explanations': [{'explanation_type': 'global-feature-importance',\n", " 'name': 'GlobalFeatImpExplanation',\n", " 'category': '',\n", " 'scope': 'global',\n", " 'has_local': '',\n", " 'formats': []},\n", " {'explanation_type': 'local-feature-importance',\n", " 'name': 'LocalFeatImpExplanation',\n", " 'category': '',\n", " 'scope': 'local',\n", " 'has_local': '',\n", " 'formats': []}],\n", " 'keywords': ['run-by-default',\n", " 'explains-original-feature-importance',\n", " 'is_fast',\n", " 'h2o-sonar'],\n", " 'parameters': [{'name': 'sample_size',\n", " 'description': 'Sample size.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 100000,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'fast_approx_contribs',\n", " 'description': 'Speed up predictions with fast contributions predictions approximation.',\n", " 'comment': '',\n", " 'type': 'bool',\n", " 'val': True,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'leakage_warning_threshold',\n", " 'description': 'The threshold above which to report a potentially detected feature importance leak problem.',\n", " 'comment': '',\n", " 'type': 'float',\n", " 'val': 0.95,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''}],\n", " 'metrics_meta': []}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# explainer description\n", 
"interpret.describe_explainer(explainer.NaiveShapleyMojoFeatureImportanceExplainer)" ] }, { "cell_type": "markdown", "id": "90d401d2-14cd-4686-982f-3cac9e9f5eb7", "metadata": { "tags": [] }, "source": [ "## Interpretation" ] }, { "cell_type": "code", "execution_count": 17, "id": "15201d08-873b-45c3-82ad-052266f0526c", "metadata": {}, "outputs": [], "source": [ "# dataset\n", "dataset_path = \"../../data/predictive/creditcard.csv\"\n", "target_col = \"default payment next month\"\n", "\n", "# model: wrap the loaded MOJO so that H2O Sonar can use it\n", "mojo_path = \"../../data/predictive/models/creditcard-binomial.mojo\"\n", "mojo_model = daimojo.model(mojo_path)\n", "model = ModelApi().create_model(\n", "    model_src=mojo_model,\n", "    target_col=target_col,\n", "    used_features=list(mojo_model.feature_names),\n", ")\n", "\n", "# explainer: resolve the ID once so that the run and the result lookup cannot drift apart\n", "explainer_id = explainer.NaiveShapleyMojoFeatureImportanceExplainer.explainer_id()\n", "\n", "# results\n", "results_location = \"./results\"\n", "os.makedirs(results_location, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": 18, "id": "0ba8f0aa-2e0e-4a0a-93ab-77ce9e968fa0", "metadata": { "tags": [] }, "outputs": [], "source": [ "%%capture\n", "# run only the explainer selected above; %%capture keeps the run's output out of the notebook\n", "interpretation = interpret.run_interpretation(\n", "    dataset=dataset_path,\n", "    model=model,\n", "    target_col=target_col,\n", "    results_location=results_location,\n", "    explainers=[explainer_id],\n", "    log_level=logging.INFO,\n", ")" ] }, { "cell_type": "markdown", "id": "ff9df4be-d4da-44db-a479-7d8d7f45c29d", "metadata": { "tags": [] }, "source": [ "## Explainer Result" ] }, { "cell_type": "code", "execution_count": 19, "id": "25556ca5-8239-4201-8a23-1ace2b3a46d4", "metadata": { "tags": [] }, "outputs": [], "source": [ "# retrieve the result of the same explainer that was passed to run_interpretation\n", "result = interpretation.get_explainer_result(explainer_id)" ] }, { "cell_type": "code", "execution_count": 20, "id": "fb71aafe-543d-49dd-aa20-bae7753af62e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 20, "metadata": {},
"output_type": "execute_result" } ], "source": [ "# open interpretation HTML report in web browser\n", "webbrowser.open(interpretation.result.get_html_report_location())" ] }, { "cell_type": "code", "execution_count": 21, "id": "122fa09a-f552-4945-8b17-6b2c91fc64eb", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'id': 'h2o_sonar.explainers.fi_naive_shapley_explainer.NaiveShapleyMojoFeatureImportanceExplainer',\n", " 'name': 'NaiveShapleyMojoFeatureImportanceExplainer',\n", " 'display_name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',\n", " 'tagline': 'NaiveShapleyMojoFeatureImportanceExplainer.',\n", " 'description': 'Shapley values for original features of (Driverless AI) MOJO models are approximated from the accompanying Shapley values for transformed features with the Naive Shapley method. This method makes the assumption that input features to a transformer are independent. For example, if the transformed feature, feature1_feature2, has a Shapley value of 0.5, then the Shapley value of the original features feature1 and feature2 will be 0.25 each.',\n", " 'brief_description': 'NaiveShapleyMojoFeatureImportanceExplainer.',\n", " 'model_types': ['iid'],\n", " 'can_explain': ['regression', 'binomial', 'multinomial'],\n", " 'explanation_scopes': ['global_scope', 'local_scope'],\n", " 'explanations': [{'explanation_type': 'global-feature-importance',\n", " 'name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',\n", " 'category': 'DAI MODEL',\n", " 'scope': 'global',\n", " 'has_local': 'local-feature-importance',\n", " 'formats': ['application/vnd.h2oai.json+datatable.jay',\n", " 'application/vnd.h2oai.json+csv',\n", " 'application/json']},\n", " {'explanation_type': 'local-feature-importance',\n", " 'name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',\n", " 'category': 'DAI MODEL',\n", " 'scope': 'local',\n", " 'has_local': None,\n", " 'formats': 
['application/vnd.h2oai.json+datatable.jay']},\n", " {'explanation_type': 'global-html-fragment',\n", " 'name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',\n", " 'category': 'DAI MODEL',\n", " 'scope': 'global',\n", " 'has_local': None,\n", " 'formats': ['text/html']}],\n", " 'keywords': ['run-by-default',\n", " 'explains-original-feature-importance',\n", " 'is_fast',\n", " 'h2o-sonar'],\n", " 'parameters': [{'name': 'sample_size',\n", " 'description': 'Sample size.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 100000,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'fast_approx_contribs',\n", " 'description': 'Speed up predictions with fast contributions predictions approximation.',\n", " 'comment': '',\n", " 'type': 'bool',\n", " 'val': True,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'leakage_warning_threshold',\n", " 'description': 'The threshold above which to report a potentially detected feature importance leak problem.',\n", " 'comment': '',\n", " 'type': 'float',\n", " 'val': 0.95,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''}],\n", " 'metrics_meta': []}" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# summary\n", "result.summary()" ] }, { "cell_type": "code", "execution_count": 22, "id": "4a40f000-8120-4898-82d7-ca0ad9c3425b", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'sample_size': 100000,\n", " 'fast_approx_contribs': True,\n", " 'leakage_warning_threshold': 0.95}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Parameters\n", "result.params()" ] }, { "cell_type": "markdown", "id": "490d132b-b7e2-48a2-8ec4-dbd71886edf9", "metadata": { "tags": [] }, "source": [ "### Display Data" ] }, { 
"cell_type": "code", "execution_count": 23, "id": "2aa6274e-79d5-49b1-b29a-2263db5cb8a8", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
| feature | importance | |
|---|---|---|
| ▪▪▪▪ | ▪▪▪▪▪▪▪▪ | |
| 0 | PAY_0 | 0.592187 |
| 1 | PAY_2 | 0.224423 |
| 2 | LIMIT_BAL | 0.159352 |
| 3 | PAY_AMT4 | 0.14868 |
| 4 | PAY_AMT2 | 0.125437 |
| 5 | BILL_AMT1 | 0.101179 |
| 6 | PAY_3 | 0.0576715 |
| 7 | PAY_AMT3 | 0.0495318 |
| 8 | PAY_6 | 0.0453093 |
| 9 | PAY_4 | 0.0391064 |
| 10 | BILL_AMT2 | 0.0371473 |
| 11 | BILL_AMT6 | 0.0298867 |
| 12 | PAY_5 | 0.0270603 |
| 13 | PAY_AMT1 | 0.01873 |
| 14 | EDUCATION | 0.0131732 |
| 15 | AGE | 0.0110948 |
| 16 | MARRIAGE | 0.00884818 |
| 17 | PAY_AMT6 | 0.00759522 |
| 18 | PAY_AMT5 | 0.00719935 |
| 19 | BILL_AMT5 | 0.00589392 |
| 20 | BILL_AMT4 | 0.00189465 |
| 21 | BILL_AMT3 | 0.00108067 |