{
"cells": [
{
"cell_type": "markdown",
"id": "6c4d4faf-ab84-4a72-a80e-535b211747cd",
"metadata": {
"tags": []
},
"source": [
"# Shapley Values for Transformed Features of MOJO Models Demo\n",
"\n",
"This example demonstrates how to interpret a Driverless AI MOJO model using the H2O Sonar library, and how to retrieve the resulting data and plot of transformed feature importances."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "69f414e3-bc88-478b-bed5-890352b1041a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"import logging\n",
"\n",
"import datatable\n",
"import daimojo\n",
"import webbrowser\n",
"\n",
"from h2o_sonar import interpret\n",
"from h2o_sonar.lib.api import commons\n",
"from h2o_sonar.lib.api import explainers\n",
"from h2o_sonar.explainers import transformed_fi_shapley_explainer as explainer\n",
"from h2o_sonar.lib.api.models import ModelApi"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bbe0ca51",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id': 'h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer',\n",
" 'name': 'ShapleyMojoTransformedFeatureImportanceExplainer',\n",
" 'display_name': 'Shapley Values for Transformed Features of MOJO Models',\n",
" 'description': 'Shapley explanations are a technique with credible theoretical support that presents consistent global and local variable contributions. Local numeric Shapley values are calculated by tracing single rows of data through a trained tree ensemble and aggregating the contribution of each input variable as the row of data moves through the trained ensemble. For regression tasks Shapley values sum to the prediction of the (Driverless AI) MOJO model. For classification problems, Shapley values sum to the prediction of the MOJO model before applying the link function. Global Shapley values are the average of the absolute local Shapley values over every row of a data set.',\n",
" 'model_types': ['iid', 'time_series'],\n",
" 'can_explain': ['regression', 'binomial', 'multinomial'],\n",
" 'explanation_scopes': ['global_scope', 'local_scope'],\n",
" 'explanations': [{'explanation_type': 'global-feature-importance',\n",
" 'name': 'GlobalFeatImpExplanation',\n",
" 'category': None,\n",
" 'scope': 'global',\n",
" 'has_local': None,\n",
" 'formats': []},\n",
" {'explanation_type': 'local-feature-importance',\n",
" 'name': 'LocalFeatImpExplanation',\n",
" 'category': None,\n",
" 'scope': 'local',\n",
" 'has_local': None,\n",
" 'formats': []}],\n",
" 'parameters': [{'name': 'sample_size',\n",
" 'description': 'Sample size.',\n",
" 'comment': '',\n",
" 'type': 'int',\n",
" 'val': 100000,\n",
" 'predefined': [],\n",
" 'tags': [],\n",
" 'min_': 0.0,\n",
" 'max_': 0.0,\n",
" 'category': ''},\n",
" {'name': 'calculate_predictions',\n",
" 'description': 'Score dataset and include predictions in the explanation (local explanations speed-up cache).',\n",
" 'comment': '',\n",
" 'type': 'bool',\n",
" 'val': False,\n",
" 'predefined': [],\n",
" 'tags': [],\n",
" 'min_': 0.0,\n",
" 'max_': 0.0,\n",
" 'category': ''},\n",
" {'name': 'fast_approx_contrib',\n",
" 'description': 'Speed up predictions with fast contributions predictions approximation.',\n",
" 'comment': '',\n",
" 'type': 'bool',\n",
" 'val': True,\n",
" 'predefined': [],\n",
" 'tags': [],\n",
" 'min_': 0.0,\n",
" 'max_': 0.0,\n",
" 'category': ''}],\n",
" 'keywords': ['run-by-default',\n",
" 'explains-transformed_feature-importance',\n",
" 'is_fast']}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the explainer's metadata: supported problem types, explanation\n",
"# scopes and types, tunable parameters and keywords.\n",
"explainer_class = explainer.ShapleyMojoTransformedFeatureImportanceExplainer\n",
"interpret.describe_explainer(explainer_class)"
]
},
{
"cell_type": "markdown",
"id": "90d401d2-14cd-4686-982f-3cac9e9f5eb7",
"metadata": {
"tags": []
},
"source": [
"## Interpretation"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "15201d08-873b-45c3-82ad-052266f0526c",
"metadata": {},
"outputs": [],
"source": [
"# Input data: CSV dataset and the name of its target column.\n",
"target_col = \"default payment next month\"\n",
"dataset_path = \"../../data/creditcard.csv\"\n",
"\n",
"# Load the MOJO scorer and wrap it as an H2O Sonar model, declaring the\n",
"# MOJO's own feature names as the features the model uses.\n",
"mojo_path = \"../../data/models/creditcard-binomial.mojo\"\n",
"mojo_model = daimojo.model(mojo_path)\n",
"mojo_features = list(mojo_model.feature_names)\n",
"model = ModelApi().create_model(\n",
"    model_src=mojo_model,\n",
"    target_col=target_col,\n",
"    used_features=mojo_features,\n",
")\n",
"\n",
"# Directory for the interpretation results; created when missing.\n",
"results_location = \"./results\"\n",
"os.makedirs(results_location, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "0ba8f0aa-2e0e-4a0a-93ab-77ce9e968fa0",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/dvorka/h/mli/git/h2o-sonar/.venv/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer: progress 20.0%\n",
"h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer: progress 90.0%\n",
"h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer: progress 90.0%\n"
]
}
],
"source": [
"# Run the interpretation with only the transformed-feature Shapley explainer.\n",
"shapley_explainer_id = (\n",
"    explainer.ShapleyMojoTransformedFeatureImportanceExplainer.explainer_id()\n",
")\n",
"interpretation = interpret.run_interpretation(\n",
"    model=model,\n",
"    dataset=dataset_path,\n",
"    target_col=target_col,\n",
"    explainers=[shapley_explainer_id],\n",
"    results_location=results_location,\n",
"    log_level=logging.INFO,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ff9df4be-d4da-44db-a479-7d8d7f45c29d",
"metadata": {
"tags": []
},
"source": [
"## Explainer Result"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "25556ca5-8239-4201-8a23-1ace2b3a46d4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Look up this explainer's result in the finished interpretation by its ID.\n",
"shapley_explainer_id = (\n",
"    explainer.ShapleyMojoTransformedFeatureImportanceExplainer.explainer_id()\n",
")\n",
"result = interpretation.get_explainer_result(shapley_explainer_id)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "49cb45cc-1482-4535-9663-c8d316775223",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Open the interpretation's HTML report in the web browser; the cell\n",
"# output is the value returned by webbrowser.open().\n",
"html_report_location = interpretation.result.get_html_report_location()\n",
"webbrowser.open(html_report_location)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "375b9bd6-e424-4f8e-a73f-aa7fda7f7606",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'id': 'h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer',\n",
" 'name': 'ShapleyMojoTransformedFeatureImportanceExplainer',\n",
" 'display_name': 'Shapley Values for Transformed Features of MOJO Models',\n",
" 'description': 'Shapley explanations are a technique with credible theoretical support that presents consistent global and local variable contributions. Local numeric Shapley values are calculated by tracing single rows of data through a trained tree ensemble and aggregating the contribution of each input variable as the row of data moves through the trained ensemble. For regression tasks Shapley values sum to the prediction of the (Driverless AI) MOJO model. For classification problems, Shapley values sum to the prediction of the MOJO model before applying the link function. Global Shapley values are the average of the absolute local Shapley values over every row of a data set.',\n",
" 'model_types': ['iid', 'time_series'],\n",
" 'can_explain': ['regression', 'binomial', 'multinomial'],\n",
" 'explanation_scopes': ['global_scope', 'local_scope'],\n",
" 'explanations': [{'explanation_type': 'global-feature-importance',\n",
" 'name': 'Shapley Values for Transformed Features of MOJO Models',\n",
" 'category': 'DAI MODEL',\n",
" 'scope': 'global',\n",
" 'has_local': 'local-feature-importance',\n",
" 'formats': ['application/vnd.h2oai.json+datatable.jay',\n",
" 'application/vnd.h2oai.json+csv',\n",
" 'application/json']},\n",
" {'explanation_type': 'local-feature-importance',\n",
" 'name': 'Shapley Values for Transformed Features of MOJO Models',\n",
" 'category': 'DAI MODEL',\n",
" 'scope': 'local',\n",
" 'has_local': None,\n",
" 'formats': ['application/vnd.h2oai.json+datatable.jay']},\n",
" {'explanation_type': 'global-html-fragment',\n",
" 'name': 'Shapley Values for Transformed Features of MOJO Models',\n",
" 'category': 'DAI MODEL',\n",
" 'scope': 'global',\n",
" 'has_local': None,\n",
" 'formats': ['text/html']}],\n",
" 'parameters': [{'name': 'sample_size',\n",
" 'description': 'Sample size.',\n",
" 'comment': '',\n",
" 'type': 'int',\n",
" 'val': 100000,\n",
" 'predefined': [],\n",
" 'tags': [],\n",
" 'min_': 0.0,\n",
" 'max_': 0.0,\n",
" 'category': ''},\n",
" {'name': 'calculate_predictions',\n",
" 'description': 'Score dataset and include predictions in the explanation (local explanations speed-up cache).',\n",
" 'comment': '',\n",
" 'type': 'bool',\n",
" 'val': False,\n",
" 'predefined': [],\n",
" 'tags': [],\n",
" 'min_': 0.0,\n",
" 'max_': 0.0,\n",
" 'category': ''},\n",
" {'name': 'fast_approx_contrib',\n",
" 'description': 'Speed up predictions with fast contributions predictions approximation.',\n",
" 'comment': '',\n",
" 'type': 'bool',\n",
" 'val': True,\n",
" 'predefined': [],\n",
" 'tags': [],\n",
" 'min_': 0.0,\n",
" 'max_': 0.0,\n",
" 'category': ''}],\n",
" 'keywords': ['run-by-default',\n",
" 'explains-transformed_feature-importance',\n",
" 'is_fast']}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
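"# summary of the explainer result: metadata, produced explanations and their formats, parameters and keywords\n",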
"result.summary()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "046dadb3-4472-4068-9bfe-d8ba85b2489d",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'sample_size': 100000,\n",
" 'calculate_predictions': False,\n",
" 'fast_approx_contrib': True}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# explainer parameters recorded in this result\n",
"result.params()"
]
},
{
"cell_type": "markdown",
"id": "490d132b-b7e2-48a2-8ec4-dbd71886edf9",
"metadata": {
"tags": []
},
"source": [
"### Display Data"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2aa6274e-79d5-49b1-b29a-2263db5cb8a8",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" | feature | importance |
\n",
" | ▪▪▪▪ | ▪▪▪▪▪▪▪▪ |
\n",
" \n",
" \n",
" 0 | 10_PAY_0 | 0.484314 |
\n",
" 1 | 11_PAY_2 | 0.177288 |
\n",
" 2 | 8_LIMIT_BAL | 0.14293 |
\n",
" 3 | 19_PAY_AMT4 | 0.125988 |
\n",
" 4 | 17_PAY_AMT2 | 0.109839 |
\n",
" 5 | 1_BILL_AMT1 | 0.0856685 |
\n",
" 6 | 12_PAY_3 | 0.0460469 |
\n",
" 7 | 18_PAY_AMT3 | 0.0405643 |
\n",
" 8 | 15_PAY_6 | 0.035129 |
\n",
" 9 | 2_BILL_AMT2 | 0.0307533 |
\n",
" 10 | 13_PAY_4 | 0.0292232 |
\n",
" 11 | 6_BILL_AMT6 | 0.0230935 |
\n",
" 12 | 14_PAY_5 | 0.0203799 |
\n",
" 13 | 16_PAY_AMT1 | 0.014774 |
\n",
" 14 | 7_EDUCATION | 0.00983341 |
\n",
" 15 | 0_AGE | 0.00831316 |
\n",
" 16 | 9_MARRIAGE | 0.00725285 |
\n",
" 17 | 21_PAY_AMT6 | 0.00588243 |
\n",
" 18 | 20_PAY_AMT5 | 0.00541516 |
\n",
" 19 | 5_BILL_AMT5 | 0.00464642 |
\n",
" 20 | 4_BILL_AMT4 | 0.00144066 |
\n",
" 21 | 3_BILL_AMT3 | 0.000818275 |
\n",
" \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.data()"
]
},
{
"cell_type": "markdown",
"id": "df8a083b-3b88-4349-bb63-28551c24cc4f",
"metadata": {},
"source": [
"### Plot Feature Importance Data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "5a9d8262-574e-4073-a282-567d4fd1209c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"result.plot()"
]
},
{
"cell_type": "markdown",
"id": "a493b092-6236-419f-906c-16d52c47674f",
"metadata": {},
"source": [
"### Save Explainer Log and Data"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7c638a2c-6b01-4228-aa0f-93fd8dd7feab",
"metadata": {},
"outputs": [],
"source": [
"# save the explainer log\n",
"log_file_path = \"./feature-importance-demo.log\"\n",
"result.log(path=log_file_path)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f5d91240-09ff-4893-b652-b0259a8f222a",
"metadata": {},
"outputs": [],
"source": [
"!cat $log_file_path"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "da4e2b28-96d7-440e-bfea-41cb694a52d4",
"metadata": {},
"outputs": [],
"source": [
"# Save the explainer data as a ZIP archive next to the notebook.\n",
"archive_path = \"./feature-importance-demo-archive.zip\"\n",
"result.zip(file_path=archive_path)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c0540819-f896-481a-b470-b9d53a243b0a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: feature-importance-demo-archive.zip\n",
" Length Date Time Name\n",
"--------- ---------- ----- ----\n",
" 3597 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/result_descriptor.json\n",
" 110 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_html_fragment/text_html.meta\n",
" 390 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_html_fragment/text_html/explanation.html\n",
" 32992 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_html_fragment/text_html/fi-class-0.png\n",
" 0 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/log/explainer_run_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a.log\n",
" 1826902 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/work/shapley_formatted.zip\n",
" 1842216 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/work/shapley.bin\n",
" 4883188 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/work/shapley.csv\n",
" 185 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_datatable_jay.meta\n",
" 143 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_json.meta\n",
" 163 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_csv.meta\n",
" 1499 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json\n",
" 944 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay\n",
" 808 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_json/explanation.json\n",
" 1672 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_json/feature_importance_class_0.json\n",
" 807 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_csv/explanation.json\n",
" 803 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_csv/feature_importance_class_0.csv\n",
" 2 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/model_problems/problems_and_actions.json\n",
" 201 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/local_feature_importance/application_vnd_h2oai_json_datatable_jay.meta\n",
" 855 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/local_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json\n",
" 1842216 2022-10-10 22:56 explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/local_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay\n",
"--------- -------\n",
" 10439693 21 files\n"
]
}
],
"source": [
"!unzip -l feature-importance-demo-archive.zip"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72ae2b2f-5817-4ccc-a7d0-3cbc70d3eaa5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "h2o-sonar",
"language": "python",
"name": "h2o-sonar"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}