{ "cells": [ { "cell_type": "markdown", "id": "01a9296c-7130-43ab-9355-b632ba48eb3e", "metadata": {}, "source": [ "# Decision Tree Surrogate Explainer Demo\n", "\n", "This example demonstrates how to interpret a **scikit-learn** model using\n", "the H2O Sonar library and how to plot its surrogate **decision tree**." ] }, { "cell_type": "code", "execution_count": 1, "id": "f6fd7532-0023-42f7-95fa-cefe588237b1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import logging\n", "import webbrowser\n", "\n", "import pandas\n", "\n", "from h2o_sonar import interpret\n", "from h2o_sonar.lib.api import commons, explainers\n", "from h2o_sonar.explainers.dt_surrogate_explainer import DecisionTreeSurrogateExplainer\n", "from h2o_sonar.lib.api.models import ModelApi\n", "\n", "from sklearn.ensemble import GradientBoostingClassifier" ] }, { "cell_type": "code", "execution_count": 2, "id": "d3bbfb25-f7e1-47e1-a2d9-1ceeedfa7d73", "metadata": {}, "outputs": [], "source": [ "# location handed to the interpretation run for storing its results\n", "results_location = \"../../results\"\n", "\n", "# dataset\n", "dataset_path = \"../../data/predictive/creditcard.csv\"\n", "target_col = \"default payment next month\"\n", "df = pandas.read_csv(dataset_path)\n", "\n", "# features (X) and target (y) used to fit the scikit-learn model\n", "X = df.drop(columns=[target_col])\n", "y = df[target_col]" ] }, { "cell_type": "code", "execution_count": 3, "id": "82366fdc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'id': 'h2o_sonar.explainers.dt_surrogate_explainer.DecisionTreeSurrogateExplainer',\n", " 'name': 'DecisionTreeSurrogateExplainer',\n", " 'display_name': 'Surrogate Decision Tree',\n", " 'tagline': 'DecisionTreeSurrogateExplainer.',\n", " 'description': 'The surrogate decision tree is an approximate overall flow chart of the model, created by training a simple decision tree on the original inputs and the predictions of the model.',\n", " 'brief_description': 'DecisionTreeSurrogateExplainer.',\n", " 'model_types': ['iid', 'time_series'],\n", " 
'can_explain': ['regression', 'binomial', 'multinomial'],\n", " 'explanation_scopes': ['global_scope', 'local_scope'],\n", " 'explanations': [{'explanation_type': 'global-decision-tree',\n", " 'name': 'GlobalDtExplanation',\n", " 'category': '',\n", " 'scope': 'global',\n", " 'has_local': '',\n", " 'formats': []},\n", " {'explanation_type': 'local-decision-tree',\n", " 'name': 'LocalDtExplanation',\n", " 'category': '',\n", " 'scope': 'local',\n", " 'has_local': '',\n", " 'formats': []}],\n", " 'keywords': ['run-by-default',\n", " 'requires-h2o3',\n", " 'surrogate',\n", " 'explains-approximate-behavior',\n", " 'h2o-sonar'],\n", " 'parameters': [{'name': 'debug_residuals',\n", " 'description': 'Debug model residuals.',\n", " 'comment': '',\n", " 'type': 'bool',\n", " 'val': False,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'debug_residuals_class',\n", " 'description': 'Class for debugging classification model logloss residuals, empty string for debugging regression model residuals.',\n", " 'comment': '',\n", " 'type': 'str',\n", " 'val': '',\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'dt_tree_depth',\n", " 'description': 'Decision tree depth.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 3,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'nfolds',\n", " 'description': 'Number of CV folds.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 3,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'qbin_cols',\n", " 'description': 'Quantile binning columns.',\n", " 'comment': '',\n", " 'type': 'list',\n", " 'val': None,\n", " 'predefined': [],\n", " 'tags': ['SOURCE_DATASET_COLUMN_NAMES'],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 
'qbin_count',\n", " 'description': 'Quantile bins count.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 0,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'categorical_encoding',\n", " 'description': 'Categorical encoding.',\n", " 'comment': 'Specify one of the following encoding schemes for handling of categorical features:\\n\\n_**AUTO**_: 1 column per categorical feature.\\n\\n_**Enum Limited**_: Automatically reduce categorical levels to the most prevalent ones during training and only keep the top 10 most frequent levels.\\n\\n_**One Hot Encoding**_: N+1 new columns for categorical features with N levels.\\n\\n_**Label Encoder**_: Convert every enum into the integer of its index (for example, level 0 -> 0, level 1 -> 1, etc.).\\n\\n_**Sort by Response**_: Reorders the levels by the mean response (for example, the level with lowest response -> 0, the level with second-lowest response -> 1, etc.).',\n", " 'type': 'str',\n", " 'val': 'onehotexplicit',\n", " 'predefined': ['AUTO',\n", " 'One Hot Encoding',\n", " 'Enum Limited',\n", " 'Sort by Response',\n", " 'Label Encoder'],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''}],\n", " 'metrics_meta': []}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# parameters\n", "interpret.describe_explainer(DecisionTreeSurrogateExplainer)" ] }, { "cell_type": "markdown", "id": "4f682e92-3a7d-451d-a27f-f3b1c4be043b", "metadata": {}, "source": [ "## Interpret" ] }, { "cell_type": "code", "execution_count": 4, "id": "b96b6e28-868a-467d-b872-0ffa3b7c9766", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/user/h/mli/git/h2o-sonar-FLOSS/.venv/lib/python3.11/site-packages/ragas/metrics/__init__.py:1: LangChainDeprecationWarning: As of langchain-core 0.3.0, LangChain uses pydantic v2 internally. 
The langchain_core.pydantic_v1 module was a compatibility shim for pydantic v1, and should no longer be used. Please update the code to import from Pydantic directly.\n", "\n", "For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`\n", "with: `from pydantic import BaseModel`\n", "or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. \tfrom pydantic.v1 import BaseModel\n", "\n", " from ragas.metrics._answer_correctness import AnswerCorrectness, answer_correctness\n", "/home/user/h/mli/git/h2o-sonar-FLOSS/.venv/lib/python3.11/site-packages/ragas/metrics/__init__.py:4: LangChainDeprecationWarning: As of langchain-core 0.3.0, LangChain uses pydantic v2 internally. The langchain.pydantic_v1 module was a compatibility shim for pydantic v1, and should no longer be used. Please update the code to import from Pydantic directly.\n", "\n", "For example, replace imports like: `from langchain.pydantic_v1 import BaseModel`\n", "with: `from pydantic import BaseModel`\n", "or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. \tfrom pydantic.v1 import BaseModel\n", "\n", " from ragas.metrics._context_entities_recall import (\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Checking whether there is an H2O instance running at http://localhost:54324..... 
not found.\n", "Attempting to start a local H2O server...\n", " Java Version: openjdk version \"10\" 2018-03-20; OpenJDK Runtime Environment 18.3 (build 10+44); OpenJDK 64-Bit Server VM 18.3 (build 10+44, mixed mode)\n", " Starting server from /home/user/h/mli/git/h2o-sonar-FLOSS/.venv/lib/python3.11/site-packages/h2o/backend/bin/h2o.jar\n", " Ice root: /tmp/tmpkayckszk\n", " JVM stdout: /tmp/tmpkayckszk/h2o_user_started_from_python.out\n", " JVM stderr: /tmp/tmpkayckszk/h2o_user_started_from_python.err\n", " Server is running at http://127.0.0.1:54324\n", " successful.o H2O server at http://127.0.0.1:54324 ...\n" ] }, { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
H2O_cluster_uptime:01 secs
H2O_cluster_timezone:Europe/Prague
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.46.0.9
H2O_cluster_version_age:2 months and 4 days
H2O_cluster_name:H2O_from_python_user_lbdhuu
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:4 Gb
H2O_cluster_total_cores:16
H2O_cluster_allowed_cores:16
H2O_cluster_status:locked, healthy
H2O_connection_url:http://127.0.0.1:54324
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
Python_version:3.11.11 final
\n", "
\n" ], "text/plain": [ "-------------------------- -----------------------------\n", "H2O_cluster_uptime: 01 secs\n", "H2O_cluster_timezone: Europe/Prague\n", "H2O_data_parsing_timezone: UTC\n", "H2O_cluster_version: 3.46.0.9\n", "H2O_cluster_version_age: 2 months and 4 days\n", "H2O_cluster_name: H2O_from_python_user_lbdhuu\n", "H2O_cluster_total_nodes: 1\n", "H2O_cluster_free_memory: 4 Gb\n", "H2O_cluster_total_cores: 16\n", "H2O_cluster_allowed_cores: 16\n", "H2O_cluster_status: locked, healthy\n", "H2O_connection_url: http://127.0.0.1:54324\n", "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", "H2O_internal_security: False\n", "Python_version: 3.11.11 final\n", "-------------------------- -----------------------------" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Connecting to H2O server at http://localhost:54324 ... successful.\n" ] }, { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
H2O_cluster_uptime:01 secs
H2O_cluster_timezone:Europe/Prague
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.46.0.9
H2O_cluster_version_age:2 months and 4 days
H2O_cluster_name:H2O_from_python_user_lbdhuu
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:4 Gb
H2O_cluster_total_cores:16
H2O_cluster_allowed_cores:16
H2O_cluster_status:locked, healthy
H2O_connection_url:http://localhost:54324
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
Python_version:3.11.11 final
\n", "
\n" ], "text/plain": [ "-------------------------- -----------------------------\n", "H2O_cluster_uptime: 01 secs\n", "H2O_cluster_timezone: Europe/Prague\n", "H2O_data_parsing_timezone: UTC\n", "H2O_cluster_version: 3.46.0.9\n", "H2O_cluster_version_age: 2 months and 4 days\n", "H2O_cluster_name: H2O_from_python_user_lbdhuu\n", "H2O_cluster_total_nodes: 1\n", "H2O_cluster_free_memory: 4 Gb\n", "H2O_cluster_total_cores: 16\n", "H2O_cluster_allowed_cores: 16\n", "H2O_cluster_status: locked, healthy\n", "H2O_connection_url: http://localhost:54324\n", "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", "H2O_internal_security: False\n", "Python_version: 3.11.11 final\n", "-------------------------- -----------------------------" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "X does not have valid feature names, but GradientBoostingClassifier was fitted with feature names\n", "2026-01-29 16:02:04,986 - h2o_sonar.explainers.dt_surrogate_explainer.DecisionTreeSurrogateExplainerLogger - INFO - Surrogate decision tree 848167ad-8173-475c-9268-a7e70047e751/73b8cca1-ad75-4754-a3a7-0c6654031656: connecting to H2O-3 server: localhost:54324\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Connecting to H2O server at http://localhost:54324 ... successful.\n" ] }, { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
H2O_cluster_uptime:01 secs
H2O_cluster_timezone:Europe/Prague
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.46.0.9
H2O_cluster_version_age:2 months and 4 days
H2O_cluster_name:H2O_from_python_user_lbdhuu
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:4 Gb
H2O_cluster_total_cores:16
H2O_cluster_allowed_cores:16
H2O_cluster_status:locked, healthy
H2O_connection_url:http://localhost:54324
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
Python_version:3.11.11 final
\n", "
\n" ], "text/plain": [ "-------------------------- -----------------------------\n", "H2O_cluster_uptime: 01 secs\n", "H2O_cluster_timezone: Europe/Prague\n", "H2O_data_parsing_timezone: UTC\n", "H2O_cluster_version: 3.46.0.9\n", "H2O_cluster_version_age: 2 months and 4 days\n", "H2O_cluster_name: H2O_from_python_user_lbdhuu\n", "H2O_cluster_total_nodes: 1\n", "H2O_cluster_free_memory: 4 Gb\n", "H2O_cluster_total_cores: 16\n", "H2O_cluster_allowed_cores: 16\n", "H2O_cluster_status: locked, healthy\n", "H2O_connection_url: http://localhost:54324\n", "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", "H2O_internal_security: False\n", "Python_version: 3.11.11 final\n", "-------------------------- -----------------------------" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", "drf Model Build progress: |" ] }, { "name": "stderr", "output_type": "stream", "text": [ "We have detected that your response column has only 2 unique values (0/1). If you wish to train a binary model instead of a regression model, convert your target column to categorical before training.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "██████████████████████████████████████████████████████| (done) 100%\n", "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Converting H2O frame to pandas dataframe using single-thread. 
For faster conversion using multi-thread, install polars and pyarrow and use it as pandas_df = h2o_df.as_data_frame(use_multi_thread=True)\n", "\n", "2026-01-29 16:02:08,111 - h2o_sonar.explainers.dt_surrogate_explainer.DecisionTreeSurrogateExplainerLogger - INFO - Surrogate decision tree 848167ad-8173-475c-9268-a7e70047e751/73b8cca1-ad75-4754-a3a7-0c6654031656: DONE calculation\n" ] } ], "source": [ "# scikit-learn model; random_state is fixed so that re-running the notebook trains the same model\n", "gradient_booster = GradientBoostingClassifier(learning_rate=0.1, random_state=42)\n", "gradient_booster.fit(X, y)\n", "\n", "# explainable model wrapping the fitted estimator\n", "model = ModelApi().create_model(\n", "    target_col=target_col,\n", "    model_src=gradient_booster,\n", "    used_features=X.columns.to_list(),\n", ")\n", "\n", "# run only the surrogate decision tree explainer, without overriding its parameters\n", "interpretation = interpret.run_interpretation(\n", "    dataset=df,\n", "    model=model,\n", "    target_col=target_col,\n", "    results_location=results_location,\n", "    log_level=logging.INFO,\n", "    explainers=[\n", "        commons.ExplainerToRun(\n", "            explainer_id=DecisionTreeSurrogateExplainer.explainer_id(),\n", "            params=\"\",\n", "        )\n", "    ],\n", ")" ] }, { "cell_type": "markdown", "id": "8e4598d6-84ce-4b7c-8cb3-b1023cb2a5b9", "metadata": {}, "source": [ "## Interact with the Explainer Result" ] }, { "cell_type": "code", "execution_count": 5, "id": "10a879bf-5fde-45c2-a3f3-ed07011f1ae5", "metadata": {}, "outputs": [], "source": [ "# retrieve the result\n", "result = interpretation.get_explainer_result(DecisionTreeSurrogateExplainer.explainer_id())\n", "\n", "# result.data() method is not supported in this explainer" ] }, { "cell_type": "code", "execution_count": 6, "id": "0407e6c0-8388-4ff6-91d4-698594c7b6cf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# open interpretation HTML report in web browser\n", "webbrowser.open(interpretation.result.get_html_report_location())" ] }, { "cell_type": "code", "execution_count": 7, "id": "7b3708c9-dd13-41c3-9490-dbc2e20d857d", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "{'id': 'h2o_sonar.explainers.dt_surrogate_explainer.DecisionTreeSurrogateExplainer',\n", " 'name': 'DecisionTreeSurrogateExplainer',\n", " 'display_name': 'Surrogate Decision Tree',\n", " 'tagline': 'DecisionTreeSurrogateExplainer.',\n", " 'description': 'The surrogate decision tree is an approximate overall flow chart of the model, created by training a simple decision tree on the original inputs and the predictions of the model.',\n", " 'brief_description': 'DecisionTreeSurrogateExplainer.',\n", " 'model_types': ['iid', 'time_series'],\n", " 'can_explain': ['regression', 'binomial', 'multinomial'],\n", " 'explanation_scopes': ['global_scope', 'local_scope'],\n", " 'explanations': [{'explanation_type': 'global-decision-tree',\n", " 'name': 'Decision Tree',\n", " 'category': 'SURROGATE MODELS',\n", " 'scope': 'global',\n", " 'has_local': 'local-decision-tree',\n", " 'formats': ['application/json']},\n", " {'explanation_type': 'local-decision-tree',\n", " 'name': 'Local DT',\n", " 'category': 'SURROGATE MODELS',\n", " 'scope': 'local',\n", " 'has_local': None,\n", " 'formats': ['application/json']},\n", " {'explanation_type': 'global-html-fragment',\n", " 'name': 'Surrogate Decision Tree',\n", " 'category': 'SURROGATE MODELS',\n", " 'scope': 'global',\n", " 'has_local': None,\n", " 'formats': ['text/html']},\n", " {'explanation_type': 'global-custom-archive',\n", " 'name': 'Decision tree surrogate rules ZIP archive',\n", " 'category': 'SURROGATE MODELS',\n", " 'scope': 'global',\n", " 'has_local': None,\n", " 'formats': ['application/zip']}],\n", " 'keywords': ['run-by-default',\n", " 'requires-h2o3',\n", " 'surrogate',\n", " 'explains-approximate-behavior',\n", " 'h2o-sonar'],\n", " 'parameters': [{'name': 'debug_residuals',\n", " 'description': 'Debug model residuals.',\n", " 'comment': '',\n", " 'type': 'bool',\n", " 'val': False,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 
'category': ''},\n", " {'name': 'debug_residuals_class',\n", " 'description': 'Class for debugging classification model logloss residuals, empty string for debugging regression model residuals.',\n", " 'comment': '',\n", " 'type': 'str',\n", " 'val': '',\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'dt_tree_depth',\n", " 'description': 'Decision tree depth.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 3,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'nfolds',\n", " 'description': 'Number of CV folds.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 3,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'qbin_cols',\n", " 'description': 'Quantile binning columns.',\n", " 'comment': '',\n", " 'type': 'list',\n", " 'val': None,\n", " 'predefined': [],\n", " 'tags': ['SOURCE_DATASET_COLUMN_NAMES'],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'qbin_count',\n", " 'description': 'Quantile bins count.',\n", " 'comment': '',\n", " 'type': 'int',\n", " 'val': 0,\n", " 'predefined': [],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''},\n", " {'name': 'categorical_encoding',\n", " 'description': 'Categorical encoding.',\n", " 'comment': 'Specify one of the following encoding schemes for handling of categorical features:\\n\\n_**AUTO**_: 1 column per categorical feature.\\n\\n_**Enum Limited**_: Automatically reduce categorical levels to the most prevalent ones during training and only keep the top 10 most frequent levels.\\n\\n_**One Hot Encoding**_: N+1 new columns for categorical features with N levels.\\n\\n_**Label Encoder**_: Convert every enum into the integer of its index (for example, level 0 -> 0, level 1 -> 1, etc.).\\n\\n_**Sort by Response**_: Reorders the levels by the mean 
response (for example, the level with lowest response -> 0, the level with second-lowest response -> 1, etc.).',\n", " 'type': 'str',\n", " 'val': 'onehotexplicit',\n", " 'predefined': ['AUTO',\n", " 'One Hot Encoding',\n", " 'Enum Limited',\n", " 'Sort by Response',\n", " 'Label Encoder'],\n", " 'tags': [],\n", " 'min_': 0.0,\n", " 'max_': 0.0,\n", " 'category': ''}],\n", " 'metrics_meta': []}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# summary\n", "result.summary()" ] }, { "cell_type": "code", "execution_count": 8, "id": "94ffc8d8-203d-4b28-91d4-cb2a0a1b7384", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'debug_residuals': False,\n", " 'debug_residuals_class': '',\n", " 'dt_tree_depth': 3,\n", " 'nfolds': 3,\n", " 'qbin_cols': None,\n", " 'qbin_count': 0,\n", " 'categorical_encoding': 'onehotexplicit'}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# parameters\n", "result.params()" ] }, { "cell_type": "markdown", "id": "9383ec7d-e274-47b6-a9d0-2ca22cb24a1e", "metadata": {}, "source": [ "### Plot the Decision Tree" ] }, { "cell_type": "code", "execution_count": 9, "id": "fea92a97-f7d6-4965-b247-57804ece5603", "metadata": { "scrolled": true, "tags": [] }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "%3\n", "\n", "\n", "\n", "0\n", "\n", "PAY_0 <= 1.500000\n", "\n", "\n", "\n", "0.0\n", "\n", "PAY_5 <= 1.000000\n", "\n", "\n", "\n", "0--0.0\n", "\n", """\n", "\n", "\n", "\n", "0.1\n", "\n", "PAY_3 <= -0.500000\n", "\n", "\n", "\n", "0--0.1\n", "\n", """\n", "\n", "\n", "\n", "0.0.0\n", "\n", "BILL_AMT1 <= 497566.500000\n", "\n", "\n", "\n", "0.0--0.0.0\n", "\n", """\n", "\n", "\n", "\n", "0.0.1\n", "\n", "PAY_2 <= 1.000000\n", "\n", "\n", "\n", "0.0--0.0.1\n", "\n", """\n", "\n", "\n", "\n", "0.1.0\n", "\n", "LIMIT_BAL <= 175605.500000\n", "\n", "\n", "\n", "0.1--0.1.0\n", "\n", """\n", "\n", "\n", "\n", "0.1.1\n", 
"\n", "PAY_AMT1 <= 15625.500000\n", "\n", "\n", "\n", "0.1--0.1.1\n", "\n", """\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.plot()\n", "\n", "# show plot in a separate view\n", "# result.plot().render(view=True)" ] }, { "cell_type": "markdown", "id": "f0d0a9c7-36fa-4fc6-a7d8-597c06f85af0", "metadata": {}, "source": [ "### Save the explainer log and data" ] }, { "cell_type": "code", "execution_count": 10, "id": "dbbc188f-1900-43a2-919e-983dcb17c897", "metadata": {}, "outputs": [], "source": [ "# save the explainer log\n", "result.log(path=\"./dt-surrogate-demo.log\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "376a7c86-825f-4f04-989c-031c58f04496", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2026-01-29 16:02:04,974 INFO Surrogate decision tree 848167ad-8173-475c-9268-a7e70047e751/73b8cca1-ad75-4754-a3a7-0c6654031656: BEGIN calculation\n", "2026-01-29 16:02:04,974 INFO Surrogate decision tree 848167ad-8173-475c-9268-a7e70047e751/73b8cca1-ad75-4754-a3a7-0c6654031656: dataset (10000, 25) loaded\n", "2026-01-29 16:02:04,974 INFO Surrogate decision tree 848167ad-8173-475c-9268-a7e70047e751/73b8cca1-ad75-4754-a3a7-0c6654031656: sampling down to 0 rows...\n", "2026-01-29 16:02:04,986 INFO Surrogate decision tree 848167ad-8173-475c-9268-a7e70047e751/73b8cca1-ad75-4754-a3a7-0c6654031656: connecting to H2O-3 server: localhost:54324\n", "2026-01-29 16:02:08,111 INFO Surrogate decision tree 848167ad-8173-475c-9268-a7e70047e751/73b8cca1-ad75-4754-a3a7-0c6654031656: DONE calculation\n" ] } ], "source": [ "!head dt-surrogate-demo.log" ] }, { "cell_type": "code", "execution_count": 12, "id": "833c06f0-0d5f-40cc-a195-dbad04573564", "metadata": {}, "outputs": [], "source": [ "# save the explainer data\n", "result.zip(file_path=\"./dt-surrogate-demo-archive.zip\")" ] }, { "cell_type": "code", "execution_count": 13, "id": 
"1dc369e3-f097-4a2b-8093-9bbcf17b070a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: dt-surrogate-demo-archive.zip\n", " Length Date Time Name\n", "--------- ---------- ----- ----\n", " 5418 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/result_descriptor.json\n", " 2 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/problems/problems_and_actions.json\n", " 131 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/local_decision_tree/application_json.meta\n", " 482 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/local_decision_tree/application_json/explanation.json\n", " 110 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_html_fragment/text_html.meta\n", " 87198 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_html_fragment/text_html/dt-class-0.png\n", " 356 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_html_fragment/text_html/explanation.html\n", " 133 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_decision_tree/application_json.meta\n", " 600 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_decision_tree/application_json/explanation.json\n", " 1134 2026-01-29 16:02 
explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_decision_tree/application_json/dt_class_0.json\n", " 773 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/log/explainer_run_73b8cca1-ad75-4754-a3a7-0c6654031656.log\n", " 924 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dt-class-0.dot\n", " 1042912 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dtpaths_frame.bin\n", " 984706 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dtPathsFrame.csv\n", " 1268 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dtSurrogate.json\n", " 1870 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dtModel.json\n", " 7856 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dt-class-0.dot.pdf\n", " 3131 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dt_surrogate_rules.zip\n", " 9477 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/work/dtsurr_mojo.zip\n", " 2 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/insights/insights_and_actions.json\n", " 140 2026-01-29 16:02 
explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_custom_archive/application_zip.meta\n", " 3131 2026-01-29 16:02 explainer_h2o_sonar_explainers_dt_surrogate_explainer_DecisionTreeSurrogateExplainer_73b8cca1-ad75-4754-a3a7-0c6654031656/global_custom_archive/application_zip/explanation.zip\n", "--------- -------\n", " 2151754 22 files\n" ] } ], "source": [ "!unzip -l dt-surrogate-demo-archive.zip" ] }, { "cell_type": "code", "execution_count": null, "id": "ff8c9f00-aa68-45a9-9648-f8f2fdcb92c6", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "H2O Sonar", "language": "python", "name": "h2o-sonar" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 5 }