{ "cells": [ { "cell_type": "markdown", "id": "a89d1e71-a876-4189-b6ed-ee226dad679e", "metadata": {}, "source": [ "# Dataset and Model Insights Explainer Demo\n", "\n", "This example demonstrates how to use the **Dataset and Model Insights Explainer** and how to retrieve problems from the interpretation." ] },
{ "cell_type": "code", "execution_count": 1, "id": "e4108f24-6e39-43a2-abda-456129eaf361", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# standard library\n", "import logging\n", "import os\n", "import webbrowser\n", "\n", "# third-party: daimojo (loads the MOJO model) and H2O Sonar\n", "import daimojo\n", "from h2o_sonar import interpret\n", "from h2o_sonar.explainers.dataset_and_model_insights_explainer import DatasetAndModelInsightsExplainer\n", "from h2o_sonar.lib.api.models import ModelApi" ] },
{ "cell_type": "code", "execution_count": 2, "id": "e8858dd3-64f5-4341-b3f7-c3a4115fdb24", "metadata": {}, "outputs": [], "source": [ "# dataset: CSV location and the name of the target column\n", "dataset_path = \"../../data/creditcard100_pred_missing_values.csv\"\n", "target_col = \"predictions\"\n", "\n", "# model: load the MOJO and create the H2O Sonar model from it\n", "mojo_path = \"../../data/models/creditcard-binomial.mojo\"\n", "mojo_model = daimojo.model(mojo_path)\n", "model = ModelApi().create_model(\n", "    model_src=mojo_model,\n", "    target_col=target_col,\n", "    used_features=list(mojo_model.feature_names),\n", ")\n", "\n", "# results: directory for the interpretation results (created when missing)\n", "results_location = \"./results\"\n", "os.makedirs(results_location, exist_ok=True)" ] },
{ "cell_type": "code", "execution_count": 3, "id": "6eaeb369", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'id': 'h2o_sonar.explainers.dataset_and_model_insights_explainer.DatasetAndModelInsightsExplainer',\n", " 'name': 'DatasetAndModelInsightsExplainer',\n", " 'display_name': 'Dataset and model insights explainer',\n", " 'description': 'The explainer checks the dataset and model for various issues. 
For example, it provides problems and actions for missing values in the target column and a low number of unique values across columns of a dataset.',\n", " 'model_types': ['iid'],\n", " 'can_explain': ['regression', 'binomial', 'multinomial'],\n", " 'explanation_scopes': ['global_scope'],\n", " 'explanations': [{'explanation_type': 'global-text-explanation',\n", " 'name': 'TextExplanation',\n", " 'category': None,\n", " 'scope': 'global',\n", " 'has_local': None,\n", " 'formats': []}],\n", " 'parameters': [],\n", " 'keywords': []}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# explainer description: supported model types, explanations and parameters\n", "interpret.describe_explainer(DatasetAndModelInsightsExplainer)" ] },
{ "cell_type": "markdown", "id": "0a146156-5e48-4fe0-9f58-f37c305f89a6", "metadata": {}, "source": [ "## Interpret" ] },
{ "cell_type": "code", "execution_count": 4, "id": "f9010a65-f211-4160-a7a6-2dca49618f06", "metadata": {}, "outputs": [], "source": [ "# run the interpretation with a single explainer and INFO-level logging\n", "interpretation = interpret.run_interpretation(\n", "    dataset=dataset_path,\n", "    model=model,\n", "    target_col=target_col,\n", "    results_location=results_location,\n", "    explainers=[DatasetAndModelInsightsExplainer.explainer_id()],\n", "    log_level=logging.INFO,\n", ")" ] },
{ "cell_type": "markdown", "id": "94378f1c-9778-42ad-a9c5-a3696266d90c", "metadata": {}, "source": [ "## Interact with the Explainer Result" ] },
{ "cell_type": "code", "execution_count": 5, "id": "ecd0a2d7-c462-4b9e-aab7-ded94a73a278", "metadata": {}, "outputs": [], "source": [ "# retrieve the result\n", "result = interpretation.get_explainer_result(DatasetAndModelInsightsExplainer.explainer_id())" ] },
{ "cell_type": "code", "execution_count": 6, "id": "35e6147e-4e12-4faf-9584-4b3ad93d907b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# open interpretation HTML report in web browser\n", "webbrowser.open(interpretation.result.get_html_report_location())" ] },
{ "cell_type": "code", "execution_count": 7, "id": "324568e9-5495-4a5f-969b-ee9108b5e644", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'id': 'h2o_sonar.explainers.dataset_and_model_insights_explainer.DatasetAndModelInsightsExplainer',\n", " 'name': 'DatasetAndModelInsightsExplainer',\n", " 'display_name': 'Dataset and model insights explainer',\n", " 'description': 'The explainer checks the dataset and model for various issues. 
For example, it provides problems and actions for missing values in the target column and a low number of unique values across columns of a dataset.',\n", " 'model_types': ['iid'],\n", " 'can_explain': ['regression', 'binomial', 'multinomial'],\n", " 'explanation_scopes': ['global_scope'],\n", " 'explanations': [{'explanation_type': 'global-text-explanation',\n", " 'name': 'Dataset and model insights explainer',\n", " 'category': 'CUSTOM',\n", " 'scope': 'global',\n", " 'has_local': None,\n", " 'formats': ['text/plain']}],\n", " 'parameters': [],\n", " 'keywords': []}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# summary\n", "result.summary()" ] },
{ "cell_type": "code", "execution_count": 8, "id": "83be40a4-fe14-4fcd-919d-c0f5a2c84e31", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# parameters\n", "result.params()" ] },
{ "cell_type": "code", "execution_count": 9, "id": "418932dd-a881-410d-9813-c398920d1887", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found 2 problems with the dataset. Please check the interpretation for further details.\n" ] } ], "source": [ "# number of problems reported by the interpretation\n", "print(\n", "    f\"Found {len(interpretation.result.problems)} problems with the dataset. \"\n", "    \"Please check the interpretation for further details.\"\n", ")" ] },
{ "cell_type": "markdown", "id": "c3ebc2e5-0fe2-48e2-a70d-aa7ce8724bc2", "metadata": {}, "source": [ "### Save the explainer log and data" ] },
{ "cell_type": "code", "execution_count": 10, "id": "499c0011-390a-4511-82bc-11aa371290d9", "metadata": {}, "outputs": [], "source": [ "# save the explainer log; the path is named once so the preview cell reads the same file\n", "log_path = \"./d_and_m_insights-demo.log\"\n", "result.log(path=log_path)" ] },
{ "cell_type": "code", "execution_count": 11, "id": "68f24660-e183-4163-b1ab-575462c24ec4", "metadata": {}, "outputs": [], "source": [ "# preview the first lines of the saved log\n", "!head {log_path}" ] } ],
"metadata": { "kernelspec": { "display_name": "h2o-sonar", "language": "python", "name": "h2o-sonar" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }