{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a89d1e71-a876-4189-b6ed-ee226dad679e",
   "metadata": {},
   "source": [
    "# Dataset and Model Insights Explainer Demo\n",
    "\n",
    "This example demonstrates how to use the **Dataset and Model Insights Explainer** and how to retrieve problems from the interpretation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e4108f24-6e39-43a2-abda-456129eaf361",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "\n",
    "import daimojo\n",
    "import webbrowser\n",
    "\n",
    "from h2o_sonar import interpret\n",
    "from h2o_sonar.explainers.dataset_and_model_insights_explainer import DatasetAndModelInsightsExplainer\n",
    "from h2o_sonar.lib.api.models import ModelApi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "e8858dd3-64f5-4341-b3f7-c3a4115fdb24",
   "metadata": {},
   "outputs": [],
   "source": [
    "# dataset\n",
    "dataset_path = \"../../data/predictive/creditcard100_pred_missing_values.csv\"\n",
    "target_col = \"predictions\"\n",
    "\n",
    "# model\n",
    "mojo_path = \"../../data/predictive/models/creditcard-binomial.mojo\"\n",
    "mojo_model = daimojo.model(mojo_path)\n",
    "model = ModelApi().create_model(\n",
    "    model_src=mojo_model,\n",
    "    target_col=target_col,\n",
    "    used_features=list(mojo_model.feature_names),\n",
    ")\n",
    "\n",
    "# results\n",
    "results_location = \"./results\"\n",
    "os.makedirs(results_location, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "6eaeb369",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'h2o_sonar.explainers.dataset_and_model_insights_explainer.DatasetAndModelInsightsExplainer',\n",
       " 'name': 'DatasetAndModelInsightsExplainer',\n",
       " 'display_name': 'Dataset and model insights explainer',\n",
       " 'tagline': 'DatasetAndModelInsightsExplainer.',\n",
       " 'description': 'The explainer checks the dataset and model for various issues. For example, it provides problems and actions for missing values in the target column and a low number of unique values across columns of a dataset.',\n",
       " 'brief_description': 'DatasetAndModelInsightsExplainer.',\n",
       " 'model_types': ['iid'],\n",
       " 'can_explain': ['regression', 'binomial', 'multinomial'],\n",
       " 'explanation_scopes': ['global_scope'],\n",
       " 'explanations': [{'explanation_type': 'global-text-explanation',\n",
       "   'name': 'TextExplanation',\n",
       "   'category': '',\n",
       "   'scope': 'global',\n",
       "   'has_local': '',\n",
       "   'formats': []}],\n",
       " 'keywords': [],\n",
       " 'parameters': [],\n",
       " 'metrics_meta': []}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# parameters\n",
    "interpret.describe_explainer(DatasetAndModelInsightsExplainer)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0a146156-5e48-4fe0-9f58-f37c305f89a6",
   "metadata": {},
   "source": [
    "## Interpret"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "f9010a65-f211-4160-a7a6-2dca49618f06",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "interpretation = interpret.run_interpretation(\n",
    "    dataset=dataset_path,\n",
    "    model=model,\n",
    "    target_col=target_col,\n",
    "    results_location=results_location,\n",
    "    explainers=[DatasetAndModelInsightsExplainer.explainer_id()],\n",
    "    log_level=logging.INFO,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94378f1c-9778-42ad-a9c5-a3696266d90c",
   "metadata": {},
   "source": [
    "## Interact with the Explainer Result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "ecd0a2d7-c462-4b9e-aab7-ded94a73a278",
   "metadata": {},
   "outputs": [],
   "source": [
    "# retrieve the result\n",
    "result = interpretation.get_explainer_result(DatasetAndModelInsightsExplainer.explainer_id())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "35e6147e-4e12-4faf-9584-4b3ad93d907b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# open interpretation HTML report in web browser\n",
    "webbrowser.open(interpretation.result.get_html_report_location())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "324568e9-5495-4a5f-969b-ee9108b5e644",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'h2o_sonar.explainers.dataset_and_model_insights_explainer.DatasetAndModelInsightsExplainer',\n",
       " 'name': 'DatasetAndModelInsightsExplainer',\n",
       " 'display_name': 'Dataset and model insights explainer',\n",
       " 'tagline': 'DatasetAndModelInsightsExplainer.',\n",
       " 'description': 'The explainer checks the dataset and model for various issues. For example, it provides problems and actions for missing values in the target column and a low number of unique values across columns of a dataset.',\n",
       " 'brief_description': 'DatasetAndModelInsightsExplainer.',\n",
       " 'model_types': ['iid'],\n",
       " 'can_explain': ['regression', 'binomial', 'multinomial'],\n",
       " 'explanation_scopes': ['global_scope'],\n",
       " 'explanations': [{'explanation_type': 'global-text-explanation',\n",
       "   'name': 'Dataset and model insights explainer',\n",
       "   'category': 'CUSTOM',\n",
       "   'scope': 'global',\n",
       "   'has_local': None,\n",
       "   'formats': ['text/plain']}],\n",
       " 'keywords': [],\n",
       " 'parameters': [],\n",
       " 'metrics_meta': []}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# summary\n",
    "result.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "83be40a4-fe14-4fcd-919d-c0f5a2c84e31",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# parameters\n",
    "result.params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "418932dd-a881-410d-9813-c398920d1887",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 2 problems with the dataset. Please check the interpretation for further details.\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    f\"Found {len(interpretation.result.problems)} problems with the dataset. \"\n",
    "    f\"Please check the interpretation for further details.\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c3ebc2e5-0fe2-48e2-a70d-aa7ce8724bc2",
   "metadata": {},
   "source": [
    "### Save the explainer log and data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "499c0011-390a-4511-82bc-11aa371290d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# save the explainer log\n",
    "result.log(path=\"./d_and_m_insights-demo.log\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "68f24660-e183-4163-b1ab-575462c24ec4",
   "metadata": {},
   "outputs": [],
   "source": [
    "!head d_and_m_insights-demo.log"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "334d7964-69df-4b0b-805d-a38c07fe0f47",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "H2O Sonar",
   "language": "python",
   "name": "h2o-sonar"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}