Friedman H-statistic Explainer Demo

This example demonstrates how to run the Friedman’s H-statistic explainer using the H2O Eval Studio library, and how to retrieve the resulting data and plot the interactions between the original features.

[1]:
import os
import logging

import datatable
import daimojo
import webbrowser

from h2o_sonar import interpret
from h2o_sonar.lib.api import commons
from h2o_sonar.lib.api import explainers
from h2o_sonar.explainers import friedman_h_statistic_explainer as explainer
from h2o_sonar.lib.api.models import ModelApi
[2]:
# explainer description: show the explainer's descriptor - its ID and names,
# the model and problem types it can explain, the explanations it declares
# and its parameters
interpret.describe_explainer(explainer.FriedmanHStatisticExplainer)
[2]:
{'id': 'h2o_sonar.explainers.friedman_h_statistic_explainer.FriedmanHStatisticExplainer',
 'name': 'FriedmanHStatisticExplainer',
 'display_name': "Friedman's H-statistic",
 'description': "Friedman's H-statistic describes the amount of variance explained by the feature *pair*. It's expressed with a graph where most important original features are nodes and the interaction scores are edges.\nWhen features interact with each other, then the influence of the features on the prediction does not have be additive, but more complex. For instance the contribution might be greater than the sum of contributions.\nFriedman's H-statistic calculation is computationally intensive and typically requires long time to finish - calculation duration grows with the number of features and bins.",
 'model_types': ['iid'],
 'can_explain': ['regression', 'binomial'],
 'explanation_scopes': ['global_scope'],
 'explanations': [{'explanation_type': 'global-report',
   'name': 'ReportExplanation',
   'category': None,
   'scope': 'global',
   'has_local': None,
   'formats': []},
  {'explanation_type': 'global-feature-importance',
   'name': 'GlobalFeatImpExplanation',
   'category': None,
   'scope': 'global',
   'has_local': None,
   'formats': []}],
 'parameters': [{'name': 'features_number',
   'description': 'Number of features for which to calculate H-Statistic.',
   'comment': '',
   'type': 'int',
   'val': 4,
   'predefined': [],
   'tags': [],
   'min_': 2.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'grid_resolution',
   'description': 'Observations per bin (number of equally spaced points used to create bins).',
   'comment': '',
   'type': 'int',
   'val': 3,
   'predefined': [],
   'tags': [],
   'min_': 1.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'features',
   'description': 'Feature list - at least 2 features must be selected.',
   'comment': '',
   'type': 'multilist',
   'val': None,
   'predefined': [],
   'tags': ['SOURCE_DATASET_COLUMN_NAMES'],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'sample_size',
   'description': 'Sample size for Partial Dependence Plot',
   'comment': '',
   'type': 'int',
   'val': 25000,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''}],
 'keywords': ['explains-feature-behavior', 'h2o-sonar']}

Interpretation

[3]:
# dataset: CSV file which is passed to the interpretation as the data to explain
dataset_path = "../../data/pd_ice_creditcard_10_rows.csv"

# Driverless AI MOJO model: load the MOJO with daimojo and wrap it using ModelApi
# so that it can be passed to `interpret.run_interpretation()`; the feature names
# reported by the MOJO are declared as the features used by the model
mojo_path = "../../data/models/creditcard-regression.mojo"
target_col = "LIMIT_BAL"
mojo_model = daimojo.model(mojo_path)
model = ModelApi().create_model(
    model_src=mojo_model,
    target_col=target_col,
    used_features=list(mojo_model.feature_names),
)

# scikit-learn model (disabled alternative - not used by this demo)
# NOTE(review): the path below is a pickle, which presumably cannot be loaded by
# `daimojo.model()` - enabling these lines likely requires different model loading
# code (and a better variable name than `mojo_path`); confirm before use
# mojo_path = "../../data/models/creditcard-binomial-sklearn-gbm.pkl"
# target_col = "default payment next month"

# results: directory passed to the interpretation as `results_location`;
# it is created if it does not exist yet
results_location = "./results"
os.makedirs(results_location, exist_ok=True)
[4]:
# run the interpretation of the model on the dataset with a single explainer,
# Friedman's H-statistic (selected by its explainer ID), using `results_location`
# as the results directory
# NOTE(review): `log_level` is INFO, yet the captured output of this cell shows
# h2o_sonar DEBUG records - confirm which logger this setting controls
interpretation = interpret.run_interpretation(
    dataset=dataset_path,
    model=model,
    target_col=target_col,
    results_location=results_location,
    explainers=[explainer.FriedmanHStatisticExplainer.explainer_id()],
    log_level=logging.INFO,
)
/home/srasaratnam/projects/h2o-sonar/venv/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
2023-03-12 23:11:30,987 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,017 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,037 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,064 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,099 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,137 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,165 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation
2023-03-12 23:11:31,180 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation
2023-03-12 23:11:31,206 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation
2023-03-12 23:11:31,220 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation
../_images/notebooks_h2o-sonar-friedman-h-statistic-explainer_5_1.png
../_images/notebooks_h2o-sonar-friedman-h-statistic-explainer_5_2.png

Explainer Result

[5]:
# retrieve the result of the Friedman's H-statistic explainer from the
# interpretation - the result is looked up by the explainer ID
result = interpretation.get_explainer_result(
    explainer.FriedmanHStatisticExplainer.explainer_id()
)
[6]:
# open interpretation HTML report in web browser
# (`webbrowser.open()` returns True if a browser was successfully launched)
webbrowser.open(interpretation.result.get_html_report_location())
[6]:
True
[7]:
# summary: the explainer descriptor together with the explanations created by
# this run (type, name, category, scope and available formats) and the parameters
result.summary()
[7]:
{'id': 'h2o_sonar.explainers.friedman_h_statistic_explainer.FriedmanHStatisticExplainer',
 'name': 'FriedmanHStatisticExplainer',
 'display_name': "Friedman's H-statistic",
 'description': "Friedman's H-statistic describes the amount of variance explained by the feature *pair*. It's expressed with a graph where most important original features are nodes and the interaction scores are edges.\nWhen features interact with each other, then the influence of the features on the prediction does not have be additive, but more complex. For instance the contribution might be greater than the sum of contributions.\nFriedman's H-statistic calculation is computationally intensive and typically requires long time to finish - calculation duration grows with the number of features and bins.",
 'model_types': ['iid'],
 'can_explain': ['regression', 'binomial'],
 'explanation_scopes': ['global_scope'],
 'explanations': [{'explanation_type': 'global-feature-importance',
   'name': "Friedman's H-statistic",
   'category': 'DAI MODEL',
   'scope': 'global',
   'has_local': None,
   'formats': ['application/vnd.h2oai.json+datatable.jay',
    'application/vnd.h2oai.json+csv',
    'application/json']},
  {'explanation_type': 'global-report',
   'name': "Friedman's H-statistic report",
   'category': 'DAI MODEL',
   'scope': 'global',
   'has_local': None,
   'formats': ['text/markdown']},
  {'explanation_type': 'global-html-fragment',
   'name': "Friedman's H-statistic",
   'category': 'DAI MODEL',
   'scope': 'global',
   'has_local': None,
   'formats': ['text/html']}],
 'parameters': [{'name': 'features_number',
   'description': 'Number of features for which to calculate H-Statistic.',
   'comment': '',
   'type': 'int',
   'val': 4,
   'predefined': [],
   'tags': [],
   'min_': 2.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'grid_resolution',
   'description': 'Observations per bin (number of equally spaced points used to create bins).',
   'comment': '',
   'type': 'int',
   'val': 3,
   'predefined': [],
   'tags': [],
   'min_': 1.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'features',
   'description': 'Feature list - at least 2 features must be selected.',
   'comment': '',
   'type': 'multilist',
   'val': None,
   'predefined': [],
   'tags': ['SOURCE_DATASET_COLUMN_NAMES'],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'sample_size',
   'description': 'Sample size for Partial Dependence Plot',
   'comment': '',
   'type': 'int',
   'val': 25000,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''}],
 'keywords': ['explains-feature-behavior', 'h2o-sonar']}
[8]:
# parameters: parameter values reported by the explainer result as
# a dictionary which maps the parameter name to its value
result.params()
[8]:
{'features_number': 4,
 'grid_resolution': 3,
 'features': None,
 'sample_size': 25000}

Display Data

[9]:
# explainer data as a table: one row per feature pair with its interaction score
result.data()
[9]:
featureinteractions
▪▪▪▪▪▪▪▪▪▪▪▪
0'SEX' and 'MARRIAGE'6.16078e-12
1'SEX' and 'EDUCATION'7.55627e-13
2'EDUCATION' and 'MARRIAGE'5.37272e-13
3'EDUCATION' and 'AGE'2.65952e-13
4'MARRIAGE' and 'AGE'2.57216e-13
5'SEX' and 'AGE'1.00249e-13

Plot Feature Interactions Data

[10]:
# plot the feature interactions data
result.plot()
../_images/notebooks_h2o-sonar-friedman-h-statistic-explainer_14_0.png

Save Explainer Log and Data

[11]:
# save the explainer log to a file - the path is kept in a variable so that
# it can be reused to print the saved log
log_file_path = "./feature-interactions-demo.log"
result.log(path=log_file_path)
[12]:
!cat $log_file_path
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5: getting features list, importance and metadata...
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5 all most important model features: ['SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5: features used by model: ['SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5: final features list: ['SEX', 'EDUCATION', 'MARRIAGE', 'AGE']
[13]:
# save the explainer data: pack the explainer result files (result descriptor,
# explanations in all their formats, log and working files) into a ZIP archive
result.zip(file_path="./feature-interactions-demo-archive.zip")
[14]:
# list the files stored in the archive
!unzip -l feature-interactions-demo-archive.zip
Archive:  feature-interactions-demo-archive.zip
  Length      Date    Time    Name
---------  ---------- -----   ----
     3573  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/result_descriptor.json
      122  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_report/text_markdown.meta
      529  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_report/text_markdown/explanation.md
   197234  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_report/text_markdown/network-chart.png
   197234  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/work/network-chart.png
      529  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/work/report.md
      110  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_html_fragment/text_html.meta
      399  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_html_fragment/text_html/explanation.html
    26644  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_html_fragment/text_html/fi-class-0.png
        2  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/model_problems/problems_and_actions.json
     1165  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/log/explainer_run_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5.log
      143  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_json.meta
      163  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_csv.meta
      185  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_datatable_jay.meta
      632  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay
      808  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json
      530  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_json/feature_importance_class_0.json
      747  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_json/explanation.json
      331  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_csv/feature_importance_class_0.csv
      746  2023-03-12 23:11   explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_csv/explanation.json
---------                     -------
   431826                     20 files
[ ]: