Friedman H-statistic Explainer Demo
This example demonstrates how to run the Friedman's H-statistic explainer using the H2O Eval Studio library, retrieve the resulting data, and plot the interactions between the original features.
[1]:
import os
import logging
import datatable
import daimojo
import webbrowser
from h2o_sonar import interpret
from h2o_sonar.lib.api import commons
from h2o_sonar.lib.api import explainers
from h2o_sonar.explainers import friedman_h_statistic_explainer as explainer
from h2o_sonar.lib.api.models import ModelApi
[2]:
# explainer description: show the explainer's descriptor (ID, display name,
# supported model types and problem types, explanations, and parameters)
interpret.describe_explainer(explainer.FriedmanHStatisticExplainer)
[2]:
{'id': 'h2o_sonar.explainers.friedman_h_statistic_explainer.FriedmanHStatisticExplainer',
'name': 'FriedmanHStatisticExplainer',
'display_name': "Friedman's H-statistic",
'description': "Friedman's H-statistic describes the amount of variance explained by the feature *pair*. It's expressed with a graph where most important original features are nodes and the interaction scores are edges.\nWhen features interact with each other, then the influence of the features on the prediction does not have be additive, but more complex. For instance the contribution might be greater than the sum of contributions.\nFriedman's H-statistic calculation is computationally intensive and typically requires long time to finish - calculation duration grows with the number of features and bins.",
'model_types': ['iid'],
'can_explain': ['regression', 'binomial'],
'explanation_scopes': ['global_scope'],
'explanations': [{'explanation_type': 'global-report',
'name': 'ReportExplanation',
'category': None,
'scope': 'global',
'has_local': None,
'formats': []},
{'explanation_type': 'global-feature-importance',
'name': 'GlobalFeatImpExplanation',
'category': None,
'scope': 'global',
'has_local': None,
'formats': []}],
'parameters': [{'name': 'features_number',
'description': 'Number of features for which to calculate H-Statistic.',
'comment': '',
'type': 'int',
'val': 4,
'predefined': [],
'tags': [],
'min_': 2.0,
'max_': 0.0,
'category': ''},
{'name': 'grid_resolution',
'description': 'Observations per bin (number of equally spaced points used to create bins).',
'comment': '',
'type': 'int',
'val': 3,
'predefined': [],
'tags': [],
'min_': 1.0,
'max_': 0.0,
'category': ''},
{'name': 'features',
'description': 'Feature list - at least 2 features must be selected.',
'comment': '',
'type': 'multilist',
'val': None,
'predefined': [],
'tags': ['SOURCE_DATASET_COLUMN_NAMES'],
'min_': 0.0,
'max_': 0.0,
'category': ''},
{'name': 'sample_size',
'description': 'Sample size for Partial Dependence Plot',
'comment': '',
'type': 'int',
'val': 25000,
'predefined': [],
'tags': [],
'min_': 0.0,
'max_': 0.0,
'category': ''}],
'keywords': ['explains-feature-behavior', 'h2o-sonar']}
Interpretation
[3]:
# input dataset (CSV file)
dataset_path = "../../data/pd_ice_creditcard_10_rows.csv"

# Driverless AI MOJO model and the column it predicts
mojo_path = "../../data/models/creditcard-regression.mojo"
target_col = "LIMIT_BAL"

# load the MOJO and wrap it as a model for the interpretation,
# declaring every MOJO feature as used by the model
mojo_model = daimojo.model(mojo_path)
model = ModelApi().create_model(
    target_col=target_col,
    model_src=mojo_model,
    used_features=[*mojo_model.feature_names],
)

# alternative scikit-learn model (commented out - not used in this demo)
# mojo_path = "../../data/models/creditcard-binomial-sklearn-gbm.pkl"
# target_col = "default payment next month"

# results directory - created if it does not exist yet
results_location = "./results"
os.makedirs(results_location, exist_ok=True)
[4]:
# run the interpretation with the Friedman's H-statistic explainer only
interpretation = interpret.run_interpretation(
    model=model,
    dataset=dataset_path,
    target_col=target_col,
    explainers=[explainer.FriedmanHStatisticExplainer.explainer_id()],
    results_location=results_location,
    log_level=logging.INFO,
)
/home/srasaratnam/projects/h2o-sonar/venv/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
2023-03-12 23:11:30,987 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,017 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,037 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,064 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,099 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,137 - h2o_sonar - DEBUG - ICE strategy: MANY predict method invocations
2023-03-12 23:11:31,165 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation
2023-03-12 23:11:31,180 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation
2023-03-12 23:11:31,206 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation
2023-03-12 23:11:31,220 - h2o_sonar - DEBUG - ICE strategy: 1 predict method invocation


Explainer Result
[5]:
# look up this explainer's result in the finished interpretation by explainer ID
result = interpretation.get_explainer_result(
    explainer.FriedmanHStatisticExplainer.explainer_id()
)
[6]:
# open interpretation HTML report in web browser
# (webbrowser.open returns True when a browser could be launched)
webbrowser.open(interpretation.result.get_html_report_location())
[6]:
True
[7]:
# summary: the explainer descriptor together with the explanations
# that were created and the formats they are available in
result.summary()
[7]:
{'id': 'h2o_sonar.explainers.friedman_h_statistic_explainer.FriedmanHStatisticExplainer',
'name': 'FriedmanHStatisticExplainer',
'display_name': "Friedman's H-statistic",
'description': "Friedman's H-statistic describes the amount of variance explained by the feature *pair*. It's expressed with a graph where most important original features are nodes and the interaction scores are edges.\nWhen features interact with each other, then the influence of the features on the prediction does not have be additive, but more complex. For instance the contribution might be greater than the sum of contributions.\nFriedman's H-statistic calculation is computationally intensive and typically requires long time to finish - calculation duration grows with the number of features and bins.",
'model_types': ['iid'],
'can_explain': ['regression', 'binomial'],
'explanation_scopes': ['global_scope'],
'explanations': [{'explanation_type': 'global-feature-importance',
'name': "Friedman's H-statistic",
'category': 'DAI MODEL',
'scope': 'global',
'has_local': None,
'formats': ['application/vnd.h2oai.json+datatable.jay',
'application/vnd.h2oai.json+csv',
'application/json']},
{'explanation_type': 'global-report',
'name': "Friedman's H-statistic report",
'category': 'DAI MODEL',
'scope': 'global',
'has_local': None,
'formats': ['text/markdown']},
{'explanation_type': 'global-html-fragment',
'name': "Friedman's H-statistic",
'category': 'DAI MODEL',
'scope': 'global',
'has_local': None,
'formats': ['text/html']}],
'parameters': [{'name': 'features_number',
'description': 'Number of features for which to calculate H-Statistic.',
'comment': '',
'type': 'int',
'val': 4,
'predefined': [],
'tags': [],
'min_': 2.0,
'max_': 0.0,
'category': ''},
{'name': 'grid_resolution',
'description': 'Observations per bin (number of equally spaced points used to create bins).',
'comment': '',
'type': 'int',
'val': 3,
'predefined': [],
'tags': [],
'min_': 1.0,
'max_': 0.0,
'category': ''},
{'name': 'features',
'description': 'Feature list - at least 2 features must be selected.',
'comment': '',
'type': 'multilist',
'val': None,
'predefined': [],
'tags': ['SOURCE_DATASET_COLUMN_NAMES'],
'min_': 0.0,
'max_': 0.0,
'category': ''},
{'name': 'sample_size',
'description': 'Sample size for Partial Dependence Plot',
'comment': '',
'type': 'int',
'val': 25000,
'predefined': [],
'tags': [],
'min_': 0.0,
'max_': 0.0,
'category': ''}],
'keywords': ['explains-feature-behavior', 'h2o-sonar']}
[8]:
# parameters: the explainer parameter values used for this run
result.params()
[8]:
{'features_number': 4,
'grid_resolution': 3,
'features': None,
'sample_size': 25000}
Display Data
[9]:
# display the explainer data: feature pairs and their interaction scores
result.data()
[9]:
feature | interactions | |
---|---|---|
▪▪▪▪ | ▪▪▪▪▪▪▪▪ | |
0 | 'SEX' and 'MARRIAGE' | 6.16078e-12 |
1 | 'SEX' and 'EDUCATION' | 7.55627e-13 |
2 | 'EDUCATION' and 'MARRIAGE' | 5.37272e-13 |
3 | 'EDUCATION' and 'AGE' | 2.65952e-13 |
4 | 'MARRIAGE' and 'AGE' | 2.57216e-13 |
5 | 'SEX' and 'AGE' | 1.00249e-13 |
Plot Feature Interactions Data
[10]:
# plot the feature interactions computed by the explainer
result.plot()

Save Explainer Log and Data
[11]:
# save the explainer log to a local file; the path variable is reused
# by the shell command in the following cell
log_file_path = "./feature-interactions-demo.log"
result.log(path=log_file_path)
[12]:
# print the saved explainer log (IPython expands $log_file_path from the Python variable)
!cat $log_file_path
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5: getting features list, importance and metadata...
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5 all most important model features: ['SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5: features used by model: ['SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']
2023-03-12 23:11:30,982 INFO Friedman's H-statistic 48ac664c-3ef8-4494-baa3-49341aa2c100/7490c8b6-3ab0-4b7b-b401-e1efe8929fe5: final features list: ['SEX', 'EDUCATION', 'MARRIAGE', 'AGE']
[13]:
# save the explainer data (explanations, working files, and log) as a ZIP archive
result.zip(file_path="./feature-interactions-demo-archive.zip")
[14]:
# list the contents of the archive saved in the previous cell
!unzip -l feature-interactions-demo-archive.zip
Archive: feature-interactions-demo-archive.zip
Length Date Time Name
--------- ---------- ----- ----
3573 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/result_descriptor.json
122 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_report/text_markdown.meta
529 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_report/text_markdown/explanation.md
197234 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_report/text_markdown/network-chart.png
197234 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/work/network-chart.png
529 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/work/report.md
110 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_html_fragment/text_html.meta
399 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_html_fragment/text_html/explanation.html
26644 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_html_fragment/text_html/fi-class-0.png
2 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/model_problems/problems_and_actions.json
1165 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/log/explainer_run_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5.log
143 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_json.meta
163 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_csv.meta
185 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_datatable_jay.meta
632 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay
808 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json
530 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_json/feature_importance_class_0.json
747 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_json/explanation.json
331 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_csv/feature_importance_class_0.csv
746 2023-03-12 23:11 explainer_h2o_sonar_explainers_friedman_h_statistic_explainer_FriedmanHStatisticExplainer_7490c8b6-3ab0-4b7b-b401-e1efe8929fe5/global_feature_importance/application_vnd_h2oai_json_csv/explanation.json
--------- -------
431826 20 files
[ ]: