Original Feature Importance Explainer for MOJO Models (Naive Shapley method) Demo

This example demonstrates how to interpret a Driverless AI MOJO model using the H2O Eval Studio library, and how to retrieve the resulting data and plot the original feature importances.

[1]:
import os
import logging

import datatable
import daimojo
import webbrowser

from h2o_sonar import interpret
from h2o_sonar.lib.api import commons
from h2o_sonar.lib.api import explainers
from h2o_sonar.explainers import fi_naive_shapley_explainer as explainer
from h2o_sonar.lib.api.models import ModelApi
[2]:
# explainer description: metadata such as supported model types, explanation
# types, and the explainer's parameters with their default values
interpret.describe_explainer(explainer.NaiveShapleyMojoFeatureImportanceExplainer)
[2]:
{'id': 'h2o_sonar.explainers.fi_naive_shapley_explainer.NaiveShapleyMojoFeatureImportanceExplainer',
 'name': 'NaiveShapleyMojoFeatureImportanceExplainer',
 'display_name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',
 'description': 'Shapley values for original features of (Driverless AI) MOJO models are approximated from the accompanying Shapley values for transformed features with the Naive Shapley method. This method makes the assumption that input features to a transformer are independent. For example, if the transformed feature, feature1_feature2, has a Shapley value of 0.5, then the Shapley value of the original features feature1 and feature2 will be 0.25 each.',
 'model_types': ['iid'],
 'can_explain': ['regression', 'binomial', 'multinomial'],
 'explanation_scopes': ['global_scope', 'local_scope'],
 'explanations': [{'explanation_type': 'global-feature-importance',
   'name': 'GlobalFeatImpExplanation',
   'category': None,
   'scope': 'global',
   'has_local': None,
   'formats': []},
  {'explanation_type': 'local-feature-importance',
   'name': 'LocalFeatImpExplanation',
   'category': None,
   'scope': 'local',
   'has_local': None,
   'formats': []}],
 'parameters': [{'name': 'sample_size',
   'description': 'Sample size.',
   'comment': '',
   'type': 'int',
   'val': 100000,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'fast_approx_contribs',
   'description': 'Speed up predictions with fast contributions predictions approximation.',
   'comment': '',
   'type': 'bool',
   'val': True,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'leakage_warning_threshold',
   'description': 'The threshold above which to report a potentially detected feature importance leak problem.',
   'comment': '',
   'type': 'float',
   'val': 0.95,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''}],
 'keywords': ['run-by-default',
  'explains-original-feature-importance',
  'is_fast',
  'h2o-sonar']}

Interpretation

[10]:
# Input data: path to the CSV dataset and the name of its target column.
dataset_path = "../../data/creditcard.csv"
target_col = "default payment next month"

# Load the Driverless AI MOJO and wrap it in an H2O Sonar model object,
# declaring the MOJO's own feature names as the features used by the model.
mojo_path = "../../data/models/creditcard-binomial.mojo"
mojo_model = daimojo.model(mojo_path)
used_features = list(mojo_model.feature_names)
model = ModelApi().create_model(
    model_src=mojo_model,
    target_col=target_col,
    used_features=used_features,
)

# Directory that receives the interpretation results (created if missing).
results_location = "./results"
os.makedirs(results_location, exist_ok=True)
[11]:
# Run the interpretation with only the Naive Shapley explainer enabled.
explainer_id = explainer.NaiveShapleyMojoFeatureImportanceExplainer.explainer_id()
interpretation = interpret.run_interpretation(
    dataset=dataset_path,
    model=model,
    target_col=target_col,
    results_location=results_location,
    explainers=[explainer_id],
    log_level=logging.INFO,
)
h2o_sonar.explainers.fi_naive_shapley_explainer.NaiveShapleyMojoFeatureImportanceExplainer: progress 20.0%
h2o_sonar.explainers.fi_naive_shapley_explainer.NaiveShapleyMojoFeatureImportanceExplainer: progress 90.0%
h2o_sonar.explainers.fi_naive_shapley_explainer.NaiveShapleyMojoFeatureImportanceExplainer: progress 90.0%
../_images/notebooks_h2o-sonar-dai-original-feature-naive-shapley-importance-explainer_5_1.png

Explainer Result

[12]:
# retrieve the result of the Naive Shapley explainer from the interpretation,
# looked up by the explainer's ID
result = interpretation.get_explainer_result(
    explainer.NaiveShapleyMojoFeatureImportanceExplainer.explainer_id()
)
[13]:
# open the interpretation HTML report in the default web browser
# (webbrowser.open returns True when a browser could be launched)
webbrowser.open(interpretation.result.get_html_report_location())
[13]:
True
[14]:
# summary: explainer metadata together with the explanations (and their
# formats) and the parameters of this run
result.summary()
[14]:
{'id': 'h2o_sonar.explainers.fi_naive_shapley_explainer.NaiveShapleyMojoFeatureImportanceExplainer',
 'name': 'NaiveShapleyMojoFeatureImportanceExplainer',
 'display_name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',
 'description': 'Shapley values for original features of (Driverless AI) MOJO models are approximated from the accompanying Shapley values for transformed features with the Naive Shapley method. This method makes the assumption that input features to a transformer are independent. For example, if the transformed feature, feature1_feature2, has a Shapley value of 0.5, then the Shapley value of the original features feature1 and feature2 will be 0.25 each.',
 'model_types': ['iid'],
 'can_explain': ['regression', 'binomial', 'multinomial'],
 'explanation_scopes': ['global_scope', 'local_scope'],
 'explanations': [{'explanation_type': 'global-feature-importance',
   'name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',
   'category': 'DAI MODEL',
   'scope': 'global',
   'has_local': 'local-feature-importance',
   'formats': ['application/vnd.h2oai.json+datatable.jay',
    'application/vnd.h2oai.json+csv',
    'application/json']},
  {'explanation_type': 'local-feature-importance',
   'name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',
   'category': 'DAI MODEL',
   'scope': 'local',
   'has_local': None,
   'formats': ['application/vnd.h2oai.json+datatable.jay']},
  {'explanation_type': 'global-html-fragment',
   'name': 'Shapley Values for Original Features of MOJO Models (Naive Method)',
   'category': 'DAI MODEL',
   'scope': 'global',
   'has_local': None,
   'formats': ['text/html']}],
 'parameters': [{'name': 'sample_size',
   'description': 'Sample size.',
   'comment': '',
   'type': 'int',
   'val': 100000,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'fast_approx_contribs',
   'description': 'Speed up predictions with fast contributions predictions approximation.',
   'comment': '',
   'type': 'bool',
   'val': True,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''}],
 'keywords': ['run-by-default',
  'explains-original-feature-importance',
  'is_fast',
  'h2o-sonar']}
[15]:
# parameters of the explainer run, as a name -> value dictionary
result.params()
[15]:
{'sample_size': 100000, 'fast_approx_contribs': True}

Display Data

[16]:
# feature importance data as a frame with `feature` and `importance` columns
result.data()
[16]:
    feature    importance
    ▪▪▪▪       ▪▪▪▪▪▪▪▪
 0  PAY_0      0.484314
 1  PAY_2      0.177288
 2  LIMIT_BAL  0.14293
 3  PAY_AMT4   0.125988
 4  PAY_AMT2   0.109839
 5  BILL_AMT1  0.0856685
 6  PAY_3      0.0460469
 7  PAY_AMT3   0.0405643
 8  PAY_6      0.035129
 9  BILL_AMT2  0.0307533
10  PAY_4      0.0292232
11  BILL_AMT6  0.0230935
12  PAY_5      0.0203799
13  PAY_AMT1   0.014774
14  EDUCATION  0.00983341
15  AGE        0.00831316
16  MARRIAGE   0.00725285
17  PAY_AMT6   0.00588243
18  PAY_AMT5   0.00541516
19  BILL_AMT5  0.00464642
20  BILL_AMT4  0.00144066
21  BILL_AMT3  0.000818275

Plot Feature Importance Data

[17]:
# plot the feature importance data
result.plot()
../_images/notebooks_h2o-sonar-dai-original-feature-naive-shapley-importance-explainer_14_0.png

Save Explainer Log and Data

[18]:
# save the explainer log to a file in the current working directory;
# the path is kept in a variable so the next cell can print the file
log_file_path = "./feature-importance-demo.log"
result.log(path=log_file_path)
[19]:
# print the saved log; IPython expands $log_file_path from the Python variable
!cat $log_file_path
[20]:
# save the explainer data (working files, explanations in all formats, and
# the run log) as a ZIP archive in the current working directory
result.zip(file_path="./feature-importance-demo-archive.zip")
[21]:
# list the files stored in the archive without extracting them
!unzip -l feature-importance-demo-archive.zip
Archive:  feature-importance-demo-archive.zip
  Length      Date    Time    Name
---------  ---------- -----   ----
     2991  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/result_descriptor.json
  1762120  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/work/shapley.orig.feat.bin
  1742542  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/work/shapley_formatted_orig_feat.zip
  4693127  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/work/shapley.orig.feat.csv
    40216  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/work/y_hat.bin
      110  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_html_fragment/text_html.meta
      378  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_html_fragment/text_html/explanation.html
    24441  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_html_fragment/text_html/fi-class-0.png
      201  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/local_feature_importance/application_vnd_h2oai_json_datatable_jay.meta
  1762120  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/local_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay
      831  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/local_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json
    40216  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/local_feature_importance/application_vnd_h2oai_json_datatable_jay/y_hat.bin
        2  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/model_problems/problems_and_actions.json
        0  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/log/explainer_run_6b2f1f72-b743-4ffd-84ec-b2f8383153ca.log
      143  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_json.meta
      163  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_vnd_h2oai_json_csv.meta
      185  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_vnd_h2oai_json_datatable_jay.meta
      880  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay
     1245  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json
     1593  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_json/feature_importance_class_0.json
      578  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_json/explanation.json
      726  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_vnd_h2oai_json_csv/feature_importance_class_0.csv
      577  2023-03-12 23:02   explainer_h2o_sonar_explainers_fi_naive_shapley_explainer_NaiveShapleyMojoFeatureImportanceExplainer_6b2f1f72-b743-4ffd-84ec-b2f8383153ca/global_feature_importance/application_vnd_h2oai_json_csv/explanation.json
---------                     -------
 10075385                     23 files
[ ]: