Shapley Values for Transformed Features of MOJO Models Demo

This example demonstrates how to interpret a Driverless AI MOJO model using the H2O Eval Studio library, and how to retrieve the resulting data and plot the importances of the transformed features.

[1]:
import os
import logging

import datatable
import daimojo
import webbrowser

from h2o_sonar import interpret
from h2o_sonar.lib.api import commons
from h2o_sonar.lib.api import explainers
from h2o_sonar.explainers import transformed_fi_shapley_explainer as explainer
from h2o_sonar.lib.api.models import ModelApi
[2]:
# explainer description: show the explainer's metadata (id, display name,
# supported model types and problem types, parameters, keywords) before it is run
interpret.describe_explainer(explainer.ShapleyMojoTransformedFeatureImportanceExplainer)
[2]:
{'id': 'h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer',
 'name': 'ShapleyMojoTransformedFeatureImportanceExplainer',
 'display_name': 'Shapley Values for Transformed Features of MOJO Models',
 'description': 'Shapley explanations are a technique with credible theoretical support that presents consistent global and local variable contributions. Local numeric Shapley values are calculated by tracing single rows of data through a trained tree ensemble and aggregating the contribution of each input variable as the row of data moves through the trained ensemble. For regression tasks Shapley values sum to the prediction of the (Driverless AI) MOJO model. For classification problems, Shapley values sum to the prediction of the MOJO model before applying the link function. Global Shapley values are the average of the absolute local Shapley values over every row of a data set.',
 'model_types': ['iid', 'time_series'],
 'can_explain': ['regression', 'binomial', 'multinomial'],
 'explanation_scopes': ['global_scope', 'local_scope'],
 'explanations': [{'explanation_type': 'global-feature-importance',
   'name': 'GlobalFeatImpExplanation',
   'category': None,
   'scope': 'global',
   'has_local': None,
   'formats': []},
  {'explanation_type': 'local-feature-importance',
   'name': 'LocalFeatImpExplanation',
   'category': None,
   'scope': 'local',
   'has_local': None,
   'formats': []}],
 'parameters': [{'name': 'sample_size',
   'description': 'Sample size.',
   'comment': '',
   'type': 'int',
   'val': 100000,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'calculate_predictions',
   'description': 'Score dataset and include predictions in the explanation (local explanations speed-up cache).',
   'comment': '',
   'type': 'bool',
   'val': False,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'fast_approx_contrib',
   'description': 'Speed up predictions with fast contributions predictions approximation.',
   'comment': '',
   'type': 'bool',
   'val': True,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''}],
 'keywords': ['run-by-default',
  'explains-transformed_feature-importance',
  'is_fast']}

Interpretation

[3]:
# input data: CSV file to explain and the name of its target column
dataset_path = "../../data/creditcard.csv"
target_col = "default payment next month"

# model: load the MOJO scorer and wrap it so that it can be passed to
# the interpretation; the model's own feature names are used as its features
mojo_path = "../../data/models/creditcard-binomial.mojo"
mojo_model = daimojo.model(mojo_path)
model = ModelApi().create_model(
    model_src=mojo_model,
    target_col=target_col,
    used_features=[*mojo_model.feature_names],
)

# results: directory handed to the interpretation as its results location
# (created up front; no error if it already exists)
results_location = "./results"
os.makedirs(results_location, exist_ok=True)
[4]:
# run only the Shapley transformed feature importance explainer,
# selected by its explainer ID
shapley_explainer_id = (
    explainer.ShapleyMojoTransformedFeatureImportanceExplainer.explainer_id()
)

interpretation = interpret.run_interpretation(
    dataset=dataset_path,
    model=model,
    target_col=target_col,
    results_location=results_location,
    explainers=[shapley_explainer_id],
    log_level=logging.INFO,
)
/home/dvorka/h/mli/git/h2o-sonar/.venv/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer: progress 20.0%
h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer: progress 90.0%
h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer: progress 90.0%

Explainer Result

[5]:
# look up the result of the Shapley explainer in the finished interpretation;
# results are addressed by explainer ID
shapley_explainer_cls = explainer.ShapleyMojoTransformedFeatureImportanceExplainer
result = interpretation.get_explainer_result(shapley_explainer_cls.explainer_id())
[6]:
# locate the interpretation's HTML report and open it in the web browser;
# the cell output is the value returned by webbrowser.open()
html_report_location = interpretation.result.get_html_report_location()
webbrowser.open(html_report_location)
[6]:
True
[7]:
# summary: explainer descriptor together with the explanations the result
# provides (type, scope and available formats) and the explainer parameters
result.summary()
[7]:
{'id': 'h2o_sonar.explainers.transformed_fi_shapley_explainer.ShapleyMojoTransformedFeatureImportanceExplainer',
 'name': 'ShapleyMojoTransformedFeatureImportanceExplainer',
 'display_name': 'Shapley Values for Transformed Features of MOJO Models',
 'description': 'Shapley explanations are a technique with credible theoretical support that presents consistent global and local variable contributions. Local numeric Shapley values are calculated by tracing single rows of data through a trained tree ensemble and aggregating the contribution of each input variable as the row of data moves through the trained ensemble. For regression tasks Shapley values sum to the prediction of the (Driverless AI) MOJO model. For classification problems, Shapley values sum to the prediction of the MOJO model before applying the link function. Global Shapley values are the average of the absolute local Shapley values over every row of a data set.',
 'model_types': ['iid', 'time_series'],
 'can_explain': ['regression', 'binomial', 'multinomial'],
 'explanation_scopes': ['global_scope', 'local_scope'],
 'explanations': [{'explanation_type': 'global-feature-importance',
   'name': 'Shapley Values for Transformed Features of MOJO Models',
   'category': 'DAI MODEL',
   'scope': 'global',
   'has_local': 'local-feature-importance',
   'formats': ['application/vnd.h2oai.json+datatable.jay',
    'application/vnd.h2oai.json+csv',
    'application/json']},
  {'explanation_type': 'local-feature-importance',
   'name': 'Shapley Values for Transformed Features of MOJO Models',
   'category': 'DAI MODEL',
   'scope': 'local',
   'has_local': None,
   'formats': ['application/vnd.h2oai.json+datatable.jay']},
  {'explanation_type': 'global-html-fragment',
   'name': 'Shapley Values for Transformed Features of MOJO Models',
   'category': 'DAI MODEL',
   'scope': 'global',
   'has_local': None,
   'formats': ['text/html']}],
 'parameters': [{'name': 'sample_size',
   'description': 'Sample size.',
   'comment': '',
   'type': 'int',
   'val': 100000,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'calculate_predictions',
   'description': 'Score dataset and include predictions in the explanation (local explanations speed-up cache).',
   'comment': '',
   'type': 'bool',
   'val': False,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''},
  {'name': 'fast_approx_contrib',
   'description': 'Speed up predictions with fast contributions predictions approximation.',
   'comment': '',
   'type': 'bool',
   'val': True,
   'predefined': [],
   'tags': [],
   'min_': 0.0,
   'max_': 0.0,
   'category': ''}],
 'keywords': ['run-by-default',
  'explains-transformed_feature-importance',
  'is_fast']}
[8]:
# parameters: explainer parameter names and values as reported by the result
result.params()
[8]:
{'sample_size': 100000,
 'calculate_predictions': False,
 'fast_approx_contrib': True}

Display Data

[9]:
# feature importance data as a table with `feature` and `importance` columns
result.data()
[9]:
featureimportance
▪▪▪▪▪▪▪▪▪▪▪▪
010_PAY_00.484314
111_PAY_20.177288
28_LIMIT_BAL0.14293
319_PAY_AMT40.125988
417_PAY_AMT20.109839
51_BILL_AMT10.0856685
612_PAY_30.0460469
718_PAY_AMT30.0405643
815_PAY_60.035129
92_BILL_AMT20.0307533
1013_PAY_40.0292232
116_BILL_AMT60.0230935
1214_PAY_50.0203799
1316_PAY_AMT10.014774
147_EDUCATION0.00983341
150_AGE0.00831316
169_MARRIAGE0.00725285
1721_PAY_AMT60.00588243
1820_PAY_AMT50.00541516
195_BILL_AMT50.00464642
204_BILL_AMT40.00144066
213_BILL_AMT30.000818275

Plot Feature Importance Data

[10]:
# plot the feature importance data of the explainer result
result.plot()

Save Explainer Log and Data

[11]:
# save the explainer log to a local file; the path is kept in a variable
# so that the following cell can print the saved file
log_file_path = "./feature-importance-demo.log"
result.log(path=log_file_path)
[12]:
# print the saved explainer log (IPython expands $log_file_path from the Python variable)
!cat $log_file_path
[13]:
# save the explainer data as a ZIP archive in the current directory
result.zip(file_path="./feature-importance-demo-archive.zip")
[14]:
# list the contents of the saved archive without extracting it
!unzip -l feature-importance-demo-archive.zip
Archive:  feature-importance-demo-archive.zip
  Length      Date    Time    Name
---------  ---------- -----   ----
     3597  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/result_descriptor.json
      110  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_html_fragment/text_html.meta
      390  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_html_fragment/text_html/explanation.html
    32992  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_html_fragment/text_html/fi-class-0.png
        0  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/log/explainer_run_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a.log
  1826902  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/work/shapley_formatted.zip
  1842216  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/work/shapley.bin
  4883188  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/work/shapley.csv
      185  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_datatable_jay.meta
      143  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_json.meta
      163  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_csv.meta
     1499  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json
      944  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay
      808  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_json/explanation.json
     1672  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_json/feature_importance_class_0.json
      807  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_csv/explanation.json
      803  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/global_feature_importance/application_vnd_h2oai_json_csv/feature_importance_class_0.csv
        2  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/model_problems/problems_and_actions.json
      201  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/local_feature_importance/application_vnd_h2oai_json_datatable_jay.meta
      855  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/local_feature_importance/application_vnd_h2oai_json_datatable_jay/explanation.json
  1842216  2022-10-10 22:56   explainer_h2o_sonar_explainers_transformed_fi_shapley_explainer_ShapleyMojoTransformedFeatureImportanceExplainer_4aafdf97-d1d8-4a00-82d0-42dacf8ebb3a/local_feature_importance/application_vnd_h2oai_json_datatable_jay/feature_importance_class_0.jay
---------                     -------
 10439693                     21 files
[ ]: