h2o_sonar.lib.api package

Submodules

h2o_sonar.lib.api.commons module

class h2o_sonar.lib.api.commons.Branding(value)

Bases: Enum

Branding.

EVAL_STUDIO = 2
H2O_SONAR = 1
class h2o_sonar.lib.api.commons.CommonInterpretationParams(model, models, dataset, target_col: str, validset='', testset='', use_raw_features: bool = '', weight_col: str = '', prediction_col: str = '', drop_cols: list | None = [], sample_num_rows: int | None = 0, results_location: str = '', used_features: list | None = None, extra_params: list | None = None)

Bases: object

PARAM_DATASET = <h2o_sonar.lib.api.commons.Param object>
PARAM_DROP_COLS = <h2o_sonar.lib.api.commons.Param object>
PARAM_MODEL = <h2o_sonar.lib.api.commons.Param object>
PARAM_MODELS = <h2o_sonar.lib.api.commons.Param object>
PARAM_PREDICTION_COL = <h2o_sonar.lib.api.commons.Param object>
PARAM_RESULTS_LOCATION = <h2o_sonar.lib.api.commons.Param object>
PARAM_SAMPLE_NUM_ROWS = <h2o_sonar.lib.api.commons.Param object>
PARAM_TARGET_COL = <h2o_sonar.lib.api.commons.Param object>
PARAM_TESTSET = <h2o_sonar.lib.api.commons.Param object>
PARAM_USED_FEATURES = <h2o_sonar.lib.api.commons.Param object>
PARAM_USE_RAW_FEATURES = <h2o_sonar.lib.api.commons.Param object>
PARAM_VALIDSET = <h2o_sonar.lib.api.commons.Param object>
PARAM_WEIGHT_COL = <h2o_sonar.lib.api.commons.Param object>
clone() CommonInterpretationParams
describe_config_item(config_item_name: str) Param | None
describe_config_items() dict[str, Param]
dump() dict
static load(d: dict) CommonInterpretationParams
to_dict() dict

Safe string-friendly serialization to dictionary.

class h2o_sonar.lib.api.commons.ConfigItem(name: str = '', description: str = '', comment: str = '', type: str = '', val: Any = '', predefined: list | None = None, tags: list | None = None, min_: float = 0.0, max_: float = 0.0, category: str = '')

Bases: object

KEY_CATEGORY = 'category'
KEY_COMMENT = 'comment'
KEY_DESCRIPTION = 'description'
KEY_MAX = 'max_'
KEY_MIN = 'min_'
KEY_NAME = 'name'
KEY_PREDEFINED = 'predefined'
KEY_TAGS = 'tags'
KEY_TYPE = 'type'
KEY_VAL = 'val'
clone() ConfigItem
dump(portable: bool = False) dict
static load(d: dict) ConfigItem
make_portable() ConfigItem
class h2o_sonar.lib.api.commons.EvaluatorParamType(value)

Bases: Enum

Evaluators parameters.

bool = ExplainerParamType.bool
customlist = ExplainerParamType.customlist
describe()
dict = ExplainerParamType.dict
float = ExplainerParamType.float
int = ExplainerParamType.int
list = ExplainerParamType.list
multilist = ExplainerParamType.multilist
str = ExplainerParamType.str
class h2o_sonar.lib.api.commons.EvaluatorToRun(evaluator_id: str, params: str | dict = None, extra_params: list | None = None)

Bases: ExplainerToRun

class h2o_sonar.lib.api.commons.ExperimentType(value)

Bases: Enum

Experiment types.

binomial = 2
multinomial = 3
regression = 1
class h2o_sonar.lib.api.commons.ExplainerFilter

Bases: object

List explainers filters

BLUEPRINT_INPUT_NAME = 'blueprint_input_name'
EXPLAINER_ID = 'explainer_id'
IID: str = 'iid'
IMAGE: str = 'image'
REQUIRES_PREDICT: str = 'requires_predict_method'
TIME_SERIES: str = 'time_series'
UNSUPERVISED: str = 'unsupervised'
class h2o_sonar.lib.api.commons.ExplainerJobStatus(value)

Bases: Enum

ABORTED_BY_RESTART = 4
ABORTED_BY_USER = 3
CANCELLED = 1
FAILED = 2
FINISHED = 0
IN_PROGRESS = -1
RUNNING = -1
SCHEDULED = -3
SUCCESS = 0
SYNCING = -4
TIMED_OUT = 5
UNKNOWN = -2
static from_int(status_code: int) ExplainerJobStatus
static is_job_failed(status: ExplainerJobStatus) bool
static is_job_finished(status: ExplainerJobStatus) bool
static is_job_running(status: ExplainerJobStatus) bool
to_string(status_code: int)
class h2o_sonar.lib.api.commons.ExplainerParamKey

Bases: object

KEY_ALL_EXPLAINERS_PARAMS = 'explainers_params'
KEY_DATASET = 'dataset'
KEY_DESCR_PATH = 'result_descriptor_path'
KEY_EXPERIMENT_TYPE = 'experiment_type'
KEY_E_DEPS = 'explainer_dependencies'
KEY_E_ID = 'explainer_id'
KEY_E_JOB_KEY = 'explainer_job_key'
KEY_E_PARAMS = 'explainer_params'
KEY_FEATURES_META = 'features_metadata'
KEY_I_DATA_PATH = 'interpretation_data_path'
KEY_KWARGS = 'pk'
KEY_LEGACY_I_PARAMS = 'legacy_i_params'
KEY_MODEL = 'model'
KEY_MODEL_TYPE = 'model_type'
KEY_ON_DEMAND = 'on_demand_explanation'
KEY_ON_DEMAND_MLI_KEY = 'on_demand_mli_key'
KEY_ON_DEMAND_PARAMS = 'on_demand_params'
KEY_PARAMS = 'params'
KEY_RUN_KEY = 'run_key'
KEY_TESTSET = 'testset'
KEY_USER = 'user'
KEY_VALIDSET = 'validset'
KEY_WORKER_NAME = 'worker_name'
class h2o_sonar.lib.api.commons.ExplainerParamType(value)

Bases: Enum

Explainer parameters.

bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.ExplainerToRun(explainer_id: str, params: str | dict = None, extra_params: list | None = None)

Bases: object

Parametrized explainer (to run) - ID and explainer parameters (dictionary, JSon string or any format explainer is able to process).

clone() ExplainerToRun
dump() dict
static load(d: dict) ExplainerToRun
class h2o_sonar.lib.api.commons.ExplanationScope(value)

Bases: Enum

Explanation scope.

global_scope = 2
local_scope = 1
class h2o_sonar.lib.api.commons.FilterEntry(filter_by: str = '', value=None)

Bases: object

KEY_FILTER_BY = 'filter_by'
KEY_VALUE = 'value'
clone() FilterEntry
dump() dict
static load(d: dict) FilterEntry
class h2o_sonar.lib.api.commons.InterpretationParamType(value)

Bases: Enum

any = 9
bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.Keyword(key: str, name: str, description: str)

Bases: object

Keyword.

class h2o_sonar.lib.api.commons.KeywordGroup(prefix: str, name: str, description: str, keywords: list[Keyword] | None = None)

Bases: object

Keyword groups.

is_member(keywords: list[str]) bool

Check if the entity (evaluator, explainer, method) with given keywords is a member of this keyword group.

class h2o_sonar.lib.api.commons.KeywordGroups(groups: list[KeywordGroup] = None)

Bases: object

Keyword groups.

add_group(group: KeywordGroup)
get_group(prefix: str) KeywordGroup | None
class h2o_sonar.lib.api.commons.LlmModelHostType(value)

Bases: Enum

RAG = 2
SERVICE = 1
class h2o_sonar.lib.api.commons.LookAndFeel

Bases: object

BLUE_THEME = 'blue'
COLORMAP_BLUE_2_RED = ['#00AAEE', '#FF1166']
COLORMAP_WHITE_2_BLACK = ['#ffffff', '#000000']
COLORMAP_YELLOW_2_BLACK = ['#fec925', '#000000']
COLOR_BLACK = '#000000'
COLOR_DAI_GREEN = '#bbc600'
COLOR_H2OAI_YELLOW = '#fec925'
COLOR_HOT_ORANGE = '#fd5800'
COLOR_MATPLOTLIB_BLUE = '#3b74b4'
COLOR_RED = '#ff0000'
COLOR_WHITE = '#ffffff'
DRIVERLESS_AI_THEME = 'driverless_ai'
FORMAT_HEXA = 'hexa'
H2O_SONAR_THEME = 'h2o_sonar'
KEY_LF = 'look_and_feel'
THEME_2_BG_COLOR = {'blue': '#ffffff', 'driverless_ai': '#000000', 'h2o_sonar': '#ffffff'}
THEME_2_COLORMAP = {'blue': ['#00AAEE', '#FF1166'], 'driverless_ai': ['#fec925', '#000000'], 'h2o_sonar': ['#fec925', '#000000']}
THEME_2_FG_COLOR = {'blue': '#3b74b4', 'driverless_ai': '#fec925', 'h2o_sonar': '#fec925'}
THEME_2_LINE_COLOR = {'blue': '#000000', 'driverless_ai': '#ffffff', 'h2o_sonar': '#000000'}
static get_bg_color(theme: str)
static get_colormap(colormap_data: list[str] | str = '', theme: str = '')

Get Matplotlib colormap.

Parameters:
colormap_data : list[str] | str

Create color map either from the list of two colors (string hexadecimal color specification) or by color map name.

theme : str

H2O Sonar theme to create color map based on the theme.

Returns:
matplotlib.colors.Colormap

Color map.

static get_fg_color(theme: str)
static get_line_color(theme: str)
class h2o_sonar.lib.api.commons.MetricMeta(key: str, display_name: str = '', data_type: str = 'float', display_format: str = '.4f', description: str = '', value_range: tuple[float, float] | None = (0.0, 1.0), value_enum: list[str] | None = None, higher_is_better: bool = True, threshold: float | None = 0.5, is_primary_metric: bool = True, parent_metric: str = '', exclude: bool = False)

Bases: object

Evaluation/explanation metric metadata.

DATA_TYPE_SECONDS = 'seconds'
EFFECTIVE_INF_FLOAT = 1234567.89
EFFECTIVE_INF_FLOAT_INT = 1234567.0
EFFECTIVE_INF_INT = 1234567
KEY_DATA_TYPE = 'data_type'
KEY_DESCRIPTION = 'description'
KEY_DISPLAY_FORMAT = 'display_value'
KEY_DISPLAY_NAME = 'display_name'
KEY_EXCLUDE = 'exclude'
KEY_HIGHER_IS_BETTER = 'higher_is_better'
KEY_IS_PRIMARY_METRIC = 'is_primary_metric'
KEY_KEY = 'key'
KEY_PARENT_METRIC = 'parent_metric'
KEY_THRESHOLD = 'threshold'
KEY_VALUE_ENUM = 'value_enum'
KEY_VALUE_RANGE = 'value_range'
static clone(metric: MetricMeta, primary: bool = True) MetricMeta

Clone the metric metadata, optionally changing the primary flag.

copy() MetricMeta
dump(portable: bool = False) dict
static from_dict(data: dict | tuple) MetricMeta
static is_metric_flip(old_value: float, new_value: float, metric_meta: MetricMeta) bool

Did metric score flip between old and new value?

Returns:
bool

True if metric score flip, False otherwise.

static load(data: dict) MetricMeta
to_dict(threshold: float | None = None, portable: bool = False) dict
to_md(to_rst: bool = False) str
class h2o_sonar.lib.api.commons.MetricsMeta(metrics: list[MetricMeta] = None)

Bases: object

KEY_META = 'metadata'
add_metric(metric: MetricMeta)
clone() MetricsMeta
contains(key: str) bool
copy_with_overrides(metric_key_to_overrides: dict) MetricsMeta

Copy metrics meta with updated:

  • display names

  • descriptions

  • exclude flag

Parameters:
metric_key_to_overrides : dict

Dictionary with metric key to overrides mapping - map: metric key -> field key -> new value

Returns:
MetricsMeta

Copy of the metrics meta with updated display names and descriptions.

dump(portable: bool = False) list
static from_dict(metrics_meta: dict | list) MetricsMeta
get_metric(key: str) MetricMeta | None
get_metric_best_value(key: str) float | None

Get the best value for the metric.

get_metric_description(key: str) str
get_metric_keys() list[str]
get_metric_worst_value(key: str) float | None

Get the worst value for the metric.

get_primary_metric() MetricMeta | None

Return the metric which is marked as primary metric.

get_threshold(key: str, default_value=None) float | None
is_higher_better(key: str) bool
is_metric_passed(key: str, value: float) bool
static load(metrics_meta: list) MetricsMeta
make_portable() MetricsMeta
set_threshold(threshold: float, key: str = '')
size() int
to_dict(threshold: float | None = None) dict
to_list() list[MetricMeta]
class h2o_sonar.lib.api.commons.MimeType

Bases: object

EXT_CSV = 'csv'
EXT_DATATABLE = 'jay'
EXT_DOCX = 'docx'
EXT_HTML = 'html'
EXT_JPG = 'jpg'
EXT_JSON = 'json'
EXT_MARKDOWN = 'md'
EXT_PNG = 'png'
EXT_SVG = 'svg'
EXT_TEXT = 'txt'
EXT_ZIP = 'zip'
MIME_CSV = 'text/csv'
MIME_DATATABLE = 'application/vnd.h2oai.datatable.jay'
MIME_DOCX = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
MIME_EVALSTUDIO_MARKDOWN = 'application/vnd.h2oai-evalstudio-leaderboard.markdown'
MIME_HTML = 'text/html'
MIME_IMAGE = 'image/xyz'
MIME_JPG = 'image/jpeg'
MIME_JSON = 'application/json'
MIME_JSON_CSV = 'application/vnd.h2oai.json+csv'
MIME_JSON_DATATABLE = 'application/vnd.h2oai.json+datatable.jay'
MIME_MARKDOWN = 'text/markdown'
MIME_MODEL_PIPELINE = 'application/vnd.h2oai.pipeline+zip'
MIME_PDF = 'application/pdf'
MIME_PNG = 'image/png'
MIME_SVG = 'image/svg+xml'
MIME_TEXT = 'text/plain'
MIME_ZIP = 'application/zip'
static ext_for_mime(mime: str)
class h2o_sonar.lib.api.commons.ModelTypeExplanation

Bases: object

IID: str = 'iid'
IMAGE: str = 'image'
LLM: str = 'llm'
RAG: str = 'rag'
TIME_SERIES: str = 'time_series'
UNSUPERVISED: str = 'unsupervised'
class h2o_sonar.lib.api.commons.Param(param_name: str, param_type: ParamType | InterpretationParamType | ExplainerParamType, description: str = '', default_value='', value_min: float = 0.0, value_max: float = 0.0, predefined: list | None = None, tags: list | None = None)

Bases: object

Generic parameter used as (predecessor) of library, interpretation and explainer parameters.

as_descriptor() ConfigItem

Explainer parameter to descriptor conversion.

class h2o_sonar.lib.api.commons.ParamType(value)

Bases: Enum

bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.PerturbationIntensity(value)

Bases: Enum

EXTREME = 6
HIGH = 4
LOW = 2
MEDIUM = 3
VERY_HIGH = 5
VERY_LOW = 1
class h2o_sonar.lib.api.commons.PerturbatorToRun(perturbator_id: str, intensity: str | PerturbationIntensity = PerturbationIntensity.MEDIUM, params: str | dict = None)

Bases: object

Parametrized perturbator (to run).

KEYWORD_INTENSITY = 'intensity'
KEYWORD_PARAMS = 'params'
KEYWORD_PERTURBATOR_ID = 'perturbator_id'
clone() PerturbatorToRun
dump() dict
static load(d: dict) PerturbatorToRun
class h2o_sonar.lib.api.commons.ResourceHandle(connection_key: str, resource_key: str, version: str = '')

Bases: object

H_CONNECTION: str = 'connection'
H_KEY: str = 'key'
H_PREFIX: str = 'resource:'
H_VERSION: str = 'version'
static is_handle(handle) bool
static parse_string_handle(loc_str: str) tuple[str, str, str]

Parse CLI argument into connection, resource key and version.

class h2o_sonar.lib.api.commons.ResourceLocatorType(value)

Bases: Enum

Resource locator types.

handle = 2
local = 1
class h2o_sonar.lib.api.commons.SafeJavaScript

Bases: object

Safe JavaScript datastructures de/serialization.

INF = 'Infinity'
NAN = 'NaN'
NEG_INF = '-Infinity'
static decode_to_float(obj)
class h2o_sonar.lib.api.commons.SemVer(major: int, minor: int, patch: int)

Bases: object

static from_int_list(version_list: list[int])

Semantic version from the list of 3 integers.

Returns:
SemVer | None

Instance of SemVer class if valid version, None otherwise.

static from_str(version: str)

Parse a semantic version <major>.<minor>.<patch>.

Returns:
SemVer | None

Instance of SemVer class if valid version, None otherwise.

class h2o_sonar.lib.api.commons.UpdateGlobalExplanation

Bases: object

Update mode: merge (to add new explanations) or replace (default).

OPT_CLASS: str = 'class'

Driverless AI/common parameters source: inherit or request (default).

OPT_FEATURE: str = 'feature'
OPT_INHERIT: str = 'inherit'
OPT_MERGE: str = 'merge'
OPT_REPLACE: str = 'replace'
OPT_REQUEST: str = 'request'
PARAMS_SOURCE: str = 'params_source'
UPDATE_MODE: str = 'update_mode'
UPDATE_SCOPE: str = 'update_scope'
h2o_sonar.lib.api.commons.add_string_list(items: list | None, add_items: list | None) list

Robust list handling of features to drop, process, use, skip, …

h2o_sonar.lib.api.commons.base_pkg(obj)

Get base package for given Python object.

Parameters:
obj : Python object
Returns:
str

Base package of Python object and sub-package, e.g., sklearn or ensemble.

h2o_sonar.lib.api.commons.generate_key() str
h2o_sonar.lib.api.commons.harmonic_mean(xs) float
h2o_sonar.lib.api.commons.is_ncname(s: str) bool
h2o_sonar.lib.api.commons.is_port_used(hostname: str = '127.0.0.1', port: int = 12345, service_name='Driverless AI', timeout=15, logger=None) bool
h2o_sonar.lib.api.commons.is_valid_key(key: str) bool
h2o_sonar.lib.api.commons.opt_import_err_msg(pckg_names: list[str] | str) str

Generate optional package import error message.

Parameters:
pckg_names : list[str] | str

Name or list of names of the required packages.

Returns:
str

Generated error message.

h2o_sonar.lib.api.commons.port_float(v: float | tuple, portable: bool = False) float | tuple

Make float portable by replacing inf/-inf with large/small finite values.

h2o_sonar.lib.api.commons.raise_opt_import_err(pckg_names: list[str] | str) None

Raise optional package import error.

Parameters:
pckg_names : list[str] | str

Name or list of names of the required packages.

Raises:
ImportError

Always.

h2o_sonar.lib.api.datasets module

h2o_sonar.lib.api.explainers module

class h2o_sonar.lib.api.explainers.Explainer

Bases: object

Explainer.

Explainer instance is NOT meant to be reusable, i.e., the instance must be created using the default constructor, initialized using the setup() method and used at most once - fit() method invocation.

Explainer lifecycle:

  • constructor() Explainer instantiation (for external basic/sanity checks, …). Note that explainer constructor executed by H2O Sonar runtime must not have parameters.

  • check_compatibility(params) -> bool Explainer check verifying that explainer will be able to explain given model. If compatibility check returns False or raises error, then it will not be run. Compatibility check is optional and does not have to be run by the engine.

  • setup(params) Set required and optional parameters, configuration, etc.

  • fit(X, y) Optional step to train surrogate model(s) or another explainer means. Method gets data needed for training/creation/initialization. This step might be skipped in case that explainer doesn’t need it.

  • explain*(X, y) -> [explainer] Actual computation (persistence and upload) of explainer(s) of given data(set). Explanation might be provided by value or reference (in case it would not fit in memory).

  • get_explanation(type, format) Get (cached/persisted) explanations in desired format.

Attributes:
model: Optional[ExplainerModel]

Instance of ExplainerModel class which has predict and fit functions of the model to be explained. These methods can be used to create predictions using the model/scorer.

persistence: Optional[ExplainerPersistence] = None

Instance of ExplainerPersistence class which provides convenient methods to persist explainer data e.g. to its working directory.

params: Optional[CommonExplainerParameters] = None

Common explainers parameters specified on explainer run like target column or columns to drop.

explainer_params: str

This explainer specific parameters specified on explainer run.

logger:

Explainer’s logger.

config:

Driverless AI server configuration copy.

ARG_EXPLAINER_PARAMS = 'explainer_params_as_str'
EXPLAINERS_PURPOSES = ['explains-dataset', 'explains-approximate-behavior', 'explains-original-feature-importance', 'explains-transformed-feature-importance', 'explains-feature-behavior', 'explains-fairness', 'explains-model-debugging', 'explains-model']
KEYWORD_COMPLIANCE_TEST = 'compliance-test'
KEYWORD_DEFAULT = 'run-by-default'
KEYWORD_EVALUATES_LLM = 'evaluates_llm'
KEYWORD_EVALUATES_RAG = 'evaluates_rag'
KEYWORD_EXPLAINS_APPROX_BEHAVIOR = 'explains-approximate-behavior'
KEYWORD_EXPLAINS_DATASET = 'explains-dataset'
KEYWORD_EXPLAINS_FAIRNESS = 'explains-fairness'
KEYWORD_EXPLAINS_FEATURE_BEHAVIOR = 'explains-feature-behavior'
KEYWORD_EXPLAINS_MODEL_DEBUGGING = 'explains-model-debugging'
KEYWORD_EXPLAINS_O_FEATURE_IMPORTANCE = 'explains-original-feature-importance'
KEYWORD_EXPLAINS_T_FEATURE_IMPORTANCE = 'explains-transformed-feature-importance'
KEYWORD_EXPLAINS_UNKNOWN = 'explains-model'
KEYWORD_H2O_MODEL_VALIDATION = 'h2o-model-validation'
KEYWORD_H2O_SONAR = 'h2o-sonar'
KEYWORD_IS_FAST = 'is_fast'
KEYWORD_IS_SLOW = 'is_slow'
KEYWORD_LLM = 'llm'
KEYWORD_MOCK = 'mock'
KEYWORD_NLP = 'nlp'
KEYWORD_PREFIX_CAPABILITY = 'capability'
KEYWORD_PREFIX_EXPLAINS = 'explains'
KEYWORD_PROXY = 'proxy-explainer'
KEYWORD_REQUIRES_H2O3 = 'requires-h2o3'
KEYWORD_REQUIRES_OPENAI_KEY = 'requires-openai-api-key'
KEYWORD_RQ_AA = 'requires_actual_answer'
KEYWORD_RQ_C = 'requires_constraints'
KEYWORD_RQ_EA = 'requires_expected_answer'
KEYWORD_RQ_J = 'requires_llm_judge'
KEYWORD_RQ_P = 'requires_prompts'
KEYWORD_RQ_RC = 'requires_retrieved_context'
KEYWORD_TEMPLATE = 'template'
KEYWORD_UNLISTED = 'unlisted'
add_insight(insight: InsightAndAction)

Add an evaluated/interpreted model(s) insight identified by explain() method.

Parameters:
insight : insights.InsightAndAction

Insight to be added.

add_problem(problem: ProblemAndAction)

Add an evaluated/interpreted model(s) problem identified by explain() method.

Parameters:
problem : problems.ProblemAndAction

Model problem to be added.

as_descriptor(runtime_view: bool = False, portable: bool = False) ExplainerDescriptor

Explainer descriptor as PROTO entity.

Parameters:
runtime_view: bool

Not all descriptor fields (like parameters declaration) are needed in runtime (for instance they are needed before running explainer), therefore they might be skipped in runtime view.

portable : bool

If True, then floats (infinity, NaN) and tuples are converted to be portable - from strings to max/min values of respective types.

Returns:
ExplainerDescriptor:

Explainer descriptor.

property brief_description
classmethod can_explain(model_meta: ExplainableModelMeta = None, experiment_type: ExperimentType = None) bool

Return True if explainer can fit either given Driverless AI model’s type or Driverless AI experiment type.

check_compatibility(params: CommonInterpretationParams | None = None, **explainer_params) bool

Explainer’s check (based on parameters) verifying that explainer will be able to explain a given model. If this compatibility check returns False or raises error, then it will not be run by the engine. This check may, but does not have to be performed by the execution engine.

check_required_modules(required_modules: set[str] | None = None)

Check whether modules specified in self._modules_needed_by_name are imported.

Parameters:
required_modules : list[str] | None

If defined, then modules specified in the parameter are checked, else self._modules_needed_by_name is checked.

Returns:
bool

True if all modules are available, False otherwise.

classmethod class_brief_description()
classmethod class_description()
classmethod class_display_name()
property class_name
classmethod class_tagline()
create_explanation_workdir_archive(display_name: str = '', display_category: str = '') WorkDirArchiveExplanation

Easily create working directory archive with ZIP of explanations representations.

Parameters:
display_name: str

Display name e.g. to be used for naming tile in UI.

display_category: str

Display category e.g. to be used for naming tab in UI.

dataset_api: DatasetApi | None
dataset_meta: ExplainableDatasetMeta | None
property dependencies: list[type[Explainer]]
classmethod depends_on() list
property description
destroy(**destroy_params)

Override to release resources created by the explainer (DB entities, files, running processes, …) depending on explainer runtime/container.

property display_name
classmethod evaluator_id() str
expected_custom_class

alias of Explainer

explain(X, y=None, explanations_types: list = None, **kwargs) list

Invoke this method to calculate and persist global, local or both type of explanation(s) for given data(set). This method implementation to be overridden by child class (this class implementation). This method is responsible for the calculations, build and persistence of explanations.

Xdatatable.Frame

Dataset frame.

y :

Labels.

explanations_types: list[Type[Explanation]]

Optional explanations to be built. All will be built if empty list or None provided. Get all supported types using has_explanation_types().

Returns:
list[Explanation]:

Explanations descriptors.

explain_global(X, y=None, **kwargs) list

Execute explainer to calculate on-demand global explanations. This method is expected to be overridden if explainer doesn’t pre-compute global explanations and/or needs to update global explanation after initial computation. Default implementation just returns global instance explanations computed by explain() method.

X :

Data frame.

y :

Labels.

Returns:
list[Explanation]:

Explanations.

explain_insights() list[InsightAndAction]

Determine (calculate or get persisted insights identified by explain() method) interpreted/evaluated model(s) problems.

Returns:
list[InsightAndAction]:

Interpreted/evaluated model(s) insights.

explain_local(X, y=None, **kwargs) list

Execute explainer to calculate on-demand local explanations. This method is expected to be overridden if explainer doesn’t pre-compute local explanations. Default implementation just returns local instance explanations computed by explain() method.

X :

Data frame.

y :

Labels.

Returns:
list[Explanation]:

Explanations.

explain_problems() list[ProblemAndAction]

Determine (calculate or get persisted problems identified by explain() method) interpreted/evaluated model(s) problems.

Returns:
list[ProblemAndAction]:

Interpreted/evaluated model(s) problems.

explainer_deps: dict | None
classmethod explainer_id() str
explainer_params: dict | None
explainer_params_as_dict() dict | None
explainer_params_as_str: str | None
classmethod explainer_version()
explains_binary() bool
explains_multiclass() bool
explains_regression() bool
property explanations: dict | None

Explanations created by this explainer.

fit(X, y=None, **kwargs)

Optionally, build/train explainer (model) and explainer prerequisites. This method implementation to be overridden by child class (this class implementation). It may be empty if explainer doesn’t have to be built.

Parameters:
X

Data frame.

y

Labels.

get_explanations(explanation_types: list) list

Get instance explanations representations in given format.

Parameters:
explanation_types: list[Type[Explanation]]

Explanation type to return - must be one of explanations declared (supported) by explainer. Returns all supported explanations if None or empty.

Returns:
list[Explanation]:

Explanations by value or reference.

get_result() type[ExplainerResult] | None
classmethod has_explanation_scopes() list[str]
classmethod has_explanation_types() list[type[Explanation]]

Explanation types supported by the explainer.

classmethod has_explanations() list[str]

Experiment types this explainer explains.

classmethod has_model_type_explanations() list[str]
static is_enabled() bool

Return True in case that explainer is enabled, else False which will make explainer to be completely ignored (unlisted, not loaded, not executed).

classmethod is_iid() bool
classmethod is_image() bool
classmethod is_llm() bool
classmethod is_rag() bool
classmethod is_time_series() bool
classmethod is_unsupervised() bool
key: str | None
property keywords: list[str]
static load(explainer_path: str | None = None)

Load pickled explainer snapshot.

static load_descriptor(descriptor_path: str, persistence: Persistence | None) ExplainerDescriptor
log_name: str
logger: SonarLogger | None
classmethod metrics_meta() MetricsMeta
mli_key: str | None
model: ExplainableModel | ExplainableModelHandle | None
model_api: ModelApi | None
model_meta: ExplainableModelMeta | None
classmethod parameters() list[ExplainerParam]
params: CommonInterpretationParams | None
persistence: ExplainerPersistence | None
classmethod priority() float

Priority used to order explainers by sequential execution scheduler. Higher number, higher priority.

report_progress(progress: float, message: str = '', precision: int = 1)

Report explainer progress in [0, 1] range and message (“” removes previous message, None keeps previous message).

classmethod requires_model() bool
classmethod requires_predict_method() bool
classmethod requires_preloaded_predictor() bool
run_explain(X, y, explanations_types: list = None, **kwargs) dict

Execute explainer to calculate (persist and upload) explanations(s) of a given model.

This method invokes explainer implementation of explain() and then performs explanation verifications and eventual subsequent actions. It is invoked by explainer execution engine (can add code to be executed before/after explain() overridden by child classes).

Explanation might be provided by value or reference (in case it would not fit in memory).

Parameters:
X

Data frame.

y

Labels.

explanations_types: list[Type[Explanation]]

Explanation types to build. All will be built if empty list or None provided. Get all supported types using has_explanation_types().

Returns:
list[Explanation]:

Explanations.

run_explain_global(X, y=None, **kwargs) list

Execute explainer to calculate (persist and upload) global explanation(s).

This method invokes explainer implementation explain_global() and then performs explanations verifications and eventual subsequent actions. It is invoked by explainer execution engine (can add code to be executed before/after explain_global() overridden by child classes).

Parameters:
X

Data frame.

y

Labels.

Returns:
list[Explanation]:

Explanations.

run_explain_local(X, y=None, **kwargs) list

Execute explainer to calculate (persist and upload) local explanation(s).

This method invokes explainer implementation explain_local() and then performs explanations verifications and eventual subsequent actions. It is invoked by explainer execution engine (can add code to be executed before/after explain_local() overridden by child classes).

Parameters:
X

Data frame.

y

Labels.

Returns:
list[Explanation]:

Explanations.

run_fit(X, y=None, **kwargs)

Build explainer and explainer prerequisites.

This is a method invoked by explainer execution engine (can add code to be executed before/after fit() overridden by child classes).

Parameters:
X

Data frame.

y

Labels.

save(explainer_path: str | None = None)

Save explainer snapshot pickle.

static save_descriptor(descriptor_path: str, descriptor: ExplainerDescriptor, persistence: Persistence | None)
setup(model: ExplainableModel | ExplainableModelHandle | None, persistence: ExplainerPersistence, models=None, key: str = '', params: CommonInterpretationParams | None = None, explainer_params_as_str: str | None = '', dataset_api: DatasetApi | None = None, model_api: ModelApi | None = None, logger: SonarLogger | None = None, **explainer_params) None

Set all the parameters needed to execute fit() and explain().

Parameters:
model

Explainable model with (fit and) score methods (or None if 3rd party).

models

(Explainable) models.

persistence: ExplainerPersistence

Persistence API allowing (controlled) saving and loading of explanations.

key: str

Optional (given) explainer run key (generated otherwise).

params: CommonInterpretationParams

Common explainers parameters specified on explainer run.

explainer_params_as_str: str | None

Explainer specific parameters in string representation.

dataset_api : datasets.DatasetApi | None

Dataset API to create custom explainable datasets needed by this explainer.

model_api : Optional[m4s.ModelApi]

Model API to create custom explainable models needed by this explainer.

logger : loggers.SonarLogger | None

Logger.

explainer_params:

Other explainers RUNTIME parameters, options, and configuration.

classmethod supports_dataset_locator(locator: ResourceLocatorType) bool
classmethod supports_model_locator(locator: ResourceLocatorType) bool
property tagline
testset_meta: ExplainableDatasetMeta | None
validate_explanations() bool

Optional method which can be used to verify integrity of explanations.

Returns:
bool:

Returns True if explanations are valid, False otherwise.

validset_meta: ExplainableDatasetMeta | None
property working_dir: str

Working directory path where explainer can store any data it needs.

class h2o_sonar.lib.api.explainers.ExplainerArgs(parameters: list[ExplainerParam] = None)

Bases: object

Explainer arguments ~ parameter values.

add_parameter(param_type: ExplainerParam)
as_descriptor() list

Save parameters as descriptor: [{‘parameter’: {‘type’: ‘str’}}]

from_config_overrides(config_overrides: dict, erase: list[str] | None = None) dict

Try to get all arguments which are declared as parameters from given config overrides and set (or overwrite) in args.

Parameters:
config_overrides: dict

Config overrides as dictionary.

erase: list[str] | None

Parameters to erase from config overrides.

from_dict(args_dict: dict, erase: list[str] | None = None) dict

Try to get all arguments which are declared as parameters from given dictionary and set (or overwrite) in args. Erase given parameters - arguments dictionary is not cloned, but modified.

get(param_name: str, default_value=None)
static json_str_to_dict(json_str: str, logger=None) dict
parameters: list[ExplainerParam]
static resolve_local_paging_args(args: dict, explainer_name: str = '', logger=None)

Resolve local explanation paging arguments.

resolve_params(explainer_params: dict | None = None)

Resolve explainer’s self.parameters (arguments) as follows to self.args.

Parameters:
explainer_params: dict | None

Explainer parameters as dictionary.

static toml_str_to_dict(toml_str: str, logger=None) dict
class h2o_sonar.lib.api.explainers.ExplainerDescriptor(id: str, name: str = '', display_name: str = '', tagline: str = '', description: str = '', brief_description: str = '', model_types: list[str] | None = None, can_explain: list[str] | None = None, explanation_scopes: list[str] | None = None, explanations: list[ExplanationDescriptor] | None = None, parameters: list[ConfigItem] | None = None, keywords: list[str] | None = None, metrics_meta: MetricsMeta | None = None, portable: bool = False)

Bases: object

KEY_BRIEF_DESCRIPTION = 'brief_description'
KEY_CAN_EXPLAIN = 'can_explain'
KEY_DESCRIPTION = 'description'
KEY_DISPLAY_NAME = 'display_name'
KEY_EXPLANATIONS = 'explanations'
KEY_EXPLANATION_SCOPES = 'explanation_scopes'
KEY_ID = 'id'
KEY_KEYWORDS = 'keywords'
KEY_METRICS_META = 'metrics_meta'
KEY_MODEL_TYPES = 'model_types'
KEY_NAME = 'name'
KEY_PARAMETERS = 'parameters'
KEY_TAGLINE = 'tagline'
clone() ExplainerDescriptor
dump(portable: bool = False) dict
static load(d: dict) ExplainerDescriptor
class h2o_sonar.lib.api.explainers.ExplainerParam(param_name: str, param_type: ExplainerParamType | EvaluatorParamType, description: str = '', comment: str = '', default_value: bool | str | float = '', value_min: float = 0.0, value_max: float = 0.0, predefined: list | None = None, tags: list | None = None, category: str = '', src: str = '')

Bases: Param

Explainer parameter declaration.

SRC_ANY = 'any'
SRC_CONFIG_OVERRIDES = 'config_overrides'
SRC_CONFIG_OVERRIDES_ERASE = 'config_overrides_erase'
SRC_EVALUATOR_PARAMS = 'evaluator_params'
SRC_EXPLAINER_PARAMS = 'explainer_params'
TAG_SRC_DATASET_COLUMN_NAMES = 'SOURCE_DATASET_COLUMN_NAMES'
TAG_SRC_DATASET_TEXT_COLUMN_NAMES = 'SOURCE_DATASET_TEXT_COLUMN_NAMES'
as_descriptor(portable: bool = False) ConfigItem

Explainer parameter to descriptor conversion.

class h2o_sonar.lib.api.explainers.ExplainerRegistry(singleton_create_key)

Bases: object

Explainer registry provides list of available OOTB and (registered) explainers.

get_class(explainer_id) type[Explainer] | None
list_explainers() dict
load()

Load registry from configuration.

register(explainer_class, explainer_id: str = '') str
classmethod registry()
save()
unregister(explainer_id: str) str
class h2o_sonar.lib.api.explainers.ExplainerResult(persistence: ExplainerPersistence, explainer_id: str, explanation_format: type[ExplanationFormat] | None, explanation: type[Explanation] | None, h2o_sonar_config, logger=None)

Bases: ABC

abstractmethod data(**kwargs) Frame
classmethod help() dict[str, dict[str, list[dict[str, str | bool]]]]
log(*, path)
params() dict
persistence: ExplainerPersistence
abstractmethod plot(**kwargs)
summary() dict
zip(*, file_path)
class h2o_sonar.lib.api.explainers.OnDemandExplainKey

Bases: object

On-demand explainer run parameters keys.

CLASS = 'class'
EXPLAINER_JOB_KEY = 'target_explainer_job_key'
EXPLANATION_TYPE = 'target_explanation_type'
FEATURE = 'feature'
FORMAT = 'target_format'
METHOD = 'method'
MLI_KEY = 'target_mli_key'
ROW = 'row'
UPDATE_STRATEGY = 'update_strategy'
class h2o_sonar.lib.api.explainers.OnDemandExplainMethod(value)

Bases: Enum

explain = 1
explain_global = 2
explain_local = 3
class h2o_sonar.lib.api.explainers.SurrogateExplainer

Bases: Explainer, ABC

Surrogate model explainer.

KEYWORD_SURROGATE = 'surrogate'
abstractmethod predict(X, y=None, **kwargs)

Surrogate explainer provides predict method allowing to get predictions from the surrogate model. This method to be overridden by child classes.

Parameters:
X

Data frame.

y

Labels.

run_predict(X, y=None, **kwargs)

Surrogate explainer provides predict method allowing to get predictions from the surrogate model.

This is method invoked by explainer execution engine (can add code to be executed before/after fit() overridden by child classes).

Parameters:
X

Data frame.

y

Labels.

h2o_sonar.lib.api.explanations module

h2o_sonar.lib.api.formats module

class h2o_sonar.lib.api.formats.CsvFormatCustomExplanationFormat(explanation, frame: Frame, frame_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat

add_data(format_data: Frame, file_name: str | None = None)

Add TEXT data as new explanation representation file. Child classes with binary data to override this class.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

get_data(file_name: str | None = None)
mime: str = 'text/csv'
class h2o_sonar.lib.api.formats.CustomArchiveZipFormat(explanation, format_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Custom ZIP archive representation.

mime: str = 'application/zip'
class h2o_sonar.lib.api.formats.CustomCsvFormat(explanation, frame: Frame, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of custom CSV format.

mime: str = 'text/csv'
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.CustomJsonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of custom JSon format.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.DatatableCustomExplanationFormat(explanation, frame: Frame, frame_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat

add_data(format_data: Frame, file_name: str | None = None)

Add TEXT data as new explanation representation file. Child classes with binary data to override this class.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

get_data(file_name: str | None = None)
mime: str = 'application/vnd.h2oai.datatable.jay'
class h2o_sonar.lib.api.formats.DiaTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Disparate Impact Analysis (DIA) text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.DocxFormat(explanation, format_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Open docx document.

mime: str = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.EvalStudioMarkdownFormat(explanation, format_file: str, extra_format_files: list | None = None, persistence: Persistence | None = None)

Bases: MarkdownFormat

EvalStudio Markdown representation (text and images).

mime: str = 'application/vnd.h2oai-evalstudio-leaderboard.markdown'
class h2o_sonar.lib.api.formats.ExplanationFormat(explanation, format_data, format_file: str | None, extra_format_files: list | None = None, file_extension: str = 'bin', persistence: Persistence | None = None)

Bases: ABC

Base class of explanation representation.

Representation is serialization of explanation in a format like JSon or CSV. Representation has a MIME type. It can be formed by one or more files, but at least one file must be provided.

DEFAULT_PAGE_SIZE = 20
FEATURE_TYPE_CAT = 'categorical'
FEATURE_TYPE_CAT_NUM = 'catnum'
FEATURE_TYPE_DATE = 'date'
FEATURE_TYPE_DATETIME = 'datetime'
FEATURE_TYPE_NUM = 'numeric'
FEATURE_TYPE_TIME = 'time'
FILE_PREFIX_EXPLANATION_IDX = 'explanation.'
KEYWORD_RESIDUALS = 'residuals'
KEY_ACTION = 'action'
KEY_ACTION_TYPE = 'action_type'
KEY_ACTUAL = 'actual'
KEY_BIAS = 'bias'
KEY_CATEGORICAL = 'categorical'
KEY_DATA = 'data'
KEY_DATA_HISTOGRAM = 'data_histogram'
KEY_DATA_HISTOGRAM_CAT = 'data_histogram_categorical'
KEY_DATA_HISTOGRAM_NUM = 'data_histogram_numerical'
KEY_DATE = 'date'
KEY_DATE_TIME = 'datetime'
KEY_DEFAULT_CLASS = 'default_class'
KEY_DOC = 'documentation'
KEY_EXPLAINER_JOB_KEY = 'explainer_job_key'
KEY_FEATURES = 'features'
KEY_FEATURE_TYPE = 'feature_type'
KEY_FEATURE_VALUE = 'feature_value'
KEY_FILES = 'files'
KEY_FILES_DETAILS = 'files_details'
KEY_FILES_NUMCAT_ASPECT = 'files_numcat_aspect'
KEY_FULLNAME = 'full_name'
KEY_ID = 'id'
KEY_IS_MULTI = 'is_multinomial'
KEY_ITEM_ORDER = 'order'
KEY_KEYWORDS = 'keywords'
KEY_LABEL = 'label'
KEY_METADATA = 'metadata'
KEY_METRICS = 'metrics'
KEY_MIME = 'mime'
KEY_MLI_KEY = 'mli_key'
KEY_NAME = 'name'
KEY_NUMERIC = 'numeric'
KEY_ON_DEMAND = 'on_demand'
KEY_ON_DEMAND_PARAMS = 'on_demand_params'
KEY_PAGE_OFFSET = 'page_offset'
KEY_PAGE_SIZE = 'page_size'
KEY_RAW_FEATURES = 'raw_features'
KEY_ROWS_PER_PAGE = 'rows_per_page'
KEY_RUNNING_ACTION = 'running-action'
KEY_SCOPE = 'scope'
KEY_SYNC_ON_DEMAND = 'synchronous_on_demand_exec'
KEY_TIME = 'time'
KEY_TOTAL_ROWS = 'total_rows'
KEY_VALUE = 'value'
KEY_Y_FILE = 'y_file'
LABEL_REGRESSION = 'None (Regression)'
SCOPE_GLOBAL = 'global'
SCOPE_LOCAL = 'local'
add_data(format_data: str, file_name: str | None = None)

Add TEXT data as new explanation representation file. Child classes with binary data to override this class.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

add_file(format_file: str, file_name: str | None = None) str

Copy file to representation as new explanation representation file.

Parameters:
format_file:

Source file to store (copy) as new explanation’s format file.

file_name: str

Representation file name or file relative path.

property explanation
property file_names: list[str]

Get file names which form the representation.

Hints:

  • representation is formed by flat structure of files without directories

  • representation data are not kept in memory - list of file names is sufficient

get_data(file_name: str | None = None)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: list[FilterEntry], **extra_params) str

Get local explanation for given dataset row and feature/class/… specified by explanation filter. Local explanation is returned as string.

classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: list[FilterEntry]) str

Get global explanation page.

property index_file_name: str

Get (mandatory) index file name which typically references all other files along with various metadata.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True in case that representation supports paging.

classmethod load_meta(persistence: ExplainerPersistence, explanation_type: str, explanation_format: str) dict

Load representation metadata with class identifier and MIME.

mime: str = None
update_data(format_data: str, file_name: str | None = None)
class h2o_sonar.lib.api.formats.ExplanationFormatUtils

Bases: object

static get_page(data, page_offset: int, page_size: int)

Get page of given data.

Parameters:
data:

Data to page.

page_offset: int

Positive integer or 0 with page offset.

page_size: int

Page size, returns all data entries if 0 or negative integer.

class h2o_sonar.lib.api.formats.Global3dDataJSonCsvFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of global 3D data (3D bar charts, heatmaps, …) as CSV files with JSon index.

JSon representation index file example:

{
    "features": {
        "PAY_0 and AGE": {
            "order": 0,
            "feature_names": ["PAY_0", "AGE"],
            "files": {
                "red_class": "data3d_feature_0_class_0.csv",
                "green_class": "data3d_feature_0_class_1.csv",
                "blue_class": "data3d_feature_0_class_2.csv"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "documentation": "PD for 2 features..."
}

CSV representation data file example:

,feature_1_bin_1,feature_1_bin_2,feature_1_bin_3
feature_2_bin_1,1,1,1
feature_2_bin_2,2,2,2
feature_2_bin_3,3,3,3
KEY_FEATURE_NAMES = 'feature_names'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/vnd.h2oai.json+csv'
static serialize_index_file(features: list[str], features_names: list[list[str]], classes: list[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'data3d', data_file_suffix: str = 'csv', y_file: str | None = None) tuple[dict, str]

JSon index file serialization to string.

Parameters:
features: list

Feature tuples.

features_names: list

Per-feature tuple feature names.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

metrics: list

Optional list of metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}]

keywords: list[str]

Optional list of keywords indicating representation features, properties and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.Global3dDataJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of global 3D data (3D bar charts, heatmaps, …) as JSon.

JSon representation index file example:

{
    "features": {
        "PAY_0 and AGE": {
            "order": 0,
            "feature_names": ["PAY_0", "AGE"],
            "files": {
                "red_class": "data3d_feature_0_class_0.json",
                "green_class": "data3d_feature_0_class_1.json",
                "blue_class": "data3d_feature_0_class_2.json"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "documentation": "PD for 2 features..."
}

JSon representation data file example:

"data_dictionary": {
    {
        "feature_1_bin_1": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        },
        "feature_1_bin_2": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        },
        "feature_1_bin_3": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        }
    }
}

Where:

  • data_dictionary is dictionary which might be used to easily construct data frame where column and row labels represent bin values

  • data key is not intentionally used to be used in the future for Grammar of MLI/Vega friendly representations (like in case of other formats).

KEY_FEATURE_NAMES = 'feature_names'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/json'
static serialize_index_file(features: list[str], features_names: list[list[str]], classes: list[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'data3d', data_file_suffix: str = 'json', y_file: str | None = None) tuple[dict, str]

JSon index file serialization to string.

Parameters:
features: list

Feature tuples.

features_names: list

Per-feature tuple feature names.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

metrics: list

Optional list of metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}]

keywords: list[str]

Optional list of keywords indicating representation features, properties and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalDtJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of decision tree as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "dt_class_0.json"
        "green_class": "dt_class_1.json"
        "blue_class": "dt_class_2.json"
        ...
    },
    "metrics": [
      {"Training RMSE": 0.96},
      {"CV RMSE": 0.97},
      {"NFolds": 3},
      {"R2": 0.96}
    ]
}

JSon representation data file example:

 1{
 2     data: [
 3         {
 4           key: str,
 5           name: str,
 6           parent: str,
 7           edge_in: str,
 8           edge_weight: num,
 9           leaf_path: bool,
10           total_weight: num,
11           weight: num,
12         }+
13     ]
14}
KEY_CHILDREN = 'children'
KEY_EDGE_IN = 'edge_in'
KEY_EDGE_WEIGHT = 'edge_weight'
KEY_KEY = 'key'
KEY_LEAF_PATH = 'leaf_path'
KEY_NAME = 'name'
KEY_PARENT = 'parent'
KEY_TOTAL_WEIGHT = 'total_weight'
KEY_WEIGHT = 'weight'
class TreeNode(name: str, parent: Any | None, edge_in: str | None, edge_weight: float | None, total_weight: float | None, weight: float | None, leaf_path: bool = False, key: str = '0')

Bases: object

to_dict() dict
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/json'
static serialize_data_file(dt_root_node) str

JSon data file serialization to string.

Parameters:
dt_root_node: TreeNode

Object representation root node.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | dict | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') tuple[dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

metrics: list

Optional list of PD related metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}] in case of binomial/regression or dictionary (per class key, metrics list as value) in case of multinomial.

doc: str

Documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalFeatImpDatatableFormat(explanation, frame: Frame, frame_file: str | None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Global feature importance datatable representation.

Canonical representation (datatable frame, ltypes):

| Required column    | Type  | Description        |
|--------------------|-------|--------------------|
| feature_name       | str   | Feature name.      |
| feature_importance | real  | Feature importance |

… other optional columns are allowed

COL_IMPORTANCE = 'feature_importance'
COL_NAME = 'feature_name'
static from_lists(explanation, features: list, importances: list)
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(frame_data: Frame) Frame
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonCsvFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonDatatableFormat

static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat) GlobalFeatImpJSonCsvFormat
mime: str = 'application/vnd.h2oai.json+csv'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'csv') tuple[dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Global feature importance JSon (index file) and datatable (data files) representation.

The typical use of JSon+datatable feature importance representation:

featImpJsonDt = GlobalFeatImpJSonDatatableFormat(...create...)
# ... get other representations for free:
featImpJSon = GlobalFeatImpJSonFormat.from_json_datatable(featImpJsonDt)
featImpJSonCsv = GlobalFeatImpJSonCsvFormat.from_json_datatable(featImpJsonDt)

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.jay"
        "green_class": "feature_importance_class_1.jay"
        "blue_class": "feature_importance_class_2.jay"
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592,
}

Datatable representation data file spec (datatable frame, ltypes; other optional columns are allowed):

| Required column    | Type  | Description                           |
|--------------------|-------|---------------------------------------|
| feature_name       | str   | Feature name.                         |
| feature_importance | real  | Feature importance                    |
| global_scope       | bool  | Global/local feature importance scope |

Datatable representation data file example:

   | feature_name  feature_importance  global_scope
-- + ------------  ------------------  ------------
 0 | feature-a                    1.1             1
 1 | feature-b                    2.2             1
COL_GLOBAL_SCOPE = 'global_scope'
COL_IMPORTANCE = 'feature_importance'
COL_NAME = 'feature_name'
add_data_frame(format_data: Frame, file_name: str | None = None)
static dict_to_data_frame(feature_importances: dict[str, float], scope: str = 'global') Frame

(Typical) feature importance dictionary to data frame conversion.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

Returns:
dt.Frame:

Data file.

static from_lists(explanation, features: list, importances: list)
get_data(file_name: str | None = None)
classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: list[FilterEntry]) str

Get global explanation page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True in case that representation supports paging.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') tuple[dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.json"
        "green_class": "feature_importance_class_1.json"
        "blue_class": "feature_importance_class_2.json"
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592,
}

JSon representation data file example:

{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
    bias: num
}

Where:

  • label is feature name

  • value is feature importance

  • scope is either local or global

KEY_LABEL = 'label'
KEY_VALUE = 'value'
static from_dataframe_to_json(frame: Frame, bias_col: str = None) str
static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat, bias_col: str = None) GlobalFeatImpJSonFormat
classmethod get_global_explanation(persistence: ExplainerPersistence, explanation_type: str) str

Get global feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_data_file(feature_importances: dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: float | None

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'json') tuple[dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

metrics: list

Optional list of PD related metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: list[str]

Optional list of keywords indicating representation features, properties and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalGroupedBarChartJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Global grouped bar chart JSon (index file) and datatable (data files) representation.

COL_X = 'x'
COL_Y_GROUP_1 = 'y_group_1'
COL_Y_GROUP_2 = 'y_group_2'
add_data_frame(format_data: Frame, file_name: str | None = None)
get_data(file_name: str | None = None)
classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True in case that representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict
mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') tuple[dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalLinePlotJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonFormat, GrammarOfMliFormat

class h2o_sonar.lib.api.formats.GlobalNlpLocoJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.json"
        "green_class": "feature_importance_class_1.json"
        "blue_class": "feature_importance_class_2.json"
        ...
    },
    "filters": [
        {
            "type": "text_features",
            "name": "TEXT FEATURES",
            "description": "Model text features",
            "values": ["description", "review"]
        }
    ],
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592,
}

JSon representation data file example:

{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
    bias: num
}

Where:

  • label is feature name

  • value is feature importance

  • scope is either local or global

FILTER_TYPE_TEXT_FEATURES = 'text_feature'
KEY_DESCRIPTION = 'description'
KEY_FILTERS = 'filters'
KEY_LABEL = 'label'
KEY_NAME = 'name'
KEY_TYPE = 'type'
KEY_VALUE = 'value'
KEY_VALUES = 'values'
static from_dataframe_to_json(frame: Frame, bias_col: str = None) str
static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat, bias_col: str = None) GlobalFeatImpJSonFormat
classmethod get_global_explanation(persistence: ExplainerPersistence, explanation_type: str) str

Get global feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: list[FilterEntry]) str

Get global explanation page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True in case that representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_data_file(feature_importances: dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: float | None

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: list[str], default_class: str = '', filters: list | None = None, metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'json') tuple[dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

filters: list

Optional list of per-filter items used to filter data entries.

metrics: list

Optional list of PD related metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: list[str]

Optional list of keywords indicating representation features, properties and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalScatterPlotJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonFormat, GrammarOfMliFormat

class h2o_sonar.lib.api.formats.GlobalSummaryFeatImpJsonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global summary feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_summary_class_0.jay"
        "green_class": "feature_importance_summary_class_1.jay"
        "blue_class": "feature_importance_summary_class_2.jay"
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 25,
}

Where:

  • total_rows is number of features.

Getting data file:

> datatable.fread("feature_importance_summary_class_2.jay")

JSon representation data file example:

   |  feature   shapley_value   count   avg_high_value   clazz   order
-- + --------- --------------- ------- ---------------- ------- -------
 0 |  PAY_0      0.390716        0      0.390716         "red"   0
 1 |  PAY_0     -0.386815       25      0.38681          "red"   0
 ...
 . |  AGE        0.425908       17      0.425908         "red"   1
 ...

Where:

  • feature is feature name (y-axis)

  • shapley_value is Shapley value (x-axis)

  • count frequency of the Shapley value (height, normalized to [0, 1])

  • avg_high_value average feature value height (color) normalized to [0, 1] (if feature value is low, it’s 0, if it’s high, then it’s 1) in case of numerical features, None in case of categorical features.

  • order feature order to ensure “order by feature importance” paging

KEY_FEATURE = 'feature'
KEY_FREQUENCY = 'count'
KEY_HIGH_VALUE = 'avg_high_value'
KEY_ORDER = 'order'
KEY_SHAPLEY = 'shapley_value'
add_data_frame(format_data: Frame, file_name: str | None = None)
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'summary_feature_importance', data_file_suffix: str = 'jay') tuple[dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

metrics: list

Optional list of PD related metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: list[str]

Optional list of keywords indicating representation features, properties and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalSummaryFeatImpJsonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global summary feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": {
            "0": "feature_importance_class_0_offset_0.json",
            "10": "feature_importance_class_0_offset_10.json",
            "20": "feature_importance_class_0_offset_20.json"
        },
        "green_class": {
            ...
        },
        "blue_class":  {
            "0": "feature_importance_class_2_offset_0.json",
            "10": "feature_importance_class_2_offset_10.json",
            "20": "feature_importance_class_2_offset_20.json"
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 25,
    "rows_per_page": 10
}

Where:

  • Every class dictionary has per-page offset key with the JSon file containing chart for given page. Offset is based on the number of rows (features) per page.

  • total_rows is number of features.

  • rows_per_page is number of features in every file (created per page)

JSon representation data file example:

{
    data: [
        {
            feature: str,
            shapley_value: num,
            count: num,
            avg_high_value: num,
            order: num,
        }+
    ]
}

Where:

  • feature is feature name (y-axis)

  • shapley_value is Shapley value (x-axis)

  • count frequency of the Shapley value (height, normalized to [0, 1])

  • avg_high_value average feature value height (color) normalized to [0, 1] (if feature value is low, it’s 0, if it’s high, then it’s 1) in case of numerical features, None in case of categorical features.

  • order is feature order (global feature importance).

DATA_FILE_PREFIX = 'summary_feature_importance'
DEFAULT_PAGE_SIZE = 10
KEY_FEATURE = 'feature'
KEY_FEATURES_PER_PAGE = 'features_per_page'
KEY_FREQUENCY = 'count'
KEY_HIGH_VALUE = 'avg_high_value'
KEY_ORDER = 'order'
KEY_SHAPLEY = 'shapley_value'
static from_json_datatable(json_dt_format: GlobalSummaryFeatImpJsonDatatableFormat, page_size: int, total_rows: int = -1, persistence: Persistence | None = None, index_extensions: dict | None = None) tuple[GlobalSummaryFeatImpJsonFormat, dict]
classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: list[FilterEntry]) str

Representation expect JSon+datatable representation to exist and use it to construct the page as expected

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True in case that representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/json'
static serialize_data_file(feature_importances: dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: optional float

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, rows_per_page: int | None = None, data_file_prefix: str = 'summary_feature_importance', data_file_suffix: str = 'json') tuple[dict, str]
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GrammarOfMliFormat

Bases: object

Format class which is child of Grammar of MLI format class is supported in H2O Sonar UI - there is UI component which will render such format in an (interactive) chart.

classmethod is_grammar_of_mli() bool

Will representation be rendered in UI?

class h2o_sonar.lib.api.formats.HtmlFormat(explanation, format_data: str, format_file: str | None = None, extra_format_files: list | None = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

HTML representation.

Example local (single row) on-demand NLP HTML explanation:

<feature-text min="-10.0" max="5.0">
  Sentence with <word value="-0.9485">dummy word</word>.
</feature-text>
ATT_MAX = 'max'
ATT_MIN = 'min'
ATT_VALUE = 'value'
EL_FEATURE_TEXT = 'feature-text'
EL_WORD = 'word'
MINIMAL_HTML = "<!DOCTYPE html>\n<html lang='en'><head></head><body></body></html>"
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'text/html'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceCsvFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: CsvFormatCustomExplanationFormat

mime: str = 'text/csv'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceDatatableFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Individual conditional explanation as datatable.

Canonical representation (datatable frame, ltypes) for 1D ICE:

| Required column    | Type  | Description            |
|--------------------|-------|------------------------|
| feature_name       | str   | Feature name.          |
| feature_type       | str   | Feature type.          |
| instance_id        | int   | Instance.              |
| bin_value          | str   | Bin value.             |
| prediction         | real  | Prediction.            |

Hints:

  • bin_value is converted to string (can be converted back using feature_type)

… other optional columns are allowed

COL_BIN_VALUE = 'bin_value'
COL_F_LTYPE = 'feature_type'
COL_F_NAME = 'feature_name'
COL_INSTANCE = 'instance'
COL_PREDICTION = 'prediction'
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceJsonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Individual conditional explanation as per-feature and class datatable frames with JSon index file.

JSon representation index file example:

{
    "features": {
        "PAY_0": {
            "order": 0,
            "feature_type": ["categorical"],
            "files": {
                "rec_class": "ice_feature_0_class_0.jay"
                "blue_class": "ice_feature_0_class_1.jay"
                "white_class": "ice_feature_0_class_2.jay"
            }
        },
        ...
    },
    "metrics": [{"RMSE": 0.03}],
    "y_file": "y_hat.jay",
    "on_demand": false
}

or (if on demand e.g. in case of sampled dataset):

{
    "on_demand": true
    "on_demand_parameters": ...
}

Datatable representation data file example:

> datatable.fread("ice_feature_0_class_0.jay")
   |       -2        -1         0         1         2         7
-- + --------  --------  --------  --------  --------  --------
 0 | 0.390716  0.390716  0.390716  0.390716  0.531548  0.531548
 1 | 0.38681   0.38681   0.38681   0.38681   0.508216  0.508216
 2 | 0.425908  0.425908  0.425908  0.425908  0.536061  0.536061
 ...

Remarks:

  • y_file … datatable frame with predictions for every X dataset instance

  • on_demand … true if there is no cached ICE and it must be computed

FILE_Y_FILE = 'y_hat.jay'
KEY_BIN = 'bin'
KEY_BINS = 'bins'
KEY_BINS_NUMCAT_ASPECT = 'bins_numcat_aspect'
KEY_COL_NAME = 'column_name'
KEY_FEATURE_VALUE = 'feature_value'
KEY_ICE = 'ice'
KEY_PREDICTION = 'prediction'
add_data_frame(format_data: Frame, file_name: str | None = None)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, dataset_path: str, row: int, explanation_filter: list[FilterEntry], **extra_params) str

Get ICE.

Parameters:
persistence

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

dataset_path

Dataset path.

row: int

Local explanation to be provided for given row.

explanation_filter: list[FilterEntry]
Required filter entries:

feature class

Returns:
str

JSon representation of the local explanation.

JSon ICE representation:
{
     prediction: float,
     data: [
         {
             bin: any,
             ice: float,
         }
     ]
}
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

classmethod merge_format(from_path: str, to_path: str, overwrite: bool = True, discriminant: str = '', is_numcat_merge: bool = False, persistence: Persistence | None = None)

Merge from representation files to to representation files.

Parameters:
from_path: str

Directory with the source representation to merge.

to_path: str

Directory with the target representation where should be new explanations merged.

overwrite: bool

Overwrite explanations if they already exist in the target representation. Use False to keep existing target explanations in case of a clash.

discriminant: str

Delimiter to make data file names unique (if needed).

is_numcat_merge: bool

True if this is num/cat update, False otherwise.

persistence: persistences.Persistence | None

Persistence store to save and load representations.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
classmethod mli_ice_explanation_to_json(ice_df: DataFrame, filter_feature: str, prediction, feature_value, logger=None) str
static serialize_index_file(features: list[str], classes: list[str], default_class: str = '', features_meta: dict | None = None, metrics: list | None = None, doc: str = '', y_file: str | None = None) tuple[dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LlmHeatmapLeaderboardJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: LlmLeaderboardJSonFormat, TextCustomExplanationFormat

Representation of LLM Heatmap Leaderboard explanation as JSon.

JSon representation index file example:

{
    "files": {
        "ragas": "leaderboard_0.json"
        "answer_relevance": "leaderboard_1.json"
        ...
        "ALL_METRICS": "leaderboard_n.json"
    },
    ...
}

JSon representation data file example:

{
    "data": {
        "h2oai/h2ogpt-4096-llama2-70b-chat": {
            "answer_similarity": 1
        },
        "h2oai/h2ogpt-4096-llama2-70b-chat-4bit": {
            "answer_similarity": 1
        },
        ...
        "gpt-4-32k-0613": {
            "answer_similarity": 1
        }
    },
    "eda": {
        ...
    }
}
KEY_DEFAULT_METRIC = 'default_metric'
KEY_EDA = 'eda'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_index_file(metrics: list[str], default_metric: str = '', eda: dict | None = None, doc: str = '', data_file_prefix: str = 'leaderboard', data_file_suffix: str = 'json') tuple[dict, str]

JSon index file serialization to string.

Parameters:
metrics: list

Metrics.

default_metric: str

Metric to be shown as default (the first one).

eda: dict

EDA data.

doc: str

Documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LlmLeaderboardJSonFormat

Bases: ABC

KEY_ALL_METRICS = 'ALL_METRICS'
mime = 'application/json'
class h2o_sonar.lib.api.formats.LocalDtJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local representation of decision tree as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "dt_class_0.json"
        "green_class": "dt_class_1.json"
        "blue_class": "dt_class_2.json"
        ...
    },
    "metrics": [
      {"Training RMSE": 0.96},
      {"CV RMSE": 0.97},
      {"NFolds": 3},
      {"R2": 0.96}
    ]
}

JSon representation data file example:

{
     data: [
         {
           key: str,
           name: str,
           parent: str,
           edge_in: str,
           edge_weight: num,
           leaf_path: bool,
           total_weight: num,
           weight: num
         }+
     ]
}

or (if on demand e.g. in case of sampled dataset):

{
    "on_demand": true
    "on_demand_parameters": ...
}

Remarks:

  • leaf_path … true if local path (hint in the leaf defines path to

    the root), else global explanation. In other words return the whole tree with leaf selected.

  • on_demand … true if there is no cached ICE and it must be computed.

static dt_path_to_node_key(path: str) str
static dt_set_tree_path(key: str, tree: dict)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: list[FilterEntry], **extra_params) str

Get local DT explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

row: int

Local explanation to be provided for given row.

explanation_filter: list[FilterEntry]
Required filter entries:

feature class

Returns:
str:

JSon representation of the local explanation.

JSon DT representation:
{
     data: [
         {
           key: str,
           name: str,
           parent: str,
           edge_in: str,
           edge_weight: num,
           leaf_path: bool,
           total_weight: num,
           weight: num,
         }+
     ]
}
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'application/json'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') tuple[dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalFeatImpDatatableFormat(explanation, frame: Frame, frame_file: str | None = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat, GrammarOfMliFormat

Local feature importance datatable representation.

  • feature importance for all classes

Canonical representation (datatable frame):

| Columns            | Rows                                |
|--------------------|-------------------------------------|
| feature names      | per-dataset row feature importance  |

Example:

  |     activity   ...   max_speed
--- + ------------ ...  -----------
0 |    -0.0143614        -0.142553
. |     ...               ...
9 |     0.0156479        -0.231883
FILE_EXT = 'jay'
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: list[FilterEntry], **extra_params) str

Get local feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

row: int

Local explanation to be provided for given row.

explanation_filter: list[FilterEntry]

Filter (unused in case of feature importance).

Returns:
str:

JSon representation of the local explanation.

JSon representation:
{
     data: [
         {
             label: str,
             value: num,
             scope: str,
         }+
     ]
}
Where:
  • label is feature name
  • value is feature importance
  • scope is local
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(frame_data: Frame) Frame
class h2o_sonar.lib.api.formats.LocalFeatImpJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonDatatableFormat

add_data_frame(format_data: Frame, file_name: str | None = None)
get_data(file_name: str | None = None)
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') tuple[dict, str]
class h2o_sonar.lib.api.formats.LocalFeatImpJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of local feature importance explanation as JSon. See GlobalFeatImpJSonFormat for structure of the index file and data.

KEY_Y = 'prediction'
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
static merge_local_and_global_page(global_page: dict, local_page: dict, mli_key: str = '', explainer_job_key: str = '', bias_key: str = '', logger=None)

Use this method to merge local and global explanations page (especially if frontend is not able to process local explanations only).

Local explanations page is expected to be sorted (as required) and it defines order of entries in the merged page. Merged result contains global explanation entry followed by local exp entry.

mime: str = 'application/json'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '') tuple[dict, str]
static sort_data(json_dict: dict)

Sort local feature importance explanation data by (abs) value:

{'data': [{'label': .,'value': .,'scope': .}, ...
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalFeatImpWithYhatsJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: LocalFeatImpJSonDatatableFormat

FILE_Y_HAT = 'y_hat.bin'
KEY_Y_HAT = 'y_hat'
class h2o_sonar.lib.api.formats.LocalNlpLocoJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of local LOCO explanation as JSon. See GlobalNlpLocoJSonFormat for structure of the index file and data.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
static merge_local_and_global_page(global_page: dict, local_page: dict, mli_key: str = '', explainer_job_key: str = '', bias_key: str = '', logger=None)

Use this method to merge local and global explanations page (especially if frontend is not able to process local explanations only).

Local explanations page is expected to be sorted (as required) and it defines order of entries in the merged page. Merged result contains global explanation entry followed by local exp entry.

mime: str = 'application/json'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '') tuple[dict, str]
static sort_data(json_dict: dict)

Sort local feature importance explanation data by (abs) value:

{'data': [{'label': .,'value': .,'scope': .}, ...
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalOnDemandHtmlFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (single row) on-demand representation.

classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: list[FilterEntry], **extra_params) str

Load index file and check parameters.

Returns:
str:

Local explanation as string - can be any (un)structured format.

mime: str = 'text/html'
set_on_demand(is_on_demand: bool, mime: str = '')

Indicate that representation is on-demand.

static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.LocalOnDemandTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (single row) on-demand representation.

classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: list[FilterEntry], **extra_params) str

Load index file and check parameters.

Returns:
str:

Local explanation as string - can be any (un)structured format.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.LocalSummaryFeatImplJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (on-demand) representation of summary feature importance as JSon.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True in case that there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True in case that representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict
mime: str = 'application/json'
static serialize_index_file(classes: list[str], default_class: str = '', metrics: list | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') tuple[dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.MarkdownFormat(explanation, format_file: str, extra_format_files: list | None = None, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Markdown representation (text and images).

mime: str = 'text/markdown'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.ModelValidationResultArchiveFormat(explanation, mv_test_type: str | Any, mv_test_name: str, mv_test_id: str, mv_test_results, mv_test_settings, mv_test_artifacts: dict, mv_test_log, mv_client=None, persistence: Persistence | None = None, logger=None)

Bases: ExplanationFormat

Model Validation test result archived in a ZIP.

mime: str = 'application/zip'
class h2o_sonar.lib.api.formats.PartialDependenceCsvFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: CsvFormatCustomExplanationFormat

static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.PartialDependenceDatatableFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Representation of partial dependence (PD) explanation as datatable.

Canonical representation (datatable frame, ltypes) for 1D PD:

| Required column    | Type  | Description            |
|--------------------|-------|------------------------|
| feature_name       | str   | Feature name.          |
| feature_type       | str   | Feature type.          |
| bin_value          | str   | Bin value              |
| mean               | real  | Mean.                  |
| sd                 | real  | Standard deviation.    |
| sem                | real  | Standard mean error.   |
| is_oor             | bool  | Is out of range value? |

Hints:

  • bin_value is converted to string (can be converted back using feature_type).

… other optional columns are allowed

COL_BIN_VALUE = 'bin_value'
COL_F_LTYPE = 'feature_type'
COL_F_NAME = 'feature_name'
COL_IS_OOR = 'is_oor'
COL_MEAN = 'mean'
COL_SD = 'sd'
COL_SEM = 'sem'
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.PartialDependenceJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of partial dependence (PD) explanation as JSon.

JSon representation index file example:

{
    "features": {
        "PAY_0": {
            "order": 0,
            "feature_type": ["categorical"],
            "files": {
                "red_class": "pd_feature_0_class_0.json"
                "green_class": "pd_feature_0_class_1.json"
                "blue_class": "pd_feature_0_class_2.json"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}]
}

JSon representation data file example:

{
    "data": [{
        "bin": -2,
        "pd": 0.3710160553455353,
        "sd": 0.029299162328243256,,
        "out_of_range": false
    }, {
        "bin": -1,
        "pd": 0.3710160553455353,
        "sd": 0.029299162328243256,,
        "out_of_range": false
    },
    ...
}
KEY_BIN = 'bin'
KEY_FREQUENCY = 'frequency'
KEY_OOR = 'oor'
KEY_PD = 'pd'
KEY_SD = 'sd'
KEY_X = 'x'
classmethod get_bins(persistence: ExplainerPersistence, explanation_type: str, feature: str) list

Get bins for given feature.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

feature: str

Feature for which to get bins.

Returns:
list:

Bins.

classmethod get_numcat_aspects(feature, idx: dict) list[str]

Get available num/cat aspects for given feature:

  • [] … invalid feature

  • ["numeric"] … numeric PD only

  • ["categorical"] … categorical PD only

  • ["numeric", "categorical"] … numeric and categorical PD

Parameters:
feature: str

Feature name for which to determine available aspects.

idx: dict

PD JSon index file (explanation.json).

Returns:
list[str]:

Available num/cat aspects.

classmethod get_numcat_missing_aspect(feature: str, idx: dict)

Return (missing) aspect to be calculated.

Parameters:
feature: str

Feature name for which to determine available aspects.

idx: dict

PD JSon index file (explanation.json).

Returns:
str:

Aspect to calculate or "" (no aspect is missing).

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
classmethod merge_format(from_path: str, to_path: str, overwrite: bool = True, discriminant: str = '', is_numcat_merge: bool = False, persistence: Persistence | None = None)

Merge from representation files to to representation files.

Parameters:
from_path: str

Directory with the source representation to merge.

to_path: str

Directory with the target representation where should be new explanations merged.

overwrite: bool

Overwrite explanations if they already exist in the target representation. Use False to keep existing target explanations in case of a clash.

discriminant: str

Delimiter to make data file names unique (if needed).

is_numcat_merge: bool

True if this is num/cat update, False otherwise.

persistence: persistences.Persistence | None

Persistence store to save and load explanation representations.

mime: str = 'application/json'
static serialize_index_file(features: list[str], classes: list[str], default_class: str = '', features_meta: dict | None = None, metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'pd', data_file_suffix: str = 'json', y_file: str | None = None) tuple[dict, str]

JSon index file serialization to string.

Parameters:
features: list

Features.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

features_meta: dict

Features metadata allowing one to indicate that a given feature is categorical (use the categorical key with a list of feature names), a date (use the date key with a list of feature names; to specify the format use date-format with a list of Python date formats), or numerical (default).

metrics: list

Optional list of PD related metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}]

keywords: list[str]

Optional list of keywords indicating representation features, properties and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).

classmethod set_merge_status(dir_path: str, mli_key: str, explainer_job_key: str, clear: bool = False, action: str = 'update_explanation', action_type: str = 'add_aspect', persistence: Persistence | None = None)

Add (clear=False) or remove running interpretation update.

Parameters:
dir_path: str

Directory with index file where the status should be set.

mli_key: str

MLI key of the interpretation which will update another representation.

explainer_job_key: str

Explainer job key of the interpretation which will update another representation.

clear: bool

Add (clear=False) or remove (clear=True) indicator in representation’s dict.

action: str

Running action identifier e.g. update explanation.

action_type: str

Action (sub)type identifier e.g. add feature, add numeric/categorical view.

persistence: persistences.Persistence | None

Persistence store to save and load explanation representations.

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.SaTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Sensitivity Analysis (SA) text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.TextCustomExplanationFormat(explanation, format_data: str, format_file: str | None, extra_format_files: list | None = None, persistence: Persistence | None = None)

Bases: ExplanationFormat

FILE_IS_ON_DEMAND = 'IS_ON_DEMAND'
FILTER_CLASS = 'explain_class'
FILTER_FEATURE = 'explain_feature'
FILTER_NUMCAT = 'explain_numcat'
add_data(format_data: str, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

add_file(format_file: str, file_name: str | None = None)

Copy file to representation as new explanation representation file.

Parameters:
format_file:

Source file to store (copy) as new explanation’s format file.

file_name: str

Representation file name or file relative path.

get_data(file_name: str | None = None, data_type: PersistenceDataType | None = None)
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'text/plain'
static set_index_commons(index_dict: dict, classes: list[str], default_class: str = '', metrics: list | None = None, keywords: int | None = None, doc: str = '', total_rows: int | None = None)
set_on_demand(is_on_demand: bool, mime: str = '')

Indicate that representation is on-demand.

update_index_file(index_dict: dict, metrics: list | None = None, total_rows: int | None = None)
class h2o_sonar.lib.api.formats.TextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.WorkDirArchiveZipFormat(explanation, file_filter=<function WorkDirArchiveZipFormat.<lambda>>, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Working directory ZIP archive representation. Just instantiate this class, and it will create the ZIP representation (no need to add files/data). Note that the archive is created exactly on the time of instantiation.

mime: str = 'application/zip'
h2o_sonar.lib.api.formats.get_custom_explanation_formats()

h2o_sonar.lib.api.interpretations module

class h2o_sonar.lib.api.interpretations.ExplainerJob(key: str = '', created: float = 0.0, duration: float = 0.0, progress: float = 0.0, status: ExplainerJobStatus = ExplainerJobStatus.UNKNOWN, message: str = '', error: str = '', explainer_persistence=None, explainer_descriptor: ExplainerDescriptor | None = None, result_descriptor=None, child_explainer_job_keys: list[str] | None = None, job_location: str = '')

Bases: object

Explainer job.

KEY_CHILD_KEYS = 'child_explainer_job_keys'
KEY_CREATED = 'created'
KEY_DURATION = 'duration'
KEY_ERROR = 'error'
KEY_EXPLAINER_DESCRIPTOR = 'explainer'
KEY_JOB_LOCATION = 'job_location'
KEY_KEY = 'key'
KEY_MESSAGE = 'message'
KEY_PROGRESS = 'progress'
KEY_RESULT_DESCRIPTOR = 'result_descriptor'
KEY_STATUS = 'status'
evaluator_id() str
explainer_id() str
static from_dict(explainer_job_dict: dict)
is_finished() bool
success()
tick(msg: str = '', progress_increment: float = 0.1)
to_dict() dict
class h2o_sonar.lib.api.interpretations.HtmlInterpretationFormat(interpretation: Interpretation, branding: Branding = Branding.H2O_SONAR, logger: SonarLogger | None = None)

Bases: object

HTML representation of the interpretation.

class Context

Bases: object

Context with data which are needed to create HTML.

clear()
explainers_by_purpose: dict[str, list[ExplainerJob]]
get_purpose_representatives_job_keys() list[str]
purpose_image: dict[str, str]
purpose_representative: dict[str, ExplainerJob | None]
purposes_w_repre: list[str]
KEYWORD_ID_2_NAME = {'explains-approximate-behavior': 'Approximate model behavior', 'explains-fairness': 'Fairness', 'explains-feature-behavior': 'Feature behavior', 'explains-model': 'Model explanations', 'explains-model-debugging': 'Model debugging', 'explains-original-feature-importance': 'Original feature importance', 'explains-transformed-feature-importance': 'Transformed feature importance'}

Inject footer into Airium HTML.

Parameters:
html: airium.Airium

Airium HTML instance.

brand_h2o_sonar: str

H2O Sonar branding.

branding: commons.Branding

Branding.

static html_h2o_sonar_pitch(brand_h2o_sonar: str) str
static html_safe_str_field(field)
to_html(include_left_navigation: bool = True, report_style: str = 'HTML') str

Get HTML report for the interpretation.

class h2o_sonar.lib.api.interpretations.HtmlInterpretationsFormat(interpretations: Interpretations, branding: Branding = Branding.H2O_SONAR, logger: SonarLogger | None = None)

Bases: object

HTML representation of an interpretations list.

to_html(branding: Branding = Branding.EVAL_STUDIO) str

Get HTML for the interpretations list.

class h2o_sonar.lib.api.interpretations.Interpretation(common_params: CommonInterpretationParams, created: float, explainers: list[str | ExplainerToRun] | None, explainer_keywords: list[str] | None = None, key: str = '', sampler: DatasetSampler | None = None, branding: Branding = Branding.H2O_SONAR, results_formats: list[str] | None = None, progress_callback: AbstractProgressCallbackContext | None = None, logger=None, extra_params: list | None = None)

Bases: object

Interpretation is a request to interpret a model using explainers. The interpretation instance also serves as an execution context; however, the interpretation instance does not execute explainers itself - its purpose is to be a prescription (of what is requested) and a stateful data holder. The interpretation result (referenced by the instance) is a set of explanations which were created by explainers.

Attributes:
key: str

Interpretation key.

common_params: commons.CommonInterpretationParams

Interpretation parameters specified by the user.

explainers: list

Explainers to be run (if no explainers specified, then all compatible explainers are run).

persistence: persistences.Persistence

Persistence store - file-system, in-memory, DB - where interpretation results are stored and from where they might be loaded using the persistence instance.

KEY_ALL_EXPLAINERS = 'all_explainer_ids'
KEY_CREATED = 'created'
KEY_DATASET = 'dataset'
KEY_ERROR = 'error'
KEY_EXECUTED_EXPLAINERS = 'executed_explainers'
KEY_EXPLAINERS = 'explainers'
KEY_E_PARAMS = 'explainers_parameters'
KEY_INCOMPATIBLE_EXPLAINERS = 'incompatible_explainer_ids'
KEY_INCOMPATIBLE_EXPLAINERS_DS = 'incompatible_explainers'
KEY_INSIGHTS = 'insights'
KEY_INTERPRETATION_LOCATION = 'interpretation_location'
KEY_I_KEY = 'interpretation_key'
KEY_I_PARAMS = 'interpretation_parameters'
KEY_MODEL = 'model'
KEY_MODELS = 'models'
KEY_OVERALL_RESULT = 'overall_result'
KEY_PROBLEMS = 'problems'
KEY_PROGRESS = 'progress'
KEY_PROGRESS_MESSAGE = 'progress_message'
KEY_RESULT = 'result'
KEY_RESULTS_LOCATION = 'results_location'
KEY_SCHEDULED_EXPLAINERS = 'scheduled_explainers'
KEY_STATUS = 'status'
KEY_TARGET_COL = 'target_col'
KEY_TESTSET = 'testset'
KEY_VALIDSET = 'validset'
static dict_to_digest(i_json: dict)
error: str
explainer_results: dict[str, ExplainerResult]
get_all_explainer_ids() list[str]
get_explainer_ids_by_status(status: int) list[str]
get_explainer_insights(explainer_id: str) list
get_explainer_jobs_by_status(status: int) list[ExplainerJob]
get_explainer_problems(explainer_id: str) list
get_explainer_result(explainer_id: str) ExplainerResult | None
get_explainer_result_metadata(explainer_id: str) dict | None
get_explanation_file_path(explanation_type: str, explanation_format: str, explainer_id: str = '', evaluator_id: str = '')

Get explanation (index) file path.

Parameters:
explainer_id: str

Explainer ID - either explainer or evaluator ID must be specified.

evaluator_id: str

Evaluator ID - either explainer or evaluator ID must be specified.

explanation_type: str

Explanation type as string.

explanation_format: str

Explanation (MIME) format.

Returns:
str

Path to the explanation file.

get_failed_explainer_ids() list[str]
get_finished_explainer_ids() list[str]
get_incompatible_explainer_ids() list[str]
get_insights() list
get_jobs_for_evaluator_id(explainer_id: str) list[ExplainerJob]
get_jobs_for_explainer_id(explainer_id: str) list[ExplainerJob]
get_model_insights(model_name: str) list
get_model_problems(model_name: str) list
get_problems_by_severity(severity: ProblemSeverity) list
get_scheduled_explainer_ids() list[str]
get_successful_explainer_ids() list[str]
html_format: HtmlInterpretationFormat
is_evaluator_failed() bool
is_evaluator_finished() bool
is_evaluator_scheduled() bool
is_evaluator_successful() bool
is_explainer_failed() bool

Indicate whether an explainer failed.

is_explainer_finished() bool

Indicate whether an explainer successfully finished or failed.

is_explainer_scheduled() bool

Indicate whether there was at least one explainer which was run.

is_explainer_successful() bool

Indicate whether an explainer successfully finished.

is_finished() bool

Check if interpretation is finished.

Returns True if status indicates completion (success, failed, finished) or progress has reached 100%.

Returns:
bool

True if interpretation is finished, False otherwise.

load(persistence, logger=None) Interpretation

Load persistence interpretation using given persistence.

Parameters:
persistence: persistences.InterpretationPersistence

Interpretation persistence which can be used to load the interpretation from file-system, memory or DB.

logger

Logger.

Returns:
Interpretation

Interpretation instance.

static load_from_json(interpretation_json_path: str | Path) Interpretation

Load interpretation from JSON.

pdf_format: PdfInterpretationFormat
persistence: InterpretationPersistence | None
progress: float
progress_message: str
register_explainer_result(explainer_id: str, result: ExplainerResult)
set_progress(progress: float, message: str | None = None) float
status: ExplainerJobStatus
to_dict() dict
to_html() str
to_html_4_pdf() str
to_json(indent=None) str
to_pdf(input_path: str, output_path: str)
update_overall_result() OverallResult
validate_and_normalize_params()

Validate and check interpretation parameters.

class h2o_sonar.lib.api.interpretations.InterpretationResult(results_location: str = '', interpretation_location: str = '')

Bases: object

Result of the interpretation run.

all_explainer_ids: list
explainer_ids: list
explainers: dict[str, ExplainerJob]
get_evaluator_job(evaluator_job_id: str) ExplainerJob | None
get_evaluator_jobs() list[ExplainerJob]
get_explainer_job(explainer_job_id: str) ExplainerJob | None
get_explainer_jobs() list[ExplainerJob]
get_html_report_location(absolute_path: bool = True) str
get_interpretation_dir_location(absolute_path: bool = True) str
get_interpretations_html_index_location(absolute_path: bool = True)
get_json_report_location(absolute_path: bool = True) str
get_pdf_report_location(absolute_path: bool = True) str
get_progress_location(absolute_path: bool = True) str
get_results_dir_location(absolute_path: bool = True) str
incompatible_explainer_ids: list
incompatible_explainers: dict
insights: list[InsightAndAction]
make_zip_archive(zip_filename)
models: list
overall_result: OverallResult
problems: list[ProblemAndAction]
remove_duplicate_insights()
to_dict() dict
to_json(indent=None) str
upload_url: str
class h2o_sonar.lib.api.interpretations.Interpretations(interpretations_paths: list[str], persistence, branding: Branding = Branding.H2O_SONAR, logger=None)

Bases: object

Interpretations created by H2O Sonar in results location.

count() int
load_interpretation_meta(i_path: str, digest: bool = True) dict
to_html(branding: Branding = Branding.EVAL_STUDIO) str
class h2o_sonar.lib.api.interpretations.OverallResult(value)

Bases: Enum

Overall evaluation/interpretation result in the traffic light style.

high_severity_problems = 4
low_severity_problems = 2
medium_severity_problems = 3
no_problem = 1
class h2o_sonar.lib.api.interpretations.PdfInterpretationFormat(interpretation: Interpretation, logger: SonarLogger, branding: Branding = Branding.H2O_SONAR)

Bases: HtmlInterpretationFormat

PDF (via HTML) representation of the interpretation.

to_html_4_pdf() str

To HTML which can be used to generate PDF.

static to_pdf(input_path: str, output_path: str)

h2o_sonar.lib.api.judges module

class h2o_sonar.lib.api.judges.EvaluationJudge

Bases: ABC

Bring your own judge (BYOJ) to evaluate the quality of a model’s output.

abstractmethod evaluate(prompts: list[str], **kwargs) list

Evaluate the quality of a model’s output.

health_check() bool

Check if the judge is healthy and available.

class h2o_sonar.lib.api.judges.LlmEvaluationJudge(llm_host_connection: ConnectionConfig, llm_model_name: str, logger: SonarLogger | None = None)

Bases: EvaluationJudge

LLM judge / interrogator for evaluating the quality of a model output.

evaluate(prompts: list[str], **extra_params) list

Evaluate the quality of a model’s output.

class h2o_sonar.lib.api.judges.RagClientEvaluationJudge(client: RagClient, llm_model_name: str, collection_id: str = '')

Bases: EvaluationJudge

RAG judge / interrogator for evaluating the quality of a model output.

evaluate(prompts: list[str], **extra_params) list

Evaluate the quality of a model’s output.

h2o_sonar.lib.api.judges.get_default_evaluation_judge(logger: SonarLogger | None = None)

Get the default evaluation judge - OpenAI GPT-4 LLM model. If the OpenAI API key is not set, then raise exception.

h2o_sonar.lib.api.judges.get_evaluation_judge_for_config(judge_config: EvaluationJudgeConfig, logger: SonarLogger | None = None)

Get an evaluation judge for the given judge config.

h2o_sonar.lib.api.judges.get_evaluation_judge_for_connection(connection: ConnectionConfig, judge_type: str, llm_model_name: str, collection_id: str = '', logger: SonarLogger | None = None)

Get an evaluation judge for the given connection and judge type.

h2o_sonar.lib.api.models module

class h2o_sonar.lib.api.models.DriverlessAiModel(model_src, target_col: str = '', used_features: list[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None, logger=None)

Bases: ExplainableModel

Explainable model which understands Driverless AI experiments and models thus it can get model metadata, ensure required sanitization and correctly construct predict method which accepts expected input and provides desired output.

ATTR_HAS_SHAPLEYS = 'has_treeshap'
ATTR_LABEL_NAMES = 'output_names'
COL_SHAPLEY_BIAS = 'contrib_bias'
EXT_MOJO = '.mojo'
PREFIX_CLASS = 'class.'
PREFIX_SHAPLEY_COLS = 'contrib_'
static is_dai_model(model_src) bool
shapley_values(X, original_features: bool = True, fast_approx: bool = False, **kwargs)

Get Shapley values.

Parameters:
X: datatable.Frame

Dataset to calculate Shapley values.

original_features: bool

True to get Shapley values for original features, False to get Shapley values for transformed features.

fast_approx: bool

True to use fast approximation for Shapley values calculation.

Returns:
datatable.Frame

Shapley values based feature contributions.

class h2o_sonar.lib.api.models.DriverlessAiRestServerModel(model_server_url: str, target_col: str = '', used_features: list[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: ExplainableModel

Explainable model which represents Driverless AI experiments deployed as a REST server. Driverless AI is moving from the local REST Server to MLOps, therefore this is deprecated in 1.10.4 and will be removed. Nevertheless, it is useful for existing Driverless AI deployments.

See also:

static is_dai_rest_server_model(model_src) bool
class h2o_sonar.lib.api.models.ExplainableLlmModel(connection: str | ConnectionConfig, model_type: ExplainableModelType = ExplainableModelType.unknown, name: str = '', llm_model_name: str = '', llm_model_meta: dict | None = None, model_cfg: dict | None = None, key: str = '', logger: SonarLogger | None = None)

Bases: object

KEY_CONNECTION = 'connection'
KEY_H2OGPTE_STATS = 'h2ogpte_perf_stats'
KEY_H2OGPTE_VISION_M = 'vision_model_name'
KEY_KEY = 'key'
KEY_LLM_MODEL_META = 'llm_model_meta'
KEY_LLM_MODEL_NAME = 'llm_model_name'
KEY_MODEL_CFG = 'model_cfg'
KEY_MODEL_TYPE = 'model_type'
KEY_NAME = 'name'
KEY_STATS_DURATION = 'duration_stats'
KEY_STATS_FAILURE = 'failure_count'
KEY_STATS_RETRY = 'retry_count'
KEY_STATS_SUCCESS = 'success_count'
KEY_STATS_TIMEOUT = 'timeout_count'
clone()
static from_dict(as_dict: dict, connection=None) ExplainableLlmModel
to_dict()
to_json(indent=None)
class h2o_sonar.lib.api.models.ExplainableModel(model_src, predict_method: Callable, fit_method=None, model_type: ExplainableModelType = ExplainableModelType.unknown, model_meta: ExplainableModelMeta | None = None, transformed_model: TransformedFeaturesModel | None = None, label_encoder: MultiColumnLabelEncoderAbc | None = None, logger: SonarLogger | None = None)

Bases: object

Explainable model - this class provides uniform API for ML models regardless model source, provider or implementation.

fit(X: ExplainableDataset | Frame, y=None, **kwargs)
property has_transformed_model: bool

Does the explainable model provide an associated model which works on the transformed features?

static load(path: str)

Load model from pickle.

Parameters:
path: str

Model pickle path.

Returns:
ExplainableModel

Instance of the pickled model.

property meta: ExplainableModelMeta
predict(X: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method.

predict_datatable(X, **kwargs) Frame

Score and return predictions as datatable frame.

predict_pandas(X, **kwargs) DataFrame

Score and return predictions as Pandas frame.

save(path: str, update: bool = False)

Pickle the model.

Parameters:
path: str

Model pickle path.

update: bool

Delete the pickled model if it already exists on the given path prior to saving the new model.

shapley_values(X, original_features: bool = True, **kwargs)

Get Shapley values.

Parameters:
X: datatable.Frame

Dataset to calculate Shapley values.

original_features: bool

True to get Shapley values for original features, False to get Shapley values for transformed features.

Returns:
datatable.Frame

Shapley values based feature contributions.

to_dict()
to_json(indent=None)
property transformed_model: TransformedFeaturesModel | None

Get associated model which works on the transformed features.

class h2o_sonar.lib.api.models.ExplainableModelHandle(connection_key: str, model_key: str, model_version: str = '')

Bases: ResourceHandle

Handle to a REMOTE model hosted by a remote system described by its connection configuration.

ExplainableModelHandle differs from the ExplainableModel in that it doesn’t provide the actual predict function, but only the metadata required to access the model.

static from_string(str_handle: str, h2o_sonar_config=None) ExplainableModelHandle

Create a new instance of the model handle from the string.

class h2o_sonar.lib.api.models.ExplainableModelMeta(description: str = '', is_constant: bool = False, is_remote: bool = False, has_shapley_values: bool = False, target_col: str = '', used_features: list | None = None, feature_importances: dict | None = None, feature_meta: dict | None = None, transformed_features: list | None = None, model_path: str = '', model_file_size: int = 0, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: object

Explainable ML model metadata - this class provides uniform API to get ML model metadata regardless model source, provider and implementation.

Model labels (labels class field) convention:

  • Regression model: labels field to be empty list [].

  • Binomial model: labels field to be list with two strings or integers which represent the model labels; the positive class of interest to be the second list item.

  • Multinomial model: labels field to be list of strings or integers with the model classes.

default_feature_importances() dict

Construct default (fallback) feature importances - list of features used by the model with importances 0.0 - to be used if no importances were provided by the user.

property feature_importances: dict

Return per-feature importance set by the user.

property features_metadata: FeaturesMetadata
get_model_type() ExperimentType

Get experiment type (regression, binomial and multinomial) for model.

Returns:
DaiExperimentType:

DAI experiment type.

property has_shapley_values

Does the model provide Shapley values?

property has_text_transformers: bool

Does the model have text transformers?

property is_constant

Is model constant?

property num_labels: int
property positive_label_of_interest

In case of binomial classification it returns label of the positive class of interest.

to_dict()
to_json(indent=None)
property transformed_features: list
property used_features: list
class h2o_sonar.lib.api.models.ExplainableModelType(value)

Bases: Enum

Explainable model type (extensible via inheritance).

amazon_bedrock_rag = 15
anthropic_llm = 14
azure_openai_llm = 13
driverless_ai = 2
driverless_ai_rest = 3
static from_connection_type(connection_type: ConnectionConfigType) ExplainableModelType
h2o3 = 4
h2ogpt = 8
h2ogpte = 6
h2ogpte_llm = 7
h2ollmops = 9
static is_llm(explainable_model_type: ExplainableModelType) bool
static is_rag(explainable_model_type: ExplainableModelType) bool
mock = 1
ollama = 10
openai_llm = 12
openai_rag = 11
scikit_learn = 5
static to_connection_type(explainable_model_type: ExplainableModelType) ConnectionConfigType | None
unknown = 16
class h2o_sonar.lib.api.models.ExplainableRagModel(connection: str | ConnectionConfig, model_type: ExplainableModelType = ExplainableModelType.unknown, name: str = '', collection_id: str = '', collection_name: str = '', llm_model_name: str = '', llm_model_meta: dict | None = None, documents: list[str] | None = None, model_cfg: dict | None = None, key: str = '', logger: SonarLogger | None = None)

Bases: object

KEY_COLLECTION_ID = 'collection_id'
KEY_COLLECTION_NAME = 'collection_name'
KEY_CONNECTION = 'connection'
KEY_DOCUMENTS = 'documents'
KEY_KEY = 'key'
KEY_LLM_MODEL_META = 'llm_model_meta'
KEY_LLM_MODEL_NAME = 'llm_model_name'
KEY_MODEL_CFG = 'model_cfg'
KEY_MODEL_TYPE = 'model_type'
KEY_NAME = 'name'
clone()
static from_dict(as_dict: dict, connection=None) ExplainableRagModel
to_dict()
class h2o_sonar.lib.api.models.H2o3Model(model_src, target_col: str = '', used_features: list[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: ExplainableModel

H2O-3 explainable model implementation.

static is_h2o3_model(model_src) bool
class h2o_sonar.lib.api.models.ModelApi(logger: SonarLogger | None = None)

Bases: object

Model API interface provides uniform API allowing explainers to use any model (scorer) regardless provider, implementation or runtime details.

Detects model (path to model, instance of supported model, ..) and creates instances of the Model class.

create_model(model_src, target_col: str, used_features: list[str] | None = None, model_type: ExplainableModelType = ExplainableModelType.unknown, dataset: ExplainableDataset | Frame | DataFrame | str | Path | None = None, sanitization_map: SanitizationMap | None = None, **extra_params) ExplainableModel

Create explainable model.

Parameters:
model_src: Any

Path to model on the filesystem, instance of a 3rd party model, pickle or any other source that can be used to create explainable model. Information about the model can be passed to 3rd party model implementations (like H2O-3) which can create the model.

target_col: str

Target column.

used_features: list[str] | None

Optional list of features names used by the model - it’s required in case of all models which don’t provide introspection allowing to determine used features.

model_type: ExplainableModelType

Explainable model type hint which can be used to construct the model correctly.

dataset: datasets.ExplainableDataset | datatable.Frame | pandas.DataFrame | str | pathlib.Path | None

Optional training dataset.

sanitization_map: SanitizationMap | None

Optional dataset sanitization map used by model.

Returns:
ExplainableModel

Explainable model.

class h2o_sonar.lib.api.models.ModelVendor

Bases: object

DAI = 'daimojo'
H2O = 'h2o'
SKLEARN = 'sklearn'
class h2o_sonar.lib.api.models.OpenAiRagModel(connection: str | ConnectionConfig, name: str = '', thread_id: str = '', llm_model_name: str = '', documents: list[str] | None = None, key: str = '', logger: SonarLogger | None = None)

Bases: ExplainableRagModel

OpenAI RAG model - AI Assistant with File Search/Retrieval tool enabled.

class h2o_sonar.lib.api.models.PickleFileModel(model_src, predict_method: Callable, fit_method=None, model_type: ExplainableModelType = ExplainableModelType.unknown, model_meta: ExplainableModelMeta | None = None, transformed_model: TransformedFeaturesModel | None = None, label_encoder: MultiColumnLabelEncoderAbc | None = None, logger: SonarLogger | None = None)

Bases: ExplainableModel

Pickled explainable model.

EXT_PICKLE = '.pkl'
static from_pickle(model_src, target_col: str = '', used_features: list[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None) ExplainableModel
static is_pickle_file_model(model_src) bool
class h2o_sonar.lib.api.models.ScikitLearnModel(model_src, target_col: str = '', used_features: list[str] | None = None, labels: list | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | Frame | DataFrame | None = None, logger=None)

Bases: ExplainableModel

Scikit-learn explainable model implementation.

static is_scikit_learn_model(model_src) bool
predict(X: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method. Scikit-learn models require specific constraints which are enforced by this model-specific method.

class h2o_sonar.lib.api.models.TransformedFeaturesModel(model_src, transformed_predict_method, transform_dataset_method, model_meta: ExplainableModelMeta | None = None)

Bases: object

Transformed features model is associated with ExplainableModel which works on original (raw features).

ExplainableModel may have associated transformed features model. In order to score a dataset using transformed features model, the dataset must be transformed first from the original (dataset and features) to transformed (dataset and features) using feature transformers.

static load(path: str)

Load model from pickle.

Parameters:
path: str

Model pickle path.

Returns:
ExplainableModel

Instance of the pickled model.

property meta: ExplainableModelMeta
predict(transformed_x: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method.

save(path: str, update: bool = False)

Pickle the model.

Parameters:
path: str

Model pickle path.

update: bool

Delete the pickled model if it already exists on the given path prior to saving the new model.

transform_dataset(X: ExplainableDataset | Frame, **kwargs) ExplainableDataset | Frame

Transform dataset from original to transformed features.

h2o_sonar.lib.api.models.explainable_rag_llm_model_from_json(json_dict: dict) ExplainableRagModel | ExplainableLlmModel

Create LLM or RAG model from the JSon dictionary.

Parameters:
json_dict: dict

JSon dictionary containing LLM or RAG model definition which can be found in interpretation.json::models section.

Returns:
ExplainableRagModel | ExplainableLlmModel

Instance of the LLM or RAG model.

h2o_sonar.lib.api.models.guess_model_labels(dataset: ExplainableDataset | DataFrame | Frame, target_col, labels: list | None = None, model_type_str='scikit-learn', logger: SonarLogger | None = None) list[str] | None

Guess model labels from the dataset.

Parameters:
dataset: datasets.ExplainableDataset | pandas.DataFrame | datatable.Frame

Dataset used to train the model.

target_col: str

Target column name.

labels: list[str] | None

List of model labels to return if it is not possible to determine them.

logger: loggers.SonarLogger | None

Logger instance.

model_type_str: str

Model type string to be used in exception messages.

Returns:
list[str] | None

List of model labels. If None, then it’s not possible to determine it.

h2o_sonar.lib.api.models.guess_model_used_features(dataset: ExplainableDataset | DataFrame | Frame, target_col: str = '', model_type_str: str = 'scikit-learn') list[str]

Guess features used by the model from the dataset.

Parameters:
dataset: datasets.ExplainableDataset | pandas.DataFrame | datatable.Frame

Dataset used to train the model.

target_col: str

Target column name. If specified, the target column will be removed from the used features, otherwise it will be included.

model_type_str: str

Model type string to be used in exception messages.

Returns:
list[str]

List of features used by the model.

h2o_sonar.lib.api.persistences module

class h2o_sonar.lib.api.persistences.ExplainerPersistence(data_dir: str, username: str, explainer_id: str, explainer_job_key: str, mli_key: str = None, store_persistence: Persistence | None = None)

Bases: InterpretationPersistence

Explainer persistence.

Filesystem structure:

mli_experiment_<UUID>/ (MLI interpretation) OR explanation_<job UUID>/ (ad hoc)
    explainer_<explainer ID>_<job UUID>/
        <explanation name>/
            explanation.<extension>
            ... extra files completing main explanation file allowed in this dir
         work/
            ... directory which can be used for intermediary results persistence

Web access:

http://<HOST>:<PORT>/files/mli_experiment_<UUID>/...
http://<HOST>:<PORT>/files/explanation_<UUID>/...

Hints:

  • Explainer and explanation names are checked to contain safe characters only (alpha, num, ., _ and -). IDs are preserved (filesystem/runtime match).

  • Format identifiers (MIME types) are processed to contain safe characters only.

  • explanation.<extension> is “index file” - directory may contain also other files which form/support the explanations

  • Explainer may be executed multiple times within one MLI interpretation, therefore uniqueness is guaranteed by job UUID.

  • Datatable explanation is canonical (always present), others are optional.

Examples

# MLI interpretation
mli_experiment_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

    # OOTB PD explainer
    explainer_h2oaicore.h2o_sonar.oss.byor.explainers.pd.PD_4d774e62-3c67...06530ed5ceb/
        global_partial_dependence/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
        local_individual_conditional_explanation/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
                feature_1_class_1_pd.json
                ...
                feature_n_class_n_pd.json

    # hot deployed feature importance explainer
    explainer_False_test_kernel_shap_f72edb06_...er.TestKernelShap_4d7...d5ceb/
        local_feature_importance/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json

# Ad hoc explainer run
explanation_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

    # OOTB feature importance explainer
    explainer_h2oaicore.h2o_sonar.oss.byor.explainers.kernel_shap.KernelShap_4d7...ceb/
        global_feature_importance/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
DIR_EXPLAINER = 'explainer_'
DIR_INSIGHTS = 'insights'
DIR_LOG = 'log'
DIR_PROBLEMS = 'problems'
DIR_WORK = 'work'
EXPLAINER_LOG_PREFIX = 'explainer_run_'
EXPLAINER_LOG_SUFFIX_ANON = '_anonymized.log'
FILE_DONE_DONE = 'EXPLAINER_DONE'
FILE_DONE_FAILED = 'EXPLAINER_FAILED'
FILE_EXPLAINER_PICKLE = 'explainer.pickle'
FILE_EXPLANATION = 'explanation'
FILE_INSIGHTS = 'insights_and_actions.json'
FILE_ON_DEMAND_EXPLANATION_SUFFIX = 'on_demand_explanation.txt'
FILE_PROBLEMS = 'problems_and_actions.json'
FILE_RESULT_DESCRIPTOR = 'result_descriptor.json'
property explainer_id: str
property explainer_job_key: str
static get_dirs_for_explainer_id(data_dir: str, username: str, mli_key: str, explainer_id: str, explainer_job_key: str | None = None) list
get_evaluator_working_file(file_name: str) str
get_explainer_ann_log_file() str
get_explainer_ann_log_path() str
get_explainer_dir() str
get_explainer_dir_archive() str
get_explainer_insights_dir() str
get_explainer_insights_file(file_name: str) str
get_explainer_log_dir() str
get_explainer_log_file() str
get_explainer_log_path() str
get_explainer_problems_dir() str
get_explainer_problems_file(file_name: str) str
get_explainer_working_dir() str
get_explainer_working_file(file_name: str) str
get_explanation_dir_path(explanation_type: str, explanation_format: str) str

Get explanation directory path.

Parameters:
explanation_typestr

Explanation identifier returned by explanation_type().

explanation_formatstr

Format MIME type.

Returns:
str

Path to the directory with the explanation.

get_explanation_file_path(explanation_type: str, explanation_format: str, explanation_file: str = None) str
get_explanation_meta_path(explanation_type: str, explanation_format: str) str
static get_key_for_explainer_dir(explainer_dir_path: str) str | None
static get_locators_for_explainer_id(data_dir: str, username: str, mli_key: str, explainer_id: str, explainer_job_key: str | None = None) list[tuple[str, str]] | None
get_relative_path(path: str, base_entity: str = 'interpretation')
get_result_descriptor_file_path() str
load_insights() list[dict]

Load insights.

load_problems() list[dict]

Load model problems.

load_result_descriptor() dict
static make_dir(target_dir)
make_explainer_dir()
make_explainer_insights_dir()
make_explainer_log_dir()
make_explainer_problems_dir()
make_explainer_sandbox(dai_params=None)

Create explainer working dir and log directories as well as common files.

Parameters:
dai_params: CommonDaiExplainerParameters

Common explainer parameters to be stored in the root of the interpretation (if it already doesn’t exist).

make_explainer_working_dir()
static makedirs(path: str, exist_ok=True)

Avoid some inefficiency in os.makedirs().

Parameters:
pathstr

Path to directory/ies to create.

exist_okbool

If False, fail when the directory already exists; if True (default), an existing directory is accepted.

Returns:
str

Path to newly created directory.

resolve_mli_path(mli_key: str, username: str)

Resolve the MLI interpretation directory: it is expected to be in a directory with the username in its path, but it may also have been created in a directory without it (depending on config.per_user_directories, or if migrated from 1.8.x).

rm_explainer_dir()
save_insights(insights: list[dict])

Save insights.

static save_json(data: dict, path: str)
save_problems(problems: list[dict])

Save model problems.

property username: str
class h2o_sonar.lib.api.persistences.FilesystemPersistence(base_path: Path | str | None = None, logger=None)

Bases: Persistence

File-system store persistence.

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path, logger=None)
delete_file(key: str | Path) bool
delete_tree(key: str | Path)
exists(key: str | Path) bool
static flush_dir_for_file(file_path: str) bool
static get_default_cwl()

Get default current working location when none is specified by the user.

getcwl()

Get current working location - directory, memory key or DB locator.

is_dir(key: str | Path) bool
is_file(key: str | Path) bool
list_dir(key: str | Path) list
list_files_by_wildcard(key: str | Path, wildcard: str) list
load(key: str | Path, data_type: PersistenceDataType | None = None) Any
load_json(key: str | Path) dict | list
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str | Path, zip_key: str | Path, file_filter=<function FilesystemPersistence.<lambda>>)

Create ZIP archive of given directory.

Parameters:
src_key: src

Absolute path to directory to be archived.

zip_key: src

ZIP archive path.

file_filter:

Function to be used for filtering - it gets relative path from the src_dir_path as parameter and returns boolean indicating whether to keep (False) or filter file out (True).

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.text)
static save_json(key: str | Path, data: dict | list, indent: int = 4, save_explainer_params=False) dict
touch(key: str | Path)
property type
update(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
class h2o_sonar.lib.api.persistences.InMemoryPersistence

Bases: Persistence

In-memory key-based store persistence.

DIR = <h2o_sonar.lib.api.persistences.InMemoryPersistence.Directory object>
class Directory

Bases: object

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path, logger=None)
delete_file(key: str | Path) bool
delete_tree(key: str | Path)
exists(key: str | Path) bool
static get_default_cwl()

Get default current working location when none is specified by the user.

getcwl()

Get current working location - directory, memory key or DB locator.

is_dir(key: str | Path) bool
is_file(key: str | Path) bool
list_dir(key: str | Path) list
list_files_by_wildcard(key: str | Path, wildcard: str) list
load(key: str | Path, data_type: PersistenceDataType = PersistenceDataType.binary) Any
load_json(key: str | Path) dict
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str | Path, zip_key: str | Path, file_filter=<function InMemoryPersistence.<lambda>>)

Make ZIP archive of given source directory.

Parameters:
src_keystr

Source key (directory path).

zip_keystr

ZIP key (ZIP file path).

file_filterCallable

File filter.

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
save_json(key: str | Path, data: dict, indent: int = 4, save_explainer_params=False)
touch(key: str | Path)
property type
class h2o_sonar.lib.api.persistences.InterpretationPersistence(data_dir: str, username: str, mli_key: str = None, ad_hoc_explainer_job_key: str = None, store_persistence: Persistence | None = None, logger=None)

Bases: object

Interpretation persistence - class used to manage interpretation files and directories within base data directory (or equivalent on particular store type).

Once extended to actual writing/reading of files it should also simplify store switch - like remote/multinode/distributed.

Filesystem structure:

<base data dir>/

mli_experiment_<UUID>/ … MLI interpretation (bulk explainers run) explanation_<job UUID>/ .. ad-hoc

Examples

# MLI interpretation mli_experiment_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

# Ad hoc explainer run explanation_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

DIR_AD_HOC_EXPLANATION = 'explanation_'
DIR_AUTOML_EXPERIMENT = 'h2oai_experiment_'
DIR_MLI_EXPERIMENT = 'mli_experiment_'
DIR_MLI_TS_EXPERIMENT = 'mli_experiment_timeseries_'
FILE_COMMON_PARAMS = 'explainers_common_parameters.json'
FILE_EXPERIMENT_ID_COLS = 'experiment_id_columns.json'
FILE_EXPERIMENT_IMAGE = 'IS_IMAGE'
FILE_EXPERIMENT_TS = 'IS_TIMESERIES'
FILE_H2O_SONAR_HTML = 'h2o-sonar.html'
FILE_INTERPRETATION_HTML = 'interpretation.html'
FILE_INTERPRETATION_HTML_4_PDF = 'interpretation-detailed.html'
FILE_INTERPRETATION_JSON = 'interpretation.json'
FILE_INTERPRETATION_PDF = 'interpretation-detailed.pdf'
FILE_MLI_EXPERIMENT_LOG = 'mli_experiment_log_'
FILE_PREFIX_DATASET = 'dataset_'
FILE_PROGRESS_JSON = 'progress.json'
KEY_E_PARAMS = 'explainers_parameters'
KEY_RESULT = 'result'
property ad_hoc_job_key: str
property base_dir: str
create_dataset_path() str
property data_dir: str
static get_ad_hoc_mli_dir_name(data_dir: str, username: str, explainer_job_key: str)
static get_async_log_file_name(mli_key: str)
static get_base_dir(data_dir: str, dir_name: str)
get_base_dir_file(file_name: str) str
get_experiment_id_cols_path() str
get_html_4_pdf_path() str
get_html_path() str
get_json_path() str
static get_mli_dir_name(data_dir: str, username: str, mli_key: str)
get_pdf_path() str
is_common_params()
static is_safe_name(name: str) bool

Check whether given name is formed by alphanumeric chars (and therefore filesystem safe).

static list_interpretations(data_dir: str, username: str, store_persistence: Persistence, paths: bool = True)

List interpretations.

Parameters:
data_dirstr

H2O Sonar results directory.

usernamestr

Username.

store_persistencePersistence

Handle to the store persistence.

pathsbool

Return list of paths (e.g. file-systems) if True (default), else return interpretation UUIDs.

load_common_params(patch_sequential_execution: bool | None = None) CommonInterpretationParams

Load CommonExplainerParameters entity from interpretation root dir.

load_explainers_params(explainer_id: str = '') dict

Load explainers parameters dictionary from interpretation JSon.

load_is_image_experiment()
load_is_timeseries_experiment()
load_message_entity(path: str) dict
make_base_dir()
make_dir_zip_archive(src_dir_path: str | Path, zip_path: str | Path, file_filter=<function InterpretationPersistence.<lambda>>)
make_interpretation_sandbox()

Create interpretation directory as well as common files.

make_tmp_dir()
property mli_key: str
resolve_model_path(model_path: str)

Resolve fitted model path as there are several combinations of DAI configuration and experiment creation (path):

  • fitted model path MAY have <username> prefix, based on whether it was created in 1.8.x version or with config.per_user_directories=True/False

  • current user directory may be either data directory, or may have username in path based on config.per_user_directories configuration item value

Parameters:
model_path: str

(Un)fitted model relative path as present on model entity as model.fitted_model_path.

rm_base_dir(logger=None)
rm_dir(dir_path)
save_as_html(interpretation_html: str)

Save interpretation as HTML.

save_as_json(interpretation_dict: dict)

Save interpretation as JSon.

save_as_pdf(interpretation)

Save interpretation as PDF.

save_common_params(entity: CommonInterpretationParams)

Save CommonExplainerParameters entity to interpretation root dir.

save_experiment_type_hints(is_timeseries: bool = False, is_image: bool = False)

Write hint (in backward compatible manner) indicating experiment type (like timeseries or image) to interpretation directory (IID is default).

Parameters:
is_timeseriesbool

Write time series hint.

is_imagebool

Write image hint.

save_message_entity(entity, path: str)
property tmp_dir: str
static to_alphanum_name(name: str)

Convert given name to filesystem save string formed by alphanumeric characters.

static to_server_file_path(data_dir: str, path: str)

Return bare server path without data directory

static to_server_path(data_dir: str, path: str)

Return bare server path without data directory

property user_dir: str
class h2o_sonar.lib.api.persistences.JsonPersistableExplanations

Bases: ABC

Interface for classes implementing explanations JSon file persistence.

Examples

 ice = ICE("Step by step ICE loading")
 ice.load_json("cache/ice.json")

 es = ice.explanations()

 es = ICE("On the fly").explain(
   ["Feature"],
   X,
   predict_method=scorer
).save_json()
class PandasJSonEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

Custom Pandas DataFrames serializer.

default(o)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return super().default(o)
static check_explanations_serializability(explanations)
property default_json_file_name
abstractmethod load_json(path=None)

Load explanations from JSon file.

Parameters:
path: str

Local file path from where to load JSon explanations. If path isn’t specified, then explanations are loaded from explanations.json in the current directory.

Returns:
dict

Explanations deserialized from JSon.

abstractmethod save_json(path=None)

Save explanations as JSon file.

Parameters:
pathstr

Local file path where to store explanations. If path isn’t specified, then explanations are stored to ‘explanations.json’ in the current directory

class h2o_sonar.lib.api.persistences.NanEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

encode(obj)

Return a JSON string representation of a Python data structure.

>>> from json.encoder import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'
class h2o_sonar.lib.api.persistences.Persistence(logger=None)

Bases: ABC

Key/value-based persistence API interface provides a uniform store-agnostic API allowing explainers to use the chosen store type regardless of container runtime or technology to store explainer results (explanations). It aims to enable writing identical code regardless of whether explanation data is stored/loaded to/from filesystem, memory or DB.

Interface and implementations are based on opaque string keys (which might be filesystem paths, dictionary keys or NoSQL database keys) and data types (text, binary, …). On implementation initialization, the base in-memory reference, filesystem path or DB connection information is set.

There are the following special types of data which are written to filesystem (network or memory) regardless of the chosen store type:

  • temporary files (explainer work/ directory)

  • log files (explainer log/ directory)

Therefore, an explainer sandbox is always created on the file-system, but it might be located in user specified directory (in case of file-system store) or system temp directory (in case of in-memory or database store).

The persistence API is written with security (barriers) and performance in mind.

PREFIX_INTERNAL_STORE = 'h2o_sonar-of-'
static check_key(key: str | Path) str

Check and fix key.

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path, logger=None)
delete_file(key: str | Path) bool
static delete_temp_dir(tmp_dir_path: str | Path)
delete_tree(key: str | Path) bool
exists(key: str | Path) bool
static flush_dir_for_file(file_path: str) bool
getcwl()

Get current working location - directory, memory key or DB locator.

static is_binary_file(key: str) bool
is_dir(key: str | Path) bool
is_dir_or_file(key: str | Path) bool
is_file(key: str | Path) bool
static key_folder(key: str | Path) str

Get (parent) folder key for given key (equivalent of os.path.dirname()).

list_dir(key: str | Path) list
list_files_by_wildcard(key: str | Path, wildcard: str) list
load(key: str | Path, data_type: PersistenceDataType = PersistenceDataType.binary) Any
load_json(key: str | Path) dict
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str, zip_key: str, file_filter=<function Persistence.<lambda>>)

Make ZIP archive of given source directory.

Parameters:
src_keystr

Source key (directory path).

zip_keystr

ZIP key (ZIP file path).

file_filterCallable

File filter.

static make_key(*args) str

Assemble key (path) from the string arguments given to this function (equivalent of os.path.join()).

static make_temp_dir() str
static make_temp_file(file_name: str) str
path_to_internal(path: str | Path) str
static safe_name(key: str) str

Encode name to be store (file-system) safe (can be decoded if needed).

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
touch(key: str | Path)
property type
update(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
class h2o_sonar.lib.api.persistences.PersistenceApi(logger: SonarLogger | None = None)

Bases: ABC

Factory which creates Persistence implementations for various store types and purposes which are available in specific runtime and/or container(s).

create_explainer_persistence(store_persistence: Persistence, base_path: str | Path, interpretation_key: str, explainer_id: str, explainer_job_key: str, username: str = '') ExplainerPersistence

Create explainer persistence atop given store persistence e.g. to store explainer data to database.

create_interpretation_persistence(store_persistence: Persistence, base_path: str | Path, interpretation_key: str, username: str = '') InterpretationPersistence

Create interpretation persistence atop given store persistence e.g. to store interpretations in-memory.

create_persistence(persistence_type: PersistenceType = PersistenceType.file_system, base_path: str = '', connection_string: str = '') InMemoryPersistence | FilesystemPersistence

Create persistence of given store type - file-system, in-memory or DB. Default store persistence is file-system persistence with base in the current directory.

Parameters:
persistence_typePersistenceType

Type of the persistence to create.

base_pathstr

Optional root path of the persistence on the host store (where meaningful e.g. file-system).

connection_stringstr

Optional connection string (where meaningful e.g. database).

Returns:
Any

Persistence to load/store container and explainer artifacts.

get_cwl(persistence_type: PersistenceType = PersistenceType.file_system)
class h2o_sonar.lib.api.persistences.PersistenceDataType(value)

Bases: Enum

binary = 1
datatable = 2
json = 4
text = 3
class h2o_sonar.lib.api.persistences.PersistenceType(value)

Bases: Enum

database = 3
file_system = 1
in_memory = 2
class h2o_sonar.lib.api.persistences.RobustEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

default(obj)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return super().default(o)

h2o_sonar.lib.api.plots module

class h2o_sonar.lib.api.plots.Data3dPlot

Bases: object

Plot 3D data:

  • heatmap

  • 3D surface plot

  • 3D contour plot

PLOT_TYPES = ['heatmap', 'contour-3d', 'surface-3d']
PLOT_TYPE_CONTOUR = 'contour-3d'
PLOT_TYPE_HEATMAP = 'heatmap'
PLOT_TYPE_SURFACE = 'surface-3d'
static plot(x_axis_labels: list, y_axis_labels: list, heatmap_data: Frame, chart_title: str = '', x_axis_label: str = '', y_axis_label: str = '', plot_type: str = 'heatmap', color_map: str = 'autumn', figsize=(12, 10), dpi=120, plot_file_path: str = '', logger=None, log_name: str = '')

Heatmap plot.

Parameters:
x_axis_labelslist

Horizontal axes labels.

y_axis_labelslist

Vertical axes labels.

heatmap_datadatatable.Frame

Datatable frame with heatmap data (column names don’t matter, only data are relevant).

chart_titlestr

Chart title.

x_axis_labelstr

Horizontal axis label.

y_axis_labelstr

Vertical axis label.

plot_typestr

Plot type, one of PLOT_TYPES.

color_mapstr

Matplotlib color map name.

figsizetuple

Figure size.

dpiint

Dots per inch.

plot_file_pathstr

Path to save the plot to.

logger

Logger instance.

log_namestr

Name of the logger.

class h2o_sonar.lib.api.plots.ScatterFeatImpPlot

Bases: object

Scatter plot feature importance representation is based on chart from:

https://github.com/slundberg/shap

static plot(contributions, frame, alpha: float = 1.0, colormap: str | None = None, figsize=(12, 12), jitter: float = 0.35, chart_title: str = 'Feature importance summary plot', x_label: str = 'Value', y_label: str = 'Feature', thermometer_label: str = 'Normalized feature value', columns=None, top_n_features: int = 20, samples: int | None = None, colorize_factors: bool = True, drop_zero_contribs=True, hard_asserts=False, logger=None) Figure

Feature importance summary plot.

Summary plot shows contribution of features for each instance. The sum of the feature contributions and the bias term is equal to the raw prediction of the model, i.e., prediction before applying inverse link function.

Parameters:
contributions

Pandas contributions frame with coefficients. Frame column names to be (sanitized) feature names, rows to correspond to dataset rows, cells to be coefficients.

frame

Pandas dataset frame with values. Frame column names to be (sanitized) feature names, rows to correspond to dataset rows, cells to be values.

columns

Either a list of columns or column indices to show. If specified parameter top_n_features will be ignored.

top_n_featuresint

A number of columns to pick using variable importance (where applicable). Set to -1 to show all features.

samples

Maximum number of observations to use; if lower than number of rows in the frame, take a random sample.

colorize_factors

If True, use colors from the colormap to colorize the factors; otherwise all levels will have same color.

alpha

Transparency of the points.

colormap

Colormap to use instead of the default blue to red colormap.

figsize

Figure size - passed directly to matplotlib.

jitter

Amount of jitter used to show the point density.

chart_titlestr

Chart title.

x_labelstr

Chart x-axis label.

y_labelstr

Chart y-axis label.

thermometer_labelstr

Chart thermometer label.

drop_zero_contribs

Whether to drop features that have zero contribution. Features that are not used in the final model will have zero contribution.

hard_assertsbool

Used in testing to raise exception in try except statements.

logger

Optional logger object.

Returns:
pyplot.Figure

A matplotlib figure object which can be saved or displayed.

h2o_sonar.lib.api.plots.safe_plot_names(column_list: list[str]) list

Return a list of column names that exclude problematic special characters for matplotlib plotting functions.

Parameters:
column_list: list[str]

List of column names.

Returns:
List:

List with column names that are safe to plot.

h2o_sonar.lib.api.problems module

class h2o_sonar.lib.api.problems.AVIDProblemCode(value)

Bases: ProblemCode

Problem codes from AVID https://docs.avidml.org/taxonomy/effect-sep-view

E0100_BIAS = ('E0100', 'Concerns of algorithms propagating societal bias')
E0200_EXPLAINABILITY = ('E0200', 'Ability to explain decisions made by AI')
E0300_TOXICITY = ('E0300', 'Perpetuating/causing/being affected by negative user actions')
E0400_MISINFORMATION = ('E0400', 'Perpetuating/causing the spread of falsehoods')
P0100_DATA = ('P0100', 'Problems arising due to faults in the data pipeline')
P0200_MODEL = ('P0200', 'Ability for the AI to perform as intended')
P0300_PRIVACY = ('P0300', 'Protect leakage of user information as required by rules and regulations')
P0400_SAFETY = ('P0400', 'Minimizing maximum downstream harms')
S0400_MODEL_BYPASS = ('S0400', 'Intentionally try to make a model perform poorly')
S0500_EXFILTRATION = ('S0500', 'Directly or indirectly exfiltrate ML artifacts')
S0600_DATA_POISONING = ('S0600', 'Usage of poisoned data in the ML pipeline')
class h2o_sonar.lib.api.problems.AVIDProblemCodeType(code, description)

Bases: tuple

code

Alias for field number 0

description

Alias for field number 1

class h2o_sonar.lib.api.problems.ProblemAndAction(description: str, description_html: Airium | None = None, severity: ProblemSeverity = ProblemSeverity.medium, problem_type: str = 'problem', problem_attrs: dict = None, actions_description: str = '', actions_codes: list[str] = None, explainer_id: str = '', explainer_name: str = '', evaluator_id: str = '', evaluator_name: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', resources: list[str] = None, problem_code: ProblemCode = None)

Bases: AbcProblemInsight

Instance of this class represents a problem of the interpreted model identified by an explainer. Apart from the problem description, the entry provides also problem severity, problem category (brief characteristic), problem attributes (dictionary of machine processable data describing the problem which might be used for instance as an input to actions), textual description of suggested actions to mitigate the problem (actionability), explainer which detected the problem, and references to resources (explanations, document URLs, …).

KEY_PROBLEM_ATTRS = 'problem_attrs'
KEY_PROBLEM_TYPE = 'problem_type'
KEY_SEVERITY = 'severity'
static from_dict(problem_dict: dict) ProblemAndAction
to_dict() dict
class h2o_sonar.lib.api.problems.ProblemCode(value)

Bases: Enum

class h2o_sonar.lib.api.problems.ProblemSeverity(value)

Bases: Enum

static compare(severity1, severity2) int
high = 1
low = 3
medium = 2
h2o_sonar.lib.api.problems.problems_for_bool_leaderboard(evaluator, leaderboard, primary_metric_meta: MetricMeta, metric_threshold: float | None = None, severity: ProblemSeverity | None = None, problem_type: str = 'accuracy', problem_code: ProblemCode = None, explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '') None

Generate problems based on the heatmap leaderboard analytics.

For models whose average Passes metric score is below the threshold, a problem is created with the description of the problem, severity, problem type, problem attributes, and actions description.

h2o_sonar.lib.api.problems.problems_for_cls_leaderboard(evaluator, leaderboard, metric_threshold: float | None = None, primary_metric_meta=None, severity: ProblemSeverity | None = None, problem_type: str = 'classification', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '', problem_code: ProblemCode = None) None

Generate problems based on the classification leaderboard analytics.

h2o_sonar.lib.api.problems.problems_for_heat_leaderboard(evaluator, leaderboard, metric_threshold: float | None = None, primary_metric_meta=None, severity: ProblemSeverity | None = None, problem_type: str = 'accuracy', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '', problem_code: ProblemCode = None) None

Generate problems based on the heatmap leaderboard analytics.

h2o_sonar.lib.api.results module

class h2o_sonar.lib.api.results.Data3dResult(persistence: ExplainerPersistence, explainer_id: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_names: str = '') dict
classmethod help() dict[str, list[dict[str, str | bool]]]
plot(*, feature_names: str = '', plot_type: str = 'surface-3d', title: str = '')
class h2o_sonar.lib.api.results.DiaResult(persistence: ExplainerPersistence, explainer_id: str, dia_entry_constants: DiaEntryConstant, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

class DiaCategory(value)

Bases: Enum

DIA_CATEGORY_CM = 'cm'
DIA_CATEGORY_DISPARITY = 'disparity'
DIA_CATEGORY_ME_SMD = 'me_smd'
DIA_CATEGORY_PARITY = 'parity'
DIA_METRICS = 'metrics'
class DiaEntryConstant(dia_entity_file: str, param_feature_summaries: str, param_feature_name: str, param_name: str, param_features: str, ref_levels: str)

Bases: object

data(*, feature_name: str, category: DiaCategory | str, ref_level: int | str | None = None) Frame
classmethod help() dict[str, list[dict[str, str | bool]]]
params() dict
plot(*, feature_name: str, metrics_of_interest: list[str] | str | None = None, file_path: str = '') list[str]
class h2o_sonar.lib.api.results.DtResult(persistence: ExplainerPersistence, explainer_id: str, explainer_name: str, h2o_sonar_config=None, highlight_highest_residual: bool = False, logger=None)

Bases: ExplainerResult

data()
classmethod help() dict[str, list[dict[str, str | bool]]]
plot(*, clazz: str | None = None)
class h2o_sonar.lib.api.results.FeatureImportanceResult(persistence: ExplainerPersistence, explainer_id: str = '', chart_title: str = 'Global Feature Importance', chart_x_axis: str = 'feature', chart_y_axis: str = 'importance', h2o_sonar_config=None, logger=None, explanation_format: type[ExplanationFormat] = <class 'h2o_sonar.lib.api.formats.GlobalFeatImpJSonFormat'>, explanation: type[Explanation] = <class 'h2o_sonar.lib.api.explanations._explanations.GlobalFeatImpExplanation'>)

Bases: ExplainerResult

data(*, clazz: str | None = None) Frame
classmethod help() dict[str, list[dict[str, str | bool]]]
plot(*, clazz: str | None = None, file_path: str = '')
class h2o_sonar.lib.api.results.LeaderboardResult(persistence: ExplainerPersistence, explainer_id: str = '', chart_title: str = 'Leaderboard', chart_x_axis: str = 'metrics', chart_y_axis: str = 'models', h2o_sonar_config=None, logger=None, explanation_format: type[ExplanationFormat] = <class 'h2o_sonar.lib.api.formats.LlmHeatmapLeaderboardJSonFormat'>, explanation: type[Explanation] = <class 'h2o_sonar.lib.api.explanations._explanations_leaderboards.LlmHeatmapLeaderboardExplanation'>)

Bases: ExplainerResult

Create a leaderboard evaluator result (heatmap-based, bool-based, …).

data(*, metric_id: str | None = None) dict
classmethod help() dict[str, list[dict[str, str | bool]]]
plot(*, metric_id: str | None = None, file_path: str = '')
class h2o_sonar.lib.api.results.PdResult(persistence: ExplainerPersistence, explainer_id: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_name: str, clazz: str | None = None) Frame
classmethod help() dict[str, list[dict[str, str | bool]]]
plot(*, feature_name, clazz=None, override_feature_type: Literal['categorical', 'numeric'] | None = None, file_path: str = '', is_problematic: bool = False)
exception h2o_sonar.lib.api.results.ResultValueError

Bases: ValueError

class h2o_sonar.lib.api.results.SummaryShapResult(persistence: ExplainerPersistence, explainer_id: str, raw_contribs_idx_filename: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_names: list[str] | str | None = None, clazz: str | None = None) Frame
classmethod help() dict[str, list[dict[str, str | bool]]]
plot(*, feature_names: list[str] | str | None = None, clazz: str | None = None)
class h2o_sonar.lib.api.results.TemplateResult(persistence: ExplainerPersistence, explainer_id: str, explainer_name: str, logger=None)

Bases: ExplainerResult

data(**kwargs) Frame
plot(**kwargs)
h2o_sonar.lib.api.results.list_in_english(items: list[str], quote_item=True) str
h2o_sonar.lib.api.results.matplotlib_closing(show: bool)

Module contents