h2o_sonar.lib.api package

Submodules

h2o_sonar.lib.api.commons module

class h2o_sonar.lib.api.commons.Branding(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Branding.

EVAL_STUDIO = 2
H2O_SONAR = 1
class h2o_sonar.lib.api.commons.CommonInterpretationParams(model, models, dataset, target_col: str, validset='', testset='', use_raw_features: bool = '', weight_col: str = '', prediction_col: str = '', drop_cols: List | None = [], sample_num_rows: int | None = 0, results_location: str = '', used_features: List | None = None, extra_params: List | None = None)

Bases: object

PARAM_DATASET = <h2o_sonar.lib.api.commons.Param object>
PARAM_DROP_COLS = <h2o_sonar.lib.api.commons.Param object>
PARAM_MODEL = <h2o_sonar.lib.api.commons.Param object>
PARAM_MODELS = <h2o_sonar.lib.api.commons.Param object>
PARAM_PREDICTION_COL = <h2o_sonar.lib.api.commons.Param object>
PARAM_RESULTS_LOCATION = <h2o_sonar.lib.api.commons.Param object>
PARAM_SAMPLE_NUM_ROWS = <h2o_sonar.lib.api.commons.Param object>
PARAM_TARGET_COL = <h2o_sonar.lib.api.commons.Param object>
PARAM_TESTSET = <h2o_sonar.lib.api.commons.Param object>
PARAM_USED_FEATURES = <h2o_sonar.lib.api.commons.Param object>
PARAM_USE_RAW_FEATURES = <h2o_sonar.lib.api.commons.Param object>
PARAM_VALIDSET = <h2o_sonar.lib.api.commons.Param object>
PARAM_WEIGHT_COL = <h2o_sonar.lib.api.commons.Param object>
clone() CommonInterpretationParams
describe_config_item(config_item_name: str) Param | None
describe_config_items() Dict[str, Param]
dump() dict
static load(d: dict) CommonInterpretationParams
to_dict() dict

Safe string-friendly serialization to dictionary.

class h2o_sonar.lib.api.commons.ConfigItem(name: str = '', description: str = '', comment: str = '', type: str = '', val: Any = '', predefined: List | None = None, tags: List | None = None, min_: float = 0.0, max_: float = 0.0, category: str = '')

Bases: object

KEY_CATEGORY = 'category'
KEY_COMMENT = 'comment'
KEY_DESCRIPTION = 'description'
KEY_MAX = 'max_'
KEY_MIN = 'min_'
KEY_NAME = 'name'
KEY_PREDEFINED = 'predefined'
KEY_TAGS = 'tags'
KEY_TYPE = 'type'
KEY_VAL = 'val'
clone() ConfigItem
dump() dict
static load(d: dict) ConfigItem
class h2o_sonar.lib.api.commons.EvaluatorParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Evaluator parameters.

bool = ExplainerParamType.bool
customlist = ExplainerParamType.customlist
describe()
dict = ExplainerParamType.dict
float = ExplainerParamType.float
int = ExplainerParamType.int
list = ExplainerParamType.list
multilist = ExplainerParamType.multilist
str = ExplainerParamType.str
class h2o_sonar.lib.api.commons.EvaluatorToRun(evaluator_id: str, params: str | Dict = None, extra_params: List | None = None)

Bases: ExplainerToRun

class h2o_sonar.lib.api.commons.ExperimentType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Experiment types.

binomial = 2
multinomial = 3
regression = 1
class h2o_sonar.lib.api.commons.ExplainerFilter

Bases: object

Filters for listing explainers.

BLUEPRINT_INPUT_NAME = 'blueprint_input_name'
EXPLAINER_ID = 'explainer_id'
IID: str = 'iid'
IMAGE: str = 'image'
REQUIRES_PREDICT: str = 'requires_predict_method'
TIME_SERIES: str = 'time_series'
UNSUPERVISED: str = 'unsupervised'
class h2o_sonar.lib.api.commons.ExplainerJobStatus(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

ABORTED_BY_RESTART = 4
ABORTED_BY_USER = 3
CANCELLED = 1
FAILED = 2
FINISHED = 0
IN_PROGRESS = -1
RUNNING = -1
SCHEDULED = -3
SUCCESS = 0
SYNCING = -4
TIMED_OUT = 5
UNKNOWN = -2
static from_int(status_code: int) ExplainerJobStatus
static is_job_failed(status: ExplainerJobStatus) bool
static is_job_finished(status: ExplainerJobStatus) bool
static is_job_running(status: ExplainerJobStatus) bool
to_string(status_code: int)
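
Example - a minimal sketch of mapping integer status codes to the enum and checking job state; it assumes the helpers behave as their names and the status codes listed above suggest:

    from h2o_sonar.lib.api import commons

    status = commons.ExplainerJobStatus.from_int(0)  # FINISHED (alias: SUCCESS)
    commons.ExplainerJobStatus.is_job_finished(status)  # expected: True
    commons.ExplainerJobStatus.is_job_running(
        commons.ExplainerJobStatus.RUNNING
    )  # expected: True
    commons.ExplainerJobStatus.is_job_failed(
        commons.ExplainerJobStatus.FAILED
    )  # expected: True
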
class h2o_sonar.lib.api.commons.ExplainerParamKey

Bases: object

KEY_ALL_EXPLAINERS_PARAMS = 'explainers_params'
KEY_DATASET = 'dataset'
KEY_DESCR_PATH = 'result_descriptor_path'
KEY_EXPERIMENT_TYPE = 'experiment_type'
KEY_E_DEPS = 'explainer_dependencies'
KEY_E_ID = 'explainer_id'
KEY_E_JOB_KEY = 'explainer_job_key'
KEY_E_PARAMS = 'explainer_params'
KEY_FEATURES_META = 'features_metadata'
KEY_I_DATA_PATH = 'interpretation_data_path'
KEY_KWARGS = 'pk'
KEY_LEGACY_I_PARAMS = 'legacy_i_params'
KEY_MODEL = 'model'
KEY_MODEL_TYPE = 'model_type'
KEY_ON_DEMAND = 'on_demand_explanation'
KEY_ON_DEMAND_MLI_KEY = 'on_demand_mli_key'
KEY_ON_DEMAND_PARAMS = 'on_demand_params'
KEY_PARAMS = 'params'
KEY_RUN_KEY = 'run_key'
KEY_TESTSET = 'testset'
KEY_USER = 'user'
KEY_VALIDSET = 'validset'
KEY_WORKER_NAME = 'worker_name'
class h2o_sonar.lib.api.commons.ExplainerParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Explainer parameters.

bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.ExplainerToRun(explainer_id: str, params: str | Dict = None, extra_params: List | None = None)

Bases: object

Parametrized explainer (to run) - ID and explainer parameters (dictionary, JSON string, or any other format the explainer can process).

clone() ExplainerToRun
dump() dict
static load(d: dict) ExplainerToRun
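
Example - a minimal sketch of declaring a parametrized explainer run and round-tripping it through dump()/load(); the explainer ID and parameters are illustrative:

    from h2o_sonar.lib.api.commons import ExplainerToRun

    to_run = ExplainerToRun(
        explainer_id="my.package.MyExplainer",  # illustrative ID
        params={"sample_size": 1000},           # illustrative parameters
    )
    restored = ExplainerToRun.load(to_run.dump())
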
class h2o_sonar.lib.api.commons.ExplanationScope(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Explanation scope.

global_scope = 2
local_scope = 1
class h2o_sonar.lib.api.commons.FilterEntry(filter_by: str = '', value=None)

Bases: object

KEY_FILTER_BY = 'filter_by'
KEY_VALUE = 'value'
clone() FilterEntry
dump() dict
static load(d: dict) FilterEntry
class h2o_sonar.lib.api.commons.InterpretationParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

any = 9
bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.Keyword(key: str, name: str, description: str)

Bases: object

Keyword.

class h2o_sonar.lib.api.commons.KeywordGroup(prefix: str, name: str, description: str, keywords: List[Keyword] | None = None)

Bases: object

Keyword group.

is_member(keywords: List[str]) bool

Check if the entity (evaluator, explainer, method) with given keywords is a member of this keyword group.

class h2o_sonar.lib.api.commons.KeywordGroups(groups: List[KeywordGroup] = None)

Bases: object

Keyword groups.

add_group(group: KeywordGroup)
get_group(prefix: str) KeywordGroup | None
class h2o_sonar.lib.api.commons.LlmModelHostType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

RAG = 2
SERVICE = 1
class h2o_sonar.lib.api.commons.LookAndFeel

Bases: object

BLUE_THEME = 'blue'
COLORMAP_BLUE_2_RED = ['#00AAEE', '#FF1166']
COLORMAP_WHITE_2_BLACK = ['#ffffff', '#000000']
COLORMAP_YELLOW_2_BLACK = ['#fec925', '#000000']
COLOR_BLACK = '#000000'
COLOR_DAI_GREEN = '#bbc600'
COLOR_H2OAI_YELLOW = '#fec925'
COLOR_HOT_ORANGE = '#fd5800'
COLOR_MATPLOTLIB_BLUE = '#3b74b4'
COLOR_RED = '#ff0000'
COLOR_WHITE = '#ffffff'
DRIVERLESS_AI_THEME = 'driverless_ai'
FORMAT_HEXA = 'hexa'
H2O_SONAR_THEME = 'h2o_sonar'
KEY_LF = 'look_and_feel'
THEME_2_BG_COLOR = {'blue': '#ffffff', 'driverless_ai': '#000000', 'h2o_sonar': '#ffffff'}
THEME_2_COLORMAP = {'blue': ['#00AAEE', '#FF1166'], 'driverless_ai': ['#fec925', '#000000'], 'h2o_sonar': ['#fec925', '#000000']}
THEME_2_FG_COLOR = {'blue': '#3b74b4', 'driverless_ai': '#fec925', 'h2o_sonar': '#fec925'}
THEME_2_LINE_COLOR = {'blue': '#000000', 'driverless_ai': '#ffffff', 'h2o_sonar': '#000000'}
static get_bg_color(theme: str)
static get_colormap(colormap_data: List[str] | str = '', theme: str = '')

Get Matplotlib colormap.

Parameters:
colormap_data: Union[List[str], str]

Create the color map either from a list of two colors (string hexadecimal color specification) or by color map name.

theme: str

H2O Eval Studio theme on which to base the color map.

Returns:
matplotlib.colors.Colormap

Color map.

static get_fg_color(theme: str)
static get_line_color(theme: str)
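
Example - a minimal sketch of resolving theme colors and colormaps; the values in the comments follow the constant tables above:

    from h2o_sonar.lib.api.commons import LookAndFeel

    # Colormap for a built-in theme (per THEME_2_COLORMAP above).
    cmap = LookAndFeel.get_colormap(theme=LookAndFeel.H2O_SONAR_THEME)

    # Colormap built directly from two hexadecimal colors.
    cmap2 = LookAndFeel.get_colormap(colormap_data=LookAndFeel.COLORMAP_BLUE_2_RED)

    LookAndFeel.get_fg_color(LookAndFeel.BLUE_THEME)  # '#3b74b4'
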
class h2o_sonar.lib.api.commons.MetricMeta(key: str, display_name: str = '', data_type: str = 'float', display_format: str = '.4f', description: str = '', value_range: Tuple[float, float] | None = (0.0, 1.0), value_enum: List[str] | None = None, higher_is_better: bool = True, threshold: float | None = 0.5, is_primary_metric: bool = True, parent_metric: str = '', exclude: bool = False)

Bases: object

Evaluation/explanation metric metadata.

DATA_TYPE_SECONDS = 'seconds'
KEY_DATA_TYPE = 'data_type'
KEY_DESCRIPTION = 'description'
KEY_DISPLAY_FORMAT = 'display_value'
KEY_DISPLAY_NAME = 'display_name'
KEY_EXCLUDE = 'exclude'
KEY_HIGHER_IS_BETTER = 'higher_is_better'
KEY_IS_PRIMARY_METRIC = 'is_primary_metric'
KEY_KEY = 'key'
KEY_PARENT_METRIC = 'parent_metric'
KEY_THRESHOLD = 'threshold'
KEY_VALUE_ENUM = 'value_enum'
KEY_VALUE_RANGE = 'value_range'
copy() MetricMeta
dump() Dict
static from_dict(data: Dict) MetricMeta
static load(data: Dict) MetricMeta
to_dict(threshold: float | None = None) Dict
to_md(to_rst: bool = False) str
class h2o_sonar.lib.api.commons.MetricsMeta(metrics: List[MetricMeta] = None)

Bases: object

KEY_META = 'metadata'
add_metric(metric: MetricMeta)
contains(key: str) bool
copy_with_overrides(metric_key_to_overrides: Dict) MetricsMeta

Copy metrics meta with updated:

  • display names

  • descriptions

  • exclude flag

Parameters:
metric_key_to_overrides: Dict

Dictionary with metric key to overrides mapping - map: metric key -> field key -> new value.

Returns:
MetricsMeta

Copy of the metrics meta with updated display names and descriptions.

dump() List
static from_dict(metrics_meta: Dict) MetricsMeta
get_metric(key: str) MetricMeta | None
get_metric_description(key: str) str
get_metric_keys() List[str]
get_primary_metric() MetricMeta | None

Return the metric which is marked as primary metric.

get_threshold(key: str, default_value=None) float | None
is_higher_better(key: str) bool
is_metric_passed(key: str, value: float) bool
static load(metrics_meta: List) MetricsMeta
set_threshold(threshold: float, key: str = '')
size() int
to_dict(threshold: float | None = None) Dict
to_list() List[MetricMeta]
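
Example - a minimal sketch of building metrics metadata and checking a metric value against its threshold; it assumes is_metric_passed() compares the value to the threshold in the direction given by higher_is_better:

    from h2o_sonar.lib.api.commons import MetricMeta, MetricsMeta

    accuracy = MetricMeta(
        key="accuracy",
        display_name="Accuracy",
        higher_is_better=True,
        threshold=0.75,
    )
    meta = MetricsMeta(metrics=[accuracy])

    meta.is_metric_passed("accuracy", 0.9)  # True: 0.9 >= 0.75, higher is better
    meta.get_threshold("accuracy")          # 0.75
    meta.get_primary_metric().key           # 'accuracy'
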
class h2o_sonar.lib.api.commons.MimeType

Bases: object

EXT_CSV = 'csv'
EXT_DATATABLE = 'jay'
EXT_DOCX = 'docx'
EXT_HTML = 'html'
EXT_JPG = 'jpg'
EXT_JSON = 'json'
EXT_MARKDOWN = 'md'
EXT_PNG = 'png'
EXT_SVG = 'svg'
EXT_TEXT = 'txt'
EXT_ZIP = 'zip'
MIME_CSV = 'text/csv'
MIME_DATATABLE = 'application/vnd.h2oai.datatable.jay'
MIME_DOCX = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
MIME_EVALSTUDIO_MARKDOWN = 'application/vnd.h2oai-evalstudio-leaderboard.markdown'
MIME_HTML = 'text/html'
MIME_IMAGE = 'image/xyz'
MIME_JPG = 'image/jpeg'
MIME_JSON = 'application/json'
MIME_JSON_CSV = 'application/vnd.h2oai.json+csv'
MIME_JSON_DATATABLE = 'application/vnd.h2oai.json+datatable.jay'
MIME_MARKDOWN = 'text/markdown'
MIME_MODEL_PIPELINE = 'application/vnd.h2oai.pipeline+zip'
MIME_PDF = 'application/pdf'
MIME_PNG = 'image/png'
MIME_SVG = 'image/svg+xml'
MIME_TEXT = 'text/plain'
MIME_ZIP = 'application/zip'
static ext_for_mime(mime: str)
class h2o_sonar.lib.api.commons.ModelTypeExplanation

Bases: object

IID: str = 'iid'
IMAGE: str = 'image'
LLM: str = 'llm'
RAG: str = 'rag'
TIME_SERIES: str = 'time_series'
UNSUPERVISED: str = 'unsupervised'
class h2o_sonar.lib.api.commons.Param(param_name: str, param_type: ParamType | InterpretationParamType | ExplainerParamType, description: str = '', default_value='', value_min: float = 0.0, value_max: float = 0.0, predefined: List | None = None, tags: List | None = None)

Bases: object

Generic parameter used as the (predecessor) base of library, interpretation and explainer parameters.

as_descriptor() ConfigItem

Explainer parameter to descriptor conversion.

class h2o_sonar.lib.api.commons.ParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.PerturbationIntensity(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

EXTREME = 6
HIGH = 4
LOW = 2
MEDIUM = 3
VERY_HIGH = 5
VERY_LOW = 1
class h2o_sonar.lib.api.commons.PerturbatorToRun(perturbator_id: str, intensity: str | PerturbationIntensity = PerturbationIntensity.MEDIUM, params: str | Dict = None)

Bases: object

Parametrized perturbator (to run).

KEYWORD_INTENSITY = 'intensity'
KEYWORD_PARAMS = 'params'
KEYWORD_PERTURBATOR_ID = 'perturbator_id'
clone() PerturbatorToRun
dump() dict
static load(d: dict) PerturbatorToRun
class h2o_sonar.lib.api.commons.ResourceHandle(connection_key: str, resource_key: str, version: str = '')

Bases: object

H_CONNECTION: str = 'connection'
H_KEY: str = 'key'
H_PREFIX: str = 'resource:'
H_VERSION: str = 'version'
static is_handle(handle) bool
static parse_string_handle(loc_str: str) Tuple[str, str, str]

Parse CLI argument into connection, resource key and version.

class h2o_sonar.lib.api.commons.ResourceLocatorType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Resource locator types.

handle = 2
local = 1
class h2o_sonar.lib.api.commons.SafeJavaScript

Bases: object

Safe JavaScript data structure (de)serialization.

INF = 'Infinity'
NAN = 'NaN'
NEG_INF = '-Infinity'
static decode_to_float(obj)
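
Example - a sketch of the intended use, assuming decode_to_float() converts the string constants above back to Python floats in a decoded JSON structure (the exact accepted input shape is an assumption):

    import json

    from h2o_sonar.lib.api.commons import SafeJavaScript

    # Non-finite floats are serialized as the string constants above.
    obj = json.loads('{"score": "NaN", "upper_bound": "Infinity"}')
    decoded = SafeJavaScript.decode_to_float(obj)
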
class h2o_sonar.lib.api.commons.SemVer(major: int, minor: int, patch: int)

Bases: object

static from_int_list(version_list: List[int])

Semantic version from the list of 3 integers.

Returns:
Optional[SemVer]

Instance of SemVer class if valid version, None otherwise.

static from_str(version: str)

Parse a semantic version <major>.<minor>.<patch>.

Returns:
Optional[SemVer]

Instance of SemVer class if valid version, None otherwise.
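
Example - parsing and constructing semantic versions per the methods above:

    from h2o_sonar.lib.api.commons import SemVer

    v = SemVer.from_str("1.2.3")          # SemVer with major=1, minor=2, patch=3
    v2 = SemVer.from_int_list([1, 2, 3])  # the same version
    SemVer.from_str("not-a-version")      # None (invalid version)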

class h2o_sonar.lib.api.commons.UpdateGlobalExplanation

Bases: object

Update mode: merge (to add new explanations) or replace (default).

OPT_CLASS: str = 'class'

Driverless AI/common parameters source: inherit or request (default).

OPT_FEATURE: str = 'feature'
OPT_INHERIT: str = 'inherit'
OPT_MERGE: str = 'merge'
OPT_REPLACE: str = 'replace'
OPT_REQUEST: str = 'request'
PARAMS_SOURCE: str = 'params_source'
UPDATE_MODE: str = 'update_mode'
UPDATE_SCOPE: str = 'update_scope'
h2o_sonar.lib.api.commons.add_string_list(items: List | None, add_items: List | None) List

Robust list handling of features to drop, process, use, skip, …

h2o_sonar.lib.api.commons.base_pkg(obj)

Get base package for given Python object.

Parameters:
obj: Python object
Returns:
str

Base package of the Python object and sub-package, e.g. sklearn or ensemble.

h2o_sonar.lib.api.commons.generate_key() str
h2o_sonar.lib.api.commons.harmonic_mean(xs) float
h2o_sonar.lib.api.commons.is_ncname(s: str) bool
h2o_sonar.lib.api.commons.is_port_used(hostname: str = '127.0.0.1', port: int = 12345, service_name='Driverless AI', timeout=15, logger=None) bool
h2o_sonar.lib.api.commons.is_valid_key(key: str) bool

h2o_sonar.lib.api.datasets module

class h2o_sonar.lib.api.datasets.DatasetApi(logger: SonarLogger | None = None)

Bases: object

Dataset API interface providing a uniform API that allows explainers to use any dataset regardless of its format or location details.

static create_dataset(dataset_src, dataset_type: ExplainableDatasetType = ExplainableDatasetType.unknown, target_col: str = '', sampled_dataset_path: str = '', sample_num_rows: int | None = None, sampler: DatasetSampler | None = None, **extra_params) ExplainableDataset

Create explainable dataset.

Parameters:
dataset_src: Union[ExplainableDataset, datatable.Frame, str, Dict, pandas.DataFrame, h2o.H2OFrame]

Create the dataset from the given source: explainable dataset instance, datatable frame, H2OFrame, Pandas DataFrame, string (expected to be a path to a CSV, .jay or any other file type supported by datatable), or dictionary (used to construct the frame).

dataset_type: ExplainableDatasetType

Optional dataset type hint, which can be used to construct the dataset correctly.

sampled_dataset_path: str

Optional file path, which can be used to create a new file with the sampled dataset (if the dataset is sampled and sampling is needed).

target_col: str

Optional target column name.

sample_num_rows: Optional[int]

If None, sample automatically based on the dataset and RAM size. If > 0, sample the dataset to sample_num_rows rows. If 0, do NOT sample.

sampler: Optional[DatasetSampler]

Sampling method (implementation) to be used - see the h2o_sonar.utils.sampling module (documentation) for available sampling methods. Pass a sampler instance to use a specific sampling method.

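Example - a minimal sketch of creating an explainable dataset from a CSV file; the file path and column name are illustrative:

    from h2o_sonar.lib.api.datasets import DatasetApi

    dataset = DatasetApi.create_dataset(
        dataset_src="data/train.csv",  # illustrative path
        target_col="label",            # illustrative target column
        sample_num_rows=0,             # 0 disables sampling
    )
    frame = dataset.data  # datatable.Frame (see ExplainableDataset below)
    meta = dataset.meta   # ExplainableDatasetMeta
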
static write_csv(dataset: Frame | DataFrame, path: str, bom: bool = False)
static write_dataset(dataset: Frame | DataFrame | Series | ndarray, path: str)
static write_datatable_dataset(dataset: Frame, path: str)
static write_pandas_dataset(data: DataFrame | Series | ndarray, path: str)
static zip_csv(csv_file_path)
class h2o_sonar.lib.api.datasets.ExplainableColumnMeta(name: str = '', data_type: str = '', logical_types: List | None = None, values_format: str = '', is_id: bool = False, is_numeric: bool = False, is_categorical: bool = False, count: int = 0, frequency: int = 0, unique: int = 0, max_value: int | None = None, min_value: int | None = None, mean: float | None = None, std: float | None = None, histogram_counts: List | None = None, histogram_ticks: List | None = None, properties: Dict | None = None)

Bases: object

Dataset column metadata.

to_dict() Dict
class h2o_sonar.lib.api.datasets.ExplainableDataset(data=None, meta=None, logger=None)

Bases: object

Dataset with metadata - this class provides a uniform API to get dataset data regardless of the dataset source, provider or implementation.

COL_BIAS = 'bias'
KEY_DATA = 'data'
KEY_METADATA = 'metadata'
property data: Frame
static frame_2_datatable(frame, columns: List | None = None, trim_to_columns: List | None = None) Frame

Convert frame to datatable.

Parameters:
frame

A frame to be converted.

columns: Optional[List]

Optional list of column names to be used for the newly created frame - column names are overwritten by this list.

trim_to_columns: Optional[List]

Remove all columns that are not on this list from the result frame.

Returns:
datatable.Frame

Datatable frame.

static frame_2_numpy(frame, flatten: bool = False) ndarray | None
static frame_2_pandas(frame, columns: List | None = None, trim_to_columns: List | None = None) DataFrame

Convert frame to Pandas.

Parameters:
frame

A frame to be converted.

columns: Optional[List]

Optional list of column names to be used for the newly created frame - column names are overwritten by this list.

trim_to_columns: Optional[List]

Remove all columns that are not on this list from the result frame.

Returns:
pandas.DataFrame

Pandas frame.

static is_bias_col(col_name) bool
property meta: ExplainableDatasetMeta
prepare(drop_na_rows: bool = True, used_features: ~typing.List | None = None, le_cat_variables: bool = True, cleaned_frame_type: ~typing.Type[~pandas.core.frame.DataFrame] | ~typing.Type[~datatable.Frame] = <class 'datatable.Frame'>, update: bool = False) Tuple[Frame | DataFrame, List, MultiColumnLabelEncoder, int]

Method with commonly needed actions to preprocess an explainable dataset. Third-party libraries often require, e.g., numeric features only, examples without N/A or undefined values, … which this method ensures.

Parameters:
drop_na_rows: bool

Drop rows with N/A values.

used_features: Optional[List]

Trim the dataset to the used features.

le_cat_variables: bool

Label-encode non-numerical columns.

cleaned_frame_type

Frame type to return - Pandas or datatable.

update: bool

If True, set the data field of this ExplainableDataset instance, else return the cleaned dataset and keep the data field intact.

Returns:
Tuple[datatable.Frame, List[str], Any, int]

Result frame; non-numeric column names (label encoded); label encoder; number of dropped rows with N/A values.
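
Example - a minimal sketch of the documented prepare() contract, continuing the create_dataset() example above:

    import datatable as dt

    cleaned, encoded_cols, encoder, n_dropped = dataset.prepare(
        drop_na_rows=True,       # drop rows with N/A values
        le_cat_variables=True,   # label-encode non-numerical columns
        cleaned_frame_type=dt.Frame,
    )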

sample(*args, **kwargs)

Sample the explainable dataset and return a new instance.

to_dict()
to_json(indent=None)
transform(*args, **kwargs)

Transform the explainable dataset - sanitize, sample - and return a new explainable dataset instance.

class h2o_sonar.lib.api.datasets.ExplainableDatasetHandle(connection_key: str, dataset_key: str, dataset_version: str = '')

Bases: ResourceHandle

Handle to a REMOTE dataset hosted by a remote system described by its connection configuration.

ExplainableDatasetHandle differs from ExplainableDataset in that it doesn’t provide the actual dataset data, but only the metadata required to access the dataset.

static from_string(str_handle: str, h2o_sonar_config=None) ExplainableDatasetHandle

Create a new instance of the dataset handle from the string.

class h2o_sonar.lib.api.datasets.ExplainableDatasetMeta(shape: Tuple | None = None, columns_meta: List[ExplainableColumnMeta] | None = None, column_names: List | None = None, column_types: List | None = None, column_uniques: List | None = None, columns_cat: List | None = None, columns_num: List | None = None, file_name: str = '', file_path: str = '', file_size: int = 0, key: str = '', missing_values: List | None = None)

Bases: object

Dataset metadata - this class provides a uniform API to get basic EDA dataset metadata regardless of the dataset source, provider or implementation.

KEY_COLUMNS_CAT = 'columns_cat'
KEY_COLUMNS_META = 'columns_meta'
KEY_COLUMNS_NUM = 'columns_num'
KEY_COLUMN_NAMES = 'column_names'
KEY_COLUMN_TYPES = 'column_types'
KEY_COLUMN_UNIQUES = 'column_uniques'
KEY_FILE_NAME = 'file_name'
KEY_FILE_PATH = 'file_path'
KEY_FILE_SIZE = 'file_size'
KEY_MISSING_VALUES = 'missing_values'
KEY_ORIGINAL_DATASET_PATH = 'original_dataset_path'
KEY_ORIGINAL_DATASET_SAMPLED = 'original_dataset_sampled'
KEY_ORIGINAL_DATASET_SHAPE = 'original_dataset_shape'
KEY_ORIGINAL_DATASET_SIZE = 'original_dataset_size'
KEY_ROW_COUNT = 'row_count'
KEY_SHAPE = 'shape'
copy()
get_column_meta(column_name: str)
has_column(column_name: str)
is_categorical_column(column_name: str)
is_numeric_column(column_name: str)
to_dict()
to_json(indent=None)
class h2o_sonar.lib.api.datasets.ExplainableDatasetType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

datatable = 3
filesystem = 2
h2o3 = 6
pandas = 4
remote = 1
unknown = 5
class h2o_sonar.lib.api.datasets.ExplainableDatatableDataset(frame: Frame)

Bases: ExplainableDataset

Datatable based dataset.

class h2o_sonar.lib.api.datasets.LlmDataset

Bases: object

Dataset used to evaluate LLMs and RAGs.

COLUMNS = ['input', 'corpus', 'context', 'categories', 'relationships', 'model_key', 'test_key', 'expected_output', 'output_constraints', 'output_condition', 'actual_output', 'actual_duration', 'cost']
COL_ACTUAL_DURATION = 'actual_duration'
COL_ACTUAL_OUTPUT = 'actual_output'
COL_CATEGORIES = 'categories'
COL_CONTEXT = 'context'
COL_CORPUS = 'corpus'
COL_COST = 'cost'
COL_EXPECTED_OUTPUT = 'expected_output'
COL_INPUT = 'input'
COL_MODEL_KEY = 'model_key'
COL_OUTPUT_CONDITION = 'output_condition'
COL_OUTPUT_CONSTRAINTS = 'output_constraints'
COL_RELATIONSHIPS = 'relationships'
COL_TEST_KEY = 'test_key'
KEY_ACTUAL_DURATION = 'actual_duration'
KEY_ACTUAL_OUTPUT = 'actual_output'
KEY_CATEGORIES = 'categories'
KEY_CONTEXT = 'context'
KEY_CORPUS = 'corpus'
KEY_COST = 'cost'
KEY_EXPECTED_OUTPUT = 'expected_output'
KEY_INPUT = 'input'
KEY_INPUTS = 'inputs'
KEY_KEY = 'key'
KEY_MODEL_KEY = 'model_key'
KEY_OUTPUT_CONDITION = 'output_condition'
KEY_OUTPUT_CONSTRAINTS = 'output_constraints'
KEY_RELATIONSHIPS = 'relationships'
KEY_TC_KEY = 'test_case_key'
KEY_TEST_KEY = 'test_key'
class LlmDatasetRow(i: str, context: List[str] | None = None, corpus: List[str] | None = None, categories: str | List[str] = '', relationships: List | None = None, expected_output: str = '', output_constraints: List[str] | Any | None = None, output_condition: str = '', actual_output: str = '', actual_duration: float = 0.0, cost: float = 0.0, model_key: str = '', test_key: str = '', key: str = '')

Bases: object

add_relationship(relationship_type: str, target: str, target_type: str)
copy(update_key: bool = True)
static from_dict(as_dict: Dict)
perturb(perturbators: List[PerturbatorToRun], raised_errors: List | None = None)

Perturb the input (prompt) using the specified perturbators. The perturbation is always performed in place on the input, which is a string.

Parameters:
perturbators: List[commons.PerturbatorToRun]

List of perturbators to run.

raised_errors: Optional[List]

List of raised errors.

to_dict() Dict
add_input(i: str, corpus: List[str] | None = None, context: List[str] | None = None, categories: str | List[str] = '', relationships: List | None = None, expected_output: str = '', output_constraints: List[str] | Any | None = None, output_condition: str = '', actual_output: str = '', actual_duration: float = 0.0, cost: float = 0.0, model_key: str = '', test_key: str = '', key: str = '')

Add new dataset row - question / prompt / input with related (meta)data.

Parameters:
i: str

Input / question / prompt.

corpus: Optional[List[str]]

URLs/paths to document(s) which were used to fine-tune the RAG for this test case.

context: Optional[List[str]]

Context (set of document chunks by value, i.e. text snippets) returned by the vector database for augmentation of the LLM.

categories: Union[str, List[str]]

Categories of the input (question/prompt), like: math, knowledge, reasoning, …

relationships: Optional[List]

Relationships among rows capturing e.g. perturbation source/product.

expected_output: str

Expected output / answer.

output_constraints: Optional[Union[List[str], Any]]

Optional output / answer constraints, which may be any data structure that can be serialized to JSON. They are interpreted by the explainer and used for validation.

output_condition: str

Optional string condition which is interpreted and used by the explainer to validate the output / answer. output_condition can use output_constraints or vice versa.

actual_output: str

Actual output / answer returned by the LLM / RAG product.

actual_duration: float

How much time it took to get the actual answer.

cost: float

Answer/inference cost.

model_key: str

The key of the H2O Eval Studio model which was used to get the actual answer.

test_key: str

The key of the test to which the test case belongs.

key: str

Key of the dataset row.

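Example - a minimal sketch of building an LLM dataset row by row; it assumes the default constructor creates an empty dataset, and all values are illustrative:

    from h2o_sonar.lib.api.datasets import LlmDataset

    llm_dataset = LlmDataset()
    llm_dataset.add_input(
        i="What is the capital of France?",
        expected_output="Paris",
        categories="knowledge",
        actual_output="Paris",
        actual_duration=0.8,
        cost=0.0001,
    )
    llm_dataset.prompts()  # ['What is the capital of France?']
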
static from_datatable_dict(as_dict: Dict) LlmDataset

Deserialize a datatable dictionary to LlmDataset. Structured fields (corpus, categories, and output_constraints) are automatically deserialized from JSON strings to dictionaries if possible.

Parameters:
as_dict: Dict

Dictionary created using datatable.to_dict().

Returns:
LlmDataset

LLM dataset.

static from_datatable_json_enc_col(enc_json_col: str, logger=None) List

Robust deserialization of a datatable JSON-encoded column with a list value.

static from_dict(as_dict: Dict) LlmDataset
static load_from_json(json_file_path: str | Path, datatable_format: bool = False)
merge(other_llm_dataset: LlmDataset)

Merge another dataset into this one.

Parameters:
other_llm_dataset: LlmDataset

LLM dataset to be merged into this one.

perturb(perturbators: List[PerturbatorToRun], in_place: bool = True, raised_errors: List | None = None) LlmDataset

Perturb the inputs (prompts) using the specified perturbator(s).

Parameters:
perturbators: List[commons.PerturbatorToRun]

Perturbators to run - includes the perturbator ID, intensity, and parameters.

in_place: bool

If True, perturb the prompts in place, otherwise create new perturbed rows.

raised_errors: Optional[List]

If None, raise error(s) if the perturbator(s) fail; otherwise do not raise exceptions and store them in the (empty) list provided by the caller.

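Example - a minimal sketch of perturbing the dataset prompts, continuing the add_input() example above; the perturbator ID is illustrative (available IDs are installation specific):

    from h2o_sonar.lib.api.commons import (
        PerturbationIntensity,
        PerturbatorToRun,
    )

    errors = []  # collect perturbator failures instead of raising
    perturbed = llm_dataset.perturb(
        perturbators=[
            PerturbatorToRun(
                perturbator_id="typo_perturbator",  # illustrative ID
                intensity=PerturbationIntensity.LOW,
            )
        ],
        in_place=False,        # create new perturbed rows
        raised_errors=errors,
    )
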
prompts() List[str]

Return the list of unique prompts.

save_as_json(json_path: str | Path)
shape() List
stats() Dict[str, int | Dict]
to_datatable() Frame
to_datatable_dict() Dict
to_dict() Dict
class h2o_sonar.lib.api.datasets.LlmEvalResults

Bases: object

LLM dataset with metric values from the evaluation.

COL_ACTUAL_OUTPUT_META = 'actual_output_meta'
KEY_RESULTS = 'results'
class LlmEvalResultRow(dataset_row: LlmDatasetRow, metrics: Dict, actual_output_meta: List | None = None, metrics_meta: Dict | None = None)

Bases: object

KEY_ACTUAL_OUTPUT_META = 'actual_output_meta'
KEY_METRICS = 'metrics'
KEY_METRICS_META = 'metrics_meta'
KEY_METRIC_KEY = 'key'
KEY_METRIC_VALUE = 'value'
to_dict(type_friendly_metrics: bool = False) Dict
add_result(result: LlmEvalResultRow)

Add a new evaluation result row.

Parameters:
result: LlmEvalResultRow

Result row.

static from_dict(as_dict: Dict) LlmEvalResults
static load_from_json(json_file_path: str | Path, datatable_format: bool = False)
prompts() List[str]

Return the list of unique prompts.

save_as_json(json_path: str | Path)
shape() List
to_datatable() Frame
to_datatable_dict() Dict
to_dict() Dict
to_llm_dataset() LlmDataset

Convert evaluation results to the LLM dataset - keep all fields, skip metrics.

class h2o_sonar.lib.api.datasets.LlmInputRel(rel_type, target: str = '', target_type: str = 'test_case')

Bases: object

Test case relationship.

KEY_REL_TARGET = 'target'
KEY_REL_TARGET_TYPE = 'target_type'
KEY_REL_TYPE = 'type'
static from_dict(as_dict: Dict) LlmInputRel
to_dict()
class h2o_sonar.lib.api.datasets.LlmInputRelTargetType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Test case / input relationship target types.

test_case = 1
class h2o_sonar.lib.api.datasets.LlmInputRelType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Test case / input relationship types.

perturbation_source = 1
class h2o_sonar.lib.api.datasets.LlmPromptCategories(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

classification = 'classification'
coding = 'coding'
evaluation = 'evaluation'
facts = 'facts'
harm = 'harm'
knowledge = 'knowledge'
math = 'math'
planning = 'planning'
question_answering = 'question_answering'
reasoning = 'reasoning'
recommendation = 'recommendation'
summarization = 'summarization'
troubleshooting = 'troubleshooting'
unknown = 'unknown'
writing = 'writing'
h2o_sonar.lib.api.datasets.filter_importance_greater_than_zero(frame: Frame, label: str | None = None, skip_bias: bool = True) Frame

Filter out all columns with 0s values.

Parameters:
frame: datatable.Frame

Frame to filter.

label

Label for which to pull bias.

skip_bias: bool

If a bias column is present, skip it.

Returns:
Filtered frame.

h2o_sonar.lib.api.explainers module

class h2o_sonar.lib.api.explainers.Explainer

Bases: object

Explainer.

An Explainer instance is NOT meant to be reusable, i.e. the instance must be created using the default constructor, initialized using the setup() method, and used at most once - a single fit() method invocation.

Explainer lifecycle:

  • constructor() Explainer instantiation (for external basic/sanity checks, …). Note that an explainer constructor executed by the H2O Eval Studio runtime must not take parameters.

  • check_compatibility(params) -> bool Explainer check verifying that the explainer will be able to explain the given model. If the compatibility check returns False or raises an error, the explainer will not be run. The compatibility check is optional and does not have to be run by the engine.

  • setup(params) Set required and optional parameters, configuration, etc.

  • fit(X, y) Optional step to train surrogate model(s) or other means the explainer needs. The method gets the data needed for training/creation/initialization. This step might be skipped if the explainer doesn’t need it.

  • explain*(X, y) -> [explanation] Actual computation (persistence and upload) of explanation(s) for the given data(set). An explanation might be provided by value or by reference (in case it would not fit in memory).

  • get_explanation(type, format) Get (cached/persisted) explanations in desired format.
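
Example - a minimal sketch of a custom explainer following the lifecycle above; the engine is expected to call setup() before fit() and explain(), and the explanation produced here is just a working directory archive:

    from h2o_sonar.lib.api import explainers

    class MyExplainer(explainers.Explainer):
        # Sketch only - a real explainer would also declare metadata
        # (display name, keywords, supported explanation types, ...).

        def check_compatibility(self, params=None, **explainer_params) -> bool:
            return True  # this explainer can explain any model

        def fit(self, X, y=None, **kwargs):
            return self  # nothing to pre-train for this explainer

        def explain(self, X, y=None, explanations_types: list = None, **kwargs) -> list:
            # Compute and persist the explanation(s), then return them.
            archive = self.create_explanation_workdir_archive(
                display_name="My explanation",
                display_category="EXAMPLE",
            )
            return [archive]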

Attributes:
model: Optional[ExplainerModel]

Instance of ExplainerModel class which has predict and fit functions of the model to be explained. These methods can be used to create predictions using the model/scorer.

persistence: Optional[ExplainerPersistence] = None

Instance of ExplainerPersistence class which provides convenient methods to persist explainer data e.g. to its working directory.

params: Optional[CommonExplainerParameters] = None

Common explainer parameters specified on the explainer run, like the target column or columns to drop.

explainer_params: str

This explainer's specific parameters specified on the explainer run.

logger:

Explainer’s logger.

config:

Driverless AI server configuration copy.

ARG_EXPLAINER_PARAMS = 'explainer_params_as_str'
EXPLAINERS_PURPOSES = ['explains-dataset', 'explains-approximate-behavior', 'explains-original-feature-importance', 'explains-transformed-feature-importance', 'explains-feature-behavior', 'explains-fairness', 'explains-model-debugging', 'explains-model']
KEYWORD_COMPLIANCE_TEST = 'compliance-test'
KEYWORD_DEFAULT = 'run-by-default'
KEYWORD_EVALUATES_LLM = 'evaluates_llm'
KEYWORD_EVALUATES_RAG = 'evaluates_rag'
KEYWORD_EXPLAINS_APPROX_BEHAVIOR = 'explains-approximate-behavior'
KEYWORD_EXPLAINS_DATASET = 'explains-dataset'
KEYWORD_EXPLAINS_FAIRNESS = 'explains-fairness'
KEYWORD_EXPLAINS_FEATURE_BEHAVIOR = 'explains-feature-behavior'
KEYWORD_EXPLAINS_MODEL_DEBUGGING = 'explains-model-debugging'
KEYWORD_EXPLAINS_O_FEATURE_IMPORTANCE = 'explains-original-feature-importance'
KEYWORD_EXPLAINS_T_FEATURE_IMPORTANCE = 'explains-transformed-feature-importance'
KEYWORD_EXPLAINS_UNKNOWN = 'explains-model'
KEYWORD_H2O_MODEL_VALIDATION = 'h2o-model-validation'
KEYWORD_H2O_SONAR = 'h2o-sonar'
KEYWORD_IS_FAST = 'is_fast'
KEYWORD_IS_SLOW = 'is_slow'
KEYWORD_LLM = 'llm'
KEYWORD_MOCK = 'mock'
KEYWORD_NLP = 'nlp'
KEYWORD_PREFIX_CAPABILITY = 'capability'
KEYWORD_PREFIX_EXPLAINS = 'explains'
KEYWORD_PROXY = 'proxy-explainer'
KEYWORD_REQUIRES_H2O3 = 'requires-h2o3'
KEYWORD_REQUIRES_OPENAI_KEY = 'requires-openai-api-key'
KEYWORD_RQ_AA = 'requires_actual_answer'
KEYWORD_RQ_C = 'requires_constraints'
KEYWORD_RQ_EA = 'requires_expected_answer'
KEYWORD_RQ_J = 'requires_llm_judge'
KEYWORD_RQ_P = 'requires_prompts'
KEYWORD_RQ_RC = 'requires_retrieved_context'
KEYWORD_TEMPLATE = 'template'
KEYWORD_UNLISTED = 'unlisted'
add_insight(insight: InsightAndAction)

Add an evaluated/interpreted model(s) insight identified by the explain() method.

Parameters:
insight: insights.InsightAndAction

Insight to be added.

add_problem(problem: ProblemAndAction)

Add an evaluated/interpreted model(s) problem identified by the explain() method.

Parameters:
problem: problems.ProblemAndAction

Model problem to be added.

as_descriptor(runtime_view=False) ExplainerDescriptor

Explainer descriptor as PROTO entity.

Parameters:
runtime_view: bool

Not all descriptor fields (like the parameters declaration) are needed at runtime (for instance, they are needed before running the explainer), therefore they might be skipped in the runtime view.

Returns:
ExplainerDescriptor:

Explainer descriptor.

property brief_description
classmethod can_explain(model_meta: ExplainableModelMeta = None, experiment_type: ExperimentType = None) bool

Return True if the explainer fits either the given Driverless AI model’s type or the given Driverless AI experiment type.

check_compatibility(params: CommonInterpretationParams | None = None, **explainer_params) bool

Explainer’s check (based on parameters) verifying that the explainer will be able to explain a given model. If this compatibility check returns False or raises an error, the explainer will not be run by the engine. This check may, but does not have to, be performed by the execution engine.

check_required_modules(required_modules: Set[str] | None = None)

Check whether modules specified in self._modules_needed_by_name are imported.

Parameters:
required_modules: Optional[Set[str]]

If defined, then modules specified in the parameter are checked, else self._modules_needed_by_name is checked.

Returns:
bool

True if all modules are available, False otherwise.

classmethod class_brief_description()
classmethod class_description()
classmethod class_display_name()
property class_name
classmethod class_tagline()
create_explanation_workdir_archive(display_name: str = '', display_category: str = '') WorkDirArchiveExplanation

Easily create a working directory archive with a ZIP of explanation representations.

Parameters:
display_name: str

Display name e.g. to be used for naming tile in UI.

display_category: str

Display category e.g. to be used for naming tab in UI.

property dependencies: List[Type[Explainer]]
classmethod depends_on() list
property description
destroy(**destroy_params)

Override to release resources created by the explainer (DB entities, files, running processes, …) depending on explainer runtime/container.

property display_name
classmethod evaluator_id() str
exlainer_params_as_dict() Dict | None
expected_custom_class

alias of Explainer

abstract explain(X, y=None, explanations_types: list = None, **kwargs) list

Invoke this method to calculate and persist global explanations, local explanations, or both for the given data(set). The implementation is to be overridden by child classes. This method is responsible for the calculation, build and persistence of explanations.

Parameters:
X: datatable.Frame

Dataset frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

explanations_types: List[Type[Explanation]]

Optional explanations to be built. All will be built if empty list or None provided. Get all supported types using has_explanation_types().

Returns:
List[Explanation]:

Explanations descriptors.

explain_global(X, y=None, **kwargs) list

Execute the explainer to calculate on-demand global explanations. This method is expected to be overridden if the explainer doesn’t pre-compute global explanations and/or needs to update global explanations after the initial computation. The default implementation just returns the global explanations computed by the explain() method.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

explain_insights() List[InsightAndAction]

Determine interpreted/evaluated model(s) insights (calculate them or get the persisted insights identified by the explain() method).

Returns:
List[InsightAndAction]:

Interpreted/evaluated model(s) insights.

explain_local(X, y=None, **kwargs) list

Execute the explainer to calculate on-demand local explanations. This method is expected to be overridden if the explainer doesn’t pre-compute local explanations. The default implementation just returns the local explanations computed by the explain() method.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

explain_problems() List[ProblemAndAction]

Determine interpreted/evaluated model(s) problems (calculate them or get the persisted problems identified by the explain() method).

Returns:
List[ProblemAndAction]:

Interpreted/evaluated model(s) problems.

classmethod explainer_id() str
classmethod explainer_version()
explains_binary() bool
explains_multiclass() bool
explains_regression() bool
property explanations: Dict | None

Explanations created by this explainer.

fit(X, y=None, **kwargs)

Optionally build/train the explainer (model) and explainer prerequisites. The implementation is to be overridden by child classes. It may be empty if the explainer doesn’t have to be built.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

get_explanations(explanation_types: list) list

Get instance explanation representations in the given format.

Parameters:
explanation_types: List[Type[Explanation]]

Explanation types to return - must be among the explanations declared (supported) by the explainer. Returns all supported explanations if None or empty.

Returns:
List[Explanation]:

Explanations by value or reference.

get_result() Type[ExplainerResult] | None
classmethod has_explanation_scopes() List[str]
classmethod has_explanation_types() List[Type[Explanation]]

Explanation types supported by the explainer.

classmethod has_explanations() List[str]

Experiment types this explainer explains.

classmethod has_model_type_explanations() List[str]
static is_enabled() bool

Return True if the explainer is enabled, else False, which makes the explainer completely ignored (unlisted, not loaded, not executed).

classmethod is_iid() bool
classmethod is_image() bool
classmethod is_llm() bool
classmethod is_rag() bool
classmethod is_time_series() bool
classmethod is_unsupervised() bool
property keywords: List[str]
static load(explainer_path: str | None = None)

Load pickled explainer snapshot.

static load_descriptor(descriptor_path: str, persistence: Persistence | None) ExplainerDescriptor
classmethod metrics_meta() MetricsMeta
classmethod parameters() List[ExplainerParam]
classmethod priority() float

Priority used to order explainers by the sequential execution scheduler. The higher the number, the higher the priority.

report_progress(progress: float, message: str = '', precision: int = 1)

Report explainer progress in [0, 1] range and message (“” removes previous message, None keeps previous message).

classmethod requires_model() bool
classmethod requires_predict_method() bool
classmethod requires_preloaded_predictor() bool
run_explain(X, y, explanations_types: list = None, **kwargs) dict

Execute the explainer to calculate (persist and upload) explanation(s) of a given model.

This method invokes the explainer's implementation of explain() and then performs explanation verifications and eventual subsequent actions. It is invoked by the explainer execution engine (which can add code to be executed before/after the explain() overridden by child classes).

An explanation might be provided by value or by reference (in case it would not fit in memory).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

explanations_types: List[Type[Explanation]]

Explanation types to build. All will be built if empty list or None provided. Get all supported types using has_explanation_types().

Returns:
List[Explanation]:

Explanations.

run_explain_global(X, y=None, **kwargs) list

Execute explainer to calculate (persist and upload) global explanation(s).

This method invokes the explainer's implementation of explain_global() and then performs explanation verifications and eventual subsequent actions. It is invoked by the explainer execution engine (which can add code to be executed before/after the explain_global() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

run_explain_local(X, y=None, **kwargs) list

Execute explainer to calculate (persist and upload) local explanation(s).

This method invokes the explainer's implementation of explain_local() and then performs explanation verifications and eventual subsequent actions. It is invoked by the explainer execution engine (which can add code to be executed before/after the explain_local() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

run_fit(X, y=None, **kwargs)

Build explainer and explainer prerequisites.

This method is invoked by the explainer execution engine (which can add code to be executed before/after the fit() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

save(explainer_path: str | None = None)

Save explainer snapshot pickle.

static save_descriptor(descriptor_path: str, descriptor: ExplainerDescriptor, persistence: Persistence | None)
setup(model: ExplainableModel | ExplainableModelHandle | None, persistence: ExplainerPersistence, models=None, key: str = '', params: CommonInterpretationParams | None = None, explainer_params_as_str: str | None = '', dataset_api: DatasetApi | None = None, model_api: ModelApi | None = None, logger: SonarLogger | None = None, **explainer_params) None

Set all the parameters needed to execute fit() and explain().

Parameters:
model: Optional[Union[models.ExplainableModel, models.ExplainableModelHandle]]

Explainable model with (fit and) score methods (or None if 3rd party).

models

(Explainable) models.

persistence: ExplainerPersistence

Persistence API allowing (controlled) saving and loading of explanations.

key: str

Optional (given) explainer run key (generated otherwise).

params: CommonInterpretationParams

Common explainers parameters specified on explainer run.

explainer_params_as_str: Optional[str]

Explainer specific parameters in string representation.

dataset_api: Optional[datasets.DatasetApi]

Dataset API to create custom explainable datasets needed by this explainer.

model_api: Optional[models.ModelApi]

Model API to create custom explainable models needed by this explainer.

logger: Optional[loggers.SonarLogger]

Logger.

explainer_params:

Other explainer RUNTIME parameters, options, and configuration.

classmethod supports_dataset_locator(locator: ResourceLocatorType) bool
classmethod supports_model_locator(locator: ResourceLocatorType) bool
property tagline
validate_explanations() bool

Optional method which can be used to verify integrity of explanations.

Returns:
bool:

Returns True if explanations are valid, False otherwise.

property working_dir: str

Working directory path where explainer can store any data it needs.

class h2o_sonar.lib.api.explainers.ExplainerArgs(parameters: List[ExplainerParam] = None)

Bases: object

Explainer arguments ~ parameter values.

add_parameter(param_type: ExplainerParam)
as_descriptor() List

Save parameters as descriptor: [{‘parameter’: {‘type’: ‘str’}}]

from_config_overrides(config_overrides: dict, erase: List[str] | None = None) dict

Try to get all arguments which are declared as parameters from the given config overrides and set (or overwrite) them in args.

Parameters:
config_overrides: dict

Config overrides as dictionary.

erase: Optional[List[str]]

Parameters to erase from config overrides.

from_dict(args_dict: dict, erase: List[str] | None = None) dict

Try to get all arguments which are declared as parameters from the given dictionary and set (or overwrite) them in args. Erase the given parameters - the arguments dictionary is not cloned, but modified in place.

get(param_name: str, default_value=None)
static json_str_to_dict(json_str: str, logger=None) dict
static resolve_local_paging_args(args: dict, explainer_name: str = '', logger=None)

Resolve local explanation paging arguments.

resolve_params(explainer_params: dict | None = None)

Resolve the explainer’s self.parameters (arguments) into self.args.

Parameters:
explainer_params: Optional[dict]

Explainer parameters as dictionary.

static toml_str_to_dict(toml_str: str, logger=None) dict
class h2o_sonar.lib.api.explainers.ExplainerDescriptor(id: str, name: str = '', display_name: str = '', tagline: str = '', description: str = '', brief_description: str = '', model_types: List[str] | None = None, can_explain: List[str] | None = None, explanation_scopes: List[str] | None = None, explanations: List[ExplanationDescriptor] | None = None, parameters: List[ConfigItem] | None = None, keywords: List[str] | None = None, metrics_meta: MetricsMeta | None = None)

Bases: object

KEY_BRIEF_DESCRIPTION = 'brief_description'
KEY_CAN_EXPLAIN = 'can_explain'
KEY_DESCRIPTION = 'description'
KEY_DISPLAY_NAME = 'display_name'
KEY_EXPLANATIONS = 'explanations'
KEY_EXPLANATION_SCOPES = 'explanation_scopes'
KEY_ID = 'id'
KEY_KEYWORDS = 'keywords'
KEY_METRICS_META = 'metrics_meta'
KEY_MODEL_TYPES = 'model_types'
KEY_NAME = 'name'
KEY_PARAMETERS = 'parameters'
KEY_TAGLINE = 'tagline'
clone() ExplainerDescriptor
dump() dict
static load(d: Dict) ExplainerDescriptor
class h2o_sonar.lib.api.explainers.ExplainerParam(param_name: str, param_type: ExplainerParamType | EvaluatorParamType, description: str = '', comment: str = '', default_value: bool | str | float = '', value_min: float = 0.0, value_max: float = 0.0, predefined: List | None = None, tags: List | None = None, category: str = '', src: str = '')

Bases: Param

Explainer parameter declaration.

SRC_ANY = 'any'
SRC_CONFIG_OVERRIDES = 'config_overrides'
SRC_CONFIG_OVERRIDES_ERASE = 'config_overrides_erase'
SRC_EVALUATOR_PARAMS = 'evaluator_params'
SRC_EXPLAINER_PARAMS = 'explainer_params'
TAG_SRC_DATASET_COLUMN_NAMES = 'SOURCE_DATASET_COLUMN_NAMES'
TAG_SRC_DATASET_TEXT_COLUMN_NAMES = 'SOURCE_DATASET_TEXT_COLUMN_NAMES'
as_descriptor() ConfigItem

Explainer parameter to descriptor conversion.

class h2o_sonar.lib.api.explainers.ExplainerRegistry(singleton_create_key)

Bases: object

The explainer registry provides the list of available out-of-the-box (OOTB) and registered explainers.

get_class(explainer_id) Type[Explainer] | None
list_explainers() Dict
load()

Load registry from configuration.

register(explainer_class, explainer_id: str = '') str
classmethod registry()
save()
unregister(explainer_id: str) str
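
Example - a minimal sketch of working with the registry singleton; MyExplainer refers to the custom explainer sketch above:

    from h2o_sonar.lib.api.explainers import ExplainerRegistry

    registry = ExplainerRegistry.registry()        # singleton accessor
    explainer_id = registry.register(MyExplainer)  # returns the explainer ID
    registry.list_explainers()                     # now includes MyExplainer
    registry.unregister(explainer_id)
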
class h2o_sonar.lib.api.explainers.ExplainerResult(persistence: ExplainerPersistence, explainer_id: str, explanation_format: Type[ExplanationFormat] | None, explanation: Type[Explanation] | None, h2o_sonar_config, logger=None)

Bases: ABC

abstract data(**kwargs) Frame
classmethod help() Dict[str, Dict[str, List[Dict[str, str | bool]]]]
log(*, path)
params() Dict
abstract plot(**kwargs)
summary() Dict
zip(*, file_path)
class h2o_sonar.lib.api.explainers.OnDemandExplainKey

Bases: object

On-demand explainer run parameter keys.

CLASS = 'class'
EXPLAINER_JOB_KEY = 'target_explainer_job_key'
EXPLANATION_TYPE = 'target_explanation_type'
FEATURE = 'feature'
FORMAT = 'target_format'
METHOD = 'method'
MLI_KEY = 'target_mli_key'
ROW = 'row'
UPDATE_STRATEGY = 'update_strategy'
class h2o_sonar.lib.api.explainers.OnDemandExplainMethod(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

explain = 1
explain_global = 2
explain_local = 3
class h2o_sonar.lib.api.explainers.SurrogateExplainer

Bases: Explainer, ABC

Surrogate model explainer.

KEYWORD_SURROGATE = 'surrogate'
abstract predict(X, y=None, **kwargs)

The surrogate explainer provides a predict method allowing one to get predictions from the surrogate model. This method is to be overridden by child classes.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Union[datatable.Frame, Any]

Labels.

run_predict(X, y=None, **kwargs)

The surrogate explainer provides a predict method allowing one to get predictions from the surrogate model.

This method is invoked by the explainer execution engine (which can add code to be executed before/after the predict() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

h2o_sonar.lib.api.explanations module

class h2o_sonar.lib.api.explanations.AbcHeatmapExplanation

Bases: ABC

COLOR_FATAL_ERROR = 'ff0000'
METRIC_ALL = 'ALL_METRICS'
PALETTE_BLUE = ['3d83ad', '5e9dc3', '96bcd3', 'c4dcea', 'eef4f8']
PALETTE_GREEN = ['40a481', '56b896', '71c9ab', '8ad9be', 'aaebd5']
PALETTE_RED = ['f2a7c1', 'f6bbd0', 'f7cfde', 'fae5ed', 'fdf3f7']
class h2o_sonar.lib.api.explanations.AutoReportExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

AutoReport explanation provides explanations in various document formats (Word, Markdown, …).

validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.CustomArchiveExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Explainer archive representation like zip or tgz.

validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.DiaExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.DurationStatsKey

Bases: object

Performance statistics keys.

AVG = 'avg'
MAX = 'max'
MIN = 'min'
N = 'n'
SUM = 'sum'
class h2o_sonar.lib.api.explanations.Explanation(explainer, display_name: str = '', display_category: str = '', has_local=None)

Bases: ABC

Base class of explainer explanations.

DISPLAY_CAT_AUTOREPORT = 'AUTOREPORT'
DISPLAY_CAT_COMPLIANCE = 'COMPLIANCE TESTS'
DISPLAY_CAT_CUSTOM = 'CUSTOM'
DISPLAY_CAT_DAI_MODEL = 'DAI MODEL'
DISPLAY_CAT_DATA = 'DATA'
DISPLAY_CAT_EXAMPLE = 'EXAMPLE'
DISPLAY_CAT_LLM = 'LLM'
DISPLAY_CAT_MOCK = 'MOCK'
DISPLAY_CAT_MODEL = 'MODEL'
DISPLAY_CAT_NLP = 'NLP'
DISPLAY_CAT_SURROGATES = 'SURROGATE MODELS'
DISPLAY_CAT_SURROGATES_ON_RES = 'SURROGATE MODELS ON RESIDUALS'
DISPLAY_CAT_TEMPLATE = 'TEMPLATE'
add_format(explanation_format: ExplanationFormat) None

Add explanation representation in a new format.

Parameters:
explanation_format: ExplanationFormat

New explanation representation.

classmethod as_class_descriptor() ExplanationDescriptor
as_descriptor() ExplanationDescriptor
property display_category: str
property display_name: str
property explainer
classmethod explanation_scope() str

Explanation scope - either global or local.

classmethod explanation_type() str

The explanation type may be any string identifier (either defined by this class or by the user ~ extensibility) which is used for validation and further processing. It must specify a unique explanation name and scope. Explanation formats are defined by child classes of this abstract class.

Format: <explanation_scope>-<explanation-type>

Example: global-feature-importance

property format_types: List[str]

Explanation formats provided by the explanation.

Representations are set by explanations as they are created. This is why the available format types are initialized as an empty instance field, not a class field.

Example:

["application/json", "application/vnd.h2oai.datatable", "application/zip" ]
Returns:
List[str]:

Representations (formats) of this explanation.

get_format(explanation_format: str) ExplanationFormat

Get the explanation in a specific representation.

property has_local: str

Does the explanation also have a related local explanation, and if so, which one?

Returns:
str:

Local explanation type.

classmethod is_global() bool

Is the explanation global or local?

abstract validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ExplanationDescriptor(explanation_type: str, name: str = '', category: str = '', scope: str = '', has_local: str = '', formats: List[str] = None)

Bases: object

KEY_CATEGORY = 'category'
KEY_EXPLANATION_TYPE = 'explanation_type'
KEY_FORMATS = 'formats'
KEY_HAS_LOCAL = 'has_local'
KEY_NAME = 'name'
KEY_SCOPE = 'scope'
clone() ExplanationDescriptor
dump() dict
static load(d: dict) ExplanationDescriptor
class h2o_sonar.lib.api.explanations.FlippedPerturbedTestCase(explainable_model_key: str, explainable_model: ExplainableRagModel | ExplainableLlmModel | None, metric_meta: MetricMeta, orig_row: LlmDatasetRow | None = None, orig_metric_value: float = 0.0, orig_pass: bool = False, perturbed_row: LlmDatasetRow | None = None, perturbed_metric_value: float = 0.0, perturbed_pass: bool = False, heat_threshold: float | None = None)

Bases: object

Represents a flipped perturbed test case serialized as an LLM dataset row or an evaluation result row.

copy() FlippedPerturbedTestCase
property good_to_bad: bool | None

True if the perturbation flipped the test case from PASSING the metric to FAILING it, else False.

property is_flip: bool | None
static is_flipped() bool
property llm_model_name: str
static resolve_metrics(metrics: Dict, metrics_meta: MetricsMeta) Dict[str, Tuple]

Resolve metric values and pass/fail status for a given set of metrics.

Parameters:
metrics: Dict

Dictionary with metrics.

metrics_meta: commons.MetricsMeta

Metrics metadata.

Returns:
Dict[str, Tuple[MetricMeta, float, bool]]

Dictionary which maps a metric ID to a tuple with the metric meta, the metric value, and the metric pass/fail status (based on the threshold and the higher-is-better/worse flag determined from the metadata).
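
A minimal usage sketch (the metric ID, values, and metadata are illustrative assumptions):

resolved = FlippedPerturbedTestCase.resolve_metrics(
    metrics={"accuracy": 0.91},  # metric ID -> metric value
    metrics_meta=metrics_meta,   # commons.MetricsMeta describing "accuracy"
)
meta, value, passed = resolved["accuracy"]  # (MetricMeta, float, bool)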

class h2o_sonar.lib.api.explanations.Global3dDataExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Explanation with per class and feature data frames for rendering of 3D charts like:

  • 3D bar chart

  • heatmap

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalDataFrameExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Generic explanation which doesn’t fit any other type.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalDtExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalGroupedBarChartExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalHtmlFragmentExplanation(explainer=None, evaluator=None, display_name: str = None, display_category: str = None)

Bases: Explanation

add_html_format(html: str)

Add HTML format.

static from_explanation(explainer, explanation, display_name: str = None, display_category: str = None, absolute_paths: bool = False, problems: Dict = None, is_raw_feature: bool = True, data_as_text: bool = True, logger=None) GlobalHtmlFragmentExplanation

Create HTML fragment explanation:

  • from GlobalFeatImpExplanation
    • with formats.HtmlFormat

  • from PartialDependenceJSonFormat
    • with formats.HtmlFormat

Parameters:
explainer

Explainer instance.

explanation

Explanation instance.

display_name: str

Custom display name.

display_category: str

Custom display category.

absolute_paths: bool

True to create the HTML representation with absolute paths to images and explanations, else False (default) to create relative paths.

problems: Dict

Dictionary mapping classes to the names of problematic features whose charts should be highlighted.

is_raw_feature: bool

True if the input explains original features, else False for transformed features.

data_as_text: bool

Generate HTML text for the chart data.

logger

Optional logger.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalLinePlotExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalNlpLocoExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalRuleExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalScatterPlotExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalSummaryFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.IndividualConditionalExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmBoolLeaderboardExplanation(evaluator, metrics_meta: MetricsMeta, display_name: str = None, display_category: str = None, key_2_evaluated_model: Dict = None, llm_host: LlmModelHostType = LlmModelHostType.RAG, do_eval_rc: bool = False, logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

LLM failure leaderboard - leaderboard data and formats for metrics of the BOOLEAN type, i.e. where it is possible to infer:

  • success / failure

  • pass / fail

  • true / false

for each test case (prompt + model) in the test set.

Leaderboard provides multiple aspects of the test results (sub-leaderboards):

  • summary leaderboard

  • most problematic prompts leaderboard

Multiple leaderboards within a format are supported via an index file:

  • index file:
    • key: leaderboard name

    • value: leaderboard file name
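
For example, such an index file might look like this (leaderboard and file names are illustrative):

{
    "summary": "leaderboard_summary.json",
    "most problematic prompts": "leaderboard_prompts.json"
}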

class AdditionalDetails(formatting, text)

Bases: tuple

formatting

Alias for field number 0

text

Alias for field number 1

DEFAULT_METRIC_THRESHOLD = 0.5
class Failure(doc_url, error_message, input, expected_output, output_condition, output_constraints, actual_output, actual_output_meta, fail_retrieval, fail_generation, fail_parse, ctx_bytes, ctx_chunks, row_key, model_key)

Bases: tuple

actual_output

Alias for field number 6

actual_output_meta

Alias for field number 7

ctx_bytes

Alias for field number 11

ctx_chunks

Alias for field number 12

doc_url

Alias for field number 0

error_message

Alias for field number 1

expected_output

Alias for field number 3

fail_generation

Alias for field number 9

fail_parse

Alias for field number 10

fail_retrieval

Alias for field number 8

input

Alias for field number 2

model_key

Alias for field number 14

output_condition

Alias for field number 4

output_constraints

Alias for field number 5

row_key

Alias for field number 13

KEY_INPUT_FAILURES = 'input_failures'
KEY_INPUT_FAILURES_COUNT = 'input_failures_count'
KEY_INPUT_FAILURES_GENERATION_COUNT = 'input_failures_generation_count'
KEY_INPUT_FAILURES_PARSE_COUNT = 'input_failures_parse_count'
KEY_INPUT_FAILURES_RETRIEVAL_COUNT = 'input_failures_retrieval_count'
KEY_INPUT_PASSES_COUNT = 'input_passes_count'
KEY_MODEL_FAILURES = 'model_failures'
KEY_MODEL_FAILURES_COUNT = 'model_failures_count'
KEY_MODEL_FAILURES_GENERATION_COUNT = 'model_failures_generation_count'
KEY_MODEL_FAILURES_PARSE_COUNT = 'model_failures_parse_count'
KEY_MODEL_FAILURES_RETRIEVAL_COUNT = 'model_failures_retrieval_count'
KEY_MODEL_PASSES_COUNT = 'model_passes_count'
KEY_RESULT_CHECK_ERR_MSG = 'result_error_message'
KEY_RESULT_CHECK_FAIL = 'model_failures'
KEY_RESULT_CHECK_FAIL_A = 'model_generation_failures'
KEY_RESULT_CHECK_FAIL_P = 'model_parse_failures'
KEY_RESULT_CHECK_FAIL_R = 'model_retrieval_failures'
KEY_RESULT_CHECK_OK = 'model_passes'
KEY_TOTAL_COST = 'total_cost'
KEY_TOTAL_TIME = 'total_time'
LEADERBOARD_METRICS_META = <h2o_sonar.lib.api.commons.MetricsMeta object>
METRIC_META_MODEL_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_GENERATION_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_PARSE_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_PASSES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_RETRIEVAL_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_MODEL_FAILURES = 'model_failures'
METRIC_MODEL_GENERATION_FAILURES = 'model_generation_failures'
METRIC_MODEL_PARSE_FAILURES = 'model_parse_failures'
METRIC_MODEL_PASSES = 'model_passes'
METRIC_MODEL_RETRIEVAL_FAILURES = 'model_retrieval_failures'
add_evalstudio_markdown_format(title='Summary')
add_failure(llm_model_name: str, doc_url, error_message: str, i: str, context: List[str] | None, expected_output: str, output_constraints: List | None, output_condition: str, actual_output: str, actual_output_meta: List | None, duration, cost, fail_retrieval: bool = False, fail_generation: bool = False, fail_parse: bool = False, row_key: str = None, model_key: str = '')
add_json_format(llm_host: LlmModelHostType, metrics_meta: MetricsMeta | None = None, threshold: float | None = None)

Add JSON format for the leaderboard.

Parameters:
llm_host: commons.LlmModelHostType

LLM model host type.

metrics_meta: Optional[commons.MetricsMeta]

Metrics metadata overriding the leaderboard's metrics - keys are expected to be identical, but the caller can customize names, descriptions, and other metrics metadata.

threshold: Optional[float]

Threshold for metrics - if not provided, the default metric threshold is used.

add_markdown_format(title='Benchmarks')
add_pass(llm_model_name: str, i: str, context: List[str] | None, duration: float, cost: float, row_key: str, model_key: str = '')
add_total_cost(llm_model_name: str, cost: float)
add_total_time(llm_model_name: str, duration: float)
as_dict() Dict

All leaderboard data as dictionary.

as_evalstudio_markdown(title: str = 'Summary', top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
title: str

Title of the leaderboard.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

Returns:
str

Markdown representation of the leaderboard.

as_html(title: str = 'RAG Benchmark', include_header: bool = False, include_by_accuracy: bool = True, include_by_time: bool = True, include_by_cost: bool = True, additional_details: Dict | None = None) str
as_leaderboard_dict(metrics_meta: MetricsMeta | None = None, threshold: float | None = None) Dict

Create leaderboard dictionary: model -> metric -> value.

By convention, the leaderboard data are always normalized - two options:

  • <0, 1> range for metrics

  • <0, 100> range for percentages

There are never absolute values like counts, times, or durations.
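
For example, a returned leaderboard dictionary might look like this (model names and values are illustrative; the metric IDs are the class constants above):

{
    "llm-model-a": {"model_passes": 0.92, "model_failures": 0.08},
    "llm-model-b": {"model_passes": 0.85, "model_failures": 0.15}
}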

Parameters:
metrics_meta: Optional[commons.MetricsMeta]

Metrics metadata overriding the leaderboard's metrics - keys are expected to be identical, but the caller can customize names, descriptions, and other metrics metadata.

threshold: Optional[float]

Threshold for metrics - if not provided, the default metric threshold is used.

as_markdown(title: str = 'Benchmark', extended: bool = True) str

Markdown representation of the leaderboard.

Parameters:
title: str

Title of the markdown report.

extended: bool

Extended report (for the h2oGPTe benchmark).

Returns:
str

Markdown representation of the leaderboard.

build()

Analyze, explain, aggregate, and build the leaderboard data so that the leaderboard is ready to be rendered when its HTML representation is built.

check_and_report_negative_cost(cost: float, llm_model_name, i: str, row_key: str, model_key: str) float

Create a problem for negative cost.

evaluation_cost()

Total evaluation cost.

static from_eval_results(evaluator, eval_results, metrics_meta: MetricsMeta, metric_id_success: str, metric_id_failure_message: str, display_name: str = None, display_category: str = None, key_2_evaluated_model: Dict = None, llm_host: LlmModelHostType = LlmModelHostType.RAG, do_eval_rc: bool = False, logger=None) LlmBoolLeaderboardExplanation

Create LLM leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metrics metadata.

metric_id_success: str

Metric ID for the success indicator.

metric_id_failure_message: str

Metric ID for the failure message.

display_name: str

Custom display name.

display_category: str

Custom display category.

key_2_evaluated_model: Dict

Map: key -> RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

do_eval_rc: bool

Whether to show retrieval correctness.

logger

Optional logger.

get_insights(insight_type: str = 'accuracy', quality: str = 'accurate', extra_description_actions: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the boolean leaderboard.

Parameters:
insight_type: str

Insight type.

quality: str

Model quality.

extra_description_actions: str

Additional description for actions.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

static key_2_rag_type_prefix(evaluated_models) Dict
sort_models_leaderboard(sort_by: Dict[str, int | float], reverse: bool = True)
sort_prompts_by_empty_ctxs(reverse: bool = True) List[str]
sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
static summary_as_markdown(md: str, metrics_count: int, llm_host: LlmModelHostType, m_failures_count: Dict, i_failures_count: Dict, key_2_evaluated_model: Dict, cost_source=None) str
validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmClassifierLeaderboardExplanation(evaluator, eval_results, model_2_metrics: Dict, model_2_confusion_matrix: Dict, classes: List[str], false_positives: Dict[str, List[LlmEvalResultRow]], false_negatives: Dict[str, List[LlmEvalResultRow]], i_passes_count: Dict[str, int], metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = '', display_category: str = '', logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

LLM classification leaderboard explanation.

DEFAULT_METRIC_THRESHOLD = 0.75
METRIC_ACCURACY = 'accuracy'
METRIC_F1 = 'f1'
METRIC_META_ACCURACY = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_F1 = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_PRECISION = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_RECALL = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_PRECISION = 'precision'
METRIC_RECALL = 'recall'
add_evalstudio_markdown_format(sort_by_metric_id: str, title: str = 'Summary')
add_json_format(threshold: float | None = None) LlmHeatmapLeaderboardJSonFormat

Add JSon format.

add_markdown_format(sort_by_metric_id: str, title: str = 'Evaluation Report')
as_dict(threshold: float | None = None) Tuple[Dict, Dict]

Return leaderboard as dictionary.

Parameters:
threshold: Optional[float]

Threshold for metrics - if not provided, the default metric threshold is used.

Returns:
Tuple[Dict, Dict]

Leaderboard data dictionary and metric EDA (min, max, …) dictionary.
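
A usage sketch (the explanation instance is assumed to exist; the exact EDA keys are an assumption):

leaderboard, metric_eda = explanation.as_dict(threshold=0.75)
# leaderboard: model -> metric -> value
# metric_eda: per-metric EDA statistics such as min and max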

as_html(sort_by_metric_id: str, html_src=None, include_failures: bool = True, include_prompts_by_metrics: bool = True, additional_details: Dict | None = None) str

Create HTML snippet with:

  • per-metrics heatmap table

  • per-metrics confusion matrix

as_markdown(sort_by_metric_id: str, title: str = 'Evaluation Report', heading_level: str = '#', include_metrics_leaderboards: bool = True, top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
sort_by_metric_id: str

Metric ID to sort models by.

title: str

Title of the leaderboard.

heading_level: str

Heading level.

include_metrics_leaderboards: bool

Include per-metrics leaderboards.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

Returns:
str

Markdown representation of the leaderboard.

build()

Build leaderboard.

static from_eval_results(evaluator, eval_results, model_2_metrics: Dict, model_2_confusion_matrix: Dict, classes: List[str], metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = None, display_category: str = None, logger=None) LlmClassifierLeaderboardExplanation

Create Classification leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

model_2_metrics: Dict

Map: model name -> metric ID -> metric value.

model_2_confusion_matrix: Dict

Map: model name -> confusion matrix.

classes: List[str]

List of classes.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metrics metadata.

key_2_evaluated_model: Dict

Map: key -> LLM@RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

display_name: str

Custom leaderboard display name.

display_category: str

Custom leaderboard display category.

logger

Optional logger.

get_insights(extra_description_best: str = '', extra_description_worst: str = '', insight_type: str = 'accuracy', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the classifier leaderboard (based on accuracy metric).

Parameters:
extra_description_best: str

Additional description for insights related to the best models.

extra_description_worst: str

Additional description for insights related to the worst models.

insight_type: str

Insight type.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmEvalResultsExplanation(evaluator, eval_results, display_name: str = None, display_category: str = None)

Bases: Explanation

KEY_EVALUATOR = 'evaluator'
KEY_MODELS = 'models'
KEY_RESULTS = 'results'
add_csv_format()

Add CSV format.

add_datatable_format()

Add datatable format.

add_json_format()

Add JSon format.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmHeatmapLeaderboardExplanation(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, nan_tolerance: float = 0.0, display_name: str = '', display_category: str = '', logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

Heatmap leaderboard explanation provides data and formats for a leaderboard which is colorized as a heatmap based on metric values.

LLM_MODEL_ANONYMOUS = 'model'
add_col_value(llm_model_name: str, docs: str, prompt: str, metrics_id: str, value: float, result_row)

Add entry to the data dictionary used to build formatted tables later.

add_evalstudio_markdown_format(sort_by_metric_id: str, title: str = 'Summary')
add_json_format(threshold: float | None = None) LlmHeatmapLeaderboardJSonFormat

Add JSon format.

add_markdown_format(sort_by_metric_id: str, title: str = 'Evaluation Report')
as_dict(threshold: float | None = None) Tuple[Dict, Dict]

Return leaderboard as dictionary.

Parameters:
threshold: Optional[float]

Threshold for the metrics.

Returns:
Tuple[Dict, Dict]

Leaderboard data dictionary and metric EDA (min, max, …) dictionary.

as_html(sort_by_metric_id: str, html_src=None, include_failures: bool = True, include_prompts_by_metrics: bool = True, additional_details: Dict | None = None) str

Create HTML snippet with:

  • metrics heatmap table

  • failures section: model -> document -> prompt -> [metrics] -> value

as_markdown(sort_by_metric_id: str, title: str = 'Evaluation Report', heading_level: str = '#', top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
sort_by_metric_id: str

Metric ID to be used as the FIRST one to sort the table. The method then renders tables for all other metrics (each sorted by that particular metric).

title: str

Title of the leaderboard.

heading_level: str

Markdown title heading level.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

build()

Analyze, explain, aggregate, and build the leaderboard data so that the leaderboard is ready to be rendered when its HTML representation is built.

static from_eval_results(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, nan_tolerance: float = 0.0, display_name: str = None, display_category: str = None, logger=None) LlmHeatmapLeaderboardExplanation

Create Heatmap leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metadata of the metrics to be evaluated.

key_2_evaluated_model: Dict

Map: key -> LLM@RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

nan_tolerance: float

Tolerance for NaN values in the evaluation results.

display_name: str

Custom leaderboard display name.

display_category: str

Custom leaderboard display category.

logger

Optional logger.

get_insights(metrics_meta: MetricsMeta, metric_id: str = '', metric_name_protection: bool = False, extra_description_best: str = '', extra_description_worst: str = '', insight_type: str = 'accuracy', model_purpose: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the heatmap leaderboard.

Parameters:
metrics_meta: commons.MetricsMeta

Metrics metadata.

metric_id: str

Optional metric ID to create insights for. If not specified, insights are created for the primary metrics as specified by the metrics metadata.

metric_name_protection: bool

If True, the metric ID is not changed to lowercase.

extra_description_best: str

Additional description for insights related to the best models.

extra_description_worst: str

Additional description for insights related to the worst models.

insight_type: str

Insight type.

model_purpose: str

Model purpose.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
static truncate(f, n)

Truncates a float f to n decimal places without rounding.
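
A minimal sketch of such truncation (an illustrative equivalent, not necessarily the exact implementation):

import math

def truncate(f: float, n: int) -> float:
    # Drop (do not round) everything past n decimal places.
    return math.trunc(f * 10 ** n) / 10 ** n

truncate(0.1999, 2)  # 0.19, whereas round(0.1999, 2) gives 0.2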

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmLeaderboardExplanation

Bases: object

static get_leaderboard_data_path(evaluation, evaluator_id, explanation_format: str = 'application/json', metric: str = 'ALL_METRICS')
static markdown_connection_stats_table(evaluated_models_list: List[ExplainableLlmModel])
class h2o_sonar.lib.api.explanations.LlmProcedureEvalLeaderboardExplanation(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = '', display_category: str = '', logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

ProcedureEval leaderboard explanation provides data and formats for a procedure-evaluation leaderboard which is colorized based on metrics values.

KEY_ALIGNMENT_MATRIX = 'alignment_matrix'
KEY_DYN_PROG_MATRIX = 'dyn_prog_matrix'
LLM_MODEL_ANONYMOUS = 'model'
add_col_value(llm_model_name: str, docs: str, prompt: str, metrics_id: str, value: float, result_row)

Add entry to the data dictionary used to build formatted tables later.

add_evalstudio_markdown_format(sort_by_metric_id: str, title: str = 'Summary')
add_json_format(threshold: float | None = None) LlmHeatmapLeaderboardJSonFormat

Add JSon format.

add_markdown_format(sort_by_metric_id: str, title: str = 'Evaluation Report')
as_dict(threshold: float | None = None) Tuple[Dict, Dict]

Return leaderboard as dictionary.

Parameters:
threshold: Optional[float]

Threshold for the metrics.

Returns:
Tuple[Dict, Dict]

Leaderboard data dictionary and metric EDA (min, max, …) dictionary.

as_html(sort_by_metric_id: str, html_src=None, include_failures: bool = True, include_prompts_by_metrics: bool = True, additional_details: Dict | None = None) str

Create HTML snippet with:

  • metrics procedure_eval table

  • failures section: model -> document -> prompt -> [metrics] -> value

as_markdown(sort_by_metric_id: str, title: str = 'Evaluation Report', heading_level: str = '#', top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
sort_by_metric_id: str

Metric ID to be used as the FIRST one to sort the table. The method then renders tables for all other metrics (each sorted by that particular metric).

title: str

Title of the leaderboard.

heading_level: str

Markdown title heading level.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

build()

Analyze, explain, aggregate, and build the leaderboard data so that the leaderboard is ready to be rendered when its HTML representation is built.

static from_eval_results(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = None, display_category: str = None, logger=None) LlmProcedureEvalLeaderboardExplanation

Create ProcedureEval leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metrics metadata.

key_2_evaluated_model: Dict

Map: key -> LLM@RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

display_name: str

Custom leaderboard display name.

display_category: str

Custom leaderboard display category.

logger

Optional logger.

get_insights(metrics_meta: MetricsMeta, metric_id: str = '', metric_name_protection: bool = False, extra_description_best: str = '', extra_description_worst: str = '', insight_type: str = 'accuracy', model_purpose: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the procedure_eval leaderboard.

Parameters:
metrics_meta: commons.MetricsMeta

Metrics metadata.

metric_id: str

Optional metric ID to create insights for. If not specified, insights are created for the primary metrics as specified by the metrics metadata.

metric_name_protection: bool

If True, the metric ID is not changed to lowercase.

extra_description_best: str

Additional description for insights related to the best models.

extra_description_worst: str

Additional description for insights related to the worst models.

insight_type: str

Insight type.

model_purpose: str

Model purpose.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
static truncate(f, n)

Truncates a float f to n decimal places without rounding.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalDataFrameExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Generic explanation which doesn’t fit any other type.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalDtExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalHtmlSnippetExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalNlpLocoExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalRuleExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalSummaryFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalTextSnippetExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocoExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ModelValidationResultExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Model validation result explanation is an (archived) tree of directories and documents created by an H2O MV based explainer.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.NlpTokenizerExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.OnDemandExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

On-demand explanations are typically used for ad-hoc local on-demand explainer execution by the explainer executor.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.PartialDependenceExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

KEYWORD_CAN_ADD_FEATURE = 'can-add-feature'
validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ProxyExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Proxy explanation is provided by parent explainers.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ReportExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Generic report explanation provides explanations in various document formats (like Word, Markdown, …).

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.SaExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.TextExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.TimeSeriesAppExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.WorkDirArchiveExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Explainer work directory explanation provides various work dir archive representations like zip or tgz.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

h2o_sonar.lib.api.explanations.diagnose_perturbation_flips(eval_results: LlmEvalResults, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, logger=None) Dict[str, Dict[str, FlippedPerturbedTestCase]]

Diagnose perturbation flips.

Returns:
Dict

Map: original row key -> perturbed row key -> FlippedPerturbedTestCase.
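
A usage sketch for iterating the returned mapping (variable names are illustrative):

flips = diagnose_perturbation_flips(eval_results, metrics_meta, key_2_evaluated_model)
for orig_key, perturbed in flips.items():
    for pert_key, flip in perturbed.items():
        if flip.good_to_bad:
            print(f"{orig_key} -> {pert_key}: flipped from pass to fail")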

h2o_sonar.lib.api.formats module

class h2o_sonar.lib.api.formats.CsvFormatCustomExplanationFormat(explanation, frame: Frame, frame_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat

add_data(format_data: Frame, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

get_data(file_name: str | None = None)
mime: str = 'text/csv'
class h2o_sonar.lib.api.formats.CustomArchiveZipFormat(explanation, format_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Custom ZIP archive representation.

mime: str = 'application/zip'
class h2o_sonar.lib.api.formats.CustomCsvFormat(explanation, frame: Frame, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of custom CSV format.

mime: str = 'text/csv'
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.CustomJsonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of custom JSon format.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.DatatableCustomExplanationFormat(explanation, frame: Frame, frame_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat

add_data(format_data: Frame, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

get_data(file_name: str | None = None)
mime: str = 'application/vnd.h2oai.datatable.jay'
class h2o_sonar.lib.api.formats.DiaTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Disparate Impact Analysis (DIA) text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.DocxFormat(explanation, format_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Open docx document.

mime: str = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.EvalStudioMarkdownFormat(explanation, format_file: str, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: MarkdownFormat

EvalStudio Markdown representation (text and images).

mime: str = 'application/vnd.h2oai-evalstudio-leaderboard.markdown'
class h2o_sonar.lib.api.formats.ExplanationFormat(explanation, format_data, format_file: str | None, extra_format_files: List | None = None, file_extension: str = 'bin', persistence: Persistence | None = None)

Bases: ABC

Base class of explanation representation.

A representation is a serialization of an explanation in a format like JSon or CSV. A representation has a MIME type and can be formed by one or more files, but at least one file must be provided.

DEFAULT_PAGE_SIZE = 20
FEATURE_TYPE_CAT = 'categorical'
FEATURE_TYPE_CAT_NUM = 'catnum'
FEATURE_TYPE_DATE = 'date'
FEATURE_TYPE_DATETIME = 'datetime'
FEATURE_TYPE_NUM = 'numeric'
FEATURE_TYPE_TIME = 'time'
FILE_PREFIX_EXPLANATION_IDX = 'explanation.'
KEYWORD_RESIDUALS = 'residuals'
KEY_ACTION = 'action'
KEY_ACTION_TYPE = 'action_type'
KEY_ACTUAL = 'actual'
KEY_BIAS = 'bias'
KEY_CATEGORICAL = 'categorical'
KEY_DATA = 'data'
KEY_DATA_HISTOGRAM = 'data_histogram'
KEY_DATA_HISTOGRAM_CAT = 'data_histogram_categorical'
KEY_DATA_HISTOGRAM_NUM = 'data_histogram_numerical'
KEY_DATE = 'date'
KEY_DATE_TIME = 'datetime'
KEY_DEFAULT_CLASS = 'default_class'
KEY_DOC = 'documentation'
KEY_EXPLAINER_JOB_KEY = 'explainer_job_key'
KEY_FEATURES = 'features'
KEY_FEATURE_TYPE = 'feature_type'
KEY_FEATURE_VALUE = 'feature_value'
KEY_FILES = 'files'
KEY_FILES_DETAILS = 'files_details'
KEY_FILES_NUMCAT_ASPECT = 'files_numcat_aspect'
KEY_FULLNAME = 'full_name'
KEY_ID = 'id'
KEY_IS_MULTI = 'is_multinomial'
KEY_ITEM_ORDER = 'order'
KEY_KEYWORDS = 'keywords'
KEY_LABEL = 'label'
KEY_METADATA = 'metadata'
KEY_METRICS = 'metrics'
KEY_MIME = 'mime'
KEY_MLI_KEY = 'mli_key'
KEY_NAME = 'name'
KEY_NUMERIC = 'numeric'
KEY_ON_DEMAND = 'on_demand'
KEY_ON_DEMAND_PARAMS = 'on_demand_params'
KEY_PAGE_OFFSET = 'page_offset'
KEY_PAGE_SIZE = 'page_size'
KEY_RAW_FEATURES = 'raw_features'
KEY_ROWS_PER_PAGE = 'rows_per_page'
KEY_RUNNING_ACTION = 'running-action'
KEY_SCOPE = 'scope'
KEY_SYNC_ON_DEMAND = 'synchronous_on_demand_exec'
KEY_TIME = 'time'
KEY_TOTAL_ROWS = 'total_rows'
KEY_VALUE = 'value'
KEY_Y_FILE = 'y_file'
LABEL_REGRESSION = 'None (Regression)'
SCOPE_GLOBAL = 'global'
SCOPE_LOCAL = 'local'
add_data(format_data: str, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

add_file(format_file: str, file_name: str | None = None) str

Copy a file into the representation as a new explanation representation file.

Parameters:
format_file:

Source file to store (copy) as new explanation’s format file.

file_name: str

Representation file name or file relative path.

property explanation
property file_names: List[str]

Get file names which form the representation.

Hints:

  • the representation is formed by a flat structure of files without directories

  • representation data are not kept in memory - the list of file names is sufficient

get_data(file_name: str | None = None)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get the local explanation for the given dataset row and the feature/class/… specified by the explanation filter. The local explanation is returned as a string.

classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

Get global explanation page.

property index_file_name: str

Get (mandatory) index file name which typically references all other files along with various metadata.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_meta(persistence: ExplainerPersistence, explanation_type: str, explanation_format: str) dict

Load representation metadata with class identifier and MIME.

mime: str = None
update_data(format_data: str, file_name: str | None = None)
class h2o_sonar.lib.api.formats.ExplanationFormatUtils

Bases: object

static get_page(data, page_offset: int, page_size: int)

Get page of given data.

Parameters:
data:

Data to page.

page_offset: int

Page offset as a non-negative integer.

page_size: int

Page size; all data entries are returned if 0 or a negative integer.
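
A minimal usage sketch (the list data is illustrative):

data = list(range(100))
# first page of 20 entries; a page_size of 0 or less would return all entries
page = ExplanationFormatUtils.get_page(data, page_offset=0, page_size=20)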

class h2o_sonar.lib.api.formats.Global3dDataJSonCsvFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of global 3D data (3D bar charts, heatmaps, …) as CSV files with JSon index.

JSon representation index file example:

{
    "features": {
        "PAY_0 and AGE": {
            "order": 0,
            "feature_names": ["PAY_0", "AGE"],
            "files": {
                "red_class": "data3d_feature_0_class_0.csv",
                "green_class": "data3d_feature_0_class_1.csv",
                "blue_class": "data3d_feature_0_class_2.csv"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "documentation": "PD for 2 features..."
}

CSV representation data file example:

,feature_1_bin_1,feature_1_bin_2,feature_1_bin_3
feature_2_bin_1,1,1,1
feature_2_bin_2,2,2,2
feature_2_bin_3,3,3,3
KEY_FEATURE_NAMES = 'feature_names'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') Dict
mime: str = 'application/vnd.h2oai.json+csv'
static serialize_index_file(features: List[str], features_names: List[List[str]], classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'data3d', data_file_suffix: str = 'csv', y_file: str | None = None) Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
features: list

Feature tuples.

features_names: list

Per-feature tuple feature names.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}]

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.Global3dDataJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of global 3D data (3D bar charts, heatmaps, …) as JSon.

JSon representation index file example:

{
    "features": {
        "PAY_0 and AGE": {
            "order": 0,
            "feature_names": ["PAY_0", "AGE"],
            "files": {
                "red_class": "data3d_feature_0_class_0.json",
                "green_class": "data3d_feature_0_class_1.json",
                "blue_class": "data3d_feature_0_class_2.json"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "documentation": "PD for 2 features..."
}

JSon representation data file example:

"data_dictionary": {
    {
        "feature_1_bin_1": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        },
        "feature_1_bin_2": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        },
        "feature_1_bin_3": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        }
    }
}

Where:

  • data_dictionary is a dictionary which can be used to easily construct a data frame whose column and row labels represent bin values (see the sketch below)

  • the data key is intentionally not used so that it can be used in the future for Grammar of MLI/Vega friendly representations (as in the case of other formats)
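
A sketch of constructing such a data frame from data_dictionary (pandas is used here purely for illustration):

import pandas as pd

data_dictionary = {
    "feature_1_bin_1": {"feature_2_bin_1": 1, "feature_2_bin_2": 2, "feature_2_bin_3": 3},
    "feature_1_bin_2": {"feature_2_bin_1": 1, "feature_2_bin_2": 2, "feature_2_bin_3": 3},
}
frame = pd.DataFrame(data_dictionary)  # columns = feature 1 bins, index = feature 2 bins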

KEY_FEATURE_NAMES = 'feature_names'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') Dict
mime: str = 'application/json'
static serialize_index_file(features: List[str], features_names: List[List[str]], classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'data3d', data_file_suffix: str = 'json', y_file: str | None = None) Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
features: list

Feature tuples.

features_names: list

Per-feature tuple feature names.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}]

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalDtJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of a decision tree as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "dt_class_0.json",
        "green_class": "dt_class_1.json",
        "blue_class": "dt_class_2.json",
        ...
    },
    "metrics": [
      {"Training RMSE": 0.96},
      {"CV RMSE": 0.97},
      {"NFolds": 3},
      {"R2": 0.96}
    ]
}

JSon representation data file example:

{
    data: [
        {
          key: str,
          name: str,
          parent: str,
          edge_in: str,
          edge_weight: num,
          leaf_path: bool,
          total_weight: num,
          weight: num,
        }+
    ]
}
KEY_CHILDREN = 'children'
KEY_EDGE_IN = 'edge_in'
KEY_EDGE_WEIGHT = 'edge_weight'
KEY_KEY = 'key'
KEY_LEAF_PATH = 'leaf_path'
KEY_NAME = 'name'
KEY_PARENT = 'parent'
KEY_TOTAL_WEIGHT = 'total_weight'
KEY_WEIGHT = 'weight'
class TreeNode(name: str, parent: Any | None, edge_in: str | None, edge_weight: float | None, total_weight: float | None, weight: float | None, leaf_path: bool = False, key: str = '0')

Bases: object

to_dict() dict
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/json'
static serialize_data_file(dt_root_node) str

JSon data file serialization to string.

Parameters:
dt_root_node: TreeNode

Object representation root node.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', metrics: List | Dict | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

metrics: list

Optional list of PD related metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}] in case of binomial/regression or dictionary (per class key, metrics list as value) in case of multinomial.

doc: str

Documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalFeatImpDatatableFormat(explanation, frame: Frame, frame_file: str | None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Global feature importance datatable representation.

Canonical representation (datatable frame, ltypes):

| Required column    | Type  | Description        |
|--------------------|-------|--------------------|
| feature_name       | str   | Feature name.      |
| feature_importance | real  | Feature importance |

… other optional columns are allowed

COL_IMPORTANCE = 'feature_importance'
COL_NAME = 'feature_name'
static from_lists(explanation, features: list, importances: list)
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(frame_data: Frame) Frame
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonCsvFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonDatatableFormat

static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat) GlobalFeatImpJSonCsvFormat
mime: str = 'application/vnd.h2oai.json+csv'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'csv') Tuple[Dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Global feature importance JSon (index file) and datatable (data files) representation.

The typical use of JSon+datatable feature importance representation:

feat_imp_json_dt = GlobalFeatImpJSonDatatableFormat(...create...)
# ... get other representations for free:
feat_imp_json = GlobalFeatImpJSonFormat.from_json_datatable(feat_imp_json_dt)
feat_imp_json_csv = GlobalFeatImpJSonCsvFormat.from_json_datatable(feat_imp_json_dt)

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.jay",
        "green_class": "feature_importance_class_1.jay",
        "blue_class": "feature_importance_class_2.jay",
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592
}

Datatable representation data file spec (datatable frame, ltypes; other optional columns are allowed):

| Required column    | Type  | Description                           |
|--------------------|-------|---------------------------------------|
| feature_name       | str   | Feature name.                         |
| feature_importance | real  | Feature importance                    |
| global_scope       | bool  | Global/local feature importance scope |

Datatable representation data file example:

   | feature_name  feature_importance  global_scope
-- + ------------  ------------------  ------------
 0 | feature-a                    1.1             1
 1 | feature-b                    2.2             1
COL_GLOBAL_SCOPE = 'global_scope'
COL_IMPORTANCE = 'feature_importance'
COL_NAME = 'feature_name'
add_data_frame(format_data: Frame, file_name: str | None = None)
static dict_to_data_frame(feature_importances: Dict[str, float], scope: str = 'global') Frame

(Typical) feature importance dictionary to data frame conversion.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

Returns:
dt.Frame:

Data file.
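
For example, a hypothetical call (feature names and importances are illustrative):

frame = GlobalFeatImpJSonDatatableFormat.dict_to_data_frame(
    feature_importances={"PAY_0": 0.83, "AGE": 0.41},
    scope="global",
)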

static from_lists(explanation, features: list, importances: list)
get_data(file_name: str | None = None)
classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

Get global explanation page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.json",
        "green_class": "feature_importance_class_1.json",
        "blue_class": "feature_importance_class_2.json",
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592
}

JSon representation data file example:

{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
    bias: num
}

Where:

  • label is feature name

  • value is feature importance

  • scope is either local or global

KEY_LABEL = 'label'
KEY_VALUE = 'value'
static from_dataframe_to_json(frame: Frame, bias_col: str = None) str
static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat, bias_col: str = None) GlobalFeatImpJSonFormat
classmethod get_global_explanation(persistence: ExplainerPersistence, explanation_type: str) str

Get global feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_data_file(feature_importances: Dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: optional float

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).
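
A hypothetical call which would produce an index like the example above (class names and metric values are illustrative):

files_map, index_json = GlobalFeatImpJSonFormat.serialize_index_file(
    classes=["red_class", "green_class", "blue_class"],
    default_class="red_class",
    metrics=[{"R2": 0.96}, {"RMSE": 0.03}],
    total_rows=592,
)
# files_map maps classes to data file names,
# e.g. {"red_class": "feature_importance_class_0.json", ...}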

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalGroupedBarChartJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Global grouped bar chart JSon (index file) and datatable (data files) representation.

COL_X = 'x'
COL_Y_GROUP_1 = 'y_group_1'
COL_Y_GROUP_2 = 'y_group_2'
add_data_frame(format_data: Frame, file_name: str | None = None)
get_data(file_name: str | None = None)
classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') Dict
mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalLinePlotJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonFormat, GrammarOfMliFormat

class h2o_sonar.lib.api.formats.GlobalNlpLocoJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.json",
        "green_class": "feature_importance_class_1.json",
        "blue_class": "feature_importance_class_2.json",
        ...
    },
    "filters": [
        {
            "type": "text_features",
            "name": "TEXT FEATURES",
            "description": "Model text features",
            "values": ["description", "review"]
        }
    ],
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592
}

JSon representation data file example:

{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
    bias: num
}

Where:

  • label is feature name

  • value is feature importance

  • scope is either local or global

FILTER_TYPE_TEXT_FEATURES = 'text_feature'
KEY_DESCRIPTION = 'description'
KEY_FILTERS = 'filters'
KEY_LABEL = 'label'
KEY_NAME = 'name'
KEY_TYPE = 'type'
KEY_VALUE = 'value'
KEY_VALUES = 'values'
static from_dataframe_to_json(frame: Frame, bias_col: str = None) str
static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat, bias_col: str = None) GlobalFeatImpJSonFormat
classmethod get_global_explanation(persistence: ExplainerPersistence, explanation_type: str) str

Get global feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

Get global explanation page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_data_file(feature_importances: Dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: optional float

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', filters: list | None = None, metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

filters: list

Optional list of per-filter items used to filter data entries.

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalScatterPlotJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonFormat, GrammarOfMliFormat

class h2o_sonar.lib.api.formats.GlobalSummaryFeatImpJsonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global summary feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_summary_class_0.jay",
        "green_class": "feature_importance_summary_class_1.jay",
        "blue_class": "feature_importance_summary_class_2.jay",
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 25
}

Where:

  • total_rows is the number of features.

Getting data file:

> datatable.fread("feature_importance_summary_class_2.jay")

JSon representation data file example:

   |  feature   shapley_value   count   avg_high_value   clazz   order
-- + --------- --------------- ------- ---------------- ------- -------
 0 |  PAY_0      0.390716        0      0.390716         "red"   0
 1 |  PAY_0     -0.386815       25      0.38681          "red"   0
 ...
 . |  AGE        0.425908       17      0.425908         "red"   1
 ...

Where:

  • feature is the feature name (y-axis)

  • shapley_value is the Shapley value (x-axis)

  • count is the frequency of the Shapley value (height, normalized to [0, 1])

  • avg_high_value is the average feature value (color) normalized to [0, 1] (0 if the feature value is low, 1 if it is high) in case of numerical features, None in case of categorical features.

  • order is the feature order which ensures "order by feature importance" paging

KEY_FEATURE = 'feature'
KEY_FREQUENCY = 'count'
KEY_HIGH_VALUE = 'avg_high_value'
KEY_ORDER = 'order'
KEY_SHAPLEY = 'shapley_value'
add_data_frame(format_data: Frame, file_name: str | None = None)
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'summary_feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalSummaryFeatImpJsonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global summary feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": {
            "0": "feature_importance_class_0_offset_0.json",
            "10": "feature_importance_class_0_offset_10.json",
            "20": "feature_importance_class_0_offset_20.json"
        },
        "green_class": {
            ...
        },
        "blue_class":  {
            "0": "feature_importance_class_2_offset_0.json",
            "10": "feature_importance_class_2_offset_10.json",
            "20": "feature_importance_class_2_offset_20.json"
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 25,
    "rows_per_page": 10
}

Where:

  • Every class dictionary has a per-page offset key with the JSon file containing the chart for the given page. The offset is based on the number of rows (features) per page.

  • total_rows is the number of features.

  • rows_per_page is the number of features in every file (one file is created per page)

JSon representation data file example:

{
    data: [
        {
            feature: str,
            shapley_value: num,
            count: num,
            avg_high_value: num,
            order: num,
        }+
    ]
}

Where:

  • feature is the feature name (y-axis)

  • shapley_value is the Shapley value (x-axis)

  • count is the frequency of the Shapley value (height, normalized to [0, 1])

  • avg_high_value is the average feature value (color) normalized to [0, 1] (0 if the feature value is low, 1 if it is high) in case of numerical features, None in case of categorical features.

  • order is the feature order (global feature importance).
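
A minimal sketch of resolving a page data file from the paged index shown above (index_json stands for the loaded index file content and is illustrative):

import json

index = json.loads(index_json)
# Page data files are keyed by row offset within each class:
page_file = index["files"]["red_class"]["10"]  # page starting at feature row 10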

DATA_FILE_PREFIX = 'summary_feature_importance'
DEFAULT_PAGE_SIZE = 10
KEY_FEATURE = 'feature'
KEY_FEATURES_PER_PAGE = 'features_per_page'
KEY_FREQUENCY = 'count'
KEY_HIGH_VALUE = 'avg_high_value'
KEY_ORDER = 'order'
KEY_SHAPLEY = 'shapley_value'
static from_json_datatable(json_dt_format: GlobalSummaryFeatImpJsonDatatableFormat, page_size: int, total_rows: int = -1, persistence: Persistence | None = None, index_extensions: Dict | None = None) Tuple[GlobalSummaryFeatImpJsonFormat, dict]
classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

The representation expects the JSon+datatable representation to exist and uses it to construct the requested page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/json'
static serialize_data_file(feature_importances: Dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: optional float

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, rows_per_page: int | None = None, data_file_prefix: str = 'summary_feature_importance', data_file_suffix: str = 'json') Tuple[Dict, str]
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GrammarOfMliFormat

Bases: object

A format class which is a child of the Grammar of MLI format class is supported in the H2O Eval Studio UI - there is a UI component which will render such a format as an (interactive) chart.

classmethod is_grammar_of_mli() bool

Will the representation be rendered in the UI?

class h2o_sonar.lib.api.formats.HtmlFormat(explanation, format_data: str, format_file: str | None = None, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

HTML representation.

Example local (single row) on-demand NLP HTML explanation:

<feature-text min="-10.0" max="5.0">
  Sentence with <word value="-0.9485">dummy word</word>.
</feature-text>
ATT_MAX = 'max'
ATT_MIN = 'min'
ATT_VALUE = 'value'
EL_FEATURE_TEXT = 'feature-text'
EL_WORD = 'word'
MINIMAL_HTML = "<!DOCTYPE html>\n<html lang='en'><head></head><body></body></html>"
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'text/html'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceCsvFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: CsvFormatCustomExplanationFormat

mime: str = 'text/csv'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceDatatableFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Individual conditional explanation as datatable.

Canonical representation (datatable frame, ltypes) for 1D ICE:

| Required column    | Type  | Description            |
|--------------------|-------|------------------------|
| feature_name       | str   | Feature name.          |
| feature_type       | str   | Feature type.          |
| instance_id        | int   | Instance.              |
| bin_value          | str   | Bin value.             |
| prediction         | real  | Prediction.            |

Hints:

  • bin_value is converted to string (can be converted back using feature_type)

… other optional columns are allowed

COL_BIN_VALUE = 'bin_value'
COL_F_LTYPE = 'feature_type'
COL_F_NAME = 'feature_name'
COL_INSTANCE = 'instance'
COL_PREDICTION = 'prediction'
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceJsonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Individual conditional explanation as per-feature and class datatable frames with JSon index file.

JSon representation index file example:

{
    "features": {
        "PAY_0": {
            "order": 0,
            "feature_type": ["categorical"],
            "files": {
                "red_class": "ice_feature_0_class_0.jay",
                "blue_class": "ice_feature_0_class_1.jay",
                "white_class": "ice_feature_0_class_2.jay"
            }
        },
        ...
    },
    "metrics": [{"RMSE": 0.03}],
    "y_file": "y_hat.jay",
    "on_demand": false
}

or (if on demand e.g. in case of sampled dataset):

{
    "on_demand": true,
    "on_demand_parameters": ...
}

Datatable representation data file example:

> datatable.fread("ice_feature_0_class_0.jay")
   |       -2        -1         0         1         2         7
-- + --------  --------  --------  --------  --------  --------
 0 | 0.390716  0.390716  0.390716  0.390716  0.531548  0.531548
 1 | 0.38681   0.38681   0.38681   0.38681   0.508216  0.508216
 2 | 0.425908  0.425908  0.425908  0.425908  0.536061  0.536061
 ...

Remarks:

  • y_file … datatable frame with predictions for every X dataset instance

  • on_demand … true if there is no cached ICE and it must be computed

FILE_Y_FILE = 'y_hat.jay'
KEY_BIN = 'bin'
KEY_BINS = 'bins'
KEY_BINS_NUMCAT_ASPECT = 'bins_numcat_aspect'
KEY_COL_NAME = 'column_name'
KEY_FEATURE_VALUE = 'feature_value'
KEY_ICE = 'ice'
KEY_PREDICTION = 'prediction'
add_data_frame(format_data: Frame, file_name: str | None = None)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, dataset_path: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get ICE.

Parameters:
persistence

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

dataset_path

Dataset path.

row: int

Local explanation to be provided for given row.

explanation_filter: List[FilterEntry]

Required filter entries: feature, class.

Returns:
str

JSon representation of the local explanation.

JSon ICE representation:
{
    prediction: float,
    data: [
        {
            bin: any,
            ice: float,
        }
    ]
}
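
A hypothetical call (the persistence object, dataset path, and filter entries are illustrative):

json_ice = IceJsonDatatableFormat.get_local_explanation(
    persistence=persistence,      # initialized for the explainer/MLI run
    explanation_type="...",       # explanation type ~ explanation ID
    dataset_path="dataset.csv",   # illustrative dataset path
    row=7,                        # dataset row to explain
    explanation_filter=filters,   # must include the feature and class entries
)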
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

classmethod merge_format(from_path: str, to_path: str, overwrite: bool = True, discriminant: str = '', is_numcat_merge: bool = False, persistence: Persistence | None = None)

Merge the source (from_path) representation files into the target (to_path) representation files.

Parameters:
from_path: str

Directory with the source representation to merge.

to_path: str

Directory with the target representation into which new explanations should be merged.

overwrite: bool

Overwrite explanations if they already exist in the target representation. Use False to keep existing target explanations in case of a clash.

discriminant: str

Delimiter to make data file names unique (if needed).

is_numcat_merge: bool

True if this is a num/cat update, False otherwise.

persistence: Optional[persistences.Persistence]

Persistence store to save and load representations.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
classmethod mli_ice_explanation_to_json(ice_df: DataFrame, filter_feature: str, prediction, feature_value, logger=None) str
static serialize_index_file(features: List[str], classes: List[str], default_class: str = '', features_meta: dict | None = None, metrics: list | None = None, doc: str = '', y_file: str | None = None) Tuple[Dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LlmHeatmapLeaderboardJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: LlmLeaderboardJSonFormat, TextCustomExplanationFormat

Representation of LLM Heatmap Leaderboard explanation as JSon.

JSon representation index file example:

{
    "files": {
        "ragas": "leaderboard_0.json",
        "answer_relevance": "leaderboard_1.json",
        ...
        "ALL_METRICS": "leaderboard_n.json"
    },
    ...
}

JSon representation data file example:

{
    "data": {
        "h2oai/h2ogpt-4096-llama2-70b-chat": {
            "answer_similarity": 1
        },
        "h2oai/h2ogpt-4096-llama2-70b-chat-4bit": {
            "answer_similarity": 1
        },
        ...
        "gpt-4-32k-0613": {
            "answer_similarity": 1
        }
    },
    "eda": {
        ...
    }
}
KEY_DEFAULT_METRIC = 'default_metric'
KEY_EDA = 'eda'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_index_file(metrics: List[str], default_metric: str = '', eda: dict | None = None, doc: str = '', data_file_prefix: str = 'leaderboard', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
metrics: list

Metrics.

default_metric: str

Metric to be shown as default (the first one).

eda: Dict

EDA data.

doc: str

Documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of metrics to file names AND JSon serialization (as string).
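
A hypothetical call matching the index file example above (metric names are illustrative):

files_map, index_json = LlmHeatmapLeaderboardJSonFormat.serialize_index_file(
    metrics=["ragas", "answer_relevance", "ALL_METRICS"],
    default_metric="ragas",
)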

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LlmLeaderboardJSonFormat

Bases: ABC

KEY_ALL_METRICS = 'ALL_METRICS'
mime = 'application/json'
class h2o_sonar.lib.api.formats.LocalDtJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local representation of decision tree as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "dt_class_0.json",
        "green_class": "dt_class_1.json",
        "blue_class": "dt_class_2.json",
        ...
    },
    "metrics": [
      {"Training RMSE": 0.96},
      {"CV RMSE": 0.97},
      {"NFolds": 3},
      {"R2": 0.96}
    ]
}

JSon representation data file example:

{
    data: [
        {
          key: str,
          name: str,
          parent: str,
          edge_in: str,
          edge_weight: num,
          leaf_path: bool,
          total_weight: num,
          weight: num
        }+
    ]
}

or (if on demand e.g. in case of sampled dataset):

{
    "on_demand": true,
    "on_demand_parameters": ...
}

Remarks:

  • leaf_path … true if this is a local path (a hint in the leaf defines the path to the root), else a global explanation. In other words, the whole tree is returned with the leaf selected.

  • on_demand … true if there is no cached DT and it must be computed.

static dt_path_to_node_key(path: str) str
static dt_set_tree_path(key: str, tree: dict)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get local DT explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

row: int

Local explanation to be provided for given row.

explanation_filter: List[FilterEntry]

Required filter entries: feature, class.

Returns:
str:

JSon representation of the local explanation.

JSon DT representation:
{
    data: [
        {
          key: str,
          name: str,
          parent: str,
          edge_in: str,
          edge_weight: num,
          leaf_path: bool,
          total_weight: num,
          weight: num,
        }+
    ]
}
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') Tuple[Dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalFeatImpDatatableFormat(explanation, frame: Frame, frame_file: str | None = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat, GrammarOfMliFormat

Local feature importance datatable representation.

  • feature importance for all classes

Canonical representation (datatable frame):

| Columns            | Rows                                |
|--------------------|-------------------------------------|
| feature names      | per-dataset row feature importance  |

Example:

   | activity     ...  max_speed
-- + ------------ ...  ----------
 0 | -0.0143614        -0.142553
 . |  ...               ...
 9 |  0.0156479        -0.231883
FILE_EXT = 'jay'
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get local feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

row: int

Local explanation to be provided for given row.

explanation_filter: List[FilterEntry]

Filter (unused in case of feature importance).

Returns:
str:

JSon representation of the local explanation.

JSon representation:
{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
}
Where:
  • label is feature name
  • value is feature importance
  • scope is local
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(frame_data: Frame) Frame
class h2o_sonar.lib.api.formats.LocalFeatImpJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonDatatableFormat

add_data_frame(format_data: Frame, file_name: str | None = None)
get_data(file_name: str | None = None)
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]
class h2o_sonar.lib.api.formats.LocalFeatImpJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of local feature importance explanation as JSon. See GlobalFeatImpJSonFormat for structure of the index file and data.

KEY_Y = 'prediction'
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
static merge_local_and_global_page(global_page: dict, local_page: dict, mli_key: str = '', explainer_job_key: str = '', bias_key: str = '', logger=None)

Use this method to merge the local and global explanations page (especially if the frontend is not able to process local explanations only).

The local explanations page is expected to be sorted (as required) and it defines the order of entries in the merged page. The merged result contains a global explanation entry followed by a local explanation entry.

mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '') Tuple[Dict, str]
static sort_data(json_dict: dict)

Sort local feature importance explanation data by (abs) value:

{'data': [{'label': .,'value': .,'scope': .}, ...
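
A hypothetical sketch of the data sort_data operates on (labels and values are illustrative):

page = {"data": [{"label": "AGE", "value": -0.2, "scope": "local"},
                 {"label": "PAY_0", "value": 0.9, "scope": "local"}]}
LocalFeatImpJSonFormat.sort_data(page)  # sorted by |value|: PAY_0 before AGE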
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalFeatImpWithYhatsJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: LocalFeatImpJSonDatatableFormat

FILE_Y_HAT = 'y_hat.bin'
KEY_Y_HAT = 'y_hat'
class h2o_sonar.lib.api.formats.LocalNlpLocoJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of local LOCO explanation as JSon. See GlobalNlpLocoJSonFormat for structure of the index file and data.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
static merge_local_and_global_page(global_page: dict, local_page: dict, mli_key: str = '', explainer_job_key: str = '', bias_key: str = '', logger=None)

Use this method to merge the local and global explanations page (especially if the frontend is not able to process local explanations only).

The local explanations page is expected to be sorted (as required) and it defines the order of entries in the merged page. The merged result contains a global explanation entry followed by a local explanation entry.

mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '') Tuple[Dict, str]
static sort_data(json_dict: dict)

Sort local feature importance explanation data by (abs) value:

{'data': [{'label': .,'value': .,'scope': .}, ...
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalOnDemandHtmlFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (single row) on-demand representation.

classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Load index file and check parameters.

Returns:
str:

Local explanation as string - can be any (un)structured format.

mime: str = 'text/html'
set_on_demand(is_on_demand: bool, mime: str = '')

Indicate that representation is on-demand.

static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.LocalOnDemandTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (single row) on-demand representation.

classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Load index file and check parameters.

Returns:
str:

Local explanation as string - can be any (un)structured format.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.LocalSummaryFeatImplJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (on-demand) representation of summary feature importance as JSon.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict
mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') Tuple[Dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.MarkdownFormat(explanation, format_file: str, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Markdown representation (text and images).

mime: str = 'text/markdown'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.ModelValidationResultArchiveFormat(explanation, mv_test_type: str | Any, mv_test_name: str, mv_test_id: str, mv_test_results, mv_test_settings, mv_test_artifacts: Dict, mv_test_log, mv_client=None, persistence: Persistence | None = None, logger=None)

Bases: ExplanationFormat

Model Validation test result archived in a ZIP.

mime: str = 'application/zip'
class h2o_sonar.lib.api.formats.PartialDependenceCsvFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: CsvFormatCustomExplanationFormat

static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.PartialDependenceDatatableFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Representation of partial dependence (PD) explanation as datatable.

Canonical representation (datatable frame, ltypes) for 1D PD:

| Required column    | Type  | Description            |
|--------------------|-------|------------------------|
| feature_name       | str   | Feature name.          |
| feature_type       | str   | Feature type.          |
| bin_value          | str   | Bin value              |
| mean               | real  | Mean.                  |
| sd                 | real  | Standard deviation.    |
| sem                | real  | Standard error of mean.|
| is_oor             | bool  | Is out of range value? |

Hints:

  • bin_value is converted to string (can be converted back using feature_type).

… other optional columns are allowed

COL_BIN_VALUE = 'bin_value'
COL_F_LTYPE = 'feature_type'
COL_F_NAME = 'feature_name'
COL_IS_OOR = 'is_oor'
COL_MEAN = 'mean'
COL_SD = 'sd'
COL_SEM = 'sem'
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.PartialDependenceJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of partial dependence (PD) explanation as JSon.

JSon representation index file example:

{
    "features": {
        "PAY_0": {
            "order": 0,
            "feature_type": ["categorical"],
            "files": {
                "red_class": "pd_feature_0_class_0.json",
                "green_class": "pd_feature_0_class_1.json",
                "blue_class": "pd_feature_0_class_2.json"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}]
}

JSon representation data file example:

{
    "data": [{
        "bin": -2,
        "pd": 0.3710160553455353,
        "sd": 0.029299162328243256,
        "out_of_range": false
    }, {
        "bin": -1,
        "pd": 0.3710160553455353,
        "sd": 0.029299162328243256,
        "out_of_range": false
    },
    ...
    ]
}
KEY_BIN = 'bin'
KEY_FREQUENCY = 'frequency'
KEY_OOR = 'oor'
KEY_PD = 'pd'
KEY_SD = 'sd'
KEY_X = 'x'
classmethod get_bins(persistence: ExplainerPersistence, explanation_type: str, feature: str) list

Get bins for given feature.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

feature: str

Feature for which to get bins.

Returns:
list:

Bins.

classmethod get_numcat_aspects(feature, idx: dict) List[str]

Get available num/cat aspects for given feature:

  • [] … invalid feature

  • ["numeric"] … numeric PD only

  • ["categorical"] … categorical PD only

  • ["numeric", "categorical"] … numeric and categorical PD

Parameters:
feature: str

Feature name for which to determine available aspects.

idx: dict

PD JSon index file (explanation.json).

Returns:
list[str]:

Available num/cat aspects.

classmethod get_numcat_missing_aspect(feature: str, idx: dict)

Return (missing) aspect to be calculated.

Parameters:
feature: str

Feature name for which to determine available aspects.

idx: dict

PD JSon index file (explanation.json).

Returns:
str:

Aspect to calculate or "" (no aspect is missing).

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') Dict
classmethod merge_format(from_path: str, to_path: str, overwrite: bool = True, discriminant: str = '', is_numcat_merge: bool = False, persistence: Persistence | None = None)

Merge the source (from_path) representation files into the target (to_path) representation files.

Parameters:
from_path: str

Directory with the source representation to merge.

to_path: str

Directory with the target representation into which new explanations should be merged.

overwrite: bool

Overwrite explanations if they already exist in the target representation. Use False to keep existing target explanations in case of a clash.

discriminant: str

Delimiter to make data file names unique (if needed).

is_numcat_merge: bool

True if this is num/cat update, False otherwise.

persistence: Optional[persistences.Persistence]

Persistence store to save and load explanation representations.

mime: str = 'application/json'
static serialize_index_file(features: List[str], classes: List[str], default_class: str = '', features_meta: dict | None = None, metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'pd', data_file_suffix: str = 'json', y_file: str | None = None) Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
features: list

Features.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

features_meta: dict

Features metadata allowing to indicate that a given feature is categorical (use the categorical key and a list of feature names), a date (use the date key and a list of feature names; to specify the format, use date-format and a list of Python date formats), or numerical (default).

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).
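
A hypothetical call (feature and class names, metadata, and metrics are illustrative):

files_map, index_json = PartialDependenceJSonFormat.serialize_index_file(
    features=["PAY_0", "AGE"],
    classes=["red_class", "green_class", "blue_class"],
    default_class="red_class",
    features_meta={"categorical": ["PAY_0"]},
    metrics=[{"RMSE": 0.03}],
)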

classmethod set_merge_status(dir_path: str, mli_key: str, explainer_job_key: str, clear: bool = False, action: str = 'update_explanation', action_type: str = 'add_aspect', persistence: Persistence | None = None)

Add (clear=False) or remove (clear=True) a running interpretation update.

Parameters:
dir_path: str

Directory with index file where the status should be set.

mli_key: str

MLI key of the interpretation which will update another representation.

explainer_job_key: str

Explainer job key of the interpretation which will update another representation.

clear: bool

Add (clear=False) or remove (clear=True) indicator in representation’s dict.

action: str

Running action identifier e.g. update explanation.

action_type: str

Action (sub)type identifier e.g. add feature, add numeric/categorical view.

persistence: Optional[persistences.Persistence]

Persistence store to save and load explanation representations.

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.SaTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Sensitivity Analysis (SA) text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.TextCustomExplanationFormat(explanation, format_data: str, format_file: str | None, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: ExplanationFormat

FILE_IS_ON_DEMAND = 'IS_ON_DEMAND'
FILTER_CLASS = 'explain_class'
FILTER_FEATURE = 'explain_feature'
FILTER_NUMCAT = 'explain_numcat'
add_data(format_data: str, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

add_file(format_file: str, file_name: str | None = None)

Copy file to representation as new explanation representation file.

Parameters:
format_file:

Source file to store (copy) as new explanation’s format file.

file_name: str

Representation file name or file relative path.
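
A hypothetical sketch (fmt stands for an instantiated text representation; the payload and file names are illustrative):

fmt.add_data(format_data='{"data": []}', file_name="extra.json")
fmt.add_file(format_file="/tmp/chart.json")  # copied into the representation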

get_data(file_name: str | None = None, data_type: PersistenceDataType | None = None)
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'text/plain'
static set_index_commons(index_dict: dict, classes: List[str], default_class: str = '', metrics: list | None = None, keywords: int | None = None, doc: str = '', total_rows: int | None = None)
set_on_demand(is_on_demand: bool, mime: str = '')

Indicate that representation is on-demand.

update_index_file(index_dict: dict, metrics: list | None = None, total_rows: int | None = None)
class h2o_sonar.lib.api.formats.TextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.WorkDirArchiveZipFormat(explanation, file_filter=<function WorkDirArchiveZipFormat.<lambda>>, persistence: ~h2o_sonar.lib.api.persistences.Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Working directory ZIP archive representation. Just instantiate this class, and it will create the ZIP representation (no need to add files/data). Note that the archive is created exactly at the time of instantiation.

mime: str = 'application/zip'
h2o_sonar.lib.api.formats.get_custom_explanation_formats()

h2o_sonar.lib.api.interpretations module

class h2o_sonar.lib.api.interpretations.ExplainerJob(key: str = '', created: float = 0.0, duration: float = 0.0, progress: float = 0.0, status: ExplainerJobStatus = ExplainerJobStatus.UNKNOWN, message: str = '', error: str = '', explainer_persistence=None, explainer_descriptor: ExplainerDescriptor | None = None, result_descriptor=None, child_explainer_job_keys: List[str] | None = None, job_location: str = '')

Bases: object

Explainer job.

KEY_CHILD_KEYS = 'child_explainer_job_keys'
KEY_CREATED = 'created'
KEY_DURATION = 'duration'
KEY_ERROR = 'error'
KEY_EXPLAINER_DESCRIPTOR = 'explainer'
KEY_JOB_LOCATION = 'job_location'
KEY_KEY = 'key'
KEY_MESSAGE = 'message'
KEY_PROGRESS = 'progress'
KEY_RESULT_DESCRIPTOR = 'result_descriptor'
KEY_STATUS = 'status'
evaluator_id() str
explainer_id() str
static from_dict(explainer_job_dict: Dict)
is_finished() bool
success()
tick(msg: str = '', progress_increment: float = 0.1)
to_dict() Dict
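
A hypothetical round-trip through the dictionary representation (job stands for an existing ExplainerJob instance):

job_dict = job.to_dict()
restored = ExplainerJob.from_dict(job_dict)
assert restored.is_finished() == job.is_finished()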
class h2o_sonar.lib.api.interpretations.HtmlInterpretationFormat(interpretation: Interpretation, branding: Branding = Branding.H2O_SONAR, logger: SonarLogger | None = None)

Bases: object

HTML representation of the interpretation.

class Context

Bases: object

Context with the data needed to create HTML.

clear()
get_purpose_representatives_job_keys() List[str]
KEYWORD_ID_2_NAME = {'explains-approximate-behavior': 'Approximate model behavior', 'explains-fairness': 'Fairness', 'explains-feature-behavior': 'Feature behavior', 'explains-model': 'Model explanations', 'explains-model-debugging': 'Model debugging', 'explains-original-feature-importance': 'Original feature importance', 'explains-transformed-feature-importance': 'Transformed feature importance'}

Inject footer into Airium HTML.

Parameters:
html: airium.Airium

Airium HTML instance.

brand_h2o_sonar: str

H2O Eval Studio branding.

branding: commons.Branding

Branding.

static html_h2o_sonar_pitch(brand_h2o_sonar: str) str
static html_head(a, title: str = 'H2O Sonar')

Create head of HTML representation.

Parameters:
a: airium.Airium

Airium HTML instance.

title: str

Optional HTML head title.

static html_safe_str_field(field)

Inject H2O.ai SVG logo to HTML.

Parameters:
html: airium.Airium

Airium HTML instance.

to_html(include_left_navigation: bool = True, report_style: str = 'HTML') str

Get HTML report for the interpretation.
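
A hypothetical sketch of rendering a report (interpretation stands for a finished Interpretation instance):

html_format = HtmlInterpretationFormat(interpretation=interpretation)
report_html = html_format.to_html(include_left_navigation=True)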

class h2o_sonar.lib.api.interpretations.HtmlInterpretationsFormat(interpretations: Interpretations, branding: Branding = Branding.H2O_SONAR, logger: SonarLogger | None = None)

Bases: object

HTML representation of an interpretations list.

to_html(branding: Branding = Branding.EVAL_STUDIO) str

Get HTML for the interpretations list.

class h2o_sonar.lib.api.interpretations.Interpretation(common_params: CommonInterpretationParams, created: float, explainers: List[str | ExplainerToRun] | None, explainer_keywords: List[str] | None = None, key: str = '', sampler: DatasetSampler | None = None, branding: Branding = Branding.H2O_SONAR, results_formats: List[str] | None = None, progress_callback: AbstractProgressCallbackContext | None = None, logger=None, extra_params: List | None = None)

Bases: object

Interpretation is a request to interpret a model using explainers. The interpretation instance also serves as an execution context; however, it does not execute explainers itself - its purpose is to be a prescription (of what is requested) and a stateful data holder. The interpretation result (referenced by the instance) is a set of explanations which were created by explainers.

Attributes:
key: str

Interpretation key.

common_params: commons.CommonInterpretationParams

Interpretation parameters specified by the user.

explainers: List

Explainers to be run (if no explainers are specified, then all compatible explainers are run).

persistence: persistences.Persistence

Persistence store - file-system, in-memory, DB - where interpretation results are stored and from where they can be loaded using the persistence instance.

KEY_ALL_EXPLAINERS = 'all_explainer_ids'
KEY_CREATED = 'created'
KEY_DATASET = 'dataset'
KEY_ERROR = 'error'
KEY_EXECUTED_EXPLAINERS = 'executed_explainers'
KEY_EXPLAINERS = 'explainers'
KEY_E_PARAMS = 'explainers_parameters'
KEY_INCOMPATIBLE_EXPLAINERS = 'incompatible_explainer_ids'
KEY_INCOMPATIBLE_EXPLAINERS_DS = 'incompatible_explainers'
KEY_INSIGHTS = 'insights'
KEY_INTERPRETATION_LOCATION = 'interpretation_location'
KEY_I_KEY = 'interpretation_key'
KEY_I_PARAMS = 'interpretation_parameters'
KEY_MODEL = 'model'
KEY_MODELS = 'models'
KEY_OVERALL_RESULT = 'overall_result'
KEY_PROBLEMS = 'problems'
KEY_PROGRESS = 'progress'
KEY_PROGRESS_MESSAGE = 'progress_message'
KEY_RESULT = 'result'
KEY_RESULTS_LOCATION = 'results_location'
KEY_SCHEDULED_EXPLAINERS = 'scheduled_explainers'
KEY_STATUS = 'status'
KEY_TARGET_COL = 'target_col'
KEY_TESTSET = 'testset'
KEY_VALIDSET = 'validset'
static dict_to_digest(i_json: Dict)
get_all_explainer_ids() List[str]
get_explainer_ids_by_status(status: int) List[str]
get_explainer_insights(explainer_id: str) List
get_explainer_jobs_by_status(status: int) List[ExplainerJob]
get_explainer_problems(explainer_id: str) List
get_explainer_result(explainer_id: str) ExplainerResult | None
get_explainer_result_metadata(explainer_id: str) Dict | None
get_explanation_file_path(explanation_type: str, explanation_format: str, explainer_id: str = '', evaluator_id: str = '')

Get explanation (index) file path.

Parameters:
explainer_id: str

Explainer ID - either explainer or evaluator ID must be specified.

evaluator_id: str

Evaluator ID - either explainer or evaluator ID must be specified.

explanation_type: str

Explanation type as string.

explanation_format: str

Explanation (MIME) format.

Returns:
str

Path to the explanation file.
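
A hypothetical call (the explanation type and explainer ID are illustrative):

path = interpretation.get_explanation_file_path(
    explanation_type="global-feature-importance",  # illustrative type
    explanation_format="application/json",
    explainer_id="...",                            # illustrative explainer ID
)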

get_failed_explainer_ids() List[str]
get_finished_explainer_ids() List[str]
get_incompatible_explainer_ids() List[str]
get_insights() List
get_jobs_for_evaluator_id(explainer_id: str) List[ExplainerJob]
get_jobs_for_explainer_id(explainer_id: str) List[ExplainerJob]
get_model_insights(model_name: str) List
get_model_problems(model_name: str) List
get_problems_by_severity(severity: ProblemSeverity) List
get_scheduled_explainer_ids() List[str]
get_successful_explainer_ids() List[str]
is_evaluator_failed() bool
is_evaluator_finished() bool
is_evaluator_scheduled() bool
is_evaluator_successful() bool
is_explainer_failed() bool

Indicate whether an explainer failed.

is_explainer_finished() bool

Indicate whether an explainer successfully finished or failed.

is_explainer_scheduled() bool

Indicate whether there was at least one explainer which was run.

is_explainer_successful() bool

Indicate whether an explainer successfully finished.

load(persistence, logger=None) Interpretation

Load a persisted interpretation using the given persistence.

Parameters:
persistence: persistences.InterpretationPersistence

Interpretation persistence which can be used to load the interpretation from file-system, memory or DB.

logger

Logger.

Returns:
Interpretation

Interpretation instance.

static load_from_json(interpretation_json_path: str | Path) Interpretation

Load interpretation from JSON.
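
A hypothetical sketch (the JSON path is illustrative):

interpretation = Interpretation.load_from_json("results/interpretation.json")
print(interpretation.get_successful_explainer_ids())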

register_explainer_result(explainer_id: str, result: ExplainerResult)
set_progress(progress: float, message: str | None = None) float
to_dict() Dict
to_html() str
to_html_4_pdf() str
to_json(indent=None) str
to_pdf(input_path: str, output_path: str)
update_overall_result() OverallResult
validate_and_normalize_params()

Validate and normalize interpretation parameters.

class h2o_sonar.lib.api.interpretations.InterpretationResult(results_location: str = '', interpretation_location: str = '')

Bases: object

Result of the interpretation run.

get_evaluator_job(evaluator_job_id: str) ExplainerJob | None
get_evaluator_jobs() List[ExplainerJob]
get_explainer_job(explainer_job_id: str) ExplainerJob | None
get_explainer_jobs() List[ExplainerJob]
get_html_report_location(absolute_path: bool = True) str
get_interpretation_dir_location(absolute_path: bool = True) str
get_interpretations_html_index_location(absolute_path: bool = True)
get_json_report_location(absolute_path: bool = True) str
get_pdf_report_location(absolute_path: bool = True) str
get_progress_location(absolute_path: bool = True) str
get_results_dir_location(absolute_path: bool = True) str
make_zip_archive(zip_filename)
remove_duplicate_insights()
to_dict() Dict
to_json(indent=None) str
class h2o_sonar.lib.api.interpretations.Interpretations(interpretations_paths: List[str], persistence, branding: Branding = Branding.H2O_SONAR, logger=None)

Bases: object

Interpretations created by H2O Eval Studio in the results location.

count() int
load_interpretation_meta(i_path: str, digest: bool = True) Dict
to_html(branding: Branding = Branding.EVAL_STUDIO) str
class h2o_sonar.lib.api.interpretations.OverallResult(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Overall evaluation/interpretation result in the traffic light style.

high_severity_problems = 4
low_severity_problems = 2
medium_severity_problems = 3
no_problem = 1
class h2o_sonar.lib.api.interpretations.PdfInterpretationFormat(interpretation: Interpretation, logger: SonarLogger)

Bases: HtmlInterpretationFormat

PDF (via HTML) representation of the interpretation.

to_html_4_pdf() str

To HTML which can be used to generate PDF.

static to_pdf(input_path: str, output_path: str)

h2o_sonar.lib.api.judges module

class h2o_sonar.lib.api.judges.EvaluationJudge

Bases: ABC

Bring your own judge (BYOJ) to evaluate the quality of a model’s output.

abstract evaluate(prompts: List[str], **kwargs) List

Evaluate the quality of a model’s output.

health_check() bool

Check if the judge is healthy and available.
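A minimal bring-your-own-judge sketch; KeywordJudge and its scoring scheme are illustrative, not part of the library:

from typing import List

from h2o_sonar.lib.api.judges import EvaluationJudge


class KeywordJudge(EvaluationJudge):
    # Hypothetical judge which scores model outputs by keyword presence.

    def __init__(self, keyword: str):
        self.keyword = keyword

    def evaluate(self, prompts: List[str], **kwargs) -> List:
        # One score per prompt: 1.0 if the keyword occurs, else 0.0.
        return [1.0 if self.keyword in p else 0.0 for p in prompts]

    def health_check(self) -> bool:
        # This toy judge has no remote dependency, so it is always healthy.
        return True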

class h2o_sonar.lib.api.judges.LlmEvaluationJudge(llm_host_connection: ConnectionConfig, llm_model_name: str, logger: SonarLogger | None = None)

Bases: EvaluationJudge

LLM judge / interrogator for evaluating the quality of a model output.

evaluate(prompts: List[str], **extra_params) List

Evaluate the quality of a model’s output.

class h2o_sonar.lib.api.judges.RagClientEvaluationJudge(client: RagClient, llm_model_name: str, collection_id: str = '')

Bases: EvaluationJudge

RAG judge / interrogator for evaluating the quality of a model output.

evaluate(prompts: List[str], **extra_params) List

Evaluate the quality of a model’s output.

h2o_sonar.lib.api.judges.get_default_evaluation_judge(logger: SonarLogger | None = None)

Get the default evaluation judge - the OpenAI GPT-4 LLM model. If the OpenAI API key is not set, an exception is raised.

h2o_sonar.lib.api.judges.get_evaluation_judge_for_config(judge_config: EvaluationJudgeConfig, logger: SonarLogger | None = None)

Get an evaluation judge for the given judge config.

h2o_sonar.lib.api.judges.get_evaluation_judge_for_connection(connection: ConnectionConfig, judge_type: str, llm_model_name: str, collection_id: str = '', logger: SonarLogger | None = None)

Get an evaluation judge for the given connection and judge type.

h2o_sonar.lib.api.models module

class h2o_sonar.lib.api.models.DriverlessAiModel(model_src, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None, logger=None)

Bases: ExplainableModel

Explainable model which understands Driverless AI experiments and models. It can therefore get model metadata, ensure the required sanitization, and correctly construct a predict method which accepts the expected input and provides the desired output.

ATTR_HAS_SHAPLEYS = 'has_treeshap'
ATTR_LABEL_NAMES = 'output_names'
COL_SHAPLEY_BIAS = 'contrib_bias'
EXT_MOJO = '.mojo'
PREFIX_CLASS = 'class.'
PREFIX_SHAPLEY_COLS = 'contrib_'
static is_dai_model(model_src) bool
shapley_values(X, original_features: bool = True, fast_approx: bool = False, **kwargs)

Get Shapley values.

Parameters:
X: datatable.Frame

Dataset to calculate Shapley values.

original_features: bool

True to get Shapley values for original features, False to get Shapley values for transformed features.

fast_approx: bool

True to use fast approximation for Shapley values calculation.

Returns:
datatable.Frame

Shapley values based feature contributions.

class h2o_sonar.lib.api.models.DriverlessAiRestServerModel(model_server_url: str, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: ExplainableModel

Explainable model which represents Driverless AI experiments deployed as a REST server. Driverless AI is moving from the local REST server to MLOps, therefore this class is deprecated as of 1.10.4 and will be removed. It remains useful for existing Driverless AI deployments.

static is_dai_rest_server_model(model_src) bool
class h2o_sonar.lib.api.models.ExplainableLlmModel(connection: [<class 'str'>, <class 'h2o_sonar.config.ConnectionConfig'>], model_type: ~h2o_sonar.lib.api.models.ExplainableModelType = ExplainableModelType.unknown, name: str = '', llm_model_name: str = '', llm_model_meta: ~typing.Dict | None = None, model_cfg: ~typing.Dict | None = None, key: str = '', logger: ~h2o_sonar.loggers.SonarLogger | None = None)

Bases: object

KEY_CONNECTION = 'connection'
KEY_H2OGPTE_STATS = 'h2ogpte_perf_stats'
KEY_H2OGPTE_VISION_M = 'vision_model_name'
KEY_KEY = 'key'
KEY_LLM_MODEL_META = 'llm_model_meta'
KEY_LLM_MODEL_NAME = 'llm_model_name'
KEY_MODEL_CFG = 'model_cfg'
KEY_MODEL_TYPE = 'model_type'
KEY_NAME = 'name'
KEY_STATS_DURATION = 'duration_stats'
KEY_STATS_FAILURE = 'failure_count'
KEY_STATS_RETRY = 'retry_count'
KEY_STATS_SUCCESS = 'success_count'
KEY_STATS_TIMEOUT = 'timeout_count'
clone()
static from_dict(as_dict: Dict, connection=None) ExplainableLlmModel
to_dict()
to_json(indent=None)
class h2o_sonar.lib.api.models.ExplainableModel(model_src, predict_method: Callable, fit_method=None, model_type: ExplainableModelType = ExplainableModelType.unknown, model_meta: ExplainableModelMeta | None = None, transformed_model: TransformedFeaturesModel | None = None, label_encoder: MultiColumnLabelEncoder | None = None, logger: SonarLogger | None = None)

Bases: object

Explainable model - this class provides a uniform API for ML models regardless of model source, provider, or implementation.

fit(X: ExplainableDataset | Frame, y=None, **kwargs)
property has_transformed_model: bool

Does the explainable model provide an associated model which works on the transformed features?

static load(path: str)

Load model from pickle.

Parameters:
path: str

Model pickle path.

Returns:
ExplainableModel

Instance of the pickled model.

property meta: ExplainableModelMeta
predict(X: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method.

predict_datatable(X, **kwargs) Frame

Score and return predictions as datatable frame.

predict_pandas(X, **kwargs) DataFrame

Score and return predictions as Pandas frame.

save(path: str, update: bool = False)

Pickle the model.

Parameters:
path: str

Model pickle path.

update: bool

Delete the pickled model if it already exists on the given path prior to saving the new model.

shapley_values(X, original_features: bool = True, **kwargs)

Get Shapley values.

Parameters:
X: datatable.Frame

Dataset to calculate Shapley values.

original_features: bool

True to get Shapley values for original features, False to get Shapley values for transformed features.

Returns:
datatable.Frame

Shapley values based feature contributions.
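A usage sketch, assuming a previously pickled explainable model (the paths are illustrative):

import datatable as dt

from h2o_sonar.lib.api.models import ExplainableModel

model = ExplainableModel.load("models/model.pkl")  # illustrative path

# Score a dataset; predictions are returned as a pandas DataFrame.
X = dt.fread("data/test.csv")  # illustrative path
predictions = model.predict_pandas(X)

# Shapley contributions on the original (raw) features - available
# only when the model provides them.
if model.meta.has_shapley_values:
    contributions = model.shapley_values(X, original_features=True)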

to_dict()
to_json(indent=None)
property transformed_model: TransformedFeaturesModel | None

Get associated model which works on the transformed features.

class h2o_sonar.lib.api.models.ExplainableModelHandle(connection_key: str, model_key: str, model_version: str = '')

Bases: ResourceHandle

Handle to a REMOTE model hosted by a remote system described by its connection configuration.

ExplainableModelHandle differs from ExplainableModel in that it doesn't provide the actual predict function, but only the metadata required to access the model.

static from_string(str_handle: str, h2o_sonar_config=None) ExplainableModelHandle

Create a new instance of the model handle from the string.
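A construction sketch; the connection and model keys below are illustrative:

from h2o_sonar.lib.api.models import ExplainableModelHandle

# Refer to a remote model via the key of its connection configuration and
# the model key on the remote system.
handle = ExplainableModelHandle(
    connection_key="h2ogpte-connection",
    model_key="model-4d774e62",
    model_version="1",
)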

class h2o_sonar.lib.api.models.ExplainableModelMeta(description: str = '', is_constant: bool = False, is_remote: bool = False, has_shapley_values: bool = False, target_col: str = '', used_features: List | None = None, feature_importances: Dict | None = None, feature_meta: Dict | None = None, transformed_features: List | None = None, model_path: str = '', model_file_size: int = 0, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: object

Explainable ML model metadata - this class provides uniform API to get ML model metadata regardless model source, provider and implementation.

Model labels (labels class field) convention, illustrated by the sketch below:

  • Regression model: the labels field is an empty list [].

  • Binomial model: the labels field is a list of two strings or integers which represent the model labels; the positive class of interest is the second list item.

  • Multinomial model: the labels field is a list of strings or integers with the model classes.
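For illustration, the convention maps to labels values like these (the label names are made up):

labels_regression = []                          # regression: empty list
labels_binomial = ["no_default", "default"]     # positive class of interest last
labels_multinomial = ["low", "medium", "high"]  # one item per class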

default_feature_importances() Dict

Construct default (fallback) feature importances - a list of the features used by the model, each with importance 0.0 - to be used if no importances were provided by the user.

property feature_importances: Dict

Return per-feature importance set by the user.

property features_metadata: FeaturesMetadata
get_model_type() ExperimentType

Get the experiment type (regression, binomial or multinomial) for the model.

Returns:
ExperimentType:

Experiment type.

property has_shapley_values

Does the model provide Shapley values?

property has_text_transformers: bool

Does the model have text transformers?

property is_constant

Is the model constant?

property num_labels: int
property positive_label_of_interest

In case of binomial classification it returns label of the positive class of interest.

to_dict()
to_json(indent=None)
property transformed_features: List
property used_features: List
class h2o_sonar.lib.api.models.ExplainableModelType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Explainable model type (extensible via inheritance).

amazon_bedrock_rag = 14
azure_openai_llm = 13
driverless_ai = 2
driverless_ai_rest = 3
static from_connection_type(connection_type: ConnectionConfigType) ExplainableModelType
h2o3 = 4
h2ogpt = 8
h2ogpte = 6
h2ogpte_llm = 7
h2ollmops = 9
static is_llm(explainable_model_type: ExplainableModelType) bool
static is_rag(explainable_model_type: ExplainableModelType) bool
mock = 1
ollama = 10
openai_llm = 12
openai_rag = 11
scikit_learn = 5
static to_connection_type(explainable_model_type: ExplainableModelType) ConnectionConfigType | None
unknown = 15
class h2o_sonar.lib.api.models.ExplainableRagModel(connection: [<class 'str'>, <class 'h2o_sonar.config.ConnectionConfig'>], model_type: ~h2o_sonar.lib.api.models.ExplainableModelType = ExplainableModelType.unknown, name: str = '', collection_id: str = '', collection_name: str = '', llm_model_name: str = '', llm_model_meta: ~typing.Dict | None = None, documents: ~typing.List[str] | None = None, model_cfg: ~typing.Dict | None = None, key: str = '', logger: ~h2o_sonar.loggers.SonarLogger | None = None)

Bases: object

KEY_COLLECTION_ID = 'collection_id'
KEY_COLLECTION_NAME = 'collection_name'
KEY_CONNECTION = 'connection'
KEY_DOCUMENTS = 'documents'
KEY_KEY = 'key'
KEY_LLM_MODEL_META = 'llm_model_meta'
KEY_LLM_MODEL_NAME = 'llm_model_name'
KEY_MODEL_CFG = 'model_cfg'
KEY_MODEL_TYPE = 'model_type'
KEY_NAME = 'name'
clone()
static from_dict(as_dict: Dict, connection=None) ExplainableRagModel
to_dict()
class h2o_sonar.lib.api.models.H2o3Model(model_src, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: ExplainableModel

H2O-3 explainable model implementation.

static is_h2o3_model(model_src) bool
class h2o_sonar.lib.api.models.ModelApi(logger: SonarLogger | None = None)

Bases: object

Model API interface provides a uniform API allowing explainers to use any model (scorer) regardless of provider, implementation or runtime details.

Detects the model (path to model, instance of a supported model, ...) and creates instances of the Model class.

create_model(model_src, target_col: str, used_features: List[str] | None = None, model_type: ExplainableModelType = ExplainableModelType.unknown, dataset: ExplainableDataset | Frame | DataFrame | str | Path | None = None, sanitization_map: SanitizationMap | None = None, **extra_params) ExplainableModel

Create explainable model.

Parameters:
model_src: Any

Path to model on the filesystem, instance of a 3rd party model, pickle or any other source that can be used to create the explainable model. Information about the model can be passed to 3rd party model implementations (like H2O-3) which can create the model.

target_col: str

Target column.

used_features: Optional[List[str]]

Optional list of feature names used by the model - required for all models which don't provide introspection that would allow determining the used features.

model_type: ExplainableModelType

Explainable model type hint which can be used to construct the model correctly.

dataset: Optional[Union[datasets.ExplainableDataset, datatable.Frame, pandas.DataFrame, str, pathlib.Path]]

Optional training dataset.

sanitization_map: Optional[SanitizationMap]

Optional dataset sanitization map used by the model.

Returns:
ExplainableModel

Explainable model.
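A sketch wrapping a toy scikit-learn model; the data, column names, and model are illustrative:

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from h2o_sonar.lib.api.models import ModelApi

# Toy training frame (illustrative data).
df = pd.DataFrame(
    {
        "age": [25, 40, 31, 52],
        "income": [30_000, 52_000, 41_000, 78_000],
        "default": [0, 1, 0, 1],
    }
)
features = ["age", "income"]

sk_model = RandomForestClassifier(random_state=42).fit(df[features], df["default"])

# Wrap the fitted model as an explainable model; used_features is required
# for models without introspection of the features they consume.
model = ModelApi().create_model(
    model_src=sk_model,
    target_col="default",
    used_features=features,
    dataset=df,
)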

class h2o_sonar.lib.api.models.ModelVendor

Bases: object

DAI = 'daimojo'
H2O = 'h2o'
MLI_H2O = 'hmli'
SKLEARN = 'sklearn'
class h2o_sonar.lib.api.models.OpenAiRagModel(connection: [<class 'str'>, <class 'h2o_sonar.config.ConnectionConfig'>], name: str = '', thread_id: str = '', llm_model_name: str = '', documents: ~typing.List[str] | None = None, key: str = '', logger: ~h2o_sonar.loggers.SonarLogger | None = None)

Bases: ExplainableRagModel

OpenAI RAG model - AI Assistant with File Search/Retrieval tool enabled.

class h2o_sonar.lib.api.models.PickleFileModel(model_src, predict_method: Callable, fit_method=None, model_type: ExplainableModelType = ExplainableModelType.unknown, model_meta: ExplainableModelMeta | None = None, transformed_model: TransformedFeaturesModel | None = None, label_encoder: MultiColumnLabelEncoder | None = None, logger: SonarLogger | None = None)

Bases: ExplainableModel

Pickled explainable model.

EXT_PICKLE = '.pkl'
static from_pickle(model_src, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None) ExplainableModel
static is_pickle_file_model(model_src) bool
class h2o_sonar.lib.api.models.ScikitLearnModel(model_src, target_col: str = '', used_features: List[str] | None = None, labels: List | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | Frame | DataFrame | None = None, logger=None)

Bases: ExplainableModel

Scikit-learn explainable model implementation.

static is_scikit_learn_model(model_src) bool
predict(X: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method. Scikit-learn models require specific constraints which are enforced by this model-specific method.

class h2o_sonar.lib.api.models.TransformedFeaturesModel(model_src, transformed_predict_method, transform_dataset_method, model_meta: ExplainableModelMeta | None = None)

Bases: object

Transformed features model is associated with an ExplainableModel which works on the original (raw) features.

An ExplainableModel may have an associated transformed features model. In order to score a dataset using the transformed features model, the dataset must first be transformed from the original (dataset and features) to the transformed (dataset and features) using feature transformers.

static load(path: str)

Load model from pickle.

Parameters:
path: str

Model pickle path.

Returns:
ExplainableModel

Instance of the pickled model.

property meta: ExplainableModelMeta
predict(transformed_x: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method.

save(path: str, update: bool = False)

Pickle the model.

Parameters:
path: str

Model pickle path.

update: bool

Delete the pickled model if it already exists on the given path prior to saving the new model.

transform_dataset(X: ExplainableDataset | Frame, **kwargs) ExplainableDataset | Frame

Transform dataset from original to transformed features.

h2o_sonar.lib.api.models.guess_model_labels(dataset: ExplainableDataset | DataFrame | Frame, target_col, labels: List | None = None, model_type_str='scikit-learn', logger: SonarLogger | None = None) List[str] | None

Guess model labels from the dataset.

Parameters:
dataset: Union[datasets.ExplainableDataset, pandas.DataFrame, datatable.Frame]

Dataset used to train the model.

target_col: str

Target column name.

labels: Optional[List[str]]

List of model label values to return if it is not possible to determine them.

logger: Optional[loggers.SonarLogger]

Logger instance.

model_type_str: str

Model type string to be used in exception messages.

Returns:
Optional[List[str]]

List of model labels. If None, then it was not possible to determine them.

h2o_sonar.lib.api.models.guess_model_used_features(dataset: ExplainableDataset | DataFrame | Frame, target_col: str = '', model_type_str: str = 'scikit-learn') List[str]

Guess features used by the model from the dataset.

Parameters:
dataset: Union[datasets.ExplainableDataset, pandas.DataFrame, datatable.Frame]

Dataset used to train the model.

target_col: str

Target column name. If specified, the target column will be removed from the used features, otherwise it will be included.

model_type_str: str

Model type string to be used in exception messages.

Returns:
List[str]

List of features used by the model.
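A sketch on a toy frame (column names and values are illustrative):

import pandas as pd

from h2o_sonar.lib.api.models import guess_model_labels, guess_model_used_features

df = pd.DataFrame(
    {
        "age": [25, 40, 31],
        "income": [30_000, 52_000, 41_000],
        "default": ["no", "yes", "no"],
    }
)

# All columns except the target column.
features = guess_model_used_features(df, target_col="default")

# Model labels guessed from the dataset.
labels = guess_model_labels(df, target_col="default")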

h2o_sonar.lib.api.persistences module

class h2o_sonar.lib.api.persistences.ExplainerPersistence(data_dir: str, username: str, explainer_id: str, explainer_job_key: str, mli_key: str = None, store_persistence: Persistence | None = None)

Bases: InterpretationPersistence

Explainer persistence.

Filesystem structure:

mli_experiment_<UUID>/ (MLI interpretation) OR explanation_<job UUID>/ (ad hoc)
    explainer_<explainer ID>_<job UUID>/
        <explanation name>/
            explanation.<extension>
            ... extra files completing main explanation file allowed in this dir
        work/
            ... directory which can be used for intermediary results persistence

Web access:

http://<HOST>:<PORT>/files/mli_experiment_<UUID>/...
http://<HOST>:<PORT>/files/explanation_<UUID>/...

Hints:

  • Explainer and explanation names are checked to contain safe characters only (alpha, num, ., _ and -). IDs are preserved (filesystem/runtime match).

  • Format identifiers (MIME types) are processed to contain safe characters only.

  • explanation.<extension> is the “index file” - the directory may also contain other files which form/support the explanations.

  • Explainer may be executed multiple times within one MLI interpretation, therefore uniqueness is guaranteed by job UUID.

  • Datatable explanation is canonical (always present), others are optional.

Examples

# MLI interpretation
mli_experiment_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

    # OOTB PD explainer
    explainer_h2oaicore.h2o_sonar.oss.byor.explainers.pd.PD_4d774e62-3c67...06530ed5ceb/
        global_partial_dependence/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
        local_individual_conditional_explanation/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
                feature_1_class_1_pd.json
                ...
                feature_n_class_n_pd.json

    # hot deployed feature importance explainer
    explainer_False_test_kernel_shap_f72edb06_...er.TestKernelShap_4d7...d5ceb/
        local_feature_importance/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json

# Ad hoc explainer run
explanation_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

    # OOTB feature importance explainer
    explainer_h2oaicore.h2o_sonar.oss.byor.explainers.kernel_shap.KernelShap_4d7...ceb/
        global_feature_importance/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
DIR_EXPLAINER = 'explainer_'
DIR_INSIGHTS = 'insights'
DIR_LOG = 'log'
DIR_PROBLEMS = 'problems'
DIR_WORK = 'work'
EXPLAINER_LOG_PREFIX = 'explainer_run_'
EXPLAINER_LOG_SUFFIX_ANON = '_anonymized.log'
FILE_DONE_DONE = 'EXPLAINER_DONE'
FILE_DONE_FAILED = 'EXPLAINER_FAILED'
FILE_EXPLAINER_PICKLE = 'explainer.pickle'
FILE_EXPLANATION = 'explanation'
FILE_INSIGHTS = 'insights_and_actions.json'
FILE_ON_DEMAND_EXPLANATION_SUFFIX = 'on_demand_explanation.txt'
FILE_PROBLEMS = 'problems_and_actions.json'
FILE_RESULT_DESCRIPTOR = 'result_descriptor.json'
property explainer_id: str
property explainer_job_key: str
static get_dirs_for_explainer_id(data_dir: str, username: str, mli_key: str, explainer_id: str, explainer_job_key: str | None = None) list
get_evaluator_working_file(file_name: str) str
get_explainer_ann_log_file() str
get_explainer_ann_log_path() str
get_explainer_dir() str
get_explainer_dir_archive() str
get_explainer_insights_dir() str
get_explainer_insights_file(file_name: str) str
get_explainer_log_dir() str
get_explainer_log_file() str
get_explainer_log_path() str
get_explainer_problems_dir() str
get_explainer_problems_file(file_name: str) str
get_explainer_working_dir() str
get_explainer_working_file(file_name: str) str
get_explanation_dir_path(explanation_type: str, explanation_format: str) str

Get explanation directory path.

Parameters:
explanation_type: str

Explanation identifier returned by explanation_type().

explanation_format: str

Format MIME type.

Returns:
str

Path to the directory with the explanation.

get_explanation_file_path(explanation_type: str, explanation_format: str, explanation_file: str = None) str
get_explanation_meta_path(explanation_type: str, explanation_format: str) str
static get_key_for_explainer_dir(explainer_dir_path: str) str | None
static get_locators_for_explainer_id(data_dir: str, username: str, mli_key: str, explainer_id: str, explainer_job_key: str | None = None) List[Tuple[str, str]] | None
get_relative_path(path: str, base_entity: str = 'interpretation')
get_result_descriptor_file_path() str
load_insights() List[Dict]

Load insights.

load_problems() List[Dict]

Load model problems.

load_result_descriptor() Dict
static make_dir(target_dir)
make_explainer_dir()
make_explainer_insights_dir()
make_explainer_log_dir()
make_explainer_problems_dir()
make_explainer_sandbox(dai_params=None)

Create explainer working dir and log directories as well as common files.

Parameters:
dai_params: CommonDaiExplainerParameters

Common explainer parameters to be stored in the root of the interpretation (if they don't already exist).

make_explainer_working_dir()
static makedirs(path: str, exist_ok=True)

Avoid some inefficiency in os.makedirs().

Parameters:
path: str

Path to directory/ies to create.

exist_ok: bool

Do not fail if the directory already exists.

Returns:
str

Path to the newly created directory.

resolve_mli_path(mli_key: str, username: str)

Resolve the MLI interpretation directory: it should be in a directory with the username in the path, but it may also be possible to create it in a directory without the username using config.per_user_directories (or it can be migrated from 1.8.x).

rm_explainer_dir()
save_insights(insights: List[Dict])

Save insights.

static save_json(data: dict, path: str)
save_problems(problems: List[Dict])

Save model problems.

property username: str
class h2o_sonar.lib.api.persistences.FilesystemPersistence(base_path: str | Path | None = None, logger=None)

Bases: Persistence

File-system store persistence.

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path)
delete_file(key: str | Path) bool
delete_tree(key: str | Path)
exists(key: str | Path) bool
static flush_dir_for_file(file_path: str) bool
static get_default_cwl()

Get the default current working location when not specified by the user.

getcwl()

Get current working location - directory, memory key or DB locator.

is_dir(key: str | Path) bool
is_file(key: str | Path) bool
list_dir(key: str | Path) List
list_files_by_wildcard(key: str | Path, wildcard: str) List
load(key: str | Path, data_type: PersistenceDataType | None = None) Any
load_json(key: str | Path) Dict | List
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str | ~pathlib.Path, zip_key: str | ~pathlib.Path, file_filter=<function FilesystemPersistence.<lambda>>)

Create ZIP archive of given directory.

Parameters:
src_key: str

Absolute path to the directory to be archived.

zip_key: str

ZIP archive path.

file_filter: Callable

Function to be used for filtering - it gets the path relative to the source directory as a parameter and returns a boolean indicating whether to keep (False) or filter the file out (True).
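A usage sketch; the directory and archive paths are illustrative:

from h2o_sonar.lib.api.persistences import FilesystemPersistence

persistence = FilesystemPersistence(base_path="/tmp/h2o-sonar")

# Archive an interpretation directory, filtering out log files: the filter
# gets a path relative to the source directory and returns True to filter
# the file out, False to keep it.
persistence.make_dir_zip_archive(
    src_key="/tmp/h2o-sonar/mli_experiment_1234",
    zip_key="/tmp/h2o-sonar/mli_experiment_1234.zip",
    file_filter=lambda rel_path: str(rel_path).endswith(".log"),
)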

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.text)
static save_json(key: str | Path, data: Dict | List, indent: int = 4, save_explainer_params=False) Dict
touch(key: str | Path)
property type
update(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
class h2o_sonar.lib.api.persistences.InMemoryPersistence

Bases: Persistence

In-memory key-based store persistence.

DIR = <h2o_sonar.lib.api.persistences.InMemoryPersistence.Directory object>
class Directory

Bases: object

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path, logger=None)
delete_file(key: str | Path) bool
delete_tree(key: str | Path)
exists(key: str | Path) bool
static get_default_cwl()

Get the default current working location when not specified by the user.

getcwl()

Get current working location - directory, memory key or DB locator.

is_dir(key: str | Path) bool
is_file(key: str | Path) bool
list_dir(key: str | Path) List
list_files_by_wildcard(key: str | Path, wildcard: str) List
load(key: str | Path, data_type: PersistenceDataType = PersistenceDataType.binary) Any
load_json(key: str | Path) dict
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str | ~pathlib.Path, zip_key: str | ~pathlib.Path, file_filter=<function InMemoryPersistence.<lambda>>)

Make ZIP archive of given source directory.

Parameters:
src_key: str

Source key (directory path).

zip_key: str

ZIP key (ZIP file path).

file_filter: Callable

File filter.

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
save_json(key: str | Path, data: Dict, indent: int = 4, save_explainer_params=False)
touch(key: str | Path)
property type
class h2o_sonar.lib.api.persistences.InterpretationPersistence(data_dir: str, username: str, mli_key: str = None, ad_hoc_explainer_job_key: str = None, store_persistence: Persistence | None = None, logger=None)

Bases: object

Interpretation persistence - class used to manage interpretation files and directories within the base data directory (or its equivalent on a particular store type).

Once extended to actual writing/reading of files, it should also simplify switching stores - e.g. to remote/multinode/distributed.

Filesystem structure:

<base data dir>/

mli_experiment_<UUID>/  ... MLI interpretation (bulk explainers run)
explanation_<job UUID>/ ... ad hoc explainer run

Examples

# MLI interpretation
mli_experiment_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

# Ad hoc explainer run
explanation_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

DIR_AD_HOC_EXPLANATION = 'explanation_'
DIR_AUTOML_EXPERIMENT = 'h2oai_experiment_'
DIR_MLI_EXPERIMENT = 'mli_experiment_'
DIR_MLI_TS_EXPERIMENT = 'mli_experiment_timeseries_'
FILE_COMMON_PARAMS = 'explainers_common_parameters.json'
FILE_EXPERIMENT_ID_COLS = 'experiment_id_columns.json'
FILE_EXPERIMENT_IMAGE = 'IS_IMAGE'
FILE_EXPERIMENT_TS = 'IS_TIMESERIES'
FILE_H2O_SONAR_HTML = 'h2o-sonar.html'
FILE_INTERPRETATION_HTML = 'interpretation.html'
FILE_INTERPRETATION_HTML_4_PDF = 'interpretation-detailed.html'
FILE_INTERPRETATION_JSON = 'interpretation.json'
FILE_INTERPRETATION_PDF = 'interpretation-detailed.pdf'
FILE_MLI_EXPERIMENT_LOG = 'mli_experiment_log_'
FILE_PREFIX_DATASET = 'dataset_'
FILE_PROGRESS_JSON = 'progress.json'
KEY_E_PARAMS = 'explainers_parameters'
KEY_RESULT = 'result'
property ad_hoc_job_key: str
property base_dir: str
create_dataset_path() str
property data_dir: str
static get_ad_hoc_mli_dir_name(data_dir: str, username: str, explainer_job_key: str)
static get_async_log_file_name(mli_key: str)
static get_base_dir(data_dir: str, dir_name: str)
get_base_dir_file(file_name: str) str
get_experiment_id_cols_path() str
get_html_4_pdf_path() str
get_html_path() str
get_json_path() str
static get_mli_dir_name(data_dir: str, username: str, mli_key: str)
get_pdf_path() str
is_common_params()
static is_safe_name(name: str) bool

Check whether the given name is formed by alphanumeric chars (and therefore filesystem safe).

static list_interpretations(data_dir: str, username: str, store_persistence: Persistence, paths: bool = True)

List interpretations.

Parameters:
data_dir: str

H2O Eval Studio results directory.

username: str

Username.

store_persistence: Persistence

Handle to the store persistence.

paths: bool

Return a list of paths (e.g. file-system paths) if True (default), else return interpretation UUIDs.

load_common_params(patch_sequential_execution: bool | None = None) CommonInterpretationParams

Load the CommonInterpretationParams entity from the interpretation root dir.

load_explainers_params(explainer_id: str = '') Dict

Load the explainers parameters dictionary from the interpretation JSON.

load_is_image_experiment()
load_is_timeseries_experiment()
load_message_entity(path: str) dict
make_base_dir()
make_dir_zip_archive(src_dir_path: str | ~pathlib.Path, zip_path: str | ~pathlib.Path, file_filter=<function InterpretationPersistence.<lambda>>)
make_interpretation_sandbox()

Create interpretation directory as well as common files.

make_tmp_dir()
property mli_key: str
resolve_model_path(model_path: str)

Resolve the fitted model path, as there are several combinations of DAI configuration and experiment creation (path):

  • the fitted model path MAY have a <username> prefix, based on whether it was created in a 1.8.x version or with config.per_user_directories=True/False

  • the current user directory may be either the data directory, or it may have the username in the path, based on the config.per_user_directories configuration item value

Parameters:
model_path: str

(Un)fitted model relative path as present on model entity as model.fitted_model_path.

rm_base_dir(logger=None)
rm_dir(dir_path)
save_as_html(interpretation_html: str)

Save interpretation as HTML.

save_as_json(interpretation_dict: dict)

Save interpretation as JSON.

save_as_pdf(interpretation)

Save interpretation as PDF.

save_common_params(entity: CommonInterpretationParams)

Save the CommonInterpretationParams entity to the interpretation root dir.

save_experiment_type_hints(is_timeseries: bool = False, is_image: bool = False)

Write a hint (in a backward-compatible manner) indicating the experiment type (like time series or image) to the interpretation directory (IID is the default).

Parameters:
is_timeseries: bool

Write time series hint.

is_image: bool

Write image hint.

save_message_entity(entity, path: str)
property tmp_dir: str
static to_alphanum_name(name: str)

Convert the given name to a filesystem-safe string formed by alphanumeric characters.

static to_server_file_path(data_dir: str, path: str)

Return the bare server file path without the data directory.

static to_server_path(data_dir: str, path: str)

Return the bare server path without the data directory.

property user_dir: str
class h2o_sonar.lib.api.persistences.JsonPersistableExplanations

Bases: ABC

Interface for classes implementing explanations JSON file persistence.

Examples

ice = ICE("Step by step ICE loading")
ice.load_json("cache/ice.json")

es = ice.explanations()

es = ICE("On the fly").explain(
    ["Feature"],
    X,
    predict_method=scorer,
).save_json()
class PandasJSonEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

Custom Pandas DataFrames serializer.

default(o)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return super().default(o)
static check_explanations_serializability(explanations)
property default_json_file_name
abstract load_json(path=None)

Load explanations from a JSON file.

Parameters:
path: str

Local file path from where to load JSON explanations. If path isn't specified, then explanations are loaded from explanations.json in the current directory.

Returns:
dict

Explanations deserialized from JSON.

abstract save_json(path=None)

Save explanations as a JSON file.

Parameters:
path: str

Local file path where to store explanations. If path isn't specified, then explanations are stored to 'explanations.json' in the current directory.
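A minimal sketch of implementing this interface; DictExplanations is illustrative, not part of the library:

import json

from h2o_sonar.lib.api.persistences import JsonPersistableExplanations


class DictExplanations(JsonPersistableExplanations):
    # Hypothetical explanations holder persisted as a JSON file.

    def __init__(self, explanations=None):
        self._explanations = explanations or {}

    def load_json(self, path=None):
        # Fall back to explanations.json in the current directory.
        with open(path or "explanations.json") as f:
            self._explanations = json.load(f)
        return self._explanations

    def save_json(self, path=None):
        with open(path or "explanations.json", "w") as f:
            json.dump(self._explanations, f, indent=4)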

class h2o_sonar.lib.api.persistences.NanEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

encode(obj)

Return a JSON string representation of a Python data structure.

>>> from json.encoder import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'
class h2o_sonar.lib.api.persistences.Persistence(logger=None)

Bases: ABC

Key/value-based persistence API interface provides a uniform, store-agnostic API allowing explainers to use the chosen store type, regardless of container runtime or technology, to store explainer results (explanations). It aims to enable writing identical code regardless of whether explanation data is stored/loaded to/from filesystem, memory or DB.

Interface and implementations are based on opaque string keys (which might be filesystem paths, dictionary keys or NoSQL database keys) and data types (text, binary, ...). On implementation initialization, the base in-memory reference, filesystem path or DB connection information is set.

There are the following special types of data which are written to the filesystem (network or memory) regardless of the chosen store type:

  • temporary files (explainer work/ directory)

  • log files (explainer log/ directory)

Therefore, an explainer sandbox is always created on the file-system, but it might be located in a user-specified directory (in case of the file-system store) or the system temp directory (in case of the in-memory or database store).

The persistence API is written with security (barriers) and performance in mind.

PREFIX_INTERNAL_STORE = 'h2o_sonar-of-'
static check_key(key: str | Path) str

Check and fix key.

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path, logger=None)
delete_file(key: str | Path) bool
static delete_temp_dir(tmp_dir_path: str | Path)
delete_tree(key: str | Path) bool
exists(key: str | Path) bool
static flush_dir_for_file(file_path: str) bool
getcwl()

Get current working location - directory, memory key or DB locator.

static is_binary_file(key: str) bool
is_dir(key: str | Path) bool
is_dir_or_file(key: str | Path) bool
is_file(key: str | Path) bool
static key_folder(key: str | Path) str

Get (parent) folder key for given key (equivalent of os.path.dirname()).

list_dir(key: str | Path) List
list_files_by_wildcard(key: str | Path, wildcard: str) List
load(key: str | Path, data_type: PersistenceDataType = PersistenceDataType.binary) Any
load_json(key: str | Path) dict
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str, zip_key: str, file_filter=<function Persistence.<lambda>>)

Make ZIP archive of given source directory.

Parameters:
src_keystr

Source key (directory path).

zip_keystr

ZIP key (ZIP file path).

file_filterCallable

File filter.

static make_key(*args) str

Assemble key (path) from the string arguments given to this function (equivalent of os.path.join()).

static make_temp_dir() str
static make_temp_file(file_name: str) str
path_to_internal(path: str | Path) str
static safe_name(key: str) str

Encode the name to be store (file-system) safe (it can be decoded if needed).

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
touch(key: str | Path)
property type
update(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
class h2o_sonar.lib.api.persistences.PersistenceApi(logger: SonarLogger | None = None)

Bases: ABC

Factory which creates Persistence implementations for various store types and purposes which are available in specific runtime and/or container(s).

create_explainer_persistence(store_persistence: Persistence, base_path: str | Path, interpretation_key: str, explainer_id: str, explainer_job_key: str, username: str = '') ExplainerPersistence

Create explainer persistence atop the given store persistence, e.g. to store explainer data to a database.

create_interpretation_persistence(store_persistence: Persistence, base_path: str | Path, interpretation_key: str, username: str = '') InterpretationPersistence

Create interpretation persistence atop the given store persistence, e.g. to store interpretations in-memory.

create_persistence(persistence_type: PersistenceType = PersistenceType.file_system, base_path: str = '', connection_string: str = '') InMemoryPersistence | FilesystemPersistence

Create persistence of the given store type - file-system, in-memory or DB. The default store persistence is file-system persistence based in the current directory.

Parameters:
persistence_type: PersistenceType

Type of the persistence to create.

base_path: str

Optional root path of the persistence on the host store (where meaningful, e.g. file-system).

connection_string: str

Optional connection string (where meaningful, e.g. database).

Returns:
Any

Persistence to load/store container and explainer artifacts.
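A usage sketch creating an in-memory store, assuming the factory can be instantiated directly (keys and data are illustrative):

from h2o_sonar.lib.api.persistences import (
    PersistenceApi,
    PersistenceDataType,
    PersistenceType,
)

persistence = PersistenceApi().create_persistence(
    persistence_type=PersistenceType.in_memory,
)

# Keys are opaque: dictionary keys here, file paths for the file-system store.
persistence.save("runs/status", "ok", data_type=PersistenceDataType.text)
status = persistence.load("runs/status", data_type=PersistenceDataType.text)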

get_cwl(persistence_type: PersistenceType = PersistenceType.file_system)
class h2o_sonar.lib.api.persistences.PersistenceDataType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

binary = 1
datatable = 2
json = 4
text = 3
class h2o_sonar.lib.api.persistences.PersistenceType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

database = 3
file_system = 1
in_memory = 2
class h2o_sonar.lib.api.persistences.RobustEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

default(obj)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return super().default(o)

h2o_sonar.lib.api.plots module

class h2o_sonar.lib.api.plots.Data3dPlot

Bases: object

Plot 3D data:

  • heatmap

  • 3D surface plot

  • 3D contour plot

PLOT_TYPES = ['heatmap', 'contour-3d', 'surface-3d']
PLOT_TYPE_CONTOUR = 'contour-3d'
PLOT_TYPE_HEATMAP = 'heatmap'
PLOT_TYPE_SURFACE = 'surface-3d'
static plot(x_axis_labels: List, y_axis_labels: List, heatmap_data: Frame, chart_title: str = '', x_axis_label: str = '', y_axis_label: str = '', plot_type: str = 'heatmap', color_map: str = 'autumn', figsize=(12, 10), dpi=120, plot_file_path: str = '', logger=None, log_name: str = '')

Heatmap plot.

Parameters:
x_axis_labels: List

Horizontal axis labels.

y_axis_labels: List

Vertical axis labels.

heatmap_data: datatable.Frame

Datatable frame with the heatmap data (column names don't matter, only the data are relevant).

chart_title: str

Chart title.

x_axis_label: str

Horizontal axis label.

y_axis_label: str

Vertical axis label.

plot_type: str

Plot type, one of PLOT_TYPES.

color_map: str

Matplotlib color map name.

figsize: tuple

Figure size.

dpi: int

Dots per inch.

plot_file_path: str

Path to save the plot to.

logger

Logger instance.

log_name: str

Name of the logger.
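A heatmap sketch on a toy 2x3 grid (labels, data, and the output path are illustrative):

import datatable as dt

from h2o_sonar.lib.api.plots import Data3dPlot

# Three columns x two rows of heatmap values.
heatmap = dt.Frame([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])

Data3dPlot.plot(
    x_axis_labels=["a", "b", "c"],
    y_axis_labels=["low", "high"],
    heatmap_data=heatmap,
    chart_title="Toy heatmap",
    plot_type=Data3dPlot.PLOT_TYPE_HEATMAP,
    plot_file_path="toy-heatmap.png",  # illustrative output path
)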

class h2o_sonar.lib.api.plots.ScatterFeatImpPlot

Bases: object

Scatter plot feature importance representation is based on the chart from:

https://github.com/slundberg/shap

static plot(contributions, frame, alpha: float = 1.0, colormap: str | None = None, figsize=(12, 12), jitter: float = 0.35, chart_title: str = 'Feature importance summary plot', x_label: str = 'Value', y_label: str = 'Feature', thermometer_label: str = 'Normalized feature value', columns=None, top_n_features: int = 20, samples: int | None = None, colorize_factors: bool = True, drop_zero_contribs=True, hard_asserts=False, logger=None) Figure

Feature importance summary plot.

Summary plot shows the contribution of features for each instance. The sum of the feature contributions and the bias term is equal to the raw prediction of the model, i.e., the prediction before applying the inverse link function.

Parameters:
contributions

Pandas contributions frame with coefficients. Frame column names are the (sanitized) feature names, rows correspond to dataset rows, cells are coefficients.

frame

Pandas dataset frame with values. Frame column names are the (sanitized) feature names, rows correspond to dataset rows, cells are values.

columns

Either a list of columns or column indices to show. If specified, the parameter top_n_features will be ignored.

top_n_features: int

A number of columns to pick using variable importance (where applicable). Set to -1 to show all features.

samples

Maximum number of observations to use; if lower than the number of rows in the frame, take a random sample.

colorize_factors

If True, use colors from the colormap to colorize the factors; otherwise all levels will have the same color.

alpha

Transparency of the points.

colormap

Colormap to use instead of the default blue to red colormap.

figsize

Figure size - passed directly to matplotlib.

jitter

Amount of jitter used to show the point density.

chart_title: str

Chart title.

x_label: str

Chart x-axis label.

y_label: str

Chart y-axis label.

thermometer_label: str

Chart thermometer label.

drop_zero_contribs

Whether to drop features that have zero contribution. Features that are not used in the final model will have zero contribution.

hard_asserts: bool

Used in testing to raise exceptions in try/except statements.

logger

Optional logger object.

Returns:
pyplot.Figure:

A matplotlib figure object which can be saved or displayed.
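A sketch on toy contributions and values frames (all data are illustrative):

import pandas as pd

from h2o_sonar.lib.api.plots import ScatterFeatImpPlot

# Per-row feature contributions and the corresponding feature values;
# the two frames must share the (sanitized) feature column names.
contributions = pd.DataFrame({"age": [0.2, -0.1, 0.05], "income": [-0.3, 0.4, 0.1]})
values = pd.DataFrame({"age": [25, 40, 31], "income": [30_000, 52_000, 41_000]})

fig = ScatterFeatImpPlot.plot(contributions, values, top_n_features=2)
fig.savefig("summary-plot.png")  # the returned matplotlib figure can be saved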

h2o_sonar.lib.api.plots.safe_plot_names(column_list: List[str]) List

Return a list of column names that exclude problematic special characters for matplotlib plotting functions.

Parameters:
column_list: List[str]

List of column names.

Returns:
List:

List with column names that are safe to plot.
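A usage sketch (the column names are illustrative):

from h2o_sonar.lib.api.plots import safe_plot_names

# Characters such as '$' can trigger matplotlib's mathtext parsing.
plottable = safe_plot_names(["income_$", "age", "rate%"])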

h2o_sonar.lib.api.problems module

class h2o_sonar.lib.api.problems.AVIDProblemCode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: ProblemCode

Problem codes from AVID https://docs.avidml.org/taxonomy/effect-sep-view

E0100_BIAS = ('E0100', 'Concerns of algorithms propagating societal bias')
E0200_EXPLAINABILITY = ('E0200', 'Ability to explain decisions made by AI')
E0300_TOXICITY = ('E0300', 'Perpetuating/causing/being affected by negative user actions')
E0400_MISINFORMATION = ('E0400', 'Perpetuating/causing the spread of falsehoods')
P0100_DATA = ('P0100', 'Problems arising due to faults in the data pipeline')
P0200_MODEL = ('P0200', 'Ability for the AI to perform as intended')
P0300_PRIVACY = ('P0300', 'Protect leakage of user information as required by rules and regulations')
P0400_SAFETY = ('P0400', 'Minimizing maximum downstream harms')
S0400_MODEL_BYPASS = ('S0400', 'Intentionally try to make a model perform poorly')
S0500_EXFILTRATION = ('S0500', 'Directly or indirectly exfiltrate ML artifacts')
S0600_DATA_POISONING = ('S0600', 'Usage of poisoned data in the ML pipeline')
class h2o_sonar.lib.api.problems.AVIDProblemCodeType(code, description)

Bases: tuple

code

Alias for field number 0

description

Alias for field number 1

class h2o_sonar.lib.api.problems.ProblemAndAction(description: str, description_html: Airium | None = None, severity: ProblemSeverity = ProblemSeverity.medium, problem_type: str = 'problem', problem_attrs: Dict = None, actions_description: str = '', actions_codes: List[str] = None, explainer_id: str = '', explainer_name: str = '', evaluator_id: str = '', evaluator_name: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', resources: List[str] = None, problem_code: ProblemCode = None)

Bases: AbcProblemInsight

Instance of this class represents a problem of the interpreted model identified by an explainer. Apart from the problem description, the entry also provides the problem severity, problem category (brief characteristic), problem attributes (a dictionary of machine-processable data describing the problem, which might be used for instance as input to actions), a textual description of suggested actions to mitigate the problem (actionability), the explainer which detected the problem, and references to resources (explanations, document URLs, ...).

KEY_PROBLEM_ATTRS = 'problem_attrs'
KEY_PROBLEM_TYPE = 'problem_type'
KEY_SEVERITY = 'severity'
static from_dict(problem_dict: Dict) ProblemAndAction
to_dict() Dict
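A construction sketch; the problem content below is illustrative:

from h2o_sonar.lib.api.problems import (
    AVIDProblemCode,
    ProblemAndAction,
    ProblemSeverity,
)

problem = ProblemAndAction(
    description="Feature 'zip_code' dominates feature importances.",
    severity=ProblemSeverity.medium,
    problem_type="bias",
    problem_attrs={"feature": "zip_code"},
    actions_description="Review the feature for proxy bias; consider dropping it.",
    problem_code=AVIDProblemCode.E0100_BIAS,
)
problem_dict = problem.to_dict()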
class h2o_sonar.lib.api.problems.ProblemCode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

class h2o_sonar.lib.api.problems.ProblemSeverity(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

static compare(severity1, severity2) int
high = 1
low = 3
medium = 2
h2o_sonar.lib.api.problems.problems_for_bool_leaderboard(evaluator, leaderboard, primary_metric_meta: MetricMeta, metric_threshold: float | None = None, severity: ProblemSeverity | None = None, problem_type: str = 'accuracy', problem_code: ProblemCode = None, explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '') None

Generate problems based on the heatmap leaderboard analytics.

For models whose average Passes metric score is below the threshold, a problem is created with the description of the problem, severity, problem type, problem attributes, and actions description.

h2o_sonar.lib.api.problems.problems_for_cls_leaderboard(evaluator, leaderboard, metric_threshold: float | None = None, primary_metric_meta=None, severity: ProblemSeverity | None = None, problem_type: str = 'classification', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '', problem_code: ProblemCode = None) None

Generate problems based on the classification leaderboard analytics.

h2o_sonar.lib.api.problems.problems_for_heat_leaderboard(evaluator, leaderboard, metric_threshold: float | None = None, primary_metric_meta=None, severity: ProblemSeverity | None = None, problem_type: str = 'accuracy', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '', problem_code: ProblemCode = None) None

Generate problems based on the heatmap leaderboard analytics.

h2o_sonar.lib.api.results module

class h2o_sonar.lib.api.results.Data3dResult(persistence: ExplainerPersistence, explainer_id: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_names: str = '') Dict
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, feature_names: str = '', plot_type: str = 'surface-3d', title: str = '')
class h2o_sonar.lib.api.results.DiaResult(persistence: ExplainerPersistence, explainer_id: str, dia_entry_constants: DiaEntryConstant, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

class DiaCategory(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

DIA_CATEGORY_CM = 'cm'
DIA_CATEGORY_DISPARITY = 'disparity'
DIA_CATEGORY_ME_SMD = 'me_smd'
DIA_CATEGORY_PARITY = 'parity'
DIA_METRICS = 'metrics'
class DiaEntryConstant(dia_entity_file: str, param_feature_summaries: str, param_feature_name: str, param_name: str, param_features: str, ref_levels: str)

Bases: object

data(*, feature_name: str, category: DiaCategory | str, ref_level: int | str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
params() Dict
plot(*, feature_name: str, metrics_of_interest: str | List[str] | None = None, file_path: str = '') List[str]
class h2o_sonar.lib.api.results.DtResult(persistence: ExplainerPersistence, explainer_id: str, explainer_name: str, h2o_sonar_config=None, highlight_highest_residual: bool = False, logger=None)

Bases: ExplainerResult

data()
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, clazz: str | None = None)
class h2o_sonar.lib.api.results.FeatureImportanceResult(persistence: ~h2o_sonar.lib.api.persistences.ExplainerPersistence, explainer_id: str = '', chart_title: str = 'Global Feature Importance', chart_x_axis: str = 'feature', chart_y_axis: str = 'importance', h2o_sonar_config=None, logger=None, explanation_format: ~typing.Type[~h2o_sonar.lib.api.formats.ExplanationFormat] = <class 'h2o_sonar.lib.api.formats.GlobalFeatImpJSonFormat'>, explanation: ~typing.Type[~h2o_sonar.lib.api.explanations.Explanation] = <class 'h2o_sonar.lib.api.explanations.GlobalFeatImpExplanation'>)

Bases: ExplainerResult

data(*, clazz: str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, clazz: str | None = None, file_path: str = '')
class h2o_sonar.lib.api.results.LeaderboardResult(persistence: ~h2o_sonar.lib.api.persistences.ExplainerPersistence, explainer_id: str = '', chart_title: str = 'Leaderboard', chart_x_axis: str = 'metrics', chart_y_axis: str = 'models', h2o_sonar_config=None, logger=None, explanation_format: ~typing.Type[~h2o_sonar.lib.api.formats.ExplanationFormat] = <class 'h2o_sonar.lib.api.formats.LlmHeatmapLeaderboardJSonFormat'>, explanation: ~typing.Type[~h2o_sonar.lib.api.explanations.Explanation] = <class 'h2o_sonar.lib.api.explanations.LlmHeatmapLeaderboardExplanation'>)

Bases: ExplainerResult

Make (heatmap-based, bool-based, …) leaderboard evaluator result.

data(*, metric_id: str | None = None) Dict
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, metric_id: str | None = None, file_path: str = '')
class h2o_sonar.lib.api.results.PdResult(persistence: ExplainerPersistence, explainer_id: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_name: str, clazz: str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, feature_name, clazz=None, override_feature_type: Literal['categorical', 'numeric'] | None = None, file_path: str = '', is_problematic: bool = False)
exception h2o_sonar.lib.api.results.ResultValueError

Bases: ValueError

class h2o_sonar.lib.api.results.SummaryShapResult(persistence: ExplainerPersistence, explainer_id: str, raw_contribs_idx_filename: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_names: str | List[str] | None = None, clazz: str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, feature_names: str | List[str] | None = None, clazz: str | None = None)
class h2o_sonar.lib.api.results.TemplateResult(persistence: ExplainerPersistence, explainer_id: str, explainer_name: str, logger=None)

Bases: ExplainerResult

data(**kwargs) Frame
plot(**kwargs)
h2o_sonar.lib.api.results.list_in_english(items: List[str], quote_item=True) str
h2o_sonar.lib.api.results.matplotlib_closing(show: bool)

Module contents