h2o_sonar.lib.api package

Submodules

h2o_sonar.lib.api.commons module

class h2o_sonar.lib.api.commons.Branding(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Branding.

EVAL_STUDIO = 2
H2O_SONAR = 1
class h2o_sonar.lib.api.commons.CommonInterpretationParams(model, models, dataset, target_col: str, validset='', testset='', use_raw_features: bool = '', weight_col: str = '', prediction_col: str = '', drop_cols: List | None = [], sample_num_rows: int | None = 0, results_location: str = '', used_features: List | None = None, extra_params: List | None = None)

Bases: object

PARAM_DATASET = <h2o_sonar.lib.api.commons.Param object>
PARAM_DROP_COLS = <h2o_sonar.lib.api.commons.Param object>
PARAM_MODEL = <h2o_sonar.lib.api.commons.Param object>
PARAM_MODELS = <h2o_sonar.lib.api.commons.Param object>
PARAM_PREDICTION_COL = <h2o_sonar.lib.api.commons.Param object>
PARAM_RESULTS_LOCATION = <h2o_sonar.lib.api.commons.Param object>
PARAM_SAMPLE_NUM_ROWS = <h2o_sonar.lib.api.commons.Param object>
PARAM_TARGET_COL = <h2o_sonar.lib.api.commons.Param object>
PARAM_TESTSET = <h2o_sonar.lib.api.commons.Param object>
PARAM_USED_FEATURES = <h2o_sonar.lib.api.commons.Param object>
PARAM_USE_RAW_FEATURES = <h2o_sonar.lib.api.commons.Param object>
PARAM_VALIDSET = <h2o_sonar.lib.api.commons.Param object>
PARAM_WEIGHT_COL = <h2o_sonar.lib.api.commons.Param object>
clone() CommonInterpretationParams
describe_config_item(config_item_name: str) Param | None
describe_config_items() Dict[str, Param]
dump() dict
static load(d: dict) CommonInterpretationParams
to_dict() dict

Safe string-friendly serialization to dictionary.

class h2o_sonar.lib.api.commons.ConfigItem(name: str = '', description: str = '', comment: str = '', type: str = '', val: Any = '', predefined: List | None = None, tags: List | None = None, min_: float = 0.0, max_: float = 0.0, category: str = '')

Bases: object

KEY_CATEGORY = 'category'
KEY_COMMENT = 'comment'
KEY_DESCRIPTION = 'description'
KEY_MAX = 'max_'
KEY_MIN = 'min_'
KEY_NAME = 'name'
KEY_PREDEFINED = 'predefined'
KEY_TAGS = 'tags'
KEY_TYPE = 'type'
KEY_VAL = 'val'
clone() ConfigItem
dump() dict
static load(d: dict) ConfigItem
class h2o_sonar.lib.api.commons.EvaluatorParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Evaluator parameters.

bool = ExplainerParamType.bool
customlist = ExplainerParamType.customlist
describe()
dict = ExplainerParamType.dict
float = ExplainerParamType.float
int = ExplainerParamType.int
list = ExplainerParamType.list
multilist = ExplainerParamType.multilist
str = ExplainerParamType.str
class h2o_sonar.lib.api.commons.EvaluatorToRun(evaluator_id: str, params: str | Dict = None, extra_params: List | None = None)

Bases: ExplainerToRun

class h2o_sonar.lib.api.commons.ExperimentType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Experiment types.

binomial = 2
multinomial = 3
regression = 1
class h2o_sonar.lib.api.commons.ExplainerFilter

Bases: object

Filters for listing explainers.

BLUEPRINT_INPUT_NAME = 'blueprint_input_name'
EXPLAINER_ID = 'explainer_id'
IID: str = 'iid'
IMAGE: str = 'image'
REQUIRES_PREDICT: str = 'requires_predict_method'
TIME_SERIES: str = 'time_series'
UNSUPERVISED: str = 'unsupervised'
class h2o_sonar.lib.api.commons.ExplainerJobStatus(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

ABORTED_BY_RESTART = 4
ABORTED_BY_USER = 3
CANCELLED = 1
FAILED = 2
FINISHED = 0
IN_PROGRESS = -1
RUNNING = -1
SCHEDULED = -3
SUCCESS = 0
SYNCING = -4
TIMED_OUT = 5
UNKNOWN = -2
static from_int(status_code: int) ExplainerJobStatus
static is_job_failed(status: ExplainerJobStatus) bool
static is_job_finished(status: ExplainerJobStatus) bool
static is_job_running(status: ExplainerJobStatus) bool
to_string(status_code: int)
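
Example - a minimal sketch of mapping integer status codes to the enum and checking job state; it assumes the helpers behave as their names and the status codes listed above suggest:

    from h2o_sonar.lib.api import commons

    status = commons.ExplainerJobStatus.from_int(0)  # FINISHED (alias: SUCCESS)
    commons.ExplainerJobStatus.is_job_finished(status)  # expected: True
    commons.ExplainerJobStatus.is_job_running(
        commons.ExplainerJobStatus.RUNNING
    )  # expected: True
    commons.ExplainerJobStatus.is_job_failed(
        commons.ExplainerJobStatus.FAILED
    )  # expected: True
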
class h2o_sonar.lib.api.commons.ExplainerParamKey

Bases: object

KEY_ALL_EXPLAINERS_PARAMS = 'explainers_params'
KEY_DATASET = 'dataset'
KEY_DESCR_PATH = 'result_descriptor_path'
KEY_EXPERIMENT_TYPE = 'experiment_type'
KEY_E_DEPS = 'explainer_dependencies'
KEY_E_ID = 'explainer_id'
KEY_E_JOB_KEY = 'explainer_job_key'
KEY_E_PARAMS = 'explainer_params'
KEY_FEATURES_META = 'features_metadata'
KEY_I_DATA_PATH = 'interpretation_data_path'
KEY_KWARGS = 'pk'
KEY_LEGACY_I_PARAMS = 'legacy_i_params'
KEY_MODEL = 'model'
KEY_MODEL_TYPE = 'model_type'
KEY_ON_DEMAND = 'on_demand_explanation'
KEY_ON_DEMAND_MLI_KEY = 'on_demand_mli_key'
KEY_ON_DEMAND_PARAMS = 'on_demand_params'
KEY_PARAMS = 'params'
KEY_RUN_KEY = 'run_key'
KEY_TESTSET = 'testset'
KEY_USER = 'user'
KEY_VALIDSET = 'validset'
KEY_WORKER_NAME = 'worker_name'
class h2o_sonar.lib.api.commons.ExplainerParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Explainer parameters.

bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.ExplainerToRun(explainer_id: str, params: str | Dict = None, extra_params: List | None = None)

Bases: object

Parametrized explainer (to run) - ID and explainer parameters (dictionary, JSON string, or any other format the explainer can process).

clone() ExplainerToRun
dump() dict
static load(d: dict) ExplainerToRun
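
Example - a minimal sketch of declaring a parametrized explainer run and round-tripping it through dump()/load(); the explainer ID and parameters are illustrative:

    from h2o_sonar.lib.api.commons import ExplainerToRun

    to_run = ExplainerToRun(
        explainer_id="my.package.MyExplainer",  # illustrative ID
        params={"sample_size": 1000},           # illustrative parameters
    )
    restored = ExplainerToRun.load(to_run.dump())
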
class h2o_sonar.lib.api.commons.ExplanationScope(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Explanation scope.

global_scope = 2
local_scope = 1
class h2o_sonar.lib.api.commons.FilterEntry(filter_by: str = '', value=None)

Bases: object

KEY_FILTER_BY = 'filter_by'
KEY_VALUE = 'value'
clone() FilterEntry
dump() dict
static load(d: dict) FilterEntry
class h2o_sonar.lib.api.commons.InterpretationParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

any = 9
bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.Keyword(key: str, name: str, description: str)

Bases: object

Keyword.

class h2o_sonar.lib.api.commons.KeywordGroup(prefix: str, name: str, description: str, keywords: List[Keyword] | None = None)

Bases: object

Keyword group.

is_member(keywords: List[str]) bool

Check if the entity (evaluator, explainer, method) with given keywords is a member of this keyword group.

class h2o_sonar.lib.api.commons.KeywordGroups(groups: List[KeywordGroup] = None)

Bases: object

Keyword groups.

add_group(group: KeywordGroup)
get_group(prefix: str) KeywordGroup | None
class h2o_sonar.lib.api.commons.LlmModelHostType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

RAG = 2
SERVICE = 1
class h2o_sonar.lib.api.commons.LookAndFeel

Bases: object

BLUE_THEME = 'blue'
COLORMAP_BLUE_2_RED = ['#00AAEE', '#FF1166']
COLORMAP_WHITE_2_BLACK = ['#ffffff', '#000000']
COLORMAP_YELLOW_2_BLACK = ['#fec925', '#000000']
COLOR_BLACK = '#000000'
COLOR_DAI_GREEN = '#bbc600'
COLOR_H2OAI_YELLOW = '#fec925'
COLOR_HOT_ORANGE = '#fd5800'
COLOR_MATPLOTLIB_BLUE = '#3b74b4'
COLOR_RED = '#ff0000'
COLOR_WHITE = '#ffffff'
DRIVERLESS_AI_THEME = 'driverless_ai'
FORMAT_HEXA = 'hexa'
H2O_SONAR_THEME = 'h2o_sonar'
KEY_LF = 'look_and_feel'
THEME_2_BG_COLOR = {'blue': '#ffffff', 'driverless_ai': '#000000', 'h2o_sonar': '#ffffff'}
THEME_2_COLORMAP = {'blue': ['#00AAEE', '#FF1166'], 'driverless_ai': ['#fec925', '#000000'], 'h2o_sonar': ['#fec925', '#000000']}
THEME_2_FG_COLOR = {'blue': '#3b74b4', 'driverless_ai': '#fec925', 'h2o_sonar': '#fec925'}
THEME_2_LINE_COLOR = {'blue': '#000000', 'driverless_ai': '#ffffff', 'h2o_sonar': '#000000'}
static get_bg_color(theme: str)
static get_colormap(colormap_data: List[str] | str = '', theme: str = '')

Get Matplotlib colormap.

Parameters:
colormap_data: Union[List[str], str]

Create the color map either from a list of two colors (string hexadecimal color specification) or by color map name.

theme: str

H2O Eval Studio theme on which to base the color map.

Returns:
matplotlib.colors.Colormap

Color map.

static get_fg_color(theme: str)
static get_line_color(theme: str)
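
Example - a minimal sketch of resolving theme colors and colormaps; the values in the comments follow the constant tables above:

    from h2o_sonar.lib.api.commons import LookAndFeel

    # Colormap for a built-in theme (per THEME_2_COLORMAP above).
    cmap = LookAndFeel.get_colormap(theme=LookAndFeel.H2O_SONAR_THEME)

    # Colormap built directly from two hexadecimal colors.
    cmap2 = LookAndFeel.get_colormap(colormap_data=LookAndFeel.COLORMAP_BLUE_2_RED)

    LookAndFeel.get_fg_color(LookAndFeel.BLUE_THEME)  # '#3b74b4'
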
class h2o_sonar.lib.api.commons.MetricMeta(key: str, display_name: str = '', data_type: str = 'float', display_format: str = '.4f', description: str = '', value_range: Tuple[float, float] | None = (0.0, 1.0), value_enum: List[str] | None = None, higher_is_better: bool = True, threshold: float | None = 0.5, is_primary_metric: bool = True, parent_metric: str = '', exclude: bool = False)

Bases: object

Evaluation/explanation metric metadata.

DATA_TYPE_SECONDS = 'seconds'
KEY_DATA_TYPE = 'data_type'
KEY_DESCRIPTION = 'description'
KEY_DISPLAY_FORMAT = 'display_value'
KEY_DISPLAY_NAME = 'display_name'
KEY_EXCLUDE = 'exclude'
KEY_HIGHER_IS_BETTER = 'higher_is_better'
KEY_IS_PRIMARY_METRIC = 'is_primary_metric'
KEY_KEY = 'key'
KEY_PARENT_METRIC = 'parent_metric'
KEY_THRESHOLD = 'threshold'
KEY_VALUE_ENUM = 'value_enum'
KEY_VALUE_RANGE = 'value_range'
copy() MetricMeta
dump() Dict
static from_dict(data: Dict) MetricMeta
static load(data: Dict) MetricMeta
to_dict(threshold: float | None = None) Dict
to_md(to_rst: bool = False) str
class h2o_sonar.lib.api.commons.MetricsMeta(metrics: List[MetricMeta] = None)

Bases: object

KEY_META = 'metadata'
add_metric(metric: MetricMeta)
contains(key: str) bool
copy_with_overrides(metric_key_to_overrides: Dict) MetricsMeta

Copy metrics meta with updated:

  • display names

  • descriptions

  • exclude flag

Parameters:
metric_key_to_overrides: Dict

Dictionary with metric key to overrides mapping - map: metric key -> field key -> new value.

Returns:
MetricsMeta

Copy of the metrics meta with updated display names and descriptions.

dump() List
static from_dict(metrics_meta: Dict) MetricsMeta
get_metric(key: str) MetricMeta | None
get_metric_description(key: str) str
get_metric_keys() List[str]
get_primary_metric() MetricMeta | None

Return the metric which is marked as primary metric.

get_threshold(key: str, default_value=None) float | None
is_higher_better(key: str) bool
is_metric_passed(key: str, value: float) bool
static load(metrics_meta: List) MetricsMeta
set_threshold(threshold: float, key: str = '')
size() int
to_dict(threshold: float | None = None) Dict
to_list() List[MetricMeta]
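
Example - a minimal sketch of building metrics metadata and checking a metric value against its threshold; it assumes is_metric_passed() compares the value to the threshold in the direction given by higher_is_better:

    from h2o_sonar.lib.api.commons import MetricMeta, MetricsMeta

    accuracy = MetricMeta(
        key="accuracy",
        display_name="Accuracy",
        higher_is_better=True,
        threshold=0.75,
    )
    meta = MetricsMeta(metrics=[accuracy])

    meta.is_metric_passed("accuracy", 0.9)  # True: 0.9 >= 0.75, higher is better
    meta.get_threshold("accuracy")          # 0.75
    meta.get_primary_metric().key           # 'accuracy'
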
class h2o_sonar.lib.api.commons.MimeType

Bases: object

EXT_CSV = 'csv'
EXT_DATATABLE = 'jay'
EXT_DOCX = 'docx'
EXT_HTML = 'html'
EXT_JPG = 'jpg'
EXT_JSON = 'json'
EXT_MARKDOWN = 'md'
EXT_PNG = 'png'
EXT_SVG = 'svg'
EXT_TEXT = 'txt'
EXT_ZIP = 'zip'
MIME_CSV = 'text/csv'
MIME_DATATABLE = 'application/vnd.h2oai.datatable.jay'
MIME_DOCX = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
MIME_EVALSTUDIO_MARKDOWN = 'application/vnd.h2oai-evalstudio-leaderboard.markdown'
MIME_HTML = 'text/html'
MIME_IMAGE = 'image/xyz'
MIME_JPG = 'image/jpeg'
MIME_JSON = 'application/json'
MIME_JSON_CSV = 'application/vnd.h2oai.json+csv'
MIME_JSON_DATATABLE = 'application/vnd.h2oai.json+datatable.jay'
MIME_MARKDOWN = 'text/markdown'
MIME_MODEL_PIPELINE = 'application/vnd.h2oai.pipeline+zip'
MIME_PDF = 'application/pdf'
MIME_PNG = 'image/png'
MIME_SVG = 'image/svg+xml'
MIME_TEXT = 'text/plain'
MIME_ZIP = 'application/zip'
static ext_for_mime(mime: str)
class h2o_sonar.lib.api.commons.ModelTypeExplanation

Bases: object

IID: str = 'iid'
IMAGE: str = 'image'
LLM: str = 'llm'
RAG: str = 'rag'
TIME_SERIES: str = 'time_series'
UNSUPERVISED: str = 'unsupervised'
class h2o_sonar.lib.api.commons.Param(param_name: str, param_type: ParamType | InterpretationParamType | ExplainerParamType, description: str = '', default_value='', value_min: float = 0.0, value_max: float = 0.0, predefined: List | None = None, tags: List | None = None)

Bases: object

Generic parameter used as the (predecessor) base of library, interpretation and explainer parameters.

as_descriptor() ConfigItem

Explainer parameter to descriptor conversion.

class h2o_sonar.lib.api.commons.ParamType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

bool = 1
customlist = 7
dict = 8
float = 3
int = 2
list = 5
multilist = 6
str = 4
class h2o_sonar.lib.api.commons.PerturbationIntensity(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

EXTREME = 6
HIGH = 4
LOW = 2
MEDIUM = 3
VERY_HIGH = 5
VERY_LOW = 1
class h2o_sonar.lib.api.commons.PerturbatorToRun(perturbator_id: str, intensity: str | PerturbationIntensity = PerturbationIntensity.MEDIUM, params: str | Dict = None)

Bases: object

Parametrized perturbator (to run).

KEYWORD_INTENSITY = 'intensity'
KEYWORD_PARAMS = 'params'
KEYWORD_PERTURBATOR_ID = 'perturbator_id'
clone() PerturbatorToRun
dump() dict
static load(d: dict) PerturbatorToRun
class h2o_sonar.lib.api.commons.ResourceHandle(connection_key: str, resource_key: str, version: str = '')

Bases: object

H_CONNECTION: str = 'connection'
H_KEY: str = 'key'
H_PREFIX: str = 'resource:'
H_VERSION: str = 'version'
static is_handle(handle) bool
static parse_string_handle(loc_str: str) Tuple[str, str, str]

Parse CLI argument into connection, resource key and version.

class h2o_sonar.lib.api.commons.ResourceLocatorType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Resource locator types.

handle = 2
local = 1
class h2o_sonar.lib.api.commons.SafeJavaScript

Bases: object

Safe JavaScript data structure (de)serialization.

INF = 'Infinity'
NAN = 'NaN'
NEG_INF = '-Infinity'
static decode_to_float(obj)
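
Example - a sketch of the intended use, assuming decode_to_float() converts the string constants above back to Python floats in a decoded JSON structure (the exact accepted input shape is an assumption):

    import json

    from h2o_sonar.lib.api.commons import SafeJavaScript

    # Non-finite floats are serialized as the string constants above.
    obj = json.loads('{"score": "NaN", "upper_bound": "Infinity"}')
    decoded = SafeJavaScript.decode_to_float(obj)
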
class h2o_sonar.lib.api.commons.SemVer(major: int, minor: int, patch: int)

Bases: object

static from_int_list(version_list: List[int])

Semantic version from the list of 3 integers.

Returns:
Optional[SemVer]

Instance of SemVer class if valid version, None otherwise.

static from_str(version: str)

Parse a semantic version <major>.<minor>.<patch>.

Returns:
Optional[SemVer]

Instance of SemVer class if valid version, None otherwise.
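
Example - parsing and constructing semantic versions per the methods above:

    from h2o_sonar.lib.api.commons import SemVer

    v = SemVer.from_str("1.2.3")          # SemVer with major=1, minor=2, patch=3
    v2 = SemVer.from_int_list([1, 2, 3])  # the same version
    SemVer.from_str("not-a-version")      # None (invalid version)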

class h2o_sonar.lib.api.commons.UpdateGlobalExplanation

Bases: object

Update mode: merge (to add new explanations) or replace (default).

OPT_CLASS: str = 'class'

Driverless AI/common parameters source: inherit or request (default).

OPT_FEATURE: str = 'feature'
OPT_INHERIT: str = 'inherit'
OPT_MERGE: str = 'merge'
OPT_REPLACE: str = 'replace'
OPT_REQUEST: str = 'request'
PARAMS_SOURCE: str = 'params_source'
UPDATE_MODE: str = 'update_mode'
UPDATE_SCOPE: str = 'update_scope'
h2o_sonar.lib.api.commons.add_string_list(items: List | None, add_items: List | None) List

Robust list handling of features to drop, process, use, skip, …

h2o_sonar.lib.api.commons.base_pkg(obj)

Get base package for given Python object.

Parameters:
obj: Python object
Returns:
str

Base package of the Python object and sub-package, e.g. sklearn or ensemble.

h2o_sonar.lib.api.commons.generate_key() str
h2o_sonar.lib.api.commons.harmonic_mean(xs) float
h2o_sonar.lib.api.commons.is_ncname(s: str) bool
h2o_sonar.lib.api.commons.is_port_used(hostname: str = '127.0.0.1', port: int = 12345, service_name='Driverless AI', timeout=15, logger=None) bool
h2o_sonar.lib.api.commons.is_valid_key(key: str) bool

h2o_sonar.lib.api.datasets module

class h2o_sonar.lib.api.datasets.DatasetApi(logger: SonarLogger | None = None)

Bases: object

Dataset API interface providing a uniform API that allows explainers to use any dataset regardless of its format or location details.

static create_dataset(dataset_src, dataset_type: ExplainableDatasetType = ExplainableDatasetType.unknown, target_col: str = '', sampled_dataset_path: str = '', sample_num_rows: int | None = None, sampler: DatasetSampler | None = None, **extra_params) ExplainableDataset

Create explainable dataset.

Parameters:
dataset_src: Union[ExplainableDataset, datatable.Frame, str, Dict, pandas.DataFrame, h2o.H2OFrame]

Create the dataset from the given source: explainable dataset instance, datatable frame, H2OFrame, Pandas DataFrame, string (expected to be a path to a CSV, .jay or any other file type supported by datatable), or dictionary (used to construct the frame).

dataset_type: ExplainableDatasetType

Optional dataset type hint, which can be used to construct the dataset correctly.

sampled_dataset_path: str

Optional file path, which can be used to create a new file with the sampled dataset (if the dataset is sampled and sampling is needed).

target_col: str

Optional target column name.

sample_num_rows: Optional[int]

If None, sample automatically based on the dataset and RAM size. If > 0, sample the dataset to sample_num_rows rows. If 0, do NOT sample.

sampler: Optional[DatasetSampler]

Sampling method (implementation) to be used - see the h2o_sonar.utils.sampling module (documentation) for available sampling methods. Pass a sampler instance to use a specific sampling method.

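Example - a minimal sketch of creating an explainable dataset from a CSV file; the file path and column name are illustrative:

    from h2o_sonar.lib.api.datasets import DatasetApi

    dataset = DatasetApi.create_dataset(
        dataset_src="data/train.csv",  # illustrative path
        target_col="label",            # illustrative target column
        sample_num_rows=0,             # 0 disables sampling
    )
    frame = dataset.data  # datatable.Frame (see ExplainableDataset below)
    meta = dataset.meta   # ExplainableDatasetMeta
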
static write_csv(dataset: Frame | DataFrame, path: str, bom: bool = False)
static write_dataset(dataset: Frame | DataFrame | Series | ndarray, path: str)
static write_datatable_dataset(dataset: Frame, path: str)
static write_pandas_dataset(data: DataFrame | Series | ndarray, path: str)
static zip_csv(csv_file_path)
class h2o_sonar.lib.api.datasets.ExplainableColumnMeta(name: str = '', data_type: str = '', logical_types: List | None = None, values_format: str = '', is_id: bool = False, is_numeric: bool = False, is_categorical: bool = False, count: int = 0, frequency: int = 0, unique: int = 0, max_value: int | None = None, min_value: int | None = None, mean: float | None = None, std: float | None = None, histogram_counts: List | None = None, histogram_ticks: List | None = None, properties: Dict | None = None)

Bases: object

Dataset column metadata.

to_dict() Dict
class h2o_sonar.lib.api.datasets.ExplainableDataset(data=None, meta=None, logger=None)

Bases: object

Dataset with metadata - this class provides a uniform API to get dataset data regardless of the dataset source, provider or implementation.

COL_BIAS = 'bias'
KEY_DATA = 'data'
KEY_METADATA = 'metadata'
property data: Frame
static frame_2_datatable(frame, columns: List | None = None, trim_to_columns: List | None = None) Frame

Convert frame to datatable.

Parameters:
frame

A frame to be converted.

columns: Optional[List]

Optional list of column names to be used for the newly created frame - column names are overwritten by this list.

trim_to_columns: Optional[List]

Remove all columns that are not on this list from the result frame.

Returns:
datatable.Frame

Datatable frame.

static frame_2_numpy(frame, flatten: bool = False) ndarray | None
static frame_2_pandas(frame, columns: List | None = None, trim_to_columns: List | None = None) DataFrame

Convert frame to Pandas.

Parameters:
frame

A frame to be converted.

columns: Optional[List]

Optional list of column names to be used for the newly created frame - column names are overwritten by this list.

trim_to_columns: Optional[List]

Remove all columns that are not on this list from the result frame.

Returns:
pandas.DataFrame

Pandas frame.

static is_bias_col(col_name) bool
property meta: ExplainableDatasetMeta
prepare(drop_na_rows: bool = True, used_features: ~typing.List | None = None, le_cat_variables: bool = True, cleaned_frame_type: ~typing.Type[~pandas.core.frame.DataFrame] | ~typing.Type[~datatable.Frame] = <class 'datatable.Frame'>, update: bool = False) Tuple[Frame | DataFrame, List, MultiColumnLabelEncoder, int]

Method with commonly needed actions to preprocess an explainable dataset. Third-party libraries often require, e.g., numeric features only, examples without N/A or undefined values, … which this method ensures.

Parameters:
drop_na_rows: bool

Drop rows with N/A values.

used_features: Optional[List]

Trim the dataset to the used features.

le_cat_variables: bool

Label-encode non-numerical columns.

cleaned_frame_type

Frame type to return - Pandas or datatable.

update: bool

If True, set the data field of this ExplainableDataset instance, else return the cleaned dataset and keep the data field intact.

Returns:
Tuple[datatable.Frame, List[str], Any, int]

Result frame; non-numeric column names (label encoded); label encoder; number of dropped rows with N/A values.
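
Example - a minimal sketch of the documented prepare() contract, continuing the create_dataset() example above:

    import datatable as dt

    cleaned, encoded_cols, encoder, n_dropped = dataset.prepare(
        drop_na_rows=True,       # drop rows with N/A values
        le_cat_variables=True,   # label-encode non-numerical columns
        cleaned_frame_type=dt.Frame,
    )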

sample(*args, **kwargs)

Sample the explainable dataset and return a new instance.

to_dict()
to_json(indent=None)
transform(*args, **kwargs)

Transform the explainable dataset - sanitize, sample - and return a new explainable dataset instance.

class h2o_sonar.lib.api.datasets.ExplainableDatasetHandle(connection_key: str, dataset_key: str, dataset_version: str = '')

Bases: ResourceHandle

Handle to a REMOTE dataset hosted by a remote system described by its connection configuration.

ExplainableDatasetHandle differs from ExplainableDataset in that it doesn’t provide the actual dataset data, but only the metadata required to access the dataset.

static from_string(str_handle: str, h2o_sonar_config=None) ExplainableDatasetHandle

Create a new instance of the dataset handle from the string.

class h2o_sonar.lib.api.datasets.ExplainableDatasetMeta(shape: Tuple | None = None, columns_meta: List[ExplainableColumnMeta] | None = None, column_names: List | None = None, column_types: List | None = None, column_uniques: List | None = None, columns_cat: List | None = None, columns_num: List | None = None, file_name: str = '', file_path: str = '', file_size: int = 0, key: str = '', missing_values: List | None = None)

Bases: object

Dataset metadata - this class provides a uniform API to get basic EDA dataset metadata regardless of the dataset source, provider or implementation.

KEY_COLUMNS_CAT = 'columns_cat'
KEY_COLUMNS_META = 'columns_meta'
KEY_COLUMNS_NUM = 'columns_num'
KEY_COLUMN_NAMES = 'column_names'
KEY_COLUMN_TYPES = 'column_types'
KEY_COLUMN_UNIQUES = 'column_uniques'
KEY_FILE_NAME = 'file_name'
KEY_FILE_PATH = 'file_path'
KEY_FILE_SIZE = 'file_size'
KEY_MISSING_VALUES = 'missing_values'
KEY_ORIGINAL_DATASET_PATH = 'original_dataset_path'
KEY_ORIGINAL_DATASET_SAMPLED = 'original_dataset_sampled'
KEY_ORIGINAL_DATASET_SHAPE = 'original_dataset_shape'
KEY_ORIGINAL_DATASET_SIZE = 'original_dataset_size'
KEY_ROW_COUNT = 'row_count'
KEY_SHAPE = 'shape'
copy()
get_column_meta(column_name: str)
has_column(column_name: str)
is_categorical_column(column_name: str)
is_numeric_column(column_name: str)
to_dict()
to_json(indent=None)
class h2o_sonar.lib.api.datasets.ExplainableDatasetType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

datatable = 3
filesystem = 2
h2o3 = 6
pandas = 4
remote = 1
unknown = 5
class h2o_sonar.lib.api.datasets.ExplainableDatatableDataset(frame: Frame)

Bases: ExplainableDataset

Datatable based dataset.

class h2o_sonar.lib.api.datasets.LlmDataset

Bases: object

Dataset used to evaluate LLMs and RAGs.

COLUMNS = ['input', 'corpus', 'context', 'categories', 'relationships', 'model_key', 'test_key', 'expected_output', 'output_constraints', 'output_condition', 'actual_output', 'actual_duration', 'cost']
COL_ACTUAL_DURATION = 'actual_duration'
COL_ACTUAL_OUTPUT = 'actual_output'
COL_CATEGORIES = 'categories'
COL_CONTEXT = 'context'
COL_CORPUS = 'corpus'
COL_COST = 'cost'
COL_EXPECTED_OUTPUT = 'expected_output'
COL_INPUT = 'input'
COL_MODEL_KEY = 'model_key'
COL_OUTPUT_CONDITION = 'output_condition'
COL_OUTPUT_CONSTRAINTS = 'output_constraints'
COL_RELATIONSHIPS = 'relationships'
COL_TEST_KEY = 'test_key'
KEY_ACTUAL_DURATION = 'actual_duration'
KEY_ACTUAL_OUTPUT = 'actual_output'
KEY_CATEGORIES = 'categories'
KEY_CONTEXT = 'context'
KEY_CORPUS = 'corpus'
KEY_COST = 'cost'
KEY_EXPECTED_OUTPUT = 'expected_output'
KEY_INPUT = 'input'
KEY_INPUTS = 'inputs'
KEY_KEY = 'key'
KEY_MODEL_KEY = 'model_key'
KEY_OUTPUT_CONDITION = 'output_condition'
KEY_OUTPUT_CONSTRAINTS = 'output_constraints'
KEY_RELATIONSHIPS = 'relationships'
KEY_TC_KEY = 'test_case_key'
KEY_TEST_KEY = 'test_key'
class LlmDatasetRow(i: str, context: List[str] | None = None, corpus: List[str] | None = None, categories: str | List[str] = '', relationships: List | None = None, expected_output: str = '', output_constraints: List[str] | Any | None = None, output_condition: str = '', actual_output: str = '', actual_duration: float = 0.0, cost: float = 0.0, model_key: str = '', test_key: str = '', key: str = '')

Bases: object

add_relationship(relationship_type: str, target: str, target_type: str)
copy(update_key: bool = True)
static from_dict(as_dict: Dict)
perturb(perturbators: List[PerturbatorToRun], raised_errors: List | None = None)

Perturb the input (prompt) using the specified perturbators. The perturbation is always performed in place on the input, which is a string.

Parameters:
perturbators: List[commons.PerturbatorToRun]

List of perturbators to run.

raised_errors: Optional[List]

List of raised errors.

to_dict() Dict
add_input(i: str, corpus: List[str] | None = None, context: List[str] | None = None, categories: str | List[str] = '', relationships: List | None = None, expected_output: str = '', output_constraints: List[str] | Any | None = None, output_condition: str = '', actual_output: str = '', actual_duration: float = 0.0, cost: float = 0.0, model_key: str = '', test_key: str = '', key: str = '')

Add new dataset row - question / prompt / input with related (meta)data.

Parameters:
i: str

Input / question / prompt.

corpus: Optional[List[str]]

URLs/paths to document(s) which were used to fine-tune the RAG for this test case.

context: Optional[List[str]]

Context (set of document chunks by value, i.e. text snippets) returned by the vector database for augmentation of the LLM.

categories: Union[str, List[str]]

Categories of the input (question/prompt), like: math, knowledge, reasoning, …

relationships: Optional[List]

Relationships among rows capturing e.g. perturbation source/product.

expected_output: str

Expected output / answer.

output_constraints: Optional[Union[List[str], Any]]

Optional output / answer constraints, which may be any data structure that can be serialized to JSON. They are interpreted by the explainer and used for validation.

output_condition: str

Optional string condition which is interpreted and used by the explainer to validate the output / answer. output_condition can use output_constraints or vice versa.

actual_output: str

Actual output / answer returned by the LLM / RAG product.

actual_duration: float

How much time it took to get the actual answer.

cost: float

Answer/inference cost.

model_key: str

The key of the H2O Eval Studio model which was used to get the actual answer.

test_key: str

The key of the test to which the test case belongs.

key: str

Key of the dataset row.

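Example - a minimal sketch of building an LLM dataset row by row; it assumes the default constructor creates an empty dataset, and all values are illustrative:

    from h2o_sonar.lib.api.datasets import LlmDataset

    llm_dataset = LlmDataset()
    llm_dataset.add_input(
        i="What is the capital of France?",
        expected_output="Paris",
        categories="knowledge",
        actual_output="Paris",
        actual_duration=0.8,
        cost=0.0001,
    )
    llm_dataset.prompts()  # ['What is the capital of France?']
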
static from_datatable_dict(as_dict: Dict) LlmDataset

Deserialize a datatable dictionary to LlmDataset. Structured fields (corpus, categories, and output_constraints) are automatically deserialized from JSON strings to dictionaries if possible.

Parameters:
as_dict: Dict

Dictionary created using datatable.to_dict().

Returns:
LlmDataset

LLM dataset.

static from_datatable_json_enc_col(enc_json_col: str, logger=None) List

Robust deserialization of a datatable JSON-encoded column with a list value.

static from_dict(as_dict: Dict) LlmDataset
static load_from_json(json_file_path: str | Path, datatable_format: bool = False)
merge(other_llm_dataset: LlmDataset)

Merge another dataset into this one.

Parameters:
other_llm_dataset: LlmDataset

LLM dataset to be merged into this one.

perturb(perturbators: List[PerturbatorToRun], in_place: bool = True, raised_errors: List | None = None) LlmDataset

Perturb the inputs (prompts) using the specified perturbator(s).

Parameters:
perturbators: List[commons.PerturbatorToRun]

Perturbators to run - includes the perturbator ID, intensity, and parameters.

in_place: bool

If True, perturb the prompts in place, otherwise create new perturbed rows.

raised_errors: Optional[List]

If None, raise error(s) if the perturbator(s) fail; otherwise do not raise exceptions and store them in the (empty) list provided by the caller.

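Example - a minimal sketch of perturbing the dataset prompts, continuing the add_input() example above; the perturbator ID is illustrative (available IDs are installation specific):

    from h2o_sonar.lib.api.commons import (
        PerturbationIntensity,
        PerturbatorToRun,
    )

    errors = []  # collect perturbator failures instead of raising
    perturbed = llm_dataset.perturb(
        perturbators=[
            PerturbatorToRun(
                perturbator_id="typo_perturbator",  # illustrative ID
                intensity=PerturbationIntensity.LOW,
            )
        ],
        in_place=False,        # create new perturbed rows
        raised_errors=errors,
    )
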
prompts() List[str]

Return the list of unique prompts.

save_as_json(json_path: str | Path)
shape() List
stats() Dict[str, int | Dict]
to_datatable() Frame
to_datatable_dict() Dict
to_dict() Dict
class h2o_sonar.lib.api.datasets.LlmEvalResults

Bases: object

LLM dataset with metric values from the evaluation.

COL_ACTUAL_OUTPUT_META = 'actual_output_meta'
KEY_RESULTS = 'results'
class LlmEvalResultRow(dataset_row: LlmDatasetRow, metrics: Dict, actual_output_meta: List | None = None, metrics_meta: Dict | None = None)

Bases: object

KEY_ACTUAL_OUTPUT_META = 'actual_output_meta'
KEY_METRICS = 'metrics'
KEY_METRICS_META = 'metrics_meta'
KEY_METRIC_KEY = 'key'
KEY_METRIC_VALUE = 'value'
to_dict(type_friendly_metrics: bool = False) Dict
add_result(result: LlmEvalResultRow)

Add a new evaluation result row.

Parameters:
result: LlmEvalResultRow

Result row.

static from_dict(as_dict: Dict) LlmEvalResults
static load_from_json(json_file_path: str | Path, datatable_format: bool = False)
prompts() List[str]

Return the list of unique prompts.

save_as_json(json_path: str | Path)
shape() List
to_datatable() Frame
to_datatable_dict() Dict
to_dict() Dict
to_llm_dataset() LlmDataset

Convert evaluation results to the LLM dataset - keep all fields, skip metrics.

class h2o_sonar.lib.api.datasets.LlmInputRel(rel_type, target: str = '', target_type: str = 'test_case')

Bases: object

Test case relationship.

KEY_REL_TARGET = 'target'
KEY_REL_TARGET_TYPE = 'target_type'
KEY_REL_TYPE = 'type'
static from_dict(as_dict: Dict) LlmInputRel
to_dict()
class h2o_sonar.lib.api.datasets.LlmInputRelTargetType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Test case / input relationship target types.

test_case = 1
class h2o_sonar.lib.api.datasets.LlmInputRelType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Test case / input relationship types.

perturbation_source = 1
class h2o_sonar.lib.api.datasets.LlmPromptCategories(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

classification = 'classification'
coding = 'coding'
evaluation = 'evaluation'
facts = 'facts'
harm = 'harm'
knowledge = 'knowledge'
math = 'math'
planning = 'planning'
question_answering = 'question_answering'
reasoning = 'reasoning'
recommendation = 'recommendation'
summarization = 'summarization'
troubleshooting = 'troubleshooting'
unknown = 'unknown'
writing = 'writing'
h2o_sonar.lib.api.datasets.filter_importance_greater_than_zero(frame: Frame, label: str | None = None, skip_bias: bool = True) Frame

Filter out all columns with 0s values.

Parameters:
frame: datatable.Frame

Frame to filter.

label

Label for which to pull bias.

skip_bias: bool

If a bias column is present, skip it.

Returns:
Filtered frame.

h2o_sonar.lib.api.explainers module

class h2o_sonar.lib.api.explainers.Explainer

Bases: object

Explainer.

An Explainer instance is NOT meant to be reusable, i.e. the instance must be created using the default constructor, initialized using the setup() method, and used at most once - a single fit() method invocation.

Explainer lifecycle:

  • constructor() Explainer instantiation (for external basic/sanity checks, …). Note that an explainer constructor executed by the H2O Eval Studio runtime must not take parameters.

  • check_compatibility(params) -> bool Explainer check verifying that the explainer will be able to explain the given model. If the compatibility check returns False or raises an error, the explainer will not be run. The compatibility check is optional and does not have to be run by the engine.

  • setup(params) Set required and optional parameters, configuration, etc.

  • fit(X, y) Optional step to train surrogate model(s) or other means the explainer needs. The method gets the data needed for training/creation/initialization. This step might be skipped if the explainer doesn’t need it.

  • explain*(X, y) -> [explanation] Actual computation (persistence and upload) of explanation(s) for the given data(set). An explanation might be provided by value or by reference (in case it would not fit in memory).

  • get_explanation(type, format) Get (cached/persisted) explanations in desired format.
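
Example - a minimal sketch of a custom explainer following the lifecycle above; the engine is expected to call setup() before fit() and explain(), and the explanation produced here is just a working directory archive:

    from h2o_sonar.lib.api import explainers

    class MyExplainer(explainers.Explainer):
        # Sketch only - a real explainer would also declare metadata
        # (display name, keywords, supported explanation types, ...).

        def check_compatibility(self, params=None, **explainer_params) -> bool:
            return True  # this explainer can explain any model

        def fit(self, X, y=None, **kwargs):
            return self  # nothing to pre-train for this explainer

        def explain(self, X, y=None, explanations_types: list = None, **kwargs) -> list:
            # Compute and persist the explanation(s), then return them.
            archive = self.create_explanation_workdir_archive(
                display_name="My explanation",
                display_category="EXAMPLE",
            )
            return [archive]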

Attributes:
model: Optional[ExplainerModel]

Instance of ExplainerModel class which has predict and fit functions of the model to be explained. These methods can be used to create predictions using the model/scorer.

persistence: Optional[ExplainerPersistence] = None

Instance of ExplainerPersistence class which provides convenient methods to persist explainer data e.g. to its working directory.

params: Optional[CommonExplainerParameters] = None

Common explainer parameters specified on the explainer run, like the target column or columns to drop.

explainer_params: str

This explainer's specific parameters specified on the explainer run.

logger:

Explainer’s logger.

config:

Driverless AI server configuration copy.

ARG_EXPLAINER_PARAMS = 'explainer_params_as_str'
EXPLAINERS_PURPOSES = ['explains-dataset', 'explains-approximate-behavior', 'explains-original-feature-importance', 'explains-transformed-feature-importance', 'explains-feature-behavior', 'explains-fairness', 'explains-model-debugging', 'explains-model']
KEYWORD_COMPLIANCE_TEST = 'compliance-test'
KEYWORD_DEFAULT = 'run-by-default'
KEYWORD_EVALUATES_LLM = 'evaluates_llm'
KEYWORD_EVALUATES_RAG = 'evaluates_rag'
KEYWORD_EXPLAINS_APPROX_BEHAVIOR = 'explains-approximate-behavior'
KEYWORD_EXPLAINS_DATASET = 'explains-dataset'
KEYWORD_EXPLAINS_FAIRNESS = 'explains-fairness'
KEYWORD_EXPLAINS_FEATURE_BEHAVIOR = 'explains-feature-behavior'
KEYWORD_EXPLAINS_MODEL_DEBUGGING = 'explains-model-debugging'
KEYWORD_EXPLAINS_O_FEATURE_IMPORTANCE = 'explains-original-feature-importance'
KEYWORD_EXPLAINS_T_FEATURE_IMPORTANCE = 'explains-transformed-feature-importance'
KEYWORD_EXPLAINS_UNKNOWN = 'explains-model'
KEYWORD_H2O_MODEL_VALIDATION = 'h2o-model-validation'
KEYWORD_H2O_SONAR = 'h2o-sonar'
KEYWORD_IS_FAST = 'is_fast'
KEYWORD_IS_SLOW = 'is_slow'
KEYWORD_LLM = 'llm'
KEYWORD_MOCK = 'mock'
KEYWORD_NLP = 'nlp'
KEYWORD_PREFIX_CAPABILITY = 'capability'
KEYWORD_PREFIX_EXPLAINS = 'explains'
KEYWORD_PROXY = 'proxy-explainer'
KEYWORD_REQUIRES_H2O3 = 'requires-h2o3'
KEYWORD_REQUIRES_OPENAI_KEY = 'requires-openai-api-key'
KEYWORD_RQ_AA = 'requires_actual_answer'
KEYWORD_RQ_C = 'requires_constraints'
KEYWORD_RQ_EA = 'requires_expected_answer'
KEYWORD_RQ_J = 'requires_llm_judge'
KEYWORD_RQ_P = 'requires_prompts'
KEYWORD_RQ_RC = 'requires_retrieved_context'
KEYWORD_TEMPLATE = 'template'
KEYWORD_UNLISTED = 'unlisted'
add_insight(insight: InsightAndAction)

Add an evaluated/interpreted model(s) insight identified by the explain() method.

Parameters:
insight: insights.InsightAndAction

Insight to be added.

add_problem(problem: ProblemAndAction)

Add an evaluated/interpreted model(s) problem identified by the explain() method.

Parameters:
problem: problems.ProblemAndAction

Model problem to be added.

as_descriptor(runtime_view=False) ExplainerDescriptor

Explainer descriptor as PROTO entity.

Parameters:
runtime_view: bool

Not all descriptor fields (like the parameters declaration) are needed at runtime (for instance, they are needed before running the explainer), therefore they might be skipped in the runtime view.

Returns:
ExplainerDescriptor:

Explainer descriptor.

property brief_description
classmethod can_explain(model_meta: ExplainableModelMeta = None, experiment_type: ExperimentType = None) bool

Return True if the explainer fits either the given Driverless AI model’s type or the given Driverless AI experiment type.

check_compatibility(params: CommonInterpretationParams | None = None, **explainer_params) bool

Explainer’s check (based on parameters) verifying that the explainer will be able to explain a given model. If this compatibility check returns False or raises an error, the explainer will not be run by the engine. This check may, but does not have to, be performed by the execution engine.

check_required_modules(required_modules: Set[str] | None = None)

Check whether modules specified in self._modules_needed_by_name are imported.

Parameters:
required_modules: Optional[Set[str]]

If defined, then modules specified in the parameter are checked, else self._modules_needed_by_name is checked.

Returns:
bool

True if all modules are available, False otherwise.

classmethod class_brief_description()
classmethod class_description()
classmethod class_display_name()
property class_name
classmethod class_tagline()
create_explanation_workdir_archive(display_name: str = '', display_category: str = '') WorkDirArchiveExplanation

Easily create a working directory archive with a ZIP of explanation representations.

Parameters:
display_name: str

Display name e.g. to be used for naming tile in UI.

display_category: str

Display category e.g. to be used for naming tab in UI.

property dependencies: List[Type[Explainer]]
classmethod depends_on() list
property description
destroy(**destroy_params)

Override to release resources created by the explainer (DB entities, files, running processes, …) depending on explainer runtime/container.

property display_name
classmethod evaluator_id() str
exlainer_params_as_dict() Dict | None
expected_custom_class

alias of Explainer

abstract explain(X, y=None, explanations_types: list = None, **kwargs) list

Invoke this method to calculate and persist global explanations, local explanations, or both for the given data(set). The implementation is to be overridden by child classes. This method is responsible for the calculation, build and persistence of explanations.

Parameters:
X: datatable.Frame

Dataset frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

explanations_types: List[Type[Explanation]]

Optional explanations to be built. All will be built if empty list or None provided. Get all supported types using has_explanation_types().

Returns:
List[Explanation]:

Explanations descriptors.

explain_global(X, y=None, **kwargs) list

Execute the explainer to calculate on-demand global explanations. This method is expected to be overridden if the explainer doesn’t pre-compute global explanations and/or needs to update global explanations after the initial computation. The default implementation just returns the global explanations computed by the explain() method.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

explain_insights() List[InsightAndAction]

Determine interpreted/evaluated model(s) insights (calculate them or get the persisted insights identified by the explain() method).

Returns:
List[InsightAndAction]:

Interpreted/evaluated model(s) insights.

explain_local(X, y=None, **kwargs) list

Execute the explainer to calculate on-demand local explanations. This method is expected to be overridden if the explainer doesn’t pre-compute local explanations. The default implementation just returns the local explanations computed by the explain() method.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

explain_problems() List[ProblemAndAction]

Determine interpreted/evaluated model(s) problems (calculate them or get the persisted problems identified by the explain() method).

Returns:
List[ProblemAndAction]:

Interpreted/evaluated model(s) problems.

classmethod explainer_id() str
classmethod explainer_version()
explains_binary() bool
explains_multiclass() bool
explains_regression() bool
property explanations: Dict | None

Explanations created by this explainer.

fit(X, y=None, **kwargs)

Optionally build/train the explainer (model) and explainer prerequisites. The implementation is to be overridden by child classes. It may be empty if the explainer doesn’t have to be built.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

get_explanations(explanation_types: list) list

Get instance explanation representations in the given format.

Parameters:
explanation_types: List[Type[Explanation]]

Explanation types to return - must be among the explanations declared (supported) by the explainer. Returns all supported explanations if None or empty.

Returns:
List[Explanation]:

Explanations by value or reference.

get_result() Type[ExplainerResult] | None
classmethod has_explanation_scopes() List[str]
classmethod has_explanation_types() List[Type[Explanation]]

Explanation types supported by the explainer.

classmethod has_explanations() List[str]

Experiment types this explainer explains.

classmethod has_model_type_explanations() List[str]
static is_enabled() bool

Return True if the explainer is enabled, else False, which makes the explainer completely ignored (unlisted, not loaded, not executed).

classmethod is_iid() bool
classmethod is_image() bool
classmethod is_llm() bool
classmethod is_rag() bool
classmethod is_time_series() bool
classmethod is_unsupervised() bool
property keywords: List[str]
static load(explainer_path: str | None = None)

Load pickled explainer snapshot.

static load_descriptor(descriptor_path: str, persistence: Persistence | None) ExplainerDescriptor
classmethod metrics_meta() MetricsMeta
classmethod parameters() List[ExplainerParam]
classmethod priority() float

Priority used to order explainers by the sequential execution scheduler. The higher the number, the higher the priority.

report_progress(progress: float, message: str = '', precision: int = 1)

Report explainer progress in [0, 1] range and message (“” removes previous message, None keeps previous message).

classmethod requires_model() bool
classmethod requires_predict_method() bool
classmethod requires_preloaded_predictor() bool
run_explain(X, y, explanations_types: list = None, **kwargs) dict

Execute the explainer to calculate (persist and upload) explanation(s) of a given model.

This method invokes the explainer's implementation of explain() and then performs explanation verifications and eventual subsequent actions. It is invoked by the explainer execution engine (which can add code to be executed before/after the explain() overridden by child classes).

An explanation might be provided by value or by reference (in case it would not fit in memory).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

explanations_types: List[Type[Explanation]]

Explanation types to build. All will be built if empty list or None provided. Get all supported types using has_explanation_types().

Returns:
List[Explanation]:

Explanations.

run_explain_global(X, y=None, **kwargs) list

Execute explainer to calculate (persist and upload) global explanation(s).

This method invokes the explainer's implementation of explain_global() and then performs explanation verifications and eventual subsequent actions. It is invoked by the explainer execution engine (which can add code to be executed before/after the explain_global() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

run_explain_local(X, y=None, **kwargs) list

Execute explainer to calculate (persist and upload) local explanation(s).

This method invokes the explainer's implementation of explain_local() and then performs explanation verifications and eventual subsequent actions. It is invoked by the explainer execution engine (which can add code to be executed before/after the explain_local() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

Returns:
List[Explanation]:

Explanations.

run_fit(X, y=None, **kwargs)

Build explainer and explainer prerequisites.

This method is invoked by the explainer execution engine (which can add code to be executed before/after the fit() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

save(explainer_path: str | None = None)

Save explainer snapshot pickle.

static save_descriptor(descriptor_path: str, descriptor: ExplainerDescriptor, persistence: Persistence | None)
setup(model: ExplainableModel | ExplainableModelHandle | None, persistence: ExplainerPersistence, models=None, key: str = '', params: CommonInterpretationParams | None = None, explainer_params_as_str: str | None = '', dataset_api: DatasetApi | None = None, model_api: ModelApi | None = None, logger: SonarLogger | None = None, **explainer_params) None

Set all the parameters needed to execute fit() and explain().

Parameters:
model: Optional[Union[models.ExplainableModel, models.ExplainableModelHandle]]

Explainable model with (fit and) score methods (or None if 3rd party).

models

(Explainable) models.

persistence: ExplainerPersistence

Persistence API allowing (controlled) saving and loading of explanations.

key: str

Optional (given) explainer run key (generated otherwise).

params: CommonInterpretationParams

Common explainers parameters specified on explainer run.

explainer_params_as_str: Optional[str]

Explainer specific parameters in string representation.

dataset_api: Optional[datasets.DatasetApi]

Dataset API to create custom explainable datasets needed by this explainer.

model_api: Optional[models.ModelApi]

Model API to create custom explainable models needed by this explainer.

logger: Optional[loggers.SonarLogger]

Logger.

explainer_params:

Other explainer RUNTIME parameters, options, and configuration.

classmethod supports_dataset_locator(locator: ResourceLocatorType) bool
classmethod supports_model_locator(locator: ResourceLocatorType) bool
property tagline
validate_explanations() bool

Optional method which can be used to verify integrity of explanations.

Returns:
bool:

Returns True if explanations are valid, False otherwise.

property working_dir: str

Working directory path where explainer can store any data it needs.

class h2o_sonar.lib.api.explainers.ExplainerArgs(parameters: List[ExplainerParam] = None)

Bases: object

Explainer arguments ~ parameter values.

add_parameter(param_type: ExplainerParam)
as_descriptor() List

Save parameters as descriptor: [{‘parameter’: {‘type’: ‘str’}}]

from_config_overrides(config_overrides: dict, erase: List[str] | None = None) dict

Try to get all arguments which are declared as parameters from the given config overrides and set (or overwrite) them in args.

Parameters:
config_overrides: dict

Config overrides as dictionary.

erase: Optional[List[str]]

Parameters to erase from config overrides.

from_dict(args_dict: dict, erase: List[str] | None = None) dict

Try to get all arguments which are declared as parameters from the given dictionary and set (or overwrite) them in args. Erase the given parameters - the arguments dictionary is not cloned, but modified in place.

get(param_name: str, default_value=None)
static json_str_to_dict(json_str: str, logger=None) dict
static resolve_local_paging_args(args: dict, explainer_name: str = '', logger=None)

Resolve local explanation paging arguments.

resolve_params(explainer_params: dict | None = None)

Resolve the explainer’s self.parameters (arguments) into self.args.

Parameters:
explainer_params: Optional[dict]

Explainer parameters as dictionary.

static toml_str_to_dict(toml_str: str, logger=None) dict
class h2o_sonar.lib.api.explainers.ExplainerDescriptor(id: str, name: str = '', display_name: str = '', tagline: str = '', description: str = '', brief_description: str = '', model_types: List[str] | None = None, can_explain: List[str] | None = None, explanation_scopes: List[str] | None = None, explanations: List[ExplanationDescriptor] | None = None, parameters: List[ConfigItem] | None = None, keywords: List[str] | None = None, metrics_meta: MetricsMeta | None = None)

Bases: object

KEY_BRIEF_DESCRIPTION = 'brief_description'
KEY_CAN_EXPLAIN = 'can_explain'
KEY_DESCRIPTION = 'description'
KEY_DISPLAY_NAME = 'display_name'
KEY_EXPLANATIONS = 'explanations'
KEY_EXPLANATION_SCOPES = 'explanation_scopes'
KEY_ID = 'id'
KEY_KEYWORDS = 'keywords'
KEY_METRICS_META = 'metrics_meta'
KEY_MODEL_TYPES = 'model_types'
KEY_NAME = 'name'
KEY_PARAMETERS = 'parameters'
KEY_TAGLINE = 'tagline'
clone() ExplainerDescriptor
dump() dict
static load(d: Dict) ExplainerDescriptor
class h2o_sonar.lib.api.explainers.ExplainerParam(param_name: str, param_type: ExplainerParamType | EvaluatorParamType, description: str = '', comment: str = '', default_value: bool | str | float = '', value_min: float = 0.0, value_max: float = 0.0, predefined: List | None = None, tags: List | None = None, category: str = '', src: str = '')

Bases: Param

Explainer parameter declaration.

SRC_ANY = 'any'
SRC_CONFIG_OVERRIDES = 'config_overrides'
SRC_CONFIG_OVERRIDES_ERASE = 'config_overrides_erase'
SRC_EVALUATOR_PARAMS = 'evaluator_params'
SRC_EXPLAINER_PARAMS = 'explainer_params'
TAG_SRC_DATASET_COLUMN_NAMES = 'SOURCE_DATASET_COLUMN_NAMES'
TAG_SRC_DATASET_TEXT_COLUMN_NAMES = 'SOURCE_DATASET_TEXT_COLUMN_NAMES'
as_descriptor() ConfigItem

Explainer parameter to descriptor conversion.

class h2o_sonar.lib.api.explainers.ExplainerRegistry(singleton_create_key)

Bases: object

The explainer registry provides the list of available out-of-the-box (OOTB) and registered explainers.

get_class(explainer_id) Type[Explainer] | None
list_explainers() Dict
load()

Load registry from configuration.

register(explainer_class, explainer_id: str = '') str
classmethod registry()
save()
unregister(explainer_id: str) str
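
Example - a minimal sketch of working with the registry singleton; MyExplainer refers to the custom explainer sketch above:

    from h2o_sonar.lib.api.explainers import ExplainerRegistry

    registry = ExplainerRegistry.registry()        # singleton accessor
    explainer_id = registry.register(MyExplainer)  # returns the explainer ID
    registry.list_explainers()                     # now includes MyExplainer
    registry.unregister(explainer_id)
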
class h2o_sonar.lib.api.explainers.ExplainerResult(persistence: ExplainerPersistence, explainer_id: str, explanation_format: Type[ExplanationFormat] | None, explanation: Type[Explanation] | None, h2o_sonar_config, logger=None)

Bases: ABC

abstract data(**kwargs) Frame
classmethod help() Dict[str, Dict[str, List[Dict[str, str | bool]]]]
log(*, path)
params() Dict
abstract plot(**kwargs)
summary() Dict
zip(*, file_path)
class h2o_sonar.lib.api.explainers.OnDemandExplainKey

Bases: object

On-demand explainer run parameter keys.

CLASS = 'class'
EXPLAINER_JOB_KEY = 'target_explainer_job_key'
EXPLANATION_TYPE = 'target_explanation_type'
FEATURE = 'feature'
FORMAT = 'target_format'
METHOD = 'method'
MLI_KEY = 'target_mli_key'
ROW = 'row'
UPDATE_STRATEGY = 'update_strategy'
class h2o_sonar.lib.api.explainers.OnDemandExplainMethod(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

explain = 1
explain_global = 2
explain_local = 3
class h2o_sonar.lib.api.explainers.SurrogateExplainer

Bases: Explainer, ABC

Surrogate model explainer.

KEYWORD_SURROGATE = 'surrogate'
abstract predict(X, y=None, **kwargs)

The surrogate explainer provides a predict method allowing one to get predictions from the surrogate model. This method is to be overridden by child classes.

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Union[datatable.Frame, Any]

Labels.

run_predict(X, y=None, **kwargs)

The surrogate explainer provides a predict method allowing one to get predictions from the surrogate model.

This method is invoked by the explainer execution engine (which can add code to be executed before/after the predict() overridden by child classes).

Parameters:
X: Union[datatable.Frame, Any]

Data frame.

y: Optional[Union[datatable.Frame, Any]]

Labels.

h2o_sonar.lib.api.explanations module

class h2o_sonar.lib.api.explanations.AbcHeatmapExplanation

Bases: ABC

COLOR_FATAL_ERROR = 'ff0000'
METRIC_ALL = 'ALL_METRICS'
PALETTE_BLUE = ['3d83ad', '5e9dc3', '96bcd3', 'c4dcea', 'eef4f8']
PALETTE_GREEN = ['40a481', '56b896', '71c9ab', '8ad9be', 'aaebd5']
PALETTE_RED = ['f2a7c1', 'f6bbd0', 'f7cfde', 'fae5ed', 'fdf3f7']
class h2o_sonar.lib.api.explanations.AutoReportExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

AutoReport explanation provides explanations in various document formats (Word, Markdown, …).

validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.CustomArchiveExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Explainer archive representation like zip or tgz.

validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.DiaExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.DurationStatsKey

Bases: object

Performance statistics keys.

AVG = 'avg'
MAX = 'max'
MIN = 'min'
N = 'n'
SUM = 'sum'
class h2o_sonar.lib.api.explanations.Explanation(explainer, display_name: str = '', display_category: str = '', has_local=None)

Bases: ABC

Base class of explainer explanations.

DISPLAY_CAT_AUTOREPORT = 'AUTOREPORT'
DISPLAY_CAT_COMPLIANCE = 'COMPLIANCE TESTS'
DISPLAY_CAT_CUSTOM = 'CUSTOM'
DISPLAY_CAT_DAI_MODEL = 'DAI MODEL'
DISPLAY_CAT_DATA = 'DATA'
DISPLAY_CAT_EXAMPLE = 'EXAMPLE'
DISPLAY_CAT_LLM = 'LLM'
DISPLAY_CAT_MOCK = 'MOCK'
DISPLAY_CAT_MODEL = 'MODEL'
DISPLAY_CAT_NLP = 'NLP'
DISPLAY_CAT_SURROGATES = 'SURROGATE MODELS'
DISPLAY_CAT_SURROGATES_ON_RES = 'SURROGATE MODELS ON RESIDUALS'
DISPLAY_CAT_TEMPLATE = 'TEMPLATE'
add_format(explanation_format: ExplanationFormat) None

Add explanation representation in a new format.

Parameters:
explanation_format: ExplanationFormat

New explanation representation.

classmethod as_class_descriptor() ExplanationDescriptor
as_descriptor() ExplanationDescriptor
property display_category: str
property display_name: str
property explainer
classmethod explanation_scope() str

Explanation scope - either global or local.

classmethod explanation_type() str

The explanation type may be any string identifier (either defined by this class or by the user ~ extensibility) which is used for validation and further processing. It must specify a unique explanation name and scope. Explanation formats are defined by child classes of this abstract class.

Format: <explanation_scope>-<explanation-type>

Example: global-feature-importance

property format_types: List[str]

Explanation formats provided by the explanation.

Representations are set by explanations as they are created. This is why the available format types are initialized as an empty instance field, not a class field.

Example:

["application/json", "application/vnd.h2oai.datatable", "application/zip" ]
Returns:
List[str]:

Representations (formats) of this explanation.

get_format(explanation_format: str) ExplanationFormat

Get the explanation in a specific representation.

property has_local: str

Does the explanation also have a related local explanation, and if so, which one?

Returns:
str:

Local explanation type.

classmethod is_global() bool

Is the explanation global or local?

abstract validate() bool

Method used to validate (sanity-check) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ExplanationDescriptor(explanation_type: str, name: str = '', category: str = '', scope: str = '', has_local: str = '', formats: List[str] = None)

Bases: object

KEY_CATEGORY = 'category'
KEY_EXPLANATION_TYPE = 'explanation_type'
KEY_FORMATS = 'formats'
KEY_HAS_LOCAL = 'has_local'
KEY_NAME = 'name'
KEY_SCOPE = 'scope'
clone() ExplanationDescriptor
dump() dict
static load(d: dict) ExplanationDescriptor
class h2o_sonar.lib.api.explanations.FlippedPerturbedTestCase(explainable_model_key: str, explainable_model: ExplainableRagModel | ExplainableLlmModel | None, metric_meta: MetricMeta, orig_row: LlmDatasetRow | None = None, orig_metric_value: float = 0.0, orig_pass: bool = False, perturbed_row: LlmDatasetRow | None = None, perturbed_metric_value: float = 0.0, perturbed_pass: bool = False, heat_threshold: float | None = None)

Bases: object

Represents a flipped perturbed test case serialized as an LLM dataset row or an evaluation result row.

copy() FlippedPerturbedTestCase
property good_to_bad: bool | None

True if the perturbation flipped the test case from PASSING the metric to FAILING it, else False.

property is_flip: bool | None
static is_flipped() bool
property llm_model_name: str
static resolve_metrics(metrics: Dict, metrics_meta: MetricsMeta) Dict[str, Tuple]

Resolve metric values and pass/fail status for a given set of metrics.

Parameters:
metrics: Dict

Dictionary with metrics.

metrics_meta: commons.MetricsMeta

Metrics metadata.

Returns:
Dict[str, Tuple[MetricMeta, float, bool]]

Dictionary which maps a metric ID to a tuple with the metric meta, the metric value, and the metric pass/fail status (based on the threshold and the higher-is-better/worse flag determined from the metadata).
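
A minimal usage sketch (the metric ID, values, and metadata are illustrative assumptions):

resolved = FlippedPerturbedTestCase.resolve_metrics(
    metrics={"accuracy": 0.91},  # metric ID -> metric value
    metrics_meta=metrics_meta,   # commons.MetricsMeta describing "accuracy"
)
meta, value, passed = resolved["accuracy"]  # (MetricMeta, float, bool)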

class h2o_sonar.lib.api.explanations.Global3dDataExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Explanation with per class and feature data frames for rendering of 3D charts like:

  • 3D bar chart

  • heatmap

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalDataFrameExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Generic explanation which doesn’t fit any other type.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalDtExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalGroupedBarChartExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalHtmlFragmentExplanation(explainer=None, evaluator=None, display_name: str = None, display_category: str = None)

Bases: Explanation

add_html_format(html: str)

Add HTML format.

static from_explanation(explainer, explanation, display_name: str = None, display_category: str = None, absolute_paths: bool = False, problems: Dict = None, is_raw_feature: bool = True, data_as_text: bool = True, logger=None) GlobalHtmlFragmentExplanation

Create HTML fragment explanation:

  • from GlobalFeatImpExplanation
    • with formats.HtmlFormat

  • from PartialDependenceJSonFormat
    • with formats.HtmlFormat

Parameters:
explainer

Explainer instance.

explanation

Explanation instance.

display_name: str

Custom display name.

display_category: str

Custom display category.

absolute_paths: bool

True to create the HTML representation with absolute paths to images and explanations, else False (default) to create relative paths.

problems: Dict

Dictionary mapping classes to the names of problematic features whose charts should be highlighted.

is_raw_feature: bool

True if the input explains original features, else False for transformed features.

data_as_text: bool

Generate HTML text for the chart data.

logger

Optional logger.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalLinePlotExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalNlpLocoExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalRuleExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalScatterPlotExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.GlobalSummaryFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.IndividualConditionalExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmBoolLeaderboardExplanation(evaluator, metrics_meta: MetricsMeta, display_name: str = None, display_category: str = None, key_2_evaluated_model: Dict = None, llm_host: LlmModelHostType = LlmModelHostType.RAG, do_eval_rc: bool = False, logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

LLM failure leaderboard - leaderboard data and formats for metrics of the BOOLEAN type, i.e. where it is possible to infer:

  • success / failure

  • pass / fail

  • true / false

for each test case (prompt + model) in the test set.

Leaderboard provides multiple aspects of the test results (sub-leaderboards):

  • summary leaderboard

  • most problematic prompts leaderboard

Multiple leaderboards within a format are supported via an index file:

  • index file:
    • key: leaderboard name

    • value: leaderboard file name
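
For example, such an index file might look like this (leaderboard and file names are illustrative):

{
    "summary": "leaderboard_summary.json",
    "most problematic prompts": "leaderboard_prompts.json"
}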

class AdditionalDetails(formatting, text)

Bases: tuple

formatting

Alias for field number 0

text

Alias for field number 1

DEFAULT_METRIC_THRESHOLD = 0.5
class Failure(doc_url, error_message, input, expected_output, output_condition, output_constraints, actual_output, actual_output_meta, fail_retrieval, fail_generation, fail_parse, ctx_bytes, ctx_chunks, row_key, model_key)

Bases: tuple

actual_output

Alias for field number 6

actual_output_meta

Alias for field number 7

ctx_bytes

Alias for field number 11

ctx_chunks

Alias for field number 12

doc_url

Alias for field number 0

error_message

Alias for field number 1

expected_output

Alias for field number 3

fail_generation

Alias for field number 9

fail_parse

Alias for field number 10

fail_retrieval

Alias for field number 8

input

Alias for field number 2

model_key

Alias for field number 14

output_condition

Alias for field number 4

output_constraints

Alias for field number 5

row_key

Alias for field number 13

KEY_INPUT_FAILURES = 'input_failures'
KEY_INPUT_FAILURES_COUNT = 'input_failures_count'
KEY_INPUT_FAILURES_GENERATION_COUNT = 'input_failures_generation_count'
KEY_INPUT_FAILURES_PARSE_COUNT = 'input_failures_parse_count'
KEY_INPUT_FAILURES_RETRIEVAL_COUNT = 'input_failures_retrieval_count'
KEY_INPUT_PASSES_COUNT = 'input_passes_count'
KEY_MODEL_FAILURES = 'model_failures'
KEY_MODEL_FAILURES_COUNT = 'model_failures_count'
KEY_MODEL_FAILURES_GENERATION_COUNT = 'model_failures_generation_count'
KEY_MODEL_FAILURES_PARSE_COUNT = 'model_failures_parse_count'
KEY_MODEL_FAILURES_RETRIEVAL_COUNT = 'model_failures_retrieval_count'
KEY_MODEL_PASSES_COUNT = 'model_passes_count'
KEY_RESULT_CHECK_ERR_MSG = 'result_error_message'
KEY_RESULT_CHECK_FAIL = 'model_failures'
KEY_RESULT_CHECK_FAIL_A = 'model_generation_failures'
KEY_RESULT_CHECK_FAIL_P = 'model_parse_failures'
KEY_RESULT_CHECK_FAIL_R = 'model_retrieval_failures'
KEY_RESULT_CHECK_OK = 'model_passes'
KEY_TOTAL_COST = 'total_cost'
KEY_TOTAL_TIME = 'total_time'
LEADERBOARD_METRICS_META = <h2o_sonar.lib.api.commons.MetricsMeta object>
METRIC_META_MODEL_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_GENERATION_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_PARSE_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_PASSES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_MODEL_RETRIEVAL_FAILURES = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_MODEL_FAILURES = 'model_failures'
METRIC_MODEL_GENERATION_FAILURES = 'model_generation_failures'
METRIC_MODEL_PARSE_FAILURES = 'model_parse_failures'
METRIC_MODEL_PASSES = 'model_passes'
METRIC_MODEL_RETRIEVAL_FAILURES = 'model_retrieval_failures'
add_evalstudio_markdown_format(title='Summary')
add_failure(llm_model_name: str, doc_url, error_message: str, i: str, context: List[str] | None, expected_output: str, output_constraints: List | None, output_condition: str, actual_output: str, actual_output_meta: List | None, duration, cost, fail_retrieval: bool = False, fail_generation: bool = False, fail_parse: bool = False, row_key: str = None, model_key: str = '')
add_json_format(llm_host: LlmModelHostType, metrics_meta: MetricsMeta | None = None, threshold: float | None = None)

Add JSON format for the leaderboard.

Parameters:
llm_host: commons.LlmModelHostType

LLM model host type.

metrics_meta: Optional[commons.MetricsMeta]

Metrics metadata overriding the leaderboard's metrics - keys are expected to be identical, but the caller can customize names, descriptions, and other metrics metadata.

threshold: Optional[float]

Threshold for metrics - if not provided, the default metric threshold is used.

add_markdown_format(title='Benchmarks')
add_pass(llm_model_name: str, i: str, context: List[str] | None, duration: float, cost: float, row_key: str, model_key: str = '')
add_total_cost(llm_model_name: str, cost: float)
add_total_time(llm_model_name: str, duration: float)
as_dict() Dict

All leaderboard data as dictionary.

as_evalstudio_markdown(title: str = 'Summary', top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
title: str

Title of the leaderboard.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

Returns:
str

Markdown representation of the leaderboard.

as_html(title: str = 'RAG Benchmark', include_header: bool = False, include_by_accuracy: bool = True, include_by_time: bool = True, include_by_cost: bool = True, additional_details: Dict | None = None) str
as_leaderboard_dict(metrics_meta: MetricsMeta | None = None, threshold: float | None = None) Dict

Create leaderboard dictionary: model -> metric -> value.

By convention, the leaderboard data are always normalized - two options:

  • <0, 1> range for metrics

  • <0, 100> range for percentages

There are never absolute values like counts, times, or durations.
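
For example, a returned leaderboard dictionary might look like this (model names and values are illustrative; the metric IDs are the class constants above):

{
    "llm-model-a": {"model_passes": 0.92, "model_failures": 0.08},
    "llm-model-b": {"model_passes": 0.85, "model_failures": 0.15}
}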

Parameters:
metrics_meta: Optional[commons.MetricsMeta]

Metrics metadata overriding the leaderboard's metrics - keys are expected to be identical, but the caller can customize names, descriptions, and other metrics metadata.

threshold: Optional[float]

Threshold for metrics - if not provided, the default metric threshold is used.

as_markdown(title: str = 'Benchmark', extended: bool = True) str

Markdown representation of the leaderboard.

Parameters:
title: str

Title of the markdown report.

extended: bool

Extended report (for the h2oGPTe benchmark).

Returns:
str

Markdown representation of the leaderboard.

build()

Analyze, explain, aggregate, and build the leaderboard data so that the leaderboard is ready to be rendered when its HTML representation is built.

check_and_report_negative_cost(cost: float, llm_model_name, i: str, row_key: str, model_key: str) float

Create a problem for negative cost.

evaluation_cost()

Total evaluation cost.

static from_eval_results(evaluator, eval_results, metrics_meta: MetricsMeta, metric_id_success: str, metric_id_failure_message: str, display_name: str = None, display_category: str = None, key_2_evaluated_model: Dict = None, llm_host: LlmModelHostType = LlmModelHostType.RAG, do_eval_rc: bool = False, logger=None) LlmBoolLeaderboardExplanation

Create LLM leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metrics metadata.

metric_id_success: str

Metric ID for the success indicator.

metric_id_failure_message: str

Metric ID for the failure message.

display_name: str

Custom display name.

display_category: str

Custom display category.

key_2_evaluated_model: Dict

Map: key -> RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

do_eval_rc: bool

Whether to show retrieval correctness.

logger

Optional logger.

get_insights(insight_type: str = 'accuracy', quality: str = 'accurate', extra_description_actions: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the boolean leaderboard.

Parameters:
insight_type: str

Insight type.

quality: str

Model quality.

extra_description_actions: str

Additional description for actions.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

static key_2_rag_type_prefix(evaluated_models) Dict
sort_models_leaderboard(sort_by: Dict[str, int | float], reverse: bool = True)
sort_prompts_by_empty_ctxs(reverse: bool = True) List[str]
sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
static summary_as_markdown(md: str, metrics_count: int, llm_host: LlmModelHostType, m_failures_count: Dict, i_failures_count: Dict, key_2_evaluated_model: Dict, cost_source=None) str
validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmClassifierLeaderboardExplanation(evaluator, eval_results, model_2_metrics: Dict, model_2_confusion_matrix: Dict, classes: List[str], false_positives: Dict[str, List[LlmEvalResultRow]], false_negatives: Dict[str, List[LlmEvalResultRow]], i_passes_count: Dict[str, int], metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = '', display_category: str = '', logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

LLM classification leaderboard explanation.

DEFAULT_METRIC_THRESHOLD = 0.75
METRIC_ACCURACY = 'accuracy'
METRIC_F1 = 'f1'
METRIC_META_ACCURACY = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_F1 = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_PRECISION = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_META_RECALL = <h2o_sonar.lib.api.commons.MetricMeta object>
METRIC_PRECISION = 'precision'
METRIC_RECALL = 'recall'
add_evalstudio_markdown_format(sort_by_metric_id: str, title: str = 'Summary')
add_json_format(threshold: float | None = None) LlmHeatmapLeaderboardJSonFormat

Add JSon format.

add_markdown_format(sort_by_metric_id: str, title: str = 'Evaluation Report')
as_dict(threshold: float | None = None) Tuple[Dict, Dict]

Return leaderboard as dictionary.

Parameters:
threshold: Optional[float]

Threshold for metrics - if not provided, the default metric threshold is used.

Returns:
Tuple[Dict, Dict]

Leaderboard data dictionary and metric EDA (min, max, …) dictionary.
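
A usage sketch (the explanation instance is assumed to exist; the exact EDA keys are an assumption):

leaderboard, metric_eda = explanation.as_dict(threshold=0.75)
# leaderboard: model -> metric -> value
# metric_eda: per-metric EDA statistics such as min and max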

as_html(sort_by_metric_id: str, html_src=None, include_failures: bool = True, include_prompts_by_metrics: bool = True, additional_details: Dict | None = None) str

Create HTML snippet with:

  • per-metrics heatmap table

  • per-metrics confusion matrix

as_markdown(sort_by_metric_id: str, title: str = 'Evaluation Report', heading_level: str = '#', include_metrics_leaderboards: bool = True, top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
sort_by_metric_id: str

Metric ID to sort models by.

title: str

Title of the leaderboard.

heading_level: str

Heading level.

include_metrics_leaderboards: bool

Include per-metrics leaderboards.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

Returns:
str

Markdown representation of the leaderboard.

build()

Build leaderboard.

static from_eval_results(evaluator, eval_results, model_2_metrics: Dict, model_2_confusion_matrix: Dict, classes: List[str], metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = None, display_category: str = None, logger=None) LlmClassifierLeaderboardExplanation

Create Classification leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

model_2_metrics: Dict

Map: model name -> metric ID -> metric value.

model_2_confusion_matrix: Dict

Map: model name -> confusion matrix.

classes: List[str]

List of classes.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metrics metadata.

key_2_evaluated_model: Dict

Map: key -> LLM@RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

display_name: str

Custom leaderboard display name.

display_category: str

Custom leaderboard display category.

logger

Optional logger.

get_insights(extra_description_best: str = '', extra_description_worst: str = '', insight_type: str = 'accuracy', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the classifier leaderboard (based on accuracy metric).

Parameters:
extra_description_best: str

Additional description for insights related to the best models.

extra_description_worst: str

Additional description for insights related to the worst models.

insight_type: str

Insight type.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmEvalResultsExplanation(evaluator, eval_results, display_name: str = None, display_category: str = None)

Bases: Explanation

KEY_EVALUATOR = 'evaluator'
KEY_MODELS = 'models'
KEY_RESULTS = 'results'
add_csv_format()

Add CSV format.

add_datatable_format()

Add datatable format.

add_json_format()

Add JSon format.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmHeatmapLeaderboardExplanation(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, nan_tolerance: float = 0.0, display_name: str = '', display_category: str = '', logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

Heatmap leaderboard explanation provides data and formats for a leaderboard which is colorized as a heatmap based on metric values.

LLM_MODEL_ANONYMOUS = 'model'
add_col_value(llm_model_name: str, docs: str, prompt: str, metrics_id: str, value: float, result_row)

Add entry to the data dictionary used to build formatted tables later.

add_evalstudio_markdown_format(sort_by_metric_id: str, title: str = 'Summary')
add_json_format(threshold: float | None = None) LlmHeatmapLeaderboardJSonFormat

Add JSon format.

add_markdown_format(sort_by_metric_id: str, title: str = 'Evaluation Report')
as_dict(threshold: float | None = None) Tuple[Dict, Dict]

Return leaderboard as dictionary.

Parameters:
threshold: Optional[float]

Threshold for the metrics.

Returns:
Tuple[Dict, Dict]

Leaderboard data dictionary and metric EDA (min, max, …) dictionary.

as_html(sort_by_metric_id: str, html_src=None, include_failures: bool = True, include_prompts_by_metrics: bool = True, additional_details: Dict | None = None) str

Create HTML snippet with:

  • metrics heatmap table

  • failures section: model -> document -> prompt -> [metrics] -> value

as_markdown(sort_by_metric_id: str, title: str = 'Evaluation Report', heading_level: str = '#', top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
sort_by_metric_id: str

Metric ID to be used as the FIRST one to sort the table. The method then renders tables for all other metrics (each sorted by that particular metric).

title: str

Title of the leaderboard.

heading_level: str

Markdown title heading level.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

build()

Analyze, explain, aggregate, and build the leaderboard data so that the leaderboard is ready to be rendered when its HTML representation is built.

static from_eval_results(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, nan_tolerance: float = 0.0, display_name: str = None, display_category: str = None, logger=None) LlmHeatmapLeaderboardExplanation

Create Heatmap leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metadata of the metrics to be evaluated.

key_2_evaluated_model: Dict

Map: key -> LLM@RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

nan_tolerance: float

Tolerance for NaN values in the evaluation results.

display_name: str

Custom leaderboard display name.

display_category: str

Custom leaderboard display category.

logger

Optional logger.

get_insights(metrics_meta: MetricsMeta, metric_id: str = '', metric_name_protection: bool = False, extra_description_best: str = '', extra_description_worst: str = '', insight_type: str = 'accuracy', model_purpose: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the heatmap leaderboard.

Parameters:
metrics_meta: commons.MetricsMeta

Metrics metadata.

metric_id: str

Optional metric ID to create insights for. If not specified, insights are created for the primary metrics as specified by the metrics metadata.

metric_name_protection: bool

If True, the metric ID is not changed to lowercase.

extra_description_best: str

Additional description for insights related to the best models.

extra_description_worst: str

Additional description for insights related to the worst models.

insight_type: str

Insight type.

model_purpose: str

Model purpose.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
static truncate(f, n)

Truncates a float f to n decimal places without rounding.
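
A minimal sketch of such truncation (an illustrative equivalent, not necessarily the exact implementation):

import math

def truncate(f: float, n: int) -> float:
    # Drop (do not round) everything past n decimal places.
    return math.trunc(f * 10 ** n) / 10 ** n

truncate(0.1999, 2)  # 0.19, whereas round(0.1999, 2) gives 0.2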

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LlmLeaderboardExplanation

Bases: object

static get_leaderboard_data_path(evaluation, evaluator_id, explanation_format: str = 'application/json', metric: str = 'ALL_METRICS')
static markdown_connection_stats_table(evaluated_models_list: List[ExplainableLlmModel])
class h2o_sonar.lib.api.explanations.LlmProcedureEvalLeaderboardExplanation(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = '', display_category: str = '', logger=None)

Bases: Explanation, LlmLeaderboardExplanation, AbcHeatmapExplanation

ProcedureEval leaderboard explanation provides data and formats for a procedure-evaluation leaderboard which is colorized based on metrics values.

KEY_ALIGNMENT_MATRIX = 'alignment_matrix'
KEY_DYN_PROG_MATRIX = 'dyn_prog_matrix'
LLM_MODEL_ANONYMOUS = 'model'
add_col_value(llm_model_name: str, docs: str, prompt: str, metrics_id: str, value: float, result_row)

Add entry to the data dictionary used to build formatted tables later.

add_evalstudio_markdown_format(sort_by_metric_id: str, title: str = 'Summary')
add_json_format(threshold: float | None = None) LlmHeatmapLeaderboardJSonFormat

Add JSon format.

add_markdown_format(sort_by_metric_id: str, title: str = 'Evaluation Report')
as_dict(threshold: float | None = None) Tuple[Dict, Dict]

Return leaderboard as dictionary.

Parameters:
threshold: Optional[float]

Threshold for the metrics.

Returns:
Tuple[Dict, Dict]

Leaderboard data dictionary and metric EDA (min, max, …) dictionary.

as_html(sort_by_metric_id: str, html_src=None, include_failures: bool = True, include_prompts_by_metrics: bool = True, additional_details: Dict | None = None) str

Create HTML snippet with:

  • metrics procedure_eval table

  • failures section: model -> document -> prompt -> [metrics] -> value

as_markdown(sort_by_metric_id: str, title: str = 'Evaluation Report', heading_level: str = '#', top: int = 3) str

Return Markdown representation of the leaderboard for EvalStudio.

Parameters:
sort_by_metric_id: str

Metric ID to be used as the FIRST one to sort the table. The method then renders tables for all other metrics (each sorted by that particular metric).

title: str

Title of the leaderboard.

heading_level: str

Markdown title heading level.

top: int

Number of top entries (model failures, prompt failures, empty-context prompts, …) to include; 0 includes all entries. The motivation is to avoid LONG reports listing every failure and prompt - this is just a summary.

build()

Analyze, explain, aggregate, and build the leaderboard data so that the leaderboard is ready to be rendered when its HTML representation is built.

static from_eval_results(evaluator, eval_results, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, llm_host: LlmModelHostType = LlmModelHostType.RAG, display_name: str = None, display_category: str = None, logger=None) LlmProcedureEvalLeaderboardExplanation

Create ProcedureEval leaderboard explanation from the evaluation results.

Parameters:
evaluator

Evaluator instance.

eval_results: datasets.LlmEvalResults

Evaluation results.

metrics_meta: commons.MetricsMeta

Metrics metadata.

key_2_evaluated_model: Dict

Map: key -> LLM@RAG/LLM model.

llm_host: commons.LlmModelHostType

LLM host type - either a RAG (with retrieval) or an LLM (generation only).

display_name: str

Custom leaderboard display name.

display_category: str

Custom leaderboard display category.

logger

Optional logger.

get_insights(metrics_meta: MetricsMeta, metric_id: str = '', metric_name_protection: bool = False, extra_description_best: str = '', extra_description_worst: str = '', insight_type: str = 'accuracy', model_purpose: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '') None

Create insights for the procedure_eval leaderboard.

Parameters:
metrics_meta: commons.MetricsMeta

Metrics metadata.

metric_id: str

Optional metric ID to create insights for. If not specified, insights are created for the primary metrics as specified by the metrics metadata.

metric_name_protection: bool

If True, the metric ID is not changed to lowercase.

extra_description_best: str

Additional description for insights related to the best models.

extra_description_worst: str

Additional description for insights related to the worst models.

insight_type: str

Insight type.

model_purpose: str

Model purpose.

explanation_type: str

Type of the explanation which can clarify the insight.

explanation_name: str

Name of the explanation which can clarify the insight.

explanation_mime: str

Media type of the explanation which can clarify the insight.

sort_prompts_by_failures(sort_by: Dict[str, int | float], reverse: bool = True)
static truncate(f, n)

Truncates a float f to n decimal places without rounding.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalDataFrameExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Generic explanation which doesn’t fit any other type.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalDtExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalHtmlSnippetExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalNlpLocoExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalRuleExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalSummaryFeatImpExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocalTextSnippetExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.LocoExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ModelValidationResultExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Model validation result explanation is an (archived) tree of directories and documents created by an H2O MV based explainer.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.NlpTokenizerExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.OnDemandExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

On-demand explanations are typically used for ad-hoc local on-demand explainer execution by the explainer executor.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.PartialDependenceExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

KEYWORD_CAN_ADD_FEATURE = 'can-add-feature'
validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ProxyExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Proxy explanation is provided by parent explainers.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.ReportExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Generic report explanation provides explanations in various document formats (like Word, Markdown, …).

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.SaExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.TextExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.TimeSeriesAppExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

class h2o_sonar.lib.api.explanations.WorkDirArchiveExplanation(explainer, display_name: str = None, display_category: str = None)

Bases: Explanation

Explainer work directory explanation provides various work dir archive representations like zip or tgz.

validate() bool

Method used to validate (perform a sanity check on) the canonical result (frame) produced by the explainer so that it can be subsequently processed without problems, e.g. by the grammar of MLI visualization components.

h2o_sonar.lib.api.explanations.diagnose_perturbation_flips(eval_results: LlmEvalResults, metrics_meta: MetricsMeta, key_2_evaluated_model: Dict, logger=None) Dict[str, Dict[str, FlippedPerturbedTestCase]]

Diagnose perturbation flips.

Returns:
Dict

Map: original row key -> perturbed row key -> FlippedPerturbedTestCase.
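
A usage sketch for iterating the returned mapping (variable names are illustrative):

flips = diagnose_perturbation_flips(eval_results, metrics_meta, key_2_evaluated_model)
for orig_key, perturbed in flips.items():
    for pert_key, flip in perturbed.items():
        if flip.good_to_bad:
            print(f"{orig_key} -> {pert_key}: flipped from pass to fail")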

h2o_sonar.lib.api.formats module

class h2o_sonar.lib.api.formats.CsvFormatCustomExplanationFormat(explanation, frame: Frame, frame_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat

add_data(format_data: Frame, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

get_data(file_name: str | None = None)
mime: str = 'text/csv'
class h2o_sonar.lib.api.formats.CustomArchiveZipFormat(explanation, format_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Custom ZIP archive representation.

mime: str = 'application/zip'
class h2o_sonar.lib.api.formats.CustomCsvFormat(explanation, frame: Frame, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of custom CSV format.

mime: str = 'text/csv'
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.CustomJsonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of custom JSon format.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.DatatableCustomExplanationFormat(explanation, frame: Frame, frame_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat

add_data(format_data: Frame, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

get_data(file_name: str | None = None)
mime: str = 'application/vnd.h2oai.datatable.jay'
class h2o_sonar.lib.api.formats.DiaTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Disparate Impact Analysis (DIA) text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.DocxFormat(explanation, format_file: str, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Open docx document.

mime: str = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.EvalStudioMarkdownFormat(explanation, format_file: str, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: MarkdownFormat

EvalStudio Markdown representation (text and images).

mime: str = 'application/vnd.h2oai-evalstudio-leaderboard.markdown'
class h2o_sonar.lib.api.formats.ExplanationFormat(explanation, format_data, format_file: str | None, extra_format_files: List | None = None, file_extension: str = 'bin', persistence: Persistence | None = None)

Bases: ABC

Base class of explanation representation.

A representation is a serialization of an explanation in a format like JSon or CSV. A representation has a MIME type and can be formed by one or more files, but at least one file must be provided.

DEFAULT_PAGE_SIZE = 20
FEATURE_TYPE_CAT = 'categorical'
FEATURE_TYPE_CAT_NUM = 'catnum'
FEATURE_TYPE_DATE = 'date'
FEATURE_TYPE_DATETIME = 'datetime'
FEATURE_TYPE_NUM = 'numeric'
FEATURE_TYPE_TIME = 'time'
FILE_PREFIX_EXPLANATION_IDX = 'explanation.'
KEYWORD_RESIDUALS = 'residuals'
KEY_ACTION = 'action'
KEY_ACTION_TYPE = 'action_type'
KEY_ACTUAL = 'actual'
KEY_BIAS = 'bias'
KEY_CATEGORICAL = 'categorical'
KEY_DATA = 'data'
KEY_DATA_HISTOGRAM = 'data_histogram'
KEY_DATA_HISTOGRAM_CAT = 'data_histogram_categorical'
KEY_DATA_HISTOGRAM_NUM = 'data_histogram_numerical'
KEY_DATE = 'date'
KEY_DATE_TIME = 'datetime'
KEY_DEFAULT_CLASS = 'default_class'
KEY_DOC = 'documentation'
KEY_EXPLAINER_JOB_KEY = 'explainer_job_key'
KEY_FEATURES = 'features'
KEY_FEATURE_TYPE = 'feature_type'
KEY_FEATURE_VALUE = 'feature_value'
KEY_FILES = 'files'
KEY_FILES_DETAILS = 'files_details'
KEY_FILES_NUMCAT_ASPECT = 'files_numcat_aspect'
KEY_FULLNAME = 'full_name'
KEY_ID = 'id'
KEY_IS_MULTI = 'is_multinomial'
KEY_ITEM_ORDER = 'order'
KEY_KEYWORDS = 'keywords'
KEY_LABEL = 'label'
KEY_METADATA = 'metadata'
KEY_METRICS = 'metrics'
KEY_MIME = 'mime'
KEY_MLI_KEY = 'mli_key'
KEY_NAME = 'name'
KEY_NUMERIC = 'numeric'
KEY_ON_DEMAND = 'on_demand'
KEY_ON_DEMAND_PARAMS = 'on_demand_params'
KEY_PAGE_OFFSET = 'page_offset'
KEY_PAGE_SIZE = 'page_size'
KEY_RAW_FEATURES = 'raw_features'
KEY_ROWS_PER_PAGE = 'rows_per_page'
KEY_RUNNING_ACTION = 'running-action'
KEY_SCOPE = 'scope'
KEY_SYNC_ON_DEMAND = 'synchronous_on_demand_exec'
KEY_TIME = 'time'
KEY_TOTAL_ROWS = 'total_rows'
KEY_VALUE = 'value'
KEY_Y_FILE = 'y_file'
LABEL_REGRESSION = 'None (Regression)'
SCOPE_GLOBAL = 'global'
SCOPE_LOCAL = 'local'
add_data(format_data: str, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

add_file(format_file: str, file_name: str | None = None) str

Copy a file into the representation as a new explanation representation file.

Parameters:
format_file:

Source file to store (copy) as new explanation’s format file.

file_name: str

Representation file name or file relative path.

property explanation
property file_names: List[str]

Get file names which form the representation.

Hints:

  • the representation is formed by a flat structure of files without directories

  • representation data are not kept in memory - the list of file names is sufficient

get_data(file_name: str | None = None)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get the local explanation for the given dataset row and the feature/class/… specified by the explanation filter. The local explanation is returned as a string.

classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

Get global explanation page.

property index_file_name: str

Get (mandatory) index file name which typically references all other files along with various metadata.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_meta(persistence: ExplainerPersistence, explanation_type: str, explanation_format: str) dict

Load representation metadata with class identifier and MIME.

mime: str = None
update_data(format_data: str, file_name: str | None = None)
class h2o_sonar.lib.api.formats.ExplanationFormatUtils

Bases: object

static get_page(data, page_offset: int, page_size: int)

Get page of given data.

Parameters:
data:

Data to page.

page_offset: int

Page offset as a non-negative integer.

page_size: int

Page size; all data entries are returned if 0 or a negative integer.
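
A minimal usage sketch (the list data is illustrative):

data = list(range(100))
# first page of 20 entries; a page_size of 0 or less would return all entries
page = ExplanationFormatUtils.get_page(data, page_offset=0, page_size=20)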

class h2o_sonar.lib.api.formats.Global3dDataJSonCsvFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of global 3D data (3D bar charts, heatmaps, …) as CSV files with JSon index.

JSon representation index file example:

{
    "features": {
        "PAY_0 and AGE": {
            "order": 0,
            "feature_names": ["PAY_0", "AGE"],
            "files": {
                "red_class": "data3d_feature_0_class_0.csv",
                "green_class": "data3d_feature_0_class_1.csv",
                "blue_class": "data3d_feature_0_class_2.csv"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "documentation": "PD for 2 features..."
}

CSV representation data file example:

,feature_1_bin_1,feature_1_bin_2,feature_1_bin_3
feature_2_bin_1,1,1,1
feature_2_bin_2,2,2,2
feature_2_bin_3,3,3,3
KEY_FEATURE_NAMES = 'feature_names'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') Dict
mime: str = 'application/vnd.h2oai.json+csv'
static serialize_index_file(features: List[str], features_names: List[List[str]], classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'data3d', data_file_suffix: str = 'csv', y_file: str | None = None) Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
features: list

Feature tuples.

features_names: list

Per-feature tuple feature names.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}]

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.Global3dDataJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of global 3D data (3D bar charts, heatmaps, …) as JSon.

JSon representation index file example:

{
    "features": {
        "PAY_0 and AGE": {
            "order": 0,
            "feature_names": ["PAY_0", "AGE"],
            "files": {
                "red_class": "data3d_feature_0_class_0.json",
                "green_class": "data3d_feature_0_class_1.json",
                "blue_class": "data3d_feature_0_class_2.json"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "documentation": "PD for 2 features..."
}

JSon representation data file example:

"data_dictionary": {
    {
        "feature_1_bin_1": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        },
        "feature_1_bin_2": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        },
        "feature_1_bin_3": {
            "feature_2_bin_1": 1,
            "feature_2_bin_2": 2,
            "feature_2_bin_3": 3
        }
    }
}

Where:

  • data_dictionary is a dictionary which can be used to easily construct a data frame whose column and row labels represent bin values (see the sketch below)

  • the data key is intentionally not used so that it can be used in the future for Grammar of MLI/Vega friendly representations (as in the case of other formats)
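
A sketch of constructing such a data frame from data_dictionary (pandas is used here purely for illustration):

import pandas as pd

data_dictionary = {
    "feature_1_bin_1": {"feature_2_bin_1": 1, "feature_2_bin_2": 2, "feature_2_bin_3": 3},
    "feature_1_bin_2": {"feature_2_bin_1": 1, "feature_2_bin_2": 2, "feature_2_bin_3": 3},
}
frame = pd.DataFrame(data_dictionary)  # columns = feature 1 bins, index = feature 2 bins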

KEY_FEATURE_NAMES = 'feature_names'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') Dict
mime: str = 'application/json'
static serialize_index_file(features: List[str], features_names: List[List[str]], classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'data3d', data_file_suffix: str = 'json', y_file: str | None = None) Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
features: list

Feature tuples.

features_names: list

Per-feature tuple feature names.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}]

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalDtJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of a decision tree as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "dt_class_0.json",
        "green_class": "dt_class_1.json",
        "blue_class": "dt_class_2.json",
        ...
    },
    "metrics": [
      {"Training RMSE": 0.96},
      {"CV RMSE": 0.97},
      {"NFolds": 3},
      {"R2": 0.96}
    ]
}

JSon representation data file example:

{
    data: [
        {
          key: str,
          name: str,
          parent: str,
          edge_in: str,
          edge_weight: num,
          leaf_path: bool,
          total_weight: num,
          weight: num,
        }+
    ]
}
KEY_CHILDREN = 'children'
KEY_EDGE_IN = 'edge_in'
KEY_EDGE_WEIGHT = 'edge_weight'
KEY_KEY = 'key'
KEY_LEAF_PATH = 'leaf_path'
KEY_NAME = 'name'
KEY_PARENT = 'parent'
KEY_TOTAL_WEIGHT = 'total_weight'
KEY_WEIGHT = 'weight'
class TreeNode(name: str, parent: Any | None, edge_in: str | None, edge_weight: float | None, total_weight: float | None, weight: float | None, leaf_path: bool = False, key: str = '0')

Bases: object

to_dict() dict
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/json'
static serialize_data_file(dt_root_node) str

JSon data file serialization to string.

Parameters:
dt_root_node: TreeNode

Object representation root node.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', metrics: List | Dict | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one) e.g. the class of interest in case of binomial experiment interpretation.

metrics: list

Optional list of PD related metrics e.g. [{"RMSE": 0.02}, {"SD": 3.1}] in case of binomial/regression or dictionary (per class key, metrics list as value) in case of multinomial.

doc: str

Documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalFeatImpDatatableFormat(explanation, frame: Frame, frame_file: str | None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Global feature importance datatable representation.

Canonical representation (datatable frame, ltypes):

| Required column    | Type  | Description        |
|--------------------|-------|--------------------|
| feature_name       | str   | Feature name.      |
| feature_importance | real  | Feature importance |

… other optional columns are allowed

COL_IMPORTANCE = 'feature_importance'
COL_NAME = 'feature_name'
static from_lists(explanation, features: list, importances: list)
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(frame_data: Frame) Frame
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonCsvFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonDatatableFormat

static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat) GlobalFeatImpJSonCsvFormat
mime: str = 'application/vnd.h2oai.json+csv'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'csv') Tuple[Dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Global feature importance JSon (index file) and datatable (data files) representation.

The typical use of JSon+datatable feature importance representation:

feat_imp_json_dt = GlobalFeatImpJSonDatatableFormat(...create...)
# ... get other representations for free:
feat_imp_json = GlobalFeatImpJSonFormat.from_json_datatable(feat_imp_json_dt)
feat_imp_json_csv = GlobalFeatImpJSonCsvFormat.from_json_datatable(feat_imp_json_dt)

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.jay",
        "green_class": "feature_importance_class_1.jay",
        "blue_class": "feature_importance_class_2.jay",
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592
}

Datatable representation data file spec (datatable frame, ltypes; other optional columns are allowed):

| Required column    | Type  | Description                           |
|--------------------|-------|---------------------------------------|
| feature_name       | str   | Feature name.                         |
| feature_importance | real  | Feature importance                    |
| global_scope       | bool  | Global/local feature importance scope |

Datatable representation data file example:

   | feature_name  feature_importance  global_scope
-- + ------------  ------------------  ------------
 0 | feature-a                    1.1             1
 1 | feature-b                    2.2             1
COL_GLOBAL_SCOPE = 'global_scope'
COL_IMPORTANCE = 'feature_importance'
COL_NAME = 'feature_name'
add_data_frame(format_data: Frame, file_name: str | None = None)
static dict_to_data_frame(feature_importances: Dict[str, float], scope: str = 'global') Frame

(Typical) feature importance dictionary to data frame conversion.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

Returns:
dt.Frame:

Data file.
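
For example, a hypothetical call (feature names and importances are illustrative):

frame = GlobalFeatImpJSonDatatableFormat.dict_to_data_frame(
    feature_importances={"PAY_0": 0.83, "AGE": 0.41},
    scope="global",
)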

static from_lists(explanation, features: list, importances: list)
get_data(file_name: str | None = None)
classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

Get global explanation page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalFeatImpJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.json",
        "green_class": "feature_importance_class_1.json",
        "blue_class": "feature_importance_class_2.json",
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592
}

JSon representation data file example:

{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
    bias: num
}

Where:

  • label is feature name

  • value is feature importance

  • scope is either local or global

KEY_LABEL = 'label'
KEY_VALUE = 'value'
static from_dataframe_to_json(frame: Frame, bias_col: str = None) str
static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat, bias_col: str = None) GlobalFeatImpJSonFormat
classmethod get_global_explanation(persistence: ExplainerPersistence, explanation_type: str) str

Get global feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_data_file(feature_importances: Dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: optional float

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).
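
A hypothetical call which would produce an index like the example above (class names and metric values are illustrative):

files_map, index_json = GlobalFeatImpJSonFormat.serialize_index_file(
    classes=["red_class", "green_class", "blue_class"],
    default_class="red_class",
    metrics=[{"R2": 0.96}, {"RMSE": 0.03}],
    total_rows=592,
)
# files_map maps classes to data file names,
# e.g. {"red_class": "feature_importance_class_0.json", ...}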

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalGroupedBarChartJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Global grouped bar chart JSon (index file) and datatable (data files) representation.

COL_X = 'x'
COL_Y_GROUP_1 = 'y_group_1'
COL_Y_GROUP_2 = 'y_group_2'
add_data_frame(format_data: Frame, file_name: str | None = None)
get_data(file_name: str | None = None)
classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') Dict
mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]
static validate_data(json_data)
class h2o_sonar.lib.api.formats.GlobalLinePlotJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonFormat, GrammarOfMliFormat

class h2o_sonar.lib.api.formats.GlobalNlpLocoJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_class_0.json",
        "green_class": "feature_importance_class_1.json",
        "blue_class": "feature_importance_class_2.json",
        ...
    },
    "filters": [
        {
            "type": "text_features",
            "name": "TEXT FEATURES",
            "description": "Model text features",
            "values": ["description", "review"]
        }
    ],
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 592
}

JSon representation data file example:

{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
    bias: num
}

Where:

  • label is feature name

  • value is feature importance

  • scope is either local or global

FILTER_TYPE_TEXT_FEATURES = 'text_feature'
KEY_DESCRIPTION = 'description'
KEY_FILTERS = 'filters'
KEY_LABEL = 'label'
KEY_NAME = 'name'
KEY_TYPE = 'type'
KEY_VALUE = 'value'
KEY_VALUES = 'values'
static from_dataframe_to_json(frame: Frame, bias_col: str = None) str
static from_json_datatable(json_dt_format: GlobalFeatImpJSonDatatableFormat, bias_col: str = None) GlobalFeatImpJSonFormat
classmethod get_global_explanation(persistence: ExplainerPersistence, explanation_type: str) str

Get global feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

Get global explanation page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_data_file(feature_importances: Dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: optional float

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', filters: list | None = None, metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

filters: list

Optional list of per-filter items used to filter data entries.

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalScatterPlotJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonFormat, GrammarOfMliFormat

class h2o_sonar.lib.api.formats.GlobalSummaryFeatImpJsonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global summary feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "feature_importance_summary_class_0.jay",
        "green_class": "feature_importance_summary_class_1.jay",
        "blue_class": "feature_importance_summary_class_2.jay",
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 25
}

Where:

  • total_rows is the number of features.

Getting data file:

> datatable.fread("feature_importance_summary_class_2.jay")

JSon representation data file example:

   |  feature   shapley_value   count   avg_high_value   clazz   order
-- + --------- --------------- ------- ---------------- ------- -------
 0 |  PAY_0      0.390716        0      0.390716         "red"   0
 1 |  PAY_0     -0.386815       25      0.38681          "red"   0
 ...
 . |  AGE        0.425908       17      0.425908         "red"   1
 ...

Where:

  • feature is the feature name (y-axis)

  • shapley_value is the Shapley value (x-axis)

  • count is the frequency of the Shapley value (height, normalized to [0, 1])

  • avg_high_value is the average feature value (color) normalized to [0, 1] (0 if the feature value is low, 1 if it is high) in case of numerical features, None in case of categorical features.

  • order is the feature order which ensures "order by feature importance" paging

KEY_FEATURE = 'feature'
KEY_FREQUENCY = 'count'
KEY_HIGH_VALUE = 'avg_high_value'
KEY_ORDER = 'order'
KEY_SHAPLEY = 'shapley_value'
add_data_frame(format_data: Frame, file_name: str | None = None)
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'summary_feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Documentation.

total_rows: int

Total number of rows (which can be used for pagination).

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of classes to file names AND JSon serialization (as string).

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GlobalSummaryFeatImpJsonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of global summary feature importance explanation as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": {
            "0": "feature_importance_class_0_offset_0.json",
            "10": "feature_importance_class_0_offset_10.json",
            "20": "feature_importance_class_0_offset_20.json"
        },
        "green_class": {
            ...
        },
        "blue_class":  {
            "0": "feature_importance_class_2_offset_0.json",
            "10": "feature_importance_class_2_offset_10.json",
            "20": "feature_importance_class_2_offset_20.json"
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}],
    "total_rows": 25,
    "rows_per_page": 10
}

Where:

  • Every class dictionary has a per-page offset key with the JSon file containing the chart for the given page. The offset is based on the number of rows (features) per page.

  • total_rows is the number of features.

  • rows_per_page is the number of features in every file (one file is created per page)

JSon representation data file example:

{
    data: [
        {
            feature: str,
            shapley_value: num,
            count: num,
            avg_high_value: num,
            order: num,
        }+
    ]
}

Where:

  • feature is the feature name (y-axis)

  • shapley_value is the Shapley value (x-axis)

  • count is the frequency of the Shapley value (height, normalized to [0, 1])

  • avg_high_value is the average feature value (color) normalized to [0, 1] (0 if the feature value is low, 1 if it is high) in case of numerical features, None in case of categorical features.

  • order is the feature order (global feature importance).
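
A minimal sketch of resolving a page data file from the paged index shown above (index_json stands for the loaded index file content and is illustrative):

import json

index = json.loads(index_json)
# Page data files are keyed by row offset within each class:
page_file = index["files"]["red_class"]["10"]  # page starting at feature row 10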

DATA_FILE_PREFIX = 'summary_feature_importance'
DEFAULT_PAGE_SIZE = 10
KEY_FEATURE = 'feature'
KEY_FEATURES_PER_PAGE = 'features_per_page'
KEY_FREQUENCY = 'count'
KEY_HIGH_VALUE = 'avg_high_value'
KEY_ORDER = 'order'
KEY_SHAPLEY = 'shapley_value'
static from_json_datatable(json_dt_format: GlobalSummaryFeatImpJsonDatatableFormat, page_size: int, total_rows: int = -1, persistence: Persistence | None = None, index_extensions: Dict | None = None) Tuple[GlobalSummaryFeatImpJsonFormat, dict]
classmethod get_page(persistence: ExplainerPersistence, explanation_type: str, page_offset: int, page_size: int, result_format: str, explanation_filter: List[FilterEntry]) str

The representation expects the JSon+datatable representation to exist and uses it to construct the requested page.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
mime: str = 'application/json'
static serialize_data_file(feature_importances: Dict[str, float], scope: str = 'global', bias: float | None = None) str

JSon data file serialization to string.

Parameters:
feature_importances: dict

Feature importances as dictionary of feature name to importance.

scope: str

global or local.

bias: optional float

Bias value.

Returns:
str:

Data file serialization.

static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, keywords: list | None = None, doc: str = '', total_rows: int | None = None, rows_per_page: int | None = None, data_file_prefix: str = 'summary_feature_importance', data_file_suffix: str = 'json') Tuple[Dict, str]
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.GrammarOfMliFormat

Bases: object

A format class which is a child of the Grammar of MLI format class is supported in the H2O Eval Studio UI - there is a UI component which will render such a format as an (interactive) chart.

classmethod is_grammar_of_mli() bool

Will the representation be rendered in the UI?

class h2o_sonar.lib.api.formats.HtmlFormat(explanation, format_data: str, format_file: str | None = None, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

HTML representation.

Example local (single row) on-demand NLP HTML explanation:

<feature-text min="-10.0" max="5.0">
  Sentence with <word value="-0.9485">dummy word</word>.
</feature-text>
ATT_MAX = 'max'
ATT_MIN = 'min'
ATT_VALUE = 'value'
EL_FEATURE_TEXT = 'feature-text'
EL_WORD = 'word'
MINIMAL_HTML = "<!DOCTYPE html>\n<html lang='en'><head></head><body></body></html>"
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'text/html'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceCsvFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: CsvFormatCustomExplanationFormat

mime: str = 'text/csv'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceDatatableFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Individual conditional explanation as datatable.

Canonical representation (datatable frame, ltypes) for 1D ICE:

| Required column    | Type  | Description            |
|--------------------|-------|------------------------|
| feature_name       | str   | Feature name.          |
| feature_type       | str   | Feature type.          |
| instance_id        | int   | Instance.              |
| bin_value          | str   | Bin value.             |
| prediction         | real  | Prediction.            |

Hints:

  • bin_value is converted to string (can be converted back using feature_type)

… other optional columns are allowed

COL_BIN_VALUE = 'bin_value'
COL_F_LTYPE = 'feature_type'
COL_F_NAME = 'feature_name'
COL_INSTANCE = 'instance'
COL_PREDICTION = 'prediction'
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.IceJsonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Individual conditional explanation as per-feature and class datatable frames with JSon index file.

JSon representation index file example:

{
    "features": {
        "PAY_0": {
            "order": 0,
            "feature_type": ["categorical"],
            "files": {
                "red_class": "ice_feature_0_class_0.jay",
                "blue_class": "ice_feature_0_class_1.jay",
                "white_class": "ice_feature_0_class_2.jay"
            }
        },
        ...
    },
    "metrics": [{"RMSE": 0.03}],
    "y_file": "y_hat.jay",
    "on_demand": false
}

or (if on demand e.g. in case of sampled dataset):

{
    "on_demand": true,
    "on_demand_parameters": ...
}

Datatable representation data file example:

> datatable.fread("ice_feature_0_class_0.jay")
   |       -2        -1         0         1         2         7
-- + --------  --------  --------  --------  --------  --------
 0 | 0.390716  0.390716  0.390716  0.390716  0.531548  0.531548
 1 | 0.38681   0.38681   0.38681   0.38681   0.508216  0.508216
 2 | 0.425908  0.425908  0.425908  0.425908  0.536061  0.536061
 ...

Remarks:

  • y_file … datatable frame with predictions for every X dataset instance

  • on_demand … true if there is no cached ICE and it must be computed

FILE_Y_FILE = 'y_hat.jay'
KEY_BIN = 'bin'
KEY_BINS = 'bins'
KEY_BINS_NUMCAT_ASPECT = 'bins_numcat_aspect'
KEY_COL_NAME = 'column_name'
KEY_FEATURE_VALUE = 'feature_value'
KEY_ICE = 'ice'
KEY_PREDICTION = 'prediction'
add_data_frame(format_data: Frame, file_name: str | None = None)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, dataset_path: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get ICE.

Parameters:
persistence

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

dataset_path

Dataset path.

row: int

Local explanation to be provided for given row.

explanation_filter: List[FilterEntry]

Required filter entries: feature, class.

Returns:
str

JSon representation of the local explanation.

JSon ICE representation:
{
    prediction: float,
    data: [
        {
            bin: any,
            ice: float,
        }
    ]
}
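
A hypothetical call (the persistence object, dataset path, and filter entries are illustrative):

json_ice = IceJsonDatatableFormat.get_local_explanation(
    persistence=persistence,      # initialized for the explainer/MLI run
    explanation_type="...",       # explanation type ~ explanation ID
    dataset_path="dataset.csv",   # illustrative dataset path
    row=7,                        # dataset row to explain
    explanation_filter=filters,   # must include the feature and class entries
)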
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

classmethod merge_format(from_path: str, to_path: str, overwrite: bool = True, discriminant: str = '', is_numcat_merge: bool = False, persistence: Persistence | None = None)

Merge the source (from_path) representation files into the target (to_path) representation files.

Parameters:
from_path: str

Directory with the source representation to merge.

to_path: str

Directory with the target representation into which new explanations should be merged.

overwrite: bool

Overwrite explanations if they already exist in the target representation. Use False to keep existing target explanations in case of a clash.

discriminant: str

Delimiter to make data file names unique (if needed).

is_numcat_merge: bool

True if this is a num/cat update, False otherwise.

persistence: Optional[persistences.Persistence]

Persistence store to save and load representations.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
classmethod mli_ice_explanation_to_json(ice_df: DataFrame, filter_feature: str, prediction, feature_value, logger=None) str
static serialize_index_file(features: List[str], classes: List[str], default_class: str = '', features_meta: dict | None = None, metrics: list | None = None, doc: str = '', y_file: str | None = None) Tuple[Dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LlmHeatmapLeaderboardJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: LlmLeaderboardJSonFormat, TextCustomExplanationFormat

Representation of LLM Heatmap Leaderboard explanation as JSon.

JSon representation index file example:

{
    "files": {
        "ragas": "leaderboard_0.json",
        "answer_relevance": "leaderboard_1.json",
        ...
        "ALL_METRICS": "leaderboard_n.json"
    },
    ...
}

JSon representation data file example:

{
    "data": {
        "h2oai/h2ogpt-4096-llama2-70b-chat": {
            "answer_similarity": 1
        },
        "h2oai/h2ogpt-4096-llama2-70b-chat-4bit": {
            "answer_similarity": 1
        },
        ...
        "gpt-4-32k-0613": {
            "answer_similarity": 1
        }
    },
    "eda": {
        ...
    }
}
KEY_DEFAULT_METRIC = 'default_metric'
KEY_EDA = 'eda'
classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict

Load index file and check parameters.

Returns:
dict:

Index file as dictionary.

mime: str = 'application/json'
static serialize_index_file(metrics: List[str], default_metric: str = '', eda: dict | None = None, doc: str = '', data_file_prefix: str = 'leaderboard', data_file_suffix: str = 'json') Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
metrics: list

Metrics.

default_metric: str

Metric to be shown as default (the first one).

eda: Dict

EDA data.

doc: str

Documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

Returns:
Tuple[dict, str]:

Dictionary with mapping of metrics to file names AND JSon serialization (as string).
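
A hypothetical call matching the index file example above (metric names are illustrative):

files_map, index_json = LlmHeatmapLeaderboardJSonFormat.serialize_index_file(
    metrics=["ragas", "answer_relevance", "ALL_METRICS"],
    default_metric="ragas",
)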

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LlmLeaderboardJSonFormat

Bases: ABC

KEY_ALL_METRICS = 'ALL_METRICS'
mime = 'application/json'
class h2o_sonar.lib.api.formats.LocalDtJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local representation of decision tree as JSon.

JSon representation index file example:

{
    "files": {
        "red_class": "dt_class_0.json",
        "green_class": "dt_class_1.json",
        "blue_class": "dt_class_2.json",
        ...
    },
    "metrics": [
      {"Training RMSE": 0.96},
      {"CV RMSE": 0.97},
      {"NFolds": 3},
      {"R2": 0.96}
    ]
}

JSon representation data file example:

{
    data: [
        {
          key: str,
          name: str,
          parent: str,
          edge_in: str,
          edge_weight: num,
          leaf_path: bool,
          total_weight: num,
          weight: num
        }+
    ]
}

or (if on demand e.g. in case of sampled dataset):

{
    "on_demand": true,
    "on_demand_parameters": ...
}

Remarks:

  • leaf_path … true if this is a local path (a hint in the leaf defines the path to the root), else a global explanation. In other words, the whole tree is returned with the leaf selected.

  • on_demand … true if there is no cached DT and it must be computed.

static dt_path_to_node_key(path: str) str
static dt_set_tree_path(key: str, tree: dict)
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get local DT explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

row: int

Local explanation to be provided for given row.

explanation_filter: List[FilterEntry]

Required filter entries: feature, class.

Returns:
str:

JSon representation of the local explanation.

JSon DT representation:
{
    data: [
        {
          key: str,
          name: str,
          parent: str,
          edge_in: str,
          edge_weight: num,
          leaf_path: bool,
          total_weight: num,
          weight: num,
        }+
    ]
}
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') Tuple[Dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalFeatImpDatatableFormat(explanation, frame: Frame, frame_file: str | None = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat, GrammarOfMliFormat

Local feature importance datatable representation.

  • feature importance for all classes

Canonical representation (datatable frame):

| Columns            | Rows                                |
|--------------------|-------------------------------------|
| feature names      | per-dataset row feature importance  |

Example:

   | activity     ...  max_speed
-- + ------------ ...  ----------
 0 | -0.0143614        -0.142553
 . |  ...               ...
 9 |  0.0156479        -0.231883
FILE_EXT = 'jay'
classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Get local feature importance explanation.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

row: int

Local explanation to be provided for given row.

explanation_filter: List[FilterEntry]

Filter (unused in case of feature importance).

Returns:
str:

JSon representation of the local explanation.

JSon representation:
{
    data: [
        {
            label: str,
            value: num,
            scope: str,
        }+
    ]
}
Where:
  • label is feature name
  • value is feature importance
  • scope is local
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(frame_data: Frame) Frame
class h2o_sonar.lib.api.formats.LocalFeatImpJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: GlobalFeatImpJSonDatatableFormat

add_data_frame(format_data: Frame, file_name: str | None = None)
get_data(file_name: str | None = None)
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'application/vnd.h2oai.json+datatable.jay'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', total_rows: int | None = None, data_file_prefix: str = 'feature_importance', data_file_suffix: str = 'jay') Tuple[Dict, str]
class h2o_sonar.lib.api.formats.LocalFeatImpJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of local feature importance explanation as JSon. See GlobalFeatImpJSonFormat for structure of the index file and data.

KEY_Y = 'prediction'
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
static merge_local_and_global_page(global_page: dict, local_page: dict, mli_key: str = '', explainer_job_key: str = '', bias_key: str = '', logger=None)

Use this method to merge the local and global explanations page (especially if the frontend is not able to process local explanations only).

The local explanations page is expected to be sorted (as required) and it defines the order of entries in the merged page. The merged result contains a global explanation entry followed by a local explanation entry.

mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '') Tuple[Dict, str]
static sort_data(json_dict: dict)

Sort local feature importance explanation data by (abs) value:

{'data': [{'label': .,'value': .,'scope': .}, ...
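
A hypothetical sketch of the data sort_data operates on (labels and values are illustrative):

page = {"data": [{"label": "AGE", "value": -0.2, "scope": "local"},
                 {"label": "PAY_0", "value": 0.9, "scope": "local"}]}
LocalFeatImpJSonFormat.sort_data(page)  # sorted by |value|: PAY_0 before AGE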
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalFeatImpWithYhatsJSonDatatableFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: LocalFeatImpJSonDatatableFormat

FILE_Y_HAT = 'y_hat.bin'
KEY_Y_HAT = 'y_hat'
class h2o_sonar.lib.api.formats.LocalNlpLocoJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Representation of local LOCO explanation as JSon. See GlobalNlpLocoJSonFormat for structure of the index file and data.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') dict
static merge_local_and_global_page(global_page: dict, local_page: dict, mli_key: str = '', explainer_job_key: str = '', bias_key: str = '', logger=None)

Use this method to merge the local and global explanations page (especially if the frontend is not able to process local explanations only).

The local explanations page is expected to be sorted (as required) and it defines the order of entries in the merged page. The merged result contains a global explanation entry followed by a local explanation entry.

mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '') Tuple[Dict, str]
static sort_data(json_dict: dict)

Sort local feature importance explanation data by (abs) value:

{'data': [{'label': .,'value': .,'scope': .}, ...
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.LocalOnDemandHtmlFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (single row) on-demand representation.

classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Load index file and check parameters.

Returns:
str:

Local explanation as string - can be any (un)structured format.

mime: str = 'text/html'
set_on_demand(is_on_demand: bool, mime: str = '')

Indicate that representation is on-demand.

static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.LocalOnDemandTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (single row) on-demand representation.

classmethod get_local_explanation(persistence: ExplainerPersistence, explanation_type: str, row: int, explanation_filter: List[FilterEntry], **extra_params) str

Load index file and check parameters.

Returns:
str:

Local explanation as string - can be any (un)structured format.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.LocalSummaryFeatImplJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Local (on-demand) representation of summary feature importance as JSon.

classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

classmethod is_paged(persistence: ExplainerPersistence, explanation_type: str) bool

Returns True if the representation supports paging.

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/vnd.h2oai.json+datatable.jay') dict
mime: str = 'application/json'
static serialize_index_file(classes: List[str], default_class: str = '', metrics: list | None = None, doc: str = '', data_file_prefix: str = 'dt', data_file_suffix: str = 'json') Tuple[Dict, str]
static serialize_on_demand_index_file(on_demand_params: dict) str
static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.MarkdownFormat(explanation, format_file: str, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Markdown representation (text and images).

mime: str = 'text/markdown'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.ModelValidationResultArchiveFormat(explanation, mv_test_type: str | Any, mv_test_name: str, mv_test_id: str, mv_test_results, mv_test_settings, mv_test_artifacts: Dict, mv_test_log, mv_client=None, persistence: Persistence | None = None, logger=None)

Bases: ExplanationFormat

Model Validation test result archived in a ZIP.

mime: str = 'application/zip'
class h2o_sonar.lib.api.formats.PartialDependenceCsvFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: CsvFormatCustomExplanationFormat

static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.PartialDependenceDatatableFormat(explanation, frame: Frame, frame_file: str = None, persistence: Persistence | None = None)

Bases: DatatableCustomExplanationFormat

Representation of partial dependence (PD) explanation as datatable.

Canonical representation (datatable frame, ltypes) for 1D PD:

| Required column    | Type  | Description            |
|--------------------|-------|------------------------|
| feature_name       | str   | Feature name.          |
| feature_type       | str   | Feature type.          |
| bin_value          | str   | Bin value              |
| mean               | real  | Mean.                  |
| sd                 | real  | Standard deviation.    |
| sem                | real  | Standard error of mean.|
| is_oor             | bool  | Is out of range value? |

Hints:

  • bin_value is converted to string (can be converted back using feature_type).

… other optional columns are allowed

COL_BIN_VALUE = 'bin_value'
COL_F_LTYPE = 'feature_type'
COL_F_NAME = 'feature_name'
COL_IS_OOR = 'is_oor'
COL_MEAN = 'mean'
COL_SD = 'sd'
COL_SEM = 'sem'
mime: str = 'application/vnd.h2oai.datatable.jay'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.PartialDependenceJSonFormat(explanation, json_data: str = None, json_file: str = None, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat, GrammarOfMliFormat

Representation of partial dependence (PD) explanation as JSon.

JSon representation index file example:

{
    "features": {
        "PAY_0": {
            "order": 0,
            "feature_type": ["categorical"],
            "files": {
                "red_class": "pd_feature_0_class_0.json",
                "green_class": "pd_feature_0_class_1.json",
                "blue_class": "pd_feature_0_class_2.json"
            }
        },
        ...
    },
    "metrics": [{"R2": 0.96}, {"RMSE": 0.03}]
}

JSon representation data file example:

{
    "data": [{
        "bin": -2,
        "pd": 0.3710160553455353,
        "sd": 0.029299162328243256,
        "out_of_range": false
    }, {
        "bin": -1,
        "pd": 0.3710160553455353,
        "sd": 0.029299162328243256,
        "out_of_range": false
    },
    ...
    ]
}
KEY_BIN = 'bin'
KEY_FREQUENCY = 'frequency'
KEY_OOR = 'oor'
KEY_PD = 'pd'
KEY_SD = 'sd'
KEY_X = 'x'
classmethod get_bins(persistence: ExplainerPersistence, explanation_type: str, feature: str) list

Get bins for given feature.

Parameters:
persistence:

Persistence object initialized for explainer/MLI run.

explanation_type: str

Explanation type ~ explanation ID.

feature: str

Feature for which to get bins.

Returns:
list:

Bins.

classmethod get_numcat_aspects(feature, idx: dict) List[str]

Get available num/cat aspects for given feature:

  • [] … invalid feature

  • ["numeric"] … numeric PD only

  • ["categorical"] … categorical PD only

  • ["numeric", "categorical"] … numeric and categorical PD

Parameters:
feature: str

Feature name for which to determine available aspects.

idx: dict

PD JSon index file (explanation.json).

Returns:
list[str]:

Available num/cat aspects.

classmethod get_numcat_missing_aspect(feature: str, idx: dict)

Return (missing) aspect to be calculated.

Parameters:
feature: str

Feature name for which to determine available aspects.

idx: dict

PD JSon index file (explanation.json).

Returns:
str:

Aspect to calculate or "" (no aspect is missing).

classmethod load_index_file(persistence: ExplainerPersistence, explanation_type: str, mime: str = 'application/json') Dict
classmethod merge_format(from_path: str, to_path: str, overwrite: bool = True, discriminant: str = '', is_numcat_merge: bool = False, persistence: Persistence | None = None)

Merge the source (from_path) representation files into the target (to_path) representation files.

Parameters:
from_path: str

Directory with the source representation to merge.

to_path: str

Directory with the target representation into which new explanations should be merged.

overwrite: bool

Overwrite explanations if they already exist in the target representation. Use False to keep existing target explanations in case of a clash.

discriminant: str

Delimiter to make data file names unique (if needed).

is_numcat_merge: bool

True if this is num/cat update, False otherwise.

persistence: Optional[persistences.Persistence]

Persistence store to save and load explanation representations.

mime: str = 'application/json'
static serialize_index_file(features: List[str], classes: List[str], default_class: str = '', features_meta: dict | None = None, metrics: list | None = None, keywords: list | None = None, doc: str = '', data_file_prefix: str = 'pd', data_file_suffix: str = 'json', y_file: str | None = None) Tuple[Dict, str]

JSon index file serialization to string.

Parameters:
features: list

Features.

classes: list

Classes.

default_class: str

Class to be shown as default (the first one), e.g. the class of interest in case of a binomial experiment interpretation.

features_meta: dict

Features metadata allowing to indicate that a given feature is categorical (use the categorical key and a list of feature names), a date (use the date key and a list of feature names; to specify the format, use date-format and a list of Python date formats), or numerical (default).

metrics: list

Optional list of PD-related metrics, e.g. [{"RMSE": 0.02}, {"SD": 3.1}].

keywords: List[str]

Optional list of keywords indicating representation features, properties, and aspects.

doc: str

Chart documentation.

data_file_prefix: str

Prefix for data file names.

data_file_suffix: str

Suffix for data file names.

y_file: str

Predictions file.

Returns:
Tuple[dict, str]

Dictionary with mapping of features and classes to file names AND JSon serialization (as string).
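
A hypothetical call (feature and class names, metadata, and metrics are illustrative):

files_map, index_json = PartialDependenceJSonFormat.serialize_index_file(
    features=["PAY_0", "AGE"],
    classes=["red_class", "green_class", "blue_class"],
    default_class="red_class",
    features_meta={"categorical": ["PAY_0"]},
    metrics=[{"RMSE": 0.03}],
)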

classmethod set_merge_status(dir_path: str, mli_key: str, explainer_job_key: str, clear: bool = False, action: str = 'update_explanation', action_type: str = 'add_aspect', persistence: Persistence | None = None)

Add (clear=False) or remove (clear=True) a running interpretation update.

Parameters:
dir_path: str

Directory with index file where the status should be set.

mli_key: str

MLI key of the interpretation which will update another representation.

explainer_job_key: str

Explainer job key of the interpretation which will update another representation.

clear: bool

Add (clear=False) or remove (clear=True) indicator in representation’s dict.

action: str

Running action identifier e.g. update explanation.

action_type: str

Action (sub)type identifier e.g. add feature, add numeric/categorical view.

persistence: Optional[persistences.Persistence]

Persistence store to save and load explanation representations.

static validate_data(json_data: str) str
class h2o_sonar.lib.api.formats.SaTextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Sensitivity Analysis (SA) text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.TextCustomExplanationFormat(explanation, format_data: str, format_file: str | None, extra_format_files: List | None = None, persistence: Persistence | None = None)

Bases: ExplanationFormat

FILE_IS_ON_DEMAND = 'IS_ON_DEMAND'
FILTER_CLASS = 'explain_class'
FILTER_FEATURE = 'explain_feature'
FILTER_NUMCAT = 'explain_numcat'
add_data(format_data: str, file_name: str | None = None)

Add TEXT data as a new explanation representation file. Child classes with binary data should override this method.

Parameters:
format_data:

Data to store as new explanation’s format file.

file_name: str

Representation file name or file relative path.

add_file(format_file: str, file_name: str | None = None)

Copy file to representation as new explanation representation file.

Parameters:
format_file:

Source file to store (copy) as new explanation’s format file.

file_name: str

Representation file name or file relative path.
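
A hypothetical sketch (fmt stands for an instantiated text representation; the payload and file names are illustrative):

fmt.add_data(format_data='{"data": []}', file_name="extra.json")
fmt.add_file(format_file="/tmp/chart.json")  # copied into the representation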

get_data(file_name: str | None = None, data_type: PersistenceDataType | None = None)
classmethod is_on_demand(persistence: ExplainerPersistence, explanation_type: str) Tuple[bool, dict | None]

Returns True if there is no pre-computed (cached) local explanation and it must be calculated on demand.

Returns:
bool:

True if the representation is calculated on demand.

dict:

On-demand calculation parameters.

mime: str = 'text/plain'
static set_index_commons(index_dict: dict, classes: List[str], default_class: str = '', metrics: list | None = None, keywords: int | None = None, doc: str = '', total_rows: int | None = None)
set_on_demand(is_on_demand: bool, mime: str = '')

Indicate that representation is on-demand.

update_index_file(index_dict: dict, metrics: list | None = None, total_rows: int | None = None)
class h2o_sonar.lib.api.formats.TextFormat(explanation, format_data: str, persistence: Persistence | None = None)

Bases: TextCustomExplanationFormat

Text representation.

mime: str = 'text/plain'
static validate_data(dt_data: Frame)
class h2o_sonar.lib.api.formats.WorkDirArchiveZipFormat(explanation, file_filter=<function WorkDirArchiveZipFormat.<lambda>>, persistence: ~h2o_sonar.lib.api.persistences.Persistence | None = None)

Bases: ExplanationFormat, GrammarOfMliFormat

Working directory ZIP archive representation. Just instantiate this class, and it will create the ZIP representation (no need to add files/data). Note that the archive is created exactly at the time of instantiation.

mime: str = 'application/zip'
h2o_sonar.lib.api.formats.get_custom_explanation_formats()

h2o_sonar.lib.api.interpretations module

class h2o_sonar.lib.api.interpretations.ExplainerJob(key: str = '', created: float = 0.0, duration: float = 0.0, progress: float = 0.0, status: ExplainerJobStatus = ExplainerJobStatus.UNKNOWN, message: str = '', error: str = '', explainer_persistence=None, explainer_descriptor: ExplainerDescriptor | None = None, result_descriptor=None, child_explainer_job_keys: List[str] | None = None, job_location: str = '')

Bases: object

Explainer job.

KEY_CHILD_KEYS = 'child_explainer_job_keys'
KEY_CREATED = 'created'
KEY_DURATION = 'duration'
KEY_ERROR = 'error'
KEY_EXPLAINER_DESCRIPTOR = 'explainer'
KEY_JOB_LOCATION = 'job_location'
KEY_KEY = 'key'
KEY_MESSAGE = 'message'
KEY_PROGRESS = 'progress'
KEY_RESULT_DESCRIPTOR = 'result_descriptor'
KEY_STATUS = 'status'
evaluator_id() str
explainer_id() str
static from_dict(explainer_job_dict: Dict)
is_finished() bool
success()
tick(msg: str = '', progress_increment: float = 0.1)
to_dict() Dict
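
A hypothetical round-trip through the dictionary representation (job stands for an existing ExplainerJob instance):

job_dict = job.to_dict()
restored = ExplainerJob.from_dict(job_dict)
assert restored.is_finished() == job.is_finished()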
class h2o_sonar.lib.api.interpretations.HtmlInterpretationFormat(interpretation: Interpretation, branding: Branding = Branding.H2O_SONAR, logger: SonarLogger | None = None)

Bases: object

HTML representation of the interpretation.

class Context

Bases: object

Context with the data needed to create HTML.

clear()
get_purpose_representatives_job_keys() List[str]
KEYWORD_ID_2_NAME = {'explains-approximate-behavior': 'Approximate model behavior', 'explains-fairness': 'Fairness', 'explains-feature-behavior': 'Feature behavior', 'explains-model': 'Model explanations', 'explains-model-debugging': 'Model debugging', 'explains-original-feature-importance': 'Original feature importance', 'explains-transformed-feature-importance': 'Transformed feature importance'}

Inject footer into Airium HTML.

Parameters:
html: airium.Airium

Airium HTML instance.

brand_h2o_sonar: str

H2O Eval Studio branding.

branding: commons.Branding

Branding.

static html_h2o_sonar_pitch(brand_h2o_sonar: str) str
static html_head(a, title: str = 'H2O Sonar')

Create head of HTML representation.

Parameters:
a: airium.Airium

Airium HTML instance.

title: str

Optional HTML head title.

static html_safe_str_field(field)

Inject H2O.ai SVG logo to HTML.

Parameters:
html: airium.Airium

Airium HTML instance.

to_html(include_left_navigation: bool = True, report_style: str = 'HTML') str

Get HTML report for the interpretation.
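
A hypothetical sketch of rendering a report (interpretation stands for a finished Interpretation instance):

html_format = HtmlInterpretationFormat(interpretation=interpretation)
report_html = html_format.to_html(include_left_navigation=True)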

class h2o_sonar.lib.api.interpretations.HtmlInterpretationsFormat(interpretations: Interpretations, branding: Branding = Branding.H2O_SONAR, logger: SonarLogger | None = None)

Bases: object

HTML representation of an interpretations list.

to_html(branding: Branding = Branding.EVAL_STUDIO) str

Get HTML for the interpretations list.

class h2o_sonar.lib.api.interpretations.Interpretation(common_params: CommonInterpretationParams, created: float, explainers: List[str | ExplainerToRun] | None, explainer_keywords: List[str] | None = None, key: str = '', sampler: DatasetSampler | None = None, branding: Branding = Branding.H2O_SONAR, results_formats: List[str] | None = None, progress_callback: AbstractProgressCallbackContext | None = None, logger=None, extra_params: List | None = None)

Bases: object

Interpretation is a request to interpret a model using explainers. The interpretation instance also serves as an execution context; however, it does not execute explainers itself - its purpose is to be a prescription (of what is requested) and a stateful data holder. The interpretation result (referenced by the instance) is a set of explanations which were created by explainers.

Attributes:
key: str

Interpretation key.

common_params: commons.CommonInterpretationParams

Interpretation parameters specified by the user.

explainers: List

Explainers to be run (if no explainers are specified, then all compatible explainers are run).

persistence: persistences.Persistence

Persistence store - file-system, in-memory, DB - where interpretation results are stored and from where they can be loaded using the persistence instance.

KEY_ALL_EXPLAINERS = 'all_explainer_ids'
KEY_CREATED = 'created'
KEY_DATASET = 'dataset'
KEY_ERROR = 'error'
KEY_EXECUTED_EXPLAINERS = 'executed_explainers'
KEY_EXPLAINERS = 'explainers'
KEY_E_PARAMS = 'explainers_parameters'
KEY_INCOMPATIBLE_EXPLAINERS = 'incompatible_explainer_ids'
KEY_INCOMPATIBLE_EXPLAINERS_DS = 'incompatible_explainers'
KEY_INSIGHTS = 'insights'
KEY_INTERPRETATION_LOCATION = 'interpretation_location'
KEY_I_KEY = 'interpretation_key'
KEY_I_PARAMS = 'interpretation_parameters'
KEY_MODEL = 'model'
KEY_MODELS = 'models'
KEY_OVERALL_RESULT = 'overall_result'
KEY_PROBLEMS = 'problems'
KEY_PROGRESS = 'progress'
KEY_PROGRESS_MESSAGE = 'progress_message'
KEY_RESULT = 'result'
KEY_RESULTS_LOCATION = 'results_location'
KEY_SCHEDULED_EXPLAINERS = 'scheduled_explainers'
KEY_STATUS = 'status'
KEY_TARGET_COL = 'target_col'
KEY_TESTSET = 'testset'
KEY_VALIDSET = 'validset'
static dict_to_digest(i_json: Dict)
get_all_explainer_ids() List[str]
get_explainer_ids_by_status(status: int) List[str]
get_explainer_insights(explainer_id: str) List
get_explainer_jobs_by_status(status: int) List[ExplainerJob]
get_explainer_problems(explainer_id: str) List
get_explainer_result(explainer_id: str) ExplainerResult | None
get_explainer_result_metadata(explainer_id: str) Dict | None
get_explanation_file_path(explanation_type: str, explanation_format: str, explainer_id: str = '', evaluator_id: str = '')

Get explanation (index) file path.

Parameters:
explainer_id: str

Explainer ID - either explainer or evaluator ID must be specified.

evaluator_id: str

Evaluator ID - either explainer or evaluator ID must be specified.

explanation_type: str

Explanation type as string.

explanation_format: str

Explanation (MIME) format.

Returns:
str

Path to the explanation file.
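
A hypothetical call (the explanation type and explainer ID are illustrative):

path = interpretation.get_explanation_file_path(
    explanation_type="global-feature-importance",  # illustrative type
    explanation_format="application/json",
    explainer_id="...",                            # illustrative explainer ID
)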

get_failed_explainer_ids() List[str]
get_finished_explainer_ids() List[str]
get_incompatible_explainer_ids() List[str]
get_insights() List
get_jobs_for_evaluator_id(explainer_id: str) List[ExplainerJob]
get_jobs_for_explainer_id(explainer_id: str) List[ExplainerJob]
get_model_insights(model_name: str) List
get_model_problems(model_name: str) List
get_problems_by_severity(severity: ProblemSeverity) List
get_scheduled_explainer_ids() List[str]
get_successful_explainer_ids() List[str]
is_evaluator_failed() bool
is_evaluator_finished() bool
is_evaluator_scheduled() bool
is_evaluator_successful() bool
is_explainer_failed() bool

Indicate whether an explainer failed.

is_explainer_finished() bool

Indicate whether an explainer successfully finished or failed.

is_explainer_scheduled() bool

Indicate whether there was at least one explainer which was run.

is_explainer_successful() bool

Indicate whether an explainer successfully finished.

load(persistence, logger=None) Interpretation

Load a persisted interpretation using the given persistence.

Parameters:
persistence: persistences.InterpretationPersistence

Interpretation persistence which can be used to load the interpretation from file-system, memory or DB.

logger

Logger.

Returns:
Interpretation

Interpretation instance.

static load_from_json(interpretation_json_path: str | Path) Interpretation

Load interpretation from JSON.
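
A hypothetical sketch (the JSON path is illustrative):

interpretation = Interpretation.load_from_json("results/interpretation.json")
print(interpretation.get_successful_explainer_ids())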

register_explainer_result(explainer_id: str, result: ExplainerResult)
set_progress(progress: float, message: str | None = None) float
to_dict() Dict
to_html() str
to_html_4_pdf() str
to_json(indent=None) str
to_pdf(input_path: str, output_path: str)
update_overall_result() OverallResult
validate_and_normalize_params()

Validate and normalize interpretation parameters.

class h2o_sonar.lib.api.interpretations.InterpretationResult(results_location: str = '', interpretation_location: str = '')

Bases: object

Result of the interpretation run.

get_evaluator_job(evaluator_job_id: str) ExplainerJob | None
get_evaluator_jobs() List[ExplainerJob]
get_explainer_job(explainer_job_id: str) ExplainerJob | None
get_explainer_jobs() List[ExplainerJob]
get_html_report_location(absolute_path: bool = True) str
get_interpretation_dir_location(absolute_path: bool = True) str
get_interpretations_html_index_location(absolute_path: bool = True)
get_json_report_location(absolute_path: bool = True) str
get_pdf_report_location(absolute_path: bool = True) str
get_progress_location(absolute_path: bool = True) str
get_results_dir_location(absolute_path: bool = True) str
make_zip_archive(zip_filename)
remove_duplicate_insights()
to_dict() Dict
to_json(indent=None) str
class h2o_sonar.lib.api.interpretations.Interpretations(interpretations_paths: List[str], persistence, branding: Branding = Branding.H2O_SONAR, logger=None)

Bases: object

Interpretations created by H2O Eval Studio in the results location.

count() int
load_interpretation_meta(i_path: str, digest: bool = True) Dict
to_html(branding: Branding = Branding.EVAL_STUDIO) str
class h2o_sonar.lib.api.interpretations.OverallResult(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Overall evaluation/interpretation result in the traffic light style.

high_severity_problems = 4
low_severity_problems = 2
medium_severity_problems = 3
no_problem = 1
class h2o_sonar.lib.api.interpretations.PdfInterpretationFormat(interpretation: Interpretation, logger: SonarLogger)

Bases: HtmlInterpretationFormat

PDF (via HTML) representation of the interpretation.

to_html_4_pdf() str

To HTML which can be used to generate PDF.

static to_pdf(input_path: str, output_path: str)

h2o_sonar.lib.api.judges module

class h2o_sonar.lib.api.judges.EvaluationJudge

Bases: ABC

Bring your own judge (BYOJ) to evaluate the quality of a model’s output.

abstract evaluate(prompts: List[str], **kwargs) List

Evaluate the quality of a model’s output.

health_check() bool

Check if the judge is healthy and available.
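A minimal bring-your-own-judge sketch; KeywordJudge and its scoring scheme are illustrative, not part of the library:

from typing import List

from h2o_sonar.lib.api.judges import EvaluationJudge


class KeywordJudge(EvaluationJudge):
    # Hypothetical judge which scores model outputs by keyword presence.

    def __init__(self, keyword: str):
        self.keyword = keyword

    def evaluate(self, prompts: List[str], **kwargs) -> List:
        # One score per prompt: 1.0 if the keyword occurs, else 0.0.
        return [1.0 if self.keyword in p else 0.0 for p in prompts]

    def health_check(self) -> bool:
        # This toy judge has no remote dependency, so it is always healthy.
        return True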

class h2o_sonar.lib.api.judges.LlmEvaluationJudge(llm_host_connection: ConnectionConfig, llm_model_name: str, logger: SonarLogger | None = None)

Bases: EvaluationJudge

LLM judge / interrogator for evaluating the quality of a model output.

evaluate(prompts: List[str], **extra_params) List

Evaluate the quality of a model’s output.

class h2o_sonar.lib.api.judges.RagClientEvaluationJudge(client: RagClient, llm_model_name: str, collection_id: str = '')

Bases: EvaluationJudge

RAG judge / interrogator for evaluating the quality of a model output.

evaluate(prompts: List[str], **extra_params) List

Evaluate the quality of a model’s output.

h2o_sonar.lib.api.judges.get_default_evaluation_judge(logger: SonarLogger | None = None)

Get the default evaluation judge - the OpenAI GPT-4 LLM model. If the OpenAI API key is not set, an exception is raised.

h2o_sonar.lib.api.judges.get_evaluation_judge_for_config(judge_config: EvaluationJudgeConfig, logger: SonarLogger | None = None)

Get an evaluation judge for the given judge config.

h2o_sonar.lib.api.judges.get_evaluation_judge_for_connection(connection: ConnectionConfig, judge_type: str, llm_model_name: str, collection_id: str = '', logger: SonarLogger | None = None)

Get an evaluation judge for the given connection and judge type.

h2o_sonar.lib.api.models module

class h2o_sonar.lib.api.models.DriverlessAiModel(model_src, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None, logger=None)

Bases: ExplainableModel

Explainable model which understands Driverless AI experiments and models. It can therefore get model metadata, ensure the required sanitization, and correctly construct a predict method which accepts the expected input and provides the desired output.

ATTR_HAS_SHAPLEYS = 'has_treeshap'
ATTR_LABEL_NAMES = 'output_names'
COL_SHAPLEY_BIAS = 'contrib_bias'
EXT_MOJO = '.mojo'
PREFIX_CLASS = 'class.'
PREFIX_SHAPLEY_COLS = 'contrib_'
static is_dai_model(model_src) bool
shapley_values(X, original_features: bool = True, fast_approx: bool = False, **kwargs)

Get Shapley values.

Parameters:
X: datatable.Frame

Dataset to calculate Shapley values.

original_features: bool

True to get Shapley values for original features, False to get Shapley values for transformed features.

fast_approx: bool

True to use fast approximation for Shapley values calculation.

Returns:
datatable.Frame

Shapley values based feature contributions.

class h2o_sonar.lib.api.models.DriverlessAiRestServerModel(model_server_url: str, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: ExplainableModel

Explainable model which represents Driverless AI experiments deployed as a REST server. Driverless AI is moving from the local REST server to MLOps, therefore this class is deprecated as of 1.10.4 and will be removed. It remains useful for existing Driverless AI deployments.

static is_dai_rest_server_model(model_src) bool
class h2o_sonar.lib.api.models.ExplainableLlmModel(connection: [<class 'str'>, <class 'h2o_sonar.config.ConnectionConfig'>], model_type: ~h2o_sonar.lib.api.models.ExplainableModelType = ExplainableModelType.unknown, name: str = '', llm_model_name: str = '', llm_model_meta: ~typing.Dict | None = None, model_cfg: ~typing.Dict | None = None, key: str = '', logger: ~h2o_sonar.loggers.SonarLogger | None = None)

Bases: object

KEY_CONNECTION = 'connection'
KEY_H2OGPTE_STATS = 'h2ogpte_perf_stats'
KEY_H2OGPTE_VISION_M = 'vision_model_name'
KEY_KEY = 'key'
KEY_LLM_MODEL_META = 'llm_model_meta'
KEY_LLM_MODEL_NAME = 'llm_model_name'
KEY_MODEL_CFG = 'model_cfg'
KEY_MODEL_TYPE = 'model_type'
KEY_NAME = 'name'
KEY_STATS_DURATION = 'duration_stats'
KEY_STATS_FAILURE = 'failure_count'
KEY_STATS_RETRY = 'retry_count'
KEY_STATS_SUCCESS = 'success_count'
KEY_STATS_TIMEOUT = 'timeout_count'
clone()
static from_dict(as_dict: Dict, connection=None) ExplainableLlmModel
to_dict()
to_json(indent=None)
class h2o_sonar.lib.api.models.ExplainableModel(model_src, predict_method: Callable, fit_method=None, model_type: ExplainableModelType = ExplainableModelType.unknown, model_meta: ExplainableModelMeta | None = None, transformed_model: TransformedFeaturesModel | None = None, label_encoder: MultiColumnLabelEncoder | None = None, logger: SonarLogger | None = None)

Bases: object

Explainable model - this class provides a uniform API for ML models regardless of model source, provider, or implementation.

fit(X: ExplainableDataset | Frame, y=None, **kwargs)
property has_transformed_model: bool

Does the explainable model provide an associated model which works on the transformed features?

static load(path: str)

Load model from pickle.

Parameters:
path: str

Model pickle path.

Returns:
ExplainableModel

Instance of the pickled model.

property meta: ExplainableModelMeta
predict(X: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method.

predict_datatable(X, **kwargs) Frame

Score and return predictions as datatable frame.

predict_pandas(X, **kwargs) DataFrame

Score and return predictions as Pandas frame.

save(path: str, update: bool = False)

Pickle the model.

Parameters:
path: str

Model pickle path.

update: bool

Delete the pickled model if it already exists on the given path prior to saving the new model.

shapley_values(X, original_features: bool = True, **kwargs)

Get Shapley values.

Parameters:
X: datatable.Frame

Dataset to calculate Shapley values.

original_features: bool

True to get Shapley values for original features, False to get Shapley values for transformed features.

Returns:
datatable.Frame

Shapley values based feature contributions.
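A usage sketch, assuming a previously pickled explainable model (the paths are illustrative):

import datatable as dt

from h2o_sonar.lib.api.models import ExplainableModel

model = ExplainableModel.load("models/model.pkl")  # illustrative path

# Score a dataset; predictions are returned as a pandas DataFrame.
X = dt.fread("data/test.csv")  # illustrative path
predictions = model.predict_pandas(X)

# Shapley contributions on the original (raw) features - available
# only when the model provides them.
if model.meta.has_shapley_values:
    contributions = model.shapley_values(X, original_features=True)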

to_dict()
to_json(indent=None)
property transformed_model: TransformedFeaturesModel | None

Get associated model which works on the transformed features.

class h2o_sonar.lib.api.models.ExplainableModelHandle(connection_key: str, model_key: str, model_version: str = '')

Bases: ResourceHandle

Handle to a REMOTE model hosted by a remote system described by its connection configuration.

ExplainableModelHandle differs from ExplainableModel in that it doesn't provide the actual predict function, but only the metadata required to access the model.

static from_string(str_handle: str, h2o_sonar_config=None) ExplainableModelHandle

Create a new instance of the model handle from the string.
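A construction sketch; the connection and model keys below are illustrative:

from h2o_sonar.lib.api.models import ExplainableModelHandle

# Refer to a remote model via the key of its connection configuration and
# the model key on the remote system.
handle = ExplainableModelHandle(
    connection_key="h2ogpte-connection",
    model_key="model-4d774e62",
    model_version="1",
)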

class h2o_sonar.lib.api.models.ExplainableModelMeta(description: str = '', is_constant: bool = False, is_remote: bool = False, has_shapley_values: bool = False, target_col: str = '', used_features: List | None = None, feature_importances: Dict | None = None, feature_meta: Dict | None = None, transformed_features: List | None = None, model_path: str = '', model_file_size: int = 0, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: object

Explainable ML model metadata - this class provides uniform API to get ML model metadata regardless model source, provider and implementation.

Model labels (labels class field) convention, illustrated by the sketch below:

  • Regression model: the labels field is an empty list [].

  • Binomial model: the labels field is a list of two strings or integers which represent the model labels; the positive class of interest is the second list item.

  • Multinomial model: the labels field is a list of strings or integers with the model classes.
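For illustration, the convention maps to labels values like these (the label names are made up):

labels_regression = []                          # regression: empty list
labels_binomial = ["no_default", "default"]     # positive class of interest last
labels_multinomial = ["low", "medium", "high"]  # one item per class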

default_feature_importances() Dict

Construct default (fallback) feature importances - a list of the features used by the model, each with importance 0.0 - to be used if no importances were provided by the user.

property feature_importances: Dict

Return per-feature importance set by the user.

property features_metadata: FeaturesMetadata
get_model_type() ExperimentType

Get the experiment type (regression, binomial or multinomial) for the model.

Returns:
ExperimentType:

Experiment type.

property has_shapley_values

Does the model provide Shapley values?

property has_text_transformers: bool

Does the model have text transformers?

property is_constant

Is the model constant?

property num_labels: int
property positive_label_of_interest

In case of binomial classification it returns label of the positive class of interest.

to_dict()
to_json(indent=None)
property transformed_features: List
property used_features: List
class h2o_sonar.lib.api.models.ExplainableModelType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

Explainable model type (extensible via inheritance).

amazon_bedrock_rag = 14
azure_openai_llm = 13
driverless_ai = 2
driverless_ai_rest = 3
static from_connection_type(connection_type: ConnectionConfigType) ExplainableModelType
h2o3 = 4
h2ogpt = 8
h2ogpte = 6
h2ogpte_llm = 7
h2ollmops = 9
static is_llm(explainable_model_type: ExplainableModelType) bool
static is_rag(explainable_model_type: ExplainableModelType) bool
mock = 1
ollama = 10
openai_llm = 12
openai_rag = 11
scikit_learn = 5
static to_connection_type(explainable_model_type: ExplainableModelType) ConnectionConfigType | None
unknown = 15
class h2o_sonar.lib.api.models.ExplainableRagModel(connection: [<class 'str'>, <class 'h2o_sonar.config.ConnectionConfig'>], model_type: ~h2o_sonar.lib.api.models.ExplainableModelType = ExplainableModelType.unknown, name: str = '', collection_id: str = '', collection_name: str = '', llm_model_name: str = '', llm_model_meta: ~typing.Dict | None = None, documents: ~typing.List[str] | None = None, model_cfg: ~typing.Dict | None = None, key: str = '', logger: ~h2o_sonar.loggers.SonarLogger | None = None)

Bases: object

KEY_COLLECTION_ID = 'collection_id'
KEY_COLLECTION_NAME = 'collection_name'
KEY_CONNECTION = 'connection'
KEY_DOCUMENTS = 'documents'
KEY_KEY = 'key'
KEY_LLM_MODEL_META = 'llm_model_meta'
KEY_LLM_MODEL_NAME = 'llm_model_name'
KEY_MODEL_CFG = 'model_cfg'
KEY_MODEL_TYPE = 'model_type'
KEY_NAME = 'name'
clone()
static from_dict(as_dict: Dict, connection=None) ExplainableRagModel
to_dict()
class h2o_sonar.lib.api.models.H2o3Model(model_src, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None)

Bases: ExplainableModel

H2O-3 explainable model implementation.

static is_h2o3_model(model_src) bool
class h2o_sonar.lib.api.models.ModelApi(logger: SonarLogger | None = None)

Bases: object

Model API interface provides a uniform API allowing explainers to use any model (scorer) regardless of provider, implementation or runtime details.

Detects the model (path to model, instance of a supported model, ...) and creates instances of the Model class.

create_model(model_src, target_col: str, used_features: List[str] | None = None, model_type: ExplainableModelType = ExplainableModelType.unknown, dataset: ExplainableDataset | Frame | DataFrame | str | Path | None = None, sanitization_map: SanitizationMap | None = None, **extra_params) ExplainableModel

Create explainable model.

Parameters:
model_src: Any

Path to model on the filesystem, instance of a 3rd party model, pickle or any other source that can be used to create the explainable model. Information about the model can be passed to 3rd party model implementations (like H2O-3) which can create the model.

target_col: str

Target column.

used_features: Optional[List[str]]

Optional list of feature names used by the model - required for all models which don't provide introspection that would allow determining the used features.

model_type: ExplainableModelType

Explainable model type hint which can be used to construct the model correctly.

dataset: Optional[Union[datasets.ExplainableDataset, datatable.Frame, pandas.DataFrame, str, pathlib.Path]]

Optional training dataset.

sanitization_map: Optional[SanitizationMap]

Optional dataset sanitization map used by the model.

Returns:
ExplainableModel

Explainable model.
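A sketch wrapping a toy scikit-learn model; the data, column names, and model are illustrative:

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from h2o_sonar.lib.api.models import ModelApi

# Toy training frame (illustrative data).
df = pd.DataFrame(
    {
        "age": [25, 40, 31, 52],
        "income": [30_000, 52_000, 41_000, 78_000],
        "default": [0, 1, 0, 1],
    }
)
features = ["age", "income"]

sk_model = RandomForestClassifier(random_state=42).fit(df[features], df["default"])

# Wrap the fitted model as an explainable model; used_features is required
# for models without introspection of the features they consume.
model = ModelApi().create_model(
    model_src=sk_model,
    target_col="default",
    used_features=features,
    dataset=df,
)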

class h2o_sonar.lib.api.models.ModelVendor

Bases: object

DAI = 'daimojo'
H2O = 'h2o'
MLI_H2O = 'hmli'
SKLEARN = 'sklearn'
class h2o_sonar.lib.api.models.OpenAiRagModel(connection: [<class 'str'>, <class 'h2o_sonar.config.ConnectionConfig'>], name: str = '', thread_id: str = '', llm_model_name: str = '', documents: ~typing.List[str] | None = None, key: str = '', logger: ~h2o_sonar.loggers.SonarLogger | None = None)

Bases: ExplainableRagModel

OpenAI RAG model - AI Assistant with File Search/Retrieval tool enabled.

class h2o_sonar.lib.api.models.PickleFileModel(model_src, predict_method: Callable, fit_method=None, model_type: ExplainableModelType = ExplainableModelType.unknown, model_meta: ExplainableModelMeta | None = None, transformed_model: TransformedFeaturesModel | None = None, label_encoder: MultiColumnLabelEncoder | None = None, logger: SonarLogger | None = None)

Bases: ExplainableModel

Pickled explainable model.

EXT_PICKLE = '.pkl'
static from_pickle(model_src, target_col: str = '', used_features: List[str] | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | None = None) ExplainableModel
static is_pickle_file_model(model_src) bool
class h2o_sonar.lib.api.models.ScikitLearnModel(model_src, target_col: str = '', used_features: List[str] | None = None, labels: List | None = None, sanitization_map: SanitizationMap | None = None, dataset: ExplainableDataset | Frame | DataFrame | None = None, logger=None)

Bases: ExplainableModel

Scikit-learn explainable model implementation.

static is_scikit_learn_model(model_src) bool
predict(X: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method. Scikit-learn models require specific constraints which are enforced by this model-specific method.

class h2o_sonar.lib.api.models.TransformedFeaturesModel(model_src, transformed_predict_method, transform_dataset_method, model_meta: ExplainableModelMeta | None = None)

Bases: object

Transformed features model is associated with an ExplainableModel which works on the original (raw) features.

An ExplainableModel may have an associated transformed features model. In order to score a dataset using the transformed features model, the dataset must first be transformed from the original (dataset and features) to the transformed (dataset and features) using feature transformers.

static load(path: str)

Load model from pickle.

Parameters:
path: str

Model pickle path.

Returns:
ExplainableModel

Instance of the pickled model.

property meta: ExplainableModelMeta
predict(transformed_x: ExplainableDataset | Frame, **kwargs)

Score and return predictions in any format returned by the predict method.

save(path: str, update: bool = False)

Pickle the model.

Parameters:
path: str

Model pickle path.

update: bool

Delete the pickled model if it already exists on the given path prior to saving the new model.

transform_dataset(X: ExplainableDataset | Frame, **kwargs) ExplainableDataset | Frame

Transform dataset from original to transformed features.

h2o_sonar.lib.api.models.guess_model_labels(dataset: ExplainableDataset | DataFrame | Frame, target_col, labels: List | None = None, model_type_str='scikit-learn', logger: SonarLogger | None = None) List[str] | None

Guess model labels from the dataset.

Parameters:
dataset: Union[datasets.ExplainableDataset, pandas.DataFrame, datatable.Frame]

Dataset used to train the model.

target_col: str

Target column name.

labels: Optional[List[str]]

List of model label values to return if it is not possible to determine them.

logger: Optional[loggers.SonarLogger]

Logger instance.

model_type_str: str

Model type string to be used in exception messages.

Returns:
Optional[List[str]]

List of model labels. If None, then it was not possible to determine them.

h2o_sonar.lib.api.models.guess_model_used_features(dataset: ExplainableDataset | DataFrame | Frame, target_col: str = '', model_type_str: str = 'scikit-learn') List[str]

Guess features used by the model from the dataset.

Parameters:
dataset: Union[datasets.ExplainableDataset, pandas.DataFrame, datatable.Frame]

Dataset used to train the model.

target_col: str

Target column name. If specified, the target column will be removed from the used features, otherwise it will be included.

model_type_str: str

Model type string to be used in exception messages.

Returns:
List[str]

List of features used by the model.
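A sketch on a toy frame (column names and values are illustrative):

import pandas as pd

from h2o_sonar.lib.api.models import guess_model_labels, guess_model_used_features

df = pd.DataFrame(
    {
        "age": [25, 40, 31],
        "income": [30_000, 52_000, 41_000],
        "default": ["no", "yes", "no"],
    }
)

# All columns except the target column.
features = guess_model_used_features(df, target_col="default")

# Model labels guessed from the dataset.
labels = guess_model_labels(df, target_col="default")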

h2o_sonar.lib.api.persistences module

class h2o_sonar.lib.api.persistences.ExplainerPersistence(data_dir: str, username: str, explainer_id: str, explainer_job_key: str, mli_key: str = None, store_persistence: Persistence | None = None)

Bases: InterpretationPersistence

Explainer persistence.

Filesystem structure:

mli_experiment_<UUID>/ (MLI interpretation) OR explanation_<job UUID>/ (ad hoc)
    explainer_<explainer ID>_<job UUID>/
        <explanation name>/
            explanation.<extension>
            ... extra files completing main explanation file allowed in this dir
        work/
            ... directory which can be used for intermediary results persistence

Web access:

http://<HOST>:<PORT>/files/mli_experiment_<UUID>/...
http://<HOST>:<PORT>/files/explanation_<UUID>/...

Hints:

  • Explainer and explanation names are checked to contain safe characters only (alpha, num, ., _ and -). IDs are preserved (filesystem/runtime match).

  • Format identifiers (MIME types) are processed to contain safe characters only.

  • explanation.<extension> is the “index file” - the directory may also contain other files which form/support the explanations.

  • Explainer may be executed multiple times within one MLI interpretation, therefore uniqueness is guaranteed by job UUID.

  • Datatable explanation is canonical (always present), others are optional.

Examples

# MLI interpretation
mli_experiment_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

    # OOTB PD explainer
    explainer_h2oaicore.h2o_sonar.oss.byor.explainers.pd.PD_4d774e62-3c67...06530ed5ceb/
        global_partial_dependence/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
        local_individual_conditional_explanation/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
                feature_1_class_1_pd.json
                ...
                feature_n_class_n_pd.json

    # hot deployed feature importance explainer
    explainer_False_test_kernel_shap_f72edb06_...er.TestKernelShap_4d7...d5ceb/
        local_feature_importance/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json

# Ad hoc explainer run
explanation_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

    # OOTB feature importance explainer
    explainer_h2oaicore.h2o_sonar.oss.byor.explainers.kernel_shap.KernelShap_4d7...ceb/
        global_feature_importance/
            application_vnd_h2oai_datatable_jay/
                explanation.jay
            application_json/
                explanation.json
DIR_EXPLAINER = 'explainer_'
DIR_INSIGHTS = 'insights'
DIR_LOG = 'log'
DIR_PROBLEMS = 'problems'
DIR_WORK = 'work'
EXPLAINER_LOG_PREFIX = 'explainer_run_'
EXPLAINER_LOG_SUFFIX_ANON = '_anonymized.log'
FILE_DONE_DONE = 'EXPLAINER_DONE'
FILE_DONE_FAILED = 'EXPLAINER_FAILED'
FILE_EXPLAINER_PICKLE = 'explainer.pickle'
FILE_EXPLANATION = 'explanation'
FILE_INSIGHTS = 'insights_and_actions.json'
FILE_ON_DEMAND_EXPLANATION_SUFFIX = 'on_demand_explanation.txt'
FILE_PROBLEMS = 'problems_and_actions.json'
FILE_RESULT_DESCRIPTOR = 'result_descriptor.json'
property explainer_id: str
property explainer_job_key: str
static get_dirs_for_explainer_id(data_dir: str, username: str, mli_key: str, explainer_id: str, explainer_job_key: str | None = None) list
get_evaluator_working_file(file_name: str) str
get_explainer_ann_log_file() str
get_explainer_ann_log_path() str
get_explainer_dir() str
get_explainer_dir_archive() str
get_explainer_insights_dir() str
get_explainer_insights_file(file_name: str) str
get_explainer_log_dir() str
get_explainer_log_file() str
get_explainer_log_path() str
get_explainer_problems_dir() str
get_explainer_problems_file(file_name: str) str
get_explainer_working_dir() str
get_explainer_working_file(file_name: str) str
get_explanation_dir_path(explanation_type: str, explanation_format: str) str

Get explanation directory path.

Parameters:
explanation_type: str

Explanation identifier returned by explanation_type().

explanation_format: str

Format MIME type.

Returns:
str

Path to the directory with the explanation.

get_explanation_file_path(explanation_type: str, explanation_format: str, explanation_file: str = None) str
get_explanation_meta_path(explanation_type: str, explanation_format: str) str
static get_key_for_explainer_dir(explainer_dir_path: str) str | None
static get_locators_for_explainer_id(data_dir: str, username: str, mli_key: str, explainer_id: str, explainer_job_key: str | None = None) List[Tuple[str, str]] | None
get_relative_path(path: str, base_entity: str = 'interpretation')
get_result_descriptor_file_path() str
load_insights() List[Dict]

Load insights.

load_problems() List[Dict]

Load model problems.

load_result_descriptor() Dict
static make_dir(target_dir)
make_explainer_dir()
make_explainer_insights_dir()
make_explainer_log_dir()
make_explainer_problems_dir()
make_explainer_sandbox(dai_params=None)

Create explainer working dir and log directories as well as common files.

Parameters:
dai_params: CommonDaiExplainerParameters

Common explainer parameters to be stored in the root of the interpretation (if they don't already exist).

make_explainer_working_dir()
static makedirs(path: str, exist_ok=True)

Avoid some inefficiency in os.makedirs().

Parameters:
path: str

Path to directory/ies to create.

exist_ok: bool

Do not fail if the directory already exists.

Returns:
str

Path to the newly created directory.

resolve_mli_path(mli_key: str, username: str)

Resolve the MLI interpretation directory: it should be in a directory with the username in the path, but it may also be possible to create it in a directory without the username using config.per_user_directories (or it can be migrated from 1.8.x).

rm_explainer_dir()
save_insights(insights: List[Dict])

Save insights.

static save_json(data: dict, path: str)
save_problems(problems: List[Dict])

Save model problems.

property username: str
class h2o_sonar.lib.api.persistences.FilesystemPersistence(base_path: str | Path | None = None, logger=None)

Bases: Persistence

File-system store persistence.

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path)
delete_file(key: str | Path) bool
delete_tree(key: str | Path)
exists(key: str | Path) bool
static flush_dir_for_file(file_path: str) bool
static get_default_cwl()

Get the default current working location when not specified by the user.

getcwl()

Get current working location - directory, memory key or DB locator.

is_dir(key: str | Path) bool
is_file(key: str | Path) bool
list_dir(key: str | Path) List
list_files_by_wildcard(key: str | Path, wildcard: str) List
load(key: str | Path, data_type: PersistenceDataType | None = None) Any
load_json(key: str | Path) Dict | List
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str | ~pathlib.Path, zip_key: str | ~pathlib.Path, file_filter=<function FilesystemPersistence.<lambda>>)

Create ZIP archive of given directory.

Parameters:
src_key: str

Absolute path to the directory to be archived.

zip_key: str

ZIP archive path.

file_filter: Callable

Function to be used for filtering - it gets the path relative to the source directory as a parameter and returns a boolean indicating whether to keep (False) or filter the file out (True).
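A usage sketch; the directory and archive paths are illustrative:

from h2o_sonar.lib.api.persistences import FilesystemPersistence

persistence = FilesystemPersistence(base_path="/tmp/h2o-sonar")

# Archive an interpretation directory, filtering out log files: the filter
# gets a path relative to the source directory and returns True to filter
# the file out, False to keep it.
persistence.make_dir_zip_archive(
    src_key="/tmp/h2o-sonar/mli_experiment_1234",
    zip_key="/tmp/h2o-sonar/mli_experiment_1234.zip",
    file_filter=lambda rel_path: str(rel_path).endswith(".log"),
)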

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.text)
static save_json(key: str | Path, data: Dict | List, indent: int = 4, save_explainer_params=False) Dict
touch(key: str | Path)
property type
update(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
class h2o_sonar.lib.api.persistences.InMemoryPersistence

Bases: Persistence

In-memory key-based store persistence.

DIR = <h2o_sonar.lib.api.persistences.InMemoryPersistence.Directory object>
class Directory

Bases: object

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path, logger=None)
delete_file(key: str | Path) bool
delete_tree(key: str | Path)
exists(key: str | Path) bool
static get_default_cwl()

Get the default current working location when not specified by the user.

getcwl()

Get current working location - directory, memory key or DB locator.

is_dir(key: str | Path) bool
is_file(key: str | Path) bool
list_dir(key: str | Path) List
list_files_by_wildcard(key: str | Path, wildcard: str) List
load(key: str | Path, data_type: PersistenceDataType = PersistenceDataType.binary) Any
load_json(key: str | Path) dict
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str | ~pathlib.Path, zip_key: str | ~pathlib.Path, file_filter=<function InMemoryPersistence.<lambda>>)

Make ZIP archive of given source directory.

Parameters:
src_key: str

Source key (directory path).

zip_key: str

ZIP key (ZIP file path).

file_filter: Callable

File filter.

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
save_json(key: str | Path, data: Dict, indent: int = 4, save_explainer_params=False)
touch(key: str | Path)
property type
class h2o_sonar.lib.api.persistences.InterpretationPersistence(data_dir: str, username: str, mli_key: str = None, ad_hoc_explainer_job_key: str = None, store_persistence: Persistence | None = None, logger=None)

Bases: object

Interpretation persistence - class used to manage interpretation files and directories within the base data directory (or its equivalent on a particular store type).

Once extended to actual writing/reading of files, it should also simplify switching stores - e.g. to remote/multinode/distributed.

Filesystem structure:

<base data dir>/

mli_experiment_<UUID>/  ... MLI interpretation (bulk explainers run)
explanation_<job UUID>/ ... ad hoc explainer run

Examples

# MLI interpretation
mli_experiment_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

# Ad hoc explainer run
explanation_4d774e62-3c67-11ea-9c7e-106530ed5ceb/

DIR_AD_HOC_EXPLANATION = 'explanation_'
DIR_AUTOML_EXPERIMENT = 'h2oai_experiment_'
DIR_MLI_EXPERIMENT = 'mli_experiment_'
DIR_MLI_TS_EXPERIMENT = 'mli_experiment_timeseries_'
FILE_COMMON_PARAMS = 'explainers_common_parameters.json'
FILE_EXPERIMENT_ID_COLS = 'experiment_id_columns.json'
FILE_EXPERIMENT_IMAGE = 'IS_IMAGE'
FILE_EXPERIMENT_TS = 'IS_TIMESERIES'
FILE_H2O_SONAR_HTML = 'h2o-sonar.html'
FILE_INTERPRETATION_HTML = 'interpretation.html'
FILE_INTERPRETATION_HTML_4_PDF = 'interpretation-detailed.html'
FILE_INTERPRETATION_JSON = 'interpretation.json'
FILE_INTERPRETATION_PDF = 'interpretation-detailed.pdf'
FILE_MLI_EXPERIMENT_LOG = 'mli_experiment_log_'
FILE_PREFIX_DATASET = 'dataset_'
FILE_PROGRESS_JSON = 'progress.json'
KEY_E_PARAMS = 'explainers_parameters'
KEY_RESULT = 'result'
property ad_hoc_job_key: str
property base_dir: str
create_dataset_path() str
property data_dir: str
static get_ad_hoc_mli_dir_name(data_dir: str, username: str, explainer_job_key: str)
static get_async_log_file_name(mli_key: str)
static get_base_dir(data_dir: str, dir_name: str)
get_base_dir_file(file_name: str) str
get_experiment_id_cols_path() str
get_html_4_pdf_path() str
get_html_path() str
get_json_path() str
static get_mli_dir_name(data_dir: str, username: str, mli_key: str)
get_pdf_path() str
is_common_params()
static is_safe_name(name: str) bool

Check whether the given name is formed by alphanumeric chars (and therefore filesystem safe).

static list_interpretations(data_dir: str, username: str, store_persistence: Persistence, paths: bool = True)

List interpretations.

Parameters:
data_dir: str

H2O Eval Studio results directory.

username: str

Username.

store_persistence: Persistence

Handle to the store persistence.

paths: bool

Return a list of paths (e.g. file-system paths) if True (default), else return interpretation UUIDs.

load_common_params(patch_sequential_execution: bool | None = None) CommonInterpretationParams

Load the CommonInterpretationParams entity from the interpretation root dir.

load_explainers_params(explainer_id: str = '') Dict

Load the explainers parameters dictionary from the interpretation JSON.

load_is_image_experiment()
load_is_timeseries_experiment()
load_message_entity(path: str) dict
make_base_dir()
make_dir_zip_archive(src_dir_path: str | ~pathlib.Path, zip_path: str | ~pathlib.Path, file_filter=<function InterpretationPersistence.<lambda>>)
make_interpretation_sandbox()

Create interpretation directory as well as common files.

make_tmp_dir()
property mli_key: str
resolve_model_path(model_path: str)

Resolve the fitted model path, as there are several combinations of DAI configuration and experiment creation (path):

  • the fitted model path MAY have a <username> prefix, based on whether it was created in a 1.8.x version or with config.per_user_directories=True/False

  • the current user directory may be either the data directory, or it may have the username in the path, based on the config.per_user_directories configuration item value

Parameters:
model_path: str

(Un)fitted model relative path as present on model entity as model.fitted_model_path.

rm_base_dir(logger=None)
rm_dir(dir_path)
save_as_html(interpretation_html: str)

Save interpretation as HTML.

save_as_json(interpretation_dict: dict)

Save interpretation as JSON.

save_as_pdf(interpretation)

Save interpretation as PDF.

save_common_params(entity: CommonInterpretationParams)

Save the CommonInterpretationParams entity to the interpretation root dir.

save_experiment_type_hints(is_timeseries: bool = False, is_image: bool = False)

Write a hint (in a backward-compatible manner) indicating the experiment type (like time series or image) to the interpretation directory (IID is the default).

Parameters:
is_timeseries: bool

Write time series hint.

is_image: bool

Write image hint.

save_message_entity(entity, path: str)
property tmp_dir: str
static to_alphanum_name(name: str)

Convert the given name to a filesystem-safe string formed by alphanumeric characters.

static to_server_file_path(data_dir: str, path: str)

Return the bare server file path without the data directory.

static to_server_path(data_dir: str, path: str)

Return the bare server path without the data directory.

property user_dir: str
class h2o_sonar.lib.api.persistences.JsonPersistableExplanations

Bases: ABC

Interface for classes implementing explanations JSON file persistence.

Examples

ice = ICE("Step by step ICE loading")
ice.load_json("cache/ice.json")

es = ice.explanations()

es = ICE("On the fly").explain(
    ["Feature"],
    X,
    predict_method=scorer,
).save_json()
class PandasJSonEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

Custom Pandas DataFrames serializer.

default(o)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return super().default(o)
static check_explanations_serializability(explanations)
property default_json_file_name
abstract load_json(path=None)

Load explanations from a JSON file.

Parameters:
path: str

Local file path from where to load JSON explanations. If path isn't specified, then explanations are loaded from explanations.json in the current directory.

Returns:
dict

Explanations deserialized from JSON.

abstract save_json(path=None)

Save explanations as a JSON file.

Parameters:
path: str

Local file path where to store explanations. If path isn't specified, then explanations are stored to 'explanations.json' in the current directory.
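A minimal sketch of implementing this interface; DictExplanations is illustrative, not part of the library:

import json

from h2o_sonar.lib.api.persistences import JsonPersistableExplanations


class DictExplanations(JsonPersistableExplanations):
    # Hypothetical explanations holder persisted as a JSON file.

    def __init__(self, explanations=None):
        self._explanations = explanations or {}

    def load_json(self, path=None):
        # Fall back to explanations.json in the current directory.
        with open(path or "explanations.json") as f:
            self._explanations = json.load(f)
        return self._explanations

    def save_json(self, path=None):
        with open(path or "explanations.json", "w") as f:
            json.dump(self._explanations, f, indent=4)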

class h2o_sonar.lib.api.persistences.NanEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

encode(obj)

Return a JSON string representation of a Python data structure.

>>> from json.encoder import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'
class h2o_sonar.lib.api.persistences.Persistence(logger=None)

Bases: ABC

Key/value-based persistence API interface provides a uniform, store-agnostic API allowing explainers to use the chosen store type, regardless of container runtime or technology, to store explainer results (explanations). It aims to enable writing identical code regardless of whether explanation data is stored/loaded to/from filesystem, memory or DB.

Interface and implementations are based on opaque string keys (which might be filesystem paths, dictionary keys or NoSQL database keys) and data types (text, binary, ...). On implementation initialization, the base in-memory reference, filesystem path or DB connection information is set.

There are the following special types of data which are written to the filesystem (network or memory) regardless of the chosen store type:

  • temporary files (explainer work/ directory)

  • log files (explainer log/ directory)

Therefore, an explainer sandbox is always created on the file-system, but it might be located in a user-specified directory (in case of the file-system store) or the system temp directory (in case of the in-memory or database store).

The persistence API is written with security (barriers) and performance in mind.

PREFIX_INTERNAL_STORE = 'h2o_sonar-of-'
static check_key(key: str | Path) str

Check and fix key.

copy_file(from_key: str | Path, to_key: str | Path)
delete(key: str | Path) bool
delete_dir_contents(key: str | Path, logger=None)
delete_file(key: str | Path) bool
static delete_temp_dir(tmp_dir_path: str | Path)
delete_tree(key: str | Path) bool
exists(key: str | Path) bool
static flush_dir_for_file(file_path: str) bool
getcwl()

Get current working location - directory, memory key or DB locator.

static is_binary_file(key: str) bool
is_dir(key: str | Path) bool
is_dir_or_file(key: str | Path) bool
is_file(key: str | Path) bool
static key_folder(key: str | Path) str

Get (parent) folder key for given key (equivalent of os.path.dirname()).

list_dir(key: str | Path) List
list_files_by_wildcard(key: str | Path, wildcard: str) List
load(key: str | Path, data_type: PersistenceDataType = PersistenceDataType.binary) Any
load_json(key: str | Path) dict
make_dir(key: str | Path)
make_dir_zip_archive(src_key: str, zip_key: str, file_filter=<function Persistence.<lambda>>)

Make ZIP archive of given source directory.

Parameters:
src_keystr

Source key (directory path).

zip_keystr

ZIP key (ZIP file path).

file_filterCallable

File filter.

static make_key(*args) str

Assemble key (path) from the string arguments given to this function (equivalent of os.path.join()).

static make_temp_dir() str
static make_temp_file(file_name: str) str
path_to_internal(path: str | Path) str
static safe_name(key: str) str

Encode the name to be store (file-system) safe (it can be decoded if needed).

save(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
touch(key: str | Path)
property type
update(key: str | Path, data, data_type: PersistenceDataType = PersistenceDataType.binary)
class h2o_sonar.lib.api.persistences.PersistenceApi(logger: SonarLogger | None = None)

Bases: ABC

Factory which creates Persistence implementations for various store types and purposes which are available in specific runtime and/or container(s).

create_explainer_persistence(store_persistence: Persistence, base_path: str | Path, interpretation_key: str, explainer_id: str, explainer_job_key: str, username: str = '') ExplainerPersistence

Create explainer persistence atop the given store persistence, e.g. to store explainer data to a database.

create_interpretation_persistence(store_persistence: Persistence, base_path: str | Path, interpretation_key: str, username: str = '') InterpretationPersistence

Create interpretation persistence atop the given store persistence, e.g. to store interpretations in-memory.

create_persistence(persistence_type: PersistenceType = PersistenceType.file_system, base_path: str = '', connection_string: str = '') InMemoryPersistence | FilesystemPersistence

Create persistence of the given store type - file-system, in-memory or DB. The default store persistence is file-system persistence based in the current directory.

Parameters:
persistence_type: PersistenceType

Type of the persistence to create.

base_path: str

Optional root path of the persistence on the host store (where meaningful, e.g. file-system).

connection_string: str

Optional connection string (where meaningful, e.g. database).

Returns:
Any

Persistence to load/store container and explainer artifacts.
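A usage sketch creating an in-memory store, assuming the factory can be instantiated directly (keys and data are illustrative):

from h2o_sonar.lib.api.persistences import (
    PersistenceApi,
    PersistenceDataType,
    PersistenceType,
)

persistence = PersistenceApi().create_persistence(
    persistence_type=PersistenceType.in_memory,
)

# Keys are opaque: dictionary keys here, file paths for the file-system store.
persistence.save("runs/status", "ok", data_type=PersistenceDataType.text)
status = persistence.load("runs/status", data_type=PersistenceDataType.text)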

get_cwl(persistence_type: PersistenceType = PersistenceType.file_system)
class h2o_sonar.lib.api.persistences.PersistenceDataType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

binary = 1
datatable = 2
json = 4
text = 3
class h2o_sonar.lib.api.persistences.PersistenceType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

database = 3
file_system = 1
in_memory = 2
class h2o_sonar.lib.api.persistences.RobustEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: JSONEncoder

default(obj)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return super().default(o)

h2o_sonar.lib.api.plots module

class h2o_sonar.lib.api.plots.Data3dPlot

Bases: object

Plot 3D data:

  • heatmap

  • 3D surface plot

  • 3D contour plot

PLOT_TYPES = ['heatmap', 'contour-3d', 'surface-3d']
PLOT_TYPE_CONTOUR = 'contour-3d'
PLOT_TYPE_HEATMAP = 'heatmap'
PLOT_TYPE_SURFACE = 'surface-3d'
static plot(x_axis_labels: List, y_axis_labels: List, heatmap_data: Frame, chart_title: str = '', x_axis_label: str = '', y_axis_label: str = '', plot_type: str = 'heatmap', color_map: str = 'autumn', figsize=(12, 10), dpi=120, plot_file_path: str = '', logger=None, log_name: str = '')

Heatmap plot.

Parameters:
x_axis_labels: List

Horizontal axis labels.

y_axis_labels: List

Vertical axis labels.

heatmap_data: datatable.Frame

Datatable frame with the heatmap data (column names don't matter, only the data are relevant).

chart_title: str

Chart title.

x_axis_label: str

Horizontal axis label.

y_axis_label: str

Vertical axis label.

plot_type: str

Plot type, one of PLOT_TYPES.

color_map: str

Matplotlib color map name.

figsize: tuple

Figure size.

dpi: int

Dots per inch.

plot_file_path: str

Path to save the plot to.

logger

Logger instance.

log_name: str

Name of the logger.
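A heatmap sketch on a toy 2x3 grid (labels, data, and the output path are illustrative):

import datatable as dt

from h2o_sonar.lib.api.plots import Data3dPlot

# Three columns x two rows of heatmap values.
heatmap = dt.Frame([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])

Data3dPlot.plot(
    x_axis_labels=["a", "b", "c"],
    y_axis_labels=["low", "high"],
    heatmap_data=heatmap,
    chart_title="Toy heatmap",
    plot_type=Data3dPlot.PLOT_TYPE_HEATMAP,
    plot_file_path="toy-heatmap.png",  # illustrative output path
)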

class h2o_sonar.lib.api.plots.ScatterFeatImpPlot

Bases: object

Scatter plot feature importance representation is based on the chart from:

https://github.com/slundberg/shap

static plot(contributions, frame, alpha: float = 1.0, colormap: str | None = None, figsize=(12, 12), jitter: float = 0.35, chart_title: str = 'Feature importance summary plot', x_label: str = 'Value', y_label: str = 'Feature', thermometer_label: str = 'Normalized feature value', columns=None, top_n_features: int = 20, samples: int | None = None, colorize_factors: bool = True, drop_zero_contribs=True, hard_asserts=False, logger=None) Figure

Feature importance summary plot.

Summary plot shows the contribution of features for each instance. The sum of the feature contributions and the bias term is equal to the raw prediction of the model, i.e., the prediction before applying the inverse link function.

Parameters:
contributions

Pandas contributions frame with coefficients. Frame column names are the (sanitized) feature names, rows correspond to dataset rows, cells are coefficients.

frame

Pandas dataset frame with values. Frame column names are the (sanitized) feature names, rows correspond to dataset rows, cells are values.

columns

Either a list of columns or column indices to show. If specified, the parameter top_n_features will be ignored.

top_n_features: int

A number of columns to pick using variable importance (where applicable). Set to -1 to show all features.

samples

Maximum number of observations to use; if lower than the number of rows in the frame, take a random sample.

colorize_factors

If True, use colors from the colormap to colorize the factors; otherwise all levels will have the same color.

alpha

Transparency of the points.

colormap

Colormap to use instead of the default blue to red colormap.

figsize

Figure size - passed directly to matplotlib.

jitter

Amount of jitter used to show the point density.

chart_title: str

Chart title.

x_label: str

Chart x-axis label.

y_label: str

Chart y-axis label.

thermometer_label: str

Chart thermometer label.

drop_zero_contribs

Whether to drop features that have zero contribution. Features that are not used in the final model will have zero contribution.

hard_asserts: bool

Used in testing to raise exceptions in try/except statements.

logger

Optional logger object.

Returns:
pyplot.Figure:

A matplotlib figure object which can be saved or displayed.
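A sketch on toy contributions and values frames (all data are illustrative):

import pandas as pd

from h2o_sonar.lib.api.plots import ScatterFeatImpPlot

# Per-row feature contributions and the corresponding feature values;
# the two frames must share the (sanitized) feature column names.
contributions = pd.DataFrame({"age": [0.2, -0.1, 0.05], "income": [-0.3, 0.4, 0.1]})
values = pd.DataFrame({"age": [25, 40, 31], "income": [30_000, 52_000, 41_000]})

fig = ScatterFeatImpPlot.plot(contributions, values, top_n_features=2)
fig.savefig("summary-plot.png")  # the returned matplotlib figure can be saved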

h2o_sonar.lib.api.plots.safe_plot_names(column_list: List[str]) List

Return a list of column names that exclude problematic special characters for matplotlib plotting functions.

Parameters:
column_list: List[str]

List of column names.

Returns:
List:

List with column names that are safe to plot.
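A usage sketch (the column names are illustrative):

from h2o_sonar.lib.api.plots import safe_plot_names

# Characters such as '$' can trigger matplotlib's mathtext parsing.
plottable = safe_plot_names(["income_$", "age", "rate%"])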

h2o_sonar.lib.api.problems module

class h2o_sonar.lib.api.problems.AVIDProblemCode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: ProblemCode

Problem codes from AVID https://docs.avidml.org/taxonomy/effect-sep-view

E0100_BIAS = ('E0100', 'Concerns of algorithms propagating societal bias')
E0200_EXPLAINABILITY = ('E0200', 'Ability to explain decisions made by AI')
E0300_TOXICITY = ('E0300', 'Perpetuating/causing/being affected by negative user actions')
E0400_MISINFORMATION = ('E0400', 'Perpetuating/causing the spread of falsehoods')
P0100_DATA = ('P0100', 'Problems arising due to faults in the data pipeline')
P0200_MODEL = ('P0200', 'Ability for the AI to perform as intended')
P0300_PRIVACY = ('P0300', 'Protect leakage of user information as required by rules and regulations')
P0400_SAFETY = ('P0400', 'Minimizing maximum downstream harms')
S0400_MODEL_BYPASS = ('S0400', 'Intentionally try to make a model perform poorly')
S0500_EXFILTRATION = ('S0500', 'Directly or indirectly exfiltrate ML artifacts')
S0600_DATA_POISONING = ('S0600', 'Usage of poisoned data in the ML pipeline')
class h2o_sonar.lib.api.problems.AVIDProblemCodeType(code, description)

Bases: tuple

code

Alias for field number 0

description

Alias for field number 1

class h2o_sonar.lib.api.problems.ProblemAndAction(description: str, description_html: Airium | None = None, severity: ProblemSeverity = ProblemSeverity.medium, problem_type: str = 'problem', problem_attrs: Dict = None, actions_description: str = '', actions_codes: List[str] = None, explainer_id: str = '', explainer_name: str = '', evaluator_id: str = '', evaluator_name: str = '', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', resources: List[str] = None, problem_code: ProblemCode = None)

Bases: AbcProblemInsight

Instance of this class represents a problem of the interpreted model identified by an explainer. Apart from the problem description, the entry also provides the problem severity, problem category (brief characteristic), problem attributes (a dictionary of machine-processable data describing the problem, which might be used for instance as input to actions), a textual description of suggested actions to mitigate the problem (actionability), the explainer which detected the problem, and references to resources (explanations, document URLs, ...).

KEY_PROBLEM_ATTRS = 'problem_attrs'
KEY_PROBLEM_TYPE = 'problem_type'
KEY_SEVERITY = 'severity'
static from_dict(problem_dict: Dict) ProblemAndAction
to_dict() Dict
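A construction sketch; the problem content below is illustrative:

from h2o_sonar.lib.api.problems import (
    AVIDProblemCode,
    ProblemAndAction,
    ProblemSeverity,
)

problem = ProblemAndAction(
    description="Feature 'zip_code' dominates feature importances.",
    severity=ProblemSeverity.medium,
    problem_type="bias",
    problem_attrs={"feature": "zip_code"},
    actions_description="Review the feature for proxy bias; consider dropping it.",
    problem_code=AVIDProblemCode.E0100_BIAS,
)
problem_dict = problem.to_dict()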
class h2o_sonar.lib.api.problems.ProblemCode(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

class h2o_sonar.lib.api.problems.ProblemSeverity(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

static compare(severity1, severity2) int
high = 1
low = 3
medium = 2
h2o_sonar.lib.api.problems.problems_for_bool_leaderboard(evaluator, leaderboard, primary_metric_meta: MetricMeta, metric_threshold: float | None = None, severity: ProblemSeverity | None = None, problem_type: str = 'accuracy', problem_code: ProblemCode = None, explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '') None

Generate problems based on the heatmap leaderboard analytics.

For models whose average Passes metric score is below the threshold, a problem is created with the description of the problem, severity, problem type, problem attributes, and actions description.

h2o_sonar.lib.api.problems.problems_for_cls_leaderboard(evaluator, leaderboard, metric_threshold: float | None = None, primary_metric_meta=None, severity: ProblemSeverity | None = None, problem_type: str = 'classification', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '', problem_code: ProblemCode = None) None

Generate problems based on the classification leaderboard analytics.

h2o_sonar.lib.api.problems.problems_for_heat_leaderboard(evaluator, leaderboard, metric_threshold: float | None = None, primary_metric_meta=None, severity: ProblemSeverity | None = None, problem_type: str = 'accuracy', explanation_type: str = '', explanation_name: str = '', explanation_mime: str = '', actions_description: str = '', extra_description_actions: str = '', problem_code: ProblemCode = None) None

Generate problems based on the heatmap leaderboard analytics.

h2o_sonar.lib.api.results module

class h2o_sonar.lib.api.results.Data3dResult(persistence: ExplainerPersistence, explainer_id: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_names: str = '') Dict
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, feature_names: str = '', plot_type: str = 'surface-3d', title: str = '')
class h2o_sonar.lib.api.results.DiaResult(persistence: ExplainerPersistence, explainer_id: str, dia_entry_constants: DiaEntryConstant, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

class DiaCategory(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)

Bases: Enum

DIA_CATEGORY_CM = 'cm'
DIA_CATEGORY_DISPARITY = 'disparity'
DIA_CATEGORY_ME_SMD = 'me_smd'
DIA_CATEGORY_PARITY = 'parity'
DIA_METRICS = 'metrics'
class DiaEntryConstant(dia_entity_file: str, param_feature_summaries: str, param_feature_name: str, param_name: str, param_features: str, ref_levels: str)

Bases: object

data(*, feature_name: str, category: DiaCategory | str, ref_level: int | str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
params() Dict
plot(*, feature_name: str, metrics_of_interest: str | List[str] | None = None, file_path: str = '') List[str]
class h2o_sonar.lib.api.results.DtResult(persistence: ExplainerPersistence, explainer_id: str, explainer_name: str, h2o_sonar_config=None, highlight_highest_residual: bool = False, logger=None)

Bases: ExplainerResult

data()
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, clazz: str | None = None)
class h2o_sonar.lib.api.results.FeatureImportanceResult(persistence: ~h2o_sonar.lib.api.persistences.ExplainerPersistence, explainer_id: str = '', chart_title: str = 'Global Feature Importance', chart_x_axis: str = 'feature', chart_y_axis: str = 'importance', h2o_sonar_config=None, logger=None, explanation_format: ~typing.Type[~h2o_sonar.lib.api.formats.ExplanationFormat] = <class 'h2o_sonar.lib.api.formats.GlobalFeatImpJSonFormat'>, explanation: ~typing.Type[~h2o_sonar.lib.api.explanations.Explanation] = <class 'h2o_sonar.lib.api.explanations.GlobalFeatImpExplanation'>)

Bases: ExplainerResult

data(*, clazz: str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, clazz: str | None = None, file_path: str = '')
class h2o_sonar.lib.api.results.LeaderboardResult(persistence: ~h2o_sonar.lib.api.persistences.ExplainerPersistence, explainer_id: str = '', chart_title: str = 'Leaderboard', chart_x_axis: str = 'metrics', chart_y_axis: str = 'models', h2o_sonar_config=None, logger=None, explanation_format: ~typing.Type[~h2o_sonar.lib.api.formats.ExplanationFormat] = <class 'h2o_sonar.lib.api.formats.LlmHeatmapLeaderboardJSonFormat'>, explanation: ~typing.Type[~h2o_sonar.lib.api.explanations.Explanation] = <class 'h2o_sonar.lib.api.explanations.LlmHeatmapLeaderboardExplanation'>)

Bases: ExplainerResult

Make (heatmap-based, bool-based, …) leaderboard evaluator result.

data(*, metric_id: str | None = None) Dict
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, metric_id: str | None = None, file_path: str = '')
class h2o_sonar.lib.api.results.PdResult(persistence: ExplainerPersistence, explainer_id: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_name: str, clazz: str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, feature_name, clazz=None, override_feature_type: Literal['categorical', 'numeric'] | None = None, file_path: str = '', is_problematic: bool = False)
exception h2o_sonar.lib.api.results.ResultValueError

Bases: ValueError

class h2o_sonar.lib.api.results.SummaryShapResult(persistence: ExplainerPersistence, explainer_id: str, raw_contribs_idx_filename: str, h2o_sonar_config=None, logger=None)

Bases: ExplainerResult

data(*, feature_names: str | List[str] | None = None, clazz: str | None = None) Frame
classmethod help() Dict[str, List[Dict[str, str | bool]]]
plot(*, feature_names: str | List[str] | None = None, clazz: str | None = None)
class h2o_sonar.lib.api.results.TemplateResult(persistence: ExplainerPersistence, explainer_id: str, explainer_name: str, logger=None)

Bases: ExplainerResult

data(**kwargs) Frame
plot(**kwargs)
h2o_sonar.lib.api.results.list_in_english(items: List[str], quote_item=True) str
h2o_sonar.lib.api.results.matplotlib_closing(show: bool)

Module contents