# Source code for validatex.core.result

"""
Validation result data models.

Every expectation run produces an :class:`ExpectationResult`.
A full validation run aggregates them into a :class:`ValidationResult`.
"""

from __future__ import annotations

import json
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional

# ---------------------------------------------------------------------------
# Severity constants
# ---------------------------------------------------------------------------

SEVERITY_CRITICAL = "critical"
SEVERITY_WARNING = "warning"
SEVERITY_INFO = "info"

# Numeric weight attached to each severity level.
SEVERITY_WEIGHTS = dict(
    zip(
        (SEVERITY_CRITICAL, SEVERITY_WARNING, SEVERITY_INFO),
        (3, 2, 1),
    )
)

# Default severity for each known expectation type, grouped by level.
# Groups are merged in critical -> warning -> info order so the resulting
# key order is the same as a flat literal listing them in that order.
_SEVERITY_MAP: Dict[str, str] = {
    # Critical — data integrity
    **dict.fromkeys(
        (
            "expect_column_to_exist",
            "expect_column_to_not_be_null",
            "expect_column_values_to_be_unique",
            "expect_table_row_count_to_equal",
            "expect_table_row_count_to_be_between",
            "expect_table_columns_to_match_ordered_list",
            "expect_table_columns_to_match_set",
            "expect_table_column_count_to_equal",
            "expect_compound_columns_to_be_unique",
        ),
        SEVERITY_CRITICAL,
    ),
    # Warning — data quality
    **dict.fromkeys(
        (
            "expect_column_values_to_be_between",
            "expect_column_values_to_be_in_set",
            "expect_column_values_to_not_be_in_set",
            "expect_column_values_to_match_regex",
            "expect_column_values_to_be_of_type",
            "expect_column_values_to_be_dateutil_parseable",
            "expect_column_pair_values_a_to_be_greater_than_b",
            "expect_column_pair_values_to_be_equal",
            "expect_multicolumn_sum_to_equal",
        ),
        SEVERITY_WARNING,
    ),
    # Info — statistical / informational
    **dict.fromkeys(
        (
            "expect_column_value_lengths_to_be_between",
            "expect_column_max_to_be_between",
            "expect_column_min_to_be_between",
            "expect_column_mean_to_be_between",
            "expect_column_stdev_to_be_between",
            "expect_column_distinct_values_to_be_in_set",
            "expect_column_proportion_of_unique_values_to_be_between",
        ),
        SEVERITY_INFO,
    ),
}


def get_severity(expectation_type: str, meta: Optional[Dict] = None) -> str:
    """Resolve the severity level for an expectation type.

    A ``"severity"`` entry in *meta* takes precedence and is returned as a
    string. Otherwise the built-in default mapping is consulted, and
    expectation types missing from it fall back to the warning level.
    """
    if not meta or "severity" not in meta:
        return _SEVERITY_MAP.get(expectation_type, SEVERITY_WARNING)
    return str(meta["severity"])
# ---------------------------------------------------------------------------
# Native-type coercion helper
# ---------------------------------------------------------------------------
def to_native(value: Any) -> Any:
    """
    Convert numpy / pandas scalar types to native Python types.

    Professional tools NEVER leak internal types like ``np.int64(20)``.

    Conversion rules:

    - ``None`` and exact ``bool`` / ``int`` / ``float`` / ``str`` instances
      are returned unchanged.
    - numpy boolean, integer, floating and string scalars become ``bool``,
      ``int``, ``float`` and ``str``.
    - numpy arrays (including 0-d arrays) become nested lists / scalars.
    - dicts are rebuilt with converted values; keys that are numpy scalars
      are converted as well, other keys are kept as they are.
    - lists and tuples become lists of converted items.
    - anything else is returned unchanged.

    numpy is optional: when it cannot be imported only the container rules
    apply.
    """
    if value is None:
        return None

    # Exact-type check on purpose: ``np.float64`` subclasses ``float`` and
    # ``np.str_`` subclasses ``str``, so an ``isinstance`` test here would
    # let those numpy scalars through unconverted.
    if type(value) in (bool, int, float, str):
        return value

    try:
        import numpy as np
    except ImportError:
        np = None

    if np is not None:
        if isinstance(value, np.bool_):
            return bool(value)
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, np.floating):
            return float(value)
        if isinstance(value, np.str_):
            return str(value)
        if isinstance(value, np.ndarray):
            # ``tolist()`` yields a bare scalar for 0-d arrays, so recurse on
            # the result instead of iterating over it.
            return to_native(value.tolist())

    # dict — recursively clean values, and numpy-scalar keys
    if isinstance(value, dict):

        def _clean_key(key: Any) -> Any:
            # Only numpy scalars are touched so that other hashable keys
            # (e.g. tuples) are never turned into unhashable lists.
            if np is not None and isinstance(key, np.generic):
                return to_native(key)
            return key

        return {_clean_key(k): to_native(v) for k, v in value.items()}

    # list / tuple
    if isinstance(value, (list, tuple)):
        return [to_native(item) for item in value]

    return value
# ---------------------------------------------------------------------------
# ExpectationResult
# ---------------------------------------------------------------------------
@dataclass
class ExpectationResult:
    """Result of a single expectation evaluation.

    Attributes:
        expectation_type: Name of the expectation; also used to look up the
            default severity.
        success: Whether the expectation passed.
        column: Column the expectation refers to, if any.
        observed_value: Value observed during evaluation (sanitized to
            native Python types on construction).
        element_count: Number of elements evaluated.
        unexpected_count: Number of unexpected elements.
        unexpected_percent: Percentage of unexpected elements.
        unexpected_values: Unexpected values (sanitized on construction).
        details: Extra details (sanitized on construction).
        exception_info: Error text; when truthy the status is ``"ERROR"``.
        meta: User metadata; a ``"severity"`` entry overrides the default
            severity.
    """

    expectation_type: str
    success: bool
    column: Optional[str] = None
    observed_value: Any = None
    element_count: int = 0
    unexpected_count: int = 0
    unexpected_percent: float = 0.0
    unexpected_values: List[Any] = field(default_factory=list)
    details: Dict[str, Any] = field(default_factory=dict)
    exception_info: Optional[str] = None
    meta: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Sanitize all numeric values to native Python types."""
        self.observed_value = to_native(self.observed_value)
        self.element_count = int(self.element_count) if self.element_count else 0
        self.unexpected_count = (
            int(self.unexpected_count) if self.unexpected_count else 0
        )
        self.unexpected_percent = (
            float(self.unexpected_percent) if self.unexpected_percent else 0.0
        )
        # An explicit ``None`` is treated as "no unexpected values". The
        # identity check (rather than ``or []``) keeps array-like inputs
        # working, since their truth value may be ambiguous.
        raw_values = [] if self.unexpected_values is None else self.unexpected_values
        self.unexpected_values = [to_native(v) for v in raw_values]
        self.details = to_native(self.details) or {}

    @property
    def status(self) -> str:
        """Return ``"ERROR"``, ``"PASSED"`` or ``"FAILED"``.

        An exception takes precedence over the ``success`` flag.
        """
        if self.exception_info:
            return "ERROR"
        return "PASSED" if self.success else "FAILED"

    @property
    def status_icon(self) -> str:
        """Return the emoji matching :attr:`status`."""
        icons = {"PASSED": "✅", "FAILED": "❌", "ERROR": "⚠️"}
        return icons.get(self.status, "❓")

    @property
    def severity(self) -> str:
        """Return severity level for this expectation."""
        return get_severity(self.expectation_type, self.meta)

    @property
    def severity_icon(self) -> str:
        """Return the emoji for :attr:`severity` (yellow for unknown levels)."""
        icons = {
            SEVERITY_CRITICAL: "🔴",
            SEVERITY_WARNING: "🟡",
            SEVERITY_INFO: "🔵",
        }
        return icons.get(self.severity, "🟡")

    @property
    def human_observed(self) -> str:
        """
        Return a human-readable string for the observed value.

        Converts raw dicts / technical strings into executive-friendly text.
        ``None`` and empty lists are shown as an em dash; lists are cut to
        their first 10 items.
        """
        val = self.observed_value
        if val is None:
            return "—"

        # Dict-style observed values → readable sentences
        if isinstance(val, dict):
            return self._describe_mapping(val)

        # String containing "unique out of" → reformat
        ratio = self._describe_unique_ratio(str(val))
        if ratio is not None:
            return ratio

        # List → join
        if isinstance(val, list):
            if not val:
                return "—"
            return ", ".join(str(v) for v in val[:10])

        return str(val)

    @staticmethod
    def _describe_mapping(val: Dict[Any, Any]) -> str:
        """Render a dict observed value, preferring the well-known keys."""
        parts = []
        if "min" in val and "max" in val:
            parts.append(f"Min: {val['min']} | Max: {val['max']}")
        if "min_length" in val and "max_length" in val:
            # The bounds need a separator, otherwise 3 and 12 read as "312".
            parts.append(f"Length: {val['min_length']}–{val['max_length']}")
        if "unique_values" in val:
            parts.append(f"Distinct values: {val['unique_values']}")
        if "distinct_values" in val:
            vals = val["distinct_values"]
            if isinstance(vals, list):
                # Only the first 8 distinct values are shown.
                parts.append(f"Distinct: {', '.join(str(v) for v in vals[:8])}")
            else:
                parts.append(f"Distinct values: {vals}")
        if parts:
            return " · ".join(parts)
        # Fallback: key=value pairs
        return " · ".join(f"{k}: {v}" for k, v in val.items())

    @staticmethod
    def _describe_unique_ratio(text: str) -> Optional[str]:
        """Reformat ``"<n> unique out of <total>"``; ``None`` if not parseable."""
        if "unique out of" not in text:
            return None
        try:
            tokens = text.split()
            uniq = int(tokens[0])
            total = int(tokens[4])
        except (IndexError, ValueError):
            return None
        pct = round(uniq / total * 100, 1) if total > 0 else 0
        return f"Unique: {uniq}/{total} ({pct}%)"

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict representation of this result.

        ``unexpected_values`` is cut to the first 20 entries and
        ``unexpected_percent`` is rounded to 4 decimal places.
        """
        return {
            "expectation_type": self.expectation_type,
            "success": self.success,
            "status": self.status,
            "severity": self.severity,
            "column": self.column,
            "observed_value": to_native(self.observed_value),
            "element_count": self.element_count,
            "unexpected_count": self.unexpected_count,
            "unexpected_percent": round(self.unexpected_percent, 4),
            "unexpected_values": [to_native(v) for v in self.unexpected_values[:20]],
            "details": to_native(self.details),
            "exception_info": self.exception_info,
            "meta": self.meta,
        }
# ---------------------------------------------------------------------------
# ColumnHealthSummary
# ---------------------------------------------------------------------------
@dataclass
class ColumnHealthSummary:
    """Per-column roll-up of expectation outcomes and basic profile metrics.

    The counters (``checks``, ``passed``, ``failed``, ``errors``) start at
    zero; the null / uniqueness metrics and ``total_rows`` stay ``None``
    until a value is assigned to them.
    """

    column: str
    checks: int = 0
    passed: int = 0
    failed: int = 0
    errors: int = 0
    null_count: Optional[int] = None
    null_percent: Optional[float] = None
    unique_count: Optional[int] = None
    unique_percent: Optional[float] = None
    total_rows: Optional[int] = None

    @property
    def health_score(self) -> float:
        """Share of passed checks as a percentage with one decimal place.

        A column without any checks scores ``100.0``.
        """
        if self.checks != 0:
            return round((self.passed / self.checks) * 100, 1)
        return 100.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the summary as a plain dict.

        Percentages are rounded to two decimals when set. ``total_rows`` is
        not part of the serialized output.
        """

        def two_decimals(number: Optional[float]) -> Optional[float]:
            return None if number is None else round(number, 2)

        payload: Dict[str, Any] = {}
        payload["column"] = self.column
        payload["checks"] = self.checks
        payload["passed"] = self.passed
        payload["failed"] = self.failed
        payload["errors"] = self.errors
        payload["health_score"] = self.health_score
        payload["null_count"] = self.null_count
        payload["null_percent"] = two_decimals(self.null_percent)
        payload["unique_count"] = self.unique_count
        payload["unique_percent"] = two_decimals(self.unique_percent)
        return payload
# ---------------------------------------------------------------------------
# ValidationResult
# ---------------------------------------------------------------------------
@dataclass
class ValidationResult:
    """Outcome of running a whole expectation suite.

    Holds the individual expectation results together with run metadata and
    offers aggregate counters, a weighted quality score, per-column health
    summaries and several export formats.
    """

    suite_name: str
    results: List[ExpectationResult] = field(default_factory=list)
    run_time: Optional[datetime] = None
    run_duration_seconds: float = 0.0
    data_source: Optional[str] = None
    engine: str = "pandas"
    statistics: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Stamp the result with the current local time when none is given."""
        if self.run_time is not None:
            return
        self.run_time = datetime.now()

    @property
    def success(self) -> bool:
        """Whether every single expectation passed (vacuously true if empty)."""
        for outcome in self.results:
            if not outcome.success:
                return False
        return True

    @property
    def total_expectations(self) -> int:
        """Number of expectation results held."""
        return len(self.results)

    @property
    def successful_expectations(self) -> int:
        """Number of results flagged as successful."""
        return len([outcome for outcome in self.results if outcome.success])

    @property
    def failed_expectations(self) -> int:
        """Number of unsuccessful results that carry no exception info."""
        return len(
            [
                outcome
                for outcome in self.results
                if not (outcome.success or outcome.exception_info)
            ]
        )

    @property
    def errored_expectations(self) -> int:
        """Number of results that carry exception info."""
        return sum(bool(outcome.exception_info) for outcome in self.results)

    @property
    def success_percent(self) -> float:
        """Percentage of successful results, two decimals; 0.0 when empty."""
        if not self.results:
            return 0.0
        ratio = self.successful_expectations / self.total_expectations
        return round(ratio * 100, 2)

    # -- Quality Score -----------------------------------------------------

    def compute_quality_score(self) -> float:
        """
        Return the severity-weighted data quality score (0–100).

        Each result counts with the weight of its severity:

        - Critical: ×3
        - Warning : ×2
        - Info    : ×1

        Severities without a configured weight count ×2.

        Score = 100 × (weighted_passed / weighted_total), rounded to one
        decimal. An empty result set, or a total weight of zero, scores 100.
        """
        if not self.results:
            return 100.0

        earned = 0.0
        possible = 0.0
        for outcome in self.results:
            weight = SEVERITY_WEIGHTS.get(outcome.severity, 2)
            possible += weight
            if outcome.success:
                earned += weight

        if possible == 0:
            return 100.0
        return round((earned / possible) * 100, 1)

    # -- Column Health Summary ---------------------------------------------

    def column_health(self) -> List[ColumnHealthSummary]:
        """
        Group expectation results per column.

        Results without a column are collected under ``"__table__"``. Null
        and uniqueness metrics are filled in from the not-null and
        unique-values expectations when those are present. The returned list
        is sorted by column name with the table-level entry last.
        """
        by_column: Dict[str, ColumnHealthSummary] = {}

        for outcome in self.results:
            name = outcome.column or "__table__"
            entry = by_column.get(name)
            if entry is None:
                entry = by_column[name] = ColumnHealthSummary(column=name)

            entry.checks += 1
            # Success wins over exception info, which wins over plain failure.
            if outcome.success:
                counter = "passed"
            elif outcome.exception_info:
                counter = "errors"
            else:
                counter = "failed"
            setattr(entry, counter, getattr(entry, counter) + 1)

            if outcome.expectation_type == "expect_column_to_not_be_null":
                self._record_null_metrics(entry, outcome)
            elif outcome.expectation_type == "expect_column_values_to_be_unique":
                self._record_unique_metrics(entry, outcome)

        # Table-level summary sorts after every real column.
        ordered = list(by_column.values())
        ordered.sort(key=lambda item: (item.column == "__table__", item.column))
        return ordered

    @staticmethod
    def _record_null_metrics(entry: ColumnHealthSummary, outcome: ExpectationResult) -> None:
        """Copy null count / percentage from a not-null expectation result."""
        info = outcome.details or {}
        entry.null_count = info.get("null_count", outcome.unexpected_count)
        row_total = info.get("total_count", outcome.element_count)
        entry.total_rows = row_total
        if row_total and row_total > 0:
            nulls = entry.null_count or 0
            entry.null_percent = (nulls / row_total) * 100

    @staticmethod
    def _record_unique_metrics(entry: ColumnHealthSummary, outcome: ExpectationResult) -> None:
        """Derive unique count / percentage from a uniqueness expectation result."""
        info = outcome.details or {}
        row_total = outcome.element_count
        duplicates = info.get("duplicate_count", outcome.unexpected_count)
        entry.total_rows = row_total
        if row_total and row_total > 0:
            entry.unique_count = row_total - duplicates
            entry.unique_percent = ((row_total - duplicates) / row_total) * 100

    # -- Statistics --------------------------------------------------------

    def compute_statistics(self) -> Dict[str, Any]:
        """Recompute the summary statistics, store them on the instance and return them."""
        started = self.run_time.isoformat() if self.run_time else None
        stats: Dict[str, Any] = {
            "total": self.total_expectations,
            "passed": self.successful_expectations,
            "failed": self.failed_expectations,
            "errors": self.errored_expectations,
            "success_percent": self.success_percent,
            "overall_success": self.success,
            "quality_score": self.compute_quality_score(),
            "run_time": started,
            "run_duration_seconds": round(self.run_duration_seconds, 3),
        }
        self.statistics = stats
        return self.statistics

    def to_dict(self) -> Dict[str, Any]:
        """Return the full result as a plain dict (statistics are refreshed first)."""
        self.compute_statistics()
        payload: Dict[str, Any] = {}
        payload["suite_name"] = self.suite_name
        payload["success"] = self.success
        payload["statistics"] = self.statistics
        payload["quality_score"] = self.compute_quality_score()
        payload["column_health"] = [entry.to_dict() for entry in self.column_health()]
        payload["data_source"] = self.data_source
        payload["engine"] = self.engine
        payload["results"] = [outcome.to_dict() for outcome in self.results]
        return payload

    def to_json(self, indent: int = 2) -> str:
        """Serialize the full result to a JSON string.

        Values the JSON encoder cannot handle are converted with ``str``.
        """
        payload = self.to_dict()
        return json.dumps(payload, indent=indent, default=str)

    def to_json_file(self, filepath: str) -> None:
        """Write the JSON form of the result to *filepath* as UTF-8."""
        with open(filepath, "w", encoding="utf-8") as handle:
            handle.write(self.to_json())

    def to_html(self, filepath: str) -> None:
        """Generate a rich HTML report and write to *filepath*."""
        # Imported lazily, at call time rather than at module import.
        from validatex.reporting.html_report import HTMLReportGenerator

        HTMLReportGenerator().generate(self, filepath)

    def summary(self) -> str:
        """Return a human-readable, multi-line summary of the run.

        Statistics are refreshed first. When there are failed or errored
        expectations, every unsuccessful result is listed below the header.
        """
        self.compute_statistics()
        score = self.compute_quality_score()
        rule = "=" * 60
        if self.success:
            status = "✅ ALL PASSED"
        else:
            status = "❌ SOME FAILED"

        lines = [
            rule,
            f" ValidateX Validation Report — {self.suite_name}",
            rule,
            f" Status : {status}",
            f" Quality Score : {score} / 100",
            f" Total Expectations: {self.total_expectations}",
            f" Passed : {self.successful_expectations}",
            f" Failed : {self.failed_expectations}",
            f" Errors : {self.errored_expectations}",
            f" Success Rate : {self.success_percent}%",
            f" Run Duration : {self.run_duration_seconds:.3f}s",
            f" Engine : {self.engine}",
            rule,
        ]

        if self.failed_expectations > 0 or self.errored_expectations > 0:
            lines.append(" Failed / Errored Expectations:")
            lines.append(f" {'-' * 56}")
            for outcome in self.results:
                if outcome.success:
                    continue
                where = f" (column: {outcome.column})" if outcome.column else ""
                sev = outcome.severity_icon
                lines.append(
                    f" {outcome.status_icon} {sev} {outcome.expectation_type}{where}"
                )
                if outcome.exception_info:
                    lines.append(f" Error: {outcome.exception_info}")
                elif outcome.unexpected_count:
                    lines.append(
                        f" Unexpected: {outcome.unexpected_count} "
                        f"({outcome.unexpected_percent:.2f}%)"
                    )
            lines.append(rule)

        return "\n".join(lines)

    def __repr__(self) -> str:
        passed = f"{self.successful_expectations}/{self.total_expectations}"
        return (
            f"ValidationResult(suite={self.suite_name!r}, "
            f"success={self.success}, "
            f"passed={passed})"
        )