Source code for validatex.core.validator

"""
Validator — orchestrates expectation suite execution against a dataset.

The :func:`validate` convenience function is the primary public entry point.
"""

from __future__ import annotations

import time
from typing import Any, Optional

from validatex.core.result import ValidationResult
from validatex.core.suite import ExpectationSuite



[docs]
class Validator:
    """
    Runs an :class:`ExpectationSuite` against a dataset.

    Parameters
    ----------
    suite : ExpectationSuite
        The suite of expectations to evaluate.
    engine : str
        ``"pandas"`` or ``"spark"``.
    """

    def __init__(self, suite: ExpectationSuite, engine: str = "pandas"):
        self.suite = suite
        self.engine = engine.lower()


[docs]
    def run(
        self,
        data: Any,
        data_source: Optional[str] = None,
    ) -> ValidationResult:
        """
        Execute every expectation in the suite against *data*.

        Parameters
        ----------
        data : pd.DataFrame | pyspark.sql.DataFrame
            The dataset to validate.
        data_source : str, optional
            A label describing where the data came from.

        Returns
        -------
        ValidationResult
        """
        result = ValidationResult(
            suite_name=self.suite.name,
            data_source=data_source,
            engine=self.engine,
        )

        start = time.perf_counter()
        for expectation in self.suite:
            exp_result = expectation.validate(data, engine=self.engine)
            result.results.append(exp_result)
        result.run_duration_seconds = time.perf_counter() - start
        result.compute_statistics()
        return result





[docs]
def validate(
    data: Any,
    suite: ExpectationSuite,
    engine: str = "pandas",
    data_source: Optional[str] = None,
) -> ValidationResult:
    """
    Convenience function to validate *data* against a *suite*.

    Parameters
    ----------
    data : pd.DataFrame | pyspark.sql.DataFrame
    suite : ExpectationSuite
    engine : str
        ``"pandas"`` or ``"spark"``.
    data_source : str, optional

    Returns
    -------
    ValidationResult
    """
    return Validator(suite, engine=engine).run(data, data_source=data_source)