Constraints
This section demonstrates how to use the Constraints module in ydata-sdk.
Don't forget to set up your license key before running the example.
Example Code
import numpy as np
import pandas as pd
from ydata.constraints.engine import ConstraintEngine
from ydata.constraints.rows import (CustomRowConstraint, EqualColumnConstraint, GreaterThanColumnConstraint,
IntervalColumnConstraint, LowerThanColumnConstraint)
from ydata.dataset import Dataset
def create_dummy_dataset(size: int = 100) -> Dataset:
    """Build a small synthetic dataset for exercising the constraints.

    Args:
        size: Number of rows to generate.

    Returns:
        A ``Dataset`` wrapping a DataFrame with five columns:
        ``constant`` (all ones), ``low_cardinality`` (random 0/1 values),
        ``ascending`` (0 .. size-1), ``negatives`` (0 .. -(size-1)) and
        ``missings`` (all NaN).
    """
    df = pd.DataFrame(
        {
            "constant": np.ones(size),
            # Random draw from {0, 1}; not seeded, so values vary per run.
            "low_cardinality": np.random.randint(2, size=size),
            "ascending": np.arange(size),
            # Starts at 0, so the column is non-positive, not strictly negative.
            "negatives": -1 * np.arange(size),
            "missings": [np.nan] * size,
        }
    )
    return Dataset(df)
def nunique(column):
    """Return the result of ``column.nunique()`` for the given column.

    Args:
        column: Column-like object exposing ``nunique()`` (e.g. a pandas
            Series).
    """
    return column.nunique()
def col_sum(column):
    """Return the result of ``column.sum()`` for the given column.

    Args:
        column: Column-like object exposing ``sum()`` (e.g. a pandas Series).
    """
    return column.sum()
def all_negatives(column):
    """Return ``True`` when every element of ``column`` is strictly below zero.

    Args:
        column: Iterable column-like object supporting element-wise ``< 0``
            comparison (e.g. a pandas Series).

    Returns:
        ``True`` if all elements compare as ``< 0`` (also for an empty
        column), ``False`` otherwise.
    """
    # Builtin ``all`` is kept on purpose: it only requires the comparison
    # result to be iterable, whatever column type the caller passes.
    return all(column < 0)
def validate_constraints(dataset):
    """Validate ``dataset`` against six example constraints.

    Builds the constraints, registers them on a fresh ``ConstraintEngine``,
    runs ``validate`` on the dataset and returns the engine so its results
    can be inspected.

    Args:
        dataset: The dataset to validate (as built by
            ``create_dummy_dataset``).

    Returns:
        The ``ConstraintEngine`` after validation.
    """
    # Should fail on "missings", since it only holds NaN values.
    c1 = GreaterThanColumnConstraint(
        check=nunique, columns=["constant", "missings"], value=0
    )

    # Should pass.
    c2 = LowerThanColumnConstraint(
        check=col_sum, columns=["negatives"], value=0
    )

    # Should fail, since the maximum value of "negatives" is 0.
    c3 = EqualColumnConstraint(
        check=all_negatives, columns="negatives", value=True
    )

    # Should fail on exactly one row.
    c4 = CustomRowConstraint(
        check=lambda x: x < 0, columns="negatives", name="negative_rows"
    )

    # Should pass, since the cardinality of both columns is at most 2.
    c5 = EqualColumnConstraint(
        nunique, columns=["constant", "low_cardinality"], value=1, tolerance=1
    )

    # Should fail, since "ascending" has high cardinality.
    c6 = IntervalColumnConstraint(
        nunique, columns=["ascending"], lower_bound=0, upper_bound=1
    )

    constraints = [c1, c2, c3, c4, c5, c6]
    ce = ConstraintEngine()
    ce.add_constraints(constraints)
    ce.validate(dataset)
    return ce
if __name__ == "__main__":
    # Build the example dataset and validate it against the constraints.
    dataset = create_dummy_dataset()
    constraint_engine = validate_constraints(dataset)

    # Printing the engine gives a clear summary of the validation results.
    print(constraint_engine)

    # To only check which constraints have been added to the engine,
    # use its repr instead.
    print(repr(constraint_engine))