Skip to content

Profiling a single table

This section demonstrates how to use the Profile Report module in ydata-sdk.

Don't forget to set up your license key:

    import os

    os.environ['YDATA_LICENSE_KEY'] = '{add-your-key}'

Example Code

"""Example profiling a single table with YData's ProfileReport."""
from ydata.utils.formats import read_json

from ydata.connectors import GCSConnector
from ydata.connectors.filetype import FileType

from ydata.profiling import ProfileReport

def get_token(token_path: str):
    """Load a token from the JSON file at ``token_path``.

    Args:
        token_path: Path to the JSON file holding the token
            (e.g. a credentials file kept under ``.secrets``).

    Returns:
        Whatever ``read_json`` produces for that file.
    """
    token = read_json(token_path)
    return token


if __name__ == "__main__":
    # Load the access token for Google Cloud Storage from the local
    # credentials file.
    gcs_token = get_token("gcs_credentials.json")

    # Initialise the connector with that token.
    gcs = GCSConnector("ydatasynthetic", keyfile_dict=gcs_token)

    # Read the cardio CSV file from GCS storage.
    cardio_path = "gs://ydata_testdata/tabular/cardio/data.csv"
    cardio = gcs.read_file(cardio_path, file_type=FileType.CSV)

    # Build the profile report for the dataset and write it to disk.
    profile = ProfileReport(
        cardio,
        title="Cardio dataset profiling",
        outlier=True,
    )
    profile.to_file("cardio_report.html")