Skip to content

Timeseries Data Report

This section demonstrates how to use the time-series data profiling report module in ydata-sdk. The example fits a time-series synthesizer on a dataset, samples synthetic data from it, and generates the profiling report as a PDF for assessing synthetic data quality.

Example Code

"""Example using YData's TimeSeriesSynthesizer and SyntheticDataProfile."""
import pandas as pd

from ydata.connectors import GCSConnector
from ydata.dataset import Dataset
from ydata.metadata.metadata import Metadata
from ydata.report import SyntheticDataProfile
from ydata.synthesizers.timeseries.model import TimeSeriesSynthesizer
from ydata.utils.formats import read_json


def get_token(token_path: str):
    """Load a token from a JSON file (e.g. one kept under .secrets).

    Args:
        token_path: Location of the JSON file; handed unchanged to
            ``read_json``.

    Returns:
        Whatever ``read_json`` produces for ``token_path``.
    """
    token = read_json(token_path)
    return token


if __name__ == "__main__":
    # Step 1: pull the stock CSV through the GCS connector and convert it to
    # a pandas object so a column can be added below.
    credentials = get_token("gcs_credentials.json")
    connector = GCSConnector(
        project_id="ydatasynthetic",
        keyfile_dict=credentials,
    )
    remote_file = connector.read_file(
        "gs://ydata_testdata/timeseries/stock/data.csv"
    )
    frame = remote_file.to_pandas()

    # Step 2: add a "period" column with one daily timestamp per row, starting
    # at 2010-01-01, and name it as the "sortbykey" in the metadata.
    n_rows = len(frame)
    frame["period"] = pd.date_range("2010-01-01", periods=n_rows, freq="D")
    real_data = Dataset(frame)
    meta = Metadata(real_data, {"sortbykey": "period"})

    # Step 3: fit the time-series synthesizer on the real data and draw a
    # synthetic sample from it.
    synthesizer = TimeSeriesSynthesizer()
    synthesizer.fit(real_data, meta)
    synthetic_data = synthesizer.sample()

    # Step 4: build the real-vs-synthetic profile and write it out as a PDF.
    profile = SyntheticDataProfile(
        real_data,
        synthetic_data,
        metadata=meta,
        data_types=synthesizer.data_types,
        target="Volume",
        report_type="timeseries",
    )
    profile.generate_report(output_path="./timeseries_stock_report.pdf")