Timeseries Synthetic Data
This section demonstrates how to use the Time-series Synthetic Data Generator
module in ydata-sdk.
Don't forget to set up your license key.
Example Code
"""Example using YData's time-series synthesizer."""
from ydata.connectors import GCSConnector
from ydata.dataset.filetype import FileType
from ydata.metadata import Metadata
from ydata.synthesizers.timeseries.model import TimeSeriesSynthesizer
from ydata.utils.data_types import VariableType
from ydata.utils.formats import read_json
def get_token(token_path: str):
    """Load a token from the JSON file at ``token_path``.

    Args:
        token_path: Path of the JSON file; passed unchanged to ``read_json``.

    Returns:
        Whatever ``read_json`` returns for that file.
    """
    token = read_json(token_path)
    return token
if __name__ == "__main__":
    # Stage toggles: each stage of the example can be switched on or off
    # independently of the other.
    TRAIN = True
    SYNTHESIZE = True

    # Path of the persisted synthesizer. It is defined before both stages so
    # that SYNTHESIZE can load an already-trained model when TRAIN is False;
    # binding it only inside the TRAIN branch would raise NameError there.
    out_path = "./test_trained_model.pkl"

    # Read the training data from Google Cloud Storage.
    keyfile_dict = get_token("gcs_credentials.json")
    gcs_connector = GCSConnector(
        project_id="ydatasynthetic", keyfile_dict=keyfile_dict
    )
    data = gcs_connector.read_file(
        r"gs://ydata_testdata/timeseries/verbund_energy_prices/verbund_final_h1.csv",
        file_type=FileType.CSV,
        has_header=True,
    )
    print(data.head())

    # NOTE(review): the return value is discarded, so this relies on
    # copy=False selecting the columns in place -- confirm against the
    # ydata Dataset API.
    data.select_columns(
        ["DELIVERY_DATE", "PRICE_france"], copy=False
    )  # Keep only index and a numerical column
    data.astype("DELIVERY_DATE", VariableType.DATE)  # update vartype

    # The metadata is built with DELIVERY_DATE as the sort-by key.
    dataset_attrs = {"sortbykey": "DELIVERY_DATE"}
    m = Metadata(data, dataset_attrs=dataset_attrs)

    if TRAIN:
        # Fit the synthesizer and persist it to out_path.
        synth = TimeSeriesSynthesizer()
        synth.fit(data, metadata=m)
        synth.save(out_path)

    if SYNTHESIZE:
        # Always reload from disk so this stage also works without training
        # in the same run.
        synth = TimeSeriesSynthesizer.load(out_path)
        n_entities = 10
        sample = synth.sample(n_entities=n_entities)
        sample.to_pandas().to_csv(r"test_synth_samples.csv")