Skip to content

Faker Synthesizer from source

This section demonstrates how to use the `FakerSynthesizer` in ydata-sdk to generate fake data that follows the structure of an existing source dataset.

Don't forget to set up your license key before running the example:

    import os

    # Expose the license key through the YDATA_LICENSE_KEY environment
    # variable; replace the '{add-your-key}' placeholder with your own key.
    os.environ['YDATA_LICENSE_KEY'] = '{add-your-key}'

Example Code

"""Example using YData's FakerSynthesizer to generate fake data from a source dataset."""
from ydata.utils.formats import read_json

from ydata.connectors import GCSConnector
from ydata.connectors.filetype import FileType
from ydata.metadata import Metadata
from ydata.synthesizers.faker.model import FakerSynthesizer

def get_token(token_path: str):
    """Load an access token from a JSON file (e.g. one kept under .secrets).

    Args:
        token_path: Path of the JSON file that holds the token.

    Returns:
        The parsed content of the file, as produced by ``read_json``.
    """
    token = read_json(token_path)
    return token

if __name__ == "__main__":

    # Access token for Google Cloud Storage, read from a local JSON file.
    gcs_credentials = get_token("gcs_credentials.json")

    # Connect to the bucket and read the source CSV from GCS.
    gcs = GCSConnector("bucketname", keyfile_dict=gcs_credentials)
    source_data = gcs.read_file(
        "gs://path-to-file/data.csv",
        file_type=FileType.CSV,
    )

    # Compute the metadata of the source data; the synthesizer is fitted
    # on this metadata object rather than on the data itself.
    source_metadata = Metadata(source_data)

    # Create the synthesizer and fit it.
    faker_synth = FakerSynthesizer()
    faker_synth.fit(source_metadata)

    # Persist the fitted synthesizer, then reload it from the same path.
    model_path = "./teste.pkl"
    faker_synth.save(model_path)
    restored_synth = FakerSynthesizer.load(model_path)

    # Draw a sample of 100 from the reloaded synthesizer and preview it.
    synthetic_sample = restored_synth.sample(100)
    print(synthetic_sample.head())