Faker Synthesizer from source
This section demonstrates how to use the FakerSynthesizer module to generate fake data from a source in ydata-sdk.
Don't forget to set up your license key
Example Code
"""Example using YData's FakerSynthesizer to generate fake data from a source."""
from ydata.utils.formats import read_json
from ydata.connectors import GCSConnector
from ydata.connectors.filetype import FileType
from ydata.metadata import Metadata
from ydata.synthesizers.faker.model import FakerSynthesizer
def get_token(token_path: str):
    """Load an access token from a JSON file.

    Args:
        token_path: Path of the JSON file; handed to ``read_json`` unchanged.

    Returns:
        Whatever ``read_json`` produces for ``token_path``.
    """
    credentials = read_json(token_path)
    return credentials
if __name__ == "__main__":
    # Location where the fitted synthesizer is stored and later reloaded from.
    model_path = "./teste.pkl"

    # Load the Google Cloud Storage access token and initialise the connector.
    gcs_token = get_token("gcs_credentials.json")
    gcs = GCSConnector("bucketname", keyfile_dict=gcs_token)

    # Read the CSV file from GCS storage.
    dataset = gcs.read_file("gs://path-to-file/data.csv", file_type=FileType.CSV)

    # Calculate the metadata of the data that was read.
    dataset_metadata = Metadata(dataset)

    # Instantiate the synthesizer, fit it to the metadata, then persist it.
    synthesizer = FakerSynthesizer()
    synthesizer.fit(dataset_metadata)
    synthesizer.save(model_path)

    # Reload the stored synthesizer, request a sample of size 100, and
    # print the head of the result.
    restored = FakerSynthesizer.load(model_path)
    synthetic = restored.sample(100)
    print(synthetic.head())