Skip to content

MultiTable Synthetic Data

This section demonstrates how to generate synthetic data from an existing MySQL database.

Don't forget to set up your license key before running the example:

    import os

    # Expose the license key through the YDATA_LICENSE_KEY environment variable.
    # '{add-your-key}' is a placeholder: replace it with your own key.
    os.environ['YDATA_LICENSE_KEY'] = '{add-your-key}'

Example Code

"""
    File to test the multitable synthesizer.
    This is an example for the Berka database using the MySQL connector
"""
import os

from ydata.connectors import MySQLConnector

from ydata.metadata import MultiMetadata
from ydata.synthesizers import MultiTableSynthesizer

# Flags selecting which stages of the example run.
TRAIN = True  # fit a new synthesizer and save it to out_path; otherwise load it from out_path
GENERATE = True  # sample from the synthesizer
WRITE = False  # when sampling, hand the connector to sample() instead of printing the result
out_path = "./multitable_model.pkl"  # file the synthesizer is saved to / loaded from

if __name__ == "__main__":

    # Placeholder connection settings: replace them with the values of your MySQL server.
    USERNAME = "username"
    PASSWORD = "password"
    HOSTNAME = "hostname"
    PORT = "3306"
    DATABASE_NAME = "database_name"

    conn_str = {
        "hostname": HOSTNAME,
        "username": USERNAME,
        "password": PASSWORD,
        "port": PORT,
        "database": DATABASE_NAME,
    }

    # Create the connection to the database
    conn = MySQLConnector(conn_string=conn_str)
    # Test the connection
    conn.connection.connect()

    database = conn.read_database()
    print(f"Original database: {database}")

    # Calculate the metadata
    m = MultiMetadata(database)
    print(f"Metadata calculated: {m}")

    if TRAIN:
        synth = MultiTableSynthesizer()
        synth.fit(database, metadata=m)
        synth.save(out_path)
    else:
        synth = MultiTableSynthesizer.load(out_path)

    # Sampling is gated on GENERATE alone; WRITE only decides whether the
    # connector is passed to sample() or the result is printed.
    if GENERATE:
        # Size argument forwarded to sample().
        # NOTE(review): confirm its exact meaning/unit in the ydata documentation.
        n_samples = 1
        if WRITE:
            # presumably sample() writes the synthetic data through the
            # connector when one is given -- verify against the ydata docs
            synth.sample(n_samples, connector=conn)
        else:
            sample = synth.sample(n_samples)
            print(f"Generate synthetic database: {sample}")