MultiTable Synthetic Data
This section demonstrates how to generate synthetic data from an existing MySQL database.
Don't forget to set up your license key before running the example.
Example Code
"""
File to test the multitable synthesizer.
This is an example for the Berka database using the MySQL connector
"""
import os
from ydata.connectors import MySQLConnector
from ydata.metadata import MultiMetadata
from ydata.synthesizers import MultiTableSynthesizer
# --- Script switches --------------------------------------------------------
# TRAIN: fit a new synthesizer on the database and save it to ``out_path``;
# when False, a previously saved synthesizer is loaded from ``out_path``.
TRAIN = True
# GENERATE: sample a synthetic database from the synthesizer.
GENERATE = True
# WRITE: when generating, also hand the database connector to ``sample()``
# (presumably so the synthetic data is written through it -- confirm against
# the MultiTableSynthesizer.sample documentation).
WRITE = False

# Where the synthesizer is persisted (written when TRAIN, read otherwise).
out_path = "./multitable_model.pkl"

if __name__ == "__main__":
    # Placeholder credentials: replace with the values for your MySQL server.
    USERNAME = "username"
    PASSWORD = "password"
    HOSTNAME = "hostname"
    PORT = "3306"
    DATABASE_NAME = "database_name"

    conn_str = {
        "hostname": HOSTNAME,
        "username": USERNAME,
        "password": PASSWORD,
        "port": PORT,
        "database": DATABASE_NAME,
    }

    # Create the connection to the database
    conn = MySQLConnector(conn_string=conn_str)

    # Test the connection
    conn.connection.connect()

    database = conn.read_database()
    print(f"Original database: {database}")

    # Calculate the metadata
    m = MultiMetadata(database)
    print(f"Metadata calculated: {m}")

    if TRAIN:
        synth = MultiTableSynthesizer()
        synth.fit(database, metadata=m)
        synth.save(out_path)
    else:
        synth = MultiTableSynthesizer.load(out_path)

    # Sampling is gated on GENERATE alone, so GENERATE = False really skips
    # generation; WRITE only decides whether the connector is passed along.
    if GENERATE:
        n_samples = 1
        if WRITE:
            sample = synth.sample(n_samples, connector=conn)
        else:
            sample = synth.sample(n_samples)
        print(f"Generate synthetic database: {sample}")