Dataset And Metadata
This section demonstrates how to use the Dataset and Metadata modules in ydata-sdk.
Don't forget to set up your license key before running the example.
Example Code
import pandas as pd
from ydata.dataset import Dataset
from ydata.metadata import Metadata
# Load the raw data into a pandas DataFrame.
# "/path-to-file/data.csv" is a placeholder - replace it with the path to your CSV file.
data = pd.read_csv("/path-to-file/data.csv")

# Create the Dataset object from the DataFrame
dataset = Dataset(data)

# Getting some info from the Dataset
# Schema - columns and variable types
# (the "\033[1m" / "\033[0m" ANSI escape codes turn bold text on and off in the terminal)
print("\033[1m Dataset schema \033[0m")
print(dataset.schema)

# Nrows - number of rows
print(dataset.nrows)

# Calculate the metadata for a given dataset
metadata = Metadata(dataset)

# Get the full metadata summary
print("\n\033[1mMetadata summary\033[0m")
print(metadata.summary)

# Print the metadata
print(metadata)

# Filter the metadata based on some columns
# NOTE: the column names below must exist in the loaded CSV - adjust them to your data.
m2 = metadata[["Age", "Capital Loss", "Capital gain", "Country"]]
print("Filtered Metadata print")
# Actually show the filtered metadata announced by the label above
print(m2)

# Update Metadata datatypes
metadata.update_datatypes(
    {"Capital gain": "numerical", "Capital Loss": "numerical"}
)

# Printing metadata after the updates
print(metadata)