AWS S3

This example demonstrates how to use the AWS S3 connector in ydata-sdk.

Don't forget to set up your license key before running the example:

    import os

    os.environ['YDATA_LICENSE_KEY'] = '{add-your-key}'

Example Code

"""AWS S3 read, read_sample and write file example."""
from pathlib import Path

from ydata.connectors import S3Connector
from ydata.dataset.filetype import FileType
from ydata.utils.formats import read_json

def get_token(token_name: str):
    """Load a token file from the ``.secrets`` directory.

    The location is resolved relative to this file rather than the current
    working directory, so the example can be launched from anywhere.

    Args:
        token_name: File name of the token inside ``.secrets``.

    Returns:
        Whatever ``read_json`` yields for that file (presumably a dict of
        credentials -- confirm against ``read_json``).
    """
    this_file = Path(__file__).absolute()
    # Directory two levels above the one containing this file.
    base_dir = this_file.parent.parent.parent
    secrets_file = base_dir / ".secrets" / token_name
    return read_json(secrets_file)


if __name__ == "__main__":
    # Load the private credentials stored under .secrets
    token = get_token("s3_credentials.json")

    # Init the Connector to S3 Storage; the loaded credentials are passed
    # through as keyword arguments.
    connector = S3Connector(**token)

    # Read a file to a Dataset object
    data = connector.read_file(
        path="s3://path-to-file/data.csv",
        file_type=FileType.CSV,
    )
    print(f"My data is of type {type(data).__name__}.")

    # Read a sample of the same .csv file. It is kept under its own name so
    # the full read above is not silently overwritten.
    sample = connector.read_sample(
        path="s3://path-to-file/data.csv",
        file_type=FileType.CSV,
    )

    # Write the sample back to Amazon S3 as a parquet file
    connector.write_file(
        data=sample,
        path="s3://path-to-file/example-write.parquet",
    )

    # List all objects under the given Amazon S3 path
    # (the result lists both files and directories)
    objects = connector.ls("s3://path-to-file")
    print(f"List of objects in the AWS S3: {objects}")