AWS S3
This example demonstrates how to use the AWS S3 connector in ydata-sdk.
Don't forget to set up your license key
Example Code
"""AWS S3 read, read_sample and write file example."""
from pathlib import Path
from examples.local import setting_dask_env
from ydata.connectors import S3Connector
from ydata.connectors.filetype import FileType
from ydata.utils.formats import read_json
# When running this example locally, set the environment variable
# RUNNING_ENV='LOCAL' before executing it.
# NOTE(review): setting_dask_env comes from examples.local; presumably it
# configures the Dask environment used by the connector -- confirm there.
setting_dask_env()
def get_token(token_name: str):
    """Load a token file from the ``.secrets`` directory.

    The ``.secrets`` directory is looked up two levels above the directory
    that contains this file, so the example works regardless of the current
    working directory.

    Args:
        token_name: File name of the token inside ``.secrets``
            (e.g. ``"s3_credentials.json"``).

    Returns:
        Whatever ``read_json`` returns for that file.
    """
    # Anchor on this file's location rather than on cwd().
    script_dir = Path(__file__).absolute().parent
    secrets_dir = script_dir.parent.parent / ".secrets"
    return read_json(secrets_dir / token_name)
if __name__ == "__main__":
    # Load the private credentials; they are unpacked below as the
    # keyword arguments of the connector.
    token = get_token("s3_credentials.json")

    # Initialise the connector to S3 storage.
    connector = S3Connector(**token)

    # Read a full .csv file through the connector.
    data = connector.read_file(
        "s3://path-to-file/data.csv", file_type=FileType.CSV)
    print(f"My data is of type {type(data).__name__}.")

    # Read only a sample of the same .csv file.
    # Note: this rebinds `data`, so the sample is what gets written below.
    data = connector.read_sample(
        "s3://path-to-file/data.csv", file_type=FileType.CSV)

    # Write the data back to Amazon S3 as a parquet file.
    connector.write_file(
        data, path="s3://path-to-file/example-write.parquet")

    # List all files and directories under the given S3 path.
    objects = connector.ls("s3://path-to-file")
    # f-string for consistency with the print above; output is unchanged.
    print(f"List of objects in the AWS S3: {objects}")