Skip to content

Google Cloud Storage

This example demonstrates how to use the Google Cloud Storage connector in ydata-sdk.

Don't forget to set up your license key before running the example:

    import os

    # Replace the '{add-your-key}' placeholder with your own license key.
    os.environ['YDATA_LICENSE_KEY'] = '{add-your-key}'

Example Code

"""Google Cloud Storage read, read_sample and write file examples."""
from ydata.connectors import GCSConnector
from ydata.connectors.filetype import FileType
from ydata.utils.formats import read_json

def get_token(token_path: str):
    """Load a token from the JSON file at ``token_path``.

    The file is parsed with ``read_json`` and the parsed result is
    returned unchanged.
    """
    token = read_json(token_path)
    return token


if __name__ == "__main__":
    # Placeholder locations; replace them with your own bucket and object path.
    bucket = "bucketname"
    csv_uri = "gs://path-to-file/data.csv"

    # Load the service-account credentials and build the connector.
    # NOTE(review): the path is passed as a bare relative filename, so it is
    # presumably resolved against the current working directory -- confirm,
    # or build it from this file's location if the example must run from
    # any directory.
    credentials = get_token("gcs_credentials.json")
    gcs = GCSConnector(bucket, keyfile_dict=credentials)

    # Read the whole CSV file and report its shape using shape() defaults.
    dataset = gcs.read_file(csv_uri, file_type=FileType.CSV)
    print(f"Dataset shape: {dataset.shape()} [lazy evaluation].")
    print(dataset)

    # Read only a sample of the same file (sample_size=10_000) and report
    # its shape with lazy evaluation switched off.
    dataset = gcs.read_sample(csv_uri, sample_size=10_000)
    print(f"Dataset shape: {dataset.shape(lazy_eval=False)}.")
    print(dataset)

    # List the existing files in the bucket for the given key.
    listing = gcs.list(key="tabular", bucket_name=bucket)
    print(listing)