Google Cloud Storage
This example demonstrates how to use the Google Cloud Storage
connector in ydata-sdk.
Don't forget to set up your license key before running the example.
Example Code
"""Google Cloud Storage read, read_sample and write file examples."""
from ydata.connectors import GCSConnector
from ydata.connectors.filetype import FileType
from ydata.utils.formats import read_json
def get_token(token_path: str):
    """Load a token from the JSON file at ``token_path``.

    The file is parsed with ``read_json`` and the parsed content is
    returned unchanged.
    """
    credentials = read_json(token_path)
    return credentials
if __name__ == "__main__":
    # Local import keeps this script block self-contained.
    from pathlib import Path

    # Build the token path relative to this file so the example can be run
    # regardless of the current working directory.
    token_path = Path(__file__).resolve().parent / "gcs_credentials.json"
    token = get_token(str(token_path))

    # Create the connector for the bucket using the loaded credentials.
    connector = GCSConnector("bucketname", keyfile_dict=token)

    # Read the whole CSV file.
    data = connector.read_file(
        "gs://path-to-file/data.csv", file_type=FileType.CSV
    )
    print(f"Dataset shape: {data.shape()} [lazy evaluation].")
    print(data)

    # You can also read a specific number of instances
    data = connector.read_sample(
        "gs://path-to-file/data.csv", sample_size=10_000
    )
    print(f"Dataset shape: {data.shape(lazy_eval=False)}.")
    print(data)

    # List existing files in a bucket given a key
    print(connector.list(key="tabular", bucket_name="bucketname"))