Skip to content

Databricks Unity Catalog

This example demonstrates how to use the Databricks Unity Catalog connector in ydata-sdk.

Don't forget to set up your license key before running the example:

    import os

    os.environ['YDATA_LICENSE_KEY'] = '{add-your-key}'

Example Code

"""Example file on how to use the connector for the Databricks Unity
Catalog."""

from examples.local import setting_dask_env
from ydata.connectors import DatabricksUnityCatalog

# NOTE(review): presumably prepares the local Dask environment used by the
# examples -- confirm against examples.local.
setting_dask_env()

# Placeholder identifiers: replace them with the share, schema and table
# you want to query.
SHARE_NAME = 'share'
SCHEMA_NAME = 'schema'
TABLE_NAME = 'table'

if __name__ == "__main__":

    # The catalog connector is built from a Delta Sharing config file.
    # Accepted inputs: str, BinaryIO, TextIO, Path or DeltaSharingProfile.
    conn = DatabricksUnityCatalog('insert-file-path')

    # Names (str) of all the available shares.
    # The list is empty when no share is available.
    shares = conn.list_shares()
    print(shares)

    # Names (str) of all the schemas available within the given share.
    schemas = conn.list_schemas(share_name=SHARE_NAME)
    print(schemas)

    # Names (str) of all the tables available for the given share and schema.
    tables = conn.list_tables(share_name=SHARE_NAME, schema_name=SCHEMA_NAME)
    print(tables)

    # All the tables across all the available shares, as a dictionary:
    # {table_name: {share: 'share-name', schema: 'schema-name'}}
    all_tables = conn.list_all_tables()
    print(all_tables)

    # Both reads below target the same table, so its coordinates are
    # gathered once and unpacked into each call.
    table_location = {
        'share_name': SHARE_NAME,
        'schema_name': SCHEMA_NAME,
        'table_name': TABLE_NAME,
    }

    # Read the table; returns a Dataset object.
    full_dataset = conn.read_table(**table_location)

    # Read a sample from the table; returns a Dataset object.
    sample_dataset = conn.read_table_sample(
        **table_location,
        sample_size=1000,
    )

    # Only the sampled Dataset is printed.
    print(sample_dataset)