hyperapiを使ってhyperファイルを作成、更新する。[python]

この度データのhyper化とpublishを自動化する業務にたずさわったため記録する。

目的

hyperapiを駆使して、
pythonのデータフレームから、hyperファイルを作成する。

hyperapi:Tableau Hyper API

意義

hyperファイル:tableauの抽出ファイルの形式
pythonでhyperファイルを作成できると更新を自動化することができる。

自分の業務では、bigquerryからデータを取り出してそれを自動でhyper化するプロジェクトを開発した。

hyperファイルの作成

サンプルのデータを「customer.hyper」として実行したディレクトリに作成する。


from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NOT_NULLABLE)

#カラムの設定をする

#TableDefinition.Column(name='カラム名(何でもいい)', type='型定義', nullability=NOT_NULLABLE(NULLABLE))


    ]
)

columns[]の中身にカラムを設定する。設定の仕方は以下の通り。

TableDefinition.Column(name=’カラム名(何でもいい)’, type=’型定義’, nullability=NOT_NULLABLE(もしくはNULLABLE))
型定義
整数 = SqlType.big_int() (公式より)
小数 = SqlType.double() (floatはダメだった)
文字列 = SqlType.text()
日付 = SqlType.date()

def run_insert_data_into_single_table():
    """
    An example demonstrating a simple single-table Hyper file including table creation and data insertion with different types
    """
    print("EXAMPLE - Insert data into a single table within a new Hyper file")
    path_to_database = Path("customer.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Creates new Hyper file "customer.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_AND_REPLACE) as connection:

            connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
            connection.catalog.create_table(table_definition=extract_table)

            # The rows to insert into the "Extract"."Extract" table.
            data_to_insert = [
                ["DJ-13375", "Dennis Kane", 518, "Consumer"],
                ["EB-13705", "Ed Braxton", 815, "Corporate"],
                ["cm-001", "rei tamai", 777, "Vice President"],
            ]

            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            # The table names in the "Extract" schema (the default schema).
            table_names = connection.catalog.get_table_names("Extract")
            print(f"Tables available in {path_to_database} are: {table_names}")

            # Number of rows in the "Extract"."Extract" table.
            # `execute_scalar_query` is for executing a query that returns exactly one row with one column.
            row_count = connection.execute_scalar_query(query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
            print(f"The number of rows in table {extract_table.table_name} is {row_count}.")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")


if __name__ == '__main__':
    try:
        run_insert_data_into_single_table()
    except HyperException as ex:
        print(ex)
        exit(1)

data_to_insert[]にデータを格納する。

格納するデータは

 nmp = data_frame.to_numpy().tolist()

でデータフレームから作れる。
その場合

data_to_insert = nmp

と定義すれば機能してくれる。

PS C:\Users\techn\Documents\program\python> python extract.hyper\geography3.py
The HyperProcess has started.
The connection to the Hyper file is open.
The geo_table is defined.
The data was added to the table.
The connection to the Hyper extract file is closed.
The HyperProcess has shut down.

こんな感じのが出ればOK!

hyperファイルの更新

イメージは

  1. 中身のデータを削除
  2. 新しいデータを挿入
from pathlib import Path

from tableauhyperapi import HyperProcess, Telemetry, \
    Connection, CreateMode, \
    NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
    Inserter, \
    escape_name, escape_string_literal, \
    TableName, \
    HyperException

# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
    table_name=TableName("Extract", "Extract"),
    columns=[
        TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
        TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NOT_NULLABLE)

#カラムの設定をする

#TableDefinition.Column(name='カラム名(何でもいい)', type='型定義', nullability=NOT_NULLABLE(NULLABLE))


    ]
)

def run_insert_data_into_single_table():
    """
    An example demonstrating a simple single-table Hyper file including table creation and data insertion with different types
    """
    print("EXAMPLE - Insert data into a single table within a new Hyper file")
    path_to_database = Path("customer.hyper")

    # Starts the Hyper Process with telemetry enabled to send data to Tableau.
    # To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # Creates new Hyper file "customer.hyper".
        # Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.NONE) as connection:
          #createmode をNONEにする
          
          #ここでデータを消す
             table_name = TableName("Extract", "Extract")
             row_count = connection.execute_command(
                command=f"DELETE FROM {table_name} "
                )

           # connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
           # connection.catalog.create_table(table_definition=extract_table)

           # この部分を消す

            # The rows to insert into the "Extract"."Extract" table.
            data_to_insert = [
                ["DJ-13375", "Dennis Kane", 518, "Consumer"],
                ["EB-13705", "Ed Braxton", 815, "Corporate"],
                ["cm-001", "rei tamai", 777, "Vice President"],
            ]

            with Inserter(connection, extract_table) as inserter:
                inserter.add_rows(rows=data_to_insert)
                inserter.execute()

            # The table names in the "Extract" schema (the default schema).
            table_names = connection.catalog.get_table_names("Extract")
            print(f"Tables available in {path_to_database} are: {table_names}")

            # Number of rows in the "Extract"."Extract" table.
            # `execute_scalar_query` is for executing a query that returns exactly one row with one column.
            row_count = connection.execute_scalar_query(query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
            print(f"The number of rows in table {extract_table.table_name} is {row_count}.")

        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")


if __name__ == '__main__':
    try:
        run_insert_data_into_single_table()
    except HyperException as ex:
        print(ex)
        exit(1)

最後に

これで自由にhyperファイルを作成できるようになりました。

Follow me!

DMP構築のご相談

お気軽にお問い合わせください

コメントを残す

メールアドレスが公開されることはありません。 が付いている欄は必須項目です