この度データのhyper化とpublishを自動化する業務にたずさわったため記録する。
目的
hyperapiを駆使して、
pythonのデータフレームから、hyperファイルを作成する。
hyperapi:Tableau Hyper API
意義
hyperファイル:tableauの抽出ファイルの形式
pythonでhyperファイルを作成できると更新を自動化することができる。
自分の業務では、bigquerryからデータを取り出してそれを自動でhyper化するプロジェクトを開発した。
hyperファイルの作成
サンプルのデータを「customer.hyper」として実行したディレクトリに作成する。
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
Connection, CreateMode, \
NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
Inserter, \
escape_name, escape_string_literal, \
TableName, \
HyperException
# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
table_name=TableName("Extract", "Extract"),
columns=[
TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NOT_NULLABLE),
TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NOT_NULLABLE),
TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NOT_NULLABLE)
#カラムの設定をする
#TableDefinition.Column(name='カラム名(何でもいい)', type='型定義', nullability=NOT_NULLABLE(NULLABLE))
]
)
columns[]の中身にカラムを設定する。設定の仕方は以下の通り。
TableDefinition.Column(name=’カラム名(何でもいい)’, type=’型定義’, nullability=NOT_NULLABLE(もしくはNULLABLE))
型定義
整数 = SqlType.big_int() (公式より)
小数 = SqlType.double() (floatはダメだった)
文字列 = SqlType.text()
日付 = SqlType.date()
def run_insert_data_into_single_table():
"""
An example demonstrating a simple single-table Hyper file including table creation and data insertion with different types
"""
print("EXAMPLE - Insert data into a single table within a new Hyper file")
path_to_database = Path("customer.hyper")
# Starts the Hyper Process with telemetry enabled to send data to Tableau.
# To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
# Creates new Hyper file "customer.hyper".
# Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
with Connection(endpoint=hyper.endpoint,
database=path_to_database,
create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
connection.catalog.create_table(table_definition=extract_table)
# The rows to insert into the "Extract"."Extract" table.
data_to_insert = [
["DJ-13375", "Dennis Kane", 518, "Consumer"],
["EB-13705", "Ed Braxton", 815, "Corporate"],
["cm-001", "rei tamai", 777, "Vice President"],
]
with Inserter(connection, extract_table) as inserter:
inserter.add_rows(rows=data_to_insert)
inserter.execute()
# The table names in the "Extract" schema (the default schema).
table_names = connection.catalog.get_table_names("Extract")
print(f"Tables available in {path_to_database} are: {table_names}")
# Number of rows in the "Extract"."Extract" table.
# `execute_scalar_query` is for executing a query that returns exactly one row with one column.
row_count = connection.execute_scalar_query(query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
print(f"The number of rows in table {extract_table.table_name} is {row_count}.")
print("The connection to the Hyper file has been closed.")
print("The Hyper process has been shut down.")
if __name__ == '__main__':
try:
run_insert_data_into_single_table()
except HyperException as ex:
print(ex)
exit(1)
data_to_insert[]にデータを格納する。
格納するデータは
nmp = data_frame.to_numpy().tolist()
でデータフレームから作れる。
その場合
data_to_insert = nmp
と定義すれば機能してくれる。
PS C:\Users\techn\Documents\program\python> python extract.hyper\geography3.py
The HyperProcess has started.
The connection to the Hyper file is open.
The geo_table is defined.
The data was added to the table.
The connection to the Hyper extract file is closed.
The HyperProcess has shut down.
こんな感じのが出ればOK!
hyperファイルの更新
イメージは
- 中身のデータを削除
- 新しいデータを挿入
from pathlib import Path
from tableauhyperapi import HyperProcess, Telemetry, \
Connection, CreateMode, \
NOT_NULLABLE, NULLABLE, SqlType, TableDefinition, \
Inserter, \
escape_name, escape_string_literal, \
TableName, \
HyperException
# The table is called "Extract" and will be created in the "Extract" schema.
# This has historically been the default table name and schema for extracts created by Tableau
extract_table = TableDefinition(
table_name=TableName("Extract", "Extract"),
columns=[
TableDefinition.Column(name='Customer ID', type=SqlType.text(), nullability=NOT_NULLABLE),
TableDefinition.Column(name='Customer Name', type=SqlType.text(), nullability=NOT_NULLABLE),
TableDefinition.Column(name='Loyalty Reward Points', type=SqlType.big_int(), nullability=NOT_NULLABLE),
TableDefinition.Column(name='Segment', type=SqlType.text(), nullability=NOT_NULLABLE)
#カラムの設定をする
#TableDefinition.Column(name='カラム名(何でもいい)', type='型定義', nullability=NOT_NULLABLE(NULLABLE))
]
)
def run_insert_data_into_single_table():
"""
An example demonstrating a simple single-table Hyper file including table creation and data insertion with different types
"""
print("EXAMPLE - Insert data into a single table within a new Hyper file")
path_to_database = Path("customer.hyper")
# Starts the Hyper Process with telemetry enabled to send data to Tableau.
# To opt out, simply set telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU.
with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
# Creates new Hyper file "customer.hyper".
# Replaces file with CreateMode.CREATE_AND_REPLACE if it already exists.
with Connection(endpoint=hyper.endpoint,
database=path_to_database,
create_mode=CreateMode.NONE) as connection:
#createmode をNONEにする
#ここでデータを消す
table_name = TableName("Extract", "Extract")
row_count = connection.execute_command(
command=f"DELETE FROM {table_name} "
)
# connection.catalog.create_schema(schema=extract_table.table_name.schema_name)
# connection.catalog.create_table(table_definition=extract_table)
# この部分を消す
# The rows to insert into the "Extract"."Extract" table.
data_to_insert = [
["DJ-13375", "Dennis Kane", 518, "Consumer"],
["EB-13705", "Ed Braxton", 815, "Corporate"],
["cm-001", "rei tamai", 777, "Vice President"],
]
with Inserter(connection, extract_table) as inserter:
inserter.add_rows(rows=data_to_insert)
inserter.execute()
# The table names in the "Extract" schema (the default schema).
table_names = connection.catalog.get_table_names("Extract")
print(f"Tables available in {path_to_database} are: {table_names}")
# Number of rows in the "Extract"."Extract" table.
# `execute_scalar_query` is for executing a query that returns exactly one row with one column.
row_count = connection.execute_scalar_query(query=f"SELECT COUNT(*) FROM {extract_table.table_name}")
print(f"The number of rows in table {extract_table.table_name} is {row_count}.")
print("The connection to the Hyper file has been closed.")
print("The Hyper process has been shut down.")
if __name__ == '__main__':
try:
run_insert_data_into_single_table()
except HyperException as ex:
print(ex)
exit(1)
最後に
これで自由にhyperファイルを作成できるようになりました。
DMP構築のご相談
お気軽にお問い合わせください