使用 Terraform 在 Azure 上创建 Azure Databricks【Azure 基础架构】
简而言之
基于“基础设施即代码”的理念,我们将使用 Terraform 在 Azure 环境上构建 Azure Databricks。
本地环境
- macOS Monterey 12.1
- Azure CLI 2.28.0
- terraform v1.0.11
前提条件
- 已经准备好 Azure 环境(租户/订阅)
- 已经在本地环境安装了 Azure CLI
- 已经在本地环境配置好 Terraform 环境
- 已经创建了用于在 Azure 上使用 Terraform 构建环境的服务主体,并定义了 Terraform 所需的本地环境变量
尝试使用Terraform在Azure上创建Databricks。
编写 Terraform 定义文件。
主要定义文件
# Terraform settings: pin provider sources and versions so runs are reproducible.
# All three providers use the explicit source/version object form consistently
# (the original mixed the legacy string shorthand with the object form).
terraform {
  required_version = ">= 1.0"

  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 2.33"
    }
    random = {
      source  = "hashicorp/random"
      version = "~> 2.2"
    }
    # NOTE(review): the "databrickslabs" namespace is deprecated in favor of
    # "databricks/databricks" — kept here to match the pinned 0.4.4 release.
    databricks = {
      source  = "databrickslabs/databricks"
      version = "0.4.4"
    }
  }
}
# Azure Resource Manager provider, authenticated with the service-principal
# credentials passed in via the ARM_* input variables (see the variable and
# tfvars files below).
provider "azurerm" {
features {}
tenant_id = var.ARM_TENANT_ID
client_id = var.ARM_CLIENT_ID
client_secret = var.ARM_CLIENT_SECRET
}
参数定义文件
# --- Environment variables (Azure service principal) ---
variable "ARM_TENANT_ID" {
  description = "Azure AD tenant ID of the service principal"
  type        = string
}
variable "ARM_CLIENT_ID" {
  description = "Client (application) ID of the service principal"
  type        = string
}
variable "ARM_CLIENT_SECRET" {
  description = "Client secret of the service principal"
  type        = string
  sensitive   = true # keep out of plan/apply output
}
# --- Tag information applied to every resource ---
variable "tags_def" {
  description = "Common resource tags"
  type        = map(string)
  default = {
    owner       = "ituru"
    period      = "2022-03-31"
    CostCenter  = "psg2"
    Environment = "Demo"
  }
}
# --- Miscellaneous parameters ---
variable "resource_group_name" {
  description = "Name of the resource group to create"
  type        = string
}
variable "region" {
  description = "Azure region to deploy into"
  type        = string
}
variable "email_notifier" {
  description = "The email address to send job status to"
  type        = list(string)
}
参数值定义文件
# Environment variable values (Azure service principal)
# NOTE(review): never commit real credentials to version control — prefer
# ARM_* environment variables or a secret store for the client secret.
ARM_TENANT_ID = "zzzzzzzz-cccc-4645-5757-zzzzzzzzzzzz"
ARM_CLIENT_ID = "xxxxxxxx-xxxx-4444-9922-xxxxxxxxxxxx"
ARM_CLIENT_SECRET = "hogehogehogehogehogehogehogehogege"
# Parameter values
resource_group_name = "rg_ituru_bricks01" // resource group name
region = "japaneast" // Azure region to use
email_notifier = ["hogehoge@gegege.com"] // notification e-mail addresses (list)
Databricks 相关的定义文件(databricks.tf)
# Databricks provider
# provider "databricks" {}
# Pointed at the workspace URL of the workspace created below, so all
# databricks_* resources are created inside that workspace.
provider "databricks" {
host = azurerm_databricks_workspace.this.workspace_url
}
# Get information about the Databricks user that is calling
# the Databricks API (the one associated with "databricks_connection_profile").
# Current-user lookup; depends_on forces the workspace to exist first,
# because this data source queries the workspace API.
data "databricks_current_user" "me" {
depends_on = [azurerm_databricks_workspace.this]
}
# Get the latest Spark version to use for the cluster.
# Restricted to long-term-support (LTS) runtimes.
data "databricks_spark_version" "latest" {
long_term_support = true
depends_on = [azurerm_databricks_workspace.this]
}
# Get the smallest available node type to use for the cluster. Choose
# only from among available node types with local storage.
# Node type definition: smallest node that has a local disk.
data "databricks_node_type" "smallest" {
local_disk = true
depends_on = [azurerm_databricks_workspace.this]
}
# Random string generation: 6 lowercase alphanumeric characters, used via
# locals.prefix to make the workspace name unique.
resource "random_string" "naming" {
special = false
upper = false
length = 6
}
# # The prefix to use when naming the databricks workspace
# Local variable definition.
locals {
prefix = "databricksdemo-${random_string.naming.result}"
}
# Resource group that holds the Databricks workspace.
resource "azurerm_resource_group" "this" {
name = var.resource_group_name
location = var.region
tags = var.tags_def
}
# Databricks workspace.
# NOTE(review): sku = "trial" is a time-limited trial SKU — confirm the
# intended SKU ("standard"/"premium") before using this beyond a demo.
resource "azurerm_databricks_workspace" "this" {
name = "ituru-${local.prefix}-workspace"
resource_group_name = azurerm_resource_group.this.name
location = azurerm_resource_group.this.location
sku = "trial"
managed_resource_group_name = "ituru-${local.prefix}-workspace-rg"
tags = var.tags_def
}
# Secret scope, named after the current user's alphanumeric name.
resource "databricks_secret_scope" "this" {
name = "demo-${data.databricks_current_user.me.alphanumeric}"
}
# Personal access token (PAT), valid for one hour (3600 s).
resource "databricks_token" "pat" {
comment = "Created from ${abspath(path.module)}"
lifetime_seconds = 3600
}
# Store the PAT in the secret scope above, under the key "token".
resource "databricks_secret" "token" {
string_value = databricks_token.pat.token_value
scope = databricks_secret_scope.this.name
key = "token"
}
# Create a simple, sample notebook. Store it in a subfolder within
# the Databricks current user's folder. The notebook contains the
# following basic Spark code in Python.
# The notebook reads the secret back via dbutils and prints it
# (the printed value should appear redacted).
resource "databricks_notebook" "this" {
path = "${data.databricks_current_user.me.home}/Terraform"
language = "PYTHON"
content_base64 = base64encode(<<-EOT
token = dbutils.secrets.get('${databricks_secret_scope.this.name}', '${databricks_secret.token.key}')
print(f'This should be redacted: {token}')
EOT
)
}
# Create a job to run the sample notebook. The job will create
# a cluster to run on. The cluster will use the smallest available
# node type and run the latest version of Spark.
# Job definition: single-worker job cluster, e-mail notification on both
# success and failure.
resource "databricks_job" "this" {
name = "Terraform Demo (${data.databricks_current_user.me.alphanumeric})"
new_cluster {
num_workers = 1
spark_version = data.databricks_spark_version.latest.id
node_type_id = data.databricks_node_type.smallest.id
}
notebook_task {
notebook_path = databricks_notebook.this.path
}
email_notifications {
on_success = var.email_notifier
on_failure = var.email_notifier
}
depends_on = [azurerm_databricks_workspace.this, databricks_notebook.this]
}
# Interactive cluster: draws nodes from the instance pool below,
# autoscales between 1 and 10 workers, and auto-terminates after
# 20 idle minutes.
resource "databricks_cluster" "this" {
cluster_name = "Exploration (${data.databricks_current_user.me.alphanumeric})"
spark_version = data.databricks_spark_version.latest.id
instance_pool_id = databricks_instance_pool.smallest_nodes.id
autotermination_minutes = 20
autoscale {
min_workers = 1
max_workers = 10
}
}
# Cluster policy: caps cost at 10 DBU/hour and pins auto-termination to a
# fixed, hidden 20 minutes.
resource "databricks_cluster_policy" "this" {
name = "Minimal (${data.databricks_current_user.me.alphanumeric})"
definition = jsonencode({
"dbus_per_hour" : {
"type" : "range",
"maxValue" : 10
},
"autotermination_minutes" : {
"type" : "fixed",
"value" : 20,
"hidden" : true
}
})
}
# Instance pool of the smallest node type, preloaded with the latest LTS
# Spark runtime; keeps no idle instances and caps capacity at 30.
resource "databricks_instance_pool" "smallest_nodes" {
instance_pool_name = "Smallest Nodes (${data.databricks_current_user.me.alphanumeric})"
min_idle_instances = 0
max_capacity = 30
node_type_id = data.databricks_node_type.smallest.id
preloaded_spark_versions = [
data.databricks_spark_version.latest.id
]
idle_instance_autotermination_minutes = 20
}
输出信息文件
# Print the URL to the databricks workspace.
output "databricks_url" {
value = "https://${azurerm_databricks_workspace.this.workspace_url}/"
}
# Print the URL to the notebook.
output "notebook_url" {
value = databricks_notebook.this.url
}
# Print the URL to the job.
output "job_url" {
value = databricks_job.this.url
}
执行Terraform
## init
$ terraform init
## plan
$ terraform plan
## apply
$ terraform apply
进行 Terraform 执行后的确认
## 作成されたリソースグループの確認
$ az group show --name rg_ituru_bricks01
{
"id": "/subscriptions/nnnnnnnn-1717-4334-9779-mmmmmmmmmmmm/resourceGroups/rg_ituru_bricks01",
"location": "japaneast",
"managedBy": null,
"name": "rg_ituru_bricks01",
"properties": {
"provisioningState": "Succeeded"
},
"tags": {
"CostCenter": "psg2",
"Environment": "Demo",
"owner": "ituru",
"period": "2022-03-31"
},
"type": "Microsoft.Resources/resourceGroups"
}
## 作成した Azure Databricks Workspace の確認
$ az databricks workspace list --resource-group rg_ituru_bricks01 -o table
CreatedDateTime Location ManagedResourceGroupId Name ProvisioningState ResourceGroup WorkspaceId WorkspaceUrl
-------------------------------- ---------- ----------------------------------------------------------------------------------------------------------- ------------------------------------- ------------------- ----------------- ---------------- ------------------------------------------
2022-01-13T00:11:27.479190+00:00 japaneast /subscriptions/nnnnnnnn-1717-4334-9779-mmmmmmmmmmmm/resourceGroups/ituru-databricksdemo-7ke24v-workspace-rg ituru-databricksdemo-7ke24v-workspace Succeeded rg_ituru_bricks01 1975197519751975 adb-1975197519751975.6.azuredatabricks.net
本地工作目录的状态
$ tree -a
.
├── .terraform
│ ├── providers
│ │ └── registry.terraform.io
│ │ ├── databrickslabs
│ │ │ └── databricks
│ │ │ ├── 0.3.9
│ │ │ │ └── darwin_amd64
│ │ │ │ ├── CHANGELOG.md
│ │ │ │ ├── LICENSE
│ │ │ │ ├── NOTICE
│ │ │ │ └── terraform-provider-databricks_v0.3.9
│ │ │ ├── 0.4.0
│ │ │ │ └── darwin_amd64
│ │ │ │ ├── CHANGELOG.md
│ │ │ │ ├── LICENSE
│ │ │ │ ├── NOTICE
│ │ │ │ └── terraform-provider-databricks_v0.4.0
│ │ │ └── 0.4.4
│ │ │ └── darwin_amd64
│ │ │ ├── CHANGELOG.md
│ │ │ ├── LICENSE
│ │ │ ├── NOTICE
│ │ │ └── terraform-provider-databricks_v0.4.4
│ │ └── hashicorp
│ │ ├── azurerm
│ │ │ ├── 2.88.1
│ │ │ │ └── darwin_amd64
│ │ │ │ └── terraform-provider-azurerm_v2.88.1_x5
│ │ │ └── 2.91.0
│ │ │ └── darwin_amd64
│ │ │ └── terraform-provider-azurerm_v2.91.0_x5
│ │ ├── external
│ │ │ └── 2.1.0
│ │ │ └── darwin_amd64
│ │ │ └── terraform-provider-external_v2.1.0_x5
│ │ └── random
│ │ └── 2.3.1
│ │ └── darwin_amd64
│ │ └── terraform-provider-random_v2.3.1_x4
│ └── terraform.tfstate
├── .terraform.lock.hcl
├── databricks.tf
├── main.tf
├── outputs.tf
├── terraform.tfstate
├── terraform.tfstate.backup
├── terraform.tfvars
└── variables.tf
使用Terraform删除已创建的资源
$ terraform destroy
总结
使用Terraform工具,快速在Azure环境上部署Azure Databricks成功了。
参考文章
我参考了以下文章,在此表示感谢:
– 使用Terraform在Azure上部署Databricks工作区
– Databricks Terraform提供程序
– Databricks Terraform 提供程序
– 使用Terraform在端到端管理工作区