使用Terraform为Auto Scaling启动的服务器自动设置CloudWatch告警
aws-scripts-mon
我们将使用Terraform为注册在CloudWatch中的数据设置警报。
使用aws-scripts-mon时,只要加上--auto-scaling选项,就可以将AutoScalingGroupName添加到dimensions中。
# Publish memory and disk-space utilization (for the / mount) to CloudWatch;
# --auto-scaling additionally attaches the AutoScalingGroupName dimension.
mon-put-instance-data.pl --mem-util --disk-path=/ --disk-space-util --auto-scaling --from-cron
将上述命令作为cron任务注册到通过Auto Scaling启动的服务器上。
需要注意的是,如果IAM策略中没有ec2:DescribeTags权限,脚本将无法获取AutoScalingGroupName,也就无法将其添加到dimensions中。
自动伸缩
按照下面的方式定义,即可完成CloudWatch告警的配置。这里假设已经创建了aws_sns_topic.notify_to_slack和aws_autoscaling_group.front。
下面分别为CPU使用率和内存使用率定义告警。
# CloudWatch alarm on the average CPU utilization of the "front" Auto Scaling group.
# Goes into ALARM when the 300-second average is >= 80 for 2 consecutive periods.
# ALARM, OK and INSUFFICIENT_DATA transitions are all published to the same SNS topic.
resource "aws_cloudwatch_metric_alarm" "front_cpu" {
  alarm_name        = "${aws_autoscaling_group.front.name} CPU Utilization"
  alarm_description = "${aws_autoscaling_group.front.name} CPU Utilization"

  # Metric to watch: the built-in EC2 CPU metric, aggregated over the group.
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"

  # `dimensions` is a map attribute, so it is assigned with `=`. The block form
  # (`dimensions { ... }`) is rejected by Terraform 0.12 and later, while the
  # `=` form is accepted by both 0.11 and newer releases.
  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.front.name}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"

  # Notify on every state transition.
  alarm_actions             = ["${aws_sns_topic.notify_to_slack.arn}"]
  insufficient_data_actions = ["${aws_sns_topic.notify_to_slack.arn}"]
  ok_actions                = ["${aws_sns_topic.notify_to_slack.arn}"]
}
# CloudWatch alarm on the average memory utilization of the "front" Auto Scaling group.
# Goes into ALARM when the 300-second average is >= 90 for 2 consecutive periods.
# The metric is the custom one published by mon-put-instance-data.pl (aws-scripts-mon)
# when it is run with --auto-scaling, which adds the AutoScalingGroupName dimension.
resource "aws_cloudwatch_metric_alarm" "front_mem" {
  alarm_name        = "${aws_autoscaling_group.front.name} Memory Utilization"
  alarm_description = "${aws_autoscaling_group.front.name} Memory Utilization"

  # Metric to watch: custom namespace written by the monitoring script.
  namespace   = "System/Linux"
  metric_name = "MemoryUtilization"

  # `dimensions` is a map attribute, so it is assigned with `=`. The block form
  # (`dimensions { ... }`) is rejected by Terraform 0.12 and later, while the
  # `=` form is accepted by both 0.11 and newer releases.
  dimensions = {
    AutoScalingGroupName = "${aws_autoscaling_group.front.name}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "90"

  # Notify on every state transition.
  alarm_actions             = ["${aws_sns_topic.notify_to_slack.arn}"]
  insufficient_data_actions = ["${aws_sns_topic.notify_to_slack.arn}"]
  ok_actions                = ["${aws_sns_topic.notify_to_slack.arn}"]
}
实例
通常情况下,对于实例我们使用InstanceId作为dimensions。
如果在创建 aws_instance 时使用了 count,那么您也可以在 aws_cloudwatch_metric_alarm 中使用 count 来进行相应的处理。
# Per-instance CloudWatch alarm on the average CPU utilization of each
# aws_instance.elasticsearch instance. One alarm is created per index up to
# var.elasticsearch_server_count; this is expected to match the `count` used
# on aws_instance.elasticsearch.
# Goes into ALARM when the 300-second average is >= 80 for 2 consecutive periods.
resource "aws_cloudwatch_metric_alarm" "elasticsearch_cpu" {
  count = "${var.elasticsearch_server_count}"

  alarm_name        = "${element(aws_instance.elasticsearch.*.id, count.index)} CPU Utilization"
  alarm_description = "${element(aws_instance.elasticsearch.*.id, count.index)} CPU Utilization"

  # Metric to watch: the built-in EC2 CPU metric for this single instance.
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"

  # `dimensions` is a map attribute, so it is assigned with `=`. The block form
  # (`dimensions { ... }`) is rejected by Terraform 0.12 and later, while the
  # `=` form is accepted by both 0.11 and newer releases.
  dimensions = {
    InstanceId = "${element(aws_instance.elasticsearch.*.id, count.index)}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"

  # Notify on every state transition.
  alarm_actions             = ["${aws_sns_topic.notify_to_slack.arn}"]
  insufficient_data_actions = ["${aws_sns_topic.notify_to_slack.arn}"]
  ok_actions                = ["${aws_sns_topic.notify_to_slack.arn}"]
}
# Per-instance CloudWatch alarm on the average memory utilization of each
# aws_instance.elasticsearch instance. One alarm is created per index up to
# var.elasticsearch_server_count; this is expected to match the `count` used
# on aws_instance.elasticsearch.
# Goes into ALARM when the 300-second average is >= 90 for 2 consecutive periods.
# The metric is the custom one published by mon-put-instance-data.pl (aws-scripts-mon).
resource "aws_cloudwatch_metric_alarm" "elasticsearch_mem" {
  count = "${var.elasticsearch_server_count}"

  alarm_name = "${element(aws_instance.elasticsearch.*.id, count.index)} Memory Usage"

  # The description previously read "CPU Utilization" (copy-paste from the CPU
  # alarm); it now matches this alarm's name and metric.
  alarm_description = "${element(aws_instance.elasticsearch.*.id, count.index)} Memory Usage"

  # Metric to watch: custom namespace written by the monitoring script.
  namespace   = "System/Linux"
  metric_name = "MemoryUtilization"

  # `dimensions` is a map attribute, so it is assigned with `=`. The block form
  # (`dimensions { ... }`) is rejected by Terraform 0.12 and later, while the
  # `=` form is accepted by both 0.11 and newer releases.
  dimensions = {
    InstanceId = "${element(aws_instance.elasticsearch.*.id, count.index)}"
  }

  # Evaluation rule.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "90"

  # Notify on every state transition.
  alarm_actions             = ["${aws_sns_topic.notify_to_slack.arn}"]
  insufficient_data_actions = ["${aws_sns_topic.notify_to_slack.arn}"]
  ok_actions                = ["${aws_sns_topic.notify_to_slack.arn}"]
}