1 year ago

#42618

test-img

BPS

How do I capture AWS Backup failures in terraform when Windows VSS fails?

I'm using AWS Backup to back up several EC2 instances. My Terraform seems to report correctly when a backup job fails outright, but I also want to be notified when the disks are backed up successfully but the Windows VSS step fails. Ultimately, the failure events are going to be published to Opsgenie. Is there a way to accomplish this? I have tried capturing all events with the 'aws_backup_vault_notifications' resource, and I have tried a filter as described in this AWS blog: https://aws.amazon.com/premiumsupport/knowledge-center/aws-backup-failed-job-notification/

I have included most of my terraform below, minus the opsgenie module; I can get successful or fully failing events published to Opsgenie just fine if I include those events:

# Vault event types that are forwarded to the backup alerts SNS topic.
locals {
  backup_vault_events = toset([
    "BACKUP_JOB_FAILED",
    "COPY_JOB_FAILED",
  ])
}

# Region-level AWS Backup opt-in preferences: only EBS and EC2 are enabled,
# every other listed resource type is explicitly disabled.
resource "aws_backup_region_settings" "legacy" {
  resource_type_opt_in_preference = {
    # Enabled resource types
    "EBS" = true
    "EC2" = true

    # Disabled resource types
    "Aurora"          = false
    "DocumentDB"      = false
    "DynamoDB"        = false
    "EFS"             = false
    "FSx"             = false
    "Neptune"         = false
    "RDS"             = false
    "Storage Gateway" = false
    "VirtualMachine"  = false
  }
}

# Primary backup vault, encrypted with the KMS key managed in this configuration.
resource "aws_backup_vault" "legacy" {
  kms_key_arn = aws_kms_key.key.arn
  name        = "Legacy${var.environment_tag}"
}

# IAM role assumed by the AWS Backup service (backup.amazonaws.com).
# The permissions boundary is taken from the role_permissions_boundary
# policy data source.
resource "aws_iam_role" "legacy_backup" {
  name                 = "AWSBackupService"
  permissions_boundary = data.aws_iam_policy.role_permissions_boundary.arn

  # Trust policy built with jsonencode so it is validated as structured data
  # instead of a free-form heredoc string.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = ["sts:AssumeRole"]
        # IAM treats the Effect value as case-sensitive: it must be "Allow"
        # (or "Deny"), not "allow".
        Effect = "Allow"
        Principal = {
          Service = ["backup.amazonaws.com"]
        }
      }
    ]
  })
}

# Attach the AWS-managed backup service-role policy to the backup role.
resource "aws_iam_role_policy_attachment" "legacy_backup" {
  role       = aws_iam_role.legacy_backup.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSBackupServiceRolePolicyForBackup"
}

###############################################################################
## Second Region Backup
###############################################################################

# Backup vault created through the aws.secondary provider alias, encrypted
# with the replica KMS key for that region.
resource "aws_backup_vault" "secondary" {
  provider = aws.secondary

  name        = "Legacy${var.environment_tag}SecondaryRegion"
  kms_key_arn = aws_kms_replica_key.secondary_region.arn

  # Shared tags plus a per-vault "name" tag.
  tags = merge(local.tags, {
    name = "Legacy${var.environment_tag}SecondaryRegion"
  })
}

# Policy document that lets the AWS Backup service principal publish to the
# backup alerts SNS topic.
data "aws_iam_policy_document" "backups" {
  policy_id = "__default_policy_ID"

  statement {
    sid    = "__default_statement_ID"
    effect = "Allow"

    actions   = ["SNS:Publish"]
    resources = [aws_sns_topic.backup_alerts.arn]

    principals {
      type        = "Service"
      identifiers = ["backup.amazonaws.com"]
    }
  }
}

###############################################################################
# SNS
###############################################################################
# Apply the rendered "backups" policy document to the backup alerts topic.
resource "aws_sns_topic_policy" "backup_alerts" {
  policy = data.aws_iam_policy_document.backups.json
  arn    = aws_sns_topic.backup_alerts.arn
}

# Send the vault events selected in local.backup_vault_events from the legacy
# vault to the backup alerts SNS topic.
# NOTE(review): a job whose Windows VSS step fails may be reported as a
# completed job rather than a failed one; if so, failure-only events will not
# surface it. Confirm against the AWS Backup documentation.
resource "aws_backup_vault_notifications" "backup_alerts" {
  backup_vault_events = local.backup_vault_events
  backup_vault_name   = aws_backup_vault.legacy.id
  sns_topic_arn       = aws_sns_topic.backup_alerts.arn
}


# HTTPS subscription that delivers backup alert messages to the endpoint
# exposed by the opsgenie_team module.
resource "aws_sns_topic_subscription" "backup_alerts_opsgenie_target" {
  topic_arn = aws_sns_topic.backup_alerts.arn
  protocol  = "https"
  endpoint  = module.opsgenie_team.sns_integration_sns_endpoint

  # Subscription confirmation settings.
  endpoint_auto_confirms          = true
  confirmation_timeout_in_minutes = 1
}

amazon-web-services

terraform

aws-backup

opsgenie

0 Answers

Your Answer

Accepted video resources