» aws_kinesis_firehose_delivery_stream

Provides a Kinesis Firehose Delivery Stream resource. Amazon Kinesis Firehose is a fully managed, elastic service to easily deliver real-time data streams to destinations such as Amazon S3 and Amazon Redshift.

For more details, see the Amazon Kinesis Firehose Documentation.

» Example Usage

» Extended S3 Destination

# Delivery stream using the extended_s3 destination with a Lambda data
# processor enabled.
resource "aws_kinesis_firehose_delivery_stream" "extended_s3_stream" {
  name        = "terraform-kinesis-firehose-extended-s3-test-stream"
  destination = "extended_s3"

  extended_s3_configuration {
    role_arn   = "${aws_iam_role.firehose_role.arn}"
    bucket_arn = "${aws_s3_bucket.bucket.arn}"

    # Nested configuration is declared with block syntax (the same form as
    # extended_s3_configuration itself) rather than as a list of maps.
    processing_configuration {
      enabled = "true"

      processors {
        type = "Lambda"

        # The Lambda ARN is qualified with a version ($LATEST), as recommended
        # for parameter_value.
        parameters {
          parameter_name  = "LambdaArn"
          parameter_value = "${aws_lambda_function.lambda_processor.arn}:$LATEST"
        }
      }
    }
  }
}

# Destination bucket; its ARN is passed to the delivery stream as bucket_arn.
resource "aws_s3_bucket" "bucket" {
  bucket = "tf-test-bucket"
  acl    = "private"
}

# IAM role passed to the delivery stream as role_arn. The trust policy below
# allows the firehose.amazonaws.com service principal to call sts:AssumeRole.
resource "aws_iam_role" "firehose_role" {
  name = "firehose_test_role"

  assume_role_policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": "sts:AssumeRole",
      "Principal": {
        "Service": "firehose.amazonaws.com"
      },
      "Effect": "Allow",
      "Sid": ""
    }
  ]
}
EOF
}

# Execution role for the Lambda processor. The trust policy below allows the
# lambda.amazonaws.com service principal to call sts:AssumeRole.
resource "aws_iam_role" "lambda_iam" {
  name = "lambda_iam"
  assume_role_policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": "sts:AssumeRole",
      "Principal": {
        "Service": "lambda.amazonaws.com"
      },
      "Effect": "Allow",
      "Sid": ""
    }
  ]
}
EOF
}

# Lambda function referenced by the delivery stream's processing_configuration
# through the LambdaArn parameter.
resource "aws_lambda_function" "lambda_processor" {
  function_name = "firehose_lambda_processor"
  filename      = "lambda.zip"
  handler       = "exports.handler"

  # NOTE(review): nodejs4.3 is an old Lambda runtime - confirm it is still
  # accepted by AWS before copying this example.
  runtime = "nodejs4.3"

  role = "${aws_iam_role.lambda_iam.arn}"
}

» S3 Destination

# Destination bucket; its ARN is passed to s3_configuration as bucket_arn.
resource "aws_s3_bucket" "bucket" {
  bucket = "tf-test-bucket"
  acl    = "private"
}

# IAM role passed to s3_configuration as role_arn. The trust policy below
# allows the firehose.amazonaws.com service principal to call sts:AssumeRole.
resource "aws_iam_role" "firehose_role" {
  name = "firehose_test_role"

  assume_role_policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": "sts:AssumeRole",
      "Principal": {
        "Service": "firehose.amazonaws.com"
      },
      "Effect": "Allow",
      "Sid": ""
    }
  ]
}
EOF
}

# Delivery stream using the (deprecated) s3 destination, configured with only
# the two required s3_configuration arguments.
resource "aws_kinesis_firehose_delivery_stream" "test_stream" {
  name        = "terraform-kinesis-firehose-test-stream"
  destination = "s3"

  s3_configuration {
    role_arn   = "${aws_iam_role.firehose_role.arn}"
    bucket_arn = "${aws_s3_bucket.bucket.arn}"
  }
}

» Redshift Destination

# Single-node Redshift cluster that the delivery stream below loads data into.
resource "aws_redshift_cluster" "test_cluster" {
  # Literal identifier: Terraform does not expand printf-style placeholders,
  # and Redshift identifiers allow only lowercase letters, digits and hyphens.
  cluster_identifier = "tf-redshift-cluster"
  database_name      = "test"
  master_username    = "testuser"
  master_password    = "T3stPass"
  node_type          = "dc1.large"
  cluster_type       = "single-node"
}

# Delivery stream using the redshift destination. Data is staged in the
# intermediate S3 bucket (s3_configuration) and then copied into Redshift.
resource "aws_kinesis_firehose_delivery_stream" "test_stream" {
  destination = "redshift"
  name        = "terraform-kinesis-firehose-test-stream"

  # Intermediate bucket settings; required alongside redshift_configuration.
  s3_configuration {
    bucket_arn         = "${aws_s3_bucket.bucket.arn}"
    role_arn           = "${aws_iam_role.firehose_role.arn}"
    compression_format = "GZIP"
    buffer_interval    = 400
    buffer_size        = 10
  }

  redshift_configuration {
    # Connection details for the target cluster.
    cluster_jdbcurl = "jdbc:redshift://${aws_redshift_cluster.test_cluster.endpoint}/${aws_redshift_cluster.test_cluster.database_name}"
    role_arn        = "${aws_iam_role.firehose_role.arn}"
    username        = "testuser"
    password        = "T3stPass"

    # Target table and the options handed to the copy command.
    data_table_name    = "test-table"
    data_table_columns = "test-col"
    copy_options       = "delimiter '|'" # the default delimiter

    # s3_backup_configuration is required because s3_backup_mode is Enabled.
    s3_backup_mode = "Enabled"

    s3_backup_configuration {
      bucket_arn         = "${aws_s3_bucket.bucket.arn}"
      role_arn           = "${aws_iam_role.firehose_role.arn}"
      compression_format = "GZIP"
      buffer_interval    = 300
      buffer_size        = 15
    }
  }
}

» Elasticsearch Destination

# Elasticsearch domain whose ARN is passed to elasticsearch_configuration as
# domain_arn.
resource "aws_elasticsearch_domain" "test_cluster" {
  domain_name = "firehose-es-test"
}

# Delivery stream using the elasticsearch destination with a Lambda data
# processor enabled.
resource "aws_kinesis_firehose_delivery_stream" "test_stream" {
  name        = "terraform-kinesis-firehose-test-stream"
  destination = "elasticsearch"

  s3_configuration {
    role_arn           = "${aws_iam_role.firehose_role.arn}"
    bucket_arn         = "${aws_s3_bucket.bucket.arn}"
    buffer_size        = 10
    buffer_interval    = 400
    compression_format = "GZIP"
  }

  elasticsearch_configuration {
    domain_arn = "${aws_elasticsearch_domain.test_cluster.arn}"
    role_arn   = "${aws_iam_role.firehose_role.arn}"
    index_name = "test"
    type_name  = "test"

    # Nested configuration is declared with block syntax (the same form as
    # elasticsearch_configuration itself) rather than as a list of maps.
    processing_configuration {
      enabled = "true"

      processors {
        type = "Lambda"

        # The Lambda ARN is qualified with a version ($LATEST), as recommended
        # for parameter_value.
        parameters {
          parameter_name  = "LambdaArn"
          parameter_value = "${aws_lambda_function.lambda_processor.arn}:$LATEST"
        }
      }
    }
  }
}

» Splunk Destination

# Delivery stream using the splunk destination; events that fail delivery are
# written to the bucket described by s3_configuration.
resource "aws_kinesis_firehose_delivery_stream" "test_stream" {
  name        = "terraform-kinesis-firehose-test-stream"
  destination = "splunk"

  s3_configuration {
    # Uses the same firehose_role IAM role as the other examples on this page.
    role_arn           = "${aws_iam_role.firehose_role.arn}"
    bucket_arn         = "${aws_s3_bucket.bucket.arn}"
    buffer_size        = 10
    buffer_interval    = 400
    compression_format = "GZIP"
  }

  splunk_configuration {
    hec_endpoint = "https://http-inputs-mydomain.splunkcloud.com:443"

    # Example value; use the GUID issued by your Splunk cluster for the HEC endpoint.
    hec_token = "51D4DA16-C61B-4F5F-8EC7-ED4301342A4A"

    hec_acknowledgment_timeout = 600
    hec_endpoint_type          = "Event"
    s3_backup_mode             = "FailedEventsOnly"
  }
}

» Argument Reference

The following arguments are supported:

  • name - (Required) A name to identify the stream. This is unique to the AWS account and region the Stream is created in.
  • kinesis_source_configuration - (Optional) Allows the ability to specify the kinesis stream that is used as the source of the firehose delivery stream.
  • destination - (Required) This is the destination to where the data is delivered. The only options are s3 (Deprecated, use extended_s3 instead), extended_s3, redshift, elasticsearch, and splunk.
  • s3_configuration - (Optional, Deprecated, see/use extended_s3_configuration unless destination is redshift) Configuration options for the s3 destination (or the intermediate bucket if the destination is redshift). More details are given below.
  • extended_s3_configuration - (Optional, only Required when destination is extended_s3) Enhanced configuration options for the s3 destination. More details are given below.
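  • redshift_configuration - (Optional) Configuration options if redshift is the destination. Using redshift_configuration requires the user to also specify a s3_configuration block. More details are given below.
  • elasticsearch_configuration - (Optional) Configuration options if elasticsearch is the destination. More details are given below.
  • splunk_configuration - (Optional) Configuration options if splunk is the destination. More details are given below.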

The kinesis_source_configuration object supports the following:

  • kinesis_stream_arn - (Required) The kinesis stream used as the source of the firehose delivery stream.
  • role_arn - (Required) The ARN of the role that provides access to the source Kinesis stream.

The s3_configuration object supports the following:

  • role_arn - (Required) The ARN of the AWS credentials.
  • bucket_arn - (Required) The ARN of the S3 bucket
  • prefix - (Optional) The "YYYY/MM/DD/HH" time format prefix is automatically used for delivered S3 files. You can specify an extra prefix to be added in front of the time format prefix. Note that if the prefix ends with a slash, it appears as a folder in the S3 bucket
  • buffer_size - (Optional) Buffer incoming data to the specified size, in MBs, before delivering it to the destination. The default value is 5. We recommend setting SizeInMBs to a value greater than the amount of data you typically ingest into the delivery stream in 10 seconds. For example, if you typically ingest data at 1 MB/sec set SizeInMBs to be 10 MB or higher.
  • buffer_interval - (Optional) Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination. The default value is 300.
  • compression_format - (Optional) The compression format. If no value is specified, the default is UNCOMPRESSED. Other supported values are GZIP, ZIP & Snappy. If the destination is redshift you cannot use ZIP or Snappy.
  • kms_key_arn - (Optional) Specifies the KMS key ARN the stream will use to encrypt data. If not set, no encryption will be used.
  • cloudwatch_logging_options - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below

The extended_s3_configuration object supports the same fields from s3_configuration as well as the following:

  • processing_configuration - (Optional) The data processing configuration. More details are given below.
  • s3_backup_mode - (Optional) The Amazon S3 backup mode. Valid values are Disabled and Enabled. Default value is Disabled.
  • s3_backup_configuration - (Optional) The configuration for backup in Amazon S3. Required if s3_backup_mode is Enabled. Supports the same fields as s3_configuration object.

The redshift_configuration object supports the following:

  • cluster_jdbcurl - (Required) The jdbcurl of the redshift cluster.
  • username - (Required) The username that the firehose delivery stream will assume. It is strongly recommended that the username and password provided is used exclusively for Amazon Kinesis Firehose purposes, and that the permissions for the account are restricted for Amazon Redshift INSERT permissions.
  • password - (Required) The password for the username above.
  • retry_duration - (Optional) The length of time during which Firehose retries delivery after a failure, starting from the initial request and including the first attempt. The default value is 3600 seconds (60 minutes). Firehose does not retry if the value of DurationInSeconds is 0 (zero) or if the first delivery attempt takes longer than the current value.
  • role_arn - (Required) The arn of the role the stream assumes.
  • s3_backup_mode - (Optional) The Amazon S3 backup mode. Valid values are Disabled and Enabled. Default value is Disabled.
  • s3_backup_configuration - (Optional) The configuration for backup in Amazon S3. Required if s3_backup_mode is Enabled. Supports the same fields as s3_configuration object.
  • data_table_name - (Required) The name of the table in the redshift cluster that the s3 bucket will copy to.
  • copy_options - (Optional) Copy options for copying the data from the s3 intermediate bucket into redshift, for example to change the default delimiter. For valid values, see the AWS documentation
  • data_table_columns - (Optional) The data table columns that will be targeted by the copy command.
  • cloudwatch_logging_options - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below
  • processing_configuration - (Optional) The data processing configuration. More details are given below.

The elasticsearch_configuration object supports the following:

  • buffering_interval - (Optional) Buffer incoming data for the specified period of time, in seconds between 60 to 900, before delivering it to the destination. The default value is 300s.
  • buffering_size - (Optional) Buffer incoming data to the specified size, in MBs between 1 to 100, before delivering it to the destination. The default value is 5MB.
  • domain_arn - (Required) The ARN of the Amazon ES domain. The IAM role must have permission for DescribeElasticsearchDomain, DescribeElasticsearchDomains, and DescribeElasticsearchDomainConfig after assuming RoleARN. The pattern needs to be arn:.*.
  • index_name - (Required) The Elasticsearch index name.
  • index_rotation_period - (Optional) The Elasticsearch index rotation period. Index rotation appends a timestamp to the IndexName to facilitate expiration of old data. Valid values are NoRotation, OneHour, OneDay, OneWeek, and OneMonth. The default value is OneDay.
  • retry_duration - (Optional) After an initial failure to deliver to Amazon Elasticsearch, the total amount of time, in seconds between 0 to 7200, during which Firehose re-attempts delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0.
  • role_arn - (Required) The ARN of the IAM role to be assumed by Firehose for calling the Amazon ES Configuration API and for indexing documents. The pattern needs to be arn:.*.
  • s3_backup_mode - (Optional) Defines how documents should be delivered to Amazon S3. Valid values are FailedDocumentsOnly and AllDocuments. Default value is FailedDocumentsOnly.
  • type_name - (Required) The Elasticsearch type name with maximum length of 100 characters.
  • cloudwatch_logging_options - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below
  • processing_configuration - (Optional) The data processing configuration. More details are given below.

The splunk_configuration object supports the following:

  • hec_acknowledgment_timeout - (Optional) The amount of time, in seconds between 180 and 600, that Kinesis Firehose waits to receive an acknowledgment from Splunk after it sends it data.
  • hec_endpoint - (Required) The HTTP Event Collector (HEC) endpoint to which Kinesis Firehose sends your data.
  • hec_endpoint_type - (Optional) The HEC endpoint type. Valid values are Raw or Event. The default value is Raw.
  • hec_token - (Required) The GUID that you obtain from your Splunk cluster when you create a new HEC endpoint.
  • s3_backup_mode - (Optional) Defines how documents should be delivered to Amazon S3. Valid values are FailedEventsOnly and AllEvents. Default value is FailedEventsOnly.
  • retry_duration - (Optional) After an initial failure to deliver to Splunk, the total amount of time, in seconds between 0 to 7200, during which Firehose re-attempts delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0.
  • cloudwatch_logging_options - (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below.

The cloudwatch_logging_options object supports the following:

  • enabled - (Optional) Enables or disables the logging. Defaults to false.
  • log_group_name - (Optional) The CloudWatch group name for logging. This value is required if enabled is true.
  • log_stream_name - (Optional) The CloudWatch log stream name for logging. This value is required if enabled is true.

The processing_configuration object supports the following:

  • enabled - (Optional) Enables or disables data processing.
  • processors - (Optional) Array of data processors. More details are given below

The processors array objects support the following:

  • type - (Required) The type of processor. Valid Values: Lambda
  • parameters - (Optional) Array of processor parameters. More details are given below

The parameters array objects support the following:

  • parameter_name - (Required) Parameter name. Valid Values: LambdaArn, NumberOfRetries, RoleArn, BufferSizeInMBs, BufferIntervalInSeconds
  • parameter_value - (Required) Parameter value. Must be between 1 and 512 length (inclusive). When providing a Lambda ARN, you should specify the resource version as well.

» Attributes Reference

  • arn - The Amazon Resource Name (ARN) specifying the Stream

» Import

Kinesis Firehose Delivery streams can be imported using the stream ARN, e.g.

$ terraform import aws_kinesis_firehose_delivery_stream.foo arn:aws:firehose:us-east-1:XXX:deliverystream/example

Note: Import does not work for stream destination s3. Consider using extended_s3 since s3 destination is deprecated.