Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Added

- Input to configure stac-server with ENABLE_INGEST_ACTION_TRUNCATE
- OpenSearch cluster warning + critical alarms
- `stac-server` dead letter queue warning + critical alarms

### Changed

Expand Down
1 change: 1 addition & 0 deletions default.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ stac_server_inputs = {
ingest_lambda = null
pre_hook_lambda = null
private_certificate_arn = ""
deploy_alarms = true
auth_function = {
cf_function_name = ""
cf_function_runtime = "cloudfront-js-2.0"
Expand Down
2 changes: 2 additions & 0 deletions inputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ variable "stac_server_inputs" {
stac_title = optional(string)
stac_description = optional(string)
deploy_cloudfront = bool
deploy_alarms = bool
web_acl_id = string
domain_alias = string
enable_transactions_extension = bool
Expand Down Expand Up @@ -159,6 +160,7 @@ variable "stac_server_inputs" {
stac_title = "STAC API"
stac_description = "A STAC API using stac-server"
deploy_cloudfront = true
deploy_alarms = true
web_acl_id = ""
domain_alias = ""
enable_transactions_extension = false
Expand Down
42 changes: 42 additions & 0 deletions modules/stac-server/ingest.tf
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,45 @@ resource "aws_lambda_permission" "stac_server_ingest_sqs_lambda_permission" {
principal = "sqs.amazonaws.com"
source_arn = aws_sqs_queue.stac_server_ingest_sqs_queue.arn
}

# WARNING-level CloudWatch alarm on the stac-server ingest dead letter queue.
# Only created when var.deploy_alarms is true.
resource "aws_cloudwatch_metric_alarm" "warning_stac_server_dlq_alarm" {
  count = var.deploy_alarms ? 1 : 0

  alarm_name        = "WARNING: ${local.name_prefix}-stac-server-dlq SQS DLQ Warning Alarm"
  alarm_description = "WARNING: ${var.dead_letter_queue_warning_alarm_threshold} or more messages are persisting in the ${local.name_prefix}-stac-server SQS dead letter queue"

  # Metric being watched: number of visible messages in the ingest DLQ.
  namespace   = "AWS/SQS"
  metric_name = "ApproximateNumberOfMessagesVisible"
  dimensions = {
    QueueName = aws_sqs_queue.stac_server_ingest_dead_letter_sqs_queue.name
  }

  # Evaluation: the 60-second average must be at or above the threshold for
  # 2 evaluation periods; periods without data count as not breaching.
  statistic           = "Average"
  period              = 60
  evaluation_periods  = 2
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = var.dead_letter_queue_warning_alarm_threshold
  treat_missing_data  = "notBreaching"

  # Notifications: both ALARM and OK transitions go to the warning topic.
  alarm_actions             = [var.warning_sns_topic_arn]
  ok_actions                = [var.warning_sns_topic_arn]
  insufficient_data_actions = []
}

# CRITICAL-level CloudWatch alarm on the stac-server ingest dead letter queue.
# Only created when var.deploy_alarms is true.
resource "aws_cloudwatch_metric_alarm" "critical_stac_server_dlq_alarm" {
  count = var.deploy_alarms ? 1 : 0

  alarm_name        = "CRITICAL: ${local.name_prefix}-stac-server-dlq SQS DLQ Critical Alarm"
  alarm_description = "CRITICAL: ${var.dead_letter_queue_critical_alarm_threshold} or more messages are persisting in the ${local.name_prefix}-stac-server SQS dead letter queue"

  # Metric being watched: number of visible messages in the ingest DLQ.
  namespace   = "AWS/SQS"
  metric_name = "ApproximateNumberOfMessagesVisible"
  dimensions = {
    QueueName = aws_sqs_queue.stac_server_ingest_dead_letter_sqs_queue.name
  }

  # Evaluation: 5 periods of 60 seconds. Author's rationale from review: this
  # might be high, but if a bunch of messages have been sitting in the DLQ for
  # over 5 minutes they are properly stuck there, hence a CRITICAL alarm.
  statistic           = "Average"
  period              = 60
  evaluation_periods  = 5
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = var.dead_letter_queue_critical_alarm_threshold
  treat_missing_data  = "notBreaching"

  # Notifications: ALARM goes to the critical topic.
  # NOTE(review): OK transitions are sent to the *warning* topic, not the
  # critical one - confirm this is the intended convention.
  alarm_actions             = [var.critical_sns_topic_arn]
  ok_actions                = [var.warning_sns_topic_arn]
  insufficient_data_actions = []
}
28 changes: 28 additions & 0 deletions modules/stac-server/inputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,34 @@ variable "cors_headers" {
default = ""
}

# Toggle read by the dead letter queue alarm resources (count = 1 when true).
variable "deploy_alarms" {
  description = "Deploy stac-server dead letter queue alarm stack"
  type        = bool
  default     = true
}

# Threshold used by the WARNING dead letter queue alarm; the alarm compares
# the queue's visible-message count against this with >=.
variable "dead_letter_queue_warning_alarm_threshold" {
  type        = number
  default     = 2
  description = "Message count threshold trigger for dead letter queue for warning alarm"
}

# Threshold used by the CRITICAL dead letter queue alarm; the alarm compares
# the queue's visible-message count against this with >=.
variable "dead_letter_queue_critical_alarm_threshold" {
  type        = number
  default     = 10
  description = "Message count threshold trigger for dead letter queue for critical alarm"
}

# Required (no default). Used as the alarm action of the warning alarms and as
# the OK action of every stac-server alarm in this module.
variable "warning_sns_topic_arn" {
  type        = string
  description = "SNS topic to be used by all stac-server `warning` alarms."
}

# Required (no default). Used as the alarm action of the critical alarms.
variable "critical_sns_topic_arn" {
  type        = string
  description = "SNS topic to be used by all stac-server `critical` alarms"
}

variable "domain_alias" {
description = "Custom domain alias for private API Gateway endpoint"
type = string
Expand Down
42 changes: 42 additions & 0 deletions modules/stac-server/opensearch_domain.tf
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,45 @@ resource "aws_lambda_invocation" "stac_server_opensearch_domain_ingest_create_in
aws_opensearch_domain.stac_server_opensearch_domain
]
}

# WARNING-level CloudWatch alarm that fires when the managed OpenSearch domain
# reports a YELLOW cluster status.
#
# Created only when alarms are enabled (var.deploy_alarms) AND a managed
# (non-serverless) OpenSearch domain is deployed; the serverless deployment has
# no aws_opensearch_domain to monitor.
#
# NOTE(review): the resource label contains the typo "custer". It is kept
# as-is because renaming changes the resource address in state; rename together
# with a `moved` block if desired.
resource "aws_cloudwatch_metric_alarm" "warning_stac_server_opensearch_custer_alarm" {
  count = var.deploy_alarms && !var.deploy_stac_server_opensearch_serverless ? 1 : 0

  alarm_name        = "WARNING: ${local.name_prefix}-stac-server-opensearch-cluster YELLOW count > 0"
  alarm_description = "WARNING: 1 or more ${local.name_prefix}-stac-server OpenSearch Cluster nodes are in a YELLOW state"

  namespace   = "AWS/ES"
  metric_name = "ClusterStatus.yellow"

  # AWS/ES domain metrics are published under the DomainName + ClientId
  # dimensions. A `cluster = <arn>` dimension matches no metric, so the alarm
  # would never receive data and (with notBreaching) would never fire.
  dimensions = {
    DomainName = aws_opensearch_domain.stac_server_opensearch_domain[0].domain_name
    # ClientId is the owning AWS account id, which is field 4 of the domain ARN
    # (arn:partition:es:region:account-id:domain/name).
    ClientId = split(":", aws_opensearch_domain.stac_server_opensearch_domain[0].arn)[4]
  }

  # Evaluation: a single 60-second period whose maximum is >= 1.
  statistic           = "Maximum"
  period              = 60
  evaluation_periods  = 1
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  treat_missing_data  = "notBreaching"

  # Notifications: both ALARM and OK transitions go to the warning topic.
  alarm_actions             = [var.warning_sns_topic_arn]
  ok_actions                = [var.warning_sns_topic_arn]
  insufficient_data_actions = []
}

# CRITICAL-level CloudWatch alarm that fires when the managed OpenSearch domain
# reports a RED cluster status.
#
# Created only when alarms are enabled (var.deploy_alarms) AND a managed
# (non-serverless) OpenSearch domain is deployed; the serverless deployment has
# no aws_opensearch_domain to monitor.
resource "aws_cloudwatch_metric_alarm" "critical_stac_server_opensearch_cluster_alarm" {
  count = var.deploy_alarms && !var.deploy_stac_server_opensearch_serverless ? 1 : 0

  alarm_name        = "CRITICAL: ${local.name_prefix}-stac-server-opensearch-cluster RED count > 0"
  alarm_description = "CRITICAL: 1 or more ${local.name_prefix}-stac-server OpenSearch Cluster nodes are in a RED state"

  namespace   = "AWS/ES"
  metric_name = "ClusterStatus.red"

  # AWS/ES domain metrics are published under the DomainName + ClientId
  # dimensions. A `cluster = <arn>` dimension matches no metric, so the alarm
  # would never receive data and (with notBreaching) would never fire.
  dimensions = {
    DomainName = aws_opensearch_domain.stac_server_opensearch_domain[0].domain_name
    # ClientId is the owning AWS account id, which is field 4 of the domain ARN
    # (arn:partition:es:region:account-id:domain/name).
    ClientId = split(":", aws_opensearch_domain.stac_server_opensearch_domain[0].arn)[4]
  }

  # Evaluation: a single 60-second period whose maximum is >= 1.
  statistic           = "Maximum"
  period              = 60
  evaluation_periods  = 1
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  treat_missing_data  = "notBreaching"

  # Notifications: ALARM goes to the critical topic.
  # NOTE(review): OK transitions are sent to the *warning* topic, not the
  # critical one - confirm this is the intended convention.
  alarm_actions             = [var.critical_sns_topic_arn]
  ok_actions                = [var.warning_sns_topic_arn]
  insufficient_data_actions = []
}
2 changes: 2 additions & 0 deletions profiles/core/inputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ variable "stac_server_inputs" {
stac_title = optional(string)
stac_description = optional(string)
deploy_cloudfront = bool
deploy_alarms = bool
web_acl_id = string
domain_alias = string
enable_transactions_extension = bool
Expand Down Expand Up @@ -159,6 +160,7 @@ variable "stac_server_inputs" {
stac_title = "STAC API"
stac_description = "A STAC API using stac-server"
deploy_cloudfront = true
deploy_alarms = true
web_acl_id = ""
domain_alias = ""
enable_transactions_extension = false
Expand Down
2 changes: 2 additions & 0 deletions profiles/core/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ module "stac-server" {
deploy_stac_server_opensearch_serverless = var.deploy_stac_server_opensearch_serverless
deploy_stac_server_outside_vpc = var.deploy_stac_server_outside_vpc
fd_web_acl_id = var.deploy_waf_rule ? module.base_infra.web_acl_id : var.ext_web_acl_id
warning_sns_topic_arn = module.base_infra.warning_sns_topic_arn
critical_sns_topic_arn = module.base_infra.critical_sns_topic_arn
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should allow the user to toggle whether these stac-server alarms are deployed or not via boolean flag; automatically deploying alarms can be handy but they may prefer to manage all tracked dimensions themselves, too.

Suggest using the same approach used in the cirrus module for consistency:

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added deploy_alarms trigger similar to cirrus module


depends_on = [
module.setup
Expand Down
12 changes: 12 additions & 0 deletions profiles/stac-server/inputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ variable "stac_server_inputs" {
stac_title = optional(string)
stac_description = optional(string)
deploy_cloudfront = bool
deploy_alarms = bool
web_acl_id = string
domain_alias = string
enable_transactions_extension = bool
Expand Down Expand Up @@ -124,6 +125,7 @@ variable "stac_server_inputs" {
deploy_cloudfront = true
web_acl_id = ""
domain_alias = ""
deploy_alarms = true
enable_transactions_extension = false
enable_collections_authx = false
enable_ingest_action_truncate = false
Expand Down Expand Up @@ -221,3 +223,13 @@ variable "fd_web_acl_id" {
type = string
default = ""
}

# Required (no default). Forwarded unchanged to the stac-server module's
# warning_sns_topic_arn input.
variable "warning_sns_topic_arn" {
  type        = string
  description = "SNS topic to be used by all stac-server `warning` alarms."
}

# Required (no default). Forwarded unchanged to the stac-server module's
# critical_sns_topic_arn input.
variable "critical_sns_topic_arn" {
  type        = string
  description = "SNS topic to be used by all stac-server `critical` alarms"
}
10 changes: 6 additions & 4 deletions profiles/stac-server/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@ module "stac-server" {
stac_api_rootpath = var.stac_server_inputs.deploy_cloudfront || var.stac_server_inputs.domain_alias != "" ? "" : "/${var.environment}"
stac_api_url = var.stac_server_inputs.domain_alias != "" ? "https://${var.stac_server_inputs.domain_alias}" : ""

cors_origin = var.stac_server_inputs.cors_origin
cors_credentials = var.stac_server_inputs.cors_credentials
cors_methods = var.stac_server_inputs.cors_methods
cors_headers = var.stac_server_inputs.cors_headers
cors_origin = var.stac_server_inputs.cors_origin
cors_credentials = var.stac_server_inputs.cors_credentials
cors_methods = var.stac_server_inputs.cors_methods
cors_headers = var.stac_server_inputs.cors_headers
warning_sns_topic_arn = var.warning_sns_topic_arn
critical_sns_topic_arn = var.critical_sns_topic_arn
}

module "cloudfront_api_gateway_endpoint" {
Expand Down
Loading