From dada8345e26ea7416c4587f05806887bd9d80d13 Mon Sep 17 00:00:00 2001 From: Yan Cui Date: Fri, 8 Feb 2019 14:49:24 +0000 Subject: [PATCH] - allow treatMissingData field to be configured --- README.md | 43 +++++-- lib/deploy/stepFunctions/compileAlarms.js | 27 +++-- .../stepFunctions/compileAlarms.test.js | 112 ++++++++++++++++++ 3 files changed, 163 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index b1363606..687e1d3a 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,9 @@ stepFunctions: - executionsTimeOut - executionsFailed - executionsAborted - - executionThrottled + - metric: executionThrottled + treatMissingData: breaching # overrides below default + treatMissingData: ignore # optional hellostepfunc2: definition: StartAt: HelloWorld2 @@ -70,16 +72,6 @@ stepFunctions: - DynamoDBTable - KinesisStream - CUstomIamRole - alarms: - topics: - ok: arn:aws:sns:us-east-1:1234567890:NotifyMe - alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe - insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe - metrics: - - executionsTimeOut - - executionsFailed - - executionsAborted - - executionThrottled activities: - myTask - yourTask @@ -172,10 +164,20 @@ stepFunctions: - executionsFailed - executionsAborted - executionThrottled + treatMissingData: missing ``` Both `topics` and `metrics` are required properties. There are 4 supported metrics, each map to the CloudWatch Metrics that Step Functions publishes for your executions. +You can configure how the CloudWatch Alarms should treat missing data: + +* `missing` (AWS default): The alarm does not consider missing data points when evaluating whether to change state. +* `ignore`: The current alarm state is maintained. +* `breaching`: Missing data points are treated as breaching the threshold. +* `notBreaching`: Missing data points are treated as being within the threshold. 
+ +For more information, please refer to the [official documentation](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data). + The generated CloudWatch alarms would have the following configurations: ```yaml namespace: 'AWS/States' @@ -185,12 +187,29 @@ period: 60 evaluationPeriods: 1 ComparisonOperator: GreaterThanOrEqualToThreshold Statistic: Sum -treatMissingData: missing +treatMissingData: <missing (default) | ignore | breaching | notBreaching> Dimensions: - Name: StateMachineArn Value: ``` +You can also override the default `treatMissingData` setting for a particular alarm by specifying an override: + +```yml +alarms: + topics: + ok: arn:aws:sns:us-east-1:1234567890:NotifyMe + alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe + insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe + metrics: + - executionsTimeOut + - executionsFailed + - executionsAborted + - metric: executionThrottled + treatMissingData: breaching # override + treatMissingData: ignore # default +``` + #### Current Gotcha Please keep this gotcha in mind if you want to reference the `name` from the `resources` section. To generate Logical ID for CloudFormation, the plugin transforms the specified name in serverless.yml based on the following scheme. diff --git a/lib/deploy/stepFunctions/compileAlarms.js b/lib/deploy/stepFunctions/compileAlarms.js index 8acf648a..c92f66e7 100644 --- a/lib/deploy/stepFunctions/compileAlarms.js +++ b/lib/deploy/stepFunctions/compileAlarms.js @@ -24,9 +24,12 @@ function getCloudWatchAlarms( const alarmActions = alarmAction ? [alarmAction] : []; const insufficientDataAction = _.get(alarmsObj, 'topics.insufficientData'); const insufficientDataActions = insufficientDataAction ? 
[insufficientDataAction] : []; + const defaultTreatMissingData = _.get(alarmsObj, 'treatMissingData', 'missing'); const metrics = _.uniq(_.get(alarmsObj, 'metrics', [])); - const [valid, invalid] = _.partition(metrics, m => _.has(cloudWatchMetricNames, m)); + const [valid, invalid] = _.partition( + metrics, + m => _.has(cloudWatchMetricNames, _.get(m, 'metric', m))); if (!_.isEmpty(invalid)) { serverless.cli.consoleLog( @@ -37,10 +40,13 @@ function getCloudWatchAlarms( } return valid.map(metric => { - const MetricName = cloudWatchMetricNames[metric]; + // metric can be either a string or object + const metricName = _.get(metric, 'metric', metric); + const cloudWatchMetricName = cloudWatchMetricNames[metricName]; const AlarmDescription = - `${stateMachineName}[${stage}][${region}]: ${alarmDescriptions[metric]}`; - const logicalId = `${stateMachineLogicalId}${MetricName}Alarm`; + `${stateMachineName}[${stage}][${region}]: ${alarmDescriptions[metricName]}`; + const logicalId = `${stateMachineLogicalId}${cloudWatchMetricName}Alarm`; + const treatMissingData = _.get(metric, 'treatMissingData', defaultTreatMissingData); return { logicalId, @@ -48,7 +54,7 @@ function getCloudWatchAlarms( Type: 'AWS::CloudWatch::Alarm', Properties: { Namespace: 'AWS/States', - MetricName, + MetricName: cloudWatchMetricName, AlarmDescription, Threshold: 1, Period: 60, @@ -58,7 +64,7 @@ function getCloudWatchAlarms( OKActions: okActions, AlarmActions: alarmActions, InsufficientDataActions: insufficientDataActions, - TreatMissingData: 'missing', + TreatMissingData: treatMissingData, Dimensions: [ { Name: 'StateMachineArn', @@ -79,9 +85,16 @@ function validateConfig(serverless, stateMachineName, alarmsObj) { return false; } + // metrics can be either short form (e.g. "executionsTimeOut") or + // long form, which allows you to optionally specify treatMissingData override, e.g. 
+ // { "metric": "executionsTimeOut", "treatMissingData": "ignore" } + const validateMetric = x => + _.isString(x) || + (_.isObject(x) && _.has(x, 'metric') && _.isString(x.metric)); + if (!_.isObject(alarmsObj.topics) || !_.isArray(alarmsObj.metrics) || - !_.every(alarmsObj.metrics, _.isString)) { + !_.every(alarmsObj.metrics, validateMetric)) { serverless.cli.consoleLog( `state machine [${stateMachineName}] : alarms config is malformed. ` + 'Please see https://github.com/horike37/serverless-step-functions for examples'); diff --git a/lib/deploy/stepFunctions/compileAlarms.test.js b/lib/deploy/stepFunctions/compileAlarms.test.js index ee997e21..aa5fc0df 100644 --- a/lib/deploy/stepFunctions/compileAlarms.test.js +++ b/lib/deploy/stepFunctions/compileAlarms.test.js @@ -268,4 +268,116 @@ describe('#compileAlarms', () => { expect(consoleLogSpy.callCount).equal(2); }); + + it('should use specified treatMissingData for all alarms', () => { + const genStateMachine = (name) => ({ + name, + definition: { + StartAt: 'A', + States: { + A: { + Type: 'Pass', + End: true, + }, + }, + }, + alarms: { + topics: { + ok: '${self:service}-${opt:stage}-alerts-ok', + alarm: '${self:service}-${opt:stage}-alerts-alarm', + insufficientData: '${self:service}-${opt:stage}-alerts-missing', + }, + metrics: [ + 'executionsTimeOut', + 'executionsFailed', + 'executionsAborted', + 'executionThrottled', + ], + treatMissingData: 'ignore', + }, + }); + + serverless.service.stepFunctions = { + stateMachines: { + myStateMachine1: genStateMachine('stateMachineBeta1'), + myStateMachine2: genStateMachine('stateMachineBeta2'), + }, + }; + + serverlessStepFunctions.compileAlarms(); + const resources = serverlessStepFunctions.serverless.service + .provider.compiledCloudFormationTemplate.Resources; + + const verify = (resourceName) => { + expect(resources).to.have.property(resourceName); + expect(resources[resourceName].Properties.TreatMissingData).to.equal('ignore'); + }; + + 
verify('StateMachineBeta1ExecutionsTimeOutAlarm'); + verify('StateMachineBeta1ExecutionsFailedAlarm'); + verify('StateMachineBeta1ExecutionsAbortedAlarm'); + verify('StateMachineBeta1ExecutionThrottledAlarm'); + verify('StateMachineBeta2ExecutionsTimeOutAlarm'); + verify('StateMachineBeta2ExecutionsFailedAlarm'); + verify('StateMachineBeta2ExecutionsAbortedAlarm'); + verify('StateMachineBeta2ExecutionThrottledAlarm'); + + expect(consoleLogSpy.callCount).equal(0); + }); + + it('should allow individual alarms to override default treatMissingData', () => { + const genStateMachine = (name) => ({ + name, + definition: { + StartAt: 'A', + States: { + A: { + Type: 'Pass', + End: true, + }, + }, + }, + alarms: { + topics: { + ok: '${self:service}-${opt:stage}-alerts-ok', + alarm: '${self:service}-${opt:stage}-alerts-alarm', + insufficientData: '${self:service}-${opt:stage}-alerts-missing', + }, + metrics: [ + 'executionsTimeOut', + { metric: 'executionsFailed', treatMissingData: 'breaching' }, + 'executionsAborted', + 'executionThrottled', + ], + treatMissingData: 'ignore', + }, + }); + + serverless.service.stepFunctions = { + stateMachines: { + myStateMachine1: genStateMachine('stateMachineBeta1'), + myStateMachine2: genStateMachine('stateMachineBeta2'), + }, + }; + + serverlessStepFunctions.compileAlarms(); + const resources = serverlessStepFunctions.serverless.service + .provider.compiledCloudFormationTemplate.Resources; + + const verify = (resourceName, expectedConfig = 'ignore') => { + expect(resources).to.have.property(resourceName); + expect(resources[resourceName].Properties.TreatMissingData).to.equal(expectedConfig); + }; + + verify('StateMachineBeta1ExecutionsTimeOutAlarm'); + verify('StateMachineBeta1ExecutionsFailedAlarm', 'breaching'); + verify('StateMachineBeta1ExecutionsAbortedAlarm'); + verify('StateMachineBeta1ExecutionThrottledAlarm'); + verify('StateMachineBeta2ExecutionsTimeOutAlarm'); + verify('StateMachineBeta2ExecutionsFailedAlarm', 'breaching'); + 
verify('StateMachineBeta2ExecutionsAbortedAlarm'); + verify('StateMachineBeta2ExecutionThrottledAlarm'); + + expect(consoleLogSpy.callCount).equal(0); + }); });