From cdac2a340223ecc084f9dc7f7557d886b6490278 Mon Sep 17 00:00:00 2001
From: Yan Cui
Date: Thu, 31 Jan 2019 17:11:32 +0000
Subject: [PATCH 1/2] - added support for declaring CW alarms

---
 lib/deploy/stepFunctions/compileAlarms.js     | 163 +++++++++++++
 .../stepFunctions/compileAlarms.test.js       | 229 ++++++++++++++++++
 lib/index.js                                  |   5 +-
 lib/index.test.js                             |   4 +
 4 files changed, 400 insertions(+), 1 deletion(-)
 create mode 100644 lib/deploy/stepFunctions/compileAlarms.js
 create mode 100644 lib/deploy/stepFunctions/compileAlarms.test.js

diff --git a/lib/deploy/stepFunctions/compileAlarms.js b/lib/deploy/stepFunctions/compileAlarms.js
new file mode 100644
index 00000000..b293e30e
--- /dev/null
+++ b/lib/deploy/stepFunctions/compileAlarms.js
@@ -0,0 +1,163 @@
+'use strict';
+const _ = require('lodash');
+const BbPromise = require('bluebird');
+
+// supported `alarms.metrics` values => metric names in the AWS/States CloudWatch namespace
+const cloudWatchMetricNames = {
+  executionsTimeOut: 'ExecutionsTimeOut',
+  executionsFailed: 'ExecutionsFailed',
+  executionsAborted: 'ExecutionsAborted',
+  executionThrottled: 'ExecutionThrottled',
+};
+
+// supported `alarms.metrics` values => text used in the generated alarm descriptions
+const alarmDescriptions = {
+  executionsTimeOut: 'executions timed out',
+  executionsFailed: 'executions failed',
+  executionsAborted: 'executions were aborted',
+  executionThrottled: 'executions were throttled',
+};
+
+/**
+ * Builds one CloudWatch alarm resource per supported metric listed in `alarmsObj.metrics`.
+ * Duplicate metrics are ignored; unsupported metrics are reported via the CLI and skipped.
+ *
+ * @param {Object} serverless - serverless instance, used for logging via `cli.consoleLog`
+ * @param {string} region - region, only used in the alarm description
+ * @param {string} stage - stage, only used in the alarm description
+ * @param {string} stateMachineName - state machine name used in descriptions and messages
+ * @param {string} stateMachineLogicalId - logical ID used for `Ref` and to name the alarms
+ * @param {Object} alarmsObj - the state machine's `alarms` config (`topics` and `metrics`)
+ * @returns {Array<{logicalId: string, alarm: Object}>} alarm resources with logical IDs
+ */
+function getCloudWatchAlarms(
+  serverless, region, stage, stateMachineName, stateMachineLogicalId, alarmsObj) {
+  const okAction = _.get(alarmsObj, 'topics.ok');
+  const okActions = okAction ? [okAction] : [];
+  const alarmAction = _.get(alarmsObj, 'topics.alarm');
+  const alarmActions = alarmAction ? [alarmAction] : [];
+  const insufficientDataAction = _.get(alarmsObj, 'topics.insufficientData');
+  const insufficientDataActions = insufficientDataAction ? [insufficientDataAction] : [];
+
+  const metrics = _.uniq(_.get(alarmsObj, 'metrics', []));
+  const [valid, invalid] = _.partition(metrics, m => _.has(cloudWatchMetricNames, m));
+
+  if (!_.isEmpty(invalid)) {
+    // build the message as ONE string: consoleLog takes a single message argument
+    serverless.cli.consoleLog(
+      `state machine [${stateMachineName}] : alarms.metrics has invalid metrics ` +
+      `[${invalid.join(',')}]. ` +
+      'No CloudWatch Alarms would be created for these. ' +
+      'Please see https://github.com/horike37/serverless-step-functions for supported metrics');
+  }
+
+  return valid.map(metric => {
+    const MetricName = cloudWatchMetricNames[metric];
+    const AlarmDescription =
+      `${stateMachineName}[${stage}][${region}]: ${alarmDescriptions[metric]}`;
+    const logicalId = `${stateMachineLogicalId}${MetricName}Alarm`;
+
+    return {
+      logicalId,
+      alarm: {
+        Type: 'AWS::CloudWatch::Alarm',
+        Properties: {
+          Namespace: 'AWS/States',
+          MetricName,
+          AlarmDescription,
+          Threshold: 1,
+          Period: 60,
+          EvaluationPeriods: 1,
+          ComparisonOperator: 'GreaterThanOrEqualToThreshold',
+          Statistic: 'Sum',
+          OKActions: okActions,
+          AlarmActions: alarmActions,
+          InsufficientDataActions: insufficientDataActions,
+          TreatMissingData: 'missing',
+          Dimensions: [
+            {
+              Name: 'StateMachineArn',
+              Value: {
+                Ref: stateMachineLogicalId,
+              },
+            },
+          ],
+        },
+      },
+    };
+  });
+}
+
+/**
+ * Checks whether a state machine's `alarms` config can be compiled into CloudWatch alarms.
+ * A malformed config is reported via the CLI; a missing config is skipped silently.
+ *
+ * @param {Object} serverless - serverless instance, used for logging via `cli.consoleLog`
+ * @param {string} stateMachineName - state machine name used in the log messages
+ * @param {Object} [alarmsObj] - the state machine's `alarms` config, if any
+ * @returns {boolean} true when alarms should be generated for the state machine
+ */
+function validateConfig(serverless, stateMachineName, alarmsObj) {
+  // a state machine without an `alarms` section opted out; that is not a malformed config
+  if (_.isNil(alarmsObj)) {
+    return false;
+  }
+
+  if (!_.isObject(alarmsObj) ||
+    !_.isObject(alarmsObj.topics) ||
+    !_.isArray(alarmsObj.metrics) ||
+    !_.every(alarmsObj.metrics, _.isString)) {
+    serverless.cli.consoleLog(
+      `state machine [${stateMachineName}] : alarms config is malformed. ` +
+      'Please see https://github.com/horike37/serverless-step-functions for examples');
+    return false;
+  }
+
+  if (!_.has(alarmsObj.topics, 'ok') &&
+    !_.has(alarmsObj.topics, 'alarm') &&
+    !_.has(alarmsObj.topics, 'insufficientData')) {
+    serverless.cli.consoleLog(
+      `state machine [${stateMachineName}] : alarms config is malformed. ` +
+      "alarms.topics must specify 'ok', 'alarm' or 'insufficientData'"
+    );
+    return false;
+  }
+
+  return true;
+}
+
+module.exports = {
+  /**
+   * Compiles the `alarms` config of every state machine into CloudWatch alarm resources
+   * and merges them into the compiled CloudFormation template.
+   *
+   * @returns {Promise} resolves once the resources have been merged
+   */
+  compileAlarms() {
+    const cloudWatchAlarms = _.flatMap(this.getAllStateMachines(), (name) => {
+      const stateMachineObj = this.getStateMachine(name);
+      const stateMachineLogicalId = this.getStateMachineLogicalId(name, stateMachineObj);
+      const stateMachineName = stateMachineObj.name || name;
+      const alarmsObj = stateMachineObj.alarms;
+
+      if (!validateConfig(this.serverless, stateMachineName, alarmsObj)) {
+        return [];
+      }
+
+      return getCloudWatchAlarms(
+        this.serverless,
+        this.region,
+        this.stage,
+        stateMachineName,
+        stateMachineLogicalId,
+        alarmsObj);
+    });
+
+    const newResources = _.mapValues(_.keyBy(cloudWatchAlarms, 'logicalId'), 'alarm');
+
+    _.merge(
+      this.serverless.service.provider.compiledCloudFormationTemplate.Resources,
+      newResources);
+    return BbPromise.resolve();
+  },
+};
diff --git a/lib/deploy/stepFunctions/compileAlarms.test.js b/lib/deploy/stepFunctions/compileAlarms.test.js
new file mode 100644
index 00000000..709889f6
--- /dev/null
+++ b/lib/deploy/stepFunctions/compileAlarms.test.js
@@ -0,0 +1,229 @@
+'use strict';
+
+const _ = require('lodash');
+const expect = require('chai').expect;
+const Serverless = require('serverless/lib/Serverless');
+const AwsProvider = require('serverless/lib/plugins/aws/provider/awsProvider');
+const ServerlessStepFunctions = require('./../../index');
+
+describe('#compileAlarms', () => {
+  let serverless;
+  let serverlessStepFunctions;
+
+  beforeEach(() => {
+    serverless = new Serverless();
+    serverless.servicePath = true;
+    serverless.service.service = 'step-functions';
+    serverless.service.provider.compiledCloudFormationTemplate = { Resources: {} };
+    serverless.setProvider('aws', new AwsProvider(serverless));
+    serverless.cli = { consoleLog: console.log };
+    const options = {
+      stage: 'dev',
+      region: 'ap-northeast-1',
+    };
+    serverlessStepFunctions = new
ServerlessStepFunctions(serverless, options); + }); + + const validateCloudWatchAlarm = (alarm) => { + expect(alarm.Type).to.equal('AWS::CloudWatch::Alarm'); + expect(alarm.Properties.Namespace).to.equal('AWS/States'); + expect(alarm.Properties.Threshold).to.equal(1); + expect(alarm.Properties.Period).to.equal(60); + expect(alarm.Properties.Statistic).to.equal('Sum'); + expect(alarm.Properties.Dimensions).to.have.lengthOf(1); + expect(alarm.Properties.Dimensions[0].Name).to.equal('StateMachineArn'); + }; + + it('should generate CloudWatch Alarms', () => { + const genStateMachine = (name) => ({ + name, + definition: { + StartAt: 'A', + States: { + A: { + Type: 'Pass', + End: true, + }, + }, + }, + alarms: { + topics: { + ok: '${self:service}-${opt:stage}-alerts-ok', + alarm: '${self:service}-${opt:stage}-alerts-alarm', + insufficientData: '${self:service}-${opt:stage}-alerts-missing', + }, + metrics: [ + 'executionsTimeOut', + 'executionsFailed', + 'executionsAborted', + 'executionThrottled', + ], + }, + }); + + serverless.service.stepFunctions = { + stateMachines: { + myStateMachine1: genStateMachine('stateMachineBeta1'), + myStateMachine2: genStateMachine('stateMachineBeta2'), + }, + }; + + serverlessStepFunctions.compileAlarms(); + const resources = serverlessStepFunctions.serverless.service + .provider.compiledCloudFormationTemplate.Resources; + expect(resources).to.have.property('StateMachineBeta1ExecutionsTimeOutAlarm'); + validateCloudWatchAlarm(resources.StateMachineBeta1ExecutionsTimeOutAlarm); + expect(resources).to.have.property('StateMachineBeta1ExecutionsFailedAlarm'); + validateCloudWatchAlarm(resources.StateMachineBeta1ExecutionsFailedAlarm); + expect(resources).to.have.property('StateMachineBeta1ExecutionsAbortedAlarm'); + validateCloudWatchAlarm(resources.StateMachineBeta1ExecutionsAbortedAlarm); + expect(resources).to.have.property('StateMachineBeta1ExecutionThrottledAlarm'); + 
validateCloudWatchAlarm(resources.StateMachineBeta1ExecutionThrottledAlarm); + expect(resources).to.have.property('StateMachineBeta2ExecutionsTimeOutAlarm'); + validateCloudWatchAlarm(resources.StateMachineBeta2ExecutionsTimeOutAlarm); + expect(resources).to.have.property('StateMachineBeta2ExecutionsFailedAlarm'); + validateCloudWatchAlarm(resources.StateMachineBeta2ExecutionsFailedAlarm); + expect(resources).to.have.property('StateMachineBeta2ExecutionsAbortedAlarm'); + validateCloudWatchAlarm(resources.StateMachineBeta2ExecutionsAbortedAlarm); + expect(resources).to.have.property('StateMachineBeta2ExecutionThrottledAlarm'); + validateCloudWatchAlarm(resources.StateMachineBeta2ExecutionThrottledAlarm); + }); + + it('should not generate CloudWatch Alarms when alarms.topics is missing', () => { + const genStateMachine = (name) => ({ + name, + definition: { + StartAt: 'A', + States: { + A: { + Type: 'Pass', + End: true, + }, + }, + }, + alarms: { + metrics: [ + 'executionsTimeOut', + ], + }, + }); + + serverless.service.stepFunctions = { + stateMachines: { + myStateMachine1: genStateMachine('stateMachineBeta1'), + myStateMachine2: genStateMachine('stateMachineBeta2'), + }, + }; + + serverlessStepFunctions.compileAlarms(); + const resources = serverlessStepFunctions.serverless.service + .provider.compiledCloudFormationTemplate.Resources; + expect(_.keys(resources)).to.have.lengthOf(0); + }); + + it('should not generate CloudWatch Alarms when alarms.topics is empty', () => { + const genStateMachine = (name) => ({ + name, + definition: { + StartAt: 'A', + States: { + A: { + Type: 'Pass', + End: true, + }, + }, + }, + alarms: { + topics: {}, + metrics: [ + 'executionsTimeOut', + ], + }, + }); + + serverless.service.stepFunctions = { + stateMachines: { + myStateMachine1: genStateMachine('stateMachineBeta1'), + myStateMachine2: genStateMachine('stateMachineBeta2'), + }, + }; + + serverlessStepFunctions.compileAlarms(); + const resources = 
serverlessStepFunctions.serverless.service + .provider.compiledCloudFormationTemplate.Resources; + expect(_.keys(resources)).to.have.lengthOf(0); + }); + + it('should not generate CloudWatch Alarms when alarms.metrics is missing', () => { + const genStateMachine = (name) => ({ + name, + definition: { + StartAt: 'A', + States: { + A: { + Type: 'Pass', + End: true, + }, + }, + }, + alarms: { + topics: { + ok: '${self:service}-${opt:stage}-alerts-ok', + }, + }, + }); + + serverless.service.stepFunctions = { + stateMachines: { + myStateMachine1: genStateMachine('stateMachineBeta1'), + myStateMachine2: genStateMachine('stateMachineBeta2'), + }, + }; + + serverlessStepFunctions.compileAlarms(); + const resources = serverlessStepFunctions.serverless.service + .provider.compiledCloudFormationTemplate.Resources; + expect(_.keys(resources)).to.have.lengthOf(0); + }); + + it('should not generate CloudWatch Alarms for unsupported metrics', () => { + const genStateMachine = (name) => ({ + name, + definition: { + StartAt: 'A', + States: { + A: { + Type: 'Pass', + End: true, + }, + }, + }, + alarms: { + topics: { + ok: '${self:service}-${opt:stage}-alerts-ok', + }, + metrics: [ + 'executionsFailed', + 'executionsFail', + ], + }, + }); + + serverless.service.stepFunctions = { + stateMachines: { + myStateMachine1: genStateMachine('stateMachineBeta1'), + myStateMachine2: genStateMachine('stateMachineBeta2'), + }, + }; + + serverlessStepFunctions.compileAlarms(); + const resources = serverlessStepFunctions.serverless.service + .provider.compiledCloudFormationTemplate.Resources; + // valid metrics => CW alarms + expect(resources).to.have.property('StateMachineBeta1ExecutionsFailedAlarm'); + expect(resources).to.have.property('StateMachineBeta2ExecutionsFailedAlarm'); + + // but invalid metric names are skipped + expect(_.keys(resources)).to.have.lengthOf(2); + }); +}); diff --git a/lib/index.js b/lib/index.js index 10e6880e..9c9c3e65 100644 --- a/lib/index.js +++ b/lib/index.js @@ 
-4,6 +4,7 @@ const BbPromise = require('bluebird'); const compileStateMachines = require('./deploy/stepFunctions/compileStateMachines'); const compileActivities = require('./deploy/stepFunctions/compileActivities'); const compileIamRole = require('./deploy/stepFunctions/compileIamRole'); +const compileAlarms = require('./deploy/stepFunctions/compileAlarms'); const httpValidate = require('./deploy/events/apiGateway/validate'); const httpResources = require('./deploy/events/apiGateway/resources'); const httpMethods = require('./deploy/events/apiGateway/methods'); @@ -40,6 +41,7 @@ class ServerlessStepFunctions { compileStateMachines, compileActivities, compileIamRole, + compileAlarms, httpRestApi, httpInfo, httpValidate, @@ -105,7 +107,8 @@ class ServerlessStepFunctions { 'package:compileFunctions': () => BbPromise.bind(this) .then(this.compileIamRole) .then(this.compileStateMachines) - .then(this.compileActivities), + .then(this.compileActivities) + .then(this.compileAlarms), 'package:compileEvents': () => this.compileScheduledEvents().then(() => { // FIXME: Rename pluginhttpValidated to validated diff --git a/lib/index.test.js b/lib/index.test.js index b81a2654..724b1ae7 100644 --- a/lib/index.test.js +++ b/lib/index.test.js @@ -78,14 +78,18 @@ describe('#index', () => { .stub(serverlessStepFunctions, 'compileStateMachines').returns(BbPromise.resolve()); const compileActivitiesStub = sinon .stub(serverlessStepFunctions, 'compileActivities').returns(BbPromise.resolve()); + const compileAlarmsStub = sinon + .stub(serverlessStepFunctions, 'compileAlarms').returns(BbPromise.resolve()); return serverlessStepFunctions.hooks['package:compileFunctions']() .then(() => { expect(compileIamRoleStub.calledOnce).to.be.equal(true); expect(compileStateMachinesStub.calledAfter(compileIamRoleStub)).to.be.equal(true); expect(compileActivitiesStub.calledAfter(compileStateMachinesStub)).to.be.equal(true); + expect(compileAlarmsStub.calledAfter(compileActivitiesStub)).to.be.equal(true); 
serverlessStepFunctions.compileIamRole.restore(); serverlessStepFunctions.compileStateMachines.restore(); serverlessStepFunctions.compileActivities.restore(); + serverlessStepFunctions.compileAlarms.restore(); }); }); From 7bf46b1aafec1c29528d5a5cdbf7e8617e77deb3 Mon Sep 17 00:00:00 2001 From: Yan Cui Date: Thu, 31 Jan 2019 17:21:28 +0000 Subject: [PATCH 2/2] - updated README --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/README.md b/README.md index 03f2fdbc..b1363606 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,16 @@ stepFunctions: Resource: arn:aws:lambda:#{AWS::Region}:#{AWS::AccountId}:function:${self:service}-${opt:stage}-hello End: true dependsOn: CustomIamRole + alarms: + topics: + ok: arn:aws:sns:us-east-1:1234567890:NotifyMe + alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe + insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe + metrics: + - executionsTimeOut + - executionsFailed + - executionsAborted + - executionThrottled hellostepfunc2: definition: StartAt: HelloWorld2 @@ -60,6 +70,16 @@ stepFunctions: - DynamoDBTable - KinesisStream - CUstomIamRole + alarms: + topics: + ok: arn:aws:sns:us-east-1:1234567890:NotifyMe + alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe + insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe + metrics: + - executionsTimeOut + - executionsFailed + - executionsAborted + - executionThrottled activities: - myTask - yourTask @@ -132,6 +152,45 @@ stepFunctions: - myStream ``` +#### CloudWatch Alarms +It's common practice to want to monitor the health of your state machines and be alerted when something goes wrong. You can either: + +* do this using the [serverless-plugin-aws-alerts](https://github.com/ACloudGuru/serverless-plugin-aws-alerts), which lets you configure custom CloudWatch Alarms against the various metrics that Step Functions publishes. 
+* or, you can use the built-in `alarms` configuration from this plugin, which gives you an opinionated set of default alarms (see below) + +```yaml +stepFunctions: + stateMachines: + myStateMachine: + alarms: + topics: + ok: arn:aws:sns:us-east-1:1234567890:NotifyMe + alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe + insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe + metrics: + - executionsTimeOut + - executionsFailed + - executionsAborted + - executionThrottled +``` + +Both `topics` and `metrics` are required properties, and `topics` must specify at least one of `ok`, `alarm` or `insufficientData`. There are 4 supported metrics, each of which maps to a CloudWatch metric that Step Functions publishes for your executions. + +The generated CloudWatch alarms have the following configuration: +```yaml +namespace: 'AWS/States' +metric: <ExecutionsTimeOut | ExecutionsFailed | ExecutionsAborted | ExecutionThrottled> +threshold: 1 +period: 60 +evaluationPeriods: 1 +ComparisonOperator: GreaterThanOrEqualToThreshold +Statistic: Sum +treatMissingData: missing +Dimensions: + - Name: StateMachineArn + Value: <ArnOfTheStateMachine> +``` + #### Current Gotcha Please keep this gotcha in mind if you want to reference the `name` from the `resources` section. To generate Logical ID for CloudFormation, the plugin transforms the specified name in serverless.yml based on the following scheme.