Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ stepFunctions:
Resource: arn:aws:lambda:#{AWS::Region}:#{AWS::AccountId}:function:${self:service}-${opt:stage}-hello
End: true
dependsOn: CustomIamRole
alarms:
topics:
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
metrics:
- executionsTimeOut
- executionsFailed
- executionsAborted
- executionThrottled
hellostepfunc2:
definition:
StartAt: HelloWorld2
Expand All @@ -60,6 +70,16 @@ stepFunctions:
- DynamoDBTable
- KinesisStream
- CustomIamRole
alarms:
topics:
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
metrics:
- executionsTimeOut
- executionsFailed
- executionsAborted
- executionThrottled
activities:
- myTask
- yourTask
Expand Down Expand Up @@ -132,6 +152,45 @@ stepFunctions:
- myStream
```

#### CloudWatch Alarms
It's common practice to want to monitor the health of your state machines and be alerted when something goes wrong. You can either:

* do this using the [serverless-plugin-aws-alerts](https://github.com/ACloudGuru/serverless-plugin-aws-alerts), which lets you configure custom CloudWatch Alarms against the various metrics that Step Functions publishes.
* or, you can use the built-in `alarms` configuration from this plugin, which gives you an opinionated set of default alarms (see below)

```yaml
stepFunctions:
stateMachines:
myStateMachine:
alarms:
topics:
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
metrics:
- executionsTimeOut
- executionsFailed
- executionsAborted
- executionThrottled
```

Both `topics` and `metrics` are required properties. There are 4 supported metrics, each of which maps to a CloudWatch metric that Step Functions publishes for your executions.

Each generated CloudWatch alarm will have the following configuration:
```yaml
namespace: 'AWS/States'
metric: <ExecutionsTimeOut | ExecutionsFailed | ExecutionsAborted | ExecutionThrottled>
threshold: 1
period: 60
evaluationPeriods: 1
ComparisonOperator: GreaterThanOrEqualToThreshold
Statistic: Sum
treatMissingData: missing
Dimensions:
- Name: StateMachineArn
Value: <ArnOfTheStateMachine>
```

#### Current Gotcha
Please keep this gotcha in mind if you want to reference the `name` from the `resources` section. To generate Logical ID for CloudFormation, the plugin transforms the specified name in serverless.yml based on the following scheme.

Expand Down
132 changes: 132 additions & 0 deletions lib/deploy/stepFunctions/compileAlarms.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
'use strict';
const _ = require('lodash');
const BbPromise = require('bluebird');

// Lookup from the metric keys accepted under `alarms.metrics` to the CloudWatch
// metric names used for the generated alarms. Every key is the metric name with
// its first letter lower-cased, so the table is derived from the list of names.
const cloudWatchMetricNames = [
  'ExecutionsTimeOut',
  'ExecutionsFailed',
  'ExecutionsAborted',
  'ExecutionThrottled',
].reduce((table, metricName) => {
  const configKey = metricName.charAt(0).toLowerCase() + metricName.slice(1);
  return Object.assign(table, { [configKey]: metricName });
}, {});

// Human-readable suffix for each alarm's AlarmDescription, keyed by the same
// metric keys as `cloudWatchMetricNames`.
const alarmDescriptions = {
  executionsTimeOut: 'executions timed out',
  executionsFailed: 'executions failed',
  executionsAborted: 'executions were aborted',
  // Plural subject, to agree with "were" and with the other descriptions.
  executionThrottled: 'executions were throttled',
};

/**
 * Builds the CloudWatch alarm resources for a single state machine.
 *
 * The entries of `alarmsObj.metrics` are de-duplicated. Entries that are not keys of
 * `cloudWatchMetricNames` are reported through `serverless.cli.consoleLog` and skipped;
 * one alarm is produced for every remaining entry.
 *
 * @param {object} serverless - object exposing `cli.consoleLog(message)`
 * @param {string} region - interpolated into each alarm description
 * @param {string} stage - interpolated into each alarm description
 * @param {string} stateMachineName - name used in log output and alarm descriptions
 * @param {string} stateMachineLogicalId - logical ID the alarms reference via `Ref`;
 *   also the prefix of every alarm's logical ID
 * @param {object} alarmsObj - the `alarms` config (`topics` and `metrics`)
 * @returns {Array<{logicalId: string, alarm: object}>} one entry per valid metric
 */
function getCloudWatchAlarms(
  serverless, region, stage, stateMachineName, stateMachineLogicalId, alarmsObj) {
  // Each topic is optional; a missing topic becomes an empty action list.
  const okAction = _.get(alarmsObj, 'topics.ok');
  const okActions = okAction ? [okAction] : [];
  const alarmAction = _.get(alarmsObj, 'topics.alarm');
  const alarmActions = alarmAction ? [alarmAction] : [];
  const insufficientDataAction = _.get(alarmsObj, 'topics.insufficientData');
  const insufficientDataActions = insufficientDataAction ? [insufficientDataAction] : [];

  const metrics = _.uniq(_.get(alarmsObj, 'metrics', []));
  const [valid, invalid] = _.partition(metrics, m => _.has(cloudWatchMetricNames, m));

  if (!_.isEmpty(invalid)) {
    // The whole message is passed as ONE argument so that the list of invalid
    // metrics and the guidance text are not lost by a single-argument logger.
    serverless.cli.consoleLog(
      `state machine [${stateMachineName}] : alarms.metrics has invalid metrics ` +
      `[${invalid.join(',')}]. ` +
      'No CloudWatch Alarms would be created for these. ' +
      'Please see https://github.com/horike37/serverless-step-functions for supported metrics');
  }

  return valid.map(metric => {
    const MetricName = cloudWatchMetricNames[metric];
    const AlarmDescription =
      `${stateMachineName}[${stage}][${region}]: ${alarmDescriptions[metric]}`;
    const logicalId = `${stateMachineLogicalId}${MetricName}Alarm`;

    return {
      logicalId,
      alarm: {
        Type: 'AWS::CloudWatch::Alarm',
        Properties: {
          Namespace: 'AWS/States',
          MetricName,
          AlarmDescription,
          Threshold: 1,
          Period: 60,
          EvaluationPeriods: 1,
          ComparisonOperator: 'GreaterThanOrEqualToThreshold',
          Statistic: 'Sum',
          OKActions: okActions,
          AlarmActions: alarmActions,
          InsufficientDataActions: insufficientDataActions,
          TreatMissingData: 'missing',
          Dimensions: [
            {
              Name: 'StateMachineArn',
              Value: {
                Ref: stateMachineLogicalId,
              },
            },
          ],
        },
      },
    };
  });
}

/**
 * Checks whether a state machine's `alarms` config is usable.
 *
 * @param {object} serverless - object exposing `cli.consoleLog(message)`
 * @param {string} stateMachineName - name used in log output
 * @param {*} alarmsObj - the state machine's `alarms` value, possibly absent
 * @returns {boolean} `true` when `topics` is an object with at least one of
 *   `ok` / `alarm` / `insufficientData` and `metrics` is an array of strings;
 *   `false` otherwise. A malformed config is logged; a missing one is not.
 */
function validateConfig(serverless, stateMachineName, alarmsObj) {
  // no alarms defined at all
  if (!_.isObject(alarmsObj)) {
    return false;
  }

  if (!_.isObject(alarmsObj.topics) ||
    !_.isArray(alarmsObj.metrics) ||
    !_.every(alarmsObj.metrics, _.isString)) {
    serverless.cli.consoleLog(
      `state machine [${stateMachineName}] : alarms config is malformed. ` +
      'Please see https://github.com/horike37/serverless-step-functions for examples');
    return false;
  }

  if (!_.has(alarmsObj.topics, 'ok') &&
    !_.has(alarmsObj.topics, 'alarm') &&
    !_.has(alarmsObj.topics, 'insufficientData')) {
    // The message names the exact keys checked above ('alarm', not 'alarms').
    serverless.cli.consoleLog(
      `state machine [${stateMachineName}] : alarms config is malformed. ` +
      "alarms.topics must specify 'ok', 'alarm' or 'insufficientData'"
    );
    return false;
  }

  return true;
}

module.exports = {
compileAlarms() {
const cloudWatchAlarms = _.flatMap(this.getAllStateMachines(), (name) => {
const stateMachineObj = this.getStateMachine(name);
const stateMachineLogicalId = this.getStateMachineLogicalId(name, stateMachineObj);
const stateMachineName = stateMachineObj.name || name;
const alarmsObj = stateMachineObj.alarms;

if (!validateConfig(this.serverless, stateMachineName, alarmsObj)) {
return [];
}

return getCloudWatchAlarms(
this.serverless,
this.region,
this.stage,
stateMachineName,
stateMachineLogicalId,
alarmsObj);
});

const newResources = _.mapValues(_.keyBy(cloudWatchAlarms, 'logicalId'), 'alarm');

_.merge(
this.serverless.service.provider.compiledCloudFormationTemplate.Resources,
newResources);
return BbPromise.resolve();
},
};
Loading