Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 31 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ stepFunctions:
- executionsTimeOut
- executionsFailed
- executionsAborted
- executionThrottled
- metric: executionThrottled
treatMissingData: breaching # overrides below default
treatMissingData: ignore # optional
hellostepfunc2:
definition:
StartAt: HelloWorld2
Expand All @@ -70,16 +72,6 @@ stepFunctions:
- DynamoDBTable
- KinesisStream
- CUstomIamRole
alarms:
topics:
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
metrics:
- executionsTimeOut
- executionsFailed
- executionsAborted
- executionThrottled
activities:
- myTask
- yourTask
Expand Down Expand Up @@ -172,10 +164,20 @@ stepFunctions:
- executionsFailed
- executionsAborted
- executionThrottled
treatMissingData: missing
```

Both `topics` and `metrics` are required properties. There are 4 supported metrics, each of which maps to one of the CloudWatch metrics that Step Functions publishes for your executions.

You can configure how the CloudWatch Alarms should treat missing data:

* `missing` (AWS default): The alarm does not consider missing data points when evaluating whether to change state.
* `ignore`: The current alarm state is maintained.
* `breaching`: Missing data points are treated as breaching the threshold.
* `notBreaching`: Missing data points are treated as being within the threshold.

For more information, please refer to the [official documentation](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data).

The generated CloudWatch alarms would have the following configurations:
```yaml
namespace: 'AWS/States'
Expand All @@ -185,12 +187,29 @@ period: 60
evaluationPeriods: 1
ComparisonOperator: GreaterThanOrEqualToThreshold
Statistic: Sum
treatMissingData: missing
treatMissingData: <missing (default) | ignore | breaching | notBreaching>
Dimensions:
- Name: StateMachineArn
Value: <ArnOfTheStateMachine>
```

You can also override the default `treatMissingData` setting for a particular alarm by writing that metric in long form (an object with `metric` and `treatMissingData` keys):

```yml
alarms:
topics:
ok: arn:aws:sns:us-east-1:1234567890:NotifyMe
alarm: arn:aws:sns:us-east-1:1234567890:NotifyMe
insufficientData: arn:aws:sns:us-east-1:1234567890:NotifyMe
metrics:
- executionsTimeOut
- executionsFailed
- executionsAborted
- metric: executionThrottled
treatMissingData: breaching # override
treatMissingData: ignore # default
```

#### Current Gotcha
Please keep this gotcha in mind if you want to reference the `name` from the `resources` section. To generate the Logical ID for CloudFormation, the plugin transforms the name specified in serverless.yml according to the following scheme.

Expand Down
27 changes: 20 additions & 7 deletions lib/deploy/stepFunctions/compileAlarms.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@ function getCloudWatchAlarms(
const alarmActions = alarmAction ? [alarmAction] : [];
const insufficientDataAction = _.get(alarmsObj, 'topics.insufficientData');
const insufficientDataActions = insufficientDataAction ? [insufficientDataAction] : [];
const defaultTreatMissingData = _.get(alarmsObj, 'treatMissingData', 'missing');

const metrics = _.uniq(_.get(alarmsObj, 'metrics', []));
const [valid, invalid] = _.partition(metrics, m => _.has(cloudWatchMetricNames, m));
const [valid, invalid] = _.partition(
metrics,
m => _.has(cloudWatchMetricNames, _.get(m, 'metric', m)));

if (!_.isEmpty(invalid)) {
serverless.cli.consoleLog(
Expand All @@ -37,18 +40,21 @@ function getCloudWatchAlarms(
}

return valid.map(metric => {
const MetricName = cloudWatchMetricNames[metric];
// metric can be either a string or object
const metricName = _.get(metric, 'metric', metric);
const cloudWatchMetricName = cloudWatchMetricNames[metricName];
const AlarmDescription =
`${stateMachineName}[${stage}][${region}]: ${alarmDescriptions[metric]}`;
const logicalId = `${stateMachineLogicalId}${MetricName}Alarm`;
`${stateMachineName}[${stage}][${region}]: ${alarmDescriptions[metricName]}`;
const logicalId = `${stateMachineLogicalId}${cloudWatchMetricName}Alarm`;
const treatMissingData = _.get(metric, 'treatMissingData', defaultTreatMissingData);

return {
logicalId,
alarm: {
Type: 'AWS::CloudWatch::Alarm',
Properties: {
Namespace: 'AWS/States',
MetricName,
MetricName: cloudWatchMetricName,
AlarmDescription,
Threshold: 1,
Period: 60,
Expand All @@ -58,7 +64,7 @@ function getCloudWatchAlarms(
OKActions: okActions,
AlarmActions: alarmActions,
InsufficientDataActions: insufficientDataActions,
TreatMissingData: 'missing',
TreatMissingData: treatMissingData,
Dimensions: [
{
Name: 'StateMachineArn',
Expand All @@ -79,9 +85,16 @@ function validateConfig(serverless, stateMachineName, alarmsObj) {
return false;
}

// A metrics entry is accepted in one of two shapes:
//   - short form: a plain string, e.g. "executionsTimeOut"
//   - long form: an object whose own `metric` property is a string; this form
//     lets the entry carry an optional treatMissingData override, e.g.
//     { "metric": "executionsTimeOut", "treatMissingData": "ignore" }
// Returns true for either shape and false for anything else.
const validateMetric = (entry) => {
  if (_.isString(entry)) {
    return true;
  }

  if (!_.isObject(entry) || !_.has(entry, 'metric')) {
    return false;
  }

  return _.isString(entry.metric);
};

if (!_.isObject(alarmsObj.topics) ||
!_.isArray(alarmsObj.metrics) ||
!_.every(alarmsObj.metrics, _.isString)) {
!_.every(alarmsObj.metrics, validateMetric)) {
serverless.cli.consoleLog(
`state machine [${stateMachineName}] : alarms config is malformed. ` +
'Please see https://github.com/horike37/serverless-step-functions for examples');
Expand Down
112 changes: 112 additions & 0 deletions lib/deploy/stepFunctions/compileAlarms.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -268,4 +268,116 @@ describe('#compileAlarms', () => {

expect(consoleLogSpy.callCount).equal(2);
});

it('should use specified treatMissingData for all alarms', () => {
  // Builds a fresh state machine config on every call. All metrics are in
  // short (string) form and the alarms-level treatMissingData is 'ignore'.
  function buildStateMachine(machineName) {
    const definition = {
      StartAt: 'A',
      States: {
        A: {
          Type: 'Pass',
          End: true,
        },
      },
    };

    const alarms = {
      topics: {
        ok: '${self:service}-${opt:stage}-alerts-ok',
        alarm: '${self:service}-${opt:stage}-alerts-alarm',
        insufficientData: '${self:service}-${opt:stage}-alerts-missing',
      },
      metrics: [
        'executionsTimeOut',
        'executionsFailed',
        'executionsAborted',
        'executionThrottled',
      ],
      treatMissingData: 'ignore',
    };

    return { name: machineName, definition, alarms };
  }

  serverless.service.stepFunctions = {
    stateMachines: {
      myStateMachine1: buildStateMachine('stateMachineBeta1'),
      myStateMachine2: buildStateMachine('stateMachineBeta2'),
    },
  };

  serverlessStepFunctions.compileAlarms();
  const compiledTemplate = serverlessStepFunctions.serverless.service
    .provider.compiledCloudFormationTemplate;
  const resources = compiledTemplate.Resources;

  // Each of these alarm resources must exist and carry the alarms-level setting.
  const expectedAlarmIds = [
    'StateMachineBeta1ExecutionsTimeOutAlarm',
    'StateMachineBeta1ExecutionsFailedAlarm',
    'StateMachineBeta1ExecutionsAbortedAlarm',
    'StateMachineBeta1ExecutionThrottledAlarm',
    'StateMachineBeta2ExecutionsTimeOutAlarm',
    'StateMachineBeta2ExecutionsFailedAlarm',
    'StateMachineBeta2ExecutionsAbortedAlarm',
    'StateMachineBeta2ExecutionThrottledAlarm',
  ];

  expectedAlarmIds.forEach((alarmId) => {
    expect(resources).to.have.property(alarmId);
    expect(resources[alarmId].Properties.TreatMissingData).to.equal('ignore');
  });

  // Nothing should have been written to the CLI log.
  expect(consoleLogSpy.callCount).equal(0);
});

it('should allow individual alarms to override default treatMissingData', () => {
  // Builds a fresh state machine config on every call. The alarms-level
  // treatMissingData is 'ignore'; only executionsFailed uses the long form
  // to override it with 'breaching'.
  function buildStateMachine(machineName) {
    const definition = {
      StartAt: 'A',
      States: {
        A: {
          Type: 'Pass',
          End: true,
        },
      },
    };

    const alarms = {
      topics: {
        ok: '${self:service}-${opt:stage}-alerts-ok',
        alarm: '${self:service}-${opt:stage}-alerts-alarm',
        insufficientData: '${self:service}-${opt:stage}-alerts-missing',
      },
      metrics: [
        'executionsTimeOut',
        { metric: 'executionsFailed', treatMissingData: 'breaching' },
        'executionsAborted',
        'executionThrottled',
      ],
      treatMissingData: 'ignore',
    };

    return { name: machineName, definition, alarms };
  }

  serverless.service.stepFunctions = {
    stateMachines: {
      myStateMachine1: buildStateMachine('stateMachineBeta1'),
      myStateMachine2: buildStateMachine('stateMachineBeta2'),
    },
  };

  serverlessStepFunctions.compileAlarms();
  const compiledTemplate = serverlessStepFunctions.serverless.service
    .provider.compiledCloudFormationTemplate;
  const resources = compiledTemplate.Resources;

  // Pairs of [alarm resource id, expected TreatMissingData value].
  const expectations = [
    ['StateMachineBeta1ExecutionsTimeOutAlarm', 'ignore'],
    ['StateMachineBeta1ExecutionsFailedAlarm', 'breaching'],
    ['StateMachineBeta1ExecutionsAbortedAlarm', 'ignore'],
    ['StateMachineBeta1ExecutionThrottledAlarm', 'ignore'],
    ['StateMachineBeta2ExecutionsTimeOutAlarm', 'ignore'],
    ['StateMachineBeta2ExecutionsFailedAlarm', 'breaching'],
    ['StateMachineBeta2ExecutionsAbortedAlarm', 'ignore'],
    ['StateMachineBeta2ExecutionThrottledAlarm', 'ignore'],
  ];

  expectations.forEach(([alarmId, expectedConfig]) => {
    expect(resources).to.have.property(alarmId);
    expect(resources[alarmId].Properties.TreatMissingData).to.equal(expectedConfig);
  });

  // Nothing should have been written to the CLI log.
  expect(consoleLogSpy.callCount).equal(0);
});
});