AWS Cloudformation template for Lambda access to Elasticache redis (private subnet) and DynamoDB (public subnet)

The lambda code zip file

  • Create a dynamo table, make sure to replace “MyDynamoTable” below with its name
  • Create an index.js file, stick this inside it:
// private access
var redis = require('redis');
var rClient = redis.createClient(process.env.RedisPort, process.env.RedisEndpoint);

// public access
var AWS = require('aws-sdk');
var dynamodb = new AWS.DynamoDB();

exports.handler = function (event, context, callback) {
    context.callbackWaitsForEmptyEventLoop = false;
    dynamodb.scan({TableName: 'MyDynamoTable'}, function(err, data) {
        if (err){
            return callback(err);
        }else{
            return callback(null, data);
        }
    });
};

It will be the code run by the lambda. We’re establishing a redis connection and a dynamo connection from the same lambda.

  • Next to your index.js file, create a node_modules folder and install redis inside it (or just run “npm install redis” from that folder, and it will do the work for you).
  • zip the whole thing and put it on s3. I used this command to do it: “zip -r file.zip index.js node_modules”. It should look something like this:

Screen Shot 2016-12-30 at 1.31.29 PM.png

  • Upload to S3, and replace the bucket name (“YOUR-BUCKET-NAME-GOES-HERE”) in the lambda section of the template below.

The Cloudformation template

  • Beware! You’re paying for the resources this template creates. Specifically, the redis instance (t2.micro) and the NAT gateways are not cheap.
  • This is the Cloudformation that will create all the necessary resources for you:
{
  "AWSTemplateFormatVersion": "2010-09-09",
  "Resources": {
    "VPC": {
      "Type": "AWS::EC2::VPC",
      "Properties": { "CidrBlock": "10.0.0.0/16" }
    },
    "PublicSubnet": {
      "Type": "AWS::EC2::Subnet",
      "Properties": {
        "VpcId": { "Ref": "VPC" },
        "CidrBlock": "10.0.0.0/24"
      }
    },
    "PrivateSubnet": {
      "Type": "AWS::EC2::Subnet",
      "Properties": {
        "VpcId": { "Ref": "VPC" },
        "CidrBlock": "10.0.1.0/24"
      }
    },
    "InternetGateway": {
      "Type": "AWS::EC2::InternetGateway"
    },
    "AttachGateway": {
      "Type": "AWS::EC2::VPCGatewayAttachment",
      "Properties": {
        "VpcId": { "Ref": "VPC" },
        "InternetGatewayId": { "Ref": "InternetGateway" }
      }
    },
    "PublicRouteTable": {
      "Type": "AWS::EC2::RouteTable",
      "Properties": {
        "VpcId": { "Ref": "VPC" }
      }
    },
    "PrivateRouteTable": {
      "Type": "AWS::EC2::RouteTable",
      "Properties": {
        "VpcId": { "Ref": "VPC" }
      }
    },
    "PublicRoute": {
      "Type": "AWS::EC2::Route",
      "DependsOn": "AttachGateway",
      "Properties": {
        "RouteTableId": {
          "Ref": "PublicRouteTable"
        },
        "DestinationCidrBlock": "0.0.0.0/0",
        "GatewayId": { "Ref": "InternetGateway" }
      }
    },
    "PrivateRoute": {
      "Type": "AWS::EC2::Route",
      "DependsOn": "AttachGateway",
      "Properties": {
        "RouteTableId": { "Ref": "PrivateRouteTable" },
        "DestinationCidrBlock": "0.0.0.0/0",
        "NatGatewayId": { "Ref": "NatGateway" }
      }
    },
    "NatGateway": {
      "Type": "AWS::EC2::NatGateway",
      "Properties": {
        "AllocationId": {
          "Fn::GetAtt": [ "ElasticIp", "AllocationId"]
        },
        "SubnetId": { "Ref": "PublicSubnet" }
      }
    },
    "ElasticIp": {
      "Type": "AWS::EC2::EIP",
      "DependsOn": "AttachGateway",
      "Properties": { "Domain": "vpc" }
    },
    "PublicSubnetRouteTableAssociation": {
      "Type": "AWS::EC2::SubnetRouteTableAssociation",
      "Properties": {
        "SubnetId": { "Ref": "PublicSubnet" },
        "RouteTableId": { "Ref": "PublicRouteTable"}
      }
    },
    "PrivateSubnetRouteTableAssociation": {
      "Type": "AWS::EC2::SubnetRouteTableAssociation",
      "Properties": {
        "SubnetId": { "Ref": "PrivateSubnet" },
        "RouteTableId": { "Ref": "PrivateRouteTable" }
      }
    },
    "RedisInstance": {
      "Type": "AWS::ElastiCache::CacheCluster",
      "Properties": {
        "CacheNodeType": "cache.t2.micro",
        "CacheSubnetGroupName": { "Ref": "CacheSubnetGroup" },
        "VpcSecurityGroupIds": [ {"Ref": "RedisSecurityGroup"}
        ],
        "Engine": "redis",
        "NumCacheNodes": "1",
        "PreferredMaintenanceWindow": "Tue:18:00-Tue:19:00"
      }
    },
    "RedisSecurityGroup": {
      "Type": "AWS::EC2::SecurityGroup",
      "DependsOn": "CacheSubnetGroup",
      "Properties": {
        "GroupDescription": "Security Group for Redis",
        "VpcId": { "Ref": "VPC" },
        "SecurityGroupIngress": [
          {
            "IpProtocol": "tcp",
            "FromPort": "6379",
            "ToPort": "6379",
            "CidrIp": "10.0.1.0/24"
          }
        ]
      }
    },
    "CacheSubnetGroup": {
      "Type": "AWS::ElastiCache::SubnetGroup",
      "DependsOn": [ "PrivateSubnet"],
      "Properties": {
        "Description": "Subnet Group",
        "SubnetIds": [ {"Ref": "PrivateSubnet"}]
      }
    },
    "ReallyGreatLamba": {
      "Type": "AWS::Lambda::Function",
      "DependsOn": "RedisInstance",
      "Properties": {
        "Environment": {
          "Variables": {
            "RedisEndpoint": { "Fn::GetAtt": [ "RedisInstance", "RedisEndpoint.Address"]},
            "RedisPort": { "Fn::GetAtt": ["RedisInstance","RedisEndpoint.Port"]
            }
          }
        },
        "Code": {
          "S3Bucket": "YOUR-BUCKET-NAME-GOES-HERE",
          "S3Key": "file.zip"
        },
        "Role": {
          "Fn::GetAtt": ["MyLambdaRole","Arn"]
        },
        "VpcConfig": {
          "SecurityGroupIds": [{"Ref": "RedisSecurityGroup"}],
          "SubnetIds": [{"Ref": "PrivateSubnet"}]
        },
        "Description": "A Lambda that can talk to redis and the public internet at once",
        "Handler": "index.handler",
        "Runtime": "nodejs4.3",
        "Timeout": 10,
        "MemorySize": 128
      }
    },
    "MyLambdaRole": {
      "Type": "AWS::IAM::Role",
      "Properties": {
        "AssumeRolePolicyDocument": {
          "Statement": [{
              "Effect": "Allow",
              "Principal": {"Service": "lambda.amazonaws.com"},
              "Action": ["sts:AssumeRole"]
            }
          ]
        },
        "Policies": [{
            "PolicyName": "adminPolicy",
            "PolicyDocument": {
              "Statement": [{
                  "Effect": "Allow",
                  "Action": "*",
                  "Resource": "*"
                }
              ]
            }
          }
        ]
      }
    }
  }
}

The easiest way to execute this template is probably through the template designer. Once you’ve finished the previous steps, paste the template in the “template” section, hit the “refresh” button at the top right if you want to visualize the stack, and then “create the stack” at the top left.

Screen Shot 2016-12-30 at 3.22.31 PM.png

Once you get through the Create Stack steps (the only required field is the stack name), it might take 5 or 10 minutes to create all the resources for you. You can track progress in the Cloudformation section of the AWS UI.

Screen Shot 2016-12-30 at 3.28.31 PM.png

What this creates

  • A VPC with 2 subnets, 1 private and 1 public
  • A managed NAT gateway so your lambda can reach the internet (plus an internet gateway for the public subnet)
  • All the wiring for your VPC to work:
    • 2 routes
    • 2 route tables
    • 2 route table associations
    • a VPC gateway attachment
    • a cache subnet group
  • A t2.micro redis instance
  • A security group to access redis
  • A lambda
  • An IAM role for your lambda

For the visually-inclined:

new-designer (1).png

As mentioned before, don’t forget that these things cost money! Don’t forget to shut them down when you’re done using them.

Finally, I should also point out that this setup is NOT production-ready. You’ll want to make sure you are more highly-available (multi-az), the IAM role created has way too liberal permissions, and you may need larger subnets if you plan to have a lot of lambdas running (256 IPs right now).

Fun gotchas and random observations

  • If you are using the new node4.3 runtime (as we are here), the signature for your lambda handler has changed to “(event, context, callback)”. By default, calling “callback” while you have persistent connections (like redis here) will not return as expected, unless you explicitly clean it up. It will instead time out and refuse to return as expected. This is why we need to add:
context.callbackWaitsForEmptyEventLoop = false;

inside the lambda handler. You can read more about it here, but the important part is:

The default value is true. This property is useful only to modify the default behavior of the callback. By default, the callback will wait until the Node.js runtime event loop is empty before freezing the process and returning the results to the caller. You can set this property to false to request AWS Lambda to freeze the process soon after the callback is called, even if there are events in the event loop. AWS Lambda will freeze the process, any state data and the events in the Node.js event loop (any remaining events in the event loop processed when the Lambda function is called next and if AWS Lambda chooses to use the frozen process).

  • Lambda recently announced support for environment variables, and it’s clear for a use case like this, how powerful they can be. We’re creating a redis cluster with a single machine, and referencing its endpoint and port in the environment variable section of the lambda resource definition:
"Environment": {
  "Variables": {
    "RedisEndpoint": { "Fn::GetAtt": [ "RedisInstance", "RedisEndpoint.Address"]},
    "RedisPort": { "Fn::GetAtt": ["RedisInstance","RedisEndpoint.Port"]
    }
  }
}

We can then access the endpoint and port inside our lambda with

console.log(process.env.RedisPort, process.env.RedisEndpoint)

Stateful AWS lambdas part3: bootstrapping like an EC2, reusing resources and node_modules in /tmp

This one falls under the category of “you should probably never do this”, but is also an interesting case study to understand better how lambda works, and how much of the infrastructure powering our lambdas actually get reused between invocations.

We already know from many clues (i.e. role permissions for certain things require the “ec2:” prefix) that our lambda hosts are EC2s, but functions probably run inside ECS containers atop EC2 clusters. We have no control over any of this, Amazon guaranteeing only that our code will execute when it is triggered, everything else fading away between those invocations.

The application

Lambda has pretty tight limits as to how much code can be uploaded into a zip/jar file (50 MB), so let’s suppose we have a very large npm dependency (i.e. phantomjs) that we don’t want to bundle and upload every time we update our function.

Could we download the file to disk, say /tmp (of which we have 512MB) and only re-download it when our container gets switched over / cleared / disappears for whatever reason? How often would that happen?

Control flow

  • Check if our node_modules in /tmp already exists
  • If it doesn’t, download the .zip package, unzip it on disk and keep going
  • If it does, just keep going

The actual code

For this example, we’ll use a zip package of lodash, but this could be anything. Make sure to change the URL to download. To keep things fast, put your file on S3 in the same region, odds are you’ll be hitting a server not too far from your lambda.

var http = require('http');
var fs = require('fs');
var _;
var spawn = require('child_process').spawn;
process.env['NODE_PATH'] = process.env['NODE_PATH'] + ':/tmp';

var lodashUrl = 'http://s3.amazonaws.com/your-bucket-name/lodash.zip'; //change this!
var lodashLocation = '/tmp/lodash.zip';
var nodeModulesLocation = '/tmp';

function doNormalStuff(callback) {
    _ = require(nodeModulesLocation+'/lodash');
    console.log(_.filter([]));
    return callback(null, 'done');
}

exports.handler = function (event, context, callback) {

    if (!fs.existsSync(nodeModulesLocation)){
        fs.mkdirSync(nodeModulesLocation); // if our tmp folder does not exist, create it
    }

    function unzip() { // unzip the downloaded file
        return new Promise(function (resolve) {
            console.log('Unzipping file');
            const unzip = spawn('unzip', ['-q', '-o', lodashLocation, '-d', nodeModulesLocation]);
            unzip.on('close', (code) => {
                console.log(`child process exited with code ${code}`);
                resolve();
            });
        })
    }

    function download(url, dest) { // download the file
        return new Promise(function (resolve, reject) {
            var file = fs.createWriteStream(dest);
            http.get(url, function (response) {
                response.pipe(file);
                file.on('finish', function () {
                    console.log('Download done.');
                    file.close(resolve);
                });
            }).on('error', reject);
        }).catch(console.log)
    }

    // our main control function
    if (!fs.existsSync(lodashLocation)) {
        console.log('File does not exist, downloading...');
        download(lodashUrl, lodashLocation)
            .then(() => console.log('Download has finished, unzipping.'))
            .then(() => unzip())
            .then(() => doNormalStuff(callback))
    }else{
        return doNormalStuff(callback);
    }
};

The results

Now the interesting part…how often did we need to re-download our bundle? Would this actually be a usable strategy to bootstrap a lambda?

I’ve setup a Cloudwatch event trigger each minute to run this function, and let it go for a few hours.

  • Invocations: 133
  • File re-downloaded, according to the logs: 5

So this ain’t bad – we only had to re-fetch the file 3-4% of the time, meaning that our container was stable for long periods of time, keeping its disk state the same.

Obviously, there are zero guarantees as to whether this would be the same for any function, but it gives interesting insight into how often state changes behind the scenes.

Stateful AWS lambdas part2: scheduled lambda invoking every second

Building on Part 1, one of the restrictions of lambda is the minimum scheduled time allowed by the Cloudwatch event trigger, which is 1 minute. What happens if you need to invoke your lambda every second? It’s not really possible using the current triggers, but we can build our own. To do this, we’ll have 2 lambdas:

  • 1 target lambda, the one we invoke every second
  • 1 trigger lambda, which runs for exactly 1 minute and inside that minute, we trigger our target lambda every second

The target lambda

This can be anything, really – for demonstration purposes let’s put this in:

exports.handler = function (event, context, callback) {
    return callback(null, 'Doing some work!');
};

The trigger lambda

Following our pattern in Part 1:

var exitTimeout = false;
var lambdaTimeout = 60000;
var AWS = require('aws-sdk');
var lambda = new AWS.Lambda();

function main(callback) {
    exitTimeout = false;

    setTimeout(() => {
        exitTimeout = true;
    }, lambdaTimeout - 1000);

    function invokeWithTimeout() {
        if (exitTimeout === true) callback('Out of time, exiting');
        var params = {
            FunctionName: 'target',
            InvocationType: 'Event',
        };
        lambda.invoke(params).promise()
            .then(() => {
                console.log('Waiting a bit before invoking again');
                setTimeout(invokeWithTimeout, 1000);
            })
            .catch(console.log);
    }
    invokeWithTimeout();
}

exports.handler = function (event, context, callback) {
    return main(callback);
};
  • don’t forget, this lambda needs to have a role with a permission to invoke our target lambda
  • make sure you set the lambda timeout to 60 seconds
  • make sure to add an event source trigger for this lambda with an expression of 1 minuteScreen Shot 2016-12-26 at 9.21.24 PM.png

Your invocation graph should show ~60 invokes for that last minute:

Screen Shot 2016-12-26 at 9.23.31 PM.png

This won’t be perfect, since processing time is not taken into account (we only wait 1 second AFTER the invoke is done), but we could make this more sophisticated and precise if we wanted to.

Stateful AWS lambdas part1: polling SQS

Lambdas are presented as a stateless product, but there are a few good reasons (and many more bad ones) to get around this and do more than a single operation on each invocation.

Suppose you want to poll an SQS queue, but only for 5 minutes during a day? You could launch an EC2 on a schedule, but bootstrapping will be slow and you’ll still pay for a full hour, which is not great. Lambda is meant to be event-based (i.e. something triggers your lambda), so polling can be a bit tricky.

Here’s a pretty easy way to get around this limitation, the gist of it is:

On the AWS side

  • set your lambda timeout to the maximum time you’ll need to run your code
  • create a cron-like expression to run your job as (not) often as you like
  • make sure your lambda role has permission to poll from SQS

In your code

  • keep a timer running and fire off your callback before your lambda timeout
  • during that time, run your code as usual
// Polls an SQS queue in a loop from a single lambda invocation, exiting via
// the callback shortly before the lambda's configured timeout is reached.
var exitTimeout = false;
var lambdaTimeout = 300000; // milliseconds; must match the timeout configured on the lambda (5 minutes)
var AWS = require('aws-sdk');
var sqs = new AWS.SQS({'region': 'us-east-1'});
var queueUrl = 'https://sqs.us-east-1.amazonaws.com/account-id-probably/your-queue-name';

function main(callback) {
    exitTimeout = false;

    setTimeout(function () {
        exitTimeout = true;
    }, lambdaTimeout - (1000 * 25)); // exit 25 seconds before the 5-minute lambda timeout (long polling waits up to 20 seconds, leaving time to finish whatever work the loop was doing)

    // Recursive poll loop: receive, process, delete, then schedule itself again.
    function poll() {
        if (exitTimeout === true) {
            console.log('Out of time, exiting');
            return callback(null);
        } else {
            console.log('Still got time...polling the queue');
        }

        return sqs.receiveMessage({'QueueUrl': queueUrl}).promise()
            .then(result => {
                if (!result.Messages) return []; // no messages were found in the queue, return an empty array of messages to delete
                var deleteOperations = []; // this is where we store the messages to delete
                result.Messages.forEach(message => {
                    // do some interesting processing here!
                    console.log(`MessageId: ${message.MessageId}`);

                    // line up all messages to be deleted
                    deleteOperations.push(sqs.deleteMessage({
                        QueueUrl: queueUrl,
                        ReceiptHandle: message.ReceiptHandle
                    }).promise())
                });
                return deleteOperations;

            })
            .then((data) => Promise.all(data)) // delete all messages at once
            .then(() => {
                console.log('Waiting a bit before polling again');
                setTimeout(function () {
                    return poll(); // call ourselves again to keep polling the queue
                }, 500);
            })
            .catch(console.log); // deliberately log-and-continue: a failed receive/delete should not kill the loop or fail the invocation
    }

    poll();
}

exports.handler = function (event, context, callback) {
    return main(callback);
};

A few notes on the code above:

  • lambdaTimeout should match (in milliseconds) the timeout value on your lambda
  • I’ve decided in this example to exit at lambdaTimeout – 25 seconds, since long polling on SQS was set to 20 seconds on my queue (giving my lambda 5 seconds to process the data and delete the message). This is completely arbitrary though, and depends on the “Receive Message Wait Time” you have set on your queue, and the work you intend to do on each message.
  • Notice how we recall poll() at the end of the promise chain – this function is recursive
  • There is a timeout of 500ms before calling poll(). It is not strictly necessary, but for some types of work is a good idea to avoid throttling.
  • Don’t forget that if your lambda invocation fails, it may retry. Depending on what you’re doing in the poll() loop, you may want to ignore/silence those and keep going (notice how nothing rejects() in this code, but only catches and logs the error).

Decrypting AWS lambda environment variables using node 4.3 and KMS

AWS recently announced support for environment variables, and KMS integration to encrypt and decrypt them. Here’s how to do it using the AWS console UI.

Create a KMS key

  • At the top right of the console UI, go to My Security Credentials
  • At the bottom left, find the KMS section for Encryption keys

Screen Shot 2016-12-25 at 10.18.11 PM.png

  • Create a new key, make sure that in “Define key Usage Permissions”, the role associated has permission to decrypt using KMS.Screen Shot 2016-12-25 at 10.10.42 PM.png

Create lambda function and encrypt an environment variable

  • Select the “Hello World” blueprint

Screen Shot 2016-12-25 at 10.23.07 PM.png

  • Create a new lambda function. In the code section, use the following:
console.log('Loading function');
var AWS = require('aws-sdk');
var kms = new AWS.KMS();
var encrypted = process.env['foo'];

exports.handler = (event, context, callback) => {
    console.log(encrypted)
    kms.decrypt({CiphertextBlob: new Buffer(encrypted, 'base64')}, (err, data) => {
        if (err) {
            console.log('Decrypt error:', err);
            return callback(err);
        }
        var decrypted = data.Plaintext.toString('ascii');
        callback(null, decrypted);
    });
};

  • In the other settings, check the “Enable encryption helpers” box and select the KMS key you have created
  • In the environment variables, add “foo” as a key, and anything as the value
  • Click “encrypt”, you should see the value turn into a bunch of dots
  • In the role section, make sure you select your role which has KMS:decrypt permissions, otherwise your lambda will error out

Screen Shot 2016-12-25 at 10.26.13 PM.png

Once you hit “encrypt”

Screen Shot 2016-12-25 at 10.27.03 PM.png

Test your function

Finally, save and test your function, you should see the unencrypted value echoed out like so:

Screen Shot 2016-12-25 at 10.41.06 PM.png

Listing and getting S3 files from an AWS node 4.3 lambda

Here’s how you can efficiently list all files in a bucket, and then download them all at once. If you use the node4.3 runtime on lambda, you can use promises natively. Beware, Promise.all will attempt to download all the files at once. If you have many files in your bucket, this could cause problems (i.e. throttling).

Don’t forget to give your lambda a role that has permission to listObjects and getObject

var AWS = require('aws-sdk');
var s3 = new AWS.S3();

var bucketName = 'myBucketName';

function main(callback) {
    s3.listObjects({
        Bucket: bucketName,
        Delimiter: '/'
    }).promise()
        .then(data => {
            return data.Contents.map((object) => {
                return s3.getObject({
                    Bucket: bucketName,
                    Key: object.Key
                }).promise();
            });
        })
        .then((data) => Promise.all(data))
        .then(console.log)
        .then(callback)
        .catch(console.log)

}

exports.handler = function(event, context, callback){
    return main(callback);
};