Last Updated: February 25, 2016
·
441
· minimal

Rewriting S3 headers concurrently

https://gist.github.com/3359321

from boto.s3.connection import S3Connection
from futures import ThreadPoolExecutor
import futures

# Module-level S3 connection used by main(). 'id' and 'key' are placeholder
# credentials -- replace them before running (presumably the AWS access key
# id and secret key; confirm against boto's S3Connection signature).
connection = S3Connection('id', 'key')

import logging
# Emit INFO-level messages so the logging.info() calls in this file are shown.
logging.basicConfig(level=logging.INFO)


def mod_key(key):
    """Apply JavaScript content-type and caching headers to a single key.

    Keys whose name is not under ``js/`` or does not end in ``.js`` are
    logged and skipped, returning ``None``.  For a matching key, the two
    headers are merged into ``key.metadata`` (in place) and ``key.copy`` is
    called with the key's own bucket name and key name, that metadata, and
    ``preserve_acl=True``.  A ``"Done <name>"`` string is then returned.
    """
    name = key.name
    is_js_asset = name.startswith("js/") and name.endswith('.js')
    if not is_js_asset:
        logging.info("not doing %s", key)
        return None

    js_headers = {
        'Content-Type': 'application/javascript',
        # 30 days. Rewritten at fastly
        'Cache-Control': 'max-age=2592000',
    }
    metadata = key.metadata
    metadata.update(js_headers)

    # The copy targets the key's own bucket and name, carrying the updated
    # metadata along with it.
    key.copy(key.bucket.name, name, metadata, preserve_acl=True)

    return "Done %s" % name


def main(bucket_name='mybucket.com', max_workers=40):
    """Use future threads to run concurrent s3 updates.

    Every key returned by ``bucket.list()`` is submitted to ``mod_key`` on a
    thread pool.  As each submission completes, either its result or the
    exception it raised is printed.

    Args:
        bucket_name: Name of the bucket whose keys are processed.  Defaults
            to the previously hard-coded ``'mybucket.com'``.
        max_workers: Number of worker threads in the pool.  Defaults to the
            previously hard-coded ``40``.
    """
    bucket = connection.get_bucket(bucket_name)

    logging.info("starting")

    with ThreadPoolExecutor(max_workers) as executor:
        # Map each future back to the key it was submitted for, so that a
        # failure can be reported against that key.
        future_s3 = dict((executor.submit(mod_key, key), key)
                         for key in bucket.list())

        for future in futures.as_completed(future_s3):
            key = future_s3[future]
            # Fetch the exception once instead of calling exception() twice.
            error = future.exception()
            if error is not None:
                print('%r generated an exception: %s' % (key, error))
            else:
                # NOTE: mod_key returns None for keys it skips, so those
                # show up here as "None".
                print('%s' % future.result())


# Script entry point: run the update only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()