Backing Up Files to Amazon S3 with Rake

Just a quick script I hacked together to back up file submissions for a project. Naturally, it could be cleaned up, and certain items could be given sane defaults based on environment variables. For example, I'm setting current_bucket to a hard-coded value based on the RAILS_ENV variable. One could instead pass the prefix in with something like BACKUP_BUCKET_PREFIX=myproject_ RAILS_ENV=production rake backup:all, or something along those lines.

                  require 'aws/s3'
                  
                  ############################
                  ### HELPERS HERE ###########
                  ############################
                  # Just littering up my tasks a bit. =)
                  
                  # I don't want to load these helpers unless I'm actually connecting, so let's wrap them in a module
                  # Helper methods used by the backup:* Rake tasks. They are wrapped in a
                  # module so they are only mixed in once a task actually connects to S3.
                  module AWSHelpers
                    include AWS

                    # Prints "<msg>... ", runs the optional block, then prints "done." on the
                    # same line, followed by "<post_msg>." when post_msg is given.
                    #
                    # msg      - text printed before the block runs.
                    # post_msg - optional text printed after "done." (default: nil).
                    def msg(msg, post_msg = nil)
                      print "#{msg}... "
                      yield if block_given?
                      suffix = post_msg.nil? ? "" : "#{post_msg}."
                      puts "done. #{suffix}"
                    end

                    # Collects the local files to back up.
                    #
                    # Returns an Array of Hashes, one per matched path, with:
                    #   :path - the full path used to open the file locally
                    #   :key  - the path relative to the document root, used as the S3 key
                    def get_submission_paths
                      files = []
                      msg("Finding local files") do
                        # abusing is a string helper defined outside this file; it is used
                        # here to hand the document root to the block as doc_root.
                        abusing "#{RAILS_ROOT}/public" do |doc_root|
                          # Replace this with whatever path you're aiming to back up.
                          files = Dir["#{doc_root}/system/submissions/*/*"].collect do |f|
                            # Drop "<doc_root>/" from the front to form the key.
                            {:path => f, :key => f[(doc_root.length + 1)..-1]}
                          end
                        end
                      end
                      files
                    end

                    # Looks up the backup bucket for the current RAILS_ENV, creating it if
                    # the lookup fails, and makes it the current bucket for both S3::Bucket
                    # and S3::S3Object.
                    #
                    # Raises RuntimeError if the bucket can neither be found nor created.
                    def set_current_bucket
                      # The name of the bucket
                      current_bucket = "projectname_backup_#{RAILS_ENV}"
                      msg("Setting current bucket to #{current_bucket}", "Current bucket set to #{current_bucket}") do
                        begin
                          S3::Bucket.find(current_bucket)
                        rescue
                          # The original author found no named exception to rescue here,
                          # so any failure of find is treated as "bucket missing".
                          # Try to create it.
                          # NOTE(review): the bucket is created publicly readable --
                          # confirm that is intended for backups.
                          begin
                            S3::Bucket.create(current_bucket, :access => :public_read)
                          rescue
                            raise "Couldn't get a bucket for storage. Sorry mate."
                          end
                        end
                      end
                      S3::Bucket.current_bucket = S3::S3Object.current_bucket = current_bucket
                    end

                    # Fetches the objects of the current bucket.
                    #
                    # Returns whatever S3::Bucket.objects returns.
                    def get_current_objects
                      objs = []
                      msg("Getting current objects") do
                        objs = S3::Bucket.objects
                      end
                      objs
                    end

                    # Filters file_list down to the entries whose :key is not already the
                    # key of one of current_objects.
                    #
                    # file_list       - Array of Hashes as built by get_submission_paths.
                    # current_objects - objects responding to #key.
                    #
                    # Returns a new Array containing the not-yet-stored entries.
                    def list_new_files(file_list, current_objects)
                      # Index the stored keys once so each lookup is a hash hit rather
                      # than a scan of the whole key list.
                      stored = {}
                      current_objects.each { |o| stored[o.key] = true }
                      file_list.reject { |f| stored.key?(f[:key]) }
                    end

                    # Stores every entry of file_list in the current bucket under its :key.
                    #
                    # file_list - Array of Hashes with :path and :key.
                    #
                    # Prints "Nothing to upload!" and returns nil when file_list is empty.
                    def upload_files(file_list)
                      # `puts ... and return` never returned, because puts yields nil;
                      # use an explicit guard instead.
                      if file_list.empty?
                        puts "Nothing to upload!"
                        return
                      end

                      file_list.each do |f|
                        msg("+ Uploading #{f[:path]}") do
                          # Block form closes the handle after each upload; binary mode
                          # keeps the bytes untouched on every platform.
                          File.open(f[:path], 'rb') do |io|
                            S3::S3Object.store(f[:key], io)
                          end
                        end
                      end
                    end
                  end
                  
                  
                  ############################
                  ### TASKS HERE #############
                  ############################
                  
                  # Rake tasks for backing up files to S3. Every public task depends on
                  # :connect, which mixes in AWSHelpers, opens the S3 connection and selects
                  # the bucket.
                  namespace :backup do
                    desc "Backup all files to S3"
                    task :all => :connect do
                      current_files = get_submission_paths
                      upload_files(current_files)
                    end

                    desc "Backup previously unbacked-up files to S3"
                    task :new => :connect do
                      current_files = get_submission_paths
                      current_objects = get_current_objects
                      new_files = list_new_files(current_files, current_objects)
                      upload_files(new_files)
                    end

                    desc "Backup a tarball of all files to S3"
                    task :tar => :connect do
                      puts "Mason has not implemented this yet."
                    end

                    # No desc, so this task stays out of `rake -T`; it exists only as a
                    # prerequisite of the tasks above.
                    task :connect => :environment do
                      include AWSHelpers
                      # Credentials come from the environment so they need not be committed
                      # to the Rakefile; the placeholders remain as a fallback for anyone
                      # who edits them in place.
                      AWS::S3::Base.establish_connection!(
                        :access_key_id     => ENV['AMAZON_ACCESS_KEY_ID'] || 'your_key',
                        :secret_access_key => ENV['AMAZON_SECRET_ACCESS_KEY'] || 'your_secret'
                      )
                      set_current_bucket
                    end

                  end
                  

For an explanation of abusing see DRYing/Cleaning Up Your Code/Namespaces With 'using'

Requires the AWS::S3 library.