Last Updated: July 14, 2016
·
3.075K
· tmartin314

Rails rake task, clean up database, remove duplicates on multiple columns

# lib/tasks/people.rake (Rails only auto-loads *.rake files from lib/tasks)
   # Removes duplicate Person records.
   #
   # Two people count as duplicates when they share the same first/last name
   # pair or the same email. Within each set of duplicates the record with the
   # highest id is kept and every other record is destroyed.
   #
   # Blank (nil, empty or whitespace-only) names and emails are never used for
   # matching. Without that guard every person lacking an email would fall into
   # one group and all but one of them would be deleted.
   task cleanup: :environment do
     blank = ->(value) { value.nil? || value.to_s.strip.empty? }

     # Load the table once, in a fixed order, so that "last" refers to the same
     # record in both groupings.
     people = Person.order(:id).to_a

     by_name = people.group_by { |person| [person.fname, person.lname] }
     by_email = people.group_by { |person| [person.email] }

     # Ignore groups whose key has a missing part: incomplete data is not
     # evidence that two records describe the same person.
     duplicate_sets = [by_name, by_email].flat_map do |groups|
       groups.reject { |key, _members| key.any?(&blank) }.values
     end

     # Everything except the last record of a set is redundant. A person can be
     # redundant under both criteria, so de-duplicate before destroying.
     duplicate_sets.flat_map { |set| set[0...-1] }.uniq.each(&:destroy)
   end