Last Updated: February 25, 2016
·
3.726K
· oscar peña

Easy string similarity comparison in a Python list

import difflib
import itertools

# Minimum similarity ratio between strings. A pair is reported only when its
# ratio is strictly greater than this value. SequenceMatcher.ratio() returns
# a float in [0, 1], where 1.0 means the two sequences are identical.
threshold_ratio = 0.75

# NOTE(review): `str_list` is not defined in this snippet; it is presumably a
# list of strings that must exist before this loop runs -- confirm at the
# call site.
# itertools.combinations(..., 2) yields every unordered pair exactly once, so
# each pair of entries is compared a single time and never against itself.
for str_1, str_2 in itertools.combinations(str_list, 2):
    # isjunk=None: no caller-supplied junk predicate is applied.
    ratio = difflib.SequenceMatcher(None, str_1, str_2).ratio()
    if ratio > threshold_ratio:
        # A parenthesised single argument prints the same text whether
        # `print` is the Python 2 statement or the Python 3 function.
        print('%f\t%s\t\t could be \t\t%s' % (ratio, str_1, str_2))