123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- """ fontTools.misc.classifyTools.py -- tools for classifying things.
- """
- class Classifier(object):
- """
- Main Classifier object, used to classify things into similar sets.
- """
- def __init__(self, sort=True):
- self._things = set() # set of all things known so far
- self._sets = [] # list of class sets produced so far
- self._mapping = {} # map from things to their class set
- self._dirty = False
- self._sort = sort
- def add(self, set_of_things):
- """
- Add a set to the classifier. Any iterable is accepted.
- """
- if not set_of_things:
- return
- self._dirty = True
- things, sets, mapping = self._things, self._sets, self._mapping
- s = set(set_of_things)
- intersection = s.intersection(things) # existing things
- s.difference_update(intersection) # new things
- difference = s
- del s
- # Add new class for new things
- if difference:
- things.update(difference)
- sets.append(difference)
- for thing in difference:
- mapping[thing] = difference
- del difference
- while intersection:
- # Take one item and process the old class it belongs to
- old_class = mapping[next(iter(intersection))]
- old_class_intersection = old_class.intersection(intersection)
- # Update old class to remove items from new set
- old_class.difference_update(old_class_intersection)
- # Remove processed items from todo list
- intersection.difference_update(old_class_intersection)
- # Add new class for the intersection with old class
- sets.append(old_class_intersection)
- for thing in old_class_intersection:
- mapping[thing] = old_class_intersection
- del old_class_intersection
- def update(self, list_of_sets):
- """
- Add a a list of sets to the classifier. Any iterable of iterables is accepted.
- """
- for s in list_of_sets:
- self.add(s)
- def _process(self):
- if not self._dirty:
- return
- # Do any deferred processing
- sets = self._sets
- self._sets = [s for s in sets if s]
- if self._sort:
- self._sets = sorted(self._sets, key=lambda s: (-len(s), sorted(s)))
- self._dirty = False
- # Output methods
- def getThings(self):
- """Returns the set of all things known so far.
- The return value belongs to the Classifier object and should NOT
- be modified while the classifier is still in use.
- """
- self._process()
- return self._things
- def getMapping(self):
- """Returns the mapping from things to their class set.
- The return value belongs to the Classifier object and should NOT
- be modified while the classifier is still in use.
- """
- self._process()
- return self._mapping
- def getClasses(self):
- """Returns the list of class sets.
- The return value belongs to the Classifier object and should NOT
- be modified while the classifier is still in use.
- """
- self._process()
- return self._sets
- def classify(list_of_sets, sort=True):
- """
- Takes a iterable of iterables (list of sets from here on; but any
- iterable works.), and returns the smallest list of sets such that
- each set, is either a subset, or is disjoint from, each of the input
- sets.
- In other words, this function classifies all the things present in
- any of the input sets, into similar classes, based on which sets
- things are a member of.
- If sort=True, return class sets are sorted by decreasing size and
- their natural sort order within each class size. Otherwise, class
- sets are returned in the order that they were identified, which is
- generally not significant.
- >>> classify([]) == ([], {})
- True
- >>> classify([[]]) == ([], {})
- True
- >>> classify([[], []]) == ([], {})
- True
- >>> classify([[1]]) == ([{1}], {1: {1}})
- True
- >>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}})
- True
- >>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
- True
- >>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
- True
- >>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}})
- True
- >>> classify([[1,2],[2,4,5]]) == (
- ... [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
- True
- >>> classify([[1,2],[2,4,5]], sort=False) == (
- ... [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
- True
- >>> classify([[1,2,9],[2,4,5]], sort=False) == (
- ... [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5},
- ... 9: {1, 9}})
- True
- >>> classify([[1,2,9,15],[2,4,5]], sort=False) == (
- ... [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5},
- ... 5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}})
- True
- >>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False)
- >>> set([frozenset(c) for c in classes]) == set(
- ... [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})])
- True
- >>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}}
- True
- """
- classifier = Classifier(sort=sort)
- classifier.update(list_of_sets)
- return classifier.getClasses(), classifier.getMapping()
- if __name__ == "__main__":
- import sys, doctest
- sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed)
|