123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291 |
- """
- User name to file name conversion.
- This was taken from the UFO 3 spec.
- """
# Characters that may not appear in a generated file name. The restrictions
# are taken mostly from
# https://docs.microsoft.com/en-gb/windows/win32/fileio/naming-a-file#naming-conventions.
#
# 1. The characters whose integer representations are in the range 0 to 31,
#    inclusive (0 is sometimes referred to as the ASCII NUL character).
# 2. Various characters that (mostly) Windows and POSIX-y filesystems don't
#    allow, plus "(" and ")", as per the specification.
# 3. The character whose integer representation is 127 ("\x7f").
illegalCharacters = (
    {chr(codePoint) for codePoint in range(32)}
    | set('"*+/:<>?[\\]()|')
    | {"\x7f"}
)

# Lowercase names that get a leading "_" when they appear as a
# period-separated part of a file name.
reservedFileNames = {"aux", "clock$", "con", "nul", "prn"} | {
    device + str(number)
    for device in ("com", "lpt")
    for number in range(1, 10)
}

# Upper bound, in characters, for a complete file name
# (prefix + converted name + suffix).
maxFileNameLength = 255
class NameTranslationError(Exception):
    """Raised when no unique file name can be found for a name."""
def userNameToFileName(userName: str, existing=(), prefix="", suffix=""):
    """
    Convert a user-facing name into a file name.

    The conversion replaces illegal characters with "_", appends "_" to
    every character that differs from its lowercase form, clips the result
    so that prefix + name + suffix fits in `maxFileNameLength` characters,
    and prepends "_" to any period-separated part that is a reserved file
    name. If the resulting full name clashes with an entry of `existing`,
    the name is made unique by `handleClash1`.

    `existing` should be a set-like object. Candidates are lowercased
    before being looked up in it, so its entries are expected to be
    lowercase. `prefix` and `suffix` are added to the converted name
    unchanged.

    Returns the full file name, including `prefix` and `suffix`.

    Raises ValueError if `userName` is not a string. An empty `userName`
    is accepted and converts to an empty name (only `prefix` + `suffix`).

    NOTE: the "_" added in front of a reserved part is applied after
    clipping, so in that case the result can be longer than the limit.

    >>> userNameToFileName("a") == "a"
    True
    >>> userNameToFileName("A") == "A_"
    True
    >>> userNameToFileName("AE") == "A_E_"
    True
    >>> userNameToFileName("Ae") == "A_e"
    True
    >>> userNameToFileName("ae") == "ae"
    True
    >>> userNameToFileName("aE") == "aE_"
    True
    >>> userNameToFileName("a.alt") == "a.alt"
    True
    >>> userNameToFileName("A.alt") == "A_.alt"
    True
    >>> userNameToFileName("A.Alt") == "A_.A_lt"
    True
    >>> userNameToFileName("A.aLt") == "A_.aL_t"
    True
    >>> userNameToFileName(u"A.alT") == "A_.alT_"
    True
    >>> userNameToFileName("T_H") == "T__H_"
    True
    >>> userNameToFileName("T_h") == "T__h"
    True
    >>> userNameToFileName("t_h") == "t_h"
    True
    >>> userNameToFileName("F_F_I") == "F__F__I_"
    True
    >>> userNameToFileName("f_f_i") == "f_f_i"
    True
    >>> userNameToFileName("Aacute_V.swash") == "A_acute_V_.swash"
    True
    >>> userNameToFileName(".notdef") == "_notdef"
    True
    >>> userNameToFileName("con") == "_con"
    True
    >>> userNameToFileName("CON") == "C_O_N_"
    True
    >>> userNameToFileName("con.alt") == "_con.alt"
    True
    >>> userNameToFileName("alt.con") == "alt._con"
    True
    >>> userNameToFileName("") == ""
    True
    """
    # the incoming name must be a string
    if not isinstance(userName, str):
        raise ValueError("The value for userName must be a string.")
    # replace an initial period with an _ if no prefix is to be added;
    # startswith() keeps an empty name from raising IndexError
    if not prefix and userName.startswith("."):
        userName = "_" + userName[1:]
    # filter the user name
    filteredUserName = []
    for character in userName:
        if character in illegalCharacters:
            # replace illegal characters with _
            character = "_"
        elif character != character.lower():
            # add _ to all non-lower characters
            character += "_"
        filteredUserName.append(character)
    userName = "".join(filteredUserName)
    # clip so that prefix + name + suffix fits in the maximum length; the
    # clamp stops a negative length from acting as a from-the-end slice
    # when prefix and suffix alone already exceed the limit
    sliceLength = maxFileNameLength - len(prefix) - len(suffix)
    userName = userName[: max(sliceLength, 0)]
    # guard period-separated parts that are reserved file names
    parts = [
        "_" + part if part.lower() in reservedFileNames else part
        for part in userName.split(".")
    ]
    userName = ".".join(parts)
    # test for clash
    fullName = prefix + userName + suffix
    if fullName.lower() in existing:
        fullName = handleClash1(userName, existing, prefix, suffix)
    # finished
    return fullName
def handleClash1(userName, existing=(), prefix="", suffix=""):
    """
    Resolve a file name clash by appending a 15 digit, zero-padded counter
    to `userName`.

    `existing` should be a case-insensitive collection of all existing
    file names: each candidate is lowercased before being looked up in it.
    `prefix` and `suffix` are added around the numbered name unchanged.

    The user name is first shortened, if needed, so that prefix + name +
    15 digits + suffix fits in `maxFileNameLength` characters. Counters
    from 1 through 999999999999999 are then tried in order and the first
    full name not found in `existing` is returned. If every counter is
    taken, the result of `handleClash2` is returned instead.

    >>> prefix = ("0" * 5) + "."
    >>> suffix = "." + ("0" * 10)
    >>> existing = ["a" * 5]

    >>> e = list(existing)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000001.0000000000')
    True

    >>> e = list(existing)
    >>> e.append(prefix + "aaaaa" + "1".zfill(15) + suffix)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000002.0000000000')
    True

    >>> e = list(existing)
    >>> e.append(prefix + "AAAAA" + "2".zfill(15) + suffix)
    >>> handleClash1(userName="A" * 5, existing=e,
    ...     prefix=prefix, suffix=suffix) == (
    ...     '00000.AAAAA000000000000001.0000000000')
    True
    """
    # room left for the user name once the prefix, the suffix and the
    # 15 digit counter are accounted for; slice the user name down to it
    # (an empty name remains when there is no room at all)
    room = maxFileNameLength - len(prefix) - len(suffix) - 15
    userName = userName[: max(room, 0)]
    # try to add numbers to create a unique name; the range covers every
    # counter that fits in 15 digits, including 999999999999999
    for counter in range(1, 10**15):
        fullName = prefix + userName + str(counter).zfill(15) + suffix
        if fullName.lower() not in existing:
            return fullName
    # every numbered name clashes, go to the next fallback
    return handleClash2(existing, prefix, suffix)
def handleClash2(existing=(), prefix="", suffix=""):
    """
    Find a unique file name made of `prefix`, a bare number and `suffix`.

    `existing` should be a case-insensitive collection of all existing
    file names: each candidate is lowercased before being looked up in it.

    Numbers are tried in order starting at 1, for as long as their decimal
    form fits in the space left by `prefix` and `suffix` within
    `maxFileNameLength` characters. The first full name not found in
    `existing` is returned.

    Raises NameTranslationError if no number fits or every fitting number
    is already taken.

    >>> prefix = ("0" * 5) + "."
    >>> suffix = "." + ("0" * 10)
    >>> existing = [prefix + str(i) + suffix for i in range(100)]

    >>> e = list(existing)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.100.0000000000')
    True

    >>> e = list(existing)
    >>> e.remove(prefix + "1" + suffix)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.1.0000000000')
    True

    >>> e = list(existing)
    >>> e.remove(prefix + "2" + suffix)
    >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
    ...     '00000.2.0000000000')
    True
    """
    # calculate the longest possible number string
    maxLength = maxFileNameLength - len(prefix) - len(suffix)
    # try to find a number
    counter = 1
    while True:
        digits = str(counter)
        # stop once the number no longer fits; comparing digit counts
        # lets the largest fitting number be tried and also covers the
        # case where prefix and suffix leave no room at all
        if len(digits) > maxLength:
            raise NameTranslationError("No unique name could be found.")
        fullName = prefix + digits + suffix
        if fullName.lower() not in existing:
            return fullName
        counter += 1
if __name__ == "__main__":
    # Run the doctests embedded in this module when executed as a script,
    # and report the number of failed examples through the exit status so
    # that failures are visible to the calling process.
    import doctest
    import sys

    sys.exit(doctest.testmod().failed)
|