filenames.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
"""
User name to file name conversion.
This was taken from the UFO 3 spec.
"""
  5. # Restrictions are taken mostly from
  6. # https://docs.microsoft.com/en-gb/windows/win32/fileio/naming-a-file#naming-conventions.
  7. #
  8. # 1. Integer value zero, sometimes referred to as the ASCII NUL character.
  9. # 2. Characters whose integer representations are in the range 1 to 31,
  10. # inclusive.
  11. # 3. Various characters that (mostly) Windows and POSIX-y filesystems don't
  12. # allow, plus "(" and ")", as per the specification.
  13. illegalCharacters = {
  14. "\x00",
  15. "\x01",
  16. "\x02",
  17. "\x03",
  18. "\x04",
  19. "\x05",
  20. "\x06",
  21. "\x07",
  22. "\x08",
  23. "\t",
  24. "\n",
  25. "\x0b",
  26. "\x0c",
  27. "\r",
  28. "\x0e",
  29. "\x0f",
  30. "\x10",
  31. "\x11",
  32. "\x12",
  33. "\x13",
  34. "\x14",
  35. "\x15",
  36. "\x16",
  37. "\x17",
  38. "\x18",
  39. "\x19",
  40. "\x1a",
  41. "\x1b",
  42. "\x1c",
  43. "\x1d",
  44. "\x1e",
  45. "\x1f",
  46. '"',
  47. "*",
  48. "+",
  49. "/",
  50. ":",
  51. "<",
  52. ">",
  53. "?",
  54. "[",
  55. "\\",
  56. "]",
  57. "(",
  58. ")",
  59. "|",
  60. "\x7f",
  61. }
  62. reservedFileNames = {
  63. "aux",
  64. "clock$",
  65. "com1",
  66. "com2",
  67. "com3",
  68. "com4",
  69. "com5",
  70. "com6",
  71. "com7",
  72. "com8",
  73. "com9",
  74. "con",
  75. "lpt1",
  76. "lpt2",
  77. "lpt3",
  78. "lpt4",
  79. "lpt5",
  80. "lpt6",
  81. "lpt7",
  82. "lpt8",
  83. "lpt9",
  84. "nul",
  85. "prn",
  86. }
  87. maxFileNameLength = 255
  88. class NameTranslationError(Exception):
  89. pass
  90. def userNameToFileName(userName: str, existing=(), prefix="", suffix=""):
  91. """
  92. `existing` should be a set-like object.
  93. >>> userNameToFileName("a") == "a"
  94. True
  95. >>> userNameToFileName("A") == "A_"
  96. True
  97. >>> userNameToFileName("AE") == "A_E_"
  98. True
  99. >>> userNameToFileName("Ae") == "A_e"
  100. True
  101. >>> userNameToFileName("ae") == "ae"
  102. True
  103. >>> userNameToFileName("aE") == "aE_"
  104. True
  105. >>> userNameToFileName("a.alt") == "a.alt"
  106. True
  107. >>> userNameToFileName("A.alt") == "A_.alt"
  108. True
  109. >>> userNameToFileName("A.Alt") == "A_.A_lt"
  110. True
  111. >>> userNameToFileName("A.aLt") == "A_.aL_t"
  112. True
  113. >>> userNameToFileName(u"A.alT") == "A_.alT_"
  114. True
  115. >>> userNameToFileName("T_H") == "T__H_"
  116. True
  117. >>> userNameToFileName("T_h") == "T__h"
  118. True
  119. >>> userNameToFileName("t_h") == "t_h"
  120. True
  121. >>> userNameToFileName("F_F_I") == "F__F__I_"
  122. True
  123. >>> userNameToFileName("f_f_i") == "f_f_i"
  124. True
  125. >>> userNameToFileName("Aacute_V.swash") == "A_acute_V_.swash"
  126. True
  127. >>> userNameToFileName(".notdef") == "_notdef"
  128. True
  129. >>> userNameToFileName("con") == "_con"
  130. True
  131. >>> userNameToFileName("CON") == "C_O_N_"
  132. True
  133. >>> userNameToFileName("con.alt") == "_con.alt"
  134. True
  135. >>> userNameToFileName("alt.con") == "alt._con"
  136. True
  137. """
  138. # the incoming name must be a string
  139. if not isinstance(userName, str):
  140. raise ValueError("The value for userName must be a string.")
  141. # establish the prefix and suffix lengths
  142. prefixLength = len(prefix)
  143. suffixLength = len(suffix)
  144. # replace an initial period with an _
  145. # if no prefix is to be added
  146. if not prefix and userName[0] == ".":
  147. userName = "_" + userName[1:]
  148. # filter the user name
  149. filteredUserName = []
  150. for character in userName:
  151. # replace illegal characters with _
  152. if character in illegalCharacters:
  153. character = "_"
  154. # add _ to all non-lower characters
  155. elif character != character.lower():
  156. character += "_"
  157. filteredUserName.append(character)
  158. userName = "".join(filteredUserName)
  159. # clip to 255
  160. sliceLength = maxFileNameLength - prefixLength - suffixLength
  161. userName = userName[:sliceLength]
  162. # test for illegal files names
  163. parts = []
  164. for part in userName.split("."):
  165. if part.lower() in reservedFileNames:
  166. part = "_" + part
  167. parts.append(part)
  168. userName = ".".join(parts)
  169. # test for clash
  170. fullName = prefix + userName + suffix
  171. if fullName.lower() in existing:
  172. fullName = handleClash1(userName, existing, prefix, suffix)
  173. # finished
  174. return fullName
  175. def handleClash1(userName, existing=[], prefix="", suffix=""):
  176. """
  177. existing should be a case-insensitive list
  178. of all existing file names.
  179. >>> prefix = ("0" * 5) + "."
  180. >>> suffix = "." + ("0" * 10)
  181. >>> existing = ["a" * 5]
  182. >>> e = list(existing)
  183. >>> handleClash1(userName="A" * 5, existing=e,
  184. ... prefix=prefix, suffix=suffix) == (
  185. ... '00000.AAAAA000000000000001.0000000000')
  186. True
  187. >>> e = list(existing)
  188. >>> e.append(prefix + "aaaaa" + "1".zfill(15) + suffix)
  189. >>> handleClash1(userName="A" * 5, existing=e,
  190. ... prefix=prefix, suffix=suffix) == (
  191. ... '00000.AAAAA000000000000002.0000000000')
  192. True
  193. >>> e = list(existing)
  194. >>> e.append(prefix + "AAAAA" + "2".zfill(15) + suffix)
  195. >>> handleClash1(userName="A" * 5, existing=e,
  196. ... prefix=prefix, suffix=suffix) == (
  197. ... '00000.AAAAA000000000000001.0000000000')
  198. True
  199. """
  200. # if the prefix length + user name length + suffix length + 15 is at
  201. # or past the maximum length, silce 15 characters off of the user name
  202. prefixLength = len(prefix)
  203. suffixLength = len(suffix)
  204. if prefixLength + len(userName) + suffixLength + 15 > maxFileNameLength:
  205. l = prefixLength + len(userName) + suffixLength + 15
  206. sliceLength = maxFileNameLength - l
  207. userName = userName[:sliceLength]
  208. finalName = None
  209. # try to add numbers to create a unique name
  210. counter = 1
  211. while finalName is None:
  212. name = userName + str(counter).zfill(15)
  213. fullName = prefix + name + suffix
  214. if fullName.lower() not in existing:
  215. finalName = fullName
  216. break
  217. else:
  218. counter += 1
  219. if counter >= 999999999999999:
  220. break
  221. # if there is a clash, go to the next fallback
  222. if finalName is None:
  223. finalName = handleClash2(existing, prefix, suffix)
  224. # finished
  225. return finalName
  226. def handleClash2(existing=[], prefix="", suffix=""):
  227. """
  228. existing should be a case-insensitive list
  229. of all existing file names.
  230. >>> prefix = ("0" * 5) + "."
  231. >>> suffix = "." + ("0" * 10)
  232. >>> existing = [prefix + str(i) + suffix for i in range(100)]
  233. >>> e = list(existing)
  234. >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
  235. ... '00000.100.0000000000')
  236. True
  237. >>> e = list(existing)
  238. >>> e.remove(prefix + "1" + suffix)
  239. >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
  240. ... '00000.1.0000000000')
  241. True
  242. >>> e = list(existing)
  243. >>> e.remove(prefix + "2" + suffix)
  244. >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
  245. ... '00000.2.0000000000')
  246. True
  247. """
  248. # calculate the longest possible string
  249. maxLength = maxFileNameLength - len(prefix) - len(suffix)
  250. maxValue = int("9" * maxLength)
  251. # try to find a number
  252. finalName = None
  253. counter = 1
  254. while finalName is None:
  255. fullName = prefix + str(counter) + suffix
  256. if fullName.lower() not in existing:
  257. finalName = fullName
  258. break
  259. else:
  260. counter += 1
  261. if counter >= maxValue:
  262. break
  263. # raise an error if nothing has been found
  264. if finalName is None:
  265. raise NameTranslationError("No unique name could be found.")
  266. # finished
  267. return finalName
  268. if __name__ == "__main__":
  269. import doctest
  270. doctest.testmod()