__init__.py 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977
  1. """biplist -- a library for reading and writing binary property list files.
  2. Binary Property List (plist) files provide a faster and smaller serialization
  3. format for property lists on OS X. This is a library for generating binary
  4. plists which can be read by OS X, iOS, or other clients.
  5. The API models the plistlib API, and will call through to plistlib when
  6. XML serialization or deserialization is required.
  7. To generate plists with UID values, wrap the values with the Uid object. The
  8. value must be an int.
  9. To generate plists with NSData/CFData values, wrap the values with the
  10. Data object. The value must be a string.
  11. Date values can only be datetime.datetime objects.
  12. The exceptions InvalidPlistException and NotBinaryPlistException may be
  13. thrown to indicate that the data cannot be serialized or deserialized as
  14. a binary plist.
  15. Plist generation example:
  16. from biplist import *
  17. from datetime import datetime
  18. plist = {'aKey':'aValue',
  19. '0':1.322,
  20. 'now':datetime.now(),
  21. 'list':[1,2,3],
  22. 'tuple':('a','b','c')
  23. }
  24. try:
  25. writePlist(plist, "example.plist")
  26. except (InvalidPlistException, NotBinaryPlistException) as e:
  27. print("Something bad happened:", e)
  28. Plist parsing example:
  29. from biplist import *
  30. try:
  31. plist = readPlist("example.plist")
  32. print(plist)
  33. except (InvalidPlistException, NotBinaryPlistException) as e:
  34. print("Not a plist:", e)
  35. """
  36. from collections import namedtuple
  37. import datetime
  38. import io
  39. import math
  40. import plistlib
  41. from struct import pack, unpack, unpack_from
  42. from struct import error as struct_error
  43. import sys
  44. import time
  45. try:
  46. unicode
  47. unicodeEmpty = r''
  48. except NameError:
  49. unicode = str
  50. unicodeEmpty = ''
  51. try:
  52. long
  53. except NameError:
  54. long = int
  55. try:
  56. {}.iteritems
  57. iteritems = lambda x: x.iteritems()
  58. except AttributeError:
  59. iteritems = lambda x: x.items()
  60. __all__ = [
  61. 'Uid', 'Data', 'readPlist', 'writePlist', 'readPlistFromString',
  62. 'writePlistToString', 'InvalidPlistException', 'NotBinaryPlistException'
  63. ]
  64. # Apple uses Jan 1, 2001 as a base for all plist date/times.
  65. apple_reference_date = datetime.datetime.utcfromtimestamp(978307200)
  66. class Uid(object):
  67. """Wrapper around integers for representing UID values. This
  68. is used in keyed archiving."""
  69. integer = 0
  70. def __init__(self, integer):
  71. self.integer = integer
  72. def __repr__(self):
  73. return "Uid(%d)" % self.integer
  74. def __eq__(self, other):
  75. if isinstance(self, Uid) and isinstance(other, Uid):
  76. return self.integer == other.integer
  77. return False
  78. def __cmp__(self, other):
  79. return self.integer - other.integer
  80. def __lt__(self, other):
  81. return self.integer < other.integer
  82. def __hash__(self):
  83. return self.integer
  84. def __int__(self):
  85. return int(self.integer)
  86. class Data(bytes):
  87. """Wrapper around bytes to distinguish Data values."""
  88. class InvalidPlistException(Exception):
  89. """Raised when the plist is incorrectly formatted."""
  90. class NotBinaryPlistException(Exception):
  91. """Raised when a binary plist was expected but not encountered."""
  92. def readPlist(pathOrFile):
  93. """Raises NotBinaryPlistException, InvalidPlistException"""
  94. didOpen = False
  95. result = None
  96. if isinstance(pathOrFile, (bytes, unicode)):
  97. pathOrFile = open(pathOrFile, 'rb')
  98. didOpen = True
  99. try:
  100. reader = PlistReader(pathOrFile)
  101. result = reader.parse()
  102. except NotBinaryPlistException as e:
  103. try:
  104. pathOrFile.seek(0)
  105. result = None
  106. if hasattr(plistlib, 'loads'):
  107. contents = None
  108. if isinstance(pathOrFile, (bytes, unicode)):
  109. with open(pathOrFile, 'rb') as f:
  110. contents = f.read()
  111. else:
  112. contents = pathOrFile.read()
  113. result = plistlib.loads(contents)
  114. else:
  115. result = plistlib.readPlist(pathOrFile)
  116. result = wrapDataObject(result, for_binary=True)
  117. except Exception as e:
  118. raise InvalidPlistException(e)
  119. finally:
  120. if didOpen:
  121. pathOrFile.close()
  122. return result
  123. def wrapDataObject(o, for_binary=False):
  124. if isinstance(o, Data) and not for_binary:
  125. v = sys.version_info
  126. if not (v[0] >= 3 and v[1] >= 4):
  127. o = plistlib.Data(o)
  128. elif isinstance(o, (bytes, plistlib.Data)) and for_binary:
  129. if hasattr(o, 'data'):
  130. o = Data(o.data)
  131. elif isinstance(o, tuple):
  132. o = wrapDataObject(list(o), for_binary)
  133. o = tuple(o)
  134. elif isinstance(o, list):
  135. for i in range(len(o)):
  136. o[i] = wrapDataObject(o[i], for_binary)
  137. elif isinstance(o, dict):
  138. for k in o:
  139. o[k] = wrapDataObject(o[k], for_binary)
  140. return o
  141. def writePlist(rootObject, pathOrFile, binary=True):
  142. if not binary:
  143. rootObject = wrapDataObject(rootObject, binary)
  144. if hasattr(plistlib, "dump"):
  145. if isinstance(pathOrFile, (bytes, unicode)):
  146. with open(pathOrFile, 'wb') as f:
  147. return plistlib.dump(rootObject, f)
  148. else:
  149. return plistlib.dump(rootObject, pathOrFile)
  150. else:
  151. return plistlib.writePlist(rootObject, pathOrFile)
  152. else:
  153. didOpen = False
  154. if isinstance(pathOrFile, (bytes, unicode)):
  155. pathOrFile = open(pathOrFile, 'wb')
  156. didOpen = True
  157. writer = PlistWriter(pathOrFile)
  158. result = writer.writeRoot(rootObject)
  159. if didOpen:
  160. pathOrFile.close()
  161. return result
  162. def readPlistFromString(data):
  163. return readPlist(io.BytesIO(data))
  164. def writePlistToString(rootObject, binary=True):
  165. if not binary:
  166. rootObject = wrapDataObject(rootObject, binary)
  167. if hasattr(plistlib, "dumps"):
  168. return plistlib.dumps(rootObject)
  169. elif hasattr(plistlib, "writePlistToBytes"):
  170. return plistlib.writePlistToBytes(rootObject)
  171. else:
  172. return plistlib.writePlistToString(rootObject)
  173. else:
  174. ioObject = io.BytesIO()
  175. writer = PlistWriter(ioObject)
  176. writer.writeRoot(rootObject)
  177. return ioObject.getvalue()
  178. def is_stream_binary_plist(stream):
  179. stream.seek(0)
  180. header = stream.read(7)
  181. if header == b'bplist0':
  182. return True
  183. else:
  184. return False
  185. PlistTrailer = namedtuple('PlistTrailer', 'offsetSize, objectRefSize, offsetCount, topLevelObjectNumber, offsetTableOffset')
  186. PlistByteCounts = namedtuple('PlistByteCounts', 'nullBytes, boolBytes, intBytes, realBytes, dateBytes, dataBytes, stringBytes, uidBytes, arrayBytes, setBytes, dictBytes')
  187. class PlistReader(object):
  188. file = None
  189. contents = ''
  190. offsets = None
  191. trailer = None
  192. currentOffset = 0
  193. # Used to detect recursive object references.
  194. offsetsStack = []
  195. def __init__(self, fileOrStream):
  196. """Raises NotBinaryPlistException."""
  197. self.reset()
  198. self.file = fileOrStream
  199. def parse(self):
  200. return self.readRoot()
  201. def reset(self):
  202. self.trailer = None
  203. self.contents = ''
  204. self.offsets = []
  205. self.currentOffset = 0
  206. self.offsetsStack = []
  207. def readRoot(self):
  208. result = None
  209. self.reset()
  210. # Get the header, make sure it's a valid file.
  211. if not is_stream_binary_plist(self.file):
  212. raise NotBinaryPlistException()
  213. self.file.seek(0)
  214. self.contents = self.file.read()
  215. if len(self.contents) < 32:
  216. raise InvalidPlistException("File is too short.")
  217. trailerContents = self.contents[-32:]
  218. try:
  219. self.trailer = PlistTrailer._make(unpack("!xxxxxxBBQQQ", trailerContents))
  220. if pow(2, self.trailer.offsetSize*8) < self.trailer.offsetTableOffset:
  221. raise InvalidPlistException("Offset size insufficient to reference all objects.")
  222. if pow(2, self.trailer.objectRefSize*8) < self.trailer.offsetCount:
  223. raise InvalidPlistException("Too many offsets to represent in size of object reference representation.")
  224. offset_size = self.trailer.offsetSize * self.trailer.offsetCount
  225. offset = self.trailer.offsetTableOffset
  226. if offset + offset_size > pow(2, 64):
  227. raise InvalidPlistException("Offset table is excessively long.")
  228. if self.trailer.offsetSize > 16:
  229. raise InvalidPlistException("Offset size is greater than maximum integer size.")
  230. if self.trailer.objectRefSize == 0:
  231. raise InvalidPlistException("Object reference size is zero.")
  232. if offset >= len(self.contents) - 32:
  233. raise InvalidPlistException("Offset table offset is too large.")
  234. if offset < len("bplist00x"):
  235. raise InvalidPlistException("Offset table offset is too small.")
  236. if self.trailer.topLevelObjectNumber >= self.trailer.offsetCount:
  237. raise InvalidPlistException("Top level object number is larger than the number of objects.")
  238. offset_contents = self.contents[offset:offset+offset_size]
  239. offset_i = 0
  240. offset_table_length = len(offset_contents)
  241. while offset_i < self.trailer.offsetCount:
  242. begin = self.trailer.offsetSize*offset_i
  243. end = begin+self.trailer.offsetSize
  244. if end > offset_table_length:
  245. raise InvalidPlistException("End of object is at invalid offset %d in offset table of length %d" % (end, offset_table_length))
  246. tmp_contents = offset_contents[begin:end]
  247. tmp_sized = self.getSizedInteger(tmp_contents, self.trailer.offsetSize)
  248. self.offsets.append(tmp_sized)
  249. offset_i += 1
  250. self.setCurrentOffsetToObjectNumber(self.trailer.topLevelObjectNumber)
  251. result = self.readObject()
  252. except TypeError as e:
  253. raise InvalidPlistException(e)
  254. return result
  255. def setCurrentOffsetToObjectNumber(self, objectNumber):
  256. if objectNumber > len(self.offsets) - 1:
  257. raise InvalidPlistException("Invalid offset number: %d" % objectNumber)
  258. self.currentOffset = self.offsets[objectNumber]
  259. if self.currentOffset in self.offsetsStack:
  260. raise InvalidPlistException("Recursive data structure detected in object: %d" % objectNumber)
  261. def beginOffsetProtection(self):
  262. self.offsetsStack.append(self.currentOffset)
  263. return self.currentOffset
  264. def endOffsetProtection(self, offset):
  265. try:
  266. index = self.offsetsStack.index(offset)
  267. self.offsetsStack = self.offsetsStack[:index]
  268. except ValueError as e:
  269. pass
  270. def readObject(self):
  271. protection = self.beginOffsetProtection()
  272. result = None
  273. tmp_byte = self.contents[self.currentOffset:self.currentOffset+1]
  274. if len(tmp_byte) != 1:
  275. raise InvalidPlistException("No object found at offset: %d" % self.currentOffset)
  276. marker_byte = unpack("!B", tmp_byte)[0]
  277. format = (marker_byte >> 4) & 0x0f
  278. extra = marker_byte & 0x0f
  279. self.currentOffset += 1
  280. def proc_extra(extra):
  281. if extra == 0b1111:
  282. extra = self.readObject()
  283. return extra
  284. # bool, null, or fill byte
  285. if format == 0b0000:
  286. if extra == 0b0000:
  287. result = None
  288. elif extra == 0b1000:
  289. result = False
  290. elif extra == 0b1001:
  291. result = True
  292. elif extra == 0b1111:
  293. pass # fill byte
  294. else:
  295. raise InvalidPlistException("Invalid object found at offset: %d" % (self.currentOffset - 1))
  296. # int
  297. elif format == 0b0001:
  298. result = self.readInteger(pow(2, extra))
  299. # real
  300. elif format == 0b0010:
  301. result = self.readReal(extra)
  302. # date
  303. elif format == 0b0011 and extra == 0b0011:
  304. result = self.readDate()
  305. # data
  306. elif format == 0b0100:
  307. extra = proc_extra(extra)
  308. result = self.readData(extra)
  309. # ascii string
  310. elif format == 0b0101:
  311. extra = proc_extra(extra)
  312. result = self.readAsciiString(extra)
  313. # Unicode string
  314. elif format == 0b0110:
  315. extra = proc_extra(extra)
  316. result = self.readUnicode(extra)
  317. # uid
  318. elif format == 0b1000:
  319. result = self.readUid(extra)
  320. # array
  321. elif format == 0b1010:
  322. extra = proc_extra(extra)
  323. result = self.readArray(extra)
  324. # set
  325. elif format == 0b1100:
  326. extra = proc_extra(extra)
  327. result = set(self.readArray(extra))
  328. # dict
  329. elif format == 0b1101:
  330. extra = proc_extra(extra)
  331. result = self.readDict(extra)
  332. else:
  333. raise InvalidPlistException("Invalid object found: {format: %s, extra: %s}" % (bin(format), bin(extra)))
  334. self.endOffsetProtection(protection)
  335. return result
  336. def readContents(self, length, description="Object contents"):
  337. end = self.currentOffset + length
  338. if end >= len(self.contents) - 32:
  339. raise InvalidPlistException("%s extends into trailer" % description)
  340. elif length < 0:
  341. raise InvalidPlistException("%s length is less than zero" % length)
  342. data = self.contents[self.currentOffset:end]
  343. return data
  344. def readInteger(self, byteSize):
  345. data = self.readContents(byteSize, "Integer")
  346. self.currentOffset = self.currentOffset + byteSize
  347. return self.getSizedInteger(data, byteSize, as_number=True)
  348. def readReal(self, length):
  349. to_read = pow(2, length)
  350. data = self.readContents(to_read, "Real")
  351. if length == 2: # 4 bytes
  352. result = unpack('>f', data)[0]
  353. elif length == 3: # 8 bytes
  354. result = unpack('>d', data)[0]
  355. else:
  356. raise InvalidPlistException("Unknown Real of length %d bytes" % to_read)
  357. return result
  358. def readRefs(self, count):
  359. refs = []
  360. i = 0
  361. while i < count:
  362. fragment = self.readContents(self.trailer.objectRefSize, "Object reference")
  363. ref = self.getSizedInteger(fragment, len(fragment))
  364. refs.append(ref)
  365. self.currentOffset += self.trailer.objectRefSize
  366. i += 1
  367. return refs
  368. def readArray(self, count):
  369. if not isinstance(count, (int, long)):
  370. raise InvalidPlistException("Count of entries in dict isn't of integer type.")
  371. result = []
  372. values = self.readRefs(count)
  373. i = 0
  374. while i < len(values):
  375. self.setCurrentOffsetToObjectNumber(values[i])
  376. value = self.readObject()
  377. result.append(value)
  378. i += 1
  379. return result
  380. def readDict(self, count):
  381. if not isinstance(count, (int, long)):
  382. raise InvalidPlistException("Count of keys/values in dict isn't of integer type.")
  383. result = {}
  384. keys = self.readRefs(count)
  385. values = self.readRefs(count)
  386. i = 0
  387. while i < len(keys):
  388. self.setCurrentOffsetToObjectNumber(keys[i])
  389. key = self.readObject()
  390. self.setCurrentOffsetToObjectNumber(values[i])
  391. value = self.readObject()
  392. result[key] = value
  393. i += 1
  394. return result
  395. def readAsciiString(self, length):
  396. if not isinstance(length, (int, long)):
  397. raise InvalidPlistException("Length of ASCII string isn't of integer type.")
  398. data = self.readContents(length, "ASCII string")
  399. result = unpack("!%ds" % length, data)[0]
  400. self.currentOffset += length
  401. return str(result.decode('ascii'))
  402. def readUnicode(self, length):
  403. if not isinstance(length, (int, long)):
  404. raise InvalidPlistException("Length of Unicode string isn't of integer type.")
  405. actual_length = length*2
  406. data = self.readContents(actual_length, "Unicode string")
  407. self.currentOffset += actual_length
  408. return data.decode('utf_16_be')
  409. def readDate(self):
  410. data = self.readContents(8, "Date")
  411. x = unpack(">d", data)[0]
  412. if math.isnan(x):
  413. raise InvalidPlistException("Date is NaN")
  414. # Use timedelta to workaround time_t size limitation on 32-bit python.
  415. try:
  416. result = datetime.timedelta(seconds=x) + apple_reference_date
  417. except OverflowError:
  418. if x > 0:
  419. result = datetime.datetime.max
  420. else:
  421. result = datetime.datetime.min
  422. self.currentOffset += 8
  423. return result
  424. def readData(self, length):
  425. if not isinstance(length, (int, long)):
  426. raise InvalidPlistException("Length of data isn't of integer type.")
  427. result = self.readContents(length, "Data")
  428. self.currentOffset += length
  429. return Data(result)
  430. def readUid(self, length):
  431. if not isinstance(length, (int, long)):
  432. raise InvalidPlistException("Uid length isn't of integer type.")
  433. return Uid(self.readInteger(length+1))
  434. def getSizedInteger(self, data, byteSize, as_number=False):
  435. """Numbers of 8 bytes are signed integers when they refer to numbers, but unsigned otherwise."""
  436. result = 0
  437. if byteSize == 0:
  438. raise InvalidPlistException("Encountered integer with byte size of 0.")
  439. # 1, 2, and 4 byte integers are unsigned
  440. elif byteSize == 1:
  441. result = unpack('>B', data)[0]
  442. elif byteSize == 2:
  443. result = unpack('>H', data)[0]
  444. elif byteSize == 4:
  445. result = unpack('>L', data)[0]
  446. elif byteSize == 8:
  447. if as_number:
  448. result = unpack('>q', data)[0]
  449. else:
  450. result = unpack('>Q', data)[0]
  451. elif byteSize <= 16:
  452. # Handle odd-sized or integers larger than 8 bytes
  453. # Don't naively go over 16 bytes, in order to prevent infinite loops.
  454. result = 0
  455. if hasattr(int, 'from_bytes'):
  456. result = int.from_bytes(data, 'big')
  457. else:
  458. for byte in data:
  459. if not isinstance(byte, int): # Python3.0-3.1.x return ints, 2.x return str
  460. byte = unpack_from('>B', byte)[0]
  461. result = (result << 8) | byte
  462. else:
  463. raise InvalidPlistException("Encountered integer longer than 16 bytes.")
  464. return result
  465. class HashableWrapper(object):
  466. def __init__(self, value):
  467. self.value = value
  468. def __repr__(self):
  469. return "<HashableWrapper: %s>" % [self.value]
  470. class BoolWrapper(object):
  471. def __init__(self, value):
  472. self.value = value
  473. def __repr__(self):
  474. return "<BoolWrapper: %s>" % self.value
  475. class FloatWrapper(object):
  476. _instances = {}
  477. def __new__(klass, value):
  478. # Ensure FloatWrapper(x) for a given float x is always the same object
  479. wrapper = klass._instances.get(value)
  480. if wrapper is None:
  481. wrapper = object.__new__(klass)
  482. wrapper.value = value
  483. klass._instances[value] = wrapper
  484. return wrapper
  485. def __repr__(self):
  486. return "<FloatWrapper: %s>" % self.value
  487. class StringWrapper(object):
  488. __instances = {}
  489. encodedValue = None
  490. encoding = None
  491. def __new__(cls, value):
  492. '''Ensure we only have a only one instance for any string,
  493. and that we encode ascii as 1-byte-per character when possible'''
  494. encodedValue = None
  495. for encoding in ('ascii', 'utf_16_be'):
  496. try:
  497. encodedValue = value.encode(encoding)
  498. except: pass
  499. if encodedValue is not None:
  500. if encodedValue not in cls.__instances:
  501. cls.__instances[encodedValue] = super(StringWrapper, cls).__new__(cls)
  502. cls.__instances[encodedValue].encodedValue = encodedValue
  503. cls.__instances[encodedValue].encoding = encoding
  504. return cls.__instances[encodedValue]
  505. raise ValueError('Unable to get ascii or utf_16_be encoding for %s' % repr(value))
  506. def __len__(self):
  507. '''Return roughly the number of characters in this string (half the byte length)'''
  508. if self.encoding == 'ascii':
  509. return len(self.encodedValue)
  510. else:
  511. return len(self.encodedValue)//2
  512. def __lt__(self, other):
  513. return self.encodedValue < other.encodedValue
  514. @property
  515. def encodingMarker(self):
  516. if self.encoding == 'ascii':
  517. return 0b0101
  518. else:
  519. return 0b0110
  520. def __repr__(self):
  521. return '<StringWrapper (%s): %s>' % (self.encoding, self.encodedValue)
  522. class PlistWriter(object):
  523. header = b'bplist00bybiplist1.0'
  524. file = None
  525. byteCounts = None
  526. trailer = None
  527. computedUniques = None
  528. writtenReferences = None
  529. referencePositions = None
  530. wrappedTrue = None
  531. wrappedFalse = None
  532. # Used to detect recursive object references.
  533. objectsStack = []
  534. def __init__(self, file):
  535. self.reset()
  536. self.file = file
  537. self.wrappedTrue = BoolWrapper(True)
  538. self.wrappedFalse = BoolWrapper(False)
  539. def reset(self):
  540. self.byteCounts = PlistByteCounts(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
  541. self.trailer = PlistTrailer(0, 0, 0, 0, 0)
  542. # A set of all the uniques which have been computed.
  543. self.computedUniques = set()
  544. # A list of all the uniques which have been written.
  545. self.writtenReferences = {}
  546. # A dict of the positions of the written uniques.
  547. self.referencePositions = {}
  548. self.objectsStack = []
  549. def positionOfObjectReference(self, obj):
  550. """If the given object has been written already, return its
  551. position in the offset table. Otherwise, return None."""
  552. return self.writtenReferences.get(obj)
  553. def writeRoot(self, root):
  554. """
  555. Strategy is:
  556. - write header
  557. - wrap root object so everything is hashable
  558. - compute size of objects which will be written
  559. - need to do this in order to know how large the object refs
  560. will be in the list/dict/set reference lists
  561. - write objects
  562. - keep objects in writtenReferences
  563. - keep positions of object references in referencePositions
  564. - write object references with the length computed previously
  565. - computer object reference length
  566. - write object reference positions
  567. - write trailer
  568. """
  569. output = self.header
  570. wrapped_root = self.wrapRoot(root)
  571. self.computeOffsets(wrapped_root, asReference=True, isRoot=True)
  572. self.trailer = self.trailer._replace(**{'objectRefSize':self.intSize(len(self.computedUniques))})
  573. self.writeObjectReference(wrapped_root, output)
  574. output = self.writeObject(wrapped_root, output, setReferencePosition=True)
  575. # output size at this point is an upper bound on how big the
  576. # object reference offsets need to be.
  577. self.trailer = self.trailer._replace(**{
  578. 'offsetSize':self.intSize(len(output)),
  579. 'offsetCount':len(self.computedUniques),
  580. 'offsetTableOffset':len(output),
  581. 'topLevelObjectNumber':0
  582. })
  583. output = self.writeOffsetTable(output)
  584. output += pack('!xxxxxxBBQQQ', *self.trailer)
  585. self.file.write(output)
  586. def beginRecursionProtection(self, obj):
  587. if not isinstance(obj, (set, dict, list, tuple)):
  588. return
  589. if id(obj) in self.objectsStack:
  590. raise InvalidPlistException("Recursive containers are not allowed in plists.")
  591. self.objectsStack.append(id(obj))
  592. def endRecursionProtection(self, obj):
  593. if not isinstance(obj, (set, dict, list, tuple)):
  594. return
  595. try:
  596. index = self.objectsStack.index(id(obj))
  597. self.objectsStack = self.objectsStack[:index]
  598. except ValueError as e:
  599. pass
  600. def wrapRoot(self, root):
  601. result = None
  602. self.beginRecursionProtection(root)
  603. if isinstance(root, bool):
  604. if root is True:
  605. result = self.wrappedTrue
  606. else:
  607. result = self.wrappedFalse
  608. elif isinstance(root, float):
  609. result = FloatWrapper(root)
  610. elif isinstance(root, set):
  611. n = set()
  612. for value in root:
  613. n.add(self.wrapRoot(value))
  614. result = HashableWrapper(n)
  615. elif isinstance(root, dict):
  616. n = {}
  617. for key, value in iteritems(root):
  618. n[self.wrapRoot(key)] = self.wrapRoot(value)
  619. result = HashableWrapper(n)
  620. elif isinstance(root, list):
  621. n = []
  622. for value in root:
  623. n.append(self.wrapRoot(value))
  624. result = HashableWrapper(n)
  625. elif isinstance(root, tuple):
  626. n = tuple([self.wrapRoot(value) for value in root])
  627. result = HashableWrapper(n)
  628. elif isinstance(root, (str, unicode)) and not isinstance(root, Data):
  629. result = StringWrapper(root)
  630. elif isinstance(root, bytes):
  631. result = Data(root)
  632. else:
  633. result = root
  634. self.endRecursionProtection(root)
  635. return result
  636. def incrementByteCount(self, field, incr=1):
  637. self.byteCounts = self.byteCounts._replace(**{field:self.byteCounts.__getattribute__(field) + incr})
  638. def computeOffsets(self, obj, asReference=False, isRoot=False):
  639. def check_key(key):
  640. if key is None:
  641. raise InvalidPlistException('Dictionary keys cannot be null in plists.')
  642. elif isinstance(key, Data):
  643. raise InvalidPlistException('Data cannot be dictionary keys in plists.')
  644. elif not isinstance(key, StringWrapper):
  645. raise InvalidPlistException('Keys must be strings.')
  646. def proc_size(size):
  647. if size > 0b1110:
  648. size += self.intSize(size)
  649. return size
  650. # If this should be a reference, then we keep a record of it in the
  651. # uniques table.
  652. if asReference:
  653. if obj in self.computedUniques:
  654. return
  655. else:
  656. self.computedUniques.add(obj)
  657. if obj is None:
  658. self.incrementByteCount('nullBytes')
  659. elif isinstance(obj, BoolWrapper):
  660. self.incrementByteCount('boolBytes')
  661. elif isinstance(obj, Uid):
  662. size = self.intSize(obj.integer)
  663. self.incrementByteCount('uidBytes', incr=1+size)
  664. elif isinstance(obj, (int, long)):
  665. size = self.intSize(obj)
  666. self.incrementByteCount('intBytes', incr=1+size)
  667. elif isinstance(obj, FloatWrapper):
  668. size = self.realSize(obj)
  669. self.incrementByteCount('realBytes', incr=1+size)
  670. elif isinstance(obj, datetime.datetime):
  671. self.incrementByteCount('dateBytes', incr=2)
  672. elif isinstance(obj, Data):
  673. size = proc_size(len(obj))
  674. self.incrementByteCount('dataBytes', incr=1+size)
  675. elif isinstance(obj, StringWrapper):
  676. size = proc_size(len(obj))
  677. self.incrementByteCount('stringBytes', incr=1+size)
  678. elif isinstance(obj, HashableWrapper):
  679. obj = obj.value
  680. if isinstance(obj, set):
  681. size = proc_size(len(obj))
  682. self.incrementByteCount('setBytes', incr=1+size)
  683. for value in obj:
  684. self.computeOffsets(value, asReference=True)
  685. elif isinstance(obj, (list, tuple)):
  686. size = proc_size(len(obj))
  687. self.incrementByteCount('arrayBytes', incr=1+size)
  688. for value in obj:
  689. asRef = True
  690. self.computeOffsets(value, asReference=True)
  691. elif isinstance(obj, dict):
  692. size = proc_size(len(obj))
  693. self.incrementByteCount('dictBytes', incr=1+size)
  694. for key, value in iteritems(obj):
  695. check_key(key)
  696. self.computeOffsets(key, asReference=True)
  697. self.computeOffsets(value, asReference=True)
  698. else:
  699. raise InvalidPlistException("Unknown object type: %s (%s)" % (type(obj).__name__, repr(obj)))
  700. def writeObjectReference(self, obj, output):
  701. """Tries to write an object reference, adding it to the references
  702. table. Does not write the actual object bytes or set the reference
  703. position. Returns a tuple of whether the object was a new reference
  704. (True if it was, False if it already was in the reference table)
  705. and the new output.
  706. """
  707. position = self.positionOfObjectReference(obj)
  708. if position is None:
  709. self.writtenReferences[obj] = len(self.writtenReferences)
  710. output += self.binaryInt(len(self.writtenReferences) - 1, byteSize=self.trailer.objectRefSize)
  711. return (True, output)
  712. else:
  713. output += self.binaryInt(position, byteSize=self.trailer.objectRefSize)
  714. return (False, output)
  def writeObject(self, obj, output, setReferencePosition=False):
      """Serialize ``obj`` onto the end of ``output`` and return the result.

      When ``setReferencePosition`` is true, the offset at which the
      object starts (the length of ``output`` on entry) is recorded in
      ``self.referencePositions`` before anything is written.

      Every handled object is written as a marker byte followed by its
      payload. Containers carried inside a HashableWrapper are written
      as a length header, then one reference per member, then the
      bodies of the members that had not been referenced before. An
      object matching none of the handled types leaves ``output``
      unchanged.
      """
      def length_header(type_code, count):
          # Marker byte: type code in the high nibble, count in the low
          # nibble. A count above 0b1110 does not fit, so the low nibble
          # is set to 0b1111 and the count follows as an integer object.
          if count > 0b1110:
              return self.writeObject(count, pack('!B', (type_code << 4) | 0b1111))
          return pack('!B', (type_code << 4) | count)

      def total_seconds_compat(delta):
          # Shim for Python 2.6 compatibility, which doesn't have
          # total_seconds. The float factor forces float arithmetic.
          whole_seconds = delta.seconds + delta.days * 24 * 3600
          return (delta.microseconds + whole_seconds * 10.0**6) / 10.0**6

      if setReferencePosition:
          self.referencePositions[obj] = len(output)

      if obj is None:
          output += pack('!B', 0b00000000)
      elif isinstance(obj, BoolWrapper):
          output += pack('!B', 0b00001000 if obj.value is False else 0b00001001)
      elif isinstance(obj, Uid):
          # Low nibble holds the byte width minus one.
          width = self.intSize(obj.integer)
          output += pack('!B', (0b1000 << 4) | (width - 1))
          output += self.binaryInt(obj.integer)
      elif isinstance(obj, (int, long)):
          # Low nibble holds log2 of the byte width.
          exponent = int(math.log(self.intSize(obj), 2))
          output += pack('!B', (0b0001 << 4) | exponent)
          output += self.binaryInt(obj, as_number=True)
      elif isinstance(obj, FloatWrapper):
          # Reals are always written as doubles.
          output += pack('!B', (0b0010 << 4) | 3)
          output += self.binaryReal(obj)
      elif isinstance(obj, datetime.datetime):
          # Dates are stored as float seconds relative to apple_reference_date.
          delta = obj - apple_reference_date
          try:
              timestamp = delta.total_seconds()
          except AttributeError:
              timestamp = total_seconds_compat(delta)
          output += pack('!B', 0b00110011)
          output += pack('!d', float(timestamp))
      elif isinstance(obj, Data):
          output += length_header(0b0100, len(obj))
          output += obj
      elif isinstance(obj, StringWrapper):
          output += length_header(obj.encodingMarker, len(obj))
          output += obj.encodedValue
      elif isinstance(obj, bytes):
          output += length_header(0b0101, len(obj))
          output += obj
      elif isinstance(obj, HashableWrapper):
          container = obj.value
          if isinstance(container, (set, list, tuple)):
              is_set = isinstance(container, set)
              output += length_header(0b1100 if is_set else 0b1010, len(container))
              # Sets are emitted in sorted order; sequences keep their own order.
              members = sorted(container) if is_set else container
          elif isinstance(container, dict):
              output += length_header(0b1101, len(container))
              # All key references come first, then all value references,
              # both in the order of the sorted (key, value) pairs.
              pairs = sorted(iteritems(container))
              members = [pair[0] for pair in pairs] + [pair[1] for pair in pairs]
          else:
              members = ()

          # Emit every reference first, remembering which members are new...
          pending = []
          for member in members:
              (is_new, output) = self.writeObjectReference(member, output)
              if is_new:
                  pending.append(member)
          # ...then write the bodies of the new members after the references.
          for member in pending:
              output = self.writeObject(member, output, setReferencePosition=True)
      return output
  def writeOffsetTable(self, output):
      """Writes all of the object reference offsets.

      Walks ``self.writtenReferences`` in ascending reference-index
      order and appends, for each object, the position recorded for it
      in ``self.referencePositions``, encoded by ``self.binaryInt``
      with ``self.trailer.offsetSize`` as the byte size.

      Returns the extended output. Raises InvalidPlistException when
      an object has no recorded position.
      """
      ordered = sorted(self.writtenReferences.items(), key=lambda item: item[1])
      for obj, _order in ordered:
          # Porting note: Elsewhere we deliberately replace empty unicode
          # strings with empty binary strings, but the empty unicode
          # string goes into writtenReferences. This isn't an issue in
          # Py2 because u'' and b'' have the same hash; but it is in
          # Py3, where they don't.
          if bytes != str and obj == unicodeEmpty:
              obj = b''
          position = self.referencePositions.get(obj)
          if position is None:
              # Wrap obj in a 1-tuple so a tuple-valued obj cannot be
              # mistaken for the format-argument list.
              raise InvalidPlistException("Error while writing offsets table. Object not found. %s" % (obj,))
          output += self.binaryInt(position, self.trailer.offsetSize)
      return output
  def binaryReal(self, obj):
      """Return ``obj.value`` packed as a big-endian 8-byte double."""
      # Reals are always emitted as doubles, never as 4-byte floats.
      return pack('>d', obj.value)
  def binaryInt(self, obj, byteSize=None, as_number=False):
      """Return the integer ``obj`` packed big-endian into ``byteSize`` bytes.

      When ``byteSize`` is None it is taken from ``self.intSize(obj)``.
      Sizes 1, 2 and 4 are packed unsigned. Size 8 is packed signed
      when ``as_number`` is true and unsigned otherwise. Any other
      size up to 16 yields 16 bytes: eight zero bytes followed by
      ``obj`` as an unsigned 64-bit value.

      Raises InvalidPlistException when the 16-byte form cannot be
      packed or when ``byteSize`` is greater than 16.
      """
      if byteSize is None:
          byteSize = self.intSize(obj)

      # Fixed unsigned widths.
      for width, code in ((1, '>B'), (2, '>H'), (4, '>L')):
          if byteSize == width:
              return pack(code, obj)

      if byteSize == 8:
          return pack('>q' if as_number else '>Q', obj)

      if byteSize <= 16:
          # High 64 bits are always zero; only the low half carries data.
          try:
              return pack('>Q', 0) + pack('>Q', obj)
          except struct_error as e:
              raise InvalidPlistException("Unable to pack integer %d: %s" % (obj, e))

      raise InvalidPlistException("Core Foundation can't handle integers with size greater than 16 bytes.")
  def intSize(self, obj):
      """Returns the number of bytes necessary to store the given integer.

      Negative values always report 8 bytes. Non-negative values report
      the first of 1, 2, 4 or 8 bytes whose limit they fit under, where
      the 8-byte limit is the signed maximum 0x7FFFFFFFFFFFFFFF. Larger
      values up to 0xFFFFFFFFFFFFFFFF report 16 bytes.

      Raises InvalidPlistException for anything above 0xFFFFFFFFFFFFFFFF.
      """
      if obj < 0:
          # Signed integer, always 8 bytes.
          return 8

      # (inclusive upper limit, byte count), checked in ascending order.
      thresholds = (
          (0xFF, 1),
          (0xFFFF, 2),
          (0xFFFFFFFF, 4),
          (0x7FFFFFFFFFFFFFFF, 8),   # signed 8-byte maximum
          (0xFFFFFFFFFFFFFFFF, 16),  # needs the full unsigned 8-byte range
      )
      for limit, width in thresholds:
          if obj <= limit:
              return width

      raise InvalidPlistException("Core Foundation can't handle integers with size greater than 8 bytes.")
  def realSize(self, obj):
      """Return the byte count reported for a real; always 8, whatever ``obj`` is."""
      double_width = 8
      return double_width