Package portage :: Module xpak

Source Code for Module portage.xpak

  1  # Copyright 2001-2014 Gentoo Foundation 
  2  # Distributed under the terms of the GNU General Public License v2 
  3   
  4   
  5  # The format for a tbz2/xpak: 
  6  # 
  7  #  tbz2: tar.bz2 + xpak + (xpak_offset) + "STOP" 
  8  #  xpak: "XPAKPACK" + (index_len) + (data_len) + index + data + "XPAKSTOP" 
  9  # index: (pathname_len) + pathname + (data_offset) + (data_len) 
 10  #        index entries are concatenated end-to-end. 
 11  #  data: concatenated data chunks, end-to-end. 
 12  # 
 13  # [tarball]XPAKPACKIIIIDDDD[index][data]XPAKSTOPOOOOSTOP 
 14  # 
 15  # (integer) == encodeint(integer)  ===> 4 characters (big-endian copy) 
 16  # '+' means concatenate the fields ===> All chunks are strings 
 17   
 18  __all__ = [ 
 19          'addtolist', 'decodeint', 'encodeint', 'getboth', 
 20          'getindex', 'getindex_mem', 'getitem', 'listindex', 
 21          'searchindex', 'tbz2', 'xpak_mem', 'xpak', 'xpand', 
 22          'xsplit', 'xsplit_mem', 
 23  ] 
 24   
 25  import array 
 26  import errno 
 27  import sys 
 28   
 29  import portage 
 30  from portage import os 
 31  from portage import shutil 
 32  from portage import normalize_path 
 33  from portage import _encodings 
 34  from portage import _unicode_decode 
 35  from portage import _unicode_encode 
 36   
def addtolist(mylist, curdir):
    """(list, dir) --- Takes an array(list) and appends all files from dir down
    the directory tree. Returns nothing. list is modified.

    Directory entries are appended with a trailing os.sep; paths are
    relative to curdir. Files and directories whose names cannot be
    decoded with the filesystem encoding are silently skipped.
    """
    curdir = normalize_path(_unicode_decode(curdir,
        encoding=_encodings['fs'], errors='strict'))
    for parent, dirs, files in os.walk(curdir):

        parent = _unicode_decode(parent,
            encoding=_encodings['fs'], errors='strict')
        if parent != curdir:
            mylist.append(parent[len(curdir) + 1:] + os.sep)

        # Filter out undecodable directory names so os.walk does not
        # descend into them. NOTE: the original called dirs.remove(x)
        # while iterating dirs, which skips the element following each
        # removal; rebuild the list in place instead (os.walk honors
        # in-place mutation of dirs).
        valid_dirs = []
        for x in dirs:
            try:
                _unicode_decode(x, encoding=_encodings['fs'], errors='strict')
            except UnicodeDecodeError:
                continue
            valid_dirs.append(x)
        dirs[:] = valid_dirs

        for x in files:
            try:
                x = _unicode_decode(x,
                    encoding=_encodings['fs'], errors='strict')
            except UnicodeDecodeError:
                # Undecodable file name: skip it.
                continue
            mylist.append(os.path.join(parent, x)[len(curdir) + 1:])
62
def encodeint(myint):
    """Encode an integer as 4 bytes, big-endian.

    Takes a 4 byte integer and converts it into a string of 4 characters.
    Returns the characters in a string (bytes on Python 3).
    """
    # bytes(bytearray(...)) yields bytes on py3 and str on py2,
    # matching what array.tobytes()/tostring() produced.
    return bytes(bytearray((
        (myint >> 24) & 0xff,
        (myint >> 16) & 0xff,
        (myint >> 8) & 0xff,
        myint & 0xff,
    )))
76
def decodeint(mystring):
    """Decode a 4 byte big-endian string/bytes value into an integer.

    Takes a 4 byte string and converts it into a 4 byte integer.
    Returns an integer.
    """
    if sys.hexversion < 0x3000000:
        # Python 2: indexing a str yields 1-char strings, not ints.
        mystring = [ord(c) for c in mystring]
    result = 0
    for byte in mystring[:4]:
        result = (result << 8) | byte
    return result
88
def xpak(rootdir, outfile=None):
    """(rootdir, outfile) -- creates an xpak segment of the directory 'rootdir'
    and under the name 'outfile' if it is specified. Otherwise it returns the
    xpak segment."""

    mylist = []

    addtolist(mylist, rootdir)
    mylist.sort()
    mydata = {}
    for x in mylist:
        if x == 'CONTENTS':
            # CONTENTS is generated during the merge process.
            continue
        x = _unicode_encode(x, encoding=_encodings['fs'], errors='strict')
        with open(os.path.join(rootdir, x), 'rb') as f:
            mydata[x] = f.read()

    xpak_segment = xpak_mem(mydata)
    if outfile:
        # Context manager ensures the handle is closed even if the
        # write fails (the original leaked it on error).
        with open(_unicode_encode(outfile,
            encoding=_encodings['fs'], errors='strict'), 'wb') as outf:
            outf.write(xpak_segment)
    else:
        return xpak_segment
115
def xpak_mem(mydata):
    """Create an xpak segment from a map object (name -> contents).

    Returns bytes: 'XPAKPACK' + index_len + data_len + index + data
    + 'XPAKSTOP'. Keys and values are encoded with the repo.content
    encoding before packing.
    """
    mydata_encoded = {}
    for k, v in mydata.items():
        k = _unicode_encode(k,
            encoding=_encodings['repo.content'], errors='backslashreplace')
        v = _unicode_encode(v,
            encoding=_encodings['repo.content'], errors='backslashreplace')
        mydata_encoded[k] = v
    mydata = mydata_encoded
    del mydata_encoded

    # Accumulate chunks and join once: the original repeatedly
    # concatenated bytes, which is quadratic for large packages.
    index_chunks = []
    data_chunks = []
    datapos = 0
    for x, newglob in mydata.items():
        mydatasize = len(newglob)
        # index entry: (name_len)(name)(data_offset)(data_len)
        index_chunks.append(encodeint(len(x)) + x
            + encodeint(datapos) + encodeint(mydatasize))
        data_chunks.append(newglob)
        datapos += mydatasize
    indexglob = b''.join(index_chunks)
    dataglob = b''.join(data_chunks)
    return b'XPAKPACK' \
        + encodeint(len(indexglob)) \
        + encodeint(len(dataglob)) \
        + indexglob \
        + dataglob \
        + b'XPAKSTOP'
145
def xsplit(infile):
    """(infile) -- Splits the infile into two files.
    'infile.index' contains the index segment.
    'infile.dat' contains the data segment.
    Returns True on success, False if infile is not a valid xpak.
    """
    infile = _unicode_decode(infile,
        encoding=_encodings['fs'], errors='strict')
    # Context managers ensure handles are closed even if a read or
    # write raises (the original leaked them on error paths).
    with open(_unicode_encode(infile,
        encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
        mydat = myfile.read()

    splits = xsplit_mem(mydat)
    if not splits:
        return False

    with open(_unicode_encode(infile + '.index',
        encoding=_encodings['fs'], errors='strict'), 'wb') as myfile:
        myfile.write(splits[0])
    with open(_unicode_encode(infile + '.dat',
        encoding=_encodings['fs'], errors='strict'), 'wb') as myfile:
        myfile.write(splits[1])
    return True
170
def xsplit_mem(mydat):
    """Split an in-memory xpak segment into (index, data).

    Returns None when the 'XPAKPACK'/'XPAKSTOP' framing is missing.
    """
    if not mydat.startswith(b'XPAKPACK'):
        return None
    if not mydat.endswith(b'XPAKSTOP'):
        return None
    # Layout: 8-byte magic, 4-byte index size, 4-byte data size,
    # then index followed by data, then the trailing magic.
    indexsize = decodeint(mydat[8:12])
    index_end = 16 + indexsize
    return (mydat[16:index_end], mydat[index_end:-8])
178
def getindex(infile):
    """(infile) -- grabs the index segment from the infile and returns it.
    Returns None when infile does not start with an xpak header."""
    # Context manager closes the handle even if a read raises
    # (the original leaked it on error).
    with open(_unicode_encode(infile,
        encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
        myheader = myfile.read(16)
        if myheader[0:8] != b'XPAKPACK':
            return None
        indexsize = decodeint(myheader[8:12])
        return myfile.read(indexsize)
191
def getboth(infile):
    """(infile) -- grabs the index and data segments from the infile.
    Returns a tuple (indexSegment, dataSegment), or None when infile
    does not start with an xpak header."""
    # Context manager closes the handle even if a read raises
    # (the original leaked it on error).
    with open(_unicode_encode(infile,
        encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
        myheader = myfile.read(16)
        if myheader[0:8] != b'XPAKPACK':
            return None
        indexsize = decodeint(myheader[8:12])
        datasize = decodeint(myheader[12:16])
        myindex = myfile.read(indexsize)
        mydata = myfile.read(datasize)
    return myindex, mydata
207
def listindex(myindex):
    """Print to the terminal the filenames listed in the index
    segment passed in, one per line."""
    for name in getindex_mem(myindex):
        print(name)
212
def getindex_mem(myindex):
    """Returns a list of the filenames recorded in the index segment.

    Each index record is: 4-byte name length, name, 4-byte data
    offset, 4-byte data length (12 bytes of overhead per record).
    """
    myindexlen = len(myindex)
    startpos = 0
    myret = []
    while (startpos + 8) < myindexlen:
        mytestlen = decodeint(myindex[startpos:startpos + 4])
        # append() instead of the original quadratic `myret = myret + [...]`
        myret.append(myindex[startpos + 4:startpos + 4 + mytestlen])
        startpos = startpos + mytestlen + 12
    return myret
223
def searchindex(myindex, myitem):
    """(index, item) -- Finds the offset and length of the file 'item' in the
    datasegment via the index 'index' provided.
    Returns (data_offset, data_len), or None when item is not present."""
    myitem = _unicode_encode(myitem,
        encoding=_encodings['repo.content'], errors='backslashreplace')
    mylen = len(myitem)
    myindexlen = len(myindex)
    startpos = 0
    while (startpos + 8) < myindexlen:
        mytestlen = decodeint(myindex[startpos:startpos + 4])
        # Cheap length check first, then compare the name bytes.
        if mytestlen == mylen \
            and myitem == myindex[startpos + 4:startpos + 4 + mytestlen]:
            # Found: the data offset and length follow the name.
            datapos = decodeint(
                myindex[startpos + 4 + mytestlen:startpos + 8 + mytestlen])
            datalen = decodeint(
                myindex[startpos + 8 + mytestlen:startpos + 12 + mytestlen])
            return datapos, datalen
        startpos = startpos + mytestlen + 12
    return None
241
def getitem(myid, myitem):
    """Return the contents of 'myitem' from an (index, data) pair
    'myid', or None when the item is not in the index."""
    myindex, mydata = myid[0], myid[1]
    myloc = searchindex(myindex, myitem)
    if not myloc:
        return None
    offset, length = myloc
    return mydata[offset:offset + length]
249
def xpand(myid, mydest):
    """Unpack every file of an (index, data) pair 'myid' into the
    directory 'mydest'. Entries whose name escapes mydest via ../
    components are skipped."""
    mydest = normalize_path(mydest) + os.sep
    myindex = myid[0]
    mydata = myid[1]
    myindexlen = len(myindex)
    startpos = 0
    while (startpos + 8) < myindexlen:
        namelen = decodeint(myindex[startpos:startpos + 4])
        datapos = decodeint(myindex[startpos + 4 + namelen:startpos + 8 + namelen])
        datalen = decodeint(myindex[startpos + 8 + namelen:startpos + 12 + namelen])
        myname = myindex[startpos + 4:startpos + 4 + namelen]
        # Advance to the next record NOW: the original advanced only at
        # the bottom of the loop, so the security `continue` below never
        # moved startpos and spun forever on a malicious name.
        startpos = startpos + namelen + 12
        myname = _unicode_decode(myname,
            encoding=_encodings['repo.content'], errors='replace')
        filename = os.path.join(mydest, myname.lstrip(os.sep))
        filename = normalize_path(filename)
        if not filename.startswith(mydest):
            # myname contains invalid ../ component(s)
            continue
        dirname = os.path.dirname(filename)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)
        # Context manager closes the output file even if write raises.
        with open(_unicode_encode(filename,
            encoding=_encodings['fs'], errors='strict'), 'wb') as mydat:
            mydat.write(mydata[datapos:datapos + datalen])
277
class tbz2(object):
    """Accessor for the xpak segment appended to a .tbz2 binary package
    (layout: tar.bz2 + xpak + xpak_offset + 'STOP')."""

    def __init__(self, myfile):
        """(path) -- Remember the package path; no I/O until scan()."""
        self.file = myfile
        self.filestat = None
        # Segment geometry, populated by scan():
        self.index = b''
        self.infosize = 0
        self.xpaksize = 0
        self.indexsize = None
        self.datasize = None
        self.indexpos = None
        self.datapos = None

    def decompose(self, datadir, cleanup=1):
        """Alias for unpackinfo() --- Complement to recompose() but optionally
        deletes the destination directory. Extracts the xpak from the tbz2 into
        the directory provided. Raises IOError if scan() fails.
        Returns result of unpackinfo()."""
        if not self.scan():
            raise IOError
        if cleanup:
            self.cleanup(datadir)
        if not os.path.exists(datadir):
            os.makedirs(datadir)
        return self.unpackinfo(datadir)

    def compose(self, datadir, cleanup=0):
        """Alias for recompose()."""
        return self.recompose(datadir, cleanup)

    def recompose(self, datadir, cleanup=0, break_hardlinks=True):
        """Creates an xpak segment from the datadir provided, truncates the tbz2
        to the end of regular data if an xpak segment already exists, and adds
        the new segment to the file with terminating info."""
        xpdata = xpak(datadir)
        self.recompose_mem(xpdata, break_hardlinks=break_hardlinks)
        if cleanup:
            self.cleanup(datadir)

    def recompose_mem(self, xpdata, break_hardlinks=True):
        """
        Update the xpak segment.
        @param xpdata: A new xpak segment to be written, like that returned
            from the xpak_mem() function.
        @param break_hardlinks: If hardlinks exist, create a copy in order
            to break them. This makes it safe to use hardlinks to create
            cheap snapshots of the repository, which is useful for solving
            race conditions on binhosts as described here:
            http://code.google.com/p/chromium-os/issues/detail?id=3225.
            Default is True.
        """
        self.scan()  # Don't care about condition... We'll rewrite the data anyway.

        if break_hardlinks and self.filestat and self.filestat.st_nlink > 1:
            tmp_fname = "%s.%d" % (self.file, os.getpid())
            shutil.copyfile(self.file, tmp_fname)
            try:
                portage.util.apply_stat_permissions(self.file, self.filestat)
            except portage.exception.OperationNotPermitted:
                pass
            # rename() atomically replaces the hardlinked original.
            os.rename(tmp_fname, self.file)

        # Context manager closes the handle even if truncate/write raise
        # (the original leaked it; its `if not myfile` check was dead
        # code, since open() never returns a falsy object).
        with open(_unicode_encode(self.file,
            encoding=_encodings['fs'], errors='strict'), 'ab+') as myfile:
            myfile.seek(-self.xpaksize, 2)  # 0,2 or -0,2 just mean EOF.
            myfile.truncate()
            myfile.write(xpdata + encodeint(len(xpdata)) + b'STOP')
            myfile.flush()
        return 1

    def cleanup(self, datadir):
        """Recursively remove datadir, ignoring a missing directory."""
        datadir_split = os.path.split(datadir)
        if len(datadir_split) >= 2 and len(datadir_split[1]) > 0:
            # This is potentially dangerous,
            # thus the above sanity check.
            try:
                shutil.rmtree(datadir)
            except OSError as oe:
                if oe.errno == errno.ENOENT:
                    pass
                else:
                    raise oe

    def scan(self):
        """Scans the tbz2 to locate the xpak segment and setup internal values.
        This function is called by relevant functions already.
        Returns 2 on a fresh successful scan, 1 when cached data is
        still valid, 0 on any failure."""
        a = None
        try:
            mystat = os.stat(self.file)
            if self.filestat:
                changed = 0
                # size/mtime/ctime unchanged => reuse the cached index.
                if mystat.st_size != self.filestat.st_size \
                    or mystat.st_mtime != self.filestat.st_mtime \
                    or mystat.st_ctime != self.filestat.st_ctime:
                    changed = True
                if not changed:
                    return 1
            self.filestat = mystat
            a = open(_unicode_encode(self.file,
                encoding=_encodings['fs'], errors='strict'), 'rb')
            # The last 16 bytes are: 'XPAKSTOP' + offset + 'STOP'.
            a.seek(-16, 2)
            trailer = a.read()
            self.infosize = 0
            self.xpaksize = 0
            if trailer[-4:] != b'STOP':
                return 0
            if trailer[0:8] != b'XPAKSTOP':
                return 0
            self.infosize = decodeint(trailer[8:12])
            self.xpaksize = self.infosize + 8
            a.seek(-(self.xpaksize), 2)
            header = a.read(16)
            if header[0:8] != b'XPAKPACK':
                return 0
            self.indexsize = decodeint(header[8:12])
            self.datasize = decodeint(header[12:16])
            self.indexpos = a.tell()
            self.index = a.read(self.indexsize)
            self.datapos = a.tell()
            return 2
        except SystemExit:
            raise
        except:
            # Broad by design: any stat/seek/parse failure simply means
            # "no valid xpak segment here".
            return 0
        finally:
            if a is not None:
                a.close()

    def filelist(self):
        """Return an array of each file listed in the index,
        or None if scan() fails."""
        if not self.scan():
            return None
        return getindex_mem(self.index)

    def getfile(self, myfile, mydefault=None):
        """Finds 'myfile' in the data segment and returns it.
        Returns None if scan() fails, mydefault if the file is absent."""
        if not self.scan():
            return None
        myresult = searchindex(self.index, myfile)
        if not myresult:
            return mydefault
        # Context manager closes the handle even if read raises.
        with open(_unicode_encode(self.file,
            encoding=_encodings['fs'], errors='strict'), 'rb') as a:
            a.seek(self.datapos + myresult[0], 0)
            return a.read(myresult[1])

    def getelements(self, myfile):
        """A split/array representation of tbz2.getfile()"""
        mydat = self.getfile(myfile)
        if not mydat:
            return []
        return mydat.split()

    def unpackinfo(self, mydest):
        """Unpacks all the files from the dataSegment into 'mydest'.
        Returns 1 on success, 0 if scan() fails. Entries whose name
        escapes mydest via ../ components are skipped."""
        if not self.scan():
            return 0
        mydest = normalize_path(mydest) + os.sep
        if not os.path.exists(mydest):
            os.makedirs(mydest)
        with open(_unicode_encode(self.file,
            encoding=_encodings['fs'], errors='strict'), 'rb') as a:
            startpos = 0
            while (startpos + 8) < self.indexsize:
                namelen = decodeint(self.index[startpos:startpos + 4])
                datapos = decodeint(self.index[startpos + 4 + namelen:startpos + 8 + namelen])
                datalen = decodeint(self.index[startpos + 8 + namelen:startpos + 12 + namelen])
                myname = self.index[startpos + 4:startpos + 4 + namelen]
                # Advance to the next record NOW: the original advanced
                # only at the loop bottom, so the security `continue`
                # below never moved startpos and spun forever.
                startpos = startpos + namelen + 12
                myname = _unicode_decode(myname,
                    encoding=_encodings['repo.content'], errors='replace')
                filename = os.path.join(mydest, myname.lstrip(os.sep))
                filename = normalize_path(filename)
                if not filename.startswith(mydest):
                    # myname contains invalid ../ component(s)
                    continue
                dirname = os.path.dirname(filename)
                if dirname and not os.path.exists(dirname):
                    os.makedirs(dirname)
                with open(_unicode_encode(filename,
                    encoding=_encodings['fs'], errors='strict'), 'wb') as mydat:
                    a.seek(self.datapos + datapos)
                    mydat.write(a.read(datalen))
        return 1

    def get_data(self):
        """Returns all the files from the dataSegment as a map object
        (name -> contents); empty dict if scan() fails."""
        if not self.scan():
            return {}
        mydata = {}
        with open(_unicode_encode(self.file,
            encoding=_encodings['fs'], errors='strict'), 'rb') as a:
            startpos = 0
            while (startpos + 8) < self.indexsize:
                namelen = decodeint(self.index[startpos:startpos + 4])
                datapos = decodeint(self.index[startpos + 4 + namelen:startpos + 8 + namelen])
                datalen = decodeint(self.index[startpos + 8 + namelen:startpos + 12 + namelen])
                myname = self.index[startpos + 4:startpos + 4 + namelen]
                a.seek(self.datapos + datapos)
                mydata[myname] = a.read(datalen)
                startpos = startpos + namelen + 12
        return mydata

    def getboth(self):
        """Returns an array [indexSegment, dataSegment], or None if
        scan() fails."""
        if not self.scan():
            return None

        with open(_unicode_encode(self.file,
            encoding=_encodings['fs'], errors='strict'), 'rb') as a:
            a.seek(self.datapos)
            mydata = a.read(self.datasize)

        return self.index, mydata