Package portage :: Module xpak
[hide private]

Source Code for Module portage.xpak

  1  # Copyright 2001-2012 Gentoo Foundation 
  2  # Distributed under the terms of the GNU General Public License v2 
  3   
  4   
  5  # The format for a tbz2/xpak: 
  6  # 
  7  #  tbz2: tar.bz2 + xpak + (xpak_offset) + "STOP" 
  8  #  xpak: "XPAKPACK" + (index_len) + (data_len) + index + data + "XPAKSTOP" 
  9  # index: (pathname_len) + pathname + (data_offset) + (data_len) 
 10  #        index entries are concatenated end-to-end. 
 11  #  data: concatenated data chunks, end-to-end. 
 12  # 
 13  # [tarball]XPAKPACKIIIIDDDD[index][data]XPAKSTOPOOOOSTOP 
 14  # 
 15  # (integer) == encodeint(integer)  ===> 4 characters (big-endian copy) 
 16  # '+' means concatenate the fields ===> All chunks are strings 
 17   
 18  __all__ = ['addtolist', 'decodeint', 'encodeint', 'getboth', 
 19          'getindex', 'getindex_mem', 'getitem', 'listindex', 
 20          'searchindex', 'tbz2', 'xpak_mem', 'xpak', 'xpand', 
 21          'xsplit', 'xsplit_mem'] 
 22   
 23  import array 
 24  import errno 
 25  import sys 
 26   
 27  import portage 
 28  from portage import os 
 29  from portage import shutil 
 30  from portage import normalize_path 
 31  from portage import _encodings 
 32  from portage import _unicode_decode 
 33  from portage import _unicode_encode 
 34   
def addtolist(mylist, curdir):
	"""(list, dir) --- Takes an array(list) and appends all files from dir down
	the directory tree. Returns nothing. list is modified."""
	curdir = normalize_path(_unicode_decode(curdir,
		encoding=_encodings['fs'], errors='strict'))
	for parent, dirs, files in os.walk(curdir):

		parent = _unicode_decode(parent,
			encoding=_encodings['fs'], errors='strict')
		if parent != curdir:
			# Record directories relative to curdir, with a trailing sep.
			mylist.append(parent[len(curdir) + 1:] + os.sep)

		# Iterate over a copy: removing from the list being iterated
		# skips the element that follows each removal, so undecodable
		# directories could otherwise escape pruning.
		for x in list(dirs):
			try:
				_unicode_decode(x, encoding=_encodings['fs'], errors='strict')
			except UnicodeDecodeError:
				# Prune undecodable directories from the walk in-place.
				dirs.remove(x)

		for x in files:
			try:
				x = _unicode_decode(x,
					encoding=_encodings['fs'], errors='strict')
			except UnicodeDecodeError:
				# Skip files with undecodable names.
				continue
			mylist.append(os.path.join(parent, x)[len(curdir) + 1:])
60
def encodeint(myint):
	"""Takes a 4 byte integer and converts it into a string of 4 characters.
	Returns the characters in a string (big-endian byte order)."""
	buf = bytearray(4)
	buf[0] = (myint >> 24) & 0xff
	buf[1] = (myint >> 16) & 0xff
	buf[2] = (myint >> 8) & 0xff
	buf[3] = myint & 0xff
	return bytes(buf)
74
def decodeint(mystring):
	"""Takes a 4 byte string and converts it into a 4 byte integer.
	Returns an integer (big-endian byte order)."""
	if sys.hexversion < 0x3000000:
		# Python 2: indexing a str yields str, not int.
		mystring = [ord(x) for x in mystring]
	return ((mystring[0] << 24) | (mystring[1] << 16)
		| (mystring[2] << 8) | mystring[3])
86
def xpak(rootdir, outfile=None):
	"""(rootdir, outfile) -- creates an xpak segment of the directory 'rootdir'
	and under the name 'outfile' if it is specified. Otherwise it returns the
	xpak segment."""

	file_names = []
	addtolist(file_names, rootdir)
	file_names.sort()

	mydata = {}
	for name in file_names:
		if name == 'CONTENTS':
			# CONTENTS is generated during the merge process.
			continue
		name = _unicode_encode(name, encoding=_encodings['fs'], errors='strict')
		with open(os.path.join(rootdir, name), 'rb') as f:
			mydata[name] = f.read()

	xpak_segment = xpak_mem(mydata)
	if not outfile:
		return xpak_segment
	with open(_unicode_encode(outfile,
		encoding=_encodings['fs'], errors='strict'), 'wb') as outf:
		outf.write(xpak_segment)
113
def xpak_mem(mydata):
	"""Create an xpak segment from a map object.

	@param mydata: maps entry names to entry contents; str keys/values
		are encoded with the 'repo.content' encoding.
	@return: the complete xpak segment as bytes
	"""
	mydata_encoded = {}
	for k, v in mydata.items():
		k = _unicode_encode(k,
			encoding=_encodings['repo.content'], errors='backslashreplace')
		v = _unicode_encode(v,
			encoding=_encodings['repo.content'], errors='backslashreplace')
		mydata_encoded[k] = v
	mydata = mydata_encoded
	del mydata_encoded

	# Accumulate chunks in lists and join once at the end, avoiding the
	# quadratic cost of repeated bytes concatenation.
	index_chunks = []
	data_chunks = []
	datapos = 0
	for x, newglob in mydata.items():
		mydatasize = len(newglob)
		# Index entry: (name_len) + name + (data_offset) + (data_len)
		index_chunks.append(encodeint(len(x)) + x
			+ encodeint(datapos) + encodeint(mydatasize))
		data_chunks.append(newglob)
		datapos = datapos + mydatasize
	indexglob = b''.join(index_chunks)
	dataglob = b''.join(data_chunks)
	return b'XPAKPACK' \
		+ encodeint(len(indexglob)) \
		+ encodeint(len(dataglob)) \
		+ indexglob \
		+ dataglob \
		+ b'XPAKSTOP'
143
def xsplit(infile):
	"""(infile) -- Splits the infile into two files.
	'infile.index' contains the index segment.
	'infile.dat' contains the data segment."""
	infile = _unicode_decode(infile,
		encoding=_encodings['fs'], errors='strict')
	with open(_unicode_encode(infile,
		encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
		mydat = myfile.read()

	splits = xsplit_mem(mydat)
	if not splits:
		return False

	with open(_unicode_encode(infile + '.index',
		encoding=_encodings['fs'], errors='strict'), 'wb') as out:
		out.write(splits[0])
	with open(_unicode_encode(infile + '.dat',
		encoding=_encodings['fs'], errors='strict'), 'wb') as out:
		out.write(splits[1])
	return True
168
def xsplit_mem(mydat):
	"""Split an in-memory xpak segment into its index and data parts.

	Returns (index_bytes, data_bytes), or None if the "XPAKPACK" /
	"XPAKSTOP" framing is not present."""
	if not mydat.startswith(b'XPAKPACK'):
		return None
	if not mydat.endswith(b'XPAKSTOP'):
		return None
	# Decode the big-endian index length at offset 8 (inline decodeint).
	size_field = mydat[8:12]
	if sys.hexversion < 0x3000000:
		size_field = [ord(ch) for ch in size_field]
	indexsize = ((size_field[0] << 24) | (size_field[1] << 16)
		| (size_field[2] << 8) | size_field[3])
	# Header is 16 bytes; trailer is the 8-byte "XPAKSTOP".
	return (mydat[16:indexsize + 16], mydat[indexsize + 16:-8])
176
def getindex(infile):
	"""(infile) -- grabs the index segment from the infile and returns it.
	Returns None if the file does not start with an xpak header."""
	with open(_unicode_encode(infile,
		encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
		header = myfile.read(16)
		if header[0:8] != b'XPAKPACK':
			return None
		indexsize = decodeint(header[8:12])
		return myfile.read(indexsize)
189
def getboth(infile):
	"""(infile) -- grabs the index and data segments from the infile.
	Returns an array [indexSegment, dataSegment], or None on a bad header."""
	with open(_unicode_encode(infile,
		encoding=_encodings['fs'], errors='strict'), 'rb') as myfile:
		header = myfile.read(16)
		if header[0:8] != b'XPAKPACK':
			return None
		indexsize = decodeint(header[8:12])
		datasize = decodeint(header[12:16])
		myindex = myfile.read(indexsize)
		mydata = myfile.read(datasize)
	return myindex, mydata
205
def listindex(myindex):
	"""Print to the terminal the filenames listed in the indexglob passed in."""
	for name in getindex_mem(myindex):
		print(name)
210
def getindex_mem(myindex):
	"""Returns the filenames listed in the indexglob passed in."""
	def _u32(buf):
		# Big-endian 4-byte decode (inline equivalent of decodeint).
		if sys.hexversion < 0x3000000:
			buf = [ord(ch) for ch in buf]
		return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]

	names = []
	pos = 0
	total = len(myindex)
	while pos + 8 < total:
		namelen = _u32(myindex[pos:pos + 4])
		names.append(myindex[pos + 4:pos + 4 + namelen])
		# Skip the name plus the three 4-byte fields of each entry.
		pos = pos + namelen + 12
	return names
221
def searchindex(myindex, myitem):
	"""(index, item) -- Finds the offset and length of the file 'item' in the
	datasegment via the index 'index' provided.
	Returns (offset, length), or None if the item is not listed."""
	myitem = _unicode_encode(myitem,
		encoding=_encodings['repo.content'], errors='backslashreplace')
	item_len = len(myitem)
	index_len = len(myindex)
	pos = 0
	while pos + 8 < index_len:
		namelen = decodeint(myindex[pos:pos + 4])
		if namelen == item_len \
			and myitem == myindex[pos + 4:pos + 4 + namelen]:
			# Found: the offset/length fields follow the name.
			datapos = decodeint(myindex[pos + 4 + namelen:pos + 8 + namelen])
			datalen = decodeint(myindex[pos + 8 + namelen:pos + 12 + namelen])
			return datapos, datalen
		pos = pos + namelen + 12
	return None
239
def getitem(myid, myitem):
	"""Return the contents of 'myitem' from the (index, data) pair 'myid',
	or None if it is not present in the index."""
	myindex = myid[0]
	mydata = myid[1]
	location = searchindex(myindex, myitem)
	if location is None:
		return None
	offset, length = location
	return mydata[offset:offset + length]
247
def xpand(myid, mydest):
	"""Unpack the (index, data) pair 'myid' into the directory 'mydest'.

	Entries whose normalized paths would escape 'mydest' (via '../'
	components) are skipped."""
	mydest = normalize_path(mydest) + os.sep
	myindex = myid[0]
	mydata = myid[1]
	myindexlen = len(myindex)
	startpos = 0
	while ((startpos + 8) < myindexlen):
		namelen = decodeint(myindex[startpos:startpos + 4])
		datapos = decodeint(myindex[startpos + 4 + namelen:startpos + 8 + namelen])
		datalen = decodeint(myindex[startpos + 8 + namelen:startpos + 12 + namelen])
		myname = myindex[startpos + 4:startpos + 4 + namelen]
		# Advance before any 'continue' below; otherwise a skipped entry
		# would leave startpos unchanged and loop forever.
		startpos = startpos + namelen + 12
		myname = _unicode_decode(myname,
			encoding=_encodings['repo.content'], errors='replace')
		filename = os.path.join(mydest, myname.lstrip(os.sep))
		filename = normalize_path(filename)
		if not filename.startswith(mydest):
			# myname contains invalid ../ component(s)
			continue
		dirname = os.path.dirname(filename)
		if dirname and not os.path.exists(dirname):
			os.makedirs(dirname)
		with open(_unicode_encode(filename,
			encoding=_encodings['fs'], errors='strict'), 'wb') as mydat:
			mydat.write(mydata[datapos:datapos + datalen])
275
class tbz2(object):
	"""Accessor for the xpak segment appended to a tbz2 binary package:
	scan for, extract, and rewrite the segment in place.
	Layout: tar.bz2 + xpak + (xpak_offset) + "STOP"."""

	def __init__(self, myfile):
		# Path of the tbz2 file this instance operates on.
		self.file = myfile
		# Cached os.stat() result, used by scan() to skip rescans.
		self.filestat = None
		# The fields below are populated by scan().
		self.index = b''
		self.infosize = 0
		self.xpaksize = 0
		self.indexsize = None
		self.datasize = None
		self.indexpos = None
		self.datapos = None

	def decompose(self, datadir, cleanup=1):
		"""Alias for unpackinfo() --- Complement to recompose() but optionally
		deletes the destination directory. Extracts the xpak from the tbz2 into
		the directory provided. Raises IOError if scan() fails.
		Returns result of unpackinfo()."""
		if not self.scan():
			raise IOError
		if cleanup:
			self.cleanup(datadir)
		if not os.path.exists(datadir):
			os.makedirs(datadir)
		return self.unpackinfo(datadir)

	def compose(self, datadir, cleanup=0):
		"""Alias for recompose()."""
		return self.recompose(datadir, cleanup)

	def recompose(self, datadir, cleanup=0, break_hardlinks=True):
		"""Creates an xpak segment from the datadir provided, truncates the tbz2
		to the end of regular data if an xpak segment already exists, and adds
		the new segment to the file with terminating info."""
		xpdata = xpak(datadir)
		self.recompose_mem(xpdata, break_hardlinks=break_hardlinks)
		if cleanup:
			self.cleanup(datadir)

	def recompose_mem(self, xpdata, break_hardlinks=True):
		"""
		Update the xpak segment.
		@param xpdata: A new xpak segment to be written, like that returned
			from the xpak_mem() function.
		@param break_hardlinks: If hardlinks exist, create a copy in order
			to break them. This makes it safe to use hardlinks to create
			cheap snapshots of the repository, which is useful for solving
			race conditions on binhosts as described here:
			http://code.google.com/p/chromium-os/issues/detail?id=3225.
			Default is True.
		"""
		self.scan()  # Don't care about condition... We'll rewrite the data anyway.

		if break_hardlinks and self.filestat and self.filestat.st_nlink > 1:
			tmp_fname = "%s.%d" % (self.file, os.getpid())
			shutil.copyfile(self.file, tmp_fname)
			# NOTE(review): permissions are applied to self.file, which is
			# then replaced by the rename below — confirm whether tmp_fname
			# was the intended target.
			try:
				portage.util.apply_stat_permissions(self.file, self.filestat)
			except portage.exception.OperationNotPermitted:
				pass
			os.rename(tmp_fname, self.file)

		myfile = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'ab+')
		# Drop any existing xpak segment, then append the new segment
		# followed by its offset and the "STOP" trailer.
		myfile.seek(-self.xpaksize, 2)  # 0,2 or -0,2 just mean EOF.
		myfile.truncate()
		myfile.write(xpdata + encodeint(len(xpdata)) + b'STOP')
		myfile.flush()
		myfile.close()
		return 1

	def cleanup(self, datadir):
		"""Recursively remove datadir; a missing directory is not an error."""
		datadir_split = os.path.split(datadir)
		if len(datadir_split) >= 2 and len(datadir_split[1]) > 0:
			# This is potentially dangerous,
			# thus the above sanity check.
			try:
				shutil.rmtree(datadir)
			except OSError as oe:
				if oe.errno == errno.ENOENT:
					pass
				else:
					raise oe

	def scan(self):
		"""Scans the tbz2 to locate the xpak segment and setup internal values.
		This function is called by relevant functions already.
		Returns 0 on failure, 1 when the cached scan is still valid, and 2
		after a fresh successful scan."""
		a = None
		try:
			mystat = os.stat(self.file)
			if self.filestat:
				changed = 0
				if mystat.st_size != self.filestat.st_size \
					or mystat.st_mtime != self.filestat.st_mtime \
					or mystat.st_ctime != self.filestat.st_ctime:
					changed = True
				if not changed:
					# File unchanged since the last scan; reuse cached values.
					return 1
			self.filestat = mystat
			a = open(_unicode_encode(self.file,
				encoding=_encodings['fs'], errors='strict'), 'rb')
			# The last 16 bytes are: "XPAKSTOP" + (xpak_offset) + "STOP".
			a.seek(-16, 2)
			trailer = a.read()
			self.infosize = 0
			self.xpaksize = 0
			if trailer[-4:] != b'STOP':
				return 0
			if trailer[0:8] != b'XPAKSTOP':
				return 0
			self.infosize = decodeint(trailer[8:12])
			# The segment size includes the 8 trailing bytes (offset + "STOP").
			self.xpaksize = self.infosize + 8
			a.seek(-(self.xpaksize), 2)
			header = a.read(16)
			if header[0:8] != b'XPAKPACK':
				return 0
			self.indexsize = decodeint(header[8:12])
			self.datasize = decodeint(header[12:16])
			self.indexpos = a.tell()
			self.index = a.read(self.indexsize)
			self.datapos = a.tell()
			return 2
		except SystemExit:
			raise
		except:
			# Best-effort: any failure (missing file, short file, bad
			# framing) is reported as "no xpak segment found".
			return 0
		finally:
			if a is not None:
				a.close()

	def filelist(self):
		"""Return an array of each file listed in the index."""
		if not self.scan():
			return None
		return getindex_mem(self.index)

	def getfile(self, myfile, mydefault=None):
		"""Finds 'myfile' in the data segment and returns it."""
		if not self.scan():
			return None
		myresult = searchindex(self.index, myfile)
		if not myresult:
			return mydefault
		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		a.seek(self.datapos + myresult[0], 0)
		myreturn = a.read(myresult[1])
		a.close()
		return myreturn

	def getelements(self, myfile):
		"""A split/array representation of tbz2.getfile()"""
		mydat = self.getfile(myfile)
		if not mydat:
			return []
		return mydat.split()

	def unpackinfo(self, mydest):
		"""Unpacks all the files from the dataSegment into 'mydest'.
		Entries whose normalized paths escape 'mydest' are skipped."""
		if not self.scan():
			return 0
		mydest = normalize_path(mydest) + os.sep
		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		if not os.path.exists(mydest):
			os.makedirs(mydest)
		startpos = 0
		while ((startpos + 8) < self.indexsize):
			namelen = decodeint(self.index[startpos:startpos + 4])
			datapos = decodeint(self.index[startpos + 4 + namelen:startpos + 8 + namelen])
			datalen = decodeint(self.index[startpos + 8 + namelen:startpos + 12 + namelen])
			myname = self.index[startpos + 4:startpos + 4 + namelen]
			# Advance before any 'continue' below; otherwise a skipped
			# entry would leave startpos unchanged and loop forever.
			startpos = startpos + namelen + 12
			myname = _unicode_decode(myname,
				encoding=_encodings['repo.content'], errors='replace')
			filename = os.path.join(mydest, myname.lstrip(os.sep))
			filename = normalize_path(filename)
			if not filename.startswith(mydest):
				# myname contains invalid ../ component(s)
				continue
			dirname = os.path.dirname(filename)
			if dirname:
				if not os.path.exists(dirname):
					os.makedirs(dirname)
			mydat = open(_unicode_encode(filename,
				encoding=_encodings['fs'], errors='strict'), 'wb')
			a.seek(self.datapos + datapos)
			mydat.write(a.read(datalen))
			mydat.close()
		a.close()
		return 1

	def get_data(self):
		"""Returns all the files from the dataSegment as a map object."""
		if not self.scan():
			return {}
		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		mydata = {}
		startpos = 0
		while ((startpos + 8) < self.indexsize):
			namelen = decodeint(self.index[startpos:startpos + 4])
			datapos = decodeint(self.index[startpos + 4 + namelen:startpos + 8 + namelen])
			datalen = decodeint(self.index[startpos + 8 + namelen:startpos + 12 + namelen])
			myname = self.index[startpos + 4:startpos + 4 + namelen]
			a.seek(self.datapos + datapos)
			mydata[myname] = a.read(datalen)
			startpos = startpos + namelen + 12
		a.close()
		return mydata

	def getboth(self):
		"""Returns an array [indexSegment, dataSegment]"""
		if not self.scan():
			return None

		a = open(_unicode_encode(self.file,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		a.seek(self.datapos)
		mydata = a.read(self.datasize)
		a.close()

		return self.index, mydata
498