Package portage :: Module checksum
[hide private]

Source Code for Module portage.checksum

  1  # checksum.py -- core Portage functionality 
  2  # Copyright 1998-2012 Gentoo Foundation 
  3  # Distributed under the terms of the GNU General Public License v2 
  4   
  5  import portage 
  6  from portage.const import PRELINK_BINARY,HASHING_BLOCKSIZE 
  7  from portage.localization import _ 
  8  from portage import os 
  9  from portage import _encodings 
 10  from portage import _unicode_encode 
 11  import errno 
 12  import stat 
 13  import sys 
 14  import subprocess 
 15  import tempfile 
 16   
 17  #dict of all available hash functions 
 18  hashfunc_map = {} 
 19  hashorigin_map = {} 
 20   
def _open_file(filename):
    """
    Open a file for reading in binary mode.

    The filename is encoded with the _encodings['fs'] encoding (strict
    error handling) before being handed to open().

    @param filename: Path of the file to open
    @type filename: String
    @rtype: file object
    @return: The file, opened with mode 'rb'
    @raise portage.exception.OperationNotPermitted: on EPERM
    @raise portage.exception.PermissionDenied: on EACCES
    @raise portage.exception.FileNotFound: on ENOENT
    @raise IOError: any other IOError is re-raised unchanged
    """
    try:
        encoded_name = _unicode_encode(filename,
            encoding=_encodings['fs'], errors='strict')
        return open(encoded_name, 'rb')
    except IOError as e:
        func_call = "open('%s')" % filename
        # errno -> (exception class, constructor argument)
        translations = {
            errno.EPERM: (portage.exception.OperationNotPermitted, func_call),
            errno.EACCES: (portage.exception.PermissionDenied, func_call),
            errno.ENOENT: (portage.exception.FileNotFound, filename),
        }
        if e.errno in translations:
            exc_class, exc_arg = translations[e.errno]
            raise exc_class(exc_arg)
        # Unrecognized errno: propagate the original IOError.
        raise
35
class _generate_hash_function(object):
    """
    Callable that checksums a file using a given hash constructor.

    Creating an instance registers it in hashfunc_map under the given
    hash type and records the implementation origin in hashorigin_map.
    """

    __slots__ = ("_hashobject",)

    def __init__(self, hashtype, hashobject, origin="unknown"):
        """
        @param hashtype: Key under which this instance is registered
        @type hashtype: String
        @param hashobject: Callable that takes no arguments and returns a
            new hash object providing update() and hexdigest()
        @type hashobject: callable
        @param origin: Label describing which implementation provides
            the hash
        @type origin: String
        """
        self._hashobject = hashobject
        # Registration is a side effect of construction.
        hashfunc_map[hashtype] = self
        hashorigin_map[hashtype] = origin

    def __call__(self, filename):
        """
        Run a checksum against a file.

        @param filename: File to run the checksum against
        @type filename: String
        @rtype: Tuple
        @return: The hex digest and the number of bytes that were read
        """
        with _open_file(filename) as stream:
            chunk_size = HASHING_BLOCKSIZE
            total = 0
            digest = self._hashobject()
            # Feed the file to the hash object one block at a time so
            # that the whole file is never held in memory.
            while True:
                chunk = stream.read(chunk_size)
                if not chunk:
                    break
                digest.update(chunk)
                total += len(chunk)

        return (digest.hexdigest(), total)
# Define hash functions, try to use the best module available. Later definitions
# override earlier ones, because every _generate_hash_function() call
# replaces the hashfunc_map/hashorigin_map entry for its hash type.

# Use the internal modules as last fallback
try:
    from hashlib import md5 as _new_md5
except ImportError:
    from md5 import new as _new_md5

md5hash = _generate_hash_function("MD5", _new_md5, origin="internal")

try:
    from hashlib import sha1 as _new_sha1
except ImportError:
    from sha import new as _new_sha1

sha1hash = _generate_hash_function("SHA1", _new_sha1, origin="internal")

# Try to use mhash if available
# mhash causes GIL presently, so it gets less priority than hashlib and
# pycrypto. However, it might be the only accelerated implementation of
# WHIRLPOOL available.
try:
    import mhash, functools
    md5hash = _generate_hash_function("MD5", functools.partial(mhash.MHASH, mhash.MHASH_MD5), origin="mhash")
    sha1hash = _generate_hash_function("SHA1", functools.partial(mhash.MHASH, mhash.MHASH_SHA1), origin="mhash")
    sha256hash = _generate_hash_function("SHA256", functools.partial(mhash.MHASH, mhash.MHASH_SHA256), origin="mhash")
    sha512hash = _generate_hash_function("SHA512", functools.partial(mhash.MHASH, mhash.MHASH_SHA512), origin="mhash")
    for local_name, hash_name in (("rmd160", "ripemd160"), ("whirlpool", "whirlpool")):
        # Probe for the same constant that is fetched below (named after
        # hash_name, not local_name). Otherwise the guard and the getattr
        # disagree for RMD160/RIPEMD160.
        if hasattr(mhash, 'MHASH_%s' % hash_name.upper()):
            globals()['%shash' % local_name] = \
                _generate_hash_function(local_name.upper(),
                functools.partial(mhash.MHASH, getattr(mhash, 'MHASH_%s' % hash_name.upper())),
                origin='mhash')
except ImportError:
    pass

# Use pycrypto when available, prefer it over the internal fallbacks
# Check for 'new' attributes, since they can be missing if the module
# is broken somehow.
try:
    from Crypto.Hash import SHA256, RIPEMD
    sha256hash = getattr(SHA256, 'new', None)
    if sha256hash is not None:
        sha256hash = _generate_hash_function("SHA256",
            sha256hash, origin="pycrypto")
    rmd160hash = getattr(RIPEMD, 'new', None)
    if rmd160hash is not None:
        rmd160hash = _generate_hash_function("RMD160",
            rmd160hash, origin="pycrypto")
except ImportError:
    pass

# Use hashlib from python-2.5 if available and prefer it over pycrypto and internal fallbacks.
# Need special handling for RMD160/WHIRLPOOL as they may not always be provided by hashlib.
try:
    import hashlib, functools

    md5hash = _generate_hash_function("MD5", hashlib.md5, origin="hashlib")
    sha1hash = _generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
    sha256hash = _generate_hash_function("SHA256", hashlib.sha256, origin="hashlib")
    sha512hash = _generate_hash_function("SHA512", hashlib.sha512, origin="hashlib")
    for local_name, hash_name in (("rmd160", "ripemd160"), ("whirlpool", "whirlpool")):
        try:
            # hashlib.new() raises ValueError for algorithms that this
            # build does not provide.
            hashlib.new(hash_name)
        except ValueError:
            pass
        else:
            globals()['%shash' % local_name] = \
                _generate_hash_function(local_name.upper(),
                functools.partial(hashlib.new, hash_name),
                origin='hashlib')

except ImportError:
    pass

_whirlpool_unaccelerated = False
if "WHIRLPOOL" not in hashfunc_map:
    # Bundled WHIRLPOOL implementation, used only when none of the
    # modules above registered one.
    _whirlpool_unaccelerated = True
    from portage.util.whirlpool import new as _new_whirlpool
    whirlpoolhash = _generate_hash_function("WHIRLPOOL", _new_whirlpool, origin="bundled")

# Use python-fchksum if available, prefer it over all other MD5 implementations
try:
    from fchksum import fmd5t as md5hash
    # fmd5t is registered directly instead of being wrapped in
    # _generate_hash_function, so both maps are updated by hand.
    hashfunc_map["MD5"] = md5hash
    hashorigin_map["MD5"] = "python-fchksum"

except ImportError:
    pass

# There is only one implementation for size
def getsize(filename):
    """
    Pseudo hash function that reports the size of a file.

    The size is returned twice so that the result has the same
    two-element shape as the results of the real hash functions.

    @param filename: File to examine
    @type filename: String
    @rtype: Tuple
    @return: (size, size), where size is st_size from os.stat()
    """
    return (os.stat(filename).st_size,) * 2
hashfunc_map["size"] = getsize

# end actual hash functions

# Detect at import time whether a working prelink binary is present.
prelink_capable = False
if os.path.exists(PRELINK_BINARY):
    cmd = [PRELINK_BINARY, "--version"]
    if sys.hexversion < 0x3000000 or sys.hexversion >= 0x3020000:
        # Python 3.1 does not support bytes in Popen args.
        cmd = [_unicode_encode(x, encoding=_encodings['fs'], errors='strict')
            for x in cmd]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    # Drain the pipe so that the child cannot block on a full buffer;
    # the output itself is not needed.
    proc.communicate()
    status = proc.wait()
    # Popen.wait() returns the exit code itself, not a wait(2) status
    # word, so it is compared directly. Decoding it with os.WIFEXITED /
    # os.WEXITSTATUS would treat an exit code of 128 as success.
    if status == os.EX_OK:
        prelink_capable = True
    del cmd, proc, status
def is_prelinkable_elf(filename):
    """
    Check whether a file looks like an ELF executable or shared object.

    Only the first 17 bytes of the file are read: the ELF magic and the
    byte at offset 16, which is compared against 2 and 3.

    @param filename: File to inspect
    @type filename: String
    @rtype: Boolean
    @return: True if the file starts with the ELF magic and the byte at
        offset 16 is 2 (ET_EXEC) or 3 (ET_DYN), False otherwise
    """
    with _open_file(filename) as f:
        magic = f.read(17)
    # Slice instead of index: indexing bytes yields an int on Python 3,
    # which never compares equal to a bytes object, while a one-byte
    # slice is bytes/str on both Python 2 and Python 3.
    return (len(magic) == 17 and magic.startswith(b'\x7fELF') and
        magic[16:17] in (b'\x02', b'\x03')) # 2=ET_EXEC, 3=ET_DYN
188
def perform_md5(x, calc_prelink=0):
    """
    Compute the MD5 digest of a file.

    @param x: File to run the checksum against
    @type x: String
    @param calc_prelink: Passed through to perform_checksum
    @type calc_prelink: Integer
    @return: The first element of the perform_checksum result (the hash)
    """
    result = perform_checksum(x, "MD5", calc_prelink)
    return result[0]
191
def _perform_md5_merge(x, **kwargs):
    """
    Variant of perform_md5 that first encodes the filename with the
    _encodings['merge'] encoding (strict error handling).

    @param x: File to run the checksum against
    @type x: String
    @param kwargs: Keyword arguments forwarded to perform_md5
    @return: The result of perform_md5 for the encoded filename
    """
    encoded_name = _unicode_encode(x,
        encoding=_encodings['merge'], errors='strict')
    return perform_md5(encoded_name, **kwargs)
195
def perform_all(x, calc_prelink=0):
    """
    Run every function registered in hashfunc_map against a file.

    @param x: File to run the checksums against
    @type x: String
    @param calc_prelink: Passed through to perform_checksum
    @type calc_prelink: Integer
    @rtype: dict
    @return: Mapping of each hashfunc_map key to the first element of
        the corresponding perform_checksum result
    """
    return dict(
        (hash_name, perform_checksum(x, hash_name, calc_prelink)[0])
        for hash_name in hashfunc_map)
201
def get_valid_checksum_keys():
    """
    List the names of all registered checksum functions.

    @rtype: List
    @return: A new list containing the keys of hashfunc_map
    """
    return [hash_name for hash_name in hashfunc_map]
204
def get_hash_origin(hashtype):
    """
    Look up which implementation provides a registered hash function.

    @param hashtype: Name of the hash function
    @type hashtype: String
    @rtype: String
    @return: The origin recorded in hashorigin_map, or "unknown" if no
        origin was recorded for a registered hash
    @raise KeyError: if hashtype is not registered in hashfunc_map
    """
    if hashtype in hashfunc_map:
        return hashorigin_map.get(hashtype, "unknown")
    raise KeyError(hashtype)
209
def _filter_unaccelarated_hashes(digests):
    """
    Drop the unaccelerated WHIRLPOOL digest when another verifiable
    digest is available.

    If multiple digests are available and some are unaccelerated,
    then return a new dict that omits the unaccelerated ones. This
    allows extreme performance problems like bug #425046 to be
    avoided whenever practical, especially for cases like stage
    builds where acceleration may not be available for some hashes
    due to minimization of dependencies.

    @param digests: dictionary of digests
    @type digests: dict
    @rtype: dict
    @return: A copy without "WHIRLPOOL", or the same dict if nothing
        needs to be removed
    """
    if not (_whirlpool_unaccelerated and "WHIRLPOOL" in digests):
        return digests

    usable = set(digests).intersection(hashfunc_map)
    usable.discard("size")
    if len(usable) <= 1:
        # WHIRLPOOL is the only verifiable digest, so it has to stay.
        return digests

    filtered = dict(digests)
    filtered.pop("WHIRLPOOL")
    return filtered
227
class _hash_filter(object):
    """
    Implements filtering for PORTAGE_CHECKSUM_FILTER.

    The filter string is a whitespace separated list of tokens, matched
    case-insensitively. A token is either a hash name, "*" (any hash),
    or one of those prefixed with "-" to reject it. Tokens are evaluated
    from last to first and the first one that matches decides.
    """

    __slots__ = ('transparent', '_tokens',)

    def __init__(self, filter_str):
        """
        @param filter_str: The filter expression
        @type filter_str: String
        """
        parsed = filter_str.upper().split()
        if parsed and parsed[-1] != "*":
            # Stored in reverse so that later tokens are checked first.
            ordered = tuple(reversed(parsed))
        else:
            # An empty filter, or one that ends with "*", accepts
            # everything.
            ordered = ()
        self.transparent = not ordered
        self._tokens = ordered

    def __call__(self, hash_name):
        """
        @param hash_name: Upper-case name of the hash to test
        @type hash_name: String
        @rtype: Boolean
        @return: True if the hash is accepted by the filter, False if it
            is rejected or if no token matches it
        """
        if self.transparent:
            return True
        candidates = ("*", hash_name)
        for token in self._tokens:
            if token in candidates:
                return True
            if token.startswith("-") and token[1:] in candidates:
                return False
        return False
254
def _apply_hash_filter(digests, hash_filter):
    """
    Return a new dict containing the filtered digests, or the same
    dict if no changes are necessary. This will always preserve at
    least one verifiable digest, in order to ensure that they are not
    all discarded. The "size" entry is never removed.

    @param digests: dictionary of digests
    @type digests: dict
    @param hash_filter: A callable that takes a single hash name
        argument, and returns True if the hash is to be used or
        False otherwise
    @type hash_filter: callable
    @rtype: dict
    """
    candidates = set(digests).intersection(hashfunc_map)
    candidates.discard("size")
    if len(candidates) <= 1:
        # Nothing can be removed without discarding every digest.
        return digests

    changed = False
    for hash_name in tuple(candidates):
        if hash_filter(hash_name):
            continue
        changed = True
        candidates.remove(hash_name)
        if len(candidates) == 1:
            # Keep the last remaining digest regardless of the filter.
            break

    if not changed:
        return digests

    return dict((k, v) for (k, v) in digests.items()
        if k == "size" or k in candidates)
285
def verify_all(filename, mydict, calc_prelink=0, strict=0):
    """
    Verify all checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param mydict: Recorded digests for this single file, keyed by hash
        name. The "size" entry is compared against the size on disk.
    @type mydict: dict
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @param strict: Enable/Disable strict checking (which stops exactly at a checksum failure and throws an exception)
    @type strict: Integer
    @rtype: Tuple
    @return: Result of the checks and possible message:
        1) If size fails, False, and a tuple containing a message, the actual size, and the recorded size
        2) If there is an os error, False, and a tuple containing the system error followed by 2 nulls
        3) If none of the recorded hash types is available, False, and a tuple containing a message,
           the recorded hash types, and the available hash types
        4) If a checksum fails, False and a tuple containing a message, the actual hash, and the recorded hash
        5) If all checks succeed, return True and a fake reason
    @raise portage.exception.FileNotFound: if the file does not exist
    @raise portage.exception.DigestException: if strict is enabled and a
        checksum does not match
    """
    # Dict relates to single file only.
    # returns: (passed,reason)
    file_is_ok = True
    reason = "Reason unknown"
    try:
        mysize = os.stat(filename)[stat.ST_SIZE]
        if mydict["size"] != mysize:
            return False,(_("Filesize does not match recorded size"), mysize, mydict["size"])
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        return False, (str(e), None, None)

    verifiable_hash_types = set(mydict).intersection(hashfunc_map)
    verifiable_hash_types.discard("size")
    if not verifiable_hash_types:
        # None of the recorded hash types can be computed here. Report
        # what was recorded against what is available.
        expected = " ".join(sorted(k for k in hashfunc_map if k != "size"))
        got = " ".join(sorted(k for k in mydict if k != "size"))
        return False, (_("Insufficient data for checksum verification"), got, expected)

    for x in sorted(mydict):
        if x == "size":
            continue
        elif x in hashfunc_map:
            myhash = perform_checksum(filename, x, calc_prelink=calc_prelink)[0]
            if mydict[x] != myhash:
                if strict:
                    # The placeholder must be %(file)s; the previous
                    # '$(file)s' was emitted literally, so the message
                    # never named the file.
                    raise portage.exception.DigestException(
                        ("Failed to verify '%(file)s' on " + \
                        "checksum type '%(type)s'") % \
                        {"file" : filename, "type" : x})
                else:
                    file_is_ok = False
                    reason = (("Failed on %s verification" % x), myhash,mydict[x])
                    break
    return file_is_ok,reason
347
def perform_checksum(filename, hashname="MD5", calc_prelink=0):
    """
    Run a specific checksum against a file. The filename can
    be either unicode or an encoded byte string. It is passed through
    _unicode_encode with the _encodings['fs'] encoding and strict error
    handling before it is used.

    When calc_prelink is set, prelink support was detected, and the file
    passes is_prelinkable_elf(), the output of "prelink --verify" is
    written to a temporary file and that file is checksummed instead.
    The temporary file is removed before returning.

    @param filename: File to run the checksum against
    @type filename: String
    @param hashname: The type of hash function to run
    @type hashname: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Tuple
    @return: The hash and size of the data
    @raise portage.exception.DigestException: if hashname is not in
        hashfunc_map
    @raise portage.exception.FileNotFound: if hashing fails with ENOENT
        or ESTALE
    @raise portage.exception.PermissionDenied: if hashing fails with the
        errno associated with PermissionDenied
    """
    # Declared global because the flag is cleared below when the prelink
    # command turns out to be missing.
    global prelink_capable
    # Make sure filename is encoded with the correct encoding before
    # it is passed to spawn (for prelink) and/or the hash function.
    filename = _unicode_encode(filename,
        encoding=_encodings['fs'], errors='strict')
    # File that is actually hashed; replaced by the temp file when
    # prelink --verify succeeds.
    myfilename = filename
    prelink_tmpfile = None
    try:
        if (calc_prelink and prelink_capable and
            is_prelinkable_elf(filename)):
            # Create non-prelinked temporary file to checksum.
            # Files rejected by prelink are summed in place.
            try:
                tmpfile_fd, prelink_tmpfile = tempfile.mkstemp()
                try:
                    # fd 1 of the spawned process is mapped to the
                    # temporary file.
                    retval = portage.process.spawn([PRELINK_BINARY,
                        "--verify", filename], fd_pipes={1:tmpfile_fd})
                finally:
                    os.close(tmpfile_fd)
                if retval == os.EX_OK:
                    myfilename = prelink_tmpfile
            except portage.exception.CommandNotFound:
                # This happens during uninstallation of prelink.
                prelink_capable = False
        try:
            if hashname not in hashfunc_map:
                raise portage.exception.DigestException(hashname + \
                    " hash function not available (needs dev-python/pycrypto)")
            myhash, mysize = hashfunc_map[hashname](myfilename)
        except (OSError, IOError) as e:
            # Translate common errnos into portage exceptions; anything
            # else is re-raised unchanged.
            if e.errno in (errno.ENOENT, errno.ESTALE):
                raise portage.exception.FileNotFound(myfilename)
            elif e.errno == portage.exception.PermissionDenied.errno:
                raise portage.exception.PermissionDenied(myfilename)
            raise
        return myhash, mysize
    finally:
        # Always remove the temporary file; ignore it if it is already
        # gone.
        if prelink_tmpfile:
            try:
                os.unlink(prelink_tmpfile)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
                del e
408
def perform_multiple_checksums(filename, hashes=["MD5"], calc_prelink=0):
    """
    Run a group of checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param hashes: A list of checksum functions to run against the file
    @type hashes: List
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: dict
    @return: A dictionary in the form:
        return_value[hash_name] = hash_result
        for each given checksum, where hash_result is the first element
        of the perform_checksum result
    @raise portage.exception.DigestException: if a requested hash is not
        in hashfunc_map
    """
    results = {}
    for hash_name in hashes:
        if hash_name in hashfunc_map:
            results[hash_name] = perform_checksum(filename, hash_name, calc_prelink)[0]
            continue
        raise portage.exception.DigestException(hash_name+" hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)")
    return results
430