
Source Code for Module portage.checksum

# checksum.py -- core Portage functionality
# Copyright 1998-2017 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

import portage
from portage.const import PRELINK_BINARY, HASHING_BLOCKSIZE
from portage.localization import _
from portage import os
from portage import _encodings
from portage import _unicode_decode, _unicode_encode
import errno
import functools
import hashlib
import stat
import sys
import subprocess
import tempfile

# Summary of all available hashes and their implementations,
# most preferred first. Please keep this in sync with logic below.
# ================================================================
#
# MD5: hashlib
# SHA1: hashlib
# SHA256: hashlib
# SHA512: hashlib
# RMD160: hashlib, pygcrypt, pycrypto, mhash
# WHIRLPOOL: hashlib, pygcrypt, mhash, bundled
# BLAKE2B (512): hashlib (3.6+), pycrypto
# BLAKE2S (512): hashlib (3.6+), pycrypto
# SHA3_256: hashlib (3.6+), pysha3, pygcrypt, pycrypto
# SHA3_512: hashlib (3.6+), pysha3, pygcrypt, pycrypto
# STREEBOG256: pygcrypt, pygost
# STREEBOG512: pygcrypt, pygost


# dict of all available hash functions
hashfunc_map = {}
hashorigin_map = {}

def _open_file(filename):
    try:
        return open(_unicode_encode(filename,
            encoding=_encodings['fs'], errors='strict'), 'rb')
    except IOError as e:
        func_call = "open('%s')" % _unicode_decode(filename)
        if e.errno == errno.EPERM:
            raise portage.exception.OperationNotPermitted(func_call)
        elif e.errno == errno.EACCES:
            raise portage.exception.PermissionDenied(func_call)
        elif e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        else:
            raise

class _generate_hash_function(object):

    __slots__ = ("_hashobject",)

    def __init__(self, hashtype, hashobject, origin="unknown"):
        self._hashobject = hashobject
        hashfunc_map[hashtype] = self
        hashorigin_map[hashtype] = origin

    def checksum_str(self, data):
        """
        Obtain a checksum of a byte-string.

        @param data: Data to hash
        @type data: bytes
        @return: The hash of the data (hex-digest)
        """
        checksum = self._hashobject()
        checksum.update(data)
        return checksum.hexdigest()

    def checksum_file(self, filename):
        """
        Run a checksum against a file.

        @param filename: File to run the checksum against
        @type filename: String
        @return: The hash and size of the data
        """
        with _open_file(filename) as f:
            blocksize = HASHING_BLOCKSIZE
            size = 0
            checksum = self._hashobject()
            data = f.read(blocksize)
            while data:
                checksum.update(data)
                size = size + len(data)
                data = f.read(blocksize)

        return (checksum.hexdigest(), size)


# Define hash functions, try to use the best module available. Preferred
# modules should go first, latter ones should check if the hashes aren't
# already defined.


# Use hashlib from python-2.5 if available and prefer it over pycrypto and internal fallbacks.
# Need special handling for RMD160/WHIRLPOOL as they may not always be provided by hashlib.
_generate_hash_function("MD5", hashlib.md5, origin="hashlib")
_generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
_generate_hash_function("SHA256", hashlib.sha256, origin="hashlib")
_generate_hash_function("SHA512", hashlib.sha512, origin="hashlib")
for local_name, hash_name in (
        ("RMD160", "ripemd160"),
        ("WHIRLPOOL", "whirlpool"),
        # available since Python 3.6
        ("BLAKE2B", "blake2b"),
        ("BLAKE2S", "blake2s"),
        ("SHA3_256", "sha3_256"),
        ("SHA3_512", "sha3_512"),
        ):
    try:
        hashlib.new(hash_name)
    except ValueError:
        pass
    else:
        _generate_hash_function(local_name,
            functools.partial(hashlib.new, hash_name),
            origin='hashlib')


# Support using pysha3 as fallback for python<3.6
if "SHA3_256" not in hashfunc_map or "SHA3_512" not in hashfunc_map:
    try:
        import sha3

        _generate_hash_function("SHA3_256", sha3.sha3_256, origin="pysha3")
        _generate_hash_function("SHA3_512", sha3.sha3_512, origin="pysha3")
    except ImportError:
        pass


# Support pygcrypt as fallback using optimized routines from libgcrypt
# (GnuPG).
gcrypt_algos = frozenset(('RMD160', 'WHIRLPOOL', 'SHA3_256', 'SHA3_512',
    'STREEBOG256', 'STREEBOG512'))
if gcrypt_algos.difference(hashfunc_map):
    try:
        import binascii
        import pygcrypt.hashcontext

        class GCryptHashWrapper(object):
            def __init__(self, algo):
                self._obj = pygcrypt.hashcontext.HashContext(algo=algo)

            def update(self, data):
                self._obj.write(data)

            def hexdigest(self):
                return binascii.b2a_hex(self._obj.read()).decode()

        name_mapping = {
            'RMD160': 'ripemd160',
            'WHIRLPOOL': 'whirlpool',
            'SHA3_256': 'sha3-256',
            'SHA3_512': 'sha3-512',
            'STREEBOG256': 'stribog256',
            'STREEBOG512': 'stribog512',
        }

        for local_name, gcry_name in name_mapping.items():
            try:
                pygcrypt.hashcontext.HashContext(algo=gcry_name)
            except Exception:  # yes, it throws Exception...
                pass
            else:
                _generate_hash_function(local_name,
                    functools.partial(GCryptHashWrapper, gcry_name),
                    origin="pygcrypt")
    except ImportError:
        pass


# Use pycrypto when available, prefer it over the internal fallbacks
# Check for 'new' attributes, since they can be missing if the module
# is broken somehow.
if 'RMD160' not in hashfunc_map:
    try:
        from Crypto.Hash import RIPEMD
        rmd160hash_ = getattr(RIPEMD, 'new', None)
        if rmd160hash_ is not None:
            _generate_hash_function("RMD160",
                rmd160hash_, origin="pycrypto")
    except ImportError:
        pass

# The following hashes were added in pycryptodome (pycrypto fork)
if 'BLAKE2B' not in hashfunc_map:
    try:
        from Crypto.Hash import BLAKE2b
        blake2bhash_ = getattr(BLAKE2b, 'new', None)
        if blake2bhash_ is not None:
            _generate_hash_function("BLAKE2B",
                functools.partial(blake2bhash_, digest_bytes=64), origin="pycrypto")
    except ImportError:
        pass

if 'BLAKE2S' not in hashfunc_map:
    try:
        from Crypto.Hash import BLAKE2s
        blake2shash_ = getattr(BLAKE2s, 'new', None)
        if blake2shash_ is not None:
            _generate_hash_function("BLAKE2S",
                functools.partial(blake2shash_, digest_bytes=32), origin="pycrypto")
    except ImportError:
        pass

if 'SHA3_256' not in hashfunc_map:
    try:
        from Crypto.Hash import SHA3_256
        sha3_256hash_ = getattr(SHA3_256, 'new', None)
        if sha3_256hash_ is not None:
            _generate_hash_function("SHA3_256",
                sha3_256hash_, origin="pycrypto")
    except ImportError:
        pass

if 'SHA3_512' not in hashfunc_map:
    try:
        from Crypto.Hash import SHA3_512
        sha3_512hash_ = getattr(SHA3_512, 'new', None)
        if sha3_512hash_ is not None:
            _generate_hash_function("SHA3_512",
                sha3_512hash_, origin="pycrypto")
    except ImportError:
        pass


# Try to use mhash if available
# mhash causes GIL presently, so it gets less priority than hashlib and
# pycrypto. However, it might be the only accelerated implementation of
# WHIRLPOOL available.
if 'RMD160' not in hashfunc_map or 'WHIRLPOOL' not in hashfunc_map:
    try:
        import mhash
        for local_name, hash_name in (("RMD160", "RIPEMD160"), ("WHIRLPOOL", "WHIRLPOOL")):
            if local_name not in hashfunc_map and hasattr(mhash, 'MHASH_%s' % hash_name):
                _generate_hash_function(local_name,
                    functools.partial(mhash.MHASH, getattr(mhash, 'MHASH_%s' % hash_name)),
                    origin='mhash')
    except ImportError:
        pass


# Support pygost as fallback streebog provider
# It's mostly provided as a reference implementation; it's pure Python,
# slow and reads all data to memory (i.e. doesn't hash on update()...)
if 'STREEBOG256' not in hashfunc_map or 'STREEBOG512' not in hashfunc_map:
    try:
        import pygost.gost34112012

        _generate_hash_function("STREEBOG256",
            functools.partial(pygost.gost34112012.GOST34112012, digest_size=32), origin="pygost")
        _generate_hash_function("STREEBOG512",
            functools.partial(pygost.gost34112012.GOST34112012, digest_size=64), origin="pygost")
    except ImportError:
        pass


_whirlpool_unaccelerated = False
if "WHIRLPOOL" not in hashfunc_map:
    # Bundled WHIRLPOOL implementation
    _whirlpool_unaccelerated = True
    from portage.util.whirlpool import new as _new_whirlpool
    _generate_hash_function("WHIRLPOOL", _new_whirlpool, origin="bundled")


# There is only one implementation for size
class SizeHash(object):
    def checksum_file(self, filename):
        size = os.stat(filename).st_size
        return (size, size)

hashfunc_map["size"] = SizeHash()

# cache all supported hash methods in a frozenset
hashfunc_keys = frozenset(hashfunc_map)

# end actual hash functions


prelink_capable = False
if os.path.exists(PRELINK_BINARY):
    cmd = [PRELINK_BINARY, "--version"]
    cmd = [_unicode_encode(x, encoding=_encodings['fs'], errors='strict')
        for x in cmd]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    proc.communicate()
    status = proc.wait()
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == os.EX_OK:
        prelink_capable = 1
    del cmd, proc, status

def is_prelinkable_elf(filename):
    f = _open_file(filename)
    try:
        magic = f.read(17)
    finally:
        f.close()
    # Compare via a one-byte slice so the test behaves the same for str
    # (Python 2) and bytes (Python 3) objects.
    return (len(magic) == 17 and magic.startswith(b'\x7fELF') and
        magic[16:17] in (b'\x02', b'\x03'))  # 2=ET_EXEC, 3=ET_DYN

def perform_md5(x, calc_prelink=0):
    return perform_checksum(x, "MD5", calc_prelink)[0]

def _perform_md5_merge(x, **kwargs):
    return perform_md5(_unicode_encode(x,
        encoding=_encodings['merge'], errors='strict'), **kwargs)

def perform_all(x, calc_prelink=0):
    mydict = {}
    for k in hashfunc_keys:
        mydict[k] = perform_checksum(x, k, calc_prelink)[0]
    return mydict

def get_valid_checksum_keys():
    return hashfunc_keys

def get_hash_origin(hashtype):
    if hashtype not in hashfunc_keys:
        raise KeyError(hashtype)
    return hashorigin_map.get(hashtype, "unknown")

def _filter_unaccelarated_hashes(digests):
    """
    If multiple digests are available and some are unaccelerated,
    then return a new dict that omits the unaccelerated ones. This
    allows extreme performance problems like bug #425046 to be
    avoided whenever practical, especially for cases like stage
    builds where acceleration may not be available for some hashes
    due to minimization of dependencies.
    """
    if _whirlpool_unaccelerated and "WHIRLPOOL" in digests:
        verifiable_hash_types = set(digests).intersection(hashfunc_keys)
        verifiable_hash_types.discard("size")
        if len(verifiable_hash_types) > 1:
            digests = dict(digests)
            digests.pop("WHIRLPOOL")

    return digests

class _hash_filter(object):
    """
    Implements filtering for PORTAGE_CHECKSUM_FILTER.
    """

    __slots__ = ('transparent', '_tokens',)

    def __init__(self, filter_str):
        tokens = filter_str.upper().split()
        if not tokens or tokens[-1] == "*":
            del tokens[:]
        self.transparent = not tokens
        tokens.reverse()
        self._tokens = tuple(tokens)

    def __call__(self, hash_name):
        if self.transparent:
            return True
        matches = ("*", hash_name)
        for token in self._tokens:
            if token in matches:
                return True
            elif token[:1] == "-":
                if token[1:] in matches:
                    return False
        return False

def _apply_hash_filter(digests, hash_filter):
    """
    Return a new dict containing the filtered digests, or the same
    dict if no changes are necessary. This will always preserve at
    least one digest, in order to ensure that they are not all
    discarded.
    @param digests: dictionary of digests
    @type digests: dict
    @param hash_filter: A callable that takes a single hash name
        argument, and returns True if the hash is to be used or
        False otherwise
    @type hash_filter: callable
    """

    verifiable_hash_types = set(digests).intersection(hashfunc_keys)
    verifiable_hash_types.discard("size")
    modified = False
    if len(verifiable_hash_types) > 1:
        for k in list(verifiable_hash_types):
            if not hash_filter(k):
                modified = True
                verifiable_hash_types.remove(k)
                if len(verifiable_hash_types) == 1:
                    break

    if modified:
        digests = dict((k, v) for (k, v) in digests.items()
            if k == "size" or k in verifiable_hash_types)

    return digests

def verify_all(filename, mydict, calc_prelink=0, strict=0):
    """
    Verify all checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param mydict: Dictionary of expected digests, optionally including a "size" entry
    @type mydict: Dictionary
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @param strict: Enable/Disable strict checking (which stops exactly at a checksum failure and throws an exception)
    @type strict: Integer
    @rtype: Tuple
    @return: Result of the checks and possible message:
        1) If size fails, False, and a tuple containing a message, the given size, and the actual size
        2) If there is an os error, False, and a tuple containing the system error followed by 2 nulls
        3) If a checksum fails, False and a tuple containing a message, the given hash, and the actual hash
        4) If all checks succeed, return True and a fake reason
    """
    # Dict relates to single file only.
    # returns: (passed, reason)
    file_is_ok = True
    reason = "Reason unknown"
    try:
        mysize = os.stat(filename)[stat.ST_SIZE]
        if mydict.get("size") is not None and mydict["size"] != mysize:
            return False, (_("Filesize does not match recorded size"), mysize, mydict["size"])
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        return False, (str(e), None, None)

    verifiable_hash_types = set(mydict).intersection(hashfunc_keys)
    verifiable_hash_types.discard("size")
    if not verifiable_hash_types:
        expected = set(hashfunc_keys)
        expected.discard("size")
        expected = list(expected)
        expected.sort()
        expected = " ".join(expected)
        got = set(mydict)
        got.discard("size")
        got = list(got)
        got.sort()
        got = " ".join(got)
        return False, (_("Insufficient data for checksum verification"), got, expected)

    for x in sorted(mydict):
        if x == "size":
            continue
        elif x in hashfunc_keys:
            myhash = perform_checksum(filename, x, calc_prelink=calc_prelink)[0]
            if mydict[x] != myhash:
                if strict:
                    raise portage.exception.DigestException(
                        ("Failed to verify '%(file)s' on " + \
                        "checksum type '%(type)s'") % \
                        {"file": filename, "type": x})
                else:
                    file_is_ok = False
                    reason = (("Failed on %s verification" % x), myhash, mydict[x])
                    break

    return file_is_ok, reason

def perform_checksum(filename, hashname="MD5", calc_prelink=0):
    """
    Run a specific checksum against a file. The filename can
    be either unicode or an encoded byte string. If filename
    is unicode then a UnicodeDecodeError will be raised if
    necessary.

    @param filename: File to run the checksum against
    @type filename: String
    @param hashname: The type of hash function to run
    @type hashname: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Tuple
    @return: The hash and size of the data
    """
    global prelink_capable
    # Make sure filename is encoded with the correct encoding before
    # it is passed to spawn (for prelink) and/or the hash function.
    filename = _unicode_encode(filename,
        encoding=_encodings['fs'], errors='strict')
    myfilename = filename
    prelink_tmpfile = None
    try:
        if (calc_prelink and prelink_capable and
                is_prelinkable_elf(filename)):
            # Create non-prelinked temporary file to checksum.
            # Files rejected by prelink are summed in place.
            try:
                tmpfile_fd, prelink_tmpfile = tempfile.mkstemp()
                try:
                    retval = portage.process.spawn([PRELINK_BINARY,
                        "--verify", filename], fd_pipes={1: tmpfile_fd})
                finally:
                    os.close(tmpfile_fd)
                if retval == os.EX_OK:
                    myfilename = prelink_tmpfile
            except portage.exception.CommandNotFound:
                # This happens during uninstallation of prelink.
                prelink_capable = False
        try:
            if hashname not in hashfunc_keys:
                raise portage.exception.DigestException(hashname + \
                    " hash function not available (needs dev-python/pycrypto)")
            myhash, mysize = hashfunc_map[hashname].checksum_file(myfilename)
        except (OSError, IOError) as e:
            if e.errno in (errno.ENOENT, errno.ESTALE):
                raise portage.exception.FileNotFound(myfilename)
            elif e.errno == portage.exception.PermissionDenied.errno:
                raise portage.exception.PermissionDenied(myfilename)
            raise
        return myhash, mysize
    finally:
        if prelink_tmpfile:
            try:
                os.unlink(prelink_tmpfile)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
                del e

def perform_multiple_checksums(filename, hashes=["MD5"], calc_prelink=0):
    """
    Run a group of checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param hashes: A list of checksum functions to run against the file
    @type hashes: List
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Dictionary
    @return: A dictionary in the form:
        return_value[hash_name] = hash_result
        for each given checksum
    """
    rVal = {}
    for x in hashes:
        if x not in hashfunc_keys:
            raise portage.exception.DigestException(x + " hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)")
        rVal[x] = perform_checksum(filename, x, calc_prelink)[0]
    return rVal


def checksum_str(data, hashname="MD5"):
    """
    Run a specific checksum against a byte string.

    @param data: Data to checksum
    @type data: Bytes
    @param hashname: The type of hash function to run
    @type hashname: String
    @rtype: String
    @return: The hash (hex-digest) of the data
    """
    if hashname not in hashfunc_keys:
        raise portage.exception.DigestException(hashname + \
            " hash function not available (needs dev-python/pycrypto)")
    return hashfunc_map[hashname].checksum_str(data)
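
Example usage of the public helpers defined above. This is an illustrative sketch rather than part of checksum.py itself: the distfile path and the recorded digest dictionary are hypothetical, and only functions defined in this module are assumed.

# Illustrative only -- not part of the module source.
from portage.checksum import (get_hash_origin, perform_checksum,
    perform_multiple_checksums, verify_all)

path = "/var/cache/distfiles/example-1.0.tar.gz"  # hypothetical distfile

# perform_checksum() returns a (hex_digest, size) tuple for one hash type.
md5_digest, size = perform_checksum(path, hashname="MD5")

# perform_multiple_checksums() returns {hash_name: hex_digest}.
digests = perform_multiple_checksums(path, hashes=["MD5", "SHA512"])

# Report which provider was registered for a hash, e.g. "hashlib" or "bundled".
print(get_hash_origin("WHIRLPOOL"))

# verify_all() checks recorded digests (and optionally "size") against the
# file and returns (passed, reason).
recorded = {"size": size, "MD5": md5_digest, "SHA512": digests["SHA512"]}
ok, reason = verify_all(path, recorded)
if not ok:
    print("checksum failure: %s" % (reason,))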
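
PORTAGE_CHECKSUM_FILTER support is built on _hash_filter and _apply_hash_filter. The sketch below illustrates the token semantics with made-up digest values; both helpers are module-internal, so treat this as a description of behaviour rather than a public API.

# Illustrative only -- digest values are placeholders.
from portage.checksum import _apply_hash_filter, _hash_filter

digests = {"size": 12345, "SHA512": "cf83e1...", "WHIRLPOOL": "19fa61..."}

# Tokens are evaluated right-to-left and a "-" prefix rejects, so
# "* -WHIRLPOOL" accepts every hash except WHIRLPOOL.
hash_filter = _hash_filter("* -WHIRLPOOL")
assert hash_filter("SHA512")
assert not hash_filter("WHIRLPOOL")

# _apply_hash_filter() drops rejected hashes but always keeps at least one
# verifiable digest (plus "size").
filtered = _apply_hash_filter(digests, hash_filter)
assert set(filtered) == {"size", "SHA512"}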