Package portage :: Module checksum

Source Code for Module portage.checksum

  1  # checksum.py -- core Portage functionality 
  2  # Copyright 1998-2014 Gentoo Foundation 
  3  # Distributed under the terms of the GNU General Public License v2 
  4   
  5  import portage 
  6  from portage.const import PRELINK_BINARY, HASHING_BLOCKSIZE 
  7  from portage.localization import _ 
  8  from portage import os 
  9  from portage import _encodings 
 10  from portage import _unicode_encode 
 11  import errno 
 12  import stat 
 13  import subprocess 
 14  import tempfile 
 15   
# Registry of all available hash functions: maps hash type name
# (e.g. "MD5") to a callable taking a filename and returning a
# (hexdigest, size) tuple.
hashfunc_map = {}
# Maps hash type name to the name of the implementation that provides
# it (e.g. "hashlib", "mhash", "bundled").
hashorigin_map = {}
 19   
def _open_file(filename):
	"""
	Open a file for binary reading, translating the common errno values
	of a failed open() into the corresponding portage exception types.

	@param filename: path of the file to open (str or bytes)
	@return: the opened binary file object
	"""
	encoded_name = _unicode_encode(filename,
		encoding=_encodings['fs'], errors='strict')
	try:
		return open(encoded_name, 'rb')
	except IOError as e:
		func_call = "open('%s')" % filename
		if e.errno == errno.EPERM:
			raise portage.exception.OperationNotPermitted(func_call)
		if e.errno == errno.EACCES:
			raise portage.exception.PermissionDenied(func_call)
		if e.errno == errno.ENOENT:
			raise portage.exception.FileNotFound(filename)
		raise
34
class _generate_hash_function(object):
	"""
	Wraps a hash-object constructor as a file-checksumming callable and
	registers it in hashfunc_map / hashorigin_map under the given hash
	type name at construction time.
	"""

	__slots__ = ("_hashobject",)

	def __init__(self, hashtype, hashobject, origin="unknown"):
		self._hashobject = hashobject
		hashfunc_map[hashtype] = self
		hashorigin_map[hashtype] = origin

	def __call__(self, filename):
		"""
		Run a checksum against a file.

		@param filename: File to run the checksum against
		@type filename: String
		@return: The hash and size of the data
		"""
		total = 0
		digest = self._hashobject()
		with _open_file(filename) as f:
			# Read in fixed-size chunks so large files are never
			# held in memory all at once.
			while True:
				chunk = f.read(HASHING_BLOCKSIZE)
				if not chunk:
					break
				digest.update(chunk)
				total += len(chunk)
		return (digest.hexdigest(), total)
# Define hash functions, try to use the best module available. Later definitions
# override earlier ones

# Use the internal modules as last fallback
try:
	from hashlib import md5 as _new_md5
except ImportError:
	# Pre-hashlib (Python < 2.5) fallback module.
	from md5 import new as _new_md5

md5hash = _generate_hash_function("MD5", _new_md5, origin="internal")

try:
	from hashlib import sha1 as _new_sha1
except ImportError:
	# Pre-hashlib (Python < 2.5) fallback module.
	from sha import new as _new_sha1

sha1hash = _generate_hash_function("SHA1", _new_sha1, origin="internal")

# Try to use mhash if available
# mhash causes GIL presently, so it gets less priority than hashlib and
# pycrypto. However, it might be the only accelerated implementation of
# WHIRLPOOL available.
try:
	import mhash, functools
	md5hash = _generate_hash_function("MD5", functools.partial(mhash.MHASH, mhash.MHASH_MD5), origin="mhash")
	sha1hash = _generate_hash_function("SHA1", functools.partial(mhash.MHASH, mhash.MHASH_SHA1), origin="mhash")
	sha256hash = _generate_hash_function("SHA256", functools.partial(mhash.MHASH, mhash.MHASH_SHA256), origin="mhash")
	sha512hash = _generate_hash_function("SHA512", functools.partial(mhash.MHASH, mhash.MHASH_SHA512), origin="mhash")
	for local_name, hash_name in (("rmd160", "ripemd160"), ("whirlpool", "whirlpool")):
		# NOTE(review): the hasattr() guard checks MHASH_<local_name>
		# (e.g. MHASH_RMD160) while the getattr() below fetches
		# MHASH_<hash_name> (e.g. MHASH_RIPEMD160) -- confirm that both
		# attribute names exist in the mhash builds this must support,
		# otherwise the rmd160 entry may be skipped or raise.
		if hasattr(mhash, 'MHASH_%s' % local_name.upper()):
			globals()['%shash' % local_name] = \
				_generate_hash_function(local_name.upper(), \
				functools.partial(mhash.MHASH, getattr(mhash, 'MHASH_%s' % hash_name.upper())), \
				origin='mhash')
except ImportError:
	pass

# Use pycrypto when available, prefer it over the internal fallbacks
# Check for 'new' attributes, since they can be missing if the module
# is broken somehow.
try:
	from Crypto.Hash import SHA256, RIPEMD
	sha256hash = getattr(SHA256, 'new', None)
	if sha256hash is not None:
		sha256hash = _generate_hash_function("SHA256",
			sha256hash, origin="pycrypto")
	rmd160hash = getattr(RIPEMD, 'new', None)
	if rmd160hash is not None:
		rmd160hash = _generate_hash_function("RMD160",
			rmd160hash, origin="pycrypto")
except ImportError:
	pass

# Use hashlib from python-2.5 if available and prefer it over pycrypto and internal fallbacks.
# Need special handling for RMD160/WHIRLPOOL as they may not always be provided by hashlib.
try:
	import hashlib, functools

	md5hash = _generate_hash_function("MD5", hashlib.md5, origin="hashlib")
	sha1hash = _generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
	sha256hash = _generate_hash_function("SHA256", hashlib.sha256, origin="hashlib")
	sha512hash = _generate_hash_function("SHA512", hashlib.sha512, origin="hashlib")
	for local_name, hash_name in (("rmd160", "ripemd160"), ("whirlpool", "whirlpool")):
		# hashlib.new() raises ValueError when the underlying provider
		# (e.g. the OpenSSL build) lacks the named algorithm, so probe
		# before registering.
		try:
			hashlib.new(hash_name)
		except ValueError:
			pass
		else:
			globals()['%shash' % local_name] = \
				_generate_hash_function(local_name.upper(), \
				functools.partial(hashlib.new, hash_name), \
				origin='hashlib')

except ImportError:
	pass

# True when the registered WHIRLPOOL implementation is the slow bundled
# pure-python one; consulted by _filter_unaccelarated_hashes().
_whirlpool_unaccelerated = False
if "WHIRLPOOL" not in hashfunc_map:
	# Bundled WHIRLPOOL implementation
	_whirlpool_unaccelerated = True
	from portage.util.whirlpool import new as _new_whirlpool
	whirlpoolhash = _generate_hash_function("WHIRLPOOL", _new_whirlpool, origin="bundled")

# Use python-fchksum if available, prefer it over all other MD5 implementations
try:
	# Registered directly (not wrapped by _generate_hash_function), so
	# fmd5t is presumably already a (digest, size)-returning callable
	# like the other hashfunc_map entries -- verify against fchksum docs.
	from fchksum import fmd5t as md5hash
	hashfunc_map["MD5"] = md5hash
	hashorigin_map["MD5"] = "python-fchksum"

except ImportError:
	pass

# There is only one implementation for size
def getsize(filename):
	"""
	Return the size of a file in the same (value, size) tuple shape the
	hash functions use, so that "size" can be treated like any other
	checksum type.

	@param filename: path of the file to stat
	@return: a (size, size) tuple
	"""
	st_size = os.stat(filename).st_size
	return (st_size, st_size)
hashfunc_map["size"] = getsize

# end actual hash functions

# Detect whether a working prelink binary is installed.  When it is,
# checksums can optionally be computed against the un-prelinked image
# of an ELF file (see perform_checksum).
prelink_capable = False
if os.path.exists(PRELINK_BINARY):
	cmd = [PRELINK_BINARY, "--version"]
	cmd = [_unicode_encode(x, encoding=_encodings['fs'], errors='strict')
		for x in cmd]
	proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
		stderr=subprocess.STDOUT)
	proc.communicate()
	# Popen.wait() returns the decoded exit status (returncode), not a
	# raw waitpid() status word, so compare it to os.EX_OK directly
	# instead of passing it through os.WIFEXITED/os.WEXITSTATUS.
	status = proc.wait()
	if status == os.EX_OK:
		prelink_capable = 1
	del cmd, proc, status
def is_prelinkable_elf(filename):
	"""
	Return True if the given file looks like an ELF object that prelink
	can process: a valid ELF magic and an e_type of ET_EXEC or ET_DYN.

	@param filename: path of the file to inspect
	@rtype: bool
	"""
	f = _open_file(filename)
	try:
		magic = f.read(17)
	finally:
		f.close()
	# Slice byte 16 instead of indexing it: on Python 3, magic[16] is an
	# int and would never compare equal to a bytes literal, so the old
	# "magic[16] in (b'\x02', b'\x03')" test always returned False.
	# magic[16:17] yields a length-1 bytes/str on both Python 2 and 3.
	return (len(magic) == 17 and magic.startswith(b'\x7fELF') and
		magic[16:17] in (b'\x02', b'\x03'))  # 2=ET_EXEC, 3=ET_DYN
185
def perform_md5(x, calc_prelink=0):
	"""Convenience wrapper: return only the MD5 digest of a file."""
	return perform_checksum(x, hashname="MD5", calc_prelink=calc_prelink)[0]
188
def _perform_md5_merge(x, **kwargs):
	"""Compute the MD5 of a path after encoding it with the 'merge'
	filesystem encoding."""
	encoded = _unicode_encode(x, encoding=_encodings['merge'],
		errors='strict')
	return perform_md5(encoded, **kwargs)
192
def perform_all(x, calc_prelink=0):
	"""Run every registered checksum type against a file and return a
	dict mapping hash type name to its digest."""
	return dict((k, perform_checksum(x, k, calc_prelink)[0])
		for k in hashfunc_map)
198
def get_valid_checksum_keys():
	"""Return the names of all registered checksum types (including
	"size")."""
	return list(hashfunc_map.keys())
201
def get_hash_origin(hashtype):
	"""Return the name of the implementation providing the given hash
	type, or raise KeyError if the type is not registered."""
	if hashtype in hashfunc_map:
		return hashorigin_map.get(hashtype, "unknown")
	raise KeyError(hashtype)
206
def _filter_unaccelarated_hashes(digests):
	"""
	If multiple digests are available and some are unaccelerated,
	then return a new dict that omits the unaccelerated ones. This
	allows extreme performance problems like bug #425046 to be
	avoided whenever practical, especially for cases like stage
	builds where acceleration may not be available for some hashes
	due to minimization of dependencies.
	"""
	# Only the bundled pure-python WHIRLPOOL is slow enough to warrant
	# filtering; everything else passes through untouched.
	if not (_whirlpool_unaccelerated and "WHIRLPOOL" in digests):
		return digests

	usable = set(digests).intersection(hashfunc_map)
	usable.discard("size")
	if len(usable) <= 1:
		# WHIRLPOOL is the only verifiable digest; keep it.
		return digests

	filtered = dict(digests)
	del filtered["WHIRLPOOL"]
	return filtered
224
225 -class _hash_filter(object):
226 """ 227 Implements filtering for PORTAGE_CHECKSUM_FILTER. 228 """ 229 230 __slots__ = ('transparent', '_tokens',) 231
232 - def __init__(self, filter_str):
233 tokens = filter_str.upper().split() 234 if not tokens or tokens[-1] == "*": 235 del tokens[:] 236 self.transparent = not tokens 237 tokens.reverse() 238 self._tokens = tuple(tokens)
239
240 - def __call__(self, hash_name):
241 if self.transparent: 242 return True 243 matches = ("*", hash_name) 244 for token in self._tokens: 245 if token in matches: 246 return True 247 elif token[:1] == "-": 248 if token[1:] in matches: 249 return False 250 return False
251
def _apply_hash_filter(digests, hash_filter):
	"""
	Return a new dict containing the filtered digests, or the same
	dict if no changes are necessary. This will always preserve at
	at least one digest, in order to ensure that they are not all
	discarded.
	@param digests: dictionary of digests
	@type digests: dict
	@param hash_filter: A callable that takes a single hash name
		argument, and returns True if the hash is to be used or
		False otherwise
	@type hash_filter: callable
	"""

	kept = set(digests).intersection(hashfunc_map)
	kept.discard("size")
	removed_any = False
	if len(kept) > 1:
		for name in list(kept):
			if not hash_filter(name):
				removed_any = True
				kept.remove(name)
				# Never filter away the last verifiable digest.
				if len(kept) == 1:
					break

	if not removed_any:
		return digests
	return dict((k, v) for (k, v) in digests.items()
		if k == "size" or k in kept)
282
def verify_all(filename, mydict, calc_prelink=0, strict=0):
	"""
	Verify all checksums against a file.

	@param filename: File to run the checksums against
	@type filename: String
	@param mydict: Dictionary of expected digests, including a "size" key
	@type mydict: Dictionary
	@param calc_prelink: Whether or not to reverse prelink before running the checksum
	@type calc_prelink: Integer
	@param strict: Enable/Disable strict checking (which stops exactly at a checksum failure and throws an exception)
	@type strict: Integer
	@rtype: Tuple
	@return: Result of the checks and possible message:
		1) If size fails, False, and a tuple containing a message, the given size, and the actual size
		2) If there is an os error, False, and a tuple containing the system error followed by 2 nulls
		3) If a checksum fails, False and a tuple containing a message, the given hash, and the actual hash
		4) If all checks succeed, return True and a fake reason
	"""
	# Dict relates to single file only.
	# returns: (passed,reason)
	file_is_ok = True
	reason = "Reason unknown"
	try:
		# Size is checked first since it is by far the cheapest test.
		mysize = os.stat(filename)[stat.ST_SIZE]
		if mydict["size"] != mysize:
			return False, (_("Filesize does not match recorded size"), mysize, mydict["size"])
	except OSError as e:
		if e.errno == errno.ENOENT:
			raise portage.exception.FileNotFound(filename)
		return False, (str(e), None, None)

	verifiable_hash_types = set(mydict).intersection(hashfunc_map)
	verifiable_hash_types.discard("size")
	if not verifiable_hash_types:
		# None of the recorded digests can be checked locally; report
		# which hash types were given vs. which are supported.
		expected = set(hashfunc_map)
		expected.discard("size")
		expected = list(expected)
		expected.sort()
		expected = " ".join(expected)
		got = set(mydict)
		got.discard("size")
		got = list(got)
		got.sort()
		got = " ".join(got)
		return False, (_("Insufficient data for checksum verification"), got, expected)

	for x in sorted(mydict):
		if x == "size":
			continue
		elif x in hashfunc_map:
			myhash = perform_checksum(filename, x, calc_prelink=calc_prelink)[0]
			if mydict[x] != myhash:
				if strict:
					# Fix: the format string previously read
					# '$(file)s', which %-formatting leaves as a
					# literal, so the filename was never interpolated
					# into the exception message.
					raise portage.exception.DigestException(
						("Failed to verify '%(file)s' on " + \
						"checksum type '%(type)s'") % \
						{"file" : filename, "type" : x})
				else:
					file_is_ok = False
					reason = (("Failed on %s verification" % x), myhash, mydict[x])
					break

	return file_is_ok, reason
345
def perform_checksum(filename, hashname="MD5", calc_prelink=0):
	"""
	Run a specific checksum against a file. The filename can
	be either unicode or an encoded byte string. If filename
	is unicode then a UnicodeDecodeError will be raised if
	necessary.

	@param filename: File to run the checksum against
	@type filename: String
	@param hashname: The type of hash function to run
	@type hashname: String
	@param calc_prelink: Whether or not to reverse prelink before running the checksum
	@type calc_prelink: Integer
	@rtype: Tuple
	@return: The hash and size of the data
	"""
	global prelink_capable
	# Make sure filename is encoded with the correct encoding before
	# it is passed to spawn (for prelink) and/or the hash function.
	filename = _unicode_encode(filename,
		encoding=_encodings['fs'], errors='strict')
	# myfilename is the file actually checksummed; it is switched to a
	# temporary un-prelinked copy below when prelink handling applies.
	myfilename = filename
	prelink_tmpfile = None
	try:
		if (calc_prelink and prelink_capable and
			is_prelinkable_elf(filename)):
			# Create non-prelinked temporary file to checksum.
			# Files rejected by prelink are summed in place.
			try:
				tmpfile_fd, prelink_tmpfile = tempfile.mkstemp()
				try:
					# "prelink --verify" writes the un-prelinked image
					# of the file to stdout, which is redirected into
					# the temporary file via fd_pipes.
					retval = portage.process.spawn([PRELINK_BINARY,
						"--verify", filename], fd_pipes={1:tmpfile_fd})
				finally:
					os.close(tmpfile_fd)
				if retval == os.EX_OK:
					myfilename = prelink_tmpfile
			except portage.exception.CommandNotFound:
				# This happens during uninstallation of prelink.
				# Remember the failure so later calls skip prelink
				# handling entirely.
				prelink_capable = False
		try:
			if hashname not in hashfunc_map:
				raise portage.exception.DigestException(hashname + \
					" hash function not available (needs dev-python/pycrypto)")
			myhash, mysize = hashfunc_map[hashname](myfilename)
		except (OSError, IOError) as e:
			# Translate common filesystem errors into portage's
			# exception types; anything else propagates unchanged.
			if e.errno in (errno.ENOENT, errno.ESTALE):
				raise portage.exception.FileNotFound(myfilename)
			elif e.errno == portage.exception.PermissionDenied.errno:
				raise portage.exception.PermissionDenied(myfilename)
			raise
		return myhash, mysize
	finally:
		# Always clean up the temporary un-prelinked copy; a missing
		# file (ENOENT) is tolerated since spawn may have failed before
		# creating any content.
		if prelink_tmpfile:
			try:
				os.unlink(prelink_tmpfile)
			except OSError as e:
				if e.errno != errno.ENOENT:
					raise
				del e
406
def perform_multiple_checksums(filename, hashes=("MD5",), calc_prelink=0):
	"""
	Run a group of checksums against a file.

	Note: the default for hashes is an immutable tuple rather than a
	list, avoiding the shared-mutable-default pitfall; callers may
	still pass a list as before.

	@param filename: File to run the checksums against
	@type filename: String
	@param hashes: A list of checksum functions to run against the file
	@type hashes: List
	@param calc_prelink: Whether or not to reverse prelink before running the checksum
	@type calc_prelink: Integer
	@rtype: Dictionary
	@return: A dictionary in the form:
		return_value[hash_name] = hash_result
		for each given checksum (only the digest is stored, not the size)
	"""
	rVal = {}
	for x in hashes:
		if x not in hashfunc_map:
			raise portage.exception.DigestException(x+" hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)")
		rVal[x] = perform_checksum(filename, x, calc_prelink)[0]
	return rVal
428