Package portage :: Module checksum

Source Code for Module portage.checksum

# checksum.py -- core Portage functionality
# Copyright 1998-2013 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

import portage
from portage.const import PRELINK_BINARY, HASHING_BLOCKSIZE
from portage.localization import _
from portage import os
from portage import _encodings
from portage import _unicode_encode
import errno
import stat
import sys
import subprocess
import tempfile

# dict of all available hash functions
hashfunc_map = {}
hashorigin_map = {}

def _open_file(filename):
    try:
        return open(_unicode_encode(filename,
            encoding=_encodings['fs'], errors='strict'), 'rb')
    except IOError as e:
        func_call = "open('%s')" % filename
        if e.errno == errno.EPERM:
            raise portage.exception.OperationNotPermitted(func_call)
        elif e.errno == errno.EACCES:
            raise portage.exception.PermissionDenied(func_call)
        elif e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        else:
            raise

class _generate_hash_function(object):

    __slots__ = ("_hashobject",)

    def __init__(self, hashtype, hashobject, origin="unknown"):
        self._hashobject = hashobject
        hashfunc_map[hashtype] = self
        hashorigin_map[hashtype] = origin

    def __call__(self, filename):
        """
        Run a checksum against a file.

        @param filename: File to run the checksum against
        @type filename: String
        @return: The hash and size of the data
        """
        with _open_file(filename) as f:
            blocksize = HASHING_BLOCKSIZE
            size = 0
            checksum = self._hashobject()
            data = f.read(blocksize)
            while data:
                checksum.update(data)
                size = size + len(data)
                data = f.read(blocksize)

        return (checksum.hexdigest(), size)
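
# Illustrative sketch (not part of the upstream module): once a hash type has
# been registered below, its callable can be looked up in hashfunc_map and
# applied to a file; the path used here is purely hypothetical.
def _example_hash_one_file(path="/tmp/example-file"):
    digest, size = hashfunc_map["MD5"](path)
    return digest, size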

# Define hash functions, try to use the best module available. Later definitions
# override earlier ones

# Use the internal modules as last fallback
try:
    from hashlib import md5 as _new_md5
except ImportError:
    from md5 import new as _new_md5

md5hash = _generate_hash_function("MD5", _new_md5, origin="internal")

try:
    from hashlib import sha1 as _new_sha1
except ImportError:
    from sha import new as _new_sha1

sha1hash = _generate_hash_function("SHA1", _new_sha1, origin="internal")

# Try to use mhash if available
# mhash causes GIL presently, so it gets less priority than hashlib and
# pycrypto. However, it might be the only accelerated implementation of
# WHIRLPOOL available.
try:
    import mhash, functools
    md5hash = _generate_hash_function("MD5", functools.partial(mhash.MHASH, mhash.MHASH_MD5), origin="mhash")
    sha1hash = _generate_hash_function("SHA1", functools.partial(mhash.MHASH, mhash.MHASH_SHA1), origin="mhash")
    sha256hash = _generate_hash_function("SHA256", functools.partial(mhash.MHASH, mhash.MHASH_SHA256), origin="mhash")
    sha512hash = _generate_hash_function("SHA512", functools.partial(mhash.MHASH, mhash.MHASH_SHA512), origin="mhash")
    for local_name, hash_name in (("rmd160", "ripemd160"), ("whirlpool", "whirlpool")):
        if hasattr(mhash, 'MHASH_%s' % local_name.upper()):
            globals()['%shash' % local_name] = \
                _generate_hash_function(local_name.upper(), \
                functools.partial(mhash.MHASH, getattr(mhash, 'MHASH_%s' % hash_name.upper())), \
                origin='mhash')
except ImportError:
    pass

# Use pycrypto when available, prefer it over the internal fallbacks
# Check for 'new' attributes, since they can be missing if the module
# is broken somehow.
try:
    from Crypto.Hash import SHA256, RIPEMD
    sha256hash = getattr(SHA256, 'new', None)
    if sha256hash is not None:
        sha256hash = _generate_hash_function("SHA256",
            sha256hash, origin="pycrypto")
    rmd160hash = getattr(RIPEMD, 'new', None)
    if rmd160hash is not None:
        rmd160hash = _generate_hash_function("RMD160",
            rmd160hash, origin="pycrypto")
except ImportError:
    pass

# Use hashlib from python-2.5 if available and prefer it over pycrypto and internal fallbacks.
# Need special handling for RMD160/WHIRLPOOL as they may not always be provided by hashlib.
try:
    import hashlib, functools

    md5hash = _generate_hash_function("MD5", hashlib.md5, origin="hashlib")
    sha1hash = _generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
    sha256hash = _generate_hash_function("SHA256", hashlib.sha256, origin="hashlib")
    sha512hash = _generate_hash_function("SHA512", hashlib.sha512, origin="hashlib")
    for local_name, hash_name in (("rmd160", "ripemd160"), ("whirlpool", "whirlpool")):
        try:
            hashlib.new(hash_name)
        except ValueError:
            pass
        else:
            globals()['%shash' % local_name] = \
                _generate_hash_function(local_name.upper(), \
                functools.partial(hashlib.new, hash_name), \
                origin='hashlib')

except ImportError:
    pass

_whirlpool_unaccelerated = False
if "WHIRLPOOL" not in hashfunc_map:
    # Bundled WHIRLPOOL implementation
    _whirlpool_unaccelerated = True
    from portage.util.whirlpool import new as _new_whirlpool
    whirlpoolhash = _generate_hash_function("WHIRLPOOL", _new_whirlpool, origin="bundled")

# Use python-fchksum if available, prefer it over all other MD5 implementations
try:
    from fchksum import fmd5t as md5hash
    hashfunc_map["MD5"] = md5hash
    hashorigin_map["MD5"] = "python-fchksum"

except ImportError:
    pass
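
# Illustrative sketch (not part of the upstream module): which backend ends up
# providing each hash type depends on which of the optional imports above
# succeeded; this helper simply reports the final selection.
def _example_list_hash_origins():
    return dict((name, hashorigin_map.get(name, "unknown"))
        for name in hashfunc_map)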

# There is only one implementation for size
def getsize(filename):
    size = os.stat(filename).st_size
    return (size, size)
hashfunc_map["size"] = getsize

# end actual hash functions

prelink_capable = False
if os.path.exists(PRELINK_BINARY):
    cmd = [PRELINK_BINARY, "--version"]
    cmd = [_unicode_encode(x, encoding=_encodings['fs'], errors='strict')
        for x in cmd]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    proc.communicate()
    status = proc.wait()
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == os.EX_OK:
        prelink_capable = 1
    del cmd, proc, status

def is_prelinkable_elf(filename):
    f = _open_file(filename)
    try:
        magic = f.read(17)
    finally:
        f.close()
    # Compare a one-byte slice so the test also works on Python 3, where
    # indexing a bytes object yields an int rather than a length-1 bytes.
    return (len(magic) == 17 and magic.startswith(b'\x7fELF') and
        magic[16:17] in (b'\x02', b'\x03')) # 2=ET_EXEC, 3=ET_DYN
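
# Illustrative sketch (not part of the upstream module): the same 17-byte test
# that is_prelinkable_elf() applies to a real file, run here against a
# hand-constructed header for a little-endian 64-bit ET_DYN object.
def _example_elf_header_check():
    magic = b'\x7fELF' + b'\x02\x01\x01' + b'\x00' * 9 + b'\x03'
    return (len(magic) == 17 and magic.startswith(b'\x7fELF') and
        magic[16:17] in (b'\x02', b'\x03'))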

def perform_md5(x, calc_prelink=0):
    return perform_checksum(x, "MD5", calc_prelink)[0]

def _perform_md5_merge(x, **kwargs):
    return perform_md5(_unicode_encode(x,
        encoding=_encodings['merge'], errors='strict'), **kwargs)

def perform_all(x, calc_prelink=0):
    mydict = {}
    for k in hashfunc_map:
        mydict[k] = perform_checksum(x, k, calc_prelink)[0]
    return mydict
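
# Illustrative sketch (not part of the upstream module): perform_all() computes
# every registered hash type for one file, e.g. {"MD5": "...", "SHA1": "...",
# "size": 12345}; the path is hypothetical.
def _example_perform_all(path="/tmp/example-file"):
    return perform_all(path)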

def get_valid_checksum_keys():
    return list(hashfunc_map)

def get_hash_origin(hashtype):
    if hashtype not in hashfunc_map:
        raise KeyError(hashtype)
    return hashorigin_map.get(hashtype, "unknown")

def _filter_unaccelarated_hashes(digests):
    """
    If multiple digests are available and some are unaccelerated,
    then return a new dict that omits the unaccelerated ones. This
    allows extreme performance problems like bug #425046 to be
    avoided whenever practical, especially for cases like stage
    builds where acceleration may not be available for some hashes
    due to minimization of dependencies.
    """
    if _whirlpool_unaccelerated and "WHIRLPOOL" in digests:
        verifiable_hash_types = set(digests).intersection(hashfunc_map)
        verifiable_hash_types.discard("size")
        if len(verifiable_hash_types) > 1:
            digests = dict(digests)
            digests.pop("WHIRLPOOL")

    return digests
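
# Illustrative sketch (not part of the upstream module): when only the bundled
# (slow) WHIRLPOOL implementation is available and another verifiable hash is
# present, the WHIRLPOOL entry is dropped; otherwise the dict is returned
# unchanged. The digest values below are placeholders.
def _example_filter_unaccelerated():
    digests = {"size": 1024, "SHA256": "aa" * 32, "WHIRLPOOL": "bb" * 64}
    return _filter_unaccelarated_hashes(digests)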

class _hash_filter(object):
    """
    Implements filtering for PORTAGE_CHECKSUM_FILTER.
    """

    __slots__ = ('transparent', '_tokens',)

    def __init__(self, filter_str):
        tokens = filter_str.upper().split()
        if not tokens or tokens[-1] == "*":
            del tokens[:]
        self.transparent = not tokens
        tokens.reverse()
        self._tokens = tuple(tokens)

    def __call__(self, hash_name):
        if self.transparent:
            return True
        matches = ("*", hash_name)
        for token in self._tokens:
            if token in matches:
                return True
            elif token[:1] == "-":
                if token[1:] in matches:
                    return False
        return False
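
# Illustrative sketch (not part of the upstream module): a filter string such
# as "-* SHA256 SHA512" (PORTAGE_CHECKSUM_FILTER syntax) rejects every hash
# type except the ones explicitly listed, so only "SHA256" survives here.
def _example_hash_filter():
    hash_filter = _hash_filter("-* SHA256 SHA512")
    return [name for name in ("MD5", "SHA256", "WHIRLPOOL")
        if hash_filter(name)]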

def _apply_hash_filter(digests, hash_filter):
    """
    Return a new dict containing the filtered digests, or the same
    dict if no changes are necessary. This will always preserve at
    least one digest, in order to ensure that they are not all
    discarded.
    @param digests: dictionary of digests
    @type digests: dict
    @param hash_filter: A callable that takes a single hash name
        argument, and returns True if the hash is to be used or
        False otherwise
    @type hash_filter: callable
    """

    verifiable_hash_types = set(digests).intersection(hashfunc_map)
    verifiable_hash_types.discard("size")
    modified = False
    if len(verifiable_hash_types) > 1:
        for k in list(verifiable_hash_types):
            if not hash_filter(k):
                modified = True
                verifiable_hash_types.remove(k)
                if len(verifiable_hash_types) == 1:
                    break

    if modified:
        digests = dict((k, v) for (k, v) in digests.items()
            if k == "size" or k in verifiable_hash_types)

    return digests
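
# Illustrative sketch (not part of the upstream module): disallowed hash types
# are removed from the digest dict while "size" and at least one verifiable
# hash are always kept; the digest values are placeholders.
def _example_apply_hash_filter():
    digests = {"size": 1024, "MD5": "cc" * 16, "SHA256": "aa" * 32}
    return _apply_hash_filter(digests, _hash_filter("-* SHA256"))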

def verify_all(filename, mydict, calc_prelink=0, strict=0):
    """
    Verify all checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param mydict: Dictionary of expected checksums, including the file size under the "size" key
    @type mydict: Dictionary
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @param strict: Enable/Disable strict checking (which stops exactly at a checksum failure and throws an exception)
    @type strict: Integer
    @rtype: Tuple
    @return: Result of the checks and possible message:
        1) If size fails, False, and a tuple containing a message, the given size, and the actual size
        2) If there is an os error, False, and a tuple containing the system error followed by 2 nulls
        3) If a checksum fails, False and a tuple containing a message, the given hash, and the actual hash
        4) If all checks succeed, return True and a fake reason
    """
    # Dict relates to single file only.
    # returns: (passed, reason)
    file_is_ok = True
    reason = "Reason unknown"
    try:
        mysize = os.stat(filename)[stat.ST_SIZE]
        if mydict["size"] != mysize:
            return False, (_("Filesize does not match recorded size"), mysize, mydict["size"])
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        return False, (str(e), None, None)

    verifiable_hash_types = set(mydict).intersection(hashfunc_map)
    verifiable_hash_types.discard("size")
    if not verifiable_hash_types:
        expected = set(hashfunc_map)
        expected.discard("size")
        expected = list(expected)
        expected.sort()
        expected = " ".join(expected)
        got = set(mydict)
        got.discard("size")
        got = list(got)
        got.sort()
        got = " ".join(got)
        return False, (_("Insufficient data for checksum verification"), got, expected)

    for x in sorted(mydict):
        if x == "size":
            continue
        elif x in hashfunc_map:
            myhash = perform_checksum(filename, x, calc_prelink=calc_prelink)[0]
            if mydict[x] != myhash:
                if strict:
                    raise portage.exception.DigestException(
                        ("Failed to verify '%(file)s' on " + \
                        "checksum type '%(type)s'") % \
                        {"file": filename, "type": x})
                else:
                    file_is_ok = False
                    reason = (("Failed on %s verification" % x), myhash, mydict[x])
                    break
    return file_is_ok, reason
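
# Illustrative sketch (not part of the upstream module): a typical call with a
# digest dict such as one parsed from a Manifest entry; the path and digest
# values here are hypothetical, so this would normally report a mismatch.
def _example_verify_all(path="/tmp/example.tar.bz2"):
    digests = {
        "size": 12345,
        "MD5": "0123456789abcdef0123456789abcdef",
    }
    ok, reason = verify_all(path, digests)
    return ok, reason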

def perform_checksum(filename, hashname="MD5", calc_prelink=0):
    """
    Run a specific checksum against a file. The filename can
    be either unicode or an encoded byte string. If filename
    is unicode then a UnicodeDecodeError will be raised if
    necessary.

    @param filename: File to run the checksum against
    @type filename: String
    @param hashname: The type of hash function to run
    @type hashname: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Tuple
    @return: The hash and size of the data
    """
    global prelink_capable
    # Make sure filename is encoded with the correct encoding before
    # it is passed to spawn (for prelink) and/or the hash function.
    filename = _unicode_encode(filename,
        encoding=_encodings['fs'], errors='strict')
    myfilename = filename
    prelink_tmpfile = None
    try:
        if (calc_prelink and prelink_capable and
            is_prelinkable_elf(filename)):
            # Create non-prelinked temporary file to checksum.
            # Files rejected by prelink are summed in place.
            try:
                tmpfile_fd, prelink_tmpfile = tempfile.mkstemp()
                try:
                    retval = portage.process.spawn([PRELINK_BINARY,
                        "--verify", filename], fd_pipes={1:tmpfile_fd})
                finally:
                    os.close(tmpfile_fd)
                if retval == os.EX_OK:
                    myfilename = prelink_tmpfile
            except portage.exception.CommandNotFound:
                # This happens during uninstallation of prelink.
                prelink_capable = False
        try:
            if hashname not in hashfunc_map:
                raise portage.exception.DigestException(hashname + \
                    " hash function not available (needs dev-python/pycrypto)")
            myhash, mysize = hashfunc_map[hashname](myfilename)
        except (OSError, IOError) as e:
            if e.errno in (errno.ENOENT, errno.ESTALE):
                raise portage.exception.FileNotFound(myfilename)
            elif e.errno == portage.exception.PermissionDenied.errno:
                raise portage.exception.PermissionDenied(myfilename)
            raise
        return myhash, mysize
    finally:
        if prelink_tmpfile:
            try:
                os.unlink(prelink_tmpfile)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
                del e
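
# Illustrative sketch (not part of the upstream module): computing a single
# hash type, optionally undoing prelink modifications first; the path is
# hypothetical.
def _example_perform_checksum(path="/usr/bin/example"):
    sha256, size = perform_checksum(path, hashname="SHA256", calc_prelink=1)
    return sha256, size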

def perform_multiple_checksums(filename, hashes=["MD5"], calc_prelink=0):
    """
    Run a group of checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param hashes: A list of checksum functions to run against the file
    @type hashes: List
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Dict
    @return: A dictionary in the form:
        return_value[hash_name] = hash_result
        for each given checksum
    """
    rVal = {}
    for x in hashes:
        if x not in hashfunc_map:
            raise portage.exception.DigestException(x + " hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)")
        rVal[x] = perform_checksum(filename, x, calc_prelink)[0]
    return rVal
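
# Illustrative sketch (not part of the upstream module): several hash types in
# one call; note that each value in the returned dict is the hex digest only,
# since perform_checksum()'s size element is discarded above. The path is
# hypothetical.
def _example_perform_multiple_checksums(path="/tmp/example-file"):
    return perform_multiple_checksums(path, hashes=["MD5", "SHA256"])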