# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2008 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""Classes to manage databases of fixed-length records.

The databases map small, non-negative integers to fixed-size records.
The records are written in index order to a disk file. Gaps in the
index sequence leave gaps in the data file, so for best space
efficiency the indexes of existing records should be approximately
continuous.

To use a RecordTable, you need a class derived from Packer which can
serialize/deserialize your records into fixed-size strings. Derived
classes must specify how to pack records into strings and unpack
strings into records by overriding the pack() and unpack() methods
respectively.

Note that these classes keep track of gaps in the records that have
been written by filling them with packer.empty_value. If a record is
read which contains packer.empty_value, then a KeyError is raised."""


import os
import types
import struct
import mmap

from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import DB_OPEN_WRITE
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.log import Log


# A unique value that can be used to stand for "unset" without
# preventing the use of None.
_unset = object()


class Packer(object):
  def __init__(self, record_len, empty_value=None):
    self.record_len = record_len
    if empty_value is None:
      self.empty_value = '\0' * self.record_len
    else:
      assert type(empty_value) is types.StringType
      assert len(empty_value) == self.record_len
      self.empty_value = empty_value

  def pack(self, v):
    """Pack record V into a string of length self.record_len."""

    raise NotImplementedError()

  def unpack(self, s):
    """Unpack string S into a record."""

    raise NotImplementedError()


class StructPacker(Packer):
  def __init__(self, format, empty_value=_unset):
    self.format = format
    if empty_value is not _unset:
      empty_value = self.pack(empty_value)
    else:
      empty_value = None

    Packer.__init__(self, struct.calcsize(self.format),
                    empty_value=empty_value)

  def pack(self, v):
    return struct.pack(self.format, v)

  def unpack(self, v):
    return struct.unpack(self.format, v)[0]


class UnsignedIntegerPacker(StructPacker):
  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=I', empty_value)


class SignedIntegerPacker(StructPacker):
  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=i', empty_value)


class FileOffsetPacker(Packer):
  """A packer suitable for file offsets.

  We store the 5 least significant bytes of the file offset. This is
  enough bits (40) to represent offsets up to 1 TiB. Of course if the
  computer doesn't have large file support, only the lowest 31 bits
  can be nonzero, and the offsets are limited to 2 GiB."""

  # Convert file offsets to 8-byte little-endian unsigned longs...
  INDEX_FORMAT = '<Q'
  INDEX_FORMAT_LEN = struct.calcsize(INDEX_FORMAT)

  # ...but then truncate to 5 bytes.
  PACKED_OFFSET_LEN = 5

  def __init__(self):
    Packer.__init__(self, self.PACKED_OFFSET_LEN)

  def pack(self, v):
    return struct.pack(self.INDEX_FORMAT, v)[:self.PACKED_OFFSET_LEN]

  def unpack(self, s):
    return struct.unpack(
        self.INDEX_FORMAT,
        s + '\0' * (self.INDEX_FORMAT_LEN - self.PACKED_OFFSET_LEN)
        )[0]

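# Illustrative example of FileOffsetPacker's 5-byte truncation (the
# value is arbitrary; struct.pack('<Q', 0x0123456789) yields
# '\x89\x67\x45\x23\x01\x00\x00\x00', of which the three high-order
# NUL bytes are dropped and restored on unpacking):
#
#   packer = FileOffsetPacker()
#   s = packer.pack(0x0123456789)
#   assert s == '\x89\x67\x45\x23\x01'
#   assert packer.unpack(s) == 0x0123456789
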
class RecordTableAccessError(RuntimeError):
  pass


class AbstractRecordTable:
  def __init__(self, filename, mode, packer):
    self.filename = filename
    self.mode = mode
    self.packer = packer
    # Simplify and speed access to this oft-needed quantity:
    self._record_len = self.packer.record_len

  def __str__(self):
    return '%s(%r)' % (self.__class__.__name__, self.filename,)

  def _set_packed_record(self, i, s):
    """Set the value for index I to the packed value S."""

    raise NotImplementedError()

  def __setitem__(self, i, v):
    self._set_packed_record(i, self.packer.pack(v))

  def _get_packed_record(self, i):
    """Return the packed record for index I.

    Raise KeyError if it is not present."""

    raise NotImplementedError()

  def __getitem__(self, i):
    """Return the item for index I.

    Raise KeyError if that item has never been set (or if it was set
    to self.packer.empty_value)."""

    s = self._get_packed_record(i)

    if s == self.packer.empty_value:
      raise KeyError(i)

    return self.packer.unpack(s)

  def get(self, i, default=None):
    try:
      return self[i]
    except KeyError:
      return default

  def get_many(self, indexes, default=None):
    """Yield (index, item) tuples for INDEXES, in arbitrary order.

    Yield (index, default) for indexes with no defined values."""

    indexes = list(indexes)
    # Sort the indexes to reduce disk seeking:
    indexes.sort()
    for i in indexes:
      yield (i, self.get(i, default))

  def __delitem__(self, i):
    """Delete the item for index I.

    Raise KeyError if that item has never been set (or if it was set
    to self.packer.empty_value)."""

    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()

    # Check that the value was set (otherwise raise KeyError):
    self[i]
    self._set_packed_record(i, self.packer.empty_value)

  def iterkeys(self):
    """Yield the keys in the map in key order."""

    for i in xrange(0, self._limit):
      try:
        self[i]
        yield i
      except KeyError:
        pass

  def itervalues(self):
    """Yield the values in the map in key order.

    Skip over values that haven't been defined."""

    for i in xrange(0, self._limit):
      try:
        yield self[i]
      except KeyError:
        pass


class RecordTable(AbstractRecordTable):
  # The approximate amount of memory that should be used for the
  # cache for each instance of this class:
  CACHE_MEMORY = 4 * 1024 * 1024

  # Empirically, each entry in the cache table has an overhead of
  # about 96 bytes on a 32-bit computer.
  CACHE_OVERHEAD_PER_ENTRY = 96

  def __init__(self, filename, mode, packer, cache_memory=CACHE_MEMORY):
    AbstractRecordTable.__init__(self, filename, mode, packer)
    if self.mode == DB_OPEN_NEW:
      self.f = open(self.filename, 'wb+')
    elif self.mode == DB_OPEN_WRITE:
      self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_READ:
      self.f = open(self.filename, 'rb')
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)
    self.cache_memory = cache_memory

    # The number of items that can be stored in the write cache:
    self._max_memory_cache = (
        self.cache_memory
        // (self.CACHE_OVERHEAD_PER_ENTRY + self._record_len))

    # A map {i : (dirty, s)} serving as a read and write cache, where
    # I is an index, DIRTY is a boolean indicating whether the value
    # still has to be written to disk, and S is the packed value for
    # the index.  Up to self._max_memory_cache items can be stored
    # here.
    self._cache = {}

    # The index just beyond the last record ever written:
    self._limit = os.path.getsize(self.filename) // self._record_len

    # The index just beyond the last record ever written to disk:
    self._limit_written = self._limit

  def flush(self):
    Log().debug('Flushing cache for %s' % (self,))

    pairs = [(i, s) for (i, (dirty, s)) in self._cache.items() if dirty]

    if pairs:
      pairs.sort()
      old_i = None
      f = self.f
      for (i, s) in pairs:
        if i == old_i:
          # No seeking needed:
          pass
        elif i <= self._limit_written:
          # Just jump there:
          f.seek(i * self._record_len)
        else:
          # Jump to the end of the file then pad with empty_value:
          f.seek(self._limit_written * self._record_len)
          f.write(
              self.packer.empty_value * (i - self._limit_written))
        f.write(s)
        old_i = i + 1
        self._limit_written = max(self._limit_written, old_i)

      self.f.flush()

    self._cache.clear()

  def _set_packed_record(self, i, s):
    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()
    if i < 0:
      raise KeyError()
    self._cache[i] = (True, s)
    if len(self._cache) >= self._max_memory_cache:
      self.flush()
    self._limit = max(self._limit, i + 1)

  def _get_packed_record(self, i):
    try:
      return self._cache[i][1]
    except KeyError:
      if not 0 <= i < self._limit_written:
        raise KeyError(i)
      self.f.seek(i * self._record_len)
      s = self.f.read(self._record_len)
      self._cache[i] = (False, s)
      if len(self._cache) >= self._max_memory_cache:
        self.flush()

      return s

  def close(self):
    self.flush()
    self._cache = None
    self.f.close()
    self.f = None


class MmapRecordTable(AbstractRecordTable):
  GROWTH_INCREMENT = 65536

  def __init__(self, filename, mode, packer):
    AbstractRecordTable.__init__(self, filename, mode, packer)
    if self.mode == DB_OPEN_NEW:
      self.python_file = open(self.filename, 'wb+')
      self.python_file.write('\0' * self.GROWTH_INCREMENT)
      self.python_file.flush()
      self._filesize = self.GROWTH_INCREMENT
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_WRITE
          )

      # The index just beyond the last record ever written:
      self._limit = 0
    elif self.mode == DB_OPEN_WRITE:
      self.python_file = open(self.filename, 'rb+')
      self._filesize = os.path.getsize(self.filename)
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_WRITE
          )

      # The index just beyond the last record ever written:
      self._limit = os.path.getsize(self.filename) // self._record_len
    elif self.mode == DB_OPEN_READ:
      self.python_file = open(self.filename, 'rb')
      self._filesize = os.path.getsize(self.filename)
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_READ
          )

      # The index just beyond the last record ever written:
      self._limit = os.path.getsize(self.filename) // self._record_len
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

  def flush(self):
    self.f.flush()

  def _set_packed_record(self, i, s):
    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()
    if i < 0:
      raise KeyError()
    if i >= self._limit:
      # This write extends the range of valid indexes. First check
      # whether the file has to be enlarged:
      new_size = (i + 1) * self._record_len
      if new_size > self._filesize:
        self._filesize = (
            (new_size + self.GROWTH_INCREMENT - 1)
            // self.GROWTH_INCREMENT
            * self.GROWTH_INCREMENT
            )
        self.f.resize(self._filesize)
      if i > self._limit:
        # Pad up to the new record with empty_value:
        self.f[self._limit * self._record_len:i * self._record_len] = \
            self.packer.empty_value * (i - self._limit)
      self._limit = i + 1

    self.f[i * self._record_len:(i + 1) * self._record_len] = s

  def _get_packed_record(self, i):
    if not 0 <= i < self._limit:
      raise KeyError(i)

    return self.f[i * self._record_len:(i + 1) * self._record_len]

  def close(self):
    self.flush()
    self.f.close()
    self.python_file.close()
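
# Illustrative usage sketch, run only when this module is executed
# directly: exercises a RecordTable round trip, including the gap
# padding and empty_value behavior described in the module docstring.
# The filename is arbitrary, and the demo file is removed afterwards.
if __name__ == '__main__':
  demo_filename = '_record_table_demo.dat'
  table = RecordTable(demo_filename, DB_OPEN_NEW, UnsignedIntegerPacker())
  table[0] = 42
  table[5] = 7      # indexes 1 through 4 are padded with empty_value
  table.flush()
  assert table[0] == 42
  assert table[5] == 7
  assert table.get(3) is None           # index 3 was never set
  assert list(table.iterkeys()) == [0, 5]
  table.close()
  os.remove(demo_filename)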