diff --git a/.gitignore b/.gitignore
index eec80860b..8dd671052 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
/dist
/doc/_build
nbproject
+.nosebazinga
diff --git a/.gitmodules b/.gitmodules
index 83a5207ef..8535685a3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +1,6 @@
-[submodule "gitdb"]
- path = git/ext/gitdb
- url = git://github.com/gitpython-developers/gitdb.git
- branch = master
+[submodule "async"]
+ path = git/ext/async
+ url = git://github.com/gitpython-developers/async.git
+[submodule "smmap"]
+ path = git/ext/smmap
+ url = git://github.com/Byron/smmap.git
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index 2433d00ee..ad0f2530e 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -4,8 +4,36 @@ Changelog
NEXT
====
-* Blob Type
- * Added mode constants to ease the manual creation of blobs
+* ### Class Renames ###
+
+ * Renamed **GitCmdObjectDB** to **CmdGitDB** (analogous to **PureCmdDB**)
+
+* ### Interface Changes ###
+
+ * **SymbolicReference**
+
+ * object_binsha property added
+
+ * **Blob** Type
+
+ * Added mode constants to ease the manual creation of blobs
+
+ * **Repo** (i.e. **HighLevelRepository**) now supports providing a progress instance to its ``clone()`` and ``clone_from()`` methods.
+
+* ### Module Changes ###
+
+ * Removed the rev_parse function from git.repo.fun - the respective functionality is available only through the repository's rev_parse method, which may in turn be backed by any implementation.
+
+* ### Git Cmd ###
+
+ * Added ``version_info`` property to the git command, returning a tuple of version numbers (see the example at the end of this section).
+ * Added GIT_PYTHON_GIT_EXECUTABLE environment variable, which can be used to set the desired git executable, regardless of what would otherwise be found in the PATH.
+ * GIT_PYTHON_TRACE is now set on class level of the Git type; previously it was a module-level global variable.
+ * GIT_PYTHON_GIT_EXECUTABLE is a class-level variable as well.
+
+* ### Exceptions ###
+
+ * There is a new common base for all exceptions git-python will throw, namely `GitPythonError`.
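+
+A short, illustrative sketch of some of the new functionality; the repository path is an example only, and ``GitPythonError`` is assumed to be importable from the ``git`` package as implied above::
+
+    import git
+    repo = git.Repo('.')                  # any existing repository
+    print repo.git.version_info           # e.g. (1, 7, 4, 1), parsed on demand and cached
+
+    try:
+        repo.git.fetch('no-such-remote')
+    except git.GitPythonError, err:       # common base of all git-python exceptions
+        print "git command failed:", err
+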
0.3.1 Beta 2
============
diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst
index 5530cedd6..4dcfad4aa 100644
--- a/doc/source/tutorial.rst
+++ b/doc/source/tutorial.rst
@@ -412,6 +412,20 @@ The special notion ``git.command(flag=True)`` will create a flag without value l
If ``None`` is found in the arguments, it will be dropped silently. Lists and tuples passed as arguments will be unpacked recursively to individual arguments. Objects are converted to strings using the str(...) function.
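+
+For example, assuming ``repo`` is an existing ``Repo`` instance, the following sketch shows how keyword arguments translate into command-line flags (the exact flag order may vary)::
+
+    repo.git.log('HEAD', max_count=2, p=True)
+    # roughly equivalent to: git log --max-count=2 -p HEAD
+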
+Git Command Debugging and Customization
+***************************************
+
+Using environment variables, you can further adjust the behaviour of the git command, as shown in the example below.
+
+* **GIT_PYTHON_TRACE**
+
+ * If set to a value other than 0, all executed git commands will be printed to stdout.
+ * If set to *full*, the executed git command will be printed along with its output.
+
+* **GIT_PYTHON_GIT_EXECUTABLE**
+
+ * If set, it should contain the full path to the git executable, e.g. *c:\\Program Files (x86)\\Git\\bin\\git.exe* on Windows or */usr/bin/git* on Linux.
+
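+A minimal sketch of using these variables - they must be set before the ``git`` package is imported, and the paths below are just examples::
+
+    import os
+    os.environ['GIT_PYTHON_TRACE'] = 'full'                    # print commands and their output
+    os.environ['GIT_PYTHON_GIT_EXECUTABLE'] = '/usr/bin/git'   # explicit path to the git binary
+
+    import git
+    repo = git.Repo('.')
+    repo.git.status()                                          # the executed command is now traced
+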
And even more ...
*****************
diff --git a/git/__init__.py b/git/__init__.py
index 0658c3306..adc5487e6 100644
--- a/git/__init__.py
+++ b/git/__init__.py
@@ -14,13 +14,15 @@
#{ Initialization
def _init_externals():
"""Initialize external projects by putting them into the path"""
- sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'gitdb'))
-
- try:
- import gitdb
- except ImportError:
- raise ImportError("'gitdb' could not be found in your PYTHONPATH")
- #END verify import
+ ext_base = os.path.join(os.path.dirname(__file__), 'ext')
+ for package in ('async', 'smmap'):
+ sys.path.append(os.path.join(ext_base, package))
+ try:
+ __import__(package)
+ except ImportError:
+ raise ImportError("%r could not be found in your PYTHONPATH" % package)
+ #END verify import
+ #END handle external import
#} END initialization
@@ -37,9 +39,9 @@ def _init_externals():
from git.exc import *
from git.db import *
from git.cmd import Git
-from git.repo import Repo
from git.remote import *
from git.index import *
+from git.repo import Repo
from git.util import (
LockFile,
BlockingLockFile,
diff --git a/git/base.py b/git/base.py
new file mode 100644
index 000000000..ff1062bf6
--- /dev/null
+++ b/git/base.py
@@ -0,0 +1,311 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module with basic data structures - they are designed to be lightweight and fast"""
+from util import (
+ bin_to_hex,
+ zlib
+ )
+
+from fun import (
+ type_id_to_type_map,
+ type_to_type_id_map
+ )
+
+__all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo',
+ 'OStream', 'OPackStream', 'ODeltaPackStream',
+ 'IStream', 'InvalidOInfo', 'InvalidOStream' )
+
+#{ ODB Bases
+
+class OInfo(tuple):
+ """Carries information about an object in an ODB, provding information
+ about the binary sha of the object, the type_string as well as the uncompressed size
+ in bytes.
+
+ It can be accessed using tuple notation and using attribute access notation::
+
+ assert dbi[0] == dbi.binsha
+ assert dbi[1] == dbi.type
+ assert dbi[2] == dbi.size
+
+ The type is designed to be as lightweight as possible."""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, type, size):
+ return tuple.__new__(cls, (sha, type, size))
+
+ def __init__(self, *args):
+ tuple.__init__(self)
+
+ #{ Interface
+ @property
+ def binsha(self):
+ """:return: our sha as binary, 20 bytes"""
+ return self[0]
+
+ @property
+ def hexsha(self):
+ """:return: our sha, hex encoded, 40 bytes"""
+ return bin_to_hex(self[0])
+
+ @property
+ def type(self):
+ return self[1]
+
+ @property
+ def type_id(self):
+ return type_to_type_id_map[self[1]]
+
+ @property
+ def size(self):
+ return self[2]
+ #} END interface
+
+
+class OPackInfo(tuple):
+ """As OInfo, but provides a type_id property to retrieve the numerical type id, and
+ does not include a sha.
+
+ Additionally, the pack_offset is the absolute offset into the packfile at which
+ all object information is located. The data_offset property points to the absolute
+ location in the pack at which the actual data stream can be found."""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size):
+ return tuple.__new__(cls, (packoffset, type, size))
+
+ def __init__(self, *args):
+ tuple.__init__(self)
+
+ #{ Interface
+
+ @property
+ def pack_offset(self):
+ return self[0]
+
+ @property
+ def type(self):
+ return type_id_to_type_map[self[1]]
+
+ @property
+ def type_id(self):
+ return self[1]
+
+ @property
+ def size(self):
+ return self[2]
+
+ #} END interface
+
+
+class ODeltaPackInfo(OPackInfo):
+ """Adds delta specific information,
+ Either the 20 byte sha which points to some object in the database,
+ or the negative offset from the pack_offset, so that pack_offset - delta_info yields
+ the pack offset of the base object"""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size, delta_info):
+ return tuple.__new__(cls, (packoffset, type, size, delta_info))
+
+ #{ Interface
+ @property
+ def delta_info(self):
+ return self[3]
+ #} END interface
+
+
+class OStream(OInfo):
+ """Base for object streams retrieved from the database, providing additional
+ information about the stream.
+ Generally, ODB streams are read-only as objects are immutable"""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, type, size, stream, *args, **kwargs):
+ """Helps with the initialization of subclasses"""
+ return tuple.__new__(cls, (sha, type, size, stream))
+
+
+ def __init__(self, *args, **kwargs):
+ tuple.__init__(self)
+
+ #{ Stream Reader Interface
+
+ def read(self, size=-1):
+ return self[3].read(size)
+
+ @property
+ def stream(self):
+ return self[3]
+
+ #} END stream reader interface
+
+
+class ODeltaStream(OStream):
+ """Uses size info of its stream, delaying reads"""
+
+ def __new__(cls, sha, type, size, stream, *args, **kwargs):
+ """Helps with the initialization of subclasses"""
+ return tuple.__new__(cls, (sha, type, size, stream))
+
+ #{ Stream Reader Interface
+
+ @property
+ def size(self):
+ return self[3].size
+
+ #} END stream reader interface
+
+
+class OPackStream(OPackInfo):
+ """Next to pack object information, a stream outputting an undeltified base object
+ is provided"""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size, stream, *args):
+ """Helps with the initialization of subclasses"""
+ return tuple.__new__(cls, (packoffset, type, size, stream))
+
+ #{ Stream Reader Interface
+ def read(self, size=-1):
+ return self[3].read(size)
+
+ @property
+ def stream(self):
+ return self[3]
+ #} END stream reader interface
+
+
+class ODeltaPackStream(ODeltaPackInfo):
+ """Provides a stream outputting the uncompressed offset delta information"""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size, delta_info, stream):
+ return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
+
+
+ #{ Stream Reader Interface
+ def read(self, size=-1):
+ return self[4].read(size)
+
+ @property
+ def stream(self):
+ return self[4]
+ #} END stream reader interface
+
+
+class IStream(list):
+ """Represents an input content stream to be fed into the ODB. It is mutable to allow
+ the ODB to record information about the operation's outcome right in this instance.
+
+ It provides interfaces for the OStream and a StreamReader to allow the instance
+ to blend in without prior conversion.
+
+ The only method your content stream must support is 'read'"""
+ __slots__ = tuple()
+
+ def __new__(cls, type, size, stream, sha=None):
+ return list.__new__(cls, (sha, type, size, stream, None))
+
+ def __init__(self, type, size, stream, sha=None):
+ list.__init__(self, (sha, type, size, stream, None))
+
+ #{ Interface
+ @property
+ def hexsha(self):
+ """:return: our sha, hex encoded, 40 bytes"""
+ return bin_to_hex(self[0])
+
+ def _error(self):
+ """:return: the error that occurred when processing the stream, or None"""
+ return self[4]
+
+ def _set_error(self, exc):
+ """Set this input stream to the given exc, may be None to reset the error"""
+ self[4] = exc
+
+ error = property(_error, _set_error)
+
+ #} END interface
+
+ #{ Stream Reader Interface
+
+ def read(self, size=-1):
+ """Implements a simple stream reader interface, passing the read call on
+ to our internal stream"""
+ return self[3].read(size)
+
+ #} END stream reader interface
+
+ #{ interface
+
+ def _set_binsha(self, binsha):
+ self[0] = binsha
+
+ def _binsha(self):
+ return self[0]
+
+ binsha = property(_binsha, _set_binsha)
+
+
+ def _type(self):
+ return self[1]
+
+ def _set_type(self, type):
+ self[1] = type
+
+ type = property(_type, _set_type)
+
+ def _size(self):
+ return self[2]
+
+ def _set_size(self, size):
+ self[2] = size
+
+ size = property(_size, _set_size)
+
+ def _stream(self):
+ return self[3]
+
+ def _set_stream(self, stream):
+ self[3] = stream
+
+ stream = property(_stream, _set_stream)
+
+ #} END odb info interface
+
+
+class InvalidOInfo(tuple):
+ """Carries information about a sha identifying an object which is invalid in
+ the queried database. The exception attribute provides more information about
+ the cause of the issue"""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, exc):
+ return tuple.__new__(cls, (sha, exc))
+
+ def __init__(self, sha, exc):
+ tuple.__init__(self, (sha, exc))
+
+ @property
+ def binsha(self):
+ return self[0]
+
+ @property
+ def hexsha(self):
+ return bin_to_hex(self[0])
+
+ @property
+ def error(self):
+ """:return: exception instance explaining the failure"""
+ return self[1]
+
+
+class InvalidOStream(InvalidOInfo):
+ """Carries information about an invalid ODB stream"""
+ __slots__ = tuple()
+
+#} END ODB Bases
+
diff --git a/git/cmd.py b/git/cmd.py
index 60887f5da..576a5300a 100644
--- a/git/cmd.py
+++ b/git/cmd.py
@@ -5,7 +5,10 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os, sys
-from util import *
+from util import (
+ LazyMixin,
+ stream_copy
+ )
from exc import GitCommandError
from subprocess import (
@@ -14,9 +17,6 @@
PIPE
)
-# Enables debugging of GitPython's git commands
-GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
-
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
'with_exceptions', 'as_process',
'output_stream' )
@@ -26,7 +26,8 @@
def dashify(string):
return string.replace('_', '-')
-class Git(object):
+
+class Git(LazyMixin):
"""
The Git class manages communication with the Git binary.
@@ -41,12 +42,23 @@ class Git(object):
of the command to stdout.
Set its value to 'full' to see details about the returned values.
"""
- __slots__ = ("_working_dir", "cat_file_all", "cat_file_header")
+ __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info")
# CONFIGURATION
# The size in bytes read from stdout when copying git's output to another stream
max_chunk_size = 1024*64
+ git_exec_name = "git" # default that should work on linux and windows
+ git_exec_name_win = "git.cmd" # alternate command name, windows only
+
+ # Enables debugging of GitPython's git commands
+ GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
+
+ # Provide the full path to the git executable. Otherwise it assumes git is in the path
+ _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE"
+ GIT_PYTHON_GIT_EXECUTABLE = os.environ.get(_git_exec_env_var, git_exec_name)
+
+
class AutoInterrupt(object):
"""Kill/Interrupt the stored process instance once this instance goes out of scope. It is
used to prevent processes piling up in case iterators stop reading.
@@ -214,14 +226,32 @@ def __getattr__(self, name):
"""A convenience method as it allows to call the command as if it was
an object.
:return: Callable object that will execute call _call_process with your arguments."""
- if name[:1] == '_':
- raise AttributeError(name)
+ if name[0] == '_':
+ return LazyMixin.__getattr__(self, name)
return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
+ def _set_cache_(self, attr):
+ if attr == '_version_info':
+ # We only use the first 4 numbers, as everything else could in fact be strings (on Windows)
+ version_numbers = self._call_process('version').split(' ')[2]
+ self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4])
+ else:
+ super(Git, self)._set_cache_(attr)
+ #END handle version info
+
+
@property
def working_dir(self):
""":return: Git directory we are working on"""
return self._working_dir
+
+ @property
+ def version_info(self):
+ """
+ :return: tuple(int, int, int, int) tuple with integers representing the major, minor
+ and additional version numbers as parsed from git version.
+ This value is generated on demand and is cached"""
+ return self._version_info
def execute(self, command,
istream=None,
@@ -290,7 +320,7 @@ def execute(self, command,
:note:
If you add additional keyword arguments to the signature of this method,
you must update the execute_kwargs tuple housed in this module."""
- if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full':
+ if self.GIT_PYTHON_TRACE and not self.GIT_PYTHON_TRACE == 'full':
print ' '.join(command)
# Allow the user to have the command executed in their working dir.
@@ -337,7 +367,7 @@ def execute(self, command,
proc.stdout.close()
proc.stderr.close()
- if GIT_PYTHON_TRACE == 'full':
+ if self.GIT_PYTHON_TRACE == 'full':
cmdstr = " ".join(command)
if stderr_value:
print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value)
@@ -423,11 +453,40 @@ def _call_process(self, method, *args, **kwargs):
ext_args = self.__unpack_args([a for a in args if a is not None])
args = opt_args + ext_args
-
- call = ["git", dashify(method)]
- call.extend(args)
-
- return self.execute(call, **_kwargs)
+
+ def make_call():
+ call = [self.GIT_PYTHON_GIT_EXECUTABLE, dashify(method)]
+ call.extend(args)
+ return call
+ #END utility to recreate call after changes
+
+ if sys.platform == 'win32':
+ try:
+ try:
+ return self.execute(make_call(), **_kwargs)
+ except WindowsError:
+ # did we switch to git.cmd already, or was it changed from the default? Then fail permanently
+ if self.GIT_PYTHON_GIT_EXECUTABLE != self.git_exec_name:
+ raise
+ #END handle overridden variable
+ type(self).GIT_PYTHON_GIT_EXECUTABLE = self.git_exec_name_win
+ call = [self.GIT_PYTHON_GIT_EXECUTABLE] + list(args)
+
+ try:
+ return self.execute(make_call(), **_kwargs)
+ finally:
+ import warnings
+ msg = "WARNING: Automatically switched to use git.cmd as git executable, which reduces performance by ~70%."
+ msg += "Its recommended to put git.exe into the PATH or to set the %s environment variable to the executable's location" % self._git_exec_env_var
+ warnings.warn(msg)
+ #END print of warning
+ #END catch first failure
+ except WindowsError:
+ raise WindowsError("The system cannot find or execute the file at %r" % self.GIT_PYTHON_GIT_EXECUTABLE)
+ #END provide better error message
+ else:
+ return self.execute(make_call(), **_kwargs)
+ #END handle windows default installation
def _parse_object_header(self, header_line):
"""
diff --git a/git/config.py b/git/config.py
index f1a8832e1..c71bb8ca4 100644
--- a/git/config.py
+++ b/git/config.py
@@ -120,11 +120,12 @@ class GitConfigParser(cp.RawConfigParser, object):
# They must be compatible to the LockFile interface.
# A suitable alternative would be the BlockingLockFile
t_lock = LockFile
+ re_comment = re.compile('^\s*[#;]')
#} END configuration
OPTCRE = re.compile(
- r'\s?(?P<option>[^:=\s][^:=]*)' # very permissive, including leading whitespace
+ r'\s*(?P<option>[^:=\s][^:=]*)' # very permissive, including leading whitespace
 r'\s*(?P<vi>[:=])\s*' # any number of space/tab,
# followed by separator
# (either : or =), followed
@@ -211,16 +212,16 @@ def _read(self, fp, fpname):
break
lineno = lineno + 1
# comment or blank line?
- if line.strip() == '' or line[0] in '#;':
+ if line.strip() == '' or self.re_comment.match(line):
continue
if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR":
# no leading whitespace
continue
else:
# is it a section header?
- mo = self.SECTCRE.match(line)
+ mo = self.SECTCRE.match(line.strip())
if mo:
- sectname = mo.group('header')
+ sectname = mo.group('header').strip()
if sectname in self._sections:
cursect = self._sections[sectname]
elif sectname == cp.DEFAULTSECT:
@@ -332,6 +333,10 @@ def write(self):
close_fp = True
else:
fp.seek(0)
+ # make sure we do not overwrite into an existing file
+ if hasattr(fp, 'truncate'):
+ fp.truncate()
+ #END
# END handle stream or file
# WRITE DATA
diff --git a/git/db.py b/git/db.py
deleted file mode 100644
index b1c653779..000000000
--- a/git/db.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Module with our own gitdb implementation - it uses the git command"""
-from exc import (
- GitCommandError,
- BadObject
- )
-
-from gitdb.base import (
- OInfo,
- OStream
- )
-
-from gitdb.util import (
- bin_to_hex,
- hex_to_bin
- )
-from gitdb.db import GitDB
-from gitdb.db import LooseObjectDB
-
-
-__all__ = ('GitCmdObjectDB', 'GitDB' )
-
-#class GitCmdObjectDB(CompoundDB, ObjectDBW):
-class GitCmdObjectDB(LooseObjectDB):
- """A database representing the default git object store, which includes loose
- objects, pack files and an alternates file
-
- It will create objects only in the loose object database.
- :note: for now, we use the git command to do all the lookup, just until he
- have packs and the other implementations
- """
- def __init__(self, root_path, git):
- """Initialize this instance with the root and a git command"""
- super(GitCmdObjectDB, self).__init__(root_path)
- self._git = git
-
- def info(self, sha):
- hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
- return OInfo(hex_to_bin(hexsha), typename, size)
-
- def stream(self, sha):
- """For now, all lookup is done by git itself"""
- hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
- return OStream(hex_to_bin(hexsha), typename, size, stream)
-
-
- # { Interface
-
- def partial_to_complete_sha_hex(self, partial_hexsha):
- """:return: Full binary 20 byte sha from the given partial hexsha
- :raise AmbiguousObjectName:
- :raise BadObject:
- :note: currently we only raise BadObject as git does not communicate
- AmbiguousObjects separately"""
- try:
- hexsha, typename, size = self._git.get_object_header(partial_hexsha)
- return hex_to_bin(hexsha)
- except (GitCommandError, ValueError):
- raise BadObject(partial_hexsha)
- # END handle exceptions
-
- #} END interface
diff --git a/git/db/__init__.py b/git/db/__init__.py
new file mode 100644
index 000000000..25948326d
--- /dev/null
+++ b/git/db/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from interface import *
diff --git a/git/db/cmd/__init__.py b/git/db/cmd/__init__.py
new file mode 100644
index 000000000..8a681e428
--- /dev/null
+++ b/git/db/cmd/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py
new file mode 100644
index 000000000..31e0221b5
--- /dev/null
+++ b/git/db/cmd/base.py
@@ -0,0 +1,862 @@
+"""module with git command implementations of the basic interfaces
+:note: we could add all implementations of the basic interfaces, its more efficient though
+ to obtain them from the pure implementation"""
+from git.exc import (
+ GitCommandError,
+ BadObject
+ )
+
+from git.base import (
+ OInfo,
+ OStream
+ )
+
+from git.util import (
+ bin_to_hex,
+ hex_to_bin,
+ isfile,
+ join_path,
+ join,
+ Actor,
+ IterableList,
+ )
+from git.db.interface import (
+ FetchInfo,
+ PushInfo,
+ HighLevelRepository,
+ TransportDB,
+ RemoteProgress
+ )
+from git.cmd import Git
+from git.refs import (
+ Reference,
+ RemoteReference,
+ SymbolicReference,
+ TagReference
+ )
+from git.objects.commit import Commit
+from cStringIO import StringIO
+import re
+import os
+import sys
+
+
+__all__ = ('CmdTransportMixin', 'GitCommandMixin', 'CmdPushInfo', 'CmdFetchInfo',
+ 'CmdRemoteProgress', 'CmdObjectDBRMixin', 'CmdHighLevelRepository')
+
+
+#{ Utilities
+
+def touch(filename):
+ fp = open(filename, "a")
+ fp.close()
+
+
+def digest_process_messages(fh, progress):
+ """Read progress messages from file-like object fh, supplying the respective
+ progress messages to the progress instance.
+
+ :return: list(line, ...) list of lines without linebreaks that did
+ not contain progress information"""
+ line_so_far = ''
+ dropped_lines = list()
+ while True:
+ char = fh.read(1)
+ if not char:
+ break
+
+ if char in ('\r', '\n'):
+ dropped_lines.extend(progress._parse_progress_line(line_so_far))
+ line_so_far = ''
+ else:
+ line_so_far += char
+ # END process parsed line
+ # END while file is not done reading
+ return dropped_lines
+
+def finalize_process(proc):
+ """Wait for the process (fetch, pull or push) and handle its errors accordingly"""
+ try:
+ proc.wait()
+ except GitCommandError, e:
+ # if a push has rejected items, the command has non-zero return status
+ # a return status of 128 indicates a connection error - reraise the previous one
+ if proc.poll() == 128:
+ raise
+ pass
+ # END exception handling
+
+
+def get_fetch_info_from_stderr(repo, proc, progress):
+ # skip first line as it is some remote info we are not interested in
+ output = IterableList('name')
+
+
+ # lines which are no progress are fetch info lines
+ # this also waits for the command to finish
+ # Skip some progress lines that don't provide relevant information
+ fetch_info_lines = list()
+ for line in digest_process_messages(proc.stderr, progress):
+ if line.startswith('From') or line.startswith('remote: Total'):
+ continue
+ elif line.startswith('warning:'):
+ print >> sys.stderr, line
+ continue
+ elif line.startswith('fatal:'):
+ raise GitCommandError(("Error when fetching: %s" % line,), 2)
+ # END handle special messages
+ fetch_info_lines.append(line)
+ # END for each line
+
+ # read head information
+ fp = open(join(repo.git_dir, 'FETCH_HEAD'), 'r')
+ fetch_head_info = fp.readlines()
+ fp.close()
+
+ assert len(fetch_info_lines) == len(fetch_head_info)
+
+ output.extend(CmdFetchInfo._from_line(repo, err_line, fetch_line)
+ for err_line, fetch_line in zip(fetch_info_lines, fetch_head_info))
+
+ finalize_process(proc)
+ return output
+
+def get_push_info(repo, remotename_or_url, proc, progress):
+ # read progress information from stderr
+ # we hope stdout can hold all the data, it should ...
+ # read the lines manually as it will use carriage returns between the messages
+ # to override the previous one. This is why we read the bytes manually
+ digest_process_messages(proc.stderr, progress)
+
+ output = IterableList('name')
+ for line in proc.stdout.readlines():
+ try:
+ output.append(CmdPushInfo._from_line(repo, remotename_or_url, line))
+ except ValueError:
+ # if an error happens, additional info is given which we cannot parse
+ pass
+ # END exception handling
+ # END for each line
+
+ finalize_process(proc)
+ return output
+
+def add_progress(kwargs, git, progress):
+ """Add the --progress flag to the given kwargs dict if supported by the
+ git command. If the given progress instance does not wrap an actual progress
+ object, we do not request any progress output
+ :return: possibly altered kwargs"""
+ if progress._progress is not None:
+ v = git.version_info
+ if v >= (1, 7, 0, 4):
+ kwargs['progress'] = True
+ #END handle --progress
+ #END handle progress
+ return kwargs
+
+#} END utilities
+
+class CmdRemoteProgress(RemoteProgress):
+ """
+ A Remote progress implementation taking a user derived progress to call the
+ respective methods on.
+ """
+ __slots__ = ("_seen_ops", '_progress')
+ re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)")
+ re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
+
+ def __init__(self, progress_instance=None):
+ self._seen_ops = list()
+ if progress_instance is None:
+ progress_instance = RemoteProgress()
+ #END assure proper instance
+ self._progress = progress_instance
+
+ def _parse_progress_line(self, line):
+ """Parse progress information from the given line as retrieved by git-push
+ or git-fetch
+
+ Call the own update(), __call__() and line_dropped() methods according
+ to the parsed result.
+
+ :return: list(line, ...) list of lines that could not be processed"""
+ # handle
+ # Counting objects: 4, done.
+ # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done.
+ sub_lines = line.split('\r')
+ failed_lines = list()
+ for sline in sub_lines:
+ # find escape characters and cut them away - regex will not work with
+ # them as they are non-ASCII. As git might expect a tty, it will send them
+ last_valid_index = None
+ for i, c in enumerate(reversed(sline)):
+ if ord(c) < 32:
+ # its a slice index
+ last_valid_index = -i - 1
+ # END character was non-ascii
+ # END for each character in sline
+ if last_valid_index is not None:
+ sline = sline[:last_valid_index]
+ # END cut away invalid part
+ sline = sline.rstrip()
+
+ cur_count, max_count = None, None
+ match = self.re_op_relative.match(sline)
+ if match is None:
+ match = self.re_op_absolute.match(sline)
+
+ if not match:
+ self._progress.line_dropped(sline)
+ failed_lines.append(sline)
+ continue
+ # END could not get match
+
+ op_code = 0
+ remote, op_name, percent, cur_count, max_count, message = match.groups()
+
+ # get operation id
+ if op_name == "Counting objects":
+ op_code |= self.COUNTING
+ elif op_name == "Compressing objects":
+ op_code |= self.COMPRESSING
+ elif op_name == "Writing objects":
+ op_code |= self.WRITING
+ elif op_name == "Receiving objects":
+ op_code |= self.RECEIVING
+ elif op_name == "Resolving deltas":
+ op_code |= self.RESOLVING
+ else:
+ # Note: On windows it can happen that partial lines are sent
+ # Hence we get something like "CompreReceiving objects", which is
+ # a blend of "Compressing objects" and "Receiving objects".
+ # This can't really be prevented, so we drop the line verbosely
+ # to make sure we get informed in case the process spits out new
+ # commands at some point.
+ self._progress.line_dropped(sline)
+ sys.stderr.write("Operation name %r unknown - skipping line '%s'" % (op_name, sline))
+ # Note: Don't add this line to the failed lines, as we have to silently
+ # drop it
+ return failed_lines
+ #END handle opcode
+
+ # figure out stage
+ if op_code not in self._seen_ops:
+ self._seen_ops.append(op_code)
+ op_code |= self.BEGIN
+ # END begin opcode
+
+ if message is None:
+ message = ''
+ # END message handling
+
+ message = message.strip()
+ done_token = ', done.'
+ if message.endswith(done_token):
+ op_code |= self.END
+ message = message[:-len(done_token)]
+ # END end message handling
+
+ self._progress.update(op_code, cur_count, max_count, message, line)
+ self._progress(message, line)
+ # END for each sub line
+ return failed_lines
+
+
+class CmdPushInfo(PushInfo):
+ """
+ Pure Python implementation of a PushInfo interface
+ """
+ __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha',
+ '_remotename_or_url', 'repo', 'summary')
+
+ _flag_map = { 'X' : PushInfo.NO_MATCH,
+ '-' : PushInfo.DELETED, '*' : 0,
+ '+' : PushInfo.FORCED_UPDATE,
+ ' ' : PushInfo.FAST_FORWARD,
+ '=' : PushInfo.UP_TO_DATE,
+ '!' : PushInfo.ERROR }
+
+ def __init__(self, flags, local_ref, remote_ref_string, repo, remotename_or_url, old_commit_binsha=None,
+ summary=''):
+ """ Initialize a new instance """
+ self.flags = flags
+ self.local_ref = local_ref
+ self.repo = repo
+ self.remote_ref_string = remote_ref_string
+ self._remotename_or_url = remotename_or_url
+ self.old_commit_binsha = old_commit_binsha
+ self.summary = summary
+
+ @property
+ def remote_ref(self):
+ """
+ :return:
+ Remote Reference or TagReference in the local repository corresponding
+ to the remote_ref_string kept in this instance."""
+ # translate heads to a local remote, tags stay as they are
+ if self.remote_ref_string.startswith("refs/tags"):
+ return TagReference(self.repo, self.remote_ref_string)
+ elif self.remote_ref_string.startswith("refs/heads"):
+ remote_ref = Reference(self.repo, self.remote_ref_string)
+ if '/' in self._remotename_or_url:
+ sys.stderr.write("Cannot provide RemoteReference instance if it was created from a url instead of of a remote name: %s. Returning Reference instance instead" % self._remotename_or_url)
+ return remote_ref
+ #END assert correct input
+ return RemoteReference(self.repo, "refs/remotes/%s/%s" % (str(self._remotename_or_url), remote_ref.name))
+ else:
+ raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string)
+ # END
+
+ @classmethod
+ def _from_line(cls, repo, remotename_or_url, line):
+ """Create a new PushInfo instance as parsed from line which is expected to be like
+ refs/heads/master:refs/heads/master 05d2687..1d0568e"""
+ control_character, from_to, summary = line.split('\t', 3)
+ flags = 0
+
+ # control character handling
+ try:
+ flags |= cls._flag_map[ control_character ]
+ except KeyError:
+ raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line))
+ # END handle control character
+
+ # from_to handling
+ from_ref_string, to_ref_string = from_to.split(':')
+ if flags & cls.DELETED:
+ from_ref = None
+ else:
+ from_ref = Reference.from_path(repo, from_ref_string)
+
+ # commit handling, could be message or commit info
+ old_commit_binsha = None
+ if summary.startswith('['):
+ if "[rejected]" in summary:
+ flags |= cls.REJECTED
+ elif "[remote rejected]" in summary:
+ flags |= cls.REMOTE_REJECTED
+ elif "[remote failure]" in summary:
+ flags |= cls.REMOTE_FAILURE
+ elif "[no match]" in summary:
+ flags |= cls.ERROR
+ elif "[new tag]" in summary:
+ flags |= cls.NEW_TAG
+ elif "[new branch]" in summary:
+ flags |= cls.NEW_HEAD
+ # uptodate encoded in control character
+ else:
+ # fast-forward or forced update - was encoded in control character,
+ # but we parse the old and new commit
+ split_token = "..."
+ if control_character == " ":
+ split_token = ".."
+ old_sha, new_sha = summary.split(' ')[0].split(split_token)
+ old_commit_binsha = repo.resolve(old_sha)
+ # END message handling
+
+ return cls(flags, from_ref, to_ref_string, repo, remotename_or_url, old_commit_binsha, summary)
+
+
+class CmdFetchInfo(FetchInfo):
+ """
+ Pure python implementation of a FetchInfo interface
+ """
+ __slots__ = ('ref', 'old_commit_binsha', 'flags', 'note')
+
+ # %c %-*s %-*s -> %s (%s)
+ re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
+
+ _flag_map = { '!' : FetchInfo.ERROR,
+ '+' : FetchInfo.FORCED_UPDATE,
+ '-' : FetchInfo.TAG_UPDATE,
+ '*' : 0,
+ '=' : FetchInfo.HEAD_UPTODATE,
+ ' ' : FetchInfo.FAST_FORWARD }
+
+ def __init__(self, ref, flags, note='', old_commit_binsha=None):
+ """
+ Initialize a new instance
+ """
+ self.ref = ref
+ self.flags = flags
+ self.note = note
+ self.old_commit_binsha = old_commit_binsha
+
+ def __str__(self):
+ return self.name
+
+ @property
+ def name(self):
+ """:return: Name of our remote ref"""
+ return self.ref.name
+
+ @property
+ def commit(self):
+ """:return: Commit of our remote ref"""
+ return self.ref.commit
+
+ @classmethod
+ def _from_line(cls, repo, line, fetch_line):
+ """Parse information from the given line as returned by git-fetch -v
+ and return a new CmdFetchInfo object representing this information.
+
+ We can handle a line as follows
+ "%c %-*s %-*s -> %s%s"
+
+ Where c is either ' ', !, +, -, *, or =
+ ! means error
+ + means success forcing update
+ - means a tag was updated
+ * means birth of new branch or tag
+ = means the head was up to date ( and not moved )
+ ' ' means a fast-forward
+
+ fetch line is the corresponding line from FETCH_HEAD, like
+ acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo"""
+ match = cls.re_fetch_result.match(line)
+ if match is None:
+ raise ValueError("Failed to parse line: %r" % line)
+
+ # parse lines
+ control_character, operation, local_remote_ref, remote_local_ref, note = match.groups()
+ try:
+ new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t")
+ ref_type_name, fetch_note = fetch_note.split(' ', 1)
+ except ValueError: # unpack error
+ raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line)
+
+ # handle FETCH_HEAD and figure out ref type
+ # If we do not specify a target branch like master:refs/remotes/origin/master,
+ # the fetch result is stored in FETCH_HEAD which destroys the rule we usually
+ # have. In that case we use a symbolic reference which is detached
+ ref_type = None
+ if remote_local_ref == "FETCH_HEAD":
+ ref_type = SymbolicReference
+ elif ref_type_name in ("remote-tracking", "branch"):
+ # note: remote-tracking is just the first part of the 'remote-tracking branch' token.
+ # We don't parse it correctly, but it's enough to know what to do, and it's new in git 1.7something
+ ref_type = RemoteReference
+ elif ref_type_name == "tag":
+ ref_type = TagReference
+ else:
+ raise TypeError("Cannot handle reference type: %r" % ref_type_name)
+ #END handle ref type
+
+ # create ref instance
+ if ref_type is SymbolicReference:
+ remote_local_ref = ref_type(repo, "FETCH_HEAD")
+ else:
+ # determine prefix. Tags are usually pulled into refs/tags, they may have subdirectories.
+ # It is not clear sometimes where exactly the item is, unless we have an absolute path as indicated
+ # by the 'ref/' prefix. Otherwise even a tag could be in refs/remotes, which is when it will have the
+ # 'tags/' subdirectory in its path.
+ # We don't want to test for actual existence, but try to figure everything out analytically.
+ ref_path = None
+ remote_local_ref = remote_local_ref.strip()
+ if remote_local_ref.startswith(Reference._common_path_default + "/"):
+ # always use actual type if we get absolute paths
+ # Will always be the case if something is fetched outside of refs/remotes (if its not a tag)
+ ref_path = remote_local_ref
+ if ref_type is not TagReference and not remote_local_ref.startswith(RemoteReference._common_path_default + "/"):
+ ref_type = Reference
+ #END downgrade remote reference
+ elif ref_type is TagReference and 'tags/' in remote_local_ref:
+ # even though its a tag, it is located in refs/remotes
+ ref_path = join_path(RemoteReference._common_path_default, remote_local_ref)
+ else:
+ ref_path = join_path(ref_type._common_path_default, remote_local_ref)
+ #END obtain refpath
+
+ # even though the path could be within the git conventions, we make
+ # sure we respect whatever the user wanted, and disabled path checking
+ remote_local_ref = ref_type(repo, ref_path, check_path=False)
+ # END create ref instance
+
+
+ note = (note and note.strip()) or ''
+
+ # parse flags from control_character
+ flags = 0
+ try:
+ flags |= cls._flag_map[control_character]
+ except KeyError:
+ raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line))
+ # END control char exception handling
+
+ # parse operation string for more info - makes no sense for symbolic refs
+ old_commit_binsha = None
+ if isinstance(remote_local_ref, Reference):
+ if 'rejected' in operation:
+ flags |= cls.REJECTED
+ if 'new tag' in operation:
+ flags |= cls.NEW_TAG
+ if 'new branch' in operation:
+ flags |= cls.NEW_HEAD
+ if '...' in operation or '..' in operation:
+ split_token = '...'
+ if control_character == ' ':
+ split_token = split_token[:-1]
+ old_commit_binsha = repo.resolve(operation.split(split_token)[0])
+ # END handle refspec
+ # END reference flag handling
+
+ return cls(remote_local_ref, flags, note, old_commit_binsha)
+
+
+class GitCommandMixin(object):
+ """A mixin to provide the git command object through the git property"""
+
+ def __init__(self, *args, **kwargs):
+ """Initialize this instance with the root and a git command"""
+ super(GitCommandMixin, self).__init__(*args, **kwargs)
+ self._git = Git(self.working_dir)
+
+ @property
+ def git(self):
+ return self._git
+
+
+class CmdObjectDBRMixin(object):
+ """A mixing implementing object reading through a git command
+ It will create objects only in the loose object database.
+ :note: for now, we use the git command to do all the lookup, just until he
+ have packs and the other implementations
+ """
+ #{ ODB Interface
+ # overrides from PureOdb Implementation, which is responsible only for writing
+ # objects
+ def info(self, sha):
+ hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
+ return OInfo(hex_to_bin(hexsha), typename, size)
+
+ def stream(self, sha):
+ """For now, all lookup is done by git itself
+ :note: As we don't know when the stream is actually read (and if it is
+ stored for later use) we read the data right away and cache it.
+ This has HUGE performance implications, both for memory and for
+ reading/deserializing objects, but we have no other choice in order
+ to make the database behaviour consistent with other implementations!"""
+
+ hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
+ return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: Full binary 20 byte sha from the given partial hexsha
+ :raise AmbiguousObjectName:
+ :raise BadObject:
+ :note: currently we only raise BadObject as git does not communicate
+ AmbiguousObjects separately"""
+ try:
+ hexsha, typename, size = self._git.get_object_header(partial_hexsha)
+ return hex_to_bin(hexsha)
+ except (GitCommandError, ValueError):
+ raise BadObject(partial_hexsha)
+ # END handle exceptions
+
+ #} END odb interface
+
+
+class CmdTransportMixin(TransportDB):
+ """A mixin requiring the .git property as well as repository paths
+
+ It will create objects only in the loose object database.
+ :note: for now, we use the git command to do all the lookup, just until we
+ have packs and the other implementations
+ """
+
+ #{ Transport DB interface
+
+ def push(self, url, refspecs=None, progress=None, **kwargs):
+ """Push given refspecs using the git default implementation
+ :param url: may be a remote name or a url
+ :param refspecs: single string, RefSpec instance or list of such or None.
+ :param progress: RemoteProgress derived instance or None
+ :param **kwargs: Additional arguments to be passed to the git-push process"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **add_progress(kwargs, self.git, progress))
+ return get_push_info(self, url, proc, progress)
+
+ def pull(self, url, refspecs=None, progress=None, **kwargs):
+ """Fetch and merge the given refspecs.
+ If no refspecs are given, the merge will only work properly if you
+ have setup upstream (tracking) branches.
+ :param url: may be a remote name or a url
+ :param refspecs: see push()
+ :param progress: see push()"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+ return get_fetch_info_from_stderr(self, proc, progress)
+
+ def fetch(self, url, refspecs=None, progress=None, **kwargs):
+ """Fetch the latest changes
+ :param url: may be a remote name or a url
+ :param refspecs: see push()
+ :param progress: see push()"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+ return get_fetch_info_from_stderr(self, proc, progress)
+
+ #} end transport db interface
+
+
+class CmdHighLevelRepository(HighLevelRepository):
+ """An intermediate interface carrying advanced git functionality that can be used
+ in other compound repositories which do not implement this functionality themselves.
+
+ The mixin must be used with repositories compatible with the GitCommandMixin.
+
+ :note: at some point, methods provided here are supposed to be provided by custom interfaces"""
+ DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
+
+ # precompiled regex
+ re_whitespace = re.compile(r'\s+')
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+ re_author_committer_start = re.compile(r'^(author|committer)')
+ re_tab_full_line = re.compile(r'^\t(.*)$')
+
+ #{ Configuration
+ CommitCls = Commit
+ GitCls = Git
+ #} END configuration
+
+ def daemon_export():
+ def _get_daemon_export(self):
+ filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+ return os.path.exists(filename)
+
+ def _set_daemon_export(self, value):
+ filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+ fileexists = os.path.exists(filename)
+ if value and not fileexists:
+ touch(filename)
+ elif not value and fileexists:
+ os.unlink(filename)
+
+ return property(_get_daemon_export, _set_daemon_export,
+ doc="If True, git-daemon may export this repository")
+
+ daemon_export = daemon_export()
+
+ def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+ if self._bare:
+ # Bare repositories with no associated working directory are
+ # always considered to be clean.
+ return False
+
+ # start from the one which is fastest to evaluate
+ default_args = ('--abbrev=40', '--full-index', '--raw')
+ if index:
+ # diff index against HEAD
+ if isfile(self.index.path) and self.head.is_valid() and \
+ len(self.git.diff('HEAD', '--cached', *default_args)):
+ return True
+ # END index handling
+ if working_tree:
+ # diff index against working tree
+ if len(self.git.diff(*default_args)):
+ return True
+ # END working tree handling
+ if untracked_files:
+ if len(self.untracked_files):
+ return True
+ # END untracked files
+ return False
+
+ @property
+ def untracked_files(self):
+ # make sure we get all files, not only untracked directories
+ proc = self.git.status(untracked_files=True, as_process=True)
+ stream = iter(proc.stdout)
+ untracked_files = list()
+ for line in stream:
+ if not line.startswith("# Untracked files:"):
+ continue
+ # skip two lines
+ stream.next()
+ stream.next()
+
+ for untracked_info in stream:
+ if not untracked_info.startswith("#\t"):
+ break
+ untracked_files.append(untracked_info.replace("#\t", "").rstrip())
+ # END for each untracked info line
+ # END for each line
+ return untracked_files
+
+ def blame(self, file):
+ data = self.git.blame(file, p=True, w=True)
+ commits = dict()
+ blames = list()
+ info = None
+
+ for line in data.splitlines(False):
+ parts = self.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ if self.re_hexsha_only.search(firstpart):
+ # handles
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
+ digits = parts[-1].split(" ")
+ if len(digits) == 3:
+ info = {'id': firstpart}
+ blames.append([None, []])
+ elif info['id'] != firstpart:
+ info = {'id': firstpart}
+ blames.append([commits.get(firstpart), []])
+ # END blame data initialization
+ else:
+ m = self.re_author_committer_start.search(firstpart)
+ if m:
+ # handles:
+ # author Tom Preston-Werner
+ # author-mail <tom@mojombo.com>
+ # author-time 1192271832
+ # author-tz -0700
+ # committer Tom Preston-Werner
+ # committer-mail <tom@mojombo.com>
+ # committer-time 1192271832
+ # committer-tz -0700 - IGNORED BY US
+ role = m.group(0)
+ if firstpart.endswith('-mail'):
+ info["%s_email" % role] = parts[-1]
+ elif firstpart.endswith('-time'):
+ info["%s_date" % role] = int(parts[-1])
+ elif role == firstpart:
+ info[role] = parts[-1]
+ # END distinguish mail,time,name
+ else:
+ # handle
+ # filename lib/grit.rb
+ # summary add Blob
+ #
+ if firstpart.startswith('filename'):
+ info['filename'] = parts[-1]
+ elif firstpart.startswith('summary'):
+ info['summary'] = parts[-1]
+ elif firstpart == '':
+ if info:
+ sha = info['id']
+ c = commits.get(sha)
+ if c is None:
+ c = self.CommitCls(self, hex_to_bin(sha),
+ author=Actor._from_string(info['author'] + ' ' + info['author_email']),
+ authored_date=info['author_date'],
+ committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
+ committed_date=info['committer_date'],
+ message=info['summary'])
+ commits[sha] = c
+ # END if commit objects needs initial creation
+ m = self.re_tab_full_line.search(line)
+ text, = m.groups()
+ blames[-1][0] = c
+ blames[-1][1].append(text)
+ info = { 'id' : sha }
+ # END if we collected commit info
+ # END distinguish filename,summary,rest
+ # END distinguish author|committer vs filename,summary,rest
+ # END distinguish hexsha vs other information
+ return blames
+
+ @classmethod
+ def init(cls, path=None, mkdir=True, **kwargs):
+ """
+ :param kwargs:
+ keyword arguments serving as additional options to the git-init command
+
+ For more information, see the respective docs of HighLevelRepository"""
+
+ if mkdir and path and not os.path.exists(path):
+ os.makedirs(path, 0755)
+
+ # git command automatically chdir into the directory
+ git = cls.GitCls(path)
+ output = git.init(**kwargs)
+ return cls(path)
+
+ @classmethod
+ def _clone(cls, git, url, path, progress, **kwargs):
+ # special handling for windows for path at which the clone should be
+ # created.
+ # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
+ # we at least give a proper error instead of letting git fail
+ prev_cwd = None
+ prev_path = None
+ if os.name == 'nt':
+ if '~' in path:
+ raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
+
+ # on windows, git will think paths like c: are relative and prepend the
+ # current working dir ( before it fails ). We temporarily adjust the working
+ # dir to make this actually work
+ match = re.match("(\w:[/\\\])(.*)", path)
+ if match:
+ prev_cwd = os.getcwd()
+ prev_path = path
+ drive, rest_of_path = match.groups()
+ os.chdir(drive)
+ path = rest_of_path
+ kwargs['with_keep_cwd'] = True
+ # END cwd preparation
+ # END windows handling
+
+ try:
+ proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress))
+ if progress is not None:
+ digest_process_messages(proc.stderr, progress)
+ #END digest progress messages
+ finalize_process(proc)
+ finally:
+ if prev_cwd is not None:
+ os.chdir(prev_cwd)
+ path = prev_path
+ # END reset previous working dir
+ # END bad windows handling
+
+ # our git command could have a different working dir than our actual
+ # environment, hence we prepend its working dir if required
+ if not os.path.isabs(path) and git.working_dir:
+ path = join(git._working_dir, path)
+
+ # adjust remotes - there may be operating systems which use backslashes.
+ # These might be given as initial paths, but when handling the config file
+ # that contains the remote from which we were cloned, git stops liking it
+ # as it will escape the backslashes. Hence we undo the escaping just to be
+ # sure
+ repo = cls(os.path.abspath(path))
+ if repo.remotes:
+ repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
+ # END handle remote repo
+ return repo
+
+ def clone(self, path, progress=None, **kwargs):
+ """
+ :param kwargs:
+ All remaining keyword arguments are given to the git-clone command
+
+ For more information, see the respective method in HighLevelRepository"""
+ return self._clone(self.git, self.git_dir, path, CmdRemoteProgress(progress), **kwargs)
+
+ @classmethod
+ def clone_from(cls, url, to_path, progress=None, **kwargs):
+ """
+ :param kwargs: see the ``clone`` method
+ For more information, see the respective method in the HighLevelRepository"""
+ return cls._clone(cls.GitCls(os.getcwd()), url, to_path, CmdRemoteProgress(progress), **kwargs)
+
+ def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+ """For all args see HighLevelRepository interface
+ :param kwargs:
+ Additional arguments passed to git-archive
+ NOTE: Use the 'format' argument to define the kind of format. Use
+ specialized ostreams to write any format supported by python
+
+ :raise GitCommandError: in case something went wrong"""
+ if treeish is None:
+ treeish = self.head.commit
+ if prefix and 'prefix' not in kwargs:
+ kwargs['prefix'] = prefix
+ kwargs['output_stream'] = ostream
+
+ self.git.archive(treeish, **kwargs)
+ return self
diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py
new file mode 100644
index 000000000..49e8c590a
--- /dev/null
+++ b/git/db/cmd/complex.py
@@ -0,0 +1,16 @@
+"""Module with our own git implementation - it uses the git command"""
+
+from git.db.compat import RepoCompatibilityInterface
+from base import *
+
+
+__all__ = ['CmdPartialGitDB']
+
+
+class CmdPartialGitDB( GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin,
+ CmdHighLevelRepository ):
+ """Utility repository which only partially implements all required methods.
+ It cannot be reliably used alone, but is provided to allow mixing it with other
+ implementations"""
+ pass
+
diff --git a/git/db/compat.py b/git/db/compat.py
new file mode 100644
index 000000000..771a1e770
--- /dev/null
+++ b/git/db/compat.py
@@ -0,0 +1,45 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module providing adaptors to maintain backwards compatability"""
+
+class RepoCompatibilityInterfaceNoBare(object):
+ """Interface to install backwards compatability of the new complex repository
+ types with the previous, all in one, repository."""
+
+ def rev_parse(self, *args, **kwargs):
+ return self.resolve_object(*args, **kwargs)
+
+ @property
+ def odb(self):
+ """The odb is now an integrated part of each repository"""
+ return self
+
+ @property
+ def active_branch(self):
+ """The name of the currently active branch.
+
+ :return: Head to the active branch"""
+ return self.head.reference
+
+ def __repr__(self):
+ """Return the representation of the repository, the way it used to be"""
+ return '<git.Repo "%s">' % self.git_dir
+
+ @property
+ def branches(self):
+ return self.heads
+
+
+class RepoCompatibilityInterface(RepoCompatibilityInterfaceNoBare):
+ """Interface to install backwards compatability of the new complex repository
+ types with the previous, all in one, repository."""
+
+ @property
+ def bare(self):
+ return self.is_bare
+
+ @property
+ def refs(self):
+ return self.references
diff --git a/git/db/complex.py b/git/db/complex.py
new file mode 100644
index 000000000..31b047a0b
--- /dev/null
+++ b/git/db/complex.py
@@ -0,0 +1,28 @@
+"""Module with many useful complex databases with different useful combinations of primary implementations"""
+
+from py.complex import PurePartialGitDB
+from cmd.complex import CmdPartialGitDB
+from compat import RepoCompatibilityInterface
+
+__all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityGitDB']
+
+class CmdGitDB(CmdPartialGitDB, PurePartialGitDB):
+ """A database which uses primarily the git command implementation, but falls back
+ to pure python where it is more feasible
+ :note: To assure consistent behaviour across implementations, when calling the
+ ``stream()`` method a cache is created. This makes this implementation a bad
+ choice when reading big files as these are streamed from memory in all cases."""
+
+class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB):
+ """A database which fills in its missing implementation using the pure python
+ implementation"""
+ pass
+
+class PureGitDB(PurePartialGitDB, CmdPartialGitDB):
+ """A repository which uses the pure implementation primarily, but falls back
+ on using the git command for high-level functionality"""
+
+class PureCompatibilityGitDB(RepoCompatibilityInterface, PureGitDB):
+ """Repository which uses the pure implementation primarily, but falls back
+ to the git command implementation. Please note that the CmdGitDB does it
+ the opposite way around."""
diff --git a/git/db/dulwich/__init__.py b/git/db/dulwich/__init__.py
new file mode 100644
index 000000000..92d30941f
--- /dev/null
+++ b/git/db/dulwich/__init__.py
@@ -0,0 +1,13 @@
+"""Dulwich module initialization"""
+
+def init_dulwich():
+ """:raise ImportError: if dulwich is not present"""
+ try:
+ import dulwich
+ except ImportError:
+ raise ImportError("Could not find 'dulwich' in the PYTHONPATH - dulwich functionality is not available")
+ #END handle dulwich import
+
+
+
+init_dulwich()
diff --git a/git/db/dulwich/complex.py b/git/db/dulwich/complex.py
new file mode 100644
index 000000000..ad5b97a4b
--- /dev/null
+++ b/git/db/dulwich/complex.py
@@ -0,0 +1,90 @@
+
+__all__ = ['DulwichGitODB', 'DulwichGitDB', 'DulwichCompatibilityGitDB']
+
+from git.db.py.complex import PureGitODB
+from git.db.py.base import (
+ PureRepositoryPathsMixin,
+ PureConfigurationMixin,
+ PureIndexDB,
+ )
+from git.db.py.resolve import PureReferencesMixin
+from git.db.py.transport import PureTransportDB
+from git.db.py.submodule import PureSubmoduleDB
+
+from git.db.cmd.complex import CmdHighLevelRepository, GitCommandMixin
+from git.db.compat import RepoCompatibilityInterfaceNoBare
+
+#from git.db.interface import ObjectDBW, ObjectDBR
+from dulwich.repo import Repo as DulwichRepo
+from dulwich.objects import ShaFile
+
+from git.base import OInfo, OStream
+from git.fun import type_id_to_type_map, type_to_type_id_map
+
+from cStringIO import StringIO
+import os
+
+
+class DulwichGitODB(PureGitODB):
+ """A full fledged database to read and write object files from all kinds of sources."""
+
+ def __init__(self, objects_root):
+ """Initalize this instance"""
+ PureGitODB.__init__(self, objects_root)
+ if hasattr(self, 'working_dir'):
+ wd = self.working_dir
+ else:
+ wd = os.path.dirname(os.path.dirname(objects_root))
+ #END try to figure out good entry for dulwich, which doesn't do an extensive search
+ self._dw_repo = DulwichRepo(wd)
+
+ def __getattr__(self, attr):
+ try:
+ # supply LazyMixin with this call first
+ return super(DulwichGitODB, self).__getattr__(attr)
+ except AttributeError:
+ # now assume its on the dulwich repository ... for now
+ return getattr(self._dw_repo, attr)
+ #END handle attr
+
+ #{ Object DBR
+
+ def info(self, binsha):
+ type_id, uncomp_data = self._dw_repo.object_store.get_raw(binsha)
+ return OInfo(binsha, type_id_to_type_map[type_id], len(uncomp_data))
+
+ def stream(self, binsha):
+ type_id, uncomp_data = self._dw_repo.object_store.get_raw(binsha)
+ return OStream(binsha, type_id_to_type_map[type_id], len(uncomp_data), StringIO(uncomp_data))
+
+ #}END object dbr
+
+ #{ Object DBW
+
+ def store(self, istream):
+ obj = ShaFile.from_raw_string(type_to_type_id_map[istream.type], istream.read())
+ self._dw_repo.object_store.add_object(obj)
+ istream.binsha = obj.sha().digest()
+ return istream
+
+ #}END object dbw
+
+class DulwichGitDB( PureRepositoryPathsMixin, PureConfigurationMixin,
+ PureReferencesMixin, PureSubmoduleDB,
+ PureIndexDB,
+ PureTransportDB, # not fully implemented
+ GitCommandMixin,
+ CmdHighLevelRepository,
+ DulwichGitODB): # must come last, as it doesn't pass on __init__ with super
+
+
+ def __init__(self, root_path):
+ """Initialize ourselves on the .git directory, or the .git/objects directory."""
+ PureRepositoryPathsMixin._initialize(self, root_path)
+ super(DulwichGitDB, self).__init__(self.objects_dir)
+
+
+class DulwichCompatibilityGitDB(RepoCompatibilityInterfaceNoBare, DulwichGitDB):
+ """Basic dulwich compatibility database"""
+ pass
+
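+# A rough usage sketch (hypothetical path, illustrative only), assuming the mixed-in
+# reference and object query interfaces behave as documented in git.db.interface:
+#
+#   repo = DulwichGitDB("/path/to/worktree/.git")
+#   head_sha = repo.resolve("HEAD")      # binary sha of the current head
+#   info = repo.info(head_sha)           # OInfo served by the dulwich object store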
diff --git a/git/db/interface.py b/git/db/interface.py
new file mode 100644
index 000000000..2ff44f261
--- /dev/null
+++ b/git/db/interface.py
@@ -0,0 +1,836 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains interfaces for basic database building blocks"""
+
+__all__ = ( 'ObjectDBR', 'ObjectDBW', 'RootPathDB', 'CompoundDB', 'CachingDB',
+ 'TransportDB', 'ConfigurationMixin', 'RepositoryPathsMixin',
+ 'RefSpec', 'FetchInfo', 'PushInfo', 'ReferencesMixin', 'SubmoduleDB',
+ 'IndexDB', 'HighLevelRepository')
+
+
+class ObjectDBR(object):
+ """Defines an interface for object database lookup.
+ Objects are identified either by their 20 byte bin sha"""
+
+ def __contains__(self, sha):
+ return self.has_object(sha)
+
+ #{ Query Interface
+ def has_object(self, sha):
+ """
+ :return: True if the object identified by the given 20 bytes
+ binary sha is contained in the database"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def has_object_async(self, reader):
+ """Return a reader yielding information about the membership of objects
+ as identified by shas
+ :param reader: Reader yielding 20 byte shas.
+ :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
+ whether the given sha exists in the database or not"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info(self, sha):
+ """ :return: OInfo instance
+ :param sha: 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info_async(self, reader):
+ """Retrieve information of a multitude of objects asynchronously
+ :param reader: Channel yielding the sha's of the objects of interest
+ :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream(self, sha):
+ """:return: OStream instance
+ :param sha: 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream_async(self, reader):
+ """Retrieve the OStream of multiple objects
+ :param reader: see ``info``
+ :param max_threads: see ``ObjectDBW.store``
+ :return: async.Reader yielding OStream|InvalidOStream instances in any order
+ :note: depending on the system configuration, it might not be possible to
+ read all OStreams at once. Instead, read them individually using reader.read(x)
+ where x is small enough."""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def size(self):
+ """:return: amount of objects in this database"""
+ raise NotImplementedError()
+
+ def sha_iter(self):
+ """Return iterator yielding 20 byte shas for all objects in this data base"""
+ raise NotImplementedError()
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """
+ :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+ :param partial_hexsha: hexsha with less than 40 hexadecimal characters
+ :raise AmbiguousObjectName: If multiple objects would match the given sha
+ :raise BadObject: If the object was not found"""
+ raise NotImplementedError()
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical (hexadecimal) representation.
+ It is required as binary sha's cannot display whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ raise NotImplementedError()
+ #} END query interface
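+# Minimal read-access sketch (assuming ``db`` is any ObjectDBR implementation and
+# ``binsha`` a known 20 byte binary sha; the names are illustrative only):
+#
+#   if db.has_object(binsha):
+#       info = db.info(binsha)        # OInfo with binsha, type and size
+#       ostream = db.stream(binsha)   # OStream additionally offers read()
+#       data = ostream.read()
+#   # END read object if present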
+
+
+class ObjectDBW(object):
+ """Defines an interface to create objects in the database"""
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ """
+ Adjusts the stream to which all data should be sent when storing new objects
+
+ :param stream: if not None, the stream to use, if None the default stream
+ will be used.
+ :return: previously installed stream, or None if there was no override
+ :raise TypeError: if the stream doesn't have the supported functionality"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def ostream(self):
+ """
+ :return: overridden output stream this instance will write to, or None
+ if it will write to the default stream"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store(self, istream):
+ """
+ Create a new object in the database
+ :return: the input istream object with its sha set to its corresponding value
+
+ :param istream: IStream compatible instance. If its sha is already set
+ to a value, the object will just be stored in our database format,
+ in which case the input stream is expected to be in object format (header + contents).
+ :raise IOError: if data could not be written"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store_async(self, reader):
+ """
+ Create multiple new objects in the database asynchronously. The method will
+ return right away, returning an output channel which receives the results as
+ they are computed.
+
+ :return: Channel yielding your IStream which served as input, in any order.
+ The IStreams sha will be set to the sha it received during the process,
+ or its error attribute will be set to the exception informing about the error.
+
+ :param reader: async.Reader yielding IStream instances.
+ The same instances will be used in the output channel as were received
+ by the Reader.
+
+ :note: As some ODB implementations implement this operation atomically, they might
+ abort the whole operation if one item could not be processed. Hence check how
+ many items have actually been produced."""
+ raise NotImplementedError("To be implemented in subclass")
+
+ #} END edit interface
+
+
+class RootPathDB(object):
+ """Provides basic facilities to retrieve files of interest"""
+
+ def __init__(self, root_path):
+ """Initialize this instance to look for its files at the given root path
+ All subsequent operations will be relative to this path
+ :raise InvalidDBRoot:
+ :note: The base will not perform any accessibility checking, as the base
+ might not yet be accessible, but could become accessible before the first
+ access."""
+ try:
+ super(RootPathDB, self).__init__(root_path)
+ except TypeError:
+ pass
+ # END handle py 2.6
+
+ #{ Interface
+ def root_path(self):
+ """:return: path at which this db operates"""
+ raise NotImplementedError()
+
+ def db_path(self, rela_path):
+ """
+ :return: the given relative path relative to our database root, allowing
+ potential access to datafiles
+ :param rela_path: if not None or '', the relative path will be appended
+ to the database root path. Otherwise you will obtain the database root path itself"""
+ raise NotImplementedError()
+ #} END interface
+
+
+class CachingDB(object):
+ """A database which uses caches to speed-up access"""
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Call this method if the underlying data changed to trigger an update
+ of the internal caching structures.
+
+ :param force: if True, the update must be performed. Otherwise the implementation
+ may decide not to perform an update if it thinks nothing has changed.
+ :return: True if an update was performed as something had indeed changed"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class CompoundDB(object):
+ """A database which delegates calls to sub-databases.
+ They should usually be cached and lazy-loaded"""
+
+ #{ Interface
+
+ def databases(self):
+ """:return: tuple of database instances we use for lookups"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class IndexDB(object):
+ """A database which provides a flattened index to all objects in its currently
+ active tree."""
+ @property
+ def index(self):
+ """:return: IndexFile compatible instance"""
+ raise NotImplementedError()
+
+
+class RefSpec(object):
+ """A refspec is a simple container which provides information about the way
+ something should be fetched or pushed. It requires the use of symbols to describe
+ the actual objects, which is done using reference names (or respective instances
+ which resolve to actual reference names)."""
+ __slots__ = ('source', 'destination', 'force')
+
+ def __init__(self, source, destination, force=False):
+ """initalize the instance with the required values
+ :param source: reference name or instance. If None, the Destination
+ is supposed to be deleted."""
+ self.source = source
+ self.destination = destination
+ self.force = force
+ if self.destination is None:
+ raise ValueError("Destination must be set")
+
+ def __str__(self):
+ """:return: a git-style refspec"""
+ s = str(self.source)
+ if self.source is None:
+ s = ''
+ #END handle source
+ d = str(self.destination)
+ p = ''
+ if self.force:
+ p = '+'
+ #END handle force
+ res = "%s%s:%s" % (p, s, d)
+
+ def delete_destination(self):
+ return self.source is None
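+# Example of the refspec strings produced above (illustrative values only):
+#
+#   str(RefSpec('master', 'refs/remotes/origin/master', force=True))
+#   # -> '+master:refs/remotes/origin/master'
+#   str(RefSpec(None, 'refs/heads/topic'))
+#   # -> ':refs/heads/topic', i.e. delete the destination on the remote side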
+
+
+class RemoteProgress(object):
+ """
+ Handler providing an interface to parse progress information emitted by git-push
+ and git-fetch and to dispatch callbacks allowing subclasses to react to the progress.
+
+ Subclasses should derive from this type.
+ """
+ _num_op_codes = 7
+ BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)]
+ STAGE_MASK = BEGIN|END
+ OP_MASK = ~STAGE_MASK
+
+ #{ Subclass Interface
+
+ def line_dropped(self, line):
+ """Called whenever a line could not be understood and was therefore dropped."""
+ pass
+
+ def update(self, op_code, cur_count, max_count=None, message='', input=''):
+ """Called whenever the progress changes
+
+ :param op_code:
+ Integer allowing to be compared against Operation IDs and stage IDs.
+
+ Stage IDs are BEGIN and END. BEGIN and END will each be set only once per
+ Operation ID. Both may be set at once in case only one progress message
+ was emitted due to the speed of the operation.
+ Between BEGIN and END, none of these flags will be set.
+
+ Operation IDs are all held within the OP_MASK. Only one Operation ID will
+ be active per call.
+ :param cur_count: Current absolute count of items
+
+ :param max_count:
+ The maximum count of items we expect. It may be None in case there is
+ no maximum number of items or if it is (yet) unknown.
+
+ :param message:
+ In case of the 'WRITING' operation, it contains the amount of bytes
+ transferred. It may possibly be used for other purposes as well.
+
+ :param input:
+ The actual input string the information was parsed from.
+ This is usually a line from the output of git-fetch, but ultimately
+ depends on the implementation
+
+ You may read the contents of the current line in self._cur_line"""
+ pass
+
+ def __call__(self, message, input=''):
+ """Same as update, but with a simpler interface which only provides the
+ message of the operation.
+ :note: This method will be called in addition to the update method. It is
+ up to you which one you implement"""
+ pass
+ #} END subclass interface
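+# Sketch of decoding an op_code in a subclass (assumed usage, illustrative only):
+#
+#   class MyProgress(RemoteProgress):
+#       def update(self, op_code, cur_count, max_count=None, message='', input=''):
+#           op = op_code & self.OP_MASK          # e.g. COUNTING, WRITING, ...
+#           if op_code & self.BEGIN:
+#               print "starting operation %i" % op
+#           if op_code & self.END:
+#               print "operation %i done" % op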
+
+
+class PushInfo(object):
+ """A type presenting information about the result of a push operation for exactly
+ one refspec
+
+ flags # bitflags providing more information about the result
+ local_ref # Reference pointing to the local reference that was pushed
+ # It is None if the ref was deleted.
+ remote_ref_string # path to the remote reference located on the remote side
+ remote_ref # Remote Reference on the local side corresponding to
+ # the remote_ref_string. It can be a TagReference as well.
+ old_commit_binsha # binary sha of the commit at which the remote_ref was standing before we pushed
+ # it to local_ref.commit. Will be None if an error was indicated
+ summary # summary line providing human readable english text about the push
+ """
+ __slots__ = tuple()
+
+ NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \
+ FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ]
+
+
+class FetchInfo(object):
+ """A type presenting information about the fetch operation on exactly one refspec
+
+ The following members are defined:
+ ref # name of the reference to the changed
+ # remote head or FETCH_HEAD. Implementations can provide
+ # an actual class instance which converts to a respective string
+ flags # additional flags to be & with enumeration members,
+ # i.e. info.flags & info.REJECTED
+ # is 0 if ref is FETCH_HEAD
+ note # additional notes given by the fetch-pack implementation intended for the user
+ old_commit_binsha# if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD,
+ # field is set to the previous location of ref as binary sha or None"""
+ __slots__ = tuple()
+
+ NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \
+ FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ]
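+# Sketch of inspecting the flags of a fetch result (illustrative only; ``db`` is
+# assumed to be a TransportDB implementation as defined below):
+#
+#   for info in db.fetch('origin', ['master']):
+#       if info.flags & info.ERROR:
+#           print "fetch of %s failed" % info.ref
+#       elif info.flags & (info.FORCED_UPDATE | info.FAST_FORWARD):
+#           print "%s moved from %r" % (info.ref, info.old_commit_binsha)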
+
+
+class TransportDB(object):
+ """A database which allows to transport objects from and to different locations
+ which are specified by urls (location) and refspecs (what to transport,
+ see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html).
+
+ At the beginning of a transport operation, it will be determined which objects
+ have to be sent (either by this or by the other side).
+
+ Afterwards a pack with the required objects is sent (or received). If there is
+ nothing to send, the pack will be empty.
+
+ As refspecs involve symbolic names for references to be handled, we require
+ RefParse functionality. How this is done is up to the actual implementation."""
+ # The following variables need to be set by the derived class
+
+ #{ Interface
+
+ def fetch(self, url, refspecs, progress=None, **kwargs):
+ """Fetch the objects defined by the given refspec from the given url.
+ :param url: url identifying the source of the objects. It may also be
+ a symbol from which the respective url can be resolved, like the
+ name of the remote. The implementation should allow objects as input
+ as well; these are assumed to resolve to a meaningful string though.
+ :param refspecs: iterable of reference specifiers or RefSpec instance,
+ identifying the references to be fetched from the remote.
+ :param progress: RemoteProgress derived instance which receives progress messages for user consumption or None
+ :param kwargs: may be used for additional parameters that the actual implementation could
+ find useful.
+ :return: List of FetchInfo compatible instances which provide information about what
+ was fetched, in the order of the input refspecs.
+ :note: even if the operation fails, some of the returned FetchInfo instances may
+ still contain errors or failures for only part of the refspecs.
+ :raise: if any issue occurs during the transport or if the url is not
+ supported by the protocol.
+ """
+ raise NotImplementedError()
+
+ def push(self, url, refspecs, progress=None, **kwargs):
+ """Transport the objects identified by the given refspec to the remote
+ at the given url.
+ :param url: Describes the location which is to receive the objects
+ see fetch() for more details
+ :param refspecs: iterable of refspecs strings or RefSpec instances
+ to identify the objects to push
+ :param progress: see fetch()
+ :param kwargs: additional arguments which may be provided by the caller
+ as they may be useful to the actual implementation
+ :todo: what to return ?
+ :raise: if any issue arises during transport or if the url cannot be handled"""
+ raise NotImplementedError()
+
+ @property
+ def remotes(self):
+ """:return: An IterableList of Remote objects allowing to access and manipulate remotes
+ :note: Remote objects can also be used for the actual push or fetch operation"""
+ raise NotImplementedError()
+
+ def remote(self, name='origin'):
+ """:return: Remote object with the given name
+ :note: it does not necessarily exist, hence this is just a more convenient way
+ to construct Remote objects"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+ #{ Utility Methods
+
+ def create_remote(self, name, url, **kwargs):
+ """Create a new remote with the given name pointing to the given url
+ :return: Remote instance, compatible to the Remote interface"""
+ return Remote.create(self, name, url, **kwargs)
+
+ def delete_remote(self, remote):
+ """Delete the given remote.
+ :param remote: a Remote instance"""
+ return Remote.remove(self, remote)
+
+ #} END utility methods
+
+
+class ReferencesMixin(object):
+ """Database providing reference objects which in turn point to database objects
+ like Commits or Tag(Object)s.
+
+ The returned types are compatible to the interfaces of the pure python
+ reference implementation in GitDB.ref"""
+
+ def resolve(self, name):
+ """Resolve the given name into a binary sha. Valid names are as defined
+ in the rev-parse documentation http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
+ :return: binary sha matching the name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ raise NotImplementedError()
+
+ def resolve_object(self, name):
+ """As ``resolve()``, but returns the Objecft instance pointed to by the
+ resolved binary sha
+ :return: Object instance of the correct type, e.g. shas pointing to commits
+ will be represented by a Commit object"""
+ raise NotImplementedError()
+
+ @property
+ def references(self):
+ """:return: iterable list of all Reference objects representing tags, heads
+ and remote references. This is the most general method to obtain any
+ references."""
+ raise NotImplementedError()
+
+ @property
+ def heads(self):
+ """:return: IterableList with HeadReference objects pointing to all
+ heads in the repository."""
+ raise NotImplementedError()
+
+ @property
+ def head(self):
+ """:return: HEAD Object pointing to the current head reference"""
+ raise NotImplementedError()
+
+ @property
+ def tags(self):
+ """:return: An IterableList of TagReferences or compatible items that
+ are available in this repo"""
+ raise NotImplementedError()
+
+ #{ Utility Methods
+
+ def tag(self, name):
+ """:return: Tag with the given name
+ :note: It does not necessarily exist, hence this is just a more convenient
+ way to construct TagReference objects"""
+ raise NotImplementedError()
+
+
+ def commit(self, rev=None):
+ """The Commit object for the specified revision
+ :param rev: revision specifier, see git-rev-parse for viable options.
+ :return: Commit compatible object"""
+ raise NotImplementedError()
+
+ def iter_trees(self, *args, **kwargs):
+ """:return: Iterator yielding Tree compatible objects
+ :note: Takes all arguments known to iter_commits method"""
+ raise NotImplementedError()
+
+ def tree(self, rev=None):
+ """The Tree (compatible) object for the given treeish revision
+ Examples::
+
+ repo.tree(repo.heads[0])
+
+ :param rev: is a revision pointing to a Treeish ( being a commit or tree )
+ :return: ``git.Tree``
+
+ :note:
+ If you need a non-root level tree, find it by iterating the root tree. Otherwise
+ it cannot know about its path relative to the repository root and subsequent
+ operations might have unexpected results."""
+ raise NotImplementedError()
+
+ def iter_commits(self, rev=None, paths='', **kwargs):
+ """A list of Commit objects representing the history of a given ref/commit
+
+ :param rev:
+ revision specifier, see git-rev-parse for viable options.
+ If None, the active branch will be used.
+
+ :param paths:
+ is an optional path or a list of paths to limit the returned commits to;
+ commits that do not contain that path or the paths will not be returned.
+
+ :param kwargs:
+ Arguments to be passed to git-rev-list - common ones are
+ max_count and skip
+
+ :note: to receive only commits between two named revisions, use the
+ "revA..revB" revision specifier
+
+ :return: iterator yielding Commit compatible instances"""
+ raise NotImplementedError()
+
+
+ #} END utility methods
+
+ #{ Edit Methods
+
+ def create_head(self, path, commit='HEAD', force=False, logmsg=None ):
+ """Create a new head within the repository.
+ :param commit: a resolvable name to the commit or a Commit or Reference instance the new head should point to
+ :param force: if True, a head will be created even though it already exists.
+ Otherwise an exception will be raised.
+ :param logmsg: message to append to the reference log. If None, a default message
+ will be used
+ :return: newly created Head instance"""
+ raise NotImplementedError()
+
+ def delete_head(self, *heads):
+ """Delete the given heads
+ :param heads: list of Head references that are to be deleted"""
+ raise NotImplementedError()
+
+ def create_tag(self, path, ref='HEAD', message=None, force=False):
+ """Create a new tag reference.
+ :param path: name or path of the new tag.
+ :param ref: resolvable name of the reference or commit, or Commit or Reference
+ instance describing the commit the tag should point to.
+ :param message: message to be attached to the tag reference. This will
+ create an actual Tag object carrying the message. Otherwise a TagReference
+ will be generated.
+ :param force: if True, the Tag will be created even if another tag already
+ exists at the given path. Otherwise an exception will be thrown
+ :return: TagReference object """
+ raise NotImplementedError()
+
+ def delete_tag(self, *tags):
+ """Delete the given tag references
+ :param tags: TagReferences to delete"""
+ raise NotImplementedError()
+
+ #}END edit methods
+
+
+class RepositoryPathsMixin(object):
+ """Represents basic functionality of a full git repository. This involves an
+ optional working tree, a git directory with references and an object directory.
+
+ This type collects the respective paths and verifies the provided base path
+ truly is a git repository.
+
+ If the underlying type provides the config_reader() method, we can properly determine
+ whether this is a bare repository as well. Otherwise it will make an educated guess
+ based on the path name."""
+ #{ Subclass Interface
+ def _initialize(self, path):
+ """initialize this instance with the given path. It may point to
+ any location within the repositories own data, as well as the working tree.
+
+ The implementation will move up and search for traces of a git repository,
+ which is indicated by a child directory ending with .git or the
+ current path portion ending with .git.
+
+ The paths made available for query are suitable for full git repositories
+ only. Plain object databases need to be fed the "objects" directory path.
+
+ :param path: the path to initialize the repository with
+ It is a path to either the root git directory or the bare git repo::
+
+ repo = Repo("/Users/mtrier/Development/git-python")
+ repo = Repo("/Users/mtrier/Development/git-python.git")
+ repo = Repo("~/Development/git-python.git")
+ repo = Repo("$REPOSITORIES/Development/git-python.git")
+
+ :raise InvalidDBRoot:
+ """
+ raise NotImplementedError()
+ #} END subclass interface
+
+ #{ Object Interface
+
+ def __eq__(self, rhs):
+ raise NotImplementedError()
+
+ def __ne__(self, rhs):
+ raise NotImplementedError()
+
+ def __hash__(self):
+ raise NotImplementedError()
+
+ def __repr__(self):
+ raise NotImplementedError()
+
+ #} END object interface
+
+ #{ Interface
+
+ @property
+ def is_bare(self):
+ """:return: True if this is a bare repository
+ :note: this value is cached upon initialization"""
+ raise NotImplementedError()
+
+ @property
+ def git_dir(self):
+ """:return: path to directory containing this actual git repository (which
+ in turn provides access to objects and references)"""
+ raise NotImplementedError()
+
+ @property
+ def working_tree_dir(self):
+ """:return: path to directory containing the working tree checkout of our
+ git repository.
+ :raise AssertionError: If this is a bare repository"""
+ raise NotImplementedError()
+
+ @property
+ def objects_dir(self):
+ """:return: path to the repository's objects directory"""
+ raise NotImplementedError()
+
+ @property
+ def working_dir(self):
+ """:return: working directory of the git process or related tools, being
+ either the working_tree_dir if available or the git_path"""
+ raise NotImplementedError()
+
+ @property
+ def description(self):
+ """:return: description text associated with this repository or set the
+ description."""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class ConfigurationMixin(object):
+ """Interface providing configuration handler instances, which provide locked access
+ to a single git-style configuration file (ini-like format, using tabs to improve readability).
+
+ Configuration readers can be initialized with multiple files at once, whose information is concatenated
+ when reading. Lower-level files overwrite values from higher level files, i.e. a repository configuration file
+ overwrites information coming from a system configuration file
+
+ :note: for the 'repository' config level, a git_path() compatible type is required"""
+ config_level = ("system", "global", "repository")
+
+ #{ Interface
+
+ def config_reader(self, config_level=None):
+ """
+ :return:
+ GitConfigParser allowing to read the full git configuration, but not to write it
+
+ The configuration will include values from the system, user and repository
+ configuration files.
+
+ :param config_level:
+ For possible values, see the config_writer method.
+ If None, all applicable levels will be used. Specify a level in case
+ you know which exact file you wish to read, to prevent reading multiple files
+ for instance.
+ :note: On windows, system configuration cannot currently be read as the path is
+ unknown, instead the global path will be used."""
+ raise NotImplementedError()
+
+ def config_writer(self, config_level="repository"):
+ """
+ :return:
+ GitConfigParser allowing to write values of the specified configuration file level.
+ Config writers should be retrieved, used to change the configuration, and written
+ right away, as they will lock the configuration file in question and prevent others
+ from writing it.
+
+ :param config_level:
+ One of the following values
+ system = system wide configuration file
+ global = user level configuration file
+ repository = configuration file for this repository only"""
+ raise NotImplementedError()
+
+
+ #} END interface
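+# Hedged usage sketch - the exact parser API is defined by GitConfigParser elsewhere,
+# the values below are purely illustrative:
+#
+#   reader = db.config_reader()                  # all levels merged, read-only
+#   # ... query values through the returned parser ...
+#   writer = db.config_writer("repository")      # locked, repository level only
+#   # ... set values, then release the writer promptly to drop the lock ...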
+
+
+class SubmoduleDB(object):
+ """Interface providing access to git repository submodules.
+ The actual implementation is found in the Submodule object type, which is
+ currently only available in one implementation."""
+
+ @property
+ def submodules(self):
+ """
+ :return: git.IterableList(Submodule, ...) of direct submodules
+ available from the current head"""
+ raise NotImplementedError()
+
+ def submodule(self, name):
+ """ :return: Submodule with the given name
+ :raise ValueError: If no such submodule exists"""
+ raise NotImplementedError()
+
+ def create_submodule(self, *args, **kwargs):
+ """Create a new submodule
+
+ :note: See the documentation of Submodule.add for a description of the
+ applicable parameters
+ :return: the created submodule"""
+ raise NotImplementedError()
+
+ def iter_submodules(self, *args, **kwargs):
+ """An iterator yielding Submodule instances, see Traversable interface
+ for a description of args and kwargs
+ :return: Iterator"""
+ raise NotImplementedError()
+
+ def submodule_update(self, *args, **kwargs):
+ """Update the submodules, keeping the repository consistent as it will
+ take the previous state into consideration. For more information, please
+ see the documentation of RootModule.update"""
+ raise NotImplementedError()
+
+
+class HighLevelRepository(object):
+ """An interface combining several high-level repository functionality and properties"""
+
+ @property
+ def daemon_export(self):
+ """:return: True if the repository may be published by the git-daemon"""
+ raise NotImplementedError()
+
+ def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+ """
+ :return:
+ ``True`` if the repository is considered dirty. By default it will react
+ like a git-status without untracked files, hence it is dirty if the
+ index or the working copy have changes."""
+ raise NotImplementedError()
+
+ @property
+ def untracked_files(self):
+ """
+ :return:
+ list(str,...)
+
+ :note:
+ ignored files will not appear here, i.e. files mentioned in .gitignore.
+ Bare repositories never have untracked files"""
+ raise NotImplementedError()
+
+ def blame(self, rev, file):
+ """The blame information for the given file at the given revision.
+
+ :parm rev: revision specifier, see git-rev-parse for viable options.
+ :return:
+ list: [Commit, list: []]
+ A list of tuples associating a Commit object with a list of lines that
+ changed within the given commit. The Commit objects will be given in order
+ of appearance."""
+ raise NotImplementedError()
+
+ @classmethod
+ def init(cls, path=None, mkdir=True):
+ """Initialize a git repository at the given path if specified
+
+ :param path:
+ is the full path to the repo (traditionally ends with /.git)
+ or None in which case the repository will be created in the current
+ working directory
+
+ :param mkdir:
+ if specified, the repository directory will be created if it doesn't
+ already exist. Creates the directory with a mode=0755.
+ Only effective if a path is explicitly given
+
+ :return: Instance pointing to the newly created repository with similar capabilities
+ as this class"""
+ raise NotImplementedError()
+
+ def clone(self, path, progress = None):
+ """Create a clone from this repository.
+ :param path:
+ is the full path of the new repo (traditionally ends with ./.git).
+
+ :param progress:
+ a RemoteProgress instance or None if no progress information is required
+
+ :return: ``git.Repo`` (the newly cloned repo)"""
+ raise NotImplementedError()
+
+ @classmethod
+ def clone_from(cls, url, to_path, progress = None):
+ """Create a clone from the given URL
+ :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
+ :param to_path: Path to which the repository should be cloned
+ :param progress:
+ a RemoteProgress instance or None if no progress information is required
+ :return: instance pointing to the cloned directory with similar capabilities as this class"""
+ raise NotImplementedError()
+
+ def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+ """Archive the tree at the given revision.
+ :param ostream: file compatible stream object to which the archive will be written
+ :param treeish: is the treeish name/id, defaults to active branch
+ :param prefix: is the optional prefix to prepend to each filename in the archive
+ :param kwargs:
+ Additional arguments passed to git-archive
+ NOTE: Use the 'format' argument to define the kind of format. Use
+ specialized ostreams to write any format supported by python
+ :return: self"""
+ raise NotImplementedError()
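+# Rough end-to-end sketch of the high-level interface. ``SomeHighLevelRepository``
+# stands for any concrete implementation of this interface; paths are hypothetical:
+#
+#   repo = SomeHighLevelRepository.init("/tmp/new_repo/.git")
+#   clone = SomeHighLevelRepository.clone_from("git://example.com/project.git",
+#                                              "/tmp/project")
+#   if not clone.is_dirty():
+#       clone.archive(open("/tmp/project.tar", "wb"), treeish="HEAD", prefix="project/")
+#   # END archive clean clone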
+
+
diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py
new file mode 100644
index 000000000..8a681e428
--- /dev/null
+++ b/git/db/py/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/db/py/base.py b/git/db/py/base.py
new file mode 100644
index 000000000..d34e8b124
--- /dev/null
+++ b/git/db/py/base.py
@@ -0,0 +1,481 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains basic implementations for the interface building blocks"""
+from git.db.interface import *
+
+from git.util import (
+ pool,
+ join,
+ isfile,
+ normpath,
+ abspath,
+ dirname,
+ LazyMixin,
+ hex_to_bin,
+ bin_to_hex,
+ expandvars,
+ expanduser,
+ exists,
+ is_git_dir,
+ )
+
+from git.index import IndexFile
+from git.config import GitConfigParser
+from git.exc import (
+ BadObject,
+ AmbiguousObjectName,
+ InvalidGitRepositoryError,
+ NoSuchPathError
+ )
+
+from async import ChannelThreadTask
+
+from itertools import chain
+import sys
+import os
+
+
+__all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB',
+ 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin',
+ 'PureIndexDB')
+
+
+class PureObjectDBR(ObjectDBR):
+
+ #{ Query Interface
+
+ def has_object_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
+ return pool.add_task(task)
+
+ def info_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.info_async), self.info)
+ return pool.add_task(task)
+
+ def stream_async(self, reader):
+ # base implementation just uses the stream method repeatedly
+ task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
+ return pool.add_task(task)
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ len_partial_hexsha = len(partial_hexsha)
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+ return self.partial_to_complete_sha(partial_binsha, len(partial_hexsha))
+
+ #} END query interface
+
+
+class PureObjectDBW(ObjectDBW):
+
+ def __init__(self, *args, **kwargs):
+ try:
+ super(PureObjectDBW, self).__init__(*args, **kwargs)
+ except TypeError:
+ pass
+ #END handle py 2.6
+ self._ostream = None
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ cstream = self._ostream
+ self._ostream = stream
+ return cstream
+
+ def ostream(self):
+ return self._ostream
+
+ def store_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.store_async), self.store)
+ return pool.add_task(task)
+
+ #} END edit interface
+
+
+class PureRootPathDB(RootPathDB):
+
+ def __init__(self, root_path):
+ self._root_path = root_path
+ super(PureRootPathDB, self).__init__(root_path)
+
+
+ #{ Interface
+ def root_path(self):
+ return self._root_path
+
+ def db_path(self, rela_path=None):
+ if not rela_path:
+ return self._root_path
+ return join(self._root_path, rela_path)
+ #} END interface
+
+
+def _databases_recursive(database, output):
+ """Fill output list with database from db, in order. Deals with Loose, Packed
+ and compound databases."""
+ if isinstance(database, CompoundDB):
+ compounds = list()
+ dbs = database.databases()
+ output.extend(db for db in dbs if not isinstance(db, CompoundDB))
+ for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
+ _databases_recursive(cdb, output)
+ else:
+ output.append(database)
+ # END handle database type
+
+
+class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB):
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ else:
+ super(PureCompoundDB, self)._set_cache_(attr)
+
+ #{ PureObjectDBR interface
+
+ def has_object(self, sha):
+ for db in self._dbs:
+ if db.has_object(sha):
+ return True
+ #END for each db
+ return False
+
+ def info(self, sha):
+ for db in self._dbs:
+ try:
+ return db.info(sha)
+ except BadObject:
+ pass
+ #END for each db
+ raise BadObject(sha)
+
+ def stream(self, sha):
+ for db in self._dbs:
+ try:
+ return db.stream(sha)
+ except BadObject:
+ pass
+ #END for each db
+ raise BadObject(sha)
+
+ def size(self):
+ return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
+
+ def sha_iter(self):
+ return chain(*(db.sha_iter() for db in self._dbs))
+
+ #} END object DBR Interface
+
+ #{ Interface
+
+ def databases(self):
+ return tuple(self._dbs)
+
+ def update_cache(self, force=False):
+ # something might have changed, clear everything
+ stat = False
+ for db in self._dbs:
+ if isinstance(db, CachingDB):
+ stat |= db.update_cache(force)
+ # END if is caching db
+ # END for each database to update
+ return stat
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ len_partial_hexsha = len(partial_hexsha)
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+
+ candidate = None
+ for db in self._dbs:
+ full_bin_sha = None
+ try:
+ if hasattr(db, 'partial_to_complete_sha_hex'):
+ full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
+ else:
+ full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+ # END handle database type
+ except BadObject:
+ continue
+ # END ignore bad objects
+ if full_bin_sha:
+ if candidate and candidate != full_bin_sha:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = full_bin_sha
+ # END handle candidate
+ # END for each db
+ if not candidate:
+ raise BadObject(partial_binsha)
+ return candidate
+
+ def partial_to_complete_sha(self, partial_binsha, hex_len):
+ """Simple adaptor to feed into our implementation"""
+ return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len])
+ #} END interface
+
+
+class PureRepositoryPathsMixin(RepositoryPathsMixin):
+ # slots has no effect here, it's just to keep track of used attrs
+ __slots__ = ("_git_path", '_bare', '_working_tree_dir')
+
+ #{ Configuration
+ repo_dir = '.git'
+ objs_dir = 'objects'
+ #} END configuration
+
+ #{ Subclass Interface
+ def _initialize(self, path):
+ epath = abspath(expandvars(expanduser(path or os.getcwd())))
+
+ if not exists(epath):
+ raise NoSuchPathError(epath)
+ #END check file
+
+ self._working_tree_dir = None
+ self._git_path = None
+ curpath = epath
+
+ # walk up the path to find the .git dir
+ while curpath:
+ if is_git_dir(curpath):
+ self._git_path = curpath
+ self._working_tree_dir = os.path.dirname(curpath)
+ break
+ gitpath = join(curpath, self.repo_dir)
+ if is_git_dir(gitpath):
+ self._git_path = gitpath
+ self._working_tree_dir = curpath
+ break
+ curpath, dummy = os.path.split(curpath)
+ if not dummy:
+ break
+ # END while curpath
+
+ if self._git_path is None:
+ raise InvalidGitRepositoryError(epath)
+ # END path not found
+
+ self._bare = self._working_tree_dir is None
+ if hasattr(self, 'config_reader'):
+ try:
+ self._bare = self.config_reader("repository").getboolean('core','bare')
+ except Exception:
+ # let's not assume the option exists, although it should
+ pass
+ #END handle exception
+ #END check bare flag
+ self._working_tree_dir = self._bare and None or self._working_tree_dir
+
+ #} END subclass interface
+
+ #{ Object Interface
+
+ def __eq__(self, rhs):
+ if hasattr(rhs, 'git_dir'):
+ return self.git_dir == rhs.git_dir
+ return False
+
+ def __ne__(self, rhs):
+ return not self.__eq__(rhs)
+
+ def __hash__(self):
+ return hash(self.git_dir)
+
+ def __repr__(self):
+ return "%s(%r)" % (type(self).__name__, self.git_dir)
+
+ #} END object interface
+
+ #{ Interface
+
+ @property
+ def is_bare(self):
+ return self._bare
+
+ @property
+ def git_dir(self):
+ return self._git_path
+
+ @property
+ def working_tree_dir(self):
+ if self._working_tree_dir is None:
+ raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_dir)
+ #END assertion
+ return dirname(self.git_dir)
+
+ @property
+ def objects_dir(self):
+ return join(self.git_dir, self.objs_dir)
+
+ @property
+ def working_dir(self):
+ if self.is_bare:
+ return self.git_dir
+ else:
+ return self.working_tree_dir
+ #END handle bare state
+
+ def _mk_description():
+ def _get_description(self):
+ filename = join(self.git_dir, 'description')
+ return file(filename).read().rstrip()
+
+ def _set_description(self, descr):
+ filename = join(self.git_dir, 'description')
+ file(filename, 'w').write(descr+'\n')
+
+ return property(_get_description, _set_description, "Descriptive text for the content of the repository")
+
+ description = _mk_description()
+ del(_mk_description)
+
+ #} END interface
+
+
+class PureConfigurationMixin(ConfigurationMixin):
+
+ #{ Configuration
+ system_config_file_name = "gitconfig"
+ repo_config_file_name = "config"
+ #} END
+
+ def __new__(cls, *args, **kwargs):
+ """This is just a stupid workaround for the evil py2.6 change which makes mixins quite impossible"""
+ return super(PureConfigurationMixin, cls).__new__(cls, *args, **kwargs)
+
+ def __init__(self, *args, **kwargs):
+ """Verify prereqs"""
+ try:
+ super(PureConfigurationMixin, self).__init__(*args, **kwargs)
+ except TypeError:
+ pass
+ #END handle code-breaking change in python 2.6
+ assert hasattr(self, 'git_dir')
+
+ def _path_at_level(self, level ):
+ # we do not support an absolute path of the gitconfig on windows,
+ # use the global config instead
+ if sys.platform == "win32" and level == "system":
+ level = "global"
+ #END handle windows
+
+ if level == "system":
+ return "/etc/%s" % self.system_config_file_name
+ elif level == "global":
+ return normpath(expanduser("~/.%s" % self.system_config_file_name))
+ elif level == "repository":
+ return join(self.git_dir, self.repo_config_file_name)
+ #END handle level
+
+ raise ValueError("Invalid configuration level: %r" % level)
+
+ #{ Interface
+
+ def config_reader(self, config_level=None):
+ files = None
+ if config_level is None:
+ files = [ self._path_at_level(f) for f in self.config_level ]
+ else:
+ files = [ self._path_at_level(config_level) ]
+ #END handle level
+ return GitConfigParser(files, read_only=True)
+
+ def config_writer(self, config_level="repository"):
+ return GitConfigParser(self._path_at_level(config_level), read_only=False)
+
+
+ #} END interface
+
+
+class PureIndexDB(IndexDB):
+ #{ Configuration
+ IndexCls = IndexFile
+ #} END configuration
+
+ @property
+ def index(self):
+ return self.IndexCls(self)
+
+
+class PureAlternatesFileMixin(object):
+ """Utility able to read and write an alternates file through the alternates property
+ It needs to be part of a type with the git_dir or db_path property.
+
+ The file by default is assumed to be located at the default location as imposed
+ by the standard git repository layout"""
+
+ #{ Configuration
+ alternates_filepath = os.path.join('info', 'alternates') # relative path to alternates file
+
+ #} END configuration
+
+ def __init__(self, *args, **kwargs):
+ try:
+ super(PureAlternatesFileMixin, self).__init__(*args, **kwargs)
+ except TypeError:
+ pass
+ #END handle py2.6 code breaking changes
+ self._alternates_path() # throws on incompatible type
+
+ #{ Interface
+
+ def _alternates_path(self):
+ if hasattr(self, 'git_dir'):
+ return join(self.git_dir, 'objects', self.alternates_filepath)
+ elif hasattr(self, 'db_path'):
+ return self.db_path(self.alternates_filepath)
+ else:
+ raise AssertionError("This mixin requires a parent type with either the git_dir property or db_path method")
+ #END handle path
+
+ def _get_alternates(self):
+ """The list of alternates for this repo from which objects can be retrieved
+
+ :return: list of strings being pathnames of alternates"""
+ alternates_path = self._alternates_path()
+
+ if os.path.exists(alternates_path):
+ f = open(alternates_path)
+ try:
+ alts = f.read()
+ finally:
+ f.close()
+ return alts.strip().splitlines()
+ else:
+ return list()
+ # END handle path exists
+
+ def _set_alternates(self, alts):
+ """Sets the alternates
+
+ :param alts:
+ is the array of string paths representing the alternates at which
+ git should look for objects, i.e. /home/user/repo/.git/objects
+
+ :raise NoSuchPathError:
+ :note:
+ The method does not check for the existence of the paths in alts
+ as the caller is responsible."""
+ alternates_path = self._alternates_path()
+ if not alts:
+ if isfile(alternates_path):
+ os.remove(alternates_path)
+ else:
+ f = open(alternates_path, 'w')
+ try:
+ f.write("\n".join(alts))
+ finally:
+ f.close()
+ # END file handling
+ # END alts handling
+
+ alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list of paths to be used as alternates")
+
+ #} END interface
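+# Alternates usage sketch (the paths are examples only):
+#
+#   db.alternates = ["/shared/cache.git/objects"]    # writes info/alternates
+#   print db.alternates                              # -> list of alternate paths
+#   db.alternates = []                               # removes the file again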
+
diff --git a/git/db/py/complex.py b/git/db/py/complex.py
new file mode 100644
index 000000000..5f4e81e0a
--- /dev/null
+++ b/git/db/py/complex.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of PurePartialGitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.db.interface import HighLevelRepository
+from base import (
+ PureCompoundDB,
+ PureObjectDBW,
+ PureRootPathDB,
+ PureRepositoryPathsMixin,
+ PureConfigurationMixin,
+ PureAlternatesFileMixin,
+ PureIndexDB,
+ )
+from transport import PureTransportDB
+from resolve import PureReferencesMixin
+
+from loose import PureLooseObjectODB
+from pack import PurePackedODB
+from ref import PureReferenceDB
+from submodule import PureSubmoduleDB
+
+from git.db.compat import RepoCompatibilityInterface
+
+from git.exc import InvalidDBRoot
+import os
+
+__all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB')
+
+
+class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureAlternatesFileMixin):
+ """A git-style object-only database, which contains all objects in the 'objects'
+ subdirectory.
+ :note: The type needs to be initialized on the ./objects directory to function,
+ as it deals solely with object lookup. Use a PurePartialGitDB type if you need
+ reference and push support."""
+ # Configuration
+ PackDBCls = PurePackedODB
+ LooseDBCls = PureLooseObjectODB
+ PureReferenceDBCls = PureReferenceDB
+
+ # Directories
+ packs_dir = 'pack'
+ loose_dir = ''
+
+
+ def __init__(self, root_path):
+ """Initialize ourselves on a git ./objects directory"""
+ super(PureGitODB, self).__init__(root_path)
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs' or attr == '_loose_db':
+ self._dbs = list()
+ loose_db = None
+ for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+ (self.loose_dir, self.LooseDBCls),
+ (self.alternates_filepath, self.PureReferenceDBCls)):
+ path = self.db_path(subpath)
+ if os.path.exists(path):
+ self._dbs.append(dbcls(path))
+ if dbcls is self.LooseDBCls:
+ loose_db = self._dbs[-1]
+ # END remember loose db
+ # END check path exists
+ # END for each db type
+
+ # should have at least one subdb
+ if not self._dbs:
+ raise InvalidDBRoot(self.root_path())
+ # END handle error
+
+ # the first one should have the store method
+ assert loose_db is not None and hasattr(loose_db, 'store'), "One database needs store functionality"
+
+ # finally set the value
+ self._loose_db = loose_db
+ else:
+ super(PureGitODB, self)._set_cache_(attr)
+ # END handle attrs
+
+ #{ PureObjectDBW interface
+
+ def store(self, istream):
+ return self._loose_db.store(istream)
+
+ def ostream(self):
+ return self._loose_db.ostream()
+
+ def set_ostream(self, ostream):
+ return self._loose_db.set_ostream(ostream)
+
+ #} END objectdbw interface
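+# Instantiation sketch (hypothetical path) - note that PureGitODB wants the
+# ./objects directory, while PurePartialGitDB below accepts the .git directory:
+#
+#   odb = PureGitODB("/path/to/repo/.git/objects")
+#   print odb.size()       # number of objects across the loose and packed sub-dbs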
+
+
+
+class PurePartialGitDB(PureGitODB,
+ PureRepositoryPathsMixin, PureConfigurationMixin,
+ PureReferencesMixin, PureSubmoduleDB,
+ PureIndexDB,
+ PureTransportDB # not fully implemented
+ # HighLevelRepository Currently not implemented !
+ ):
+ """Git like database with support for object lookup as well as reference resolution.
+ Our rootpath is set to the actual .git directory (bare or non-bare).
+
+ The root_path will be the git objects directory. Use git_path() to obtain the actual top-level
+ git directory."""
+ #directories
+
+ def __init__(self, root_path):
+ """Initialize ourselves on the .git directory, or the .git/objects directory."""
+ PureRepositoryPathsMixin._initialize(self, root_path)
+ super(PurePartialGitDB, self).__init__(self.objects_dir)
+
+
+class PureCompatibilityGitDB(PurePartialGitDB, RepoCompatibilityInterface):
+ """Pure git database with a compatability layer required by 0.3x code"""
+
diff --git a/git/db/py/loose.py b/git/db/py/loose.py
new file mode 100644
index 000000000..6e72aff01
--- /dev/null
+++ b/git/db/py/loose.py
@@ -0,0 +1,263 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+ PureRootPathDB,
+ PureObjectDBR,
+ PureObjectDBW
+ )
+
+
+from git.exc import (
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
+
+from git.stream import (
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ FDStream,
+ Sha1Writer
+ )
+
+from git.base import (
+ OStream,
+ OInfo
+ )
+
+from git.util import (
+ file_contents_ro_filepath,
+ ENOENT,
+ hex_to_bin,
+ bin_to_hex,
+ exists,
+ chmod,
+ isdir,
+ isfile,
+ remove,
+ mkdir,
+ rename,
+ dirname,
+ basename,
+ join
+ )
+
+from git.fun import (
+ chunk_size,
+ loose_object_header_info,
+ write_object,
+ stream_copy
+ )
+
+import tempfile
+import mmap
+import sys
+import os
+
+
+__all__ = ( 'PureLooseObjectODB', )
+
+
+class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW):
+ """A database which operates on loose object files"""
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = chunk_size
+
+ # On windows we need to keep it writable, otherwise it cannot be removed
+ # either
+ new_objects_mode = 0444
+ if os.name == 'nt':
+ new_objects_mode = 0644
+
+
+ def __init__(self, root_path):
+ super(PureLooseObjectODB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+ #{ Interface
+ def object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
+ return join(hexsha[:2], hexsha[2:])
+
+ def readable_db_object_path(self, hexsha):
+ """
+ :return: readable object path to the object identified by hexsha
+ :raise BadObject: If the object file does not exist"""
+ try:
+ return self._hexsha_to_file[hexsha]
+ except KeyError:
+ pass
+ # END ignore cache misses
+
+ # try filesystem
+ path = self.db_path(self.object_path(hexsha))
+ if exists(path):
+ self._hexsha_to_file[hexsha] = path
+ return path
+ # END handle cache
+ raise BadObject(hexsha)
+
+
+ #} END interface
+
+ def _map_loose_object(self, sha):
+ """
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+ try:
+ return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+ except OSError,e:
+ if e.errno != ENOENT:
+ # the first attempt probably failed because of our additional flags,
+ # disable them for all future attempts and try again without them
+ self._fd_open_flags = 0
+ try:
+ return file_contents_ro_filepath(db_path)
+ except OSError:
+ raise BadObject(sha)
+ # END second attempt without special flags
+ else:
+ raise BadObject(sha)
+ # END handle error
+ # END exception handling
+
+ def set_ostream(self, stream):
+ """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+ if stream is not None and not isinstance(stream, Sha1Writer):
+ raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+ return super(PureLooseObjectODB, self).set_ostream(stream)
+
+ def info(self, sha):
+ m = self._map_loose_object(sha)
+ try:
+ type, size = loose_object_header_info(m)
+ return OInfo(sha, type, size)
+ finally:
+ m.close()
+ # END assure release of system resources
+
+ def stream(self, sha):
+ m = self._map_loose_object(sha)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ return OStream(sha, type, size, stream)
+
+ def has_object(self, sha):
+ try:
+ self.readable_db_object_path(bin_to_hex(sha))
+ return True
+ except BadObject:
+ return False
+ # END check existence
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: 20 byte binary sha1 string which matches the given name uniquely
+ :param partial_hexsha: hexadecimal partial sha
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for binsha in self.sha_iter():
+ if bin_to_hex(binsha).startswith(partial_hexsha):
+ # it can't ever find the same object twice
+ if candidate is not None:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = binsha
+ # END for each object
+ if candidate is None:
+ raise BadObject(partial_hexsha)
+ return candidate
+
+ def store(self, istream):
+ """note: The sha we produce will be hex by nature"""
+ tmp_path = None
+ writer = self.ostream()
+ if writer is None:
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+ if istream.binsha is None:
+ writer = FDCompressedSha1Writer(fd)
+ else:
+ writer = FDStream(fd)
+ # END handle direct stream copies
+ # END handle custom writer
+
+ try:
+ try:
+ if istream.binsha is not None:
+ # copy as much as possible, the actual uncompressed item size might
+ # be smaller than the compressed version
+ stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+ else:
+ # write object with header, we have to make a new one
+ write_object(istream.type, istream.size, istream.read, writer.write,
+ chunk_size=self.stream_chunk_size)
+ # END handle direct stream copies
+ finally:
+ if tmp_path:
+ writer.close()
+ # END assure target stream is closed
+ except:
+ if tmp_path:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+
+ hexsha = None
+ if istream.binsha:
+ hexsha = istream.hexsha
+ else:
+ hexsha = writer.sha(as_hex=True)
+ # END handle sha
+
+ if tmp_path:
+ obj_path = self.db_path(self.object_path(hexsha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ # rename onto existing doesn't work on windows
+ if os.name == 'nt' and isfile(obj_path):
+ remove(obj_path)
+ # END handle win32
+ rename(tmp_path, obj_path)
+
+ # make sure it's readable for all! It started out as a rw------- tmp file
+ # but needs to be readable by everyone
+ chmod(obj_path, self.new_objects_mode)
+ # END handle dry_run
+
+ istream.binsha = hex_to_bin(hexsha)
+ return istream
+
+ def sha_iter(self):
+ # find all files which look like an object, extract sha from there
+ for root, dirs, files in os.walk(self.root_path()):
+ root_base = basename(root)
+ if len(root_base) != 2:
+ continue
+
+ for f in files:
+ if len(f) != 38:
+ continue
+ yield hex_to_bin(root_base + f)
+ # END for each file
+ # END for each walk iteration
+
+ def size(self):
+ return len(tuple(self.sha_iter()))
+
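+# Store sketch (illustrative only; IStream comes from git.base, as imported in mem.py
+# below, and the path is hypothetical):
+#
+#   from git.base import IStream
+#   from cStringIO import StringIO
+#
+#   odb = PureLooseObjectODB("/path/to/repo/.git/objects")
+#   data = "my file content"
+#   istream = odb.store(IStream("blob", len(data), StringIO(data)))
+#   print bin_to_hex(istream.binsha)   # hex sha of the newly written loose object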
diff --git a/git/db/py/mem.py b/git/db/py/mem.py
new file mode 100644
index 000000000..da02dbdd7
--- /dev/null
+++ b/git/db/py/mem.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains the MemoryDatabase implementation"""
+from base import (
+ PureObjectDBR,
+ PureObjectDBW
+ )
+from loose import PureLooseObjectODB
+from git.base import (
+ OStream,
+ IStream,
+ )
+
+from git.exc import (
+ BadObject,
+ UnsupportedOperation
+ )
+from git.stream import (
+ ZippedStoreShaWriter,
+ DecompressMemMapReader,
+ )
+
+from cStringIO import StringIO
+
+__all__ = ("PureMemoryDB", )
+
+class PureMemoryDB(PureObjectDBR, PureObjectDBW):
+ """A memory database stores everything to memory, providing fast IO and object
+ retrieval. It should be used to buffer results and obtain SHAs before writing
+ it to the actual physical storage, as it allows to query whether object already
+ exists in the target storage before introducing actual IO
+
+ :note: memory is currently not threadsafe, hence the async methods cannot be used
+ for storing"""
+
+ def __init__(self):
+ super(PureMemoryDB, self).__init__()
+ self._db = PureLooseObjectODB("path/doesnt/matter")
+
+ # maps 20 byte shas to their OStream objects
+ self._cache = dict()
+
+ def set_ostream(self, stream):
+ raise UnsupportedOperation("PureMemoryDB's always stream into memory")
+
+ def store(self, istream):
+ zstream = ZippedStoreShaWriter()
+ self._db.set_ostream(zstream)
+
+ istream = self._db.store(istream)
+ zstream.close() # close to flush
+ zstream.seek(0)
+
+ # don't provide a size, the stream is written in object format, hence the
+ # header needs decompression
+ decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+ self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
+
+ return istream
+
+ def store_async(self, reader):
+ raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access")
+
+ def has_object(self, sha):
+ return sha in self._cache
+
+ def info(self, sha):
+ # we always return streams, which are infos as well
+ return self.stream(sha)
+
+ def stream(self, sha):
+ try:
+ ostream = self._cache[sha]
+ # rewind stream for the next one to read
+ ostream.stream.seek(0)
+ return ostream
+ except KeyError:
+ raise BadObject(sha)
+ # END exception handling
+
+ def size(self):
+ return len(self._cache)
+
+ def sha_iter(self):
+ return self._cache.iterkeys()
+
+
+ #{ Interface
+ def stream_copy(self, sha_iter, odb):
+		"""Copy the streams identified by the shas yielded by sha_iter into the given odb.
+		The streams will be copied directly.
+		:note: an object will only be written if it did not yet exist in the target db
+		:return: number of streams actually copied into odb. If smaller than the number
+			of input shas, one or more objects already existed in odb"""
+ count = 0
+ for sha in sha_iter:
+ if odb.has_object(sha):
+ continue
+			# END check object existence
+
+ ostream = self.stream(sha)
+ # compressed data including header
+ sio = StringIO(ostream.stream.data())
+ istream = IStream(ostream.type, ostream.size, sio, sha)
+
+ odb.store(istream)
+ count += 1
+ # END for each sha
+ return count
+ #} END interface
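
A short sketch of the buffering pattern the docstring above describes, assuming the loose object database from earlier in this patch as the physical target; paths are placeholders.

    from cStringIO import StringIO
    from git.base import IStream
    from git.db.py.mem import PureMemoryDB
    from git.db.py.loose import PureLooseObjectODB

    mdb = PureMemoryDB()
    target = PureLooseObjectODB("/tmp/example/.git/objects")    # placeholder

    data = "buffered blob"
    istream = mdb.store(IStream("blob", len(data), StringIO(data)))   # sha known, no disk IO yet

    # flush only those objects the target does not already have
    copied = mdb.stream_copy(mdb.sha_iter(), target)
    print "%i object(s) copied" % copied
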
diff --git a/git/db/py/pack.py b/git/db/py/pack.py
new file mode 100644
index 000000000..75b75468e
--- /dev/null
+++ b/git/db/py/pack.py
@@ -0,0 +1,212 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module containing a database to deal with packs"""
+from git.db import CachingDB
+from base import (
+ PureRootPathDB,
+ PureObjectDBR
+ )
+
+from git.util import LazyMixin
+
+from git.exc import (
+ BadObject,
+ UnsupportedOperation,
+ AmbiguousObjectName
+ )
+
+from git.pack import PackEntity
+
+import os
+import glob
+
+__all__ = ('PurePackedODB', )
+
+#{ Utilities
+
+
+class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin):
+ """A database operating on a set of object packs"""
+
+ # the type to use when instantiating a pack entity
+ PackEntityCls = PackEntity
+
+ # sort the priority list every N queries
+ # Higher values are better, performance tests don't show this has
+ # any effect, but it should have one
+ _sort_interval = 500
+
+ def __init__(self, root_path):
+ super(PurePackedODB, self).__init__(root_path)
+ # list of lists with three items:
+ # * hits - number of times the pack was hit with a request
+ # * entity - Pack entity instance
+ # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+ # self._entities = list() # lazy loaded list
+ self._hit_count = 0 # amount of hits
+ self._st_mtime = 0 # last modification data of our root path
+
+ def _set_cache_(self, attr):
+ if attr == '_entities':
+ self._entities = list()
+ self.update_cache(force=True)
+ # END handle entities initialization
+
+ def _sort_entities(self):
+ self._entities.sort(key=lambda l: l[0], reverse=True)
+
+ def _pack_info(self, sha):
+ """:return: tuple(entity, index) for an item at the given sha
+ :param sha: 20 or 40 byte sha
+ :raise BadObject:
+ :note: This method is not thread-safe, but may be hit in multi-threaded
+ operation. The worst thing that can happen though is a counter that
+			was not incremented, or the list being in the wrong order. So we save
+			the time for locking here; let's see how that goes"""
+ # presort ?
+ if self._hit_count % self._sort_interval == 0:
+ self._sort_entities()
+ # END update sorting
+
+ for item in self._entities:
+ index = item[2](sha)
+ if index is not None:
+ item[0] += 1 # one hit for you
+ self._hit_count += 1 # general hit count
+ return (item[1], index)
+ # END index found in pack
+ # END for each item
+
+ # no hit, see whether we have to update packs
+		# NOTE: considering packs don't change very often, we save this call
+ # and leave it to the super-caller to trigger that
+ raise BadObject(sha)
+
+ #{ Object DB Read
+
+ def has_object(self, sha):
+ try:
+ self._pack_info(sha)
+ return True
+ except BadObject:
+ return False
+ # END exception handling
+
+ def info(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.info_at_index(index)
+
+ def stream(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.stream_at_index(index)
+
+ def sha_iter(self):
+ sha_list = list()
+ for entity in self.entities():
+ index = entity.index()
+ sha_by_index = index.sha
+ for index in xrange(index.size()):
+ yield sha_by_index(index)
+ # END for each index
+ # END for each entity
+
+ def size(self):
+ sizes = [item[1].index().size() for item in self._entities]
+ return reduce(lambda x,y: x+y, sizes, 0)
+
+ #} END object db read
+
+ #{ object db write
+
+ def store(self, istream):
+ """Storing individual objects is not feasible as a pack is designed to
+ hold multiple objects. Writing or rewriting packs for single objects is
+ inefficient"""
+ raise UnsupportedOperation()
+
+ def store_async(self, reader):
+ # TODO: add PureObjectDBRW before implementing this
+ raise NotImplementedError()
+
+ #} END object db write
+
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+		Update our cache with the actually existing packs on disk. Add new ones,
+ and remove deleted ones. We keep the unchanged ones
+
+ :param force: If True, the cache will be updated even though the directory
+ does not appear to have changed according to its modification timestamp.
+ :return: True if the packs have been updated so there is new information,
+ False if there was no change to the pack database"""
+ stat = os.stat(self.root_path())
+ if not force and stat.st_mtime <= self._st_mtime:
+ return False
+ # END abort early on no change
+ self._st_mtime = stat.st_mtime
+
+ # packs are supposed to be prefixed with pack- by git-convention
+ # get all pack files, figure out what changed
+ pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+ our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+ # new packs
+ for pack_file in (pack_files - our_pack_files):
+			# init the hit-counter/priority with the size, a good measure for hit
+			# probability. It's implemented so that only 12 bytes will be read
+ entity = self.PackEntityCls(pack_file)
+ self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+ # END for each new packfile
+
+ # removed packs
+ for pack_file in (our_pack_files - pack_files):
+ del_index = -1
+ for i, item in enumerate(self._entities):
+ if item[1].pack().path() == pack_file:
+ del_index = i
+ break
+ # END found index
+ # END for each entity
+ assert del_index != -1
+ del(self._entities[del_index])
+ # END for each removed pack
+
+		# reinitialize priorities
+ self._sort_entities()
+ return True
+
+ def entities(self):
+ """:return: list of pack entities operated upon by this database"""
+ return [ item[1] for item in self._entities ]
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical representation.
+			It is required as binary shas cannot indicate whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for item in self._entities:
+ item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+ if item_index is not None:
+ sha = item[1].index().sha(item_index)
+ if candidate and candidate != sha:
+ raise AmbiguousObjectName(partial_binsha)
+ candidate = sha
+ # END handle full sha could be found
+ # END for each entity
+
+ if candidate:
+ return candidate
+
+ # still not found ?
+ raise BadObject(partial_binsha)
+
+ #} END interface
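
A hedged sketch of how the pack database above is queried; the pack directory and the sha are placeholders, and hex_to_bin comes from git.util as used elsewhere in this patch.

    from git.db.py.pack import PurePackedODB
    from git.util import hex_to_bin

    pdb = PurePackedODB("/tmp/example/.git/objects/pack")   # placeholder pack directory
    pdb.update_cache(force=True)                            # scan for pack-*.pack files

    binsha = hex_to_bin("0123456789abcdef0123456789abcdef01234567")   # placeholder sha
    if pdb.has_object(binsha):
        info = pdb.info(binsha)         # OInfo with type and uncompressed size
        stream = pdb.stream(binsha)     # OStream yielding the decompressed data
    print "%i objects in %i pack(s)" % (pdb.size(), len(pdb.entities()))
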
diff --git a/git/db/py/ref.py b/git/db/py/ref.py
new file mode 100644
index 000000000..d2c77a3a7
--- /dev/null
+++ b/git/db/py/ref.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import PureCompoundDB
+
+import os
+__all__ = ('PureReferenceDB', )
+
+class PureReferenceDB(PureCompoundDB):
+	"""A database consisting of databases referred to in a file"""
+
+ # Configuration
+ # Specifies the object database to use for the paths found in the alternates
+ # file. If None, it defaults to the PureGitODB
+ ObjectDBCls = None
+
+ def __init__(self, ref_file):
+ super(PureReferenceDB, self).__init__()
+ self._ref_file = ref_file
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ self._update_dbs_from_ref_file()
+ else:
+ super(PureReferenceDB, self)._set_cache_(attr)
+ # END handle attrs
+
+ def _update_dbs_from_ref_file(self):
+ dbcls = self.ObjectDBCls
+ if dbcls is None:
+ # late import
+ import complex
+ dbcls = complex.PureGitODB
+ # END get db type
+
+ # try to get as many as possible, don't fail if some are unavailable
+ ref_paths = list()
+ try:
+ ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
+ except (OSError, IOError):
+ pass
+ # END handle alternates
+
+ ref_paths_set = set(ref_paths)
+ cur_ref_paths_set = set(db.root_path() for db in self._dbs)
+
+ # remove existing
+ for path in (cur_ref_paths_set - ref_paths_set):
+ for i, db in enumerate(self._dbs[:]):
+ if db.root_path() == path:
+ del(self._dbs[i])
+ continue
+ # END del matching db
+ # END for each path to remove
+
+ # add new
+ # sort them to maintain order
+ added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
+ for path in added_paths:
+ try:
+ db = dbcls(path)
+ # force an update to verify path
+ if isinstance(db, PureCompoundDB):
+ db.databases()
+ # END verification
+ self._dbs.append(db)
+ except Exception, e:
+ # ignore invalid paths or issues
+ pass
+ # END for each path to add
+
+ def update_cache(self, force=False):
+ # re-read alternates and update databases
+ self._update_dbs_from_ref_file()
+ return super(PureReferenceDB, self).update_cache(force)
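
The reference database reads one database path per line, in the style of .git/objects/info/alternates. A small sketch with placeholder paths:

    from git.db.py.ref import PureReferenceDB

    # the file lists one objects directory per line, e.g. an alternates file
    rdb = PureReferenceDB("/tmp/example/.git/objects/info/alternates")   # placeholder
    rdb.update_cache()          # re-read the file and rebuild the referenced databases
    print rdb.databases()       # by default one PureGitODB per readable path
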
diff --git a/git/repo/fun.py b/git/db/py/resolve.py
similarity index 71%
rename from git/repo/fun.py
rename to git/db/py/resolve.py
index 03d557164..9a31fbd80 100644
--- a/git/repo/fun.py
+++ b/git/db/py/resolve.py
@@ -1,35 +1,36 @@
-"""Package with general repository related functions"""
-import os
-from gitdb.exc import BadObject
-from git.refs import SymbolicReference
-from git.objects import Object
-from gitdb.util import (
+"""Module with an implementation for refspec parsing. It is the pure-python
+version assuming compatible interface for reference and object types"""
+
+from git.db.interface import ReferencesMixin
+from git.exc import BadObject
+from git.refs import (
+ SymbolicReference,
+ Reference,
+ HEAD,
+ Head,
+ TagReference
+ )
+from git.refs.head import HEAD
+from git.refs.headref import Head
+from git.refs.tag import TagReference
+
+from git.objects.base import Object
+from git.objects.commit import Commit
+from git.util import (
join,
isdir,
isfile,
hex_to_bin,
- bin_to_hex
+ bin_to_hex,
+ is_git_dir
)
from string import digits
+import os
+import re
-__all__ = ('rev_parse', 'is_git_dir', 'touch')
-
-def touch(filename):
- fp = open(filename, "a")
- fp.close()
-
-def is_git_dir(d):
- """ This is taken from the git setup.c:is_git_directory
- function."""
- if isdir(d) and \
- isdir(join(d, 'objects')) and \
- isdir(join(d, 'refs')):
- headref = join(d, 'HEAD')
- return isfile(headref) or \
- (os.path.islink(headref) and
- os.readlink(headref).startswith('refs'))
- return False
+__all__ = ["PureReferencesMixin"]
+#{ Utilities
def short_to_long(odb, hexsha):
""":return: long hexadecimal sha1 from the given less-than-40 byte hexsha
@@ -89,7 +90,7 @@ def name_to_object(repo, name, return_ref=False):
return Object.new_from_sha(repo, hex_to_bin(hexsha))
def deref_tag(tag):
- """Recursively dereerence a tag and return the resulting object"""
+ """Recursively dereference a tag and return the resulting object"""
while True:
try:
tag = tag.object
@@ -185,7 +186,7 @@ def rev_parse(repo, rev):
# END handle tag
elif token == '@':
# try single int
- assert ref is not None, "Requre Reference to access reflog"
+ assert ref is not None, "Require Reference to access reflog"
revlog_index = None
try:
# transform reversed index into the format of our revlog
@@ -282,3 +283,81 @@ def rev_parse(repo, rev):
raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))
return obj
+
+#} END utilities
+
+class PureReferencesMixin(ReferencesMixin):
+ """Pure-Python refparse implementation"""
+
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+
+ #{ Configuration
+	# Types to use when instantiating references
+ TagReferenceCls = TagReference
+ HeadCls = Head
+ ReferenceCls = Reference
+ HEADCls = HEAD
+ CommitCls = Commit
+ #} END configuration
+
+ def resolve(self, name):
+ return self.resolve_object(name).binsha
+
+ def resolve_object(self, name):
+ return rev_parse(self, name)
+
+ @property
+ def references(self):
+ return self.ReferenceCls.list_items(self)
+
+ @property
+ def heads(self):
+ return self.HeadCls.list_items(self)
+
+ @property
+ def tags(self):
+ return self.TagReferenceCls.list_items(self)
+
+ def tag(self, name):
+ return self.TagReferenceCls(self, self.TagReferenceCls.to_full_path(name))
+
+ def commit(self, rev=None):
+ if rev is None:
+ return self.head.commit
+ else:
+ return self.resolve_object(str(rev)+"^0")
+ #END handle revision
+
+ def iter_trees(self, *args, **kwargs):
+ return ( c.tree for c in self.iter_commits(*args, **kwargs) )
+
+ def tree(self, rev=None):
+ if rev is None:
+ return self.head.commit.tree
+ else:
+ return self.resolve_object(str(rev)+"^{tree}")
+
+ def iter_commits(self, rev=None, paths='', **kwargs):
+ if rev is None:
+ rev = self.head.commit
+
+ return self.CommitCls.iter_items(self, rev, paths, **kwargs)
+
+
+ @property
+ def head(self):
+ return self.HEADCls(self,'HEAD')
+
+ def create_head(self, path, commit='HEAD', force=False, logmsg=None ):
+ return self.HeadCls.create(self, path, commit, force, logmsg)
+
+ def delete_head(self, *heads, **kwargs):
+ return self.HeadCls.delete(self, *heads, **kwargs)
+
+ def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs):
+ return self.TagReferenceCls.create(self, path, ref, message, force, **kwargs)
+
+ def delete_tag(self, *tags):
+ return self.TagReferenceCls.delete(self, *tags)
+
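
A sketch of the mixin's surface, assuming db is an instance of a database class that mixes in PureReferencesMixin (for example the Pygit2GitDB defined later in this patch); all names below follow that assumption.

    head_commit = db.commit()           # the commit HEAD currently points to
    tree = db.tree("HEAD~1")            # any rev, resolved with a trailing '^{tree}'
    binsha = db.resolve("master")       # 20 byte binary sha of the resolved object

    for head in db.heads:               # Head instances via HeadCls.list_items
        print head.name
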
diff --git a/git/db/py/submodule.py b/git/db/py/submodule.py
new file mode 100644
index 000000000..735f90b19
--- /dev/null
+++ b/git/db/py/submodule.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.objects.submodule.base import Submodule
+from git.objects.submodule.root import RootModule
+from git.db.interface import SubmoduleDB
+
+__all__ = ["PureSubmoduleDB"]
+
+class PureSubmoduleDB(SubmoduleDB):
+ """Pure python implementation of submodule functionality"""
+
+ @property
+ def submodules(self):
+ return Submodule.list_items(self)
+
+ def submodule(self, name):
+ try:
+ return self.submodules[name]
+ except IndexError:
+ raise ValueError("Didn't find submodule named %r" % name)
+ # END exception handling
+
+ def create_submodule(self, *args, **kwargs):
+ return Submodule.add(self, *args, **kwargs)
+
+ def iter_submodules(self, *args, **kwargs):
+ return RootModule(self).traverse(*args, **kwargs)
+
+ def submodule_update(self, *args, **kwargs):
+ return RootModule(self).update(*args, **kwargs)
+
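
A brief sketch of the submodule facade above, again assuming db is a database instance mixing in PureSubmoduleDB; the submodule name is a placeholder.

    for sm in db.submodules:            # Submodule.list_items(self)
        print sm.name
    try:
        sm = db.submodule("ext/lib")    # placeholder name
    except ValueError:
        pass                            # no submodule of that name
    db.submodule_update()               # delegates to RootModule(self).update()
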
diff --git a/git/db/py/transport.py b/git/db/py/transport.py
new file mode 100644
index 000000000..00d222b04
--- /dev/null
+++ b/git/db/py/transport.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Implement a transport compatible database which sends objects using the git protocol"""
+
+from git.db.interface import ( TransportDB,
+ PushInfo,
+ FetchInfo,
+ RefSpec )
+
+from git.refs.remote import RemoteReference
+from git.remote import Remote
+
+
+__all__ = ["PureTransportDB"]
+
+class PurePushInfo(PushInfo):
+ """TODO: Implementation"""
+ __slots__ = tuple()
+
+
+
+class PureFetchInfo(FetchInfo):
+ """TODO"""
+ __slots__ = tuple()
+
+
+class PureTransportDB(TransportDB):
+ # The following variables need to be set by the derived class
+ #{Configuration
+ protocol = None
+ RemoteCls = Remote
+	#}end configuration
+
+ #{ Interface
+
+ def fetch(self, url, refspecs, progress=None, **kwargs):
+ raise NotImplementedError()
+
+ def push(self, url, refspecs, progress=None, **kwargs):
+ raise NotImplementedError()
+
+ @property
+ def remotes(self):
+ return self.RemoteCls.list_items(self)
+
+ def remote(self, name='origin'):
+ return self.remotes[name]
+
+ def create_remote(self, name, url, **kwargs):
+ return self.RemoteCls.create(self, name, url, **kwargs)
+
+ def delete_remote(self, remote):
+ return self.RemoteCls.remove(self, remote)
+
+ #}end interface
+
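
The transport database is still mostly a stub here (fetch and push raise NotImplementedError), but remote handling already works through the configured RemoteCls. A brief sketch, again assuming db is a database instance mixing in PureTransportDB:

    origin = db.remote('origin')            # Remote instance looked up by name
    for remote in db.remotes:               # RemoteCls.list_items(self)
        print remote.name
    # db.fetch(...) and db.push(...) are not implemented in this pure-python version
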
diff --git a/git/db/pygit2/__init__.py b/git/db/pygit2/__init__.py
new file mode 100644
index 000000000..af005662c
--- /dev/null
+++ b/git/db/pygit2/__init__.py
@@ -0,0 +1,11 @@
+"""Pygit2 module initialization"""
+
+def init_pygit2():
+ """:raise ImportError: if pygit2 is not present"""
+ try:
+ import pygit2
+ except ImportError:
+ raise ImportError("Could not find 'pygit2' in the PYTHONPATH - pygit2 functionality is not available")
+ #END handle pygit2 import
+
+init_pygit2()
diff --git a/git/db/pygit2/complex.py b/git/db/pygit2/complex.py
new file mode 100644
index 000000000..c1563bcaa
--- /dev/null
+++ b/git/db/pygit2/complex.py
@@ -0,0 +1,93 @@
+
+__all__ = ['Pygit2GitODB', 'Pygit2GitDB', 'Pygit2CompatibilityGitDB']
+
+from git.db.py.complex import PureGitODB
+from git.db.py.base import (
+ PureRepositoryPathsMixin,
+ PureConfigurationMixin,
+ PureIndexDB,
+ )
+from git.db.py.resolve import PureReferencesMixin
+from git.db.py.transport import PureTransportDB
+from git.db.py.submodule import PureSubmoduleDB
+
+from git.db.cmd.complex import CmdHighLevelRepository, GitCommandMixin
+from git.db.compat import RepoCompatibilityInterface
+
+from pygit2 import Repository as Pygit2Repo
+
+from git.base import OInfo, OStream
+from git.fun import type_id_to_type_map, type_to_type_id_map
+from git.util import hex_to_bin
+
+from cStringIO import StringIO
+import os
+
+
+class Pygit2GitODB(PureGitODB):
+ """A full fledged database to read and write object files from all kinds of sources."""
+
+ def __init__(self, objects_root):
+		"""Initialize this instance"""
+ PureGitODB.__init__(self, objects_root)
+ if hasattr(self, 'git_dir'):
+ wd = self.git_dir
+ else:
+ wd = os.path.dirname(objects_root)
+ #END try to figure out good entry for pygit2 - it needs the .gitdir
+ print objects_root
+ print wd
+ self._py2_repo = Pygit2Repo(wd)
+
+ def __getattr__(self, attr):
+ try:
+ # supply LazyMixin with this call first
+ return super(Pygit2GitODB, self).__getattr__(attr)
+ except AttributeError:
+			# now assume it's on the pygit2 repository ... for now
+ return getattr(self._py2_repo, attr)
+ #END handle attr
+
+ #{ Object DBR
+
+ def info(self, binsha):
+ type_id, uncomp_data = self._py2_repo.read(binsha)
+ return OInfo(binsha, type_id_to_type_map[type_id], len(uncomp_data))
+
+ def stream(self, binsha):
+ type_id, uncomp_data = self._py2_repo.read(binsha)
+ return OStream(binsha, type_id_to_type_map[type_id], len(uncomp_data), StringIO(uncomp_data))
+
+ # #}END object dbr
+ #
+ # #{ Object DBW
+ def store(self, istream):
+ # TODO: remove this check once the required functionality was merged in pygit2
+ if hasattr(self._py2_repo, 'write'):
+ istream.binsha = hex_to_bin(self._py2_repo.write(type_to_type_id_map[istream.type], istream.read()))
+ return istream
+ else:
+ return super(Pygit2GitODB, self).store(istream)
+ #END handle write support
+
+ #}END object dbw
+
+class Pygit2GitDB( PureRepositoryPathsMixin, PureConfigurationMixin,
+ PureReferencesMixin, PureSubmoduleDB,
+ PureIndexDB,
+ PureTransportDB, # not fully implemented
+ GitCommandMixin,
+ CmdHighLevelRepository,
+ Pygit2GitODB): # must come last, as it doesn't pass on __init__ with super
+
+
+ def __init__(self, root_path):
+ """Initialize ourselves on the .git directory, or the .git/objects directory."""
+ PureRepositoryPathsMixin._initialize(self, root_path)
+ super(Pygit2GitDB, self).__init__(self.objects_dir)
+
+
+class Pygit2CompatibilityGitDB(RepoCompatibilityInterface, Pygit2GitDB):
+ """Basic pygit2 compatibility database"""
+ pass
+
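
A sketch of instantiating the pygit2-backed database above; it requires pygit2 to be importable, and the repository path is a placeholder.

    from git.db.pygit2.complex import Pygit2CompatibilityGitDB

    db = Pygit2CompatibilityGitDB("/tmp/example/.git")      # placeholder .git directory
    info = db.info(db.resolve("HEAD"))                      # object reads go through pygit2
    print info.type, info.size
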
diff --git a/git/diff.py b/git/diff.py
index 7b3bf6b59..d1c6c0ac3 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -9,7 +9,7 @@
from objects.util import mode_str_to_int
from exc import GitCommandError
-from gitdb.util import hex_to_bin
+from git.util import hex_to_bin
__all__ = ('Diffable', 'DiffIndex', 'Diff')
diff --git a/git/exc.py b/git/exc.py
index d2cb8d7ea..e14fb7f1c 100644
--- a/git/exc.py
+++ b/git/exc.py
@@ -5,17 +5,53 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
""" Module containing all exceptions thrown througout the git package, """
-from gitdb.exc import *
+from util import to_hex_sha
-class InvalidGitRepositoryError(Exception):
+class GitPythonError(Exception):
+ """Base exception for all git-python related errors"""
+
+class ODBError(GitPythonError):
+ """All errors thrown by the object database"""
+
+
+class InvalidDBRoot(ODBError):
+ """Thrown if an object database cannot be initialized at the given path"""
+
+
+class BadObject(ODBError):
+ """The object with the given SHA does not exist. Instantiate with the
+ failed sha"""
+
+ def __str__(self):
+ return "BadObject: %s" % to_hex_sha(self.args[0])
+
+
+class ParseError(ODBError):
+ """Thrown if the parsing of a file failed due to an invalid format"""
+
+
+class AmbiguousObjectName(ODBError):
+ """Thrown if a possibly shortened name does not uniquely represent a single object
+ in the database"""
+
+
+class BadObjectType(ODBError):
+ """The object had an unsupported type"""
+
+
+class UnsupportedOperation(ODBError):
+ """Thrown if the given operation cannot be supported by the object database"""
+
+
+class InvalidGitRepositoryError(InvalidDBRoot):
""" Thrown if the given repository appears to have an invalid format. """
-class NoSuchPathError(OSError):
+class NoSuchPathError(InvalidDBRoot):
""" Thrown if a path could not be access by the system. """
-class GitCommandError(Exception):
+class GitCommandError(GitPythonError):
""" Thrown if execution of the git command fails with non-zero status code. """
def __init__(self, command, status, stderr=None):
self.stderr = stderr
@@ -27,7 +63,7 @@ def __str__(self):
(' '.join(str(i) for i in self.command), self.status, self.stderr))
-class CheckoutError( Exception ):
+class CheckoutError(GitPythonError):
"""Thrown if a file could not be checked out from the index as it contained
changes.
@@ -50,9 +86,10 @@ def __str__(self):
return Exception.__str__(self) + ":%s" % self.failed_files
-class CacheError(Exception):
+class CacheError(GitPythonError):
"""Base for all errors related to the git index, which is called cache internally"""
+
class UnmergedEntriesError(CacheError):
"""Thrown if an operation cannot proceed as there are still unmerged
entries in the cache"""
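
With GitPythonError as the new common base, callers can catch everything git-python raises in one place. A small sketch; the odb instance and the sha are placeholders.

    from git.exc import GitPythonError, ODBError, BadObject

    try:
        some_odb.info(some_binsha)          # placeholder object database lookup
    except BadObject, e:
        print "no such object: %s" % e      # most specific: unknown sha
    except ODBError:
        print "object database error"
    except GitPythonError:
        print "any other git-python error"  # the new common base class
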
diff --git a/git/ext/async b/git/ext/async
new file mode 160000
index 000000000..10310824c
--- /dev/null
+++ b/git/ext/async
@@ -0,0 +1 @@
+Subproject commit 10310824c001deab8fea85b88ebda0696f964b3e
diff --git a/git/ext/gitdb b/git/ext/gitdb
deleted file mode 160000
index 17d9d1395..000000000
--- a/git/ext/gitdb
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 17d9d1395fb6d18d553e085150138463b5827a2f
diff --git a/git/ext/smmap b/git/ext/smmap
new file mode 160000
index 000000000..cf297b7b8
--- /dev/null
+++ b/git/ext/smmap
@@ -0,0 +1 @@
+Subproject commit cf297b7b81bc5f6011c49d818d776ed7915fa1ee
diff --git a/git/fun.py b/git/fun.py
new file mode 100644
index 000000000..5bbe8efc3
--- /dev/null
+++ b/git/fun.py
@@ -0,0 +1,674 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains basic c-functions which usually contain performance critical code
+Keeping this code separate from the beginning makes it easier to port
+it to C later, if required"""
+
+from exc import (
+ BadObjectType
+ )
+
+from util import zlib
+decompressobj = zlib.decompressobj
+
+import mmap
+from itertools import islice, izip
+
+from cStringIO import StringIO
+
+# INVARIANTS
+OFS_DELTA = 6
+REF_DELTA = 7
+delta_types = (OFS_DELTA, REF_DELTA)
+
+type_id_to_type_map = {
+ 0 : "", # EXT 1
+ 1 : "commit",
+ 2 : "tree",
+ 3 : "blob",
+ 4 : "tag",
+ 5 : "", # EXT 2
+ OFS_DELTA : "OFS_DELTA", # OFFSET DELTA
+ REF_DELTA : "REF_DELTA" # REFERENCE DELTA
+ }
+
+type_to_type_id_map = dict(
+ commit=1,
+ tree=2,
+ blob=3,
+ tag=4,
+ OFS_DELTA=OFS_DELTA,
+ REF_DELTA=REF_DELTA
+ )
+
+# used when dealing with larger streams
+chunk_size = 1000*mmap.PAGESIZE
+
+__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info',
+ 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
+ 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header')
+
+
+#{ Structures
+
+def _set_delta_rbound(d, size):
+ """Truncate the given delta to the given size
+ :param size: size relative to our target offset, may not be 0, must be smaller or equal
+ to our size
+ :return: d"""
+ d.ts = size
+
+ # NOTE: data is truncated automatically when applying the delta
+ # MUST NOT DO THIS HERE
+ return d
+
+def _move_delta_lbound(d, bytes):
+ """Move the delta by the given amount of bytes, reducing its size so that its
+ right bound stays static
+ :param bytes: amount of bytes to move, must be smaller than delta size
+ :return: d"""
+ if bytes == 0:
+ return
+
+ d.to += bytes
+ d.so += bytes
+ d.ts -= bytes
+ if d.data is not None:
+ d.data = d.data[bytes:]
+ # END handle data
+
+ return d
+
+def delta_duplicate(src):
+ return DeltaChunk(src.to, src.ts, src.so, src.data)
+
+def delta_chunk_apply(dc, bbuf, write):
+ """Apply own data to the target buffer
+ :param bbuf: buffer providing source bytes for copy operations
+ :param write: write method to call with data to write"""
+ if dc.data is None:
+ # COPY DATA FROM SOURCE
+ write(buffer(bbuf, dc.so, dc.ts))
+ else:
+ # APPEND DATA
+		# what's faster: if + 4 function calls or just a write with a slice ?
+ # Considering data can be larger than 127 bytes now, it should be worth it
+ if dc.ts < len(dc.data):
+ write(dc.data[:dc.ts])
+ else:
+ write(dc.data)
+ # END handle truncation
+ # END handle chunk mode
+
+
+class DeltaChunk(object):
+ """Represents a piece of a delta, it can either add new data, or copy existing
+ one from a source buffer"""
+ __slots__ = (
+ 'to', # start offset in the target buffer in bytes
+ 'ts', # size of this chunk in the target buffer in bytes
+ 'so', # start offset in the source buffer in bytes or None
+ 'data', # chunk of bytes to be added to the target buffer,
+ # DeltaChunkList to use as base, or None
+ )
+
+ def __init__(self, to, ts, so, data):
+ self.to = to
+ self.ts = ts
+ self.so = so
+ self.data = data
+
+ def __repr__(self):
+ return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "")
+
+ #{ Interface
+
+ def rbound(self):
+ return self.to + self.ts
+
+ def has_data(self):
+ """:return: True if the instance has data to add to the target stream"""
+ return self.data is not None
+
+ #} END interface
+
+def _closest_index(dcl, absofs):
+ """:return: index at which the given absofs should be inserted. The index points
+ to the DeltaChunk with a target buffer absofs that equals or is greater than
+ absofs.
+ :note: global method for performance only, it belongs to DeltaChunkList"""
+ lo = 0
+ hi = len(dcl)
+ while lo < hi:
+ mid = (lo + hi) / 2
+ dc = dcl[mid]
+ if dc.to > absofs:
+ hi = mid
+ elif dc.rbound() > absofs or dc.to == absofs:
+ return mid
+ else:
+ lo = mid + 1
+ # END handle bound
+ # END for each delta absofs
+ return len(dcl)-1
+
+def delta_list_apply(dcl, bbuf, write):
+ """Apply the chain's changes and write the final result using the passed
+ write function.
+ :param bbuf: base buffer containing the base of all deltas contained in this
+ list. It will only be used if the chunk in question does not have a base
+ chain.
+ :param write: function taking a string of bytes to write to the output"""
+ for dc in dcl:
+ delta_chunk_apply(dc, bbuf, write)
+ # END for each dc
+
+def delta_list_slice(dcl, absofs, size, ndcl):
+	"""Write the subsection of dcl starting at the given absolute offset and spanning
+		the given size in bytes into ndcl.
+	:return: None"""
+ cdi = _closest_index(dcl, absofs) # delta start index
+ cd = dcl[cdi]
+ slen = len(dcl)
+ lappend = ndcl.append
+
+ if cd.to != absofs:
+ tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
+ _move_delta_lbound(tcd, absofs - cd.to)
+ tcd.ts = min(tcd.ts, size)
+ lappend(tcd)
+ size -= tcd.ts
+ cdi += 1
+ # END lbound overlap handling
+
+ while cdi < slen and size:
+ # are we larger than the current block
+ cd = dcl[cdi]
+ if cd.ts <= size:
+ lappend(DeltaChunk(cd.to, cd.ts, cd.so, cd.data))
+ size -= cd.ts
+ else:
+ tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
+ tcd.ts = size
+ lappend(tcd)
+ size -= tcd.ts
+ break
+		# END handle size
+ cdi += 1
+ # END for each chunk
+
+
+class DeltaChunkList(list):
+ """List with special functionality to deal with DeltaChunks.
+ There are two types of lists we represent. The one was created bottom-up, working
+ towards the latest delta, the other kind was created top-down, working from the
+ latest delta down to the earliest ancestor. This attribute is queryable
+ after all processing with is_reversed."""
+
+ __slots__ = tuple()
+
+ def rbound(self):
+		""":return: rightmost extent in bytes, absolute"""
+ if len(self) == 0:
+ return 0
+ return self[-1].rbound()
+
+ def lbound(self):
+ """:return: leftmost byte at which this chunklist starts"""
+ if len(self) == 0:
+ return 0
+ return self[0].to
+
+ def size(self):
+ """:return: size of bytes as measured by our delta chunks"""
+ return self.rbound() - self.lbound()
+
+ def apply(self, bbuf, write):
+ """Only used by public clients, internally we only use the global routines
+ for performance"""
+ return delta_list_apply(self, bbuf, write)
+
+ def compress(self):
+		"""Alter the list to reduce the number of nodes. Currently we concatenate
+ add-chunks
+ :return: self"""
+ slen = len(self)
+ if slen < 2:
+ return self
+ i = 0
+ slen_orig = slen
+
+ first_data_index = None
+ while i < slen:
+ dc = self[i]
+ i += 1
+ if dc.data is None:
+ if first_data_index is not None and i-2-first_data_index > 1:
+ #if first_data_index is not None:
+ nd = StringIO() # new data
+ so = self[first_data_index].to # start offset in target buffer
+ for x in xrange(first_data_index, i-1):
+ xdc = self[x]
+ nd.write(xdc.data[:xdc.ts])
+ # END collect data
+
+ del(self[first_data_index:i-1])
+ buf = nd.getvalue()
+ self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf))
+
+ slen = len(self)
+ i = first_data_index + 1
+
+ # END concatenate data
+ first_data_index = None
+ continue
+ # END skip non-data chunks
+
+ if first_data_index is None:
+ first_data_index = i-1
+ # END iterate list
+
+ #if slen_orig != len(self):
+ # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
+ return self
+
+ def check_integrity(self, target_size=-1):
+ """Verify the list has non-overlapping chunks only, and the total size matches
+ target_size
+ :param target_size: if not -1, the total size of the chain must be target_size
+		:raise AssertionError: if the size doesn't match"""
+ if target_size > -1:
+ assert self[-1].rbound() == target_size
+ assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size
+ # END target size verification
+
+ if len(self) < 2:
+ return
+
+ # check data
+ for dc in self:
+ assert dc.ts > 0
+ if dc.has_data():
+ assert len(dc.data) >= dc.ts
+ # END for each dc
+
+ left = islice(self, 0, len(self)-1)
+ right = iter(self)
+ right.next()
+		# this is very pythonic - we might have just used index-based access here,
+ # but this could actually be faster
+ for lft,rgt in izip(left, right):
+ assert lft.rbound() == rgt.to
+ assert lft.to + lft.ts == rgt.to
+ # END for each pair
+
+
+class TopdownDeltaChunkList(DeltaChunkList):
+ """Represents a list which is generated by feeding its ancestor streams one by
+ one"""
+ __slots__ = tuple()
+
+ def connect_with_next_base(self, bdcl):
+ """Connect this chain with the next level of our base delta chunklist.
+		The goal is to mark as many of our chunks as possible as rigid, so they
+		cannot be changed by any of the upcoming bases anymore. Once all our
+		chunks are marked like that, we can stop all processing.
+		:param bdcl: data chunk list being one of our bases. They must be fed in
+			consecutively and in order, towards the earliest ancestor delta
+ :return: True if processing was done. Use it to abort processing of
+ remaining streams if False is returned"""
+ nfc = 0 # number of frozen chunks
+ dci = 0 # delta chunk index
+ slen = len(self) # len of self
+ ccl = list() # temporary list
+ while dci < slen:
+ dc = self[dci]
+ dci += 1
+
+ # all add-chunks which are already topmost don't need additional processing
+ if dc.data is not None:
+ nfc += 1
+ continue
+ # END skip add chunks
+
+ # copy chunks
+ # integrate the portion of the base list into ourselves. Lists
+ # dont support efficient insertion ( just one at a time ), but for now
+			# don't support efficient insertion ( just one at a time ), but for now
+			# we live with it. Internally, it's all just a 32/64bit pointer, and
+ # ourselves in order to reduce the amount of insertions ...
+ del(ccl[:])
+ delta_list_slice(bdcl, dc.so, dc.ts, ccl)
+
+ # move the target bounds into place to match with our chunk
+ ofs = dc.to - dc.so
+ for cdc in ccl:
+ cdc.to += ofs
+ # END update target bounds
+
+ if len(ccl) == 1:
+ self[dci-1] = ccl[0]
+ else:
+ # maybe try to compute the expenses here, and pick the right algorithm
+ # It would normally be faster than copying everything physically though
+ # TODO: Use a deque here, and decide by the index whether to extend
+ # or extend left !
+ post_dci = self[dci:]
+ del(self[dci-1:]) # include deletion of dc
+ self.extend(ccl)
+ self.extend(post_dci)
+
+ slen = len(self)
+ dci += len(ccl)-1 # deleted dc, added rest
+
+ # END handle chunk replacement
+ # END for each chunk
+
+ if nfc == slen:
+ return False
+ # END handle completeness
+ return True
+
+
+#} END structures
+
+#{ Routines
+
+def is_loose_object(m):
+ """
+	:return: True if the file contained in memory map m appears to be a loose object.
+ Only the first two bytes are needed"""
+ b0, b1 = map(ord, m[:2])
+ word = (b0 << 8) + b1
+ return b0 == 0x78 and (word % 31) == 0
+
+def loose_object_header_info(m):
+ """
+ :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
+ object as well as its uncompressed size in bytes.
+ :param m: memory map from which to read the compressed object data"""
+ decompress_size = 8192 # is used in cgit as well
+ hdr = decompressobj().decompress(m, decompress_size)
+ type_name, size = hdr[:hdr.find("\0")].split(" ")
+ return type_name, int(size)
+
+def pack_object_header_info(data):
+ """
+ :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
+ The type_id should be interpreted according to the ``type_id_to_type_map`` map
+ The byte-offset specifies the start of the actual zlib compressed datastream
+	:param data: random-access memory, like a string or memory map"""
+ c = ord(data[0]) # first byte
+ i = 1 # next char to read
+ type_id = (c >> 4) & 7 # numeric type
+ size = c & 15 # starting size
+ s = 4 # starting bit-shift size
+ while c & 0x80:
+ c = ord(data[i])
+ i += 1
+ size += (c & 0x7f) << s
+ s += 7
+ # END character loop
+ return (type_id, size, i)
+
+def create_pack_object_header(obj_type, obj_size):
+ """:return: string defining the pack header comprised of the object type
+ and its incompressed size in bytes
+	and its uncompressed size in bytes
+	:param obj_type: pack type_id of the object
+ c = 0 # 1 byte
+ hdr = str() # output string
+
+ c = (obj_type << 4) | (obj_size & 0xf)
+ obj_size >>= 4
+ while obj_size:
+ hdr += chr(c | 0x80)
+ c = obj_size & 0x7f
+ obj_size >>= 7
+ #END until size is consumed
+ hdr += chr(c)
+ return hdr
+
+def msb_size(data, offset=0):
+ """
+ :return: tuple(read_bytes, size) read the msb size from the given random
+ access data starting at the given byte offset"""
+ size = 0
+ i = 0
+ l = len(data)
+ hit_msb = False
+ while i < l:
+ c = ord(data[i+offset])
+ size |= (c & 0x7f) << i*7
+ i += 1
+ if not c & 0x80:
+ hit_msb = True
+ break
+ # END check msb bit
+ # END while in range
+ if not hit_msb:
+ raise AssertionError("Could not find terminating MSB byte in data stream")
+ return i+offset, size
+
+def loose_object_header(type, size):
+ """
+ :return: string representing the loose object header, which is immediately
+ followed by the content stream of size 'size'"""
+ return "%s %i\0" % (type, size)
+
+def write_object(type, size, read, write, chunk_size=chunk_size):
+ """
+ Write the object as identified by type, size and source_stream into the
+ target_stream
+
+ :param type: type string of the object
+ :param size: amount of bytes to write from source_stream
+ :param read: read method of a stream providing the content data
+ :param write: write method of the output stream
+	:return: the actual number of bytes written to the stream, including the header"""
+ tbw = 0 # total num bytes written
+
+ # WRITE HEADER: type SP size NULL
+ tbw += write(loose_object_header(type, size))
+ tbw += stream_copy(read, write, size, chunk_size)
+
+ return tbw
+
+def stream_copy(read, write, size, chunk_size):
+ """
+ Copy a stream up to size bytes using the provided read and write methods,
+ in chunks of chunk_size
+
+	:note: it's much like the stream_copy utility, but operates just using methods"""
+ dbw = 0 # num data bytes written
+
+ # WRITE ALL DATA UP TO SIZE
+ while True:
+ cs = min(chunk_size, size-dbw)
+ # NOTE: not all write methods return the amount of written bytes, like
+ # mmap.write. Its bad, but we just deal with it ... perhaps its not
+ # even less efficient
+ # data_len = write(read(cs))
+ # dbw += data_len
+ data = read(cs)
+ data_len = len(data)
+ dbw += data_len
+ write(data)
+ if data_len < cs or dbw == size:
+ break
+ # END check for stream end
+ # END duplicate data
+ return dbw
+
+def connect_deltas(dstreams):
+ """
+ Read the condensed delta chunk information from dstream and merge its information
+ into a list of existing delta chunks
+
+ :param dstreams: iterable of delta stream objects, the delta to be applied last
+ comes first, then all its ancestors in order
+ :return: DeltaChunkList, containing all operations to apply"""
+ tdcl = None # topmost dcl
+
+ dcl = tdcl = TopdownDeltaChunkList()
+ for dsi, ds in enumerate(dstreams):
+ # print "Stream", dsi
+ db = ds.read()
+ delta_buf_size = ds.size
+
+ # read header
+ i, base_size = msb_size(db)
+ i, target_size = msb_size(db, i)
+
+ # interpret opcodes
+ tbw = 0 # amount of target bytes written
+ while i < delta_buf_size:
+ c = ord(db[i])
+ i += 1
+ if c & 0x80:
+ cp_off, cp_size = 0, 0
+ if (c & 0x01):
+ cp_off = ord(db[i])
+ i += 1
+ if (c & 0x02):
+ cp_off |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x04):
+ cp_off |= (ord(db[i]) << 16)
+ i += 1
+ if (c & 0x08):
+ cp_off |= (ord(db[i]) << 24)
+ i += 1
+ if (c & 0x10):
+ cp_size = ord(db[i])
+ i += 1
+ if (c & 0x20):
+ cp_size |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x40):
+ cp_size |= (ord(db[i]) << 16)
+ i += 1
+
+ if not cp_size:
+ cp_size = 0x10000
+
+ rbound = cp_off + cp_size
+ if (rbound < cp_size or
+ rbound > base_size):
+ break
+
+ dcl.append(DeltaChunk(tbw, cp_size, cp_off, None))
+ tbw += cp_size
+ elif c:
+ # NOTE: in C, the data chunks should probably be concatenated here.
+ # In python, we do it as a post-process
+ dcl.append(DeltaChunk(tbw, c, 0, db[i:i+c]))
+ i += c
+ tbw += c
+ else:
+ raise ValueError("unexpected delta opcode 0")
+ # END handle command byte
+ # END while processing delta data
+
+ dcl.compress()
+
+ # merge the lists !
+ if dsi > 0:
+ if not tdcl.connect_with_next_base(dcl):
+ break
+ # END handle merge
+
+ # prepare next base
+ dcl = DeltaChunkList()
+ # END for each delta stream
+
+ return tdcl
+
+def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
+ """
+ Apply data from a delta buffer using a source buffer to the target file
+
+ :param src_buf: random access data from which the delta was created
+ :param src_buf_size: size of the source buffer in bytes
+	:param delta_buf_size: size of the delta buffer in bytes
+ :param delta_buf: random access delta data
+ :param write: write method taking a chunk of bytes
+ :note: transcribed to python from the similar routine in patch-delta.c"""
+ i = 0
+ db = delta_buf
+ while i < delta_buf_size:
+ c = ord(db[i])
+ i += 1
+ if c & 0x80:
+ cp_off, cp_size = 0, 0
+ if (c & 0x01):
+ cp_off = ord(db[i])
+ i += 1
+ if (c & 0x02):
+ cp_off |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x04):
+ cp_off |= (ord(db[i]) << 16)
+ i += 1
+ if (c & 0x08):
+ cp_off |= (ord(db[i]) << 24)
+ i += 1
+ if (c & 0x10):
+ cp_size = ord(db[i])
+ i += 1
+ if (c & 0x20):
+ cp_size |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x40):
+ cp_size |= (ord(db[i]) << 16)
+ i += 1
+
+ if not cp_size:
+ cp_size = 0x10000
+
+ rbound = cp_off + cp_size
+ if (rbound < cp_size or
+ rbound > src_buf_size):
+ break
+ write(buffer(src_buf, cp_off, cp_size))
+ elif c:
+ write(db[i:i+c])
+ i += c
+ else:
+ raise ValueError("unexpected delta opcode 0")
+ # END handle command byte
+ # END while processing delta data
+
+ # yes, lets use the exact same error message that git uses :)
+ assert i == delta_buf_size, "delta replay has gone wild"
+
+
+def is_equal_canonical_sha(canonical_length, match, sha1):
+ """
+	:return: True if the given binary shas match. The comparison takes the
+		canonical_length of the match sha into account, hence for an uneven
+		canonical representation only the high 4 bits of the last byte are compared
+ :param match: less than 20 byte sha
+ :param sha1: 20 byte sha"""
+ binary_length = canonical_length/2
+ if match[:binary_length] != sha1[:binary_length]:
+ return False
+
+ if canonical_length - binary_length and \
+ (ord(match[-1]) ^ ord(sha1[len(match)-1])) & 0xf0:
+ return False
+	# END handle uneven canonical length
+ return True
+
+#} END routines
+
+
+try:
+ # raise ImportError; # DEBUG
+ from _perf import connect_deltas
+except ImportError:
+ pass
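
The header helpers above are symmetric, so a quick round-trip makes the encodings concrete; the values are chosen arbitrarily and the asserts follow directly from the code above.

    from git.fun import (create_pack_object_header, pack_object_header_info,
                         loose_object_header, msb_size, type_to_type_id_map)

    # pack entry header: 3-bit type id plus a variable-length size
    hdr = create_pack_object_header(type_to_type_id_map['blob'], 70000)
    assert pack_object_header_info(hdr) == (3, 70000, len(hdr))

    # loose object header is simply "type SP size NUL"
    assert loose_object_header("blob", 11) == "blob 11\0"

    # msb_size reads a base-128 size: 0x90 -> 16 plus continuation bit, 0x02 -> 2 << 7
    assert msb_size("\x90\x02") == (2, 272)
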
diff --git a/git/index/base.py b/git/index/base.py
index 88410e20d..12097922d 100644
--- a/git/index/base.py
+++ b/git/index/base.py
@@ -62,9 +62,8 @@
S_IFGITLINK
)
-from gitdb.base import IStream
-from gitdb.db import MemoryDB
-from gitdb.util import to_bin_sha
+from git.base import IStream
+from git.util import to_bin_sha
from itertools import izip
__all__ = ( 'IndexFile', 'CheckoutError' )
@@ -512,7 +511,9 @@ def write_tree(self):
:raise UnmergedEntriesError: """
# we obtain no lock as we just flush our contents to disk as tree
# If we are a new index, the entries access will load our data accordingly
- mdb = MemoryDB()
+ # Needs delayed import as db.py import IndexFile as well
+ import git.db.py.mem
+ mdb = git.db.py.mem.PureMemoryDB()
entries = self._entries_sorted()
binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
@@ -959,12 +960,16 @@ def handle_stderr(proc, iter_checked_out_files):
if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "):
is_a_dir = " is a directory"
unlink_issue = "unable to unlink old '"
+ already_exists_issue = ' already exists, no checkout' # created by entry.c:checkout_entry(...)
if line.endswith(is_a_dir):
failed_files.append(line[:-len(is_a_dir)])
failed_reasons.append(is_a_dir)
elif line.startswith(unlink_issue):
failed_files.append(line[len(unlink_issue):line.rfind("'")])
failed_reasons.append(unlink_issue)
+ elif line.endswith(already_exists_issue):
+ failed_files.append(line[:-len(already_exists_issue)])
+ failed_reasons.append(already_exists_issue)
else:
unknown_lines.append(line)
continue
diff --git a/git/index/fun.py b/git/index/fun.py
index 9b35bf04a..e2813c0bb 100644
--- a/git/index/fun.py
+++ b/git/index/fun.py
@@ -36,8 +36,8 @@
unpack
)
-from gitdb.base import IStream
-from gitdb.typ import str_tree_type
+from git.base import IStream
+from git.typ import str_tree_type
__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key',
'stat_mode_to_index_mode', 'S_IFGITLINK')
diff --git a/git/objects/base.py b/git/objects/base.py
index 5f2f78093..61b3e674e 100644
--- a/git/objects/base.py
+++ b/git/objects/base.py
@@ -3,15 +3,20 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.util import LazyMixin, join_path_native, stream_copy
+
from util import get_object_type_by_name
-from gitdb.util import (
+from git.util import (
hex_to_bin,
bin_to_hex,
- basename
+ dirname,
+ basename,
+ LazyMixin,
+ join_path_native,
+ stream_copy
)
-
-import gitdb.typ as dbtyp
+from git.db.interface import RepositoryPathsMixin
+from git.exc import UnsupportedOperation
+from git.typ import ObjectType
_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
@@ -22,24 +27,26 @@ class Object(LazyMixin):
NULL_HEX_SHA = '0'*40
NULL_BIN_SHA = '\0'*20
- TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
- __slots__ = ("repo", "binsha", "size" )
+ TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag)
+ __slots__ = ("odb", "binsha", "size" )
+
type = None # to be set by subclass
+ type_id = None # to be set by subclass
- def __init__(self, repo, binsha):
+ def __init__(self, odb, binsha):
"""Initialize an object by identifying it by its binary sha.
All keyword arguments will be set on demand if None.
- :param repo: repository this object is located in
+ :param odb: repository this object is located in
:param binsha: 20 byte SHA1"""
super(Object,self).__init__()
- self.repo = repo
+ self.odb = odb
self.binsha = binsha
assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
@classmethod
- def new(cls, repo, id):
+ def new(cls, odb, id):
"""
:return: New Object instance of a type appropriate to the object type behind
id. The id of the newly created object will be a binsha even though
@@ -49,27 +56,27 @@ def new(cls, repo, id):
:note: This cannot be a __new__ method as it would always call __init__
with the input id which is not necessarily a binsha."""
- return repo.rev_parse(str(id))
+ return odb.rev_parse(str(id))
@classmethod
- def new_from_sha(cls, repo, sha1):
+ def new_from_sha(cls, odb, sha1):
"""
:return: new object instance of a type appropriate to represent the given
binary sha1
:param sha1: 20 byte binary sha1"""
if sha1 == cls.NULL_BIN_SHA:
# the NULL binsha is always the root commit
- return get_object_type_by_name('commit')(repo, sha1)
+ return get_object_type_by_name('commit')(odb, sha1)
#END handle special case
- oinfo = repo.odb.info(sha1)
- inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha)
+ oinfo = odb.info(sha1)
+ inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha)
inst.size = oinfo.size
return inst
def _set_cache_(self, attr):
"""Retrieve object information"""
if attr == "size":
- oinfo = self.repo.odb.info(self.binsha)
+ oinfo = self.odb.info(self.binsha)
self.size = oinfo.size
# assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
else:
@@ -77,10 +84,14 @@ def _set_cache_(self, attr):
def __eq__(self, other):
""":return: True if the objects have the same SHA1"""
+ if not hasattr(other, 'binsha'):
+ return False
return self.binsha == other.binsha
def __ne__(self, other):
""":return: True if the objects do not have the same SHA1 """
+ if not hasattr(other, 'binsha'):
+ return True
return self.binsha != other.binsha
def __hash__(self):
@@ -104,13 +115,13 @@ def hexsha(self):
def data_stream(self):
""" :return: File Object compatible stream to the uncompressed raw data of the object
:note: returned streams must be read in order"""
- return self.repo.odb.stream(self.binsha)
+ return self.odb.stream(self.binsha)
def stream_data(self, ostream):
"""Writes our data directly to the given output stream
:param ostream: File object compatible stream object.
:return: self"""
- istream = self.repo.odb.stream(self.binsha)
+ istream = self.odb.stream(self.binsha)
stream_copy(istream, ostream)
return self
@@ -123,9 +134,9 @@ class IndexObject(Object):
# for compatability with iterable lists
_id_attribute_ = 'path'
- def __init__(self, repo, binsha, mode=None, path=None):
+ def __init__(self, odb, binsha, mode=None, path=None):
"""Initialize a newly instanced IndexObject
- :param repo: is the Repo we are located in
+ :param odb: is the object database we are located in
:param binsha: 20 byte sha1
:param mode: is the stat compatible file mode as int, use the stat module
to evaluate the infomration
@@ -135,7 +146,7 @@ def __init__(self, repo, binsha, mode=None, path=None):
:note:
Path may not be set of the index object has been created directly as it cannot
be retrieved without knowing the parent tree."""
- super(IndexObject, self).__init__(repo, binsha)
+ super(IndexObject, self).__init__(odb, binsha)
if mode is not None:
self.mode = mode
if path is not None:
@@ -167,6 +178,15 @@ def abspath(self):
Absolute path to this index object in the file system ( as opposed to the
.path field which is a path relative to the git repository ).
- The returned path will be native to the system and contains '\' on windows. """
- return join_path_native(self.repo.working_tree_dir, self.path)
+ The returned path will be native to the system and contains '\' on windows.
+ :raise UnsupportedOperation: if underlying odb does not support the required method to obtain a working dir"""
+ # TODO: Here we suddenly need something better than a plain object database
+ # which indicates our odb should better be named repo !
+ root = ''
+ if isinstance(self.odb, RepositoryPathsMixin):
+ root = self.odb.working_tree_dir
+ else:
+ raise UnsupportedOperation("Cannot provide absolute path from a database without Repository path support")
+ #END handle odb type
+ return join_path_native(root, self.path)
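
Since objects now carry the object database directly, a short sketch of the changed construction; odb and binsha are placeholders for any object database instance and a 20 byte binary sha.

    from git.objects.base import Object

    obj = Object.new_from_sha(odb, binsha)      # odb, binsha: placeholders
    print obj.hexsha, obj.type, obj.size        # size is fetched lazily through odb.info()
    data = obj.data_stream.read()               # raw content, streamed through odb.stream()

    # IndexObject.abspath now raises UnsupportedOperation unless the database
    # also mixes in RepositoryPathsMixin, i.e. knows a working tree directory
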
diff --git a/git/objects/blob.py b/git/objects/blob.py
index f52d1a531..9c51f99f3 100644
--- a/git/objects/blob.py
+++ b/git/objects/blob.py
@@ -4,15 +4,19 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.util import RepoAliasMixin
from mimetypes import guess_type
+from git.typ import ObjectType
+
import base
__all__ = ('Blob', )
-class Blob(base.IndexObject):
+class Blob(base.IndexObject, RepoAliasMixin):
"""A Blob encapsulates a git blob object"""
DEFAULT_MIME_TYPE = "text/plain"
- type = "blob"
+ type = ObjectType.blob
+ type_id = ObjectType.blob_id
# valid blob modes
executable_mode = 0100755
diff --git a/git/objects/commit.py b/git/objects/commit.py
index fd4187b08..c32bbf1a0 100644
--- a/git/objects/commit.py
+++ b/git/objects/commit.py
@@ -3,42 +3,45 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+import base
-from git.util import (
- Actor,
- Iterable,
- Stats,
- )
-from git.diff import Diffable
+from git.typ import ObjectType
from tree import Tree
-from gitdb import IStream
from cStringIO import StringIO
-import base
-from gitdb.util import (
- hex_to_bin
+from git.util import (
+ hex_to_bin,
+ Actor,
+ RepoAliasMixin,
+ Iterable,
+ Stats
)
+
from util import (
- Traversable,
- Serializable,
- parse_date,
- altz_to_utctz_str,
- parse_actor_and_date
- )
-from time import (
- time,
- altzone
+ Traversable,
+ Serializable,
+ altz_to_utctz_str,
+ parse_actor_and_date
)
+from git.diff import Diffable
+from git.base import IStream
+from cStringIO import StringIO
+
+from util import parse_date
+from time import altzone, time
+
import os
import sys
__all__ = ('Commit', )
-class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
+class Commit(Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable):
"""Wraps a git Commit object.
This class will act lazily on some of its attributes and will query the
value on demand only if it involves calling the git binary."""
+ __slots__ = tuple()
# ENVIRONMENT VARIABLES
# read when creating new commits
@@ -53,92 +56,16 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
# object configuration
- type = "commit"
+ type = ObjectType.commit
+ type_id = ObjectType.commit_id
+
__slots__ = ("tree",
"author", "authored_date", "author_tz_offset",
"committer", "committed_date", "committer_tz_offset",
"message", "parents", "encoding")
_id_attribute_ = "binsha"
- def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
- committer=None, committed_date=None, committer_tz_offset=None,
- message=None, parents=None, encoding=None):
- """Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set on first query.
-
- :param binsha: 20 byte sha1
- :param parents: tuple( Commit, ... )
- is a tuple of commit ids or actual Commits
- :param tree: Tree
- Tree object
- :param author: Actor
- is the author string ( will be implicitly converted into an Actor object )
- :param authored_date: int_seconds_since_epoch
- is the authored DateTime - use time.gmtime() to convert it into a
- different format
- :param author_tz_offset: int_seconds_west_of_utc
- is the timezone that the authored_date is in
- :param committer: Actor
- is the committer string
- :param committed_date: int_seconds_since_epoch
- is the committed DateTime - use time.gmtime() to convert it into a
- different format
- :param committer_tz_offset: int_seconds_west_of_utc
- is the timezone that the authored_date is in
- :param message: string
- is the commit message
- :param encoding: string
- encoding of the message, defaults to UTF-8
- :param parents:
- List or tuple of Commit objects which are our parent(s) in the commit
- dependency graph
- :return: git.Commit
-
- :note: Timezone information is in the same format and in the same sign
- as what time.altzone returns. The sign is inverted compared to git's
- UTC timezone."""
- super(Commit,self).__init__(repo, binsha)
- if tree is not None:
- assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
- if tree is not None:
- self.tree = tree
- if author is not None:
- self.author = author
- if authored_date is not None:
- self.authored_date = authored_date
- if author_tz_offset is not None:
- self.author_tz_offset = author_tz_offset
- if committer is not None:
- self.committer = committer
- if committed_date is not None:
- self.committed_date = committed_date
- if committer_tz_offset is not None:
- self.committer_tz_offset = committer_tz_offset
- if message is not None:
- self.message = message
- if parents is not None:
- self.parents = parents
- if encoding is not None:
- self.encoding = encoding
-
- @classmethod
- def _get_intermediate_items(cls, commit):
- return commit.parents
-
- def _set_cache_(self, attr):
- if attr in Commit.__slots__:
- # read the data in a chunk, its faster - then provide a file wrapper
- binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
- self._deserialize(StringIO(stream.read()))
- else:
- super(Commit, self)._set_cache_(attr)
- # END handle attrs
-
- @property
- def summary(self):
- """:return: First line of the commit message"""
- return self.message.split('\n', 1)[0]
-
+
def count(self, paths='', **kwargs):
"""Count the number of commits reachable from this commit
@@ -225,33 +152,6 @@ def stats(self):
text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
return Stats._list_from_string(self.repo, text)
- @classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream):
- """Parse out commit information into a list of Commit objects
- We expect one-line per commit, and parse the actual commit information directly
- from our lighting fast object database
-
- :param proc: git-rev-list process instance - one sha per line
- :return: iterator returning Commit objects"""
- stream = proc_or_stream
- if not hasattr(stream,'readline'):
- stream = proc_or_stream.stdout
-
- readline = stream.readline
- while True:
- line = readline()
- if not line:
- break
- hexsha = line.strip()
- if len(hexsha) > 40:
- # split additional information, as returned by bisect for instance
- hexsha, rest = line.split(None, 1)
- # END handle extra info
-
- assert len(hexsha) == 40, "Invalid line: %s" % hexsha
- yield Commit(repo, hex_to_bin(hexsha))
- # END for each line in stream
-
@classmethod
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
@@ -361,6 +261,112 @@ def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
# END advance head handling
return new_commit
+
+ def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ committer=None, committed_date=None, committer_tz_offset=None,
+ message=None, parents=None, encoding=None):
+ """Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set on first query.
+
+ :param odb: object database from which this commit's data can be read
+ :param binsha: 20 byte sha1
+ :param parents: tuple( Commit, ... )
+ is a list or tuple of Commit objects or commit ids which are our parent(s)
+ in the commit dependency graph
+ :param tree: Tree
+ Tree object
+ :param author: Actor
+ is the author string ( will be implicitly converted into an Actor object )
+ :param authored_date: int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
+ :param author_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param committer: Actor
+ is the committer string
+ :param committed_date: int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
+ :param committer_tz_offset: int_seconds_west_of_utc
+ is the timezone that the committed_date is in
+ :param message: string
+ is the commit message
+ :param encoding: string
+ encoding of the message, defaults to UTF-8
+ :return: git.Commit
+
+ :note: Timezone information is in the same format and in the same sign
+ as what time.altzone returns. The sign is inverted compared to git's
+ UTC timezone."""
+ super(Commit,self).__init__(odb, binsha)
+ if tree is not None:
+ assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
+ if tree is not None:
+ self.tree = tree
+ if author is not None:
+ self.author = author
+ if authored_date is not None:
+ self.authored_date = authored_date
+ if author_tz_offset is not None:
+ self.author_tz_offset = author_tz_offset
+ if committer is not None:
+ self.committer = committer
+ if committed_date is not None:
+ self.committed_date = committed_date
+ if committer_tz_offset is not None:
+ self.committer_tz_offset = committer_tz_offset
+ if message is not None:
+ self.message = message
+ if parents is not None:
+ self.parents = parents
+ if encoding is not None:
+ self.encoding = encoding
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents
+
+ def _set_cache_(self, attr):
+ if attr in Commit.__slots__:
+ # read the data in one chunk - it's faster - then provide a file wrapper
+ binsha, typename, self.size, stream = self.odb.stream(self.binsha)
+ self._deserialize(StringIO(stream.read()))
+ else:
+ super(Commit, self)._set_cache_(attr)
+ # END handle attrs
+
+ @property
+ def summary(self):
+ """:return: First line of the commit message"""
+ return self.message.split('\n', 1)[0]
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, odb, proc_or_stream):
+ """Parse out commit information into a list of Commit objects
+ We expect one line per commit, and parse the actual commit information directly
+ from our lightning fast object database
+
+ :param proc_or_stream: git-rev-list process instance or stream - one sha per line
+ :return: iterator returning Commit objects"""
+ stream = proc_or_stream
+ if not hasattr(stream,'readline'):
+ stream = proc_or_stream.stdout
+
+ readline = stream.readline
+ while True:
+ line = readline()
+ if not line:
+ break
+ hexsha = line.strip()
+ if len(hexsha) > 40:
+ # split additional information, as returned by bisect for instance
+ hexsha, rest = line.split(None, 1)
+ # END handle extra info
+
+ assert len(hexsha) == 40, "Invalid line: %s" % hexsha
+ yield cls(odb, hex_to_bin(hexsha))
+ # END for each line in stream
#{ Serializable Implementation
@@ -408,7 +414,7 @@ def _deserialize(self, stream):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
readline = stream.readline
- self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
+ self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
self.parents = list()
next_line = None
@@ -418,7 +424,7 @@ def _deserialize(self, stream):
next_line = parent_line
break
# END abort reading parents
- self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
+ self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1])))
# END for each parent line
self.parents = tuple(self.parents)
@@ -461,5 +467,6 @@ def _deserialize(self, stream):
print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
# END exception handling
return self
-
+
#} END serializable implementation
+
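Since Commit now takes an object database rather than a Repo as its first argument, instances resolve their data straight through the odb. A minimal sketch of the changed construction path, assuming a high-level Repo still exposes its database as ``repo.odb`` and that the class lives at git.objects.commit:

    from git import Repo
    from git.objects.commit import Commit
    from git.util import hex_to_bin

    repo = Repo("/path/to/repo")          # assumed high-level repository type
    odb = repo.odb                        # object database, now the first Commit argument

    # build a Commit lazily from a hex sha; tree, parents, message and friends
    # are deserialized from the odb on first access via _set_cache_()
    c = Commit(odb, hex_to_bin("1c09f116cbc2cb4100fb6935bb162daa4723f455"))
    print c.summary                       # first line of the commit message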
diff --git a/git/objects/fun.py b/git/objects/fun.py
index 9b0a377cb..6f2eaaad0 100644
--- a/git/objects/fun.py
+++ b/git/objects/fun.py
@@ -1,4 +1,5 @@
"""Module with functions which are supposed to be as fast as possible"""
+
from stat import S_ISDIR
__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
@@ -197,3 +198,4 @@ def traverse_tree_recursive(odb, tree_sha, path_prefix):
# END for each item
return entries
+
diff --git a/git/objects/submodule/__init__.py b/git/objects/submodule/__init__.py
index 82df59b0d..c8bf2d493 100644
--- a/git/objects/submodule/__init__.py
+++ b/git/objects/submodule/__init__.py
@@ -1,2 +1,6 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
# NOTE: Cannot import anything here as the top-level _init_ has to handle
# our dependencies
diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py
index 2160299b9..1b94f5221 100644
--- a/git/objects/submodule/base.py
+++ b/git/objects/submodule/base.py
@@ -1,3 +1,8 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.util import RepoAliasMixin
import util
from util import (
mkhead,
@@ -13,9 +18,11 @@
Iterable,
join_path_native,
to_native_path_linux,
- RemoteProgress
+ rmtree
)
+from git.db.interface import RemoteProgress
+
from git.config import SectionConstraint
from git.exc import (
InvalidGitRepositoryError,
@@ -23,14 +30,12 @@
)
import stat
-import git
+import git # we use some types indirectly to prevent cyclic imports !
import os
import sys
import time
-import shutil
-
__all__ = ["Submodule", "UpdateProgress"]
@@ -53,7 +58,7 @@ class UpdateProgress(RemoteProgress):
# IndexObject comes via the util module; it's a 'hacky' fix thanks to python's import
# mechanism which causes plenty of trouble; the only reason for packages and
# modules is refactoring - subpackages shouldn't depend on parent packages
-class Submodule(util.IndexObject, Iterable, Traversable):
+class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin):
"""Implements access to a git submodule. They are special in that their sha
represents a commit in the submodule's repository which is to be checked out
at the path of this instance.
@@ -71,6 +76,9 @@ class Submodule(util.IndexObject, Iterable, Traversable):
# this is a bogus type for base class compatibility
type = 'submodule'
+ # this type doesn't really have a type id
+ type_id = 0
+
__slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__')
_cache_attrs = ('path', '_url', '_branch_path')
@@ -80,7 +88,7 @@ def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commi
:param repo: Our parent repository
:param binsha: binary sha referring to a commit in the remote repository, see url parameter
- :param parent_commit: see set_parent_commit()
+ :param parent_commit: a Commit object instance, see set_parent_commit() for more information
:param url: The url to the remote repository which is the submodule
:param branch_path: full (relative) path to ref to checkout when cloning the remote repository"""
super(Submodule, self).__init__(repo, binsha, mode, path)
@@ -195,7 +203,7 @@ def _config_parser_constrained(self, read_only):
#{ Edit Interface
@classmethod
- def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
+ def add(cls, repo, name, path, url=None, branch=None, no_checkout=False, repoType=None):
"""Add a new submodule to the given repository. This will alter the index
as well as the .gitmodules file, but will not create a new commit.
If the submodule already exists, no matter if the configuration differs
@@ -220,6 +228,8 @@ def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
Examples are 'master' or 'feature/new'
:param no_checkout: if True, and if the repository has to be cloned manually,
no checkout will be performed
+ :param repoType: The repository type to use. It must provide the clone_from method.
+ If None, the default implementation is used.
:return: The newly created submodule instance
:note: works atomically, such that no change will be done if the repository
update fails for instance"""
@@ -227,6 +237,8 @@ def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
raise InvalidGitRepositoryError("Cannot add submodules to bare repositories")
# END handle bare repos
+ repoType = repoType or git.Repo
+
path = to_native_path_linux(path)
if path.endswith('/'):
path = path[:-1]
@@ -280,7 +292,7 @@ def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
if not branch_is_default:
kwargs['b'] = br.name
# END setup checkout-branch
- mrepo = git.Repo.clone_from(url, path, **kwargs)
+ mrepo = repoType.clone_from(url, path, **kwargs)
# END verify url
# update configuration and index
@@ -306,7 +318,7 @@ def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
return sm
def update(self, recursive=False, init=True, to_latest_revision=False, progress=None,
- dry_run=False):
+ dry_run=False, ):
"""Update the repository of this submodule to point to the checkout
we point at with the binsha of this instance.
@@ -368,7 +380,6 @@ def update(self, recursive=False, init=True, to_latest_revision=False, progress=
if not init:
return self
# END early abort if init is not allowed
- import git
# there is no git-repository yet - but delete empty paths
module_path = join_path_native(self.repo.working_tree_dir, self.path)
@@ -384,7 +395,7 @@ def update(self, recursive=False, init=True, to_latest_revision=False, progress=
# branch according to the remote-HEAD if possible
progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name))
if not dry_run:
- mrepo = git.Repo.clone_from(self.url, module_path, n=True)
+ mrepo = type(self.repo).clone_from(self.url, module_path, n=True)
#END handle dry-run
progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path)
@@ -622,7 +633,7 @@ def remove(self, module=True, force=False, configuration=True, dry_run=False):
if os.path.islink(mp):
method = os.remove
elif os.path.isdir(mp):
- method = shutil.rmtree
+ method = rmtree
elif os.path.exists(mp):
raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory")
#END handle brutal deletion
@@ -671,7 +682,7 @@ def remove(self, module=True, force=False, configuration=True, dry_run=False):
if not dry_run:
wtd = mod.working_tree_dir
del(mod) # release file-handles (windows)
- shutil.rmtree(wtd)
+ rmtree(wtd)
# END delete tree if possible
# END handle force
# END handle module deletion
@@ -760,14 +771,19 @@ def config_writer(self, index=None, write=True):
#{ Query Interface
@unbare_repo
- def module(self):
- """:return: Repo instance initialized from the repository at our submodule path
+ def module(self, repoType=None):
+ """:return: Repository instance initialized from the repository at our submodule path
+ :param repoType: The type of repository to be created. It must be possible to instantiate it
+ from a single repository path.
+ If None, a default repository type will be used
:raise InvalidGitRepositoryError: if a repository was not available. This could
also mean that it was not yet initialized"""
# late import to workaround circular dependencies
- module_path = self.abspath
+ module_path = self.abspath
+ repoType = repoType or git.Repo
+
try:
- repo = git.Repo(module_path)
+ repo = repoType(module_path)
if repo != self.repo:
return repo
# END handle repo uninitialized
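The new repoType parameter makes the submodule machinery independent of the concrete repository class: anything providing ``clone_from`` and a path-taking constructor will do. A sketch under that assumption; the url and paths are placeholders:

    import git
    from git.objects.submodule.base import Submodule

    class MyRepo(git.Repo):
        """Hypothetical repository subclass used for submodule checkouts."""
        pass

    parent = git.Repo("/path/to/repo")

    # clone and register the submodule using the custom repository type
    sm = Submodule.add(parent, "lib", "lib", url="git://example.com/lib.git",
                       repoType=MyRepo)

    # later, open the checked-out submodule repository with the same type
    sub_repo = sm.module(repoType=MyRepo)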
diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py
index 132604f60..6917045a3 100644
--- a/git/objects/submodule/root.py
+++ b/git/objects/submodule/root.py
@@ -1,3 +1,7 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import Submodule, UpdateProgress
from util import (
find_first_remote_branch
@@ -24,6 +28,7 @@ class RootUpdateProgress(UpdateProgress):
URLCHANGE = RootUpdateProgress.URLCHANGE
PATHCHANGE = RootUpdateProgress.PATHCHANGE
+
class RootModule(Submodule):
"""A (virtual) Root of all submodules in the given repository. It can be used
to more easily traverse all submodules of the master repository"""
@@ -32,15 +37,14 @@ class RootModule(Submodule):
k_root_name = '__ROOT__'
- def __init__(self, repo):
- # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None)
+ def __init__(self, repo, parent_commit = None):
super(RootModule, self).__init__(
repo,
binsha = self.NULL_BIN_SHA,
mode = self.k_default_mode,
path = '',
name = self.k_root_name,
- parent_commit = repo.head.commit,
+ parent_commit = parent_commit or repo.head.commit,
url = '',
branch_path = git.Head.to_full_path(self.k_head_default)
)
diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py
index 9b32807ae..2c5f6bc10 100644
--- a/git/objects/submodule/util.py
+++ b/git/objects/submodule/util.py
@@ -1,3 +1,7 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
import git
from git.exc import InvalidGitRepositoryError
from git.config import GitConfigParser
diff --git a/git/objects/tag.py b/git/objects/tag.py
index c7d02abe7..5dcd9bf97 100644
--- a/git/objects/tag.py
+++ b/git/objects/tag.py
@@ -5,24 +5,28 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
""" Module containing all object based types. """
import base
-from gitdb.util import hex_to_bin
+from git.util import RepoAliasMixin
+from git.util import hex_to_bin
from util import (
- get_object_type_by_name,
- parse_actor_and_date
- )
+ get_object_type_by_name,
+ parse_actor_and_date
+ )
+from git.typ import ObjectType
__all__ = ("TagObject", )
-class TagObject(base.Object):
+class TagObject(base.Object, RepoAliasMixin):
"""Non-Lightweight tag carrying additional information about an object we are pointing to."""
- type = "tag"
+ type = ObjectType.tag
+ type_id = ObjectType.tag_id
+
__slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
- def __init__(self, repo, binsha, object=None, tag=None,
+ def __init__(self, odb, binsha, object=None, tag=None,
tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
"""Initialize a tag object with additional data
- :param repo: repository this object is located in
+ :param odb: object database in which this object's data is stored
:param binsha: 20 byte SHA1
:param object: Object instance of object we are pointing to
:param tag: name of this tag
@@ -32,7 +36,7 @@ def __init__(self, repo, binsha, object=None, tag=None,
it into a different format
:param tagger_tz_offset: int_seconds_west_of_utc is the timezone that the
tagged_date is in, in a format similar to time.altzone"""
- super(TagObject, self).__init__(repo, binsha )
+ super(TagObject, self).__init__(odb, binsha )
if object is not None:
self.object = object
if tag is not None:
@@ -49,12 +53,12 @@ def __init__(self, repo, binsha, object=None, tag=None,
def _set_cache_(self, attr):
"""Cache all our attributes at once"""
if attr in TagObject.__slots__:
- ostream = self.repo.odb.stream(self.binsha)
+ ostream = self.odb.stream(self.binsha)
lines = ostream.read().splitlines()
obj, hexsha = lines[0].split(" ") # object
type_token, type_name = lines[1].split(" ") # type
- self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
+ self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha))
self.tag = lines[2][4:] # tag
diff --git a/git/objects/tree.py b/git/objects/tree.py
index 67431686b..31f2602dd 100644
--- a/git/objects/tree.py
+++ b/git/objects/tree.py
@@ -3,21 +3,23 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import util
+from git.util import RepoAliasMixin
+import git.diff as diff
+from git.typ import ObjectType
from base import IndexObject
-from git.util import join_path
from blob import Blob
from submodule.base import Submodule
-import git.diff as diff
from fun import (
tree_entries_from_data,
tree_to_stream
)
-from gitdb.util import (
- to_bin_sha,
+from git.util import (
+ to_bin_sha,
+ join_path
)
+import util
__all__ = ("TreeModifier", "Tree")
@@ -100,7 +102,7 @@ def __delitem__(self, name):
#} END mutators
-class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
+class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable, RepoAliasMixin):
"""Tree objects represent an ordered list of Blobs and other Trees.
``Tree as a list``::
@@ -112,7 +114,9 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
blob = tree[0]
"""
- type = "tree"
+ type = ObjectType.tree
+ type_id = ObjectType.tree_id
+
__slots__ = "_cache"
# actual integer ids for comparison
@@ -121,6 +125,9 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
symlink_id = 012
tree_id = 004
+ #{ Configuration
+
+ # override in subclass if you would like your own types to be instantiated instead
_map_id_to_type = {
commit_id : Submodule,
blob_id : Blob,
@@ -128,6 +135,8 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
# tree id added once Tree is defined
}
+ #} END configuration
+
def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
super(Tree, self).__init__(repo, binsha, mode, path)
@@ -141,7 +150,7 @@ def _get_intermediate_items(cls, index_object):
def _set_cache_(self, attr):
if attr == "_cache":
# Set the data when we need it
- ostream = self.repo.odb.stream(self.binsha)
+ ostream = self.odb.stream(self.binsha)
self._cache = tree_entries_from_data(ostream.read())
else:
super(Tree, self)._set_cache_(attr)
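Since _map_id_to_type is now an explicit configuration point, a Tree subclass can make iteration return its own object types. A sketch, assuming a custom Blob subclass:

    from git.objects.tree import Tree
    from git.objects.blob import Blob

    class MyBlob(Blob):
        """Hypothetical Blob subclass with extra behaviour."""
        pass

    class MyTree(Tree):
        # copy the default mapping and swap in our own blob type
        _map_id_to_type = dict(Tree._map_id_to_type)
        _map_id_to_type[Tree.blob_id] = MyBlob

    # nested trees should resolve to MyTree as well (mirrors the
    # 'tree id added once Tree is defined' pattern above)
    MyTree._map_id_to_type[MyTree.tree_id] = MyTree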
diff --git a/git/objects/util.py b/git/objects/util.py
index 4c9323b85..8ac590f2d 100644
--- a/git/objects/util.py
+++ b/git/objects/util.py
@@ -20,6 +20,7 @@
'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
'verify_utctz', 'Actor')
+
#{ Functions
def mode_str_to_int(modestr):
diff --git a/git/odict.py b/git/odict.py
index 2c8391d78..80f6965f0 100644
--- a/git/odict.py
+++ b/git/odict.py
@@ -16,16 +16,11 @@
"""A dict that keeps keys in insertion order"""
from __future__ import generators
-
__author__ = ('Nicola Larosa ,'
'Michael Foord ')
-
__docformat__ = "restructuredtext en"
-
__revision__ = '$Id: odict.py 129 2005-09-12 18:15:28Z teknico $'
-
__version__ = '0.2.2'
-
__all__ = ['OrderedDict', 'SequenceOrderedDict']
import sys
diff --git a/git/pack.py b/git/pack.py
new file mode 100644
index 000000000..627035fdc
--- /dev/null
+++ b/git/pack.py
@@ -0,0 +1,1007 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains PackIndexFile and PackFile implementations"""
+from git.exc import (
+ BadObject,
+ UnsupportedOperation,
+ ParseError,
+ AmbiguousObjectName # raised by partial_sha_to_index below
+ )
+from util import (
+ zlib,
+ mman,
+ LazyMixin,
+ unpack_from,
+ bin_to_hex,
+ )
+
+from fun import (
+ create_pack_object_header,
+ pack_object_header_info,
+ is_equal_canonical_sha,
+ type_id_to_type_map,
+ write_object,
+ stream_copy,
+ chunk_size,
+ delta_types,
+ OFS_DELTA,
+ REF_DELTA,
+ msb_size
+ )
+
+try:
+ from _perf import PackIndexFile_sha_to_index
+except ImportError:
+ pass
+# END try c module
+
+from base import ( # Amazing !
+ OInfo,
+ OStream,
+ OPackInfo,
+ OPackStream,
+ ODeltaStream,
+ ODeltaPackInfo,
+ ODeltaPackStream,
+ )
+from stream import (
+ DecompressMemMapReader,
+ DeltaApplyReader,
+ Sha1Writer,
+ NullStream,
+ FlexibleSha1Writer
+ )
+
+from struct import (
+ pack,
+ unpack,
+ )
+
+from binascii import crc32
+
+from itertools import izip
+import tempfile
+import array
+import os
+import sys
+
+__all__ = ('PackIndexFile', 'PackFile', 'PackEntity')
+
+
+
+
+#{ Utilities
+
+def pack_object_at(cursor, offset, as_stream):
+ """
+ :return: Tuple(abs_data_offset, PackInfo|PackStream)
+ an object of the correct type according to the type_id of the object.
+ If as_stream is True, the object will contain a stream, allowing the
+ data to be read decompressed.
+ :param cursor: memory cursor pointing to randomly accessible data containing all required information
+ :param offset: offset into the data at which the object information is located
+ :param as_stream: if True, a stream object will be returned that can read
+ the data, otherwise you receive an info object only"""
+ data = cursor.use_region(offset).buffer()
+ type_id, uncomp_size, data_rela_offset = pack_object_header_info(data)
+ total_rela_offset = None # set later, actual offset until data stream begins
+ delta_info = None
+
+ # OFFSET DELTA
+ if type_id == OFS_DELTA:
+ i = data_rela_offset
+ c = ord(data[i])
+ i += 1
+ delta_offset = c & 0x7f
+ while c & 0x80:
+ c = ord(data[i])
+ i += 1
+ delta_offset += 1
+ delta_offset = (delta_offset << 7) + (c & 0x7f)
+ # END character loop
+ delta_info = delta_offset
+ total_rela_offset = i
+ # REF DELTA
+ elif type_id == REF_DELTA:
+ total_rela_offset = data_rela_offset+20
+ delta_info = data[data_rela_offset:total_rela_offset]
+ # BASE OBJECT
+ else:
+ # assume its a base object
+ total_rela_offset = data_rela_offset
+ # END handle type id
+
+ abs_data_offset = offset + total_rela_offset
+ if as_stream:
+ stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size)
+ if delta_info is None:
+ return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream)
+ else:
+ return abs_data_offset, ODeltaPackStream(offset, type_id, uncomp_size, delta_info, stream)
+ else:
+ if delta_info is None:
+ return abs_data_offset, OPackInfo(offset, type_id, uncomp_size)
+ else:
+ return abs_data_offset, ODeltaPackInfo(offset, type_id, uncomp_size, delta_info)
+ # END handle info
+ # END handle stream
+
+def write_stream_to_pack(read, write, zstream, base_crc=None):
+ """Copy a stream as read from read function, zip it, and write the result.
+ Count the number of written bytes and return it
+ :param base_crc: if not None, the crc will be the base for all compressed data
+ we consecutively write and generate a crc32 from. If None, no crc will be generated
+ :return: tuple(number of bytes read, number of bytes written, crc32); the crc will be 0 if base_crc
+ was None"""
+ br = 0 # bytes read
+ bw = 0 # bytes written
+ want_crc = base_crc is not None
+ crc = 0
+ if want_crc:
+ crc = base_crc
+ #END initialize crc
+
+ while True:
+ chunk = read(chunk_size)
+ br += len(chunk)
+ compressed = zstream.compress(chunk)
+ bw += len(compressed)
+ write(compressed) # cannot assume return value
+
+ if want_crc:
+ crc = crc32(compressed, crc)
+ #END handle crc
+
+ if len(chunk) != chunk_size:
+ break
+ #END copy loop
+
+ compressed = zstream.flush()
+ bw += len(compressed)
+ write(compressed)
+ if want_crc:
+ crc = crc32(compressed, crc)
+ #END handle crc
+
+ return (br, bw, crc)
+
+
+#} END utilities
+
+
+class IndexWriter(object):
+ """Utility to cache index information, allowing to write all information later
+ in one go to the given stream
+ :note: currently only writes v2 indices"""
+ __slots__ = '_objs'
+
+ def __init__(self):
+ self._objs = list()
+
+ def append(self, binsha, crc, offset):
+ """Append one piece of object information"""
+ self._objs.append((binsha, crc, offset))
+
+ def write(self, pack_sha, write):
+ """Write the index file using the given write method
+ :param pack_sha: binary sha over the whole pack that we index
+ :return: sha1 binary sha over all index file contents"""
+ # sort for sha1 hash
+ self._objs.sort(key=lambda o: o[0])
+
+ sha_writer = FlexibleSha1Writer(write)
+ sha_write = sha_writer.write
+ sha_write(PackIndexFile.index_v2_signature)
+ sha_write(pack(">L", PackIndexFile.index_version_default))
+
+ # fanout
+ tmplist = list((0,)*256) # fanout or list with 64 bit offsets
+ for t in self._objs:
+ tmplist[ord(t[0][0])] += 1
+ #END prepare fanout
+ for i in xrange(255):
+ v = tmplist[i]
+ sha_write(pack('>L', v))
+ tmplist[i+1] += v
+ #END write each fanout entry
+ sha_write(pack('>L', tmplist[255]))
+
+ # sha1 ordered
+ # save calls, that is push them into c
+ sha_write(''.join(t[0] for t in self._objs))
+
+ # crc32
+ for t in self._objs:
+ sha_write(pack('>L', t[1]&0xffffffff))
+ #END for each crc
+
+ tmplist = list()
+ # offset 32
+ for t in self._objs:
+ ofs = t[2]
+ if ofs > 0x7fffffff:
+ tmplist.append(ofs)
+ ofs = 0x80000000 + len(tmplist)-1
+ #END handle 64 bit offsets
+ sha_write(pack('>L', ofs&0xffffffff))
+ #END for each offset
+
+ # offset 64
+ for ofs in tmplist:
+ sha_write(pack(">Q", ofs))
+ #END for each offset
+
+ # trailer
+ assert(len(pack_sha) == 20)
+ sha_write(pack_sha)
+ sha = sha_writer.sha(as_hex=False)
+ write(sha)
+ return sha
+
+
+
+class PackIndexFile(LazyMixin):
+ """A pack index provides offsets into the corresponding pack, allowing to find
+ locations for offsets faster."""
+
+ # Don't use slots as we dynamically bind functions for each version, need a dict for this
+ # The slots you see here are just to keep track of our instance variables
+ # __slots__ = ('_indexpath', '_fanout_table', '_cursor', '_version',
+ # '_sha_list_offset', '_crc_list_offset', '_pack_offset', '_pack_64_offset')
+
+ # used in v2 indices
+ _sha_list_offset = 8 + 1024
+ index_v2_signature = '\377tOc'
+ index_version_default = 2
+
+ def __init__(self, indexpath):
+ super(PackIndexFile, self).__init__()
+ self._indexpath = indexpath
+
+ def _set_cache_(self, attr):
+ if attr == "_packfile_checksum":
+ self._packfile_checksum = self._cursor.map()[-40:-20]
+ elif attr == "_packfile_checksum":
+ self._packfile_checksum = self._cursor.map()[-20:]
+ elif attr == "_cursor":
+ # Note: We don't lock the file when reading as we cannot be sure
+ # that we can actually write to the location - it could be a read-only
+ # alternate for instance
+ self._cursor = mman.make_cursor(self._indexpath).use_region()
+ # We will assume that the index will always fully fit into memory !
+ if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size():
+ raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (self._indexpath, self._cursor.file_size(), mman.window_size()))
+ #END assert window size
+ else:
+ # now its time to initialize everything - if we are here, someone wants
+ # to access the fanout table or related properties
+
+ # CHECK VERSION
+ mmap = self._cursor.map()
+ self._version = (mmap[:4] == self.index_v2_signature and 2) or 1
+ if self._version == 2:
+ version_id = unpack_from(">L", mmap, 4)[0]
+ assert version_id == self._version, "Unsupported index version: %i" % version_id
+ # END assert version
+
+ # SETUP FUNCTIONS
+ # setup our functions according to the actual version
+ for fname in ('entry', 'offset', 'sha', 'crc'):
+ setattr(self, fname, getattr(self, "_%s_v%i" % (fname, self._version)))
+ # END for each function to initialize
+
+
+ # INITIALIZE DATA
+ # byte offset is 8 if version is 2, 0 otherwise
+ self._initialize()
+ # END handle attributes
+
+
+ #{ Access V1
+
+ def _entry_v1(self, i):
+ """:return: tuple(offset, binsha, 0)"""
+ return unpack_from(">L20s", self._cursor.map(), 1024 + i*24) + (0, )
+
+ def _offset_v1(self, i):
+ """see ``_offset_v2``"""
+ return unpack_from(">L", self._cursor.map(), 1024 + i*24)[0]
+
+ def _sha_v1(self, i):
+ """see ``_sha_v2``"""
+ base = 1024 + (i*24)+4
+ return self._cursor.map()[base:base+20]
+
+ def _crc_v1(self, i):
+ """unsupported"""
+ return 0
+
+ #} END access V1
+
+ #{ Access V2
+ def _entry_v2(self, i):
+ """:return: tuple(offset, binsha, crc)"""
+ return (self._offset_v2(i), self._sha_v2(i), self._crc_v2(i))
+
+ def _offset_v2(self, i):
+ """:return: 32 or 64 byte offset into pack files. 64 byte offsets will only
+ be returned if the pack is larger than 4 GiB, or 2^32"""
+ offset = unpack_from(">L", self._cursor.map(), self._pack_offset + i * 4)[0]
+
+ # if the high-bit is set, this indicates that we have to lookup the offset
+ # in the 64 bit region of the file. The current offset ( lower 31 bits )
+ # are the index into it
+ if offset & 0x80000000:
+ offset = unpack_from(">Q", self._cursor.map(), self._pack_64_offset + (offset & ~0x80000000) * 8)[0]
+ # END handle 64 bit offset
+
+ return offset
+
+ def _sha_v2(self, i):
+ """:return: sha at the given index of this file index instance"""
+ base = self._sha_list_offset + i * 20
+ return self._cursor.map()[base:base+20]
+
+ def _crc_v2(self, i):
+ """:return: 4 bytes crc for the object at index i"""
+ return unpack_from(">L", self._cursor.map(), self._crc_list_offset + i * 4)[0]
+
+ #} END access V2
+
+ #{ Initialization
+
+ def _initialize(self):
+ """initialize base data"""
+ self._fanout_table = self._read_fanout((self._version == 2) * 8)
+
+ if self._version == 2:
+ self._crc_list_offset = self._sha_list_offset + self.size() * 20
+ self._pack_offset = self._crc_list_offset + self.size() * 4
+ self._pack_64_offset = self._pack_offset + self.size() * 4
+ # END setup base
+
+ def _read_fanout(self, byte_offset):
+ """Generate a fanout table from our data"""
+ d = self._cursor.map()
+ out = list()
+ append = out.append
+ for i in range(256):
+ append(unpack_from('>L', d, byte_offset + i*4)[0])
+ # END for each entry
+ return out
+
+ #} END initialization
+
+ #{ Properties
+ def version(self):
+ return self._version
+
+ def size(self):
+ """:return: amount of objects referred to by this index"""
+ return self._fanout_table[255]
+
+ def path(self):
+ """:return: path to the packindexfile"""
+ return self._indexpath
+
+ def packfile_checksum(self):
+ """:return: 20 byte sha representing the sha1 hash of the pack file"""
+ return self._cursor.map()[-40:-20]
+
+ def indexfile_checksum(self):
+ """:return: 20 byte sha representing the sha1 hash of this index file"""
+ return self._cursor.map()[-20:]
+
+ def offsets(self):
+ """:return: sequence of all offsets in the order in which they were written
+ :note: the return value can be randomly accessed, but may be immutable
+ if self._version == 2:
+ # read stream to array, convert to tuple
+ a = array.array('I') # 4 byte unsigned int; 'L' (long) can be 8 bytes on 64 bit platforms
+ a.fromstring(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset))
+
+ # networkbyteorder to something array likes more
+ if sys.byteorder == 'little':
+ a.byteswap()
+ return a
+ else:
+ return tuple(self.offset(index) for index in xrange(self.size()))
+ # END handle version
+
+ def sha_to_index(self, sha):
+ """
+ :return: index usable with the ``offset`` or ``entry`` method, or None
+ if the sha was not found in this pack index
+ :param sha: 20 byte sha to lookup"""
+ first_byte = ord(sha[0])
+ get_sha = self.sha
+ lo = 0 # lower index, the left bound of the bisection
+ if first_byte != 0:
+ lo = self._fanout_table[first_byte-1]
+ hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
+
+ # bisect until we have the sha
+ while lo < hi:
+ mid = (lo + hi) / 2
+ c = cmp(sha, get_sha(mid))
+ if c < 0:
+ hi = mid
+ elif not c:
+ return mid
+ else:
+ lo = mid + 1
+ # END handle midpoint
+ # END bisect
+ return None
+
+ def partial_sha_to_index(self, partial_bin_sha, canonical_length):
+ """
+ :return: index as in `sha_to_index` or None if the sha was not found in this
+ index file
+ :param partial_bin_sha: at least two bytes of a partial binary sha
+ :param canonical_length: length of the original hexadecimal representation of the
+ given partial binary sha
+ :raise AmbiguousObjectName:"""
+ if len(partial_bin_sha) < 2:
+ raise ValueError("Require at least 2 bytes of partial sha")
+
+ first_byte = ord(partial_bin_sha[0])
+ get_sha = self.sha
+ lo = 0 # lower index, the left bound of the bisection
+ if first_byte != 0:
+ lo = self._fanout_table[first_byte-1]
+ hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
+
+ # fill the partial to full 20 bytes
+ filled_sha = partial_bin_sha + '\0'*(20 - len(partial_bin_sha))
+
+ # find lowest
+ while lo < hi:
+ mid = (lo + hi) / 2
+ c = cmp(filled_sha, get_sha(mid))
+ if c < 0:
+ hi = mid
+ elif not c:
+ # perfect match
+ lo = mid
+ break
+ else:
+ lo = mid + 1
+ # END handle midpoint
+ # END bisect
+
+ if lo < self.size():
+ cur_sha = get_sha(lo)
+ if is_equal_canonical_sha(canonical_length, partial_bin_sha, cur_sha):
+ next_sha = None
+ if lo+1 < self.size():
+ next_sha = get_sha(lo+1)
+ if next_sha and next_sha == cur_sha:
+ raise AmbiguousObjectName(partial_bin_sha)
+ return lo
+ # END if we have a match
+ # END if we found something
+ return None
+
+ if 'PackIndexFile_sha_to_index' in globals():
+ # NOTE: It's just about 25% faster, the major bottleneck might be the attr
+ # accesses
+ def sha_to_index(self, sha):
+ return PackIndexFile_sha_to_index(self, sha)
+ # END redefine heavy-hitter with c version
+
+ #} END properties
+
+
+class PackFile(LazyMixin):
+ """A pack is a file written according to the Version 2 for git packs
+
+ As we currently use memory maps, it could be assumed that the maximum size of
+ packs therefor is 32 bit on 32 bit systems. On 64 bit systems, this should be
+ fine though.
+
+ :note: at some point, this might be implemented using streams as well, or
+ streams are an alternate path in the case memory maps cannot be created
+ for some reason - one clearly doesn't want to read 10GB at once in that
+ case"""
+
+ __slots__ = ('_packpath', '_cursor', '_size', '_version')
+ pack_signature = 0x5041434b # 'PACK'
+ pack_version_default = 2
+
+ # offset into our data at which the first object starts
+ first_object_offset = 3*4 # header bytes
+ footer_size = 20 # final sha
+
+ def __init__(self, packpath):
+ self._packpath = packpath
+
+ def _set_cache_(self, attr):
+ # we fill the whole cache, whichever attribute gets queried first
+ self._cursor = mman.make_cursor(self._packpath).use_region()
+
+ # read the header information
+ type_id, self._version, self._size = unpack_from(">LLL", self._cursor.map(), 0)
+
+ # TODO: figure out whether we should better keep the lock, or maybe
+ # add a .keep file instead ?
+ if type_id != self.pack_signature:
+ raise ParseError("Invalid pack signature: %i" % type_id)
+
+ def _iter_objects(self, start_offset, as_stream=True):
+ """Handle the actual iteration of objects within this pack"""
+ c = self._cursor
+ content_size = c.file_size() - self.footer_size
+ cur_offset = start_offset or self.first_object_offset
+
+ null = NullStream()
+ while cur_offset < content_size:
+ data_offset, ostream = pack_object_at(c, cur_offset, True)
+ # scrub the stream to the end - this decompresses the object, but yields
+ # the amount of compressed bytes we need to get to the next offset
+
+ stream_copy(ostream.read, null.write, ostream.size, chunk_size)
+ cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read()
+
+
+ # if a stream is requested, reset it beforehand
+ # Otherwise return the Stream object directly, its derived from the
+ # info object
+ if as_stream:
+ ostream.stream.seek(0)
+ yield ostream
+ # END until we have read everything
+
+ #{ Pack Information
+
+ def size(self):
+ """:return: The amount of objects stored in this pack"""
+ return self._size
+
+ def version(self):
+ """:return: the version of this pack"""
+ return self._version
+
+ def data(self):
+ """
+ :return: read-only data of this pack. It provides random access and usually
+ is a memory map.
+ :note: This method is unsafe as it returns a window into a file which might be larger than the actual window size
+ # can use map as we are starting at offset 0. Otherwise we would have to use buffer()
+ return self._cursor.use_region().map()
+
+ def checksum(self):
+ """:return: 20 byte sha1 hash on all object sha's contained in this file"""
+ return self._cursor.use_region(self._cursor.file_size()-20).buffer()[:]
+
+ def path(self):
+ """:return: path to the packfile"""
+ return self._packpath
+ #} END pack information
+
+ #{ Pack Specific
+
+ def collect_streams(self, offset):
+ """
+ :return: list of pack streams which are required to build the object
+ at the given offset. The first entry of the list is the object at offset,
+ the last one is either a full object, or a REF_Delta stream. The latter
+ type needs its reference object to be locked up in an ODB to form a valid
+ delta chain.
+ If the object at offset is no delta, the size of the list is 1.
+ :param offset: specifies the first byte of the object within this pack"""
+ out = list()
+ c = self._cursor
+ while True:
+ ostream = pack_object_at(c, offset, True)[1]
+ out.append(ostream)
+ if ostream.type_id == OFS_DELTA:
+ offset = ostream.pack_offset - ostream.delta_info
+ else:
+ # the only thing we can lookup are OFFSET deltas. Everything
+ # else is either an object, or a ref delta, in the latter
+ # case someone else has to find it
+ break
+ # END handle type
+ # END while chaining streams
+ return out
+
+ #} END pack specific
+
+ #{ Read-Database like Interface
+
+ def info(self, offset):
+ """Retrieve information about the object at the given file-absolute offset
+
+ :param offset: byte offset
+ :return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
+ return pack_object_at(self._cursor, offset or self.first_object_offset, False)[1]
+
+ def stream(self, offset):
+ """Retrieve an object at the given file-relative offset as stream along with its information
+
+ :param offset: byte offset
+ :return: OPackStream instance, the actual type differs depending on the type_id attribute"""
+ return pack_object_at(self._cursor, offset or self.first_object_offset, True)[1]
+
+ def stream_iter(self, start_offset=0):
+ """
+ :return: iterator yielding OPackStream compatible instances, allowing
+ to access the data in the pack directly.
+ :param start_offset: offset to the first object to iterate. If 0, iteration
+ starts at the very first object in the pack.
+ :note: Iterating a pack directly is costly as the datastream has to be decompressed
+ to determine the bounds between the objects"""
+ return self._iter_objects(start_offset, as_stream=True)
+
+ #} END Read-Database like Interface
+
+
+class PackEntity(LazyMixin):
+ """Combines the PackIndexFile and the PackFile into one, allowing the
+ actual objects to be resolved and iterated"""
+
+ __slots__ = ( '_index', # our index file
+ '_pack', # our pack file
+ '_offset_map' # on demand dict mapping one offset to the next consecutive one
+ )
+
+ IndexFileCls = PackIndexFile
+ PackFileCls = PackFile
+
+ def __init__(self, pack_or_index_path):
+ """Initialize ourselves with the path to the respective pack or index file"""
+ basename, ext = os.path.splitext(pack_or_index_path)
+ self._index = self.IndexFileCls("%s.idx" % basename) # PackIndexFile instance
+ self._pack = self.PackFileCls("%s.pack" % basename) # corresponding PackFile instance
+
+ def _set_cache_(self, attr):
+ # currently this can only be _offset_map
+ # TODO: make this a simple sorted offset array which can be bisected
+ # to find the respective entry, from which we can take a +1 easily
+ # This might be slower, but should also be much lighter in memory !
+ offsets_sorted = sorted(self._index.offsets())
+ last_offset = len(self._pack.data()) - self._pack.footer_size
+ assert offsets_sorted, "Cannot handle empty indices"
+
+ offset_map = None
+ if len(offsets_sorted) == 1:
+ offset_map = { offsets_sorted[0] : last_offset }
+ else:
+ iter_offsets = iter(offsets_sorted)
+ iter_offsets_plus_one = iter(offsets_sorted)
+ iter_offsets_plus_one.next()
+ consecutive = izip(iter_offsets, iter_offsets_plus_one)
+
+ offset_map = dict(consecutive)
+
+ # the last offset is not yet set
+ offset_map[offsets_sorted[-1]] = last_offset
+ # END handle offset amount
+ self._offset_map = offset_map
+
+ def _sha_to_index(self, sha):
+ """:return: index for the given sha, or raise"""
+ index = self._index.sha_to_index(sha)
+ if index is None:
+ raise BadObject(sha)
+ return index
+
+ def _iter_objects(self, as_stream):
+ """Iterate over all objects in our index and yield their OInfo or OStream instences"""
+ _sha = self._index.sha
+ _object = self._object
+ for index in xrange(self._index.size()):
+ yield _object(_sha(index), as_stream, index)
+ # END for each index
+
+ def _object(self, sha, as_stream, index=-1):
+ """:return: OInfo or OStream object providing information about the given sha
+ :param index: if not -1, it's assumed to be the sha's index in the IndexFile"""
+ # it's a little bit redundant here, but it needs to be efficient
+ if index < 0:
+ index = self._sha_to_index(sha)
+ if sha is None:
+ sha = self._index.sha(index)
+ # END assure sha is present ( in output )
+ offset = self._index.offset(index)
+ type_id, uncomp_size, data_rela_offset = pack_object_header_info(self._pack._cursor.use_region(offset).buffer())
+ if as_stream:
+ if type_id not in delta_types:
+ packstream = self._pack.stream(offset)
+ return OStream(sha, packstream.type, packstream.size, packstream.stream)
+ # END handle non-deltas
+
+ # produce a delta stream containing all info
+ # To prevent it from applying the deltas when querying the size,
+ # we extract it from the delta stream ourselves
+ streams = self.collect_streams_at_offset(offset)
+ dstream = DeltaApplyReader.new(streams)
+
+ return ODeltaStream(sha, dstream.type, None, dstream)
+ else:
+ if type_id not in delta_types:
+ return OInfo(sha, type_id_to_type_map[type_id], uncomp_size)
+ # END handle non-deltas
+
+ # deltas are a little tougher - unpack the first bytes to obtain
+ # the actual target size, as opposed to the size of the delta data
+ streams = self.collect_streams_at_offset(offset)
+ buf = streams[0].read(512)
+ offset, src_size = msb_size(buf)
+ offset, target_size = msb_size(buf, offset)
+
+ # collect the streams to obtain the actual object type
+ if streams[-1].type_id in delta_types:
+ raise BadObject(sha, "Could not resolve delta object")
+ return OInfo(sha, streams[-1].type, target_size)
+ # END handle stream
+
+ #{ Read-Database like Interface
+
+ def info(self, sha):
+ """Retrieve information about the object identified by the given sha
+
+ :param sha: 20 byte sha1
+ :raise BadObject:
+ :return: OInfo instance, with 20 byte sha"""
+ return self._object(sha, False)
+
+ def stream(self, sha):
+ """Retrieve an object stream along with its information as identified by the given sha
+
+ :param sha: 20 byte sha1
+ :raise BadObject:
+ :return: OStream instance, with 20 byte sha"""
+ return self._object(sha, True)
+
+ def info_at_index(self, index):
+ """As ``info``, but uses a PackIndexFile compatible index to refer to the object"""
+ return self._object(None, False, index)
+
+ def stream_at_index(self, index):
+ """As ``stream``, but uses a PackIndexFile compatible index to refer to the
+ object"""
+ return self._object(None, True, index)
+
+ #} END Read-Database like Interface
+
+ #{ Interface
+
+ def pack(self):
+ """:return: the underlying pack file instance"""
+ return self._pack
+
+ def index(self):
+ """:return: the underlying pack index file instance"""
+ return self._index
+
+ def is_valid_stream(self, sha, use_crc=False):
+ """
+ Verify that the stream at the given sha is valid.
+
+ :param use_crc: if True, the index's crc is run over the compressed stream of
+ the object, which is much faster than checking the sha1. It is also
+ more prone to unnoticed corruption or manipulation. If the object is
+ a delta, this only verifies that the delta's data is valid, not the
+ data of the actual undeltified object, as it depends on more than
+ just this stream.
+ If False, the object will be decompressed and the sha generated. It must
+ match the given sha
+ :param sha: 20 byte sha1 of the object whose stream to verify
+
+ :return: True if the stream is valid
+ :raise UnsupportedOperation: If the index is version 1 only
+ :raise BadObject: sha was not found"""
+ if use_crc:
+ if self._index.version() < 2:
+ raise UnsupportedOperation("Version 1 indices do not contain crc's, verify by sha instead")
+ # END handle index version
+
+ index = self._sha_to_index(sha)
+ offset = self._index.offset(index)
+ next_offset = self._offset_map[offset]
+ crc_value = self._index.crc(index)
+
+ # create the current crc value, on the compressed object data
+ # Read it in chunks, without copying the data
+ crc_update = zlib.crc32
+ pack_data = self._pack.data()
+ cur_pos = offset
+ this_crc_value = 0
+ while cur_pos < next_offset:
+ rbound = min(cur_pos + chunk_size, next_offset)
+ size = rbound - cur_pos
+ this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value)
+ cur_pos += size
+ # END window size loop
+
+ # crc returns signed 32 bit numbers, the AND op forces it into unsigned
+ # mode ... wow, sneaky, from dulwich.
+ return (this_crc_value & 0xffffffff) == crc_value
+ else:
+ shawriter = Sha1Writer()
+ stream = self._object(sha, as_stream=True)
+ # write a loose object, which is the basis for the sha
+ write_object(stream.type, stream.size, stream.read, shawriter.write)
+
+ return shawriter.sha(as_hex=False) == sha
+ # END handle crc/sha verification
+ return True
+
+ def info_iter(self):
+ """
+ :return: Iterator over all objects in this pack. The iterator yields
+ OInfo instances"""
+ return self._iter_objects(as_stream=False)
+
+ def stream_iter(self):
+ """
+ :return: iterator over all objects in this pack. The iterator yields
+ OStream instances"""
+ return self._iter_objects(as_stream=True)
+
+ def collect_streams_at_offset(self, offset):
+ """
+ As the version in the PackFile, but can resolve REF deltas within this pack
+ For more info, see ``collect_streams``
+
+ :param offset: offset into the pack file at which the object can be found"""
+ streams = self._pack.collect_streams(offset)
+
+ # try to resolve the last one if needed. It is assumed to be either
+ # a REF delta, or a base object, as OFFSET deltas are resolved by the pack
+ if streams[-1].type_id == REF_DELTA:
+ stream = streams[-1]
+ while stream.type_id in delta_types:
+ if stream.type_id == REF_DELTA:
+ sindex = self._index.sha_to_index(stream.delta_info)
+ if sindex is None:
+ break
+ stream = self._pack.stream(self._index.offset(sindex))
+ streams.append(stream)
+ else:
+ # must be another OFS DELTA - this could happen if a REF
+ # delta we resolve previously points to an OFS delta. Who
+ # would do that ;) ? We can handle it though
+ stream = self._pack.stream(stream.delta_info)
+ streams.append(stream)
+ # END handle ref delta
+ # END resolve ref streams
+ # END resolve streams
+
+ return streams
+
+ def collect_streams(self, sha):
+ """
+ As ``PackFile.collect_streams``, but takes a sha instead of an offset.
+ Additionally, ref_delta streams will be resolved within this pack.
+ If this is not possible, the stream will be left alone, hence it is advised
+ to check for unresolved ref-deltas and resolve them before attempting to
+ construct a delta stream.
+
+ :param sha: 20 byte sha1 specifying the object whose related streams you want to collect
+ :return: list of streams, first being the actual object delta, the last being
+ a possibly unresolved base object.
+ :raise BadObject:"""
+ return self.collect_streams_at_offset(self._index.offset(self._sha_to_index(sha)))
+
+
+ @classmethod
+ def write_pack(cls, object_iter, pack_write, index_write=None,
+ object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
+ """
+ Create a new pack by putting all objects obtained by the object_iterator
+ into a pack which is written using the pack_write method.
+ The respective index is produced as well if index_write is not None.
+
+ :param object_iter: iterator yielding odb output objects
+ :param pack_write: function to receive strings to write into the pack stream
+ :param index_write: if not None, a function receiving strings to write into the index file corresponding
+ to the pack.
+ :param object_count: if you can provide the number of objects in your iteration,
+ this would be the place to put it. Otherwise we have to pre-iterate and store
+ all items into a list to get the number, which uses more memory than necessary.
+ :param zlib_compression: the zlib compression level to use
+ :return: tuple(pack_sha, index_binsha) binary sha over all the contents of the pack
+ and over all contents of the index. If index_write was None, index_binsha will be None
+ :note: The destination of the write functions is up to the user. It could
+ be a socket, or a file for instance
+ :note: writes only undeltified objects"""
+ objs = object_iter
+ if not object_count:
+ if not isinstance(object_iter, (tuple, list)):
+ objs = list(object_iter)
+ #END handle list type
+ object_count = len(objs)
+ #END handle object
+
+ pack_writer = FlexibleSha1Writer(pack_write)
+ pwrite = pack_writer.write
+ ofs = 0 # current offset into the pack file
+ index = None
+ wants_index = index_write is not None
+
+ # write header
+ pwrite(pack('>LLL', PackFile.pack_signature, PackFile.pack_version_default, object_count))
+ ofs += 12
+
+ if wants_index:
+ index = IndexWriter()
+ #END handle index header
+
+ actual_count = 0
+ for obj in objs:
+ actual_count += 1
+ crc = 0
+
+ # object header
+ hdr = create_pack_object_header(obj.type_id, obj.size)
+ if index_write:
+ crc = crc32(hdr)
+ else:
+ crc = None
+ #END handle crc
+ pwrite(hdr)
+
+ # data stream
+ zstream = zlib.compressobj(zlib_compression)
+ ostream = obj.stream
+ br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc = crc)
+ assert(br == obj.size)
+ if wants_index:
+ index.append(obj.binsha, crc, ofs)
+ #END handle index
+
+ ofs += len(hdr) + bw
+ if actual_count == object_count:
+ break
+ #END abort once we are done
+ #END for each object
+
+ if actual_count != object_count:
+ raise ValueError("Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count))
+ #END count assertion
+
+ # write footer
+ pack_sha = pack_writer.sha(as_hex = False)
+ assert len(pack_sha) == 20
+ pack_write(pack_sha)
+ ofs += len(pack_sha) # just for completeness ;)
+
+ index_sha = None
+ if wants_index:
+ index_sha = index.write(pack_sha, index_write)
+ #END handle index
+
+ return pack_sha, index_sha
+
+ @classmethod
+ def create(cls, object_iter, base_dir, object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
+ """Create a new on-disk entity comprised of a properly named pack file and a properly named
+ and corresponding index file. The pack contains all OStream objects contained in object_iter.
+ :param base_dir: directory which is to contain the files
+ :return: PackEntity instance initialized with the new pack
+ :note: for more information on the other parameters see the write_pack method"""
+ pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir)
+ index_fd, index_path = tempfile.mkstemp('', 'index', base_dir)
+ pack_write = lambda d: os.write(pack_fd, d)
+ index_write = lambda d: os.write(index_fd, d)
+
+ pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write, index_write, object_count, zlib_compression)
+ os.close(pack_fd)
+ os.close(index_fd)
+
+ fmt = "pack-%s.%s"
+ new_pack_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'pack'))
+ new_index_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'idx'))
+ os.rename(pack_path, new_pack_path)
+ os.rename(index_path, new_index_path)
+
+ return cls(new_pack_path)
+
+
+ #} END interface
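Taken together, PackIndexFile, PackFile and PackEntity allow pack contents to be inspected without invoking git. A minimal read sketch; the pack file name is a placeholder:

    from git.pack import PackEntity
    from git.util import hex_to_bin

    entity = PackEntity("/path/to/repo/.git/objects/pack/pack-abc.pack")
    print entity.index().size(), "objects in", entity.pack().path()

    # look up one object by its binary sha and stream its data
    binsha = hex_to_bin("1c09f116cbc2cb4100fb6935bb162daa4723f455")
    if entity.index().sha_to_index(binsha) is not None:
        ostream = entity.stream(binsha)
        print ostream.type, ostream.size   # size may be None for delta streams
        data = ostream.read()

    # or iterate over everything the pack contains
    for info in entity.info_iter():
        pass    # info.binsha, info.type and info.size are available here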
diff --git a/git/refs/__init__.py b/git/refs/__init__.py
index fc8ce644c..35b69fcad 100644
--- a/git/refs/__init__.py
+++ b/git/refs/__init__.py
@@ -2,19 +2,20 @@
# import all modules in order, fix the names they require
from symbolic import *
from reference import *
+from headref import *
from head import *
from tag import *
from remote import *
# name fixes
-import head
-head.RemoteReference = RemoteReference
-del(head)
+import headref
+headref.Head.RemoteReferenceCls = RemoteReference
+del(headref)
import symbolic
-for item in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference):
- setattr(symbolic, item.__name__, item)
+for item in (HEAD, Head, RemoteReference, TagReference, Reference):
+ setattr(symbolic.SymbolicReference, item.__name__+'Cls', item)
del(symbolic)
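The fix-up loop now attaches the concrete reference types to SymbolicReference as *Cls attributes (following the ``item.__name__ + 'Cls'`` pattern above) rather than patching module globals, so database implementations can substitute their own reference classes. Roughly what becomes available after import, assuming the usual module layout:

    from git.refs import HEAD, Head, Reference, RemoteReference, TagReference
    from git.refs.symbolic import SymbolicReference

    assert SymbolicReference.HEADCls is HEAD
    assert SymbolicReference.HeadCls is Head
    assert SymbolicReference.ReferenceCls is Reference
    assert SymbolicReference.RemoteReferenceCls is RemoteReference
    assert SymbolicReference.TagReferenceCls is TagReference

    # Head additionally learns about its remote counterpart
    assert Head.RemoteReferenceCls is RemoteReference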
diff --git a/git/refs/head.py b/git/refs/head.py
index d87294341..4345528b4 100644
--- a/git/refs/head.py
+++ b/git/refs/head.py
@@ -1,19 +1,14 @@
-from symbolic import SymbolicReference
-from reference import Reference
-
-from git.config import SectionConstraint
-
-from git.util import join_path
+from symbolic import SymbolicReference
from git.exc import GitCommandError
-__all__ = ["HEAD", "Head"]
-
+__all__ = ["HEAD"]
class HEAD(SymbolicReference):
- """Special case of a Symbolic Reference as it represents the repository's
- HEAD reference."""
+ """Provides additional functionality using the git command"""
+ __slots__ = tuple()
+
_HEAD_NAME = 'HEAD'
_ORIG_HEAD_NAME = 'ORIG_HEAD'
__slots__ = tuple()
@@ -90,157 +85,3 @@ def reset(self, commit='HEAD', index=True, working_tree = False,
return self
-
-class Head(Reference):
- """A Head is a named reference to a Commit. Every Head instance contains a name
- and a Commit object.
-
- Examples::
-
- >>> repo = Repo("/path/to/repo")
- >>> head = repo.heads[0]
-
- >>> head.name
- 'master'
-
- >>> head.commit
-
-
- >>> head.commit.hexsha
- '1c09f116cbc2cb4100fb6935bb162daa4723f455'"""
- _common_path_default = "refs/heads"
- k_config_remote = "remote"
- k_config_remote_ref = "merge" # branch to merge from remote
-
- @classmethod
- def delete(cls, repo, *heads, **kwargs):
- """Delete the given heads
- :param force:
- If True, the heads will be deleted even if they are not yet merged into
- the main development stream.
- Default False"""
- force = kwargs.get("force", False)
- flag = "-d"
- if force:
- flag = "-D"
- repo.git.branch(flag, *heads)
-
- def set_tracking_branch(self, remote_reference):
- """
- Configure this branch to track the given remote reference. This will alter
- this branch's configuration accordingly.
-
- :param remote_reference: The remote reference to track or None to untrack
- any references
- :return: self"""
- if remote_reference is not None and not isinstance(remote_reference, RemoteReference):
- raise ValueError("Incorrect parameter type: %r" % remote_reference)
- # END handle type
-
- writer = self.config_writer()
- if remote_reference is None:
- writer.remove_option(self.k_config_remote)
- writer.remove_option(self.k_config_remote_ref)
- if len(writer.options()) == 0:
- writer.remove_section()
- # END handle remove section
- else:
- writer.set_value(self.k_config_remote, remote_reference.remote_name)
- writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head))
- # END handle ref value
-
- return self
-
-
- def tracking_branch(self):
- """
- :return: The remote_reference we are tracking, or None if we are
- not a tracking branch"""
- reader = self.config_reader()
- if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref):
- ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref)))
- remote_refpath = RemoteReference.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name))
- return RemoteReference(self.repo, remote_refpath)
- # END handle have tracking branch
-
- # we are not a tracking branch
- return None
-
- def rename(self, new_path, force=False):
- """Rename self to a new path
-
- :param new_path:
- Either a simple name or a path, i.e. new_name or features/new_name.
- The prefix refs/heads is implied
-
- :param force:
- If True, the rename will succeed even if a head with the target name
- already exists.
-
- :return: self
- :note: respects the ref log as git commands are used"""
- flag = "-m"
- if force:
- flag = "-M"
-
- self.repo.git.branch(flag, self, new_path)
- self.path = "%s/%s" % (self._common_path_default, new_path)
- return self
-
- def checkout(self, force=False, **kwargs):
- """Checkout this head by setting the HEAD to this reference, by updating the index
- to reflect the tree we point to and by updating the working tree to reflect
- the latest index.
-
- The command will fail if changed working tree files would be overwritten.
-
- :param force:
- If True, changes to the index and the working tree will be discarded.
- If False, GitCommandError will be raised in that situation.
-
- :param kwargs:
- Additional keyword arguments to be passed to git checkout, i.e.
- b='new_branch' to create a new branch at the given spot.
-
- :return:
- The active branch after the checkout operation, usually self unless
- a new branch has been created.
-
- :note:
- By default it is only allowed to checkout heads - everything else
- will leave the HEAD detached which is allowed and possible, but remains
- a special state that some tools might not be able to handle."""
- args = list()
- kwargs['f'] = force
- if kwargs['f'] == False:
- kwargs.pop('f')
-
- self.repo.git.checkout(self, **kwargs)
- return self.repo.active_branch
-
- #{ Configruation
-
- def _config_parser(self, read_only):
- if read_only:
- parser = self.repo.config_reader()
- else:
- parser = self.repo.config_writer()
- # END handle parser instance
-
- return SectionConstraint(parser, 'branch "%s"' % self.name)
-
- def config_reader(self):
- """
- :return: A configuration parser instance constrained to only read
- this instance's values"""
- return self._config_parser(read_only=True)
-
- def config_writer(self):
- """
- :return: A configuration writer instance with read-and write acccess
- to options of this head"""
- return self._config_parser(read_only=False)
-
- #} END configuration
-
-
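What remains in head.py after this split is the git-command flavoured HEAD type; a minimal usage sketch, assuming the compatibility Repo added later in this patch exposes a head property returning a HEAD instance:

    from git.repo import Repo    # compatibility shim introduced further down in this patch

    repo = Repo("/path/to/repo")
    repo.head.reset(commit='HEAD~1', index=True,    # signature as shown in the hunk header above
                    working_tree=False)             # index moves back, working tree is kept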
diff --git a/git/refs/headref.py b/git/refs/headref.py
new file mode 100644
index 000000000..67117e96d
--- /dev/null
+++ b/git/refs/headref.py
@@ -0,0 +1,170 @@
+from reference import Reference
+from git.config import SectionConstraint
+from git.util import join_path
+
+__all__ = ["Head"]
+
+class Head(Reference):
+ """The GitPyhton Head implementation provides more git-command based features
+
+ A Head is a named reference to a Commit. Every Head instance contains a name
+ and a Commit object.
+
+ Examples::
+
+ >>> repo = Repo("/path/to/repo")
+ >>> head = repo.heads[0]
+
+ >>> head.name
+ 'master'
+
+ >>> head.commit
+
+
+ >>> head.commit.hexsha
+ '1c09f116cbc2cb4100fb6935bb162daa4723f455'"""
+ __slots__ = tuple()
+
+ _common_path_default = "refs/heads"
+ k_config_remote = "remote"
+ k_config_remote_ref = "merge" # branch to merge from remote
+
+ # will be set by init method !
+ RemoteReferenceCls = None
+
+ #{ Configuration
+
+ def set_tracking_branch(self, remote_reference):
+ """
+ Configure this branch to track the given remote reference. This will alter
+ this branch's configuration accordingly.
+
+ :param remote_reference: The remote reference to track or None to untrack
+ any references
+ :return: self"""
+ if remote_reference is not None and not isinstance(remote_reference, self.RemoteReferenceCls):
+ raise ValueError("Incorrect parameter type: %r" % remote_reference)
+ # END handle type
+
+ writer = self.config_writer()
+ if remote_reference is None:
+ writer.remove_option(self.k_config_remote)
+ writer.remove_option(self.k_config_remote_ref)
+ if len(writer.options()) == 0:
+ writer.remove_section()
+ # END handle remove section
+ else:
+ writer.set_value(self.k_config_remote, remote_reference.remote_name)
+ writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head))
+ # END handle ref value
+
+ return self
+
+ def tracking_branch(self):
+ """
+ :return: The remote_reference we are tracking, or None if we are
+ not a tracking branch"""
+ reader = self.config_reader()
+ if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref):
+ ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref)))
+ remote_refpath = self.RemoteReferenceCls.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name))
+ return self.RemoteReferenceCls(self.repo, remote_refpath)
+ # END handle have tracking branch
+
+ # we are not a tracking branch
+ return None
+
+
+ #{ Configuration
+
+ def _config_parser(self, read_only):
+ if read_only:
+ parser = self.repo.config_reader()
+ else:
+ parser = self.repo.config_writer()
+ # END handle parser instance
+
+ return SectionConstraint(parser, 'branch "%s"' % self.name)
+
+ def config_reader(self):
+ """
+ :return: A configuration parser instance constrained to only read
+ this instance's values"""
+ return self._config_parser(read_only=True)
+
+ def config_writer(self):
+ """
+ :return: A configuration writer instance with read- and write access
+ to options of this head"""
+ return self._config_parser(read_only=False)
+
+ #} END configuration
+
+ @classmethod
+ def delete(cls, repo, *heads, **kwargs):
+ """Delete the given heads
+ :param force:
+ If True, the heads will be deleted even if they are not yet merged into
+ the main development stream.
+ Default False"""
+ force = kwargs.get("force", False)
+ flag = "-d"
+ if force:
+ flag = "-D"
+ repo.git.branch(flag, *heads)
+
+
+ def rename(self, new_path, force=False):
+ """Rename self to a new path
+
+ :param new_path:
+ Either a simple name or a path, i.e. new_name or features/new_name.
+ The prefix refs/heads is implied
+
+ :param force:
+ If True, the rename will succeed even if a head with the target name
+ already exists.
+
+ :return: self
+ :note: respects the ref log as git commands are used"""
+ flag = "-m"
+ if force:
+ flag = "-M"
+
+ self.repo.git.branch(flag, self, new_path)
+ self.path = "%s/%s" % (self._common_path_default, new_path)
+ return self
+
+ def checkout(self, force=False, **kwargs):
+ """Checkout this head by setting the HEAD to this reference, by updating the index
+ to reflect the tree we point to and by updating the working tree to reflect
+ the latest index.
+
+ The command will fail if changed working tree files would be overwritten.
+
+ :param force:
+ If True, changes to the index and the working tree will be discarded.
+ If False, GitCommandError will be raised in that situation.
+
+ :param kwargs:
+ Additional keyword arguments to be passed to git checkout, i.e.
+ b='new_branch' to create a new branch at the given spot.
+
+ :return:
+ The active branch after the checkout operation, usually self unless
+ a new branch has been created.
+
+ :note:
+ By default it is only allowed to checkout heads - everything else
+ will leave the HEAD detached which is allowed and possible, but remains
+ a special state that some tools might not be able to handle."""
+ args = list()
+ kwargs['f'] = force
+ if kwargs['f'] == False:
+ kwargs.pop('f')
+
+ self.repo.git.checkout(self, **kwargs)
+ return self.repo.active_branch
+
+
+
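A short usage sketch for the relocated Head class, assuming a repository with an 'origin' remote and that repo.heads still resolves branch names by attribute as before:

    from git.refs import RemoteReference
    from git.repo import Repo

    repo = Repo("/path/to/repo")
    master = repo.heads.master                           # Head instance for refs/heads/master
    remote_master = RemoteReference(repo, "refs/remotes/origin/master")

    master.set_tracking_branch(remote_master)            # writes branch "master".remote / .merge
    assert master.tracking_branch() == remote_master     # read back through the config reader
    master.set_tracking_branch(None)                     # untrack, removing the section if empty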
diff --git a/git/refs/log.py b/git/refs/log.py
index f49c07fda..a11ad0951 100644
--- a/git/refs/log.py
+++ b/git/refs/log.py
@@ -5,12 +5,9 @@
LockFile,
assure_directory_exists,
to_native_path,
- )
-
-from gitdb.util import (
bin_to_hex,
join,
- file_contents_ro_filepath,
+ file_contents_ro_filepath
)
from git.objects.util import (
@@ -129,7 +126,13 @@ def __init__(self, filepath=None):
# END handle filepath
def _read_from_file(self):
- fmap = file_contents_ro_filepath(self._path, stream=False, allow_mmap=True)
+ try:
+ fmap = file_contents_ro_filepath(self._path, stream=True, allow_mmap=True)
+ except OSError:
+ # it is possible and allowed that the file doesn't exist !
+ return
+ #END handle invalid log
+
try:
self._deserialize(fmap)
finally:
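With the OSError handling above, pointing a RefLog at a file that does not yet exist is explicitly allowed; a small sketch, assuming RefLog keeps behaving like a list of reflog entries:

    from git.refs.log import RefLog

    log = RefLog(filepath="/path/to/repo/.git/logs/refs/heads/no-such-branch")
    assert len(log) == 0    # _read_from_file() now returns early instead of raising OSError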
diff --git a/git/refs/reference.py b/git/refs/reference.py
index 1a745ee9d..d85b194d7 100644
--- a/git/refs/reference.py
+++ b/git/refs/reference.py
@@ -1,18 +1,28 @@
-from symbolic import SymbolicReference
import os
-from git.objects import Object
-from git.util import (
- LazyMixin,
- Iterable,
- )
-from gitdb.util import (
+from symbolic import SymbolicReference
+from head import HEAD
+from git.util import (
+ LazyMixin,
+ Iterable,
isfile,
hex_to_bin
)
__all__ = ["Reference"]
+#{ Utilities
+def require_remote_ref_path(func):
+ """A decorator raising a TypeError if we are not a valid remote, based on the path"""
+ def wrapper(self, *args):
+ if not self.path.startswith(self._remote_common_path_default + "/"):
+ raise ValueError("ref path does not point to a remote reference: %s" % path)
+ return func(self, *args)
+ #END wrapper
+ wrapper.__name__ = func.__name__
+ return wrapper
+#}END utilities
+
class Reference(SymbolicReference, LazyMixin, Iterable):
"""Represents a named reference to any object. Subclasses may apply restrictions though,
@@ -22,26 +32,30 @@ class Reference(SymbolicReference, LazyMixin, Iterable):
_resolve_ref_on_create = True
_common_path_default = "refs"
- def __init__(self, repo, path):
+ def __init__(self, repo, path, check_path = True):
"""Initialize this instance
:param repo: Our parent repository
:param path:
Path relative to the .git/ directory pointing to the ref in question, i.e.
- refs/heads/master"""
- if not path.startswith(self._common_path_default+'/'):
- raise ValueError("Cannot instantiate %r from path %s" % ( self.__class__.__name__, path ))
+ refs/heads/master
+ :param check_path: if False, you can provide any path. Otherwise the path must start with the
+ default path prefix of this type."""
+ if check_path and not path.startswith(self._common_path_default+'/'):
+ raise ValueError("Cannot instantiate %r from path %s" % (self.__class__.__name__, path))
super(Reference, self).__init__(repo, path)
def __str__(self):
return self.name
+
+ #{ Interface
def set_object(self, object, logmsg = None):
"""Special version which checks if the head-log needs an update as well"""
oldbinsha = None
+ head = HEAD(self.repo)
if logmsg is not None:
- head = self.repo.head
if not head.is_detached and head.ref == self:
oldbinsha = self.commit.binsha
#END handle commit retrieval
@@ -62,7 +76,7 @@ def set_object(self, object, logmsg = None):
# * check with HEAD only which should cover 99% of all usage
# * scenarios (even 100% of the default ones).
# */
- self.repo.head.log_append(oldbinsha, logmsg)
+ head.log_append(oldbinsha, logmsg)
#END check if the head
# NOTE: Don't have to overwrite properties as the will only work without a the log
@@ -82,3 +96,30 @@ def iter_items(cls, repo, common_path = None):
"""Equivalent to SymbolicReference.iter_items, but will return non-detached
references as well."""
return cls._iter_items(repo, common_path)
+
+ #}END interface
+
+
+ #{ Remote Interface
+
+ @property
+ @require_remote_ref_path
+ def remote_name(self):
+ """
+ :return:
+ Name of the remote we are a reference of, such as 'origin' for a reference
+ named 'origin/master'"""
+ tokens = self.path.split('/')
+ # /refs/remotes//
+ return tokens[2]
+
+ @property
+ @require_remote_ref_path
+ def remote_head(self):
+ """:return: Name of the remote head itself, i.e. master.
+ :note: The returned name is usually not qualified enough to uniquely identify
+ a branch"""
+ tokens = self.path.split('/')
+ return '/'.join(tokens[3:])
+
+ #} END remote interface
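The two remote properties above only split the reference path; a worked example of the token layout they rely on:

    # refs / remotes / <remote name> / <remote head, which may itself contain slashes>
    path = "refs/remotes/origin/feature/topic"
    tokens = path.split('/')
    assert tokens[2] == "origin"                     # what remote_name returns
    assert '/'.join(tokens[3:]) == "feature/topic"   # what remote_head returns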
diff --git a/git/refs/remote.py b/git/refs/remote.py
index b7b07d4ba..d7bfc3e0f 100644
--- a/git/refs/remote.py
+++ b/git/refs/remote.py
@@ -1,16 +1,18 @@
-from head import Head
-from git.util import join_path
-from gitdb.util import join
-
import os
-
+from headref import Head
+from git.util import (
+ join,
+ join_path
+ )
__all__ = ["RemoteReference"]
class RemoteReference(Head):
"""Represents a reference pointing to a remote head."""
- _common_path_default = "refs/remotes"
+ __slots__ = tuple()
+
+ _common_path_default = Head._remote_common_path_default
@classmethod
@@ -22,24 +24,11 @@ def iter_items(cls, repo, common_path = None, remote=None):
# END handle remote constraint
return super(RemoteReference, cls).iter_items(repo, common_path)
- @property
- def remote_name(self):
- """
- :return:
- Name of the remote we are a reference of, such as 'origin' for a reference
- named 'origin/master'"""
- tokens = self.path.split('/')
- # /refs/remotes//
- return tokens[2]
-
- @property
- def remote_head(self):
- """:return: Name of the remote head itself, i.e. master.
- :note: The returned name is usually not qualified enough to uniquely identify
- a branch"""
- tokens = self.path.split('/')
- return '/'.join(tokens[3:])
-
+ @classmethod
+ def create(cls, *args, **kwargs):
+ """Used to disable this method"""
+ raise TypeError("Cannot explicitly create remote references")
+
@classmethod
def delete(cls, repo, *refs, **kwargs):
"""Delete the given remote references.
@@ -56,8 +45,3 @@ def delete(cls, repo, *refs, **kwargs):
except OSError:
pass
# END for each ref
-
- @classmethod
- def create(cls, *args, **kwargs):
- """Used to disable this method"""
- raise TypeError("Cannot explicitly create remote references")
diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py
index aec68750d..2c8faa91b 100644
--- a/git/refs/symbolic.py
+++ b/git/refs/symbolic.py
@@ -1,24 +1,26 @@
import os
-from git.objects import Object, Commit
+import re
+
+from git.objects import (
+ Object,
+ Commit
+ )
from git.util import (
join_path,
join_path_native,
to_native_path_linux,
- assure_directory_exists
+ assure_directory_exists,
+ join,
+ dirname,
+ isdir,
+ exists,
+ isfile,
+ rename,
+ hex_to_bin,
+ LockedFD
)
-from gitdb.exc import BadObject
-from gitdb.util import (
- join,
- dirname,
- isdir,
- exists,
- isfile,
- rename,
- hex_to_bin,
- LockedFD
- )
-
+from git.exc import BadObject
from log import RefLog
__all__ = ["SymbolicReference"]
@@ -30,11 +32,28 @@ class SymbolicReference(object):
A typical example for a symbolic reference is HEAD."""
__slots__ = ("repo", "path")
+
_resolve_ref_on_create = False
_points_to_commits_only = True
_common_path_default = ""
+ _remote_common_path_default = "refs/remotes"
_id_attribute_ = "name"
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+
+ #{ Configuration
+ # Object class to be used when instantiating objects
+ ObjectCls = Object
+ CommitCls = Commit
+
+ # all of the following are set by the package initializer
+ HEADCls = None
+ HeadCls = None
+ RemoteReferenceCls = None
+ TagReferenceCls = None
+ ReferenceCls = None
+ #}END configuration
+
def __init__(self, repo, path):
self.repo = repo
self.path = path
@@ -46,7 +65,9 @@ def __repr__(self):
return '' % (self.__class__.__name__, self.path)
def __eq__(self, other):
- return self.path == other.path
+ if hasattr(other, 'path'):
+ return self.path == other.path
+ return False
def __ne__(self, other):
return not ( self == other )
@@ -75,7 +96,7 @@ def _iter_packed_refs(cls, repo):
"""Returns an iterator yielding pairs of sha1/path pairs for the corresponding refs.
:note: The packed refs file will be kept open as long as we iterate"""
try:
- fp = open(cls._get_packed_refs_path(repo), 'r')
+ fp = open(cls._get_packed_refs_path(repo), 'rb')
for line in fp:
line = line.strip()
if not line:
@@ -143,20 +164,53 @@ def _get_ref_info(cls, repo, ref_path):
return (None, tokens[1])
# its a commit
- if repo.re_hexsha_only.match(tokens[0]):
+ if cls.re_hexsha_only.match(tokens[0]):
return (tokens[0], None)
raise ValueError("Failed to parse reference information from %r" % ref_path)
- def _get_object(self):
+ def _get_object_sha(self):
"""
:return:
- The object our ref currently refers to. Refs can be cached, they will
+ The binary sha to the object our ref currently refers to. Refs can be cached, they will
always point to the actual object as it gets re-created on each query"""
+ return hex_to_bin(self.dereference_recursive(self.repo, self.path))
+
+ def _get_object(self):
+ """
+ :return:
+ The object our ref currently refers to."""
# have to be dynamic here as we may be a tag which can point to anything
# Our path will be resolved to the hexsha which will be used accordingly
- return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
+ return self.ObjectCls.new_from_sha(self.repo, self._get_object_sha())
+ def set_object(self, object_id, logmsg = None):
+ """Set the object we point to, possibly dereference our symbolic reference first.
+ If the reference does not exist, it will be created
+
+ :param object_id: a reference specifier string, a SymbolicReference or an object hex sha.
+ SymbolicReferences will be dereferenced beforehand to obtain the object they point to
+ :param logmsg: If not None, the message will be used in the reflog entry to be
+ written. Otherwise the reflog is not altered
+ :note: plain SymbolicReferences may not actually point to objects by convention
+ :return: self"""
+ if isinstance(object_id, SymbolicReference):
+ object_id = object_id.object
+ #END resolve references
+
+ is_detached = True
+ try:
+ is_detached = self.is_detached
+ except ValueError:
+ pass
+ # END handle non-existing ones
+
+ if is_detached:
+ return self.set_reference(object_id, logmsg)
+
+ # set the commit on our reference
+ return self._get_reference().set_object(object_id, logmsg)
+
def _get_commit(self):
"""
:return:
@@ -167,7 +221,7 @@ def _get_commit(self):
obj = obj.object
#END dereference tag
- if obj.type != Commit.type:
+ if obj.type != self.CommitCls.type:
raise TypeError("Symbolic Reference pointed to object %r, commit was required" % obj)
#END handle type
return obj
@@ -179,20 +233,20 @@ def set_commit(self, commit, logmsg = None):
a commit
:return: self"""
# check the type - assume the best if it is a base-string
- invalid_type = False
- if isinstance(commit, Object):
- invalid_type = commit.type != Commit.type
+ is_invalid_type = False
+ if isinstance(commit, self.ObjectCls):
+ is_invalid_type = commit.type != self.CommitCls.type
elif isinstance(commit, SymbolicReference):
- invalid_type = commit.object.type != Commit.type
+ is_invalid_type = commit.object.type != self.CommitCls.type
else:
try:
- invalid_type = self.repo.rev_parse(commit).type != Commit.type
+ is_invalid_type = self.repo.resolve_object(commit).type != self.CommitCls.type
except BadObject:
raise ValueError("Invalid object: %s" % commit)
#END handle exception
# END verify type
- if invalid_type:
+ if is_invalid_type:
raise ValueError("Need commit, got %r" % commit)
#END handle raise
@@ -202,35 +256,9 @@ def set_commit(self, commit, logmsg = None):
return self
- def set_object(self, object, logmsg = None):
- """Set the object we point to, possibly dereference our symbolic reference first.
- If the reference does not exist, it will be created
-
- :param object: a refspec, a SymbolicReference or an Object instance. SymbolicReferences
- will be dereferenced beforehand to obtain the object they point to
- :param logmsg: If not None, the message will be used in the reflog entry to be
- written. Otherwise the reflog is not altered
- :note: plain SymbolicReferences may not actually point to objects by convention
- :return: self"""
- if isinstance(object, SymbolicReference):
- object = object.object
- #END resolve references
-
- is_detached = True
- try:
- is_detached = self.is_detached
- except ValueError:
- pass
- # END handle non-existing ones
-
- if is_detached:
- return self.set_reference(object, logmsg)
-
- # set the commit on our reference
- return self._get_reference().set_object(object, logmsg)
-
commit = property(_get_commit, set_commit, doc="Query or set commits directly")
object = property(_get_object, set_object, doc="Return the object our ref currently refers to")
+ object_binsha = property(_get_object_sha, set_object, doc="Return the object our ref currently refers to")
def _get_reference(self):
""":return: Reference Object we point to
@@ -247,7 +275,7 @@ def set_reference(self, ref, logmsg = None):
will be set which effectively detaches the refererence if it was a purely
symbolic one.
- :param ref: SymbolicReference instance, Object instance or refspec string
+ :param ref: SymbolicReference instance, hexadecimal sha string or refspec string
Only if the ref is a SymbolicRef instance, we will point to it. Everthiny
else is dereferenced to obtain the actual object.
:param logmsg: If set to a string, the message will be used in the reflog.
@@ -263,12 +291,12 @@ def set_reference(self, ref, logmsg = None):
obj = None
if isinstance(ref, SymbolicReference):
write_value = "ref: %s" % ref.path
- elif isinstance(ref, Object):
+ elif isinstance(ref, self.ObjectCls):
obj = ref
write_value = ref.hexsha
elif isinstance(ref, basestring):
try:
- obj = self.repo.rev_parse(ref+"^{}") # optionally deref tags
+ obj = self.repo.resolve_object(ref+"^{}") # optionally deref tags
write_value = obj.hexsha
except BadObject:
raise ValueError("Could not extract object from %s" % ref)
@@ -318,7 +346,7 @@ def is_valid(self):
a valid object or reference."""
try:
self.object
- except (OSError, ValueError):
+ except (OSError, ValueError, BadObject):
return False
else:
return True
@@ -398,7 +426,7 @@ def delete(cls, repo, path):
# check packed refs
pack_file_path = cls._get_packed_refs_path(repo)
try:
- reader = open(pack_file_path)
+ reader = open(pack_file_path, 'rb')
except (OSError,IOError):
pass # it didnt exist at all
else:
@@ -425,7 +453,10 @@ def delete(cls, repo, path):
# write the new lines
if made_change:
- open(pack_file_path, 'w').writelines(new_lines)
+ # write-binary is required, otherwise windows will
+ # open the file in text mode and change LF to CRLF !
+ open(pack_file_path, 'wb').writelines(new_lines)
+ # END write out file
# END open exception handling
# END handle deletion
@@ -449,7 +480,16 @@ def _create(cls, repo, path, resolve, reference, force, logmsg=None):
# figure out target data
target = reference
if resolve:
- target = repo.rev_parse(str(reference))
+ # could just use the resolve method, but it could be expensive
+ # so we handle most common cases ourselves
+ if isinstance(reference, cls.ObjectCls):
+ target = reference.hexsha
+ elif isinstance(reference, SymbolicReference):
+ target = reference.object.hexsha
+ else:
+ target = repo.resolve_object(str(reference))
+ #END handle resolution
+ #END need resolution
if not force and isfile(abs_ref_path):
target_data = str(target)
@@ -549,7 +589,7 @@ def _iter_items(cls, repo, common_path = None):
# Currently we do not follow links
for root, dirs, files in os.walk(join_path_native(repo.git_dir, common_path)):
if 'refs/' not in root: # skip non-refs subfolders
- refs_id = [ i for i,d in enumerate(dirs) if d == 'refs' ]
+ refs_id = [ d for d in dirs if d == 'refs' ]
if refs_id:
dirs[0:] = ['refs']
# END prune non-refs folders
@@ -579,7 +619,7 @@ def _iter_items(cls, repo, common_path = None):
def iter_items(cls, repo, common_path = None):
"""Find all refs in the repository
- :param repo: is the Repo
+ :param repo: is the repo
:param common_path:
Optional keyword argument to the path which is to be shared by all
@@ -588,12 +628,12 @@ def iter_items(cls, repo, common_path = None):
refs suitable for the actual class are returned.
:return:
- git.SymbolicReference[], each of them is guaranteed to be a symbolic
- ref which is not detached.
+ git.SymbolicReference[], each of them is guaranteed to be *only* a symbolic
+ ref, or a derived class which is not detached
List is lexigraphically sorted
The returned objects represent actual subclasses, such as Head or TagReference"""
- return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached )
+ return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == cls or not r.is_detached )
@classmethod
def from_path(cls, repo, path):
@@ -606,7 +646,7 @@ def from_path(cls, repo, path):
if not path:
raise ValueError("Cannot create Reference from %r" % path)
- for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference):
+ for ref_type in (cls.HEADCls, cls.HeadCls, cls.RemoteReferenceCls, cls.TagReferenceCls, cls.ReferenceCls, cls):
try:
instance = ref_type(repo, path)
if instance.__class__ == SymbolicReference and instance.is_detached:
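A brief sketch of the resulting lookup behaviour: from_path now walks the configured *Cls types instead of hard-coded classes, and object_binsha exposes the raw binary sha without instantiating an object (the repository path is a placeholder):

    from git.refs import SymbolicReference
    from git.repo import Repo

    repo = Repo("/path/to/repo")
    head = SymbolicReference.from_path(repo, "HEAD")                # resolved via HEADCls first
    branch = SymbolicReference.from_path(repo, "refs/heads/master") # falls through to a concrete ref type
    binsha = branch.object_binsha                                   # 20-byte binary sha of the pointed-to object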
diff --git a/git/refs/tag.py b/git/refs/tag.py
index c09d814dc..3a1433be6 100644
--- a/git/refs/tag.py
+++ b/git/refs/tag.py
@@ -2,8 +2,6 @@
__all__ = ["TagReference", "Tag"]
-
-
class TagReference(Reference):
"""Class representing a lightweight tag reference which either points to a commit
,a tag object or any other object. In the latter case additional information,
@@ -16,7 +14,6 @@ class TagReference(Reference):
print tagref.commit.message
if tagref.tag is not None:
print tagref.tag.message"""
-
__slots__ = tuple()
_common_path_default = "refs/tags"
@@ -45,7 +42,7 @@ def tag(self):
# make object read-only
# It should be reasonably hard to adjust an existing tag
object = property(Reference._get_object)
-
+
@classmethod
def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs):
"""Create a new tag reference.
@@ -85,7 +82,5 @@ def delete(cls, repo, *tags):
"""Delete the given existing tag or tags"""
repo.git.tag("-d", *tags)
-
-
# provide an alias
Tag = TagReference
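A minimal sketch of the tag interface whose signatures appear above (tag name and message are placeholders):

    from git.refs import TagReference
    from git.repo import Repo

    repo = Repo("/path/to/repo")
    tag = TagReference.create(repo, "v0.3.2", ref='HEAD',
                              message="release tag")
    TagReference.delete(repo, tag)    # thin wrapper around git tag -d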
diff --git a/git/remote.py b/git/remote.py
index d3639f7bf..47adedbf7 100644
--- a/git/remote.py
+++ b/git/remote.py
@@ -13,255 +13,27 @@
from git.util import (
LazyMixin,
Iterable,
- IterableList,
- RemoteProgress
+ IterableList
)
+from git.db.interface import TransportDB
+from refs import RemoteReference
-from refs import (
- Reference,
- RemoteReference,
- SymbolicReference,
- TagReference
- )
-
-from git.util import join_path
-from gitdb.util import join
-
-import re
import os
-import sys
-__all__ = ('RemoteProgress', 'PushInfo', 'FetchInfo', 'Remote')
+__all__ = ['Remote']
-
class PushInfo(object):
- """
- Carries information about the result of a push operation of a single head::
-
- info = remote.push()[0]
- info.flags # bitflags providing more information about the result
- info.local_ref # Reference pointing to the local reference that was pushed
- # It is None if the ref was deleted.
- info.remote_ref_string # path to the remote reference located on the remote side
- info.remote_ref # Remote Reference on the local side corresponding to
- # the remote_ref_string. It can be a TagReference as well.
- info.old_commit # commit at which the remote_ref was standing before we pushed
- # it to local_ref.commit. Will be None if an error was indicated
- info.summary # summary line providing human readable english text about the push
- """
- __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary')
-
- NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \
- FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ]
-
- _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0,
- '+' : FORCED_UPDATE, ' ' : FAST_FORWARD,
- '=' : UP_TO_DATE, '!' : ERROR }
-
- def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None,
- summary=''):
- """ Initialize a new instance """
- self.flags = flags
- self.local_ref = local_ref
- self.remote_ref_string = remote_ref_string
- self._remote = remote
- self.old_commit = old_commit
- self.summary = summary
-
- @property
- def remote_ref(self):
- """
- :return:
- Remote Reference or TagReference in the local repository corresponding
- to the remote_ref_string kept in this instance."""
- # translate heads to a local remote, tags stay as they are
- if self.remote_ref_string.startswith("refs/tags"):
- return TagReference(self._remote.repo, self.remote_ref_string)
- elif self.remote_ref_string.startswith("refs/heads"):
- remote_ref = Reference(self._remote.repo, self.remote_ref_string)
- return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name))
- else:
- raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string)
- # END
-
- @classmethod
- def _from_line(cls, remote, line):
- """Create a new PushInfo instance as parsed from line which is expected to be like
- refs/heads/master:refs/heads/master 05d2687..1d0568e"""
- control_character, from_to, summary = line.split('\t', 3)
- flags = 0
-
- # control character handling
- try:
- flags |= cls._flag_map[ control_character ]
- except KeyError:
- raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line))
- # END handle control character
-
- # from_to handling
- from_ref_string, to_ref_string = from_to.split(':')
- if flags & cls.DELETED:
- from_ref = None
- else:
- from_ref = Reference.from_path(remote.repo, from_ref_string)
-
- # commit handling, could be message or commit info
- old_commit = None
- if summary.startswith('['):
- if "[rejected]" in summary:
- flags |= cls.REJECTED
- elif "[remote rejected]" in summary:
- flags |= cls.REMOTE_REJECTED
- elif "[remote failure]" in summary:
- flags |= cls.REMOTE_FAILURE
- elif "[no match]" in summary:
- flags |= cls.ERROR
- elif "[new tag]" in summary:
- flags |= cls.NEW_TAG
- elif "[new branch]" in summary:
- flags |= cls.NEW_HEAD
- # uptodate encoded in control character
- else:
- # fast-forward or forced update - was encoded in control character,
- # but we parse the old and new commit
- split_token = "..."
- if control_character == " ":
- split_token = ".."
- old_sha, new_sha = summary.split(' ')[0].split(split_token)
- # have to use constructor here as the sha usually is abbreviated
- old_commit = remote.repo.commit(old_sha)
- # END message handling
-
- return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary)
-
-
-class FetchInfo(object):
- """
- Carries information about the results of a fetch operation of a single head::
-
- info = remote.fetch()[0]
- info.ref # Symbolic Reference or RemoteReference to the changed
- # remote head or FETCH_HEAD
- info.flags # additional flags to be & with enumeration members,
- # i.e. info.flags & info.REJECTED
- # is 0 if ref is SymbolicReference
- info.note # additional notes given by git-fetch intended for the user
- info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD,
- # field is set to the previous location of ref, otherwise None
- """
- __slots__ = ('ref','old_commit', 'flags', 'note')
-
- NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \
- FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ]
+ """Wrapper for basic PushInfo to provide the previous interface which includes
+ resolved objects instead of plain shas
- # %c %-*s %-*s -> %s (%s)
- re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
+ old_commit # object for the corresponding old_commit_sha"""
- _flag_map = { '!' : ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0,
- '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD }
- def __init__(self, ref, flags, note = '', old_commit = None):
- """
- Initialize a new instance
- """
- self.ref = ref
- self.flags = flags
- self.note = note
- self.old_commit = old_commit
-
- def __str__(self):
- return self.name
-
- @property
- def name(self):
- """:return: Name of our remote ref"""
- return self.ref.name
-
- @property
- def commit(self):
- """:return: Commit of our remote ref"""
- return self.ref.commit
-
- @classmethod
- def _from_line(cls, repo, line, fetch_line):
- """Parse information from the given line as returned by git-fetch -v
- and return a new FetchInfo object representing this information.
-
- We can handle a line as follows
- "%c %-*s %-*s -> %s%s"
-
- Where c is either ' ', !, +, -, *, or =
- ! means error
- + means success forcing update
- - means a tag was updated
- * means birth of new branch or tag
- = means the head was up to date ( and not moved )
- ' ' means a fast-forward
-
- fetch line is the corresponding line from FETCH_HEAD, like
- acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo"""
- match = cls.re_fetch_result.match(line)
- if match is None:
- raise ValueError("Failed to parse line: %r" % line)
-
- # parse lines
- control_character, operation, local_remote_ref, remote_local_ref, note = match.groups()
- try:
- new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t")
- ref_type_name, fetch_note = fetch_note.split(' ', 1)
- except ValueError: # unpack error
- raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line)
-
- # handle FETCH_HEAD and figure out ref type
- # If we do not specify a target branch like master:refs/remotes/origin/master,
- # the fetch result is stored in FETCH_HEAD which destroys the rule we usually
- # have. In that case we use a symbolic reference which is detached
- ref_type = None
- if remote_local_ref == "FETCH_HEAD":
- ref_type = SymbolicReference
- elif ref_type_name == "branch":
- ref_type = RemoteReference
- elif ref_type_name == "tag":
- ref_type = TagReference
- else:
- raise TypeError("Cannot handle reference type: %r" % ref_type_name)
-
- # create ref instance
- if ref_type is SymbolicReference:
- remote_local_ref = ref_type(repo, "FETCH_HEAD")
- else:
- remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip()))
- # END create ref instance
-
- note = ( note and note.strip() ) or ''
-
- # parse flags from control_character
- flags = 0
- try:
- flags |= cls._flag_map[control_character]
- except KeyError:
- raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line))
- # END control char exception hanlding
-
- # parse operation string for more info - makes no sense for symbolic refs
- old_commit = None
- if isinstance(remote_local_ref, Reference):
- if 'rejected' in operation:
- flags |= cls.REJECTED
- if 'new tag' in operation:
- flags |= cls.NEW_TAG
- if 'new branch' in operation:
- flags |= cls.NEW_HEAD
- if '...' in operation or '..' in operation:
- split_token = '...'
- if control_character == ' ':
- split_token = split_token[:-1]
- old_commit = repo.rev_parse(operation.split(split_token)[0])
- # END handle refspec
- # END reference flag handling
-
- return cls(remote_local_ref, flags, note, old_commit)
+class FetchInfo(object):
+ """Wrapper to restore the previous interface, resolving objects and wrapping
+ references"""
+
class Remote(LazyMixin, Iterable):
"""Provides easy read and write access to a git remote.
@@ -280,6 +52,16 @@ def __init__(self, repo, name):
:param repo: The repository we are a remote of
:param name: the name of the remote, i.e. 'origin'"""
+ if not hasattr(repo, 'git'):
+ # note: at some point we could just create a git command instance ourselves
+ # but let's just be lazy for now
+ raise AssertionError("Require repository to provide a git command instance currently")
+ #END assert git cmd
+
+ if not isinstance(repo, TransportDB):
+ raise AssertionError("Require TransportDB interface implementation")
+ #END verify interface
+
self.repo = repo
self.name = name
@@ -432,97 +214,6 @@ def update(self, **kwargs):
self.repo.git.remote("update", self.name)
return self
- def _digest_process_messages(self, fh, progress):
- """Read progress messages from file-like object fh, supplying the respective
- progress messages to the progress instance.
-
- :return: list(line, ...) list of lines without linebreaks that did
- not contain progress information"""
- line_so_far = ''
- dropped_lines = list()
- while True:
- char = fh.read(1)
- if not char:
- break
-
- if char in ('\r', '\n'):
- dropped_lines.extend(progress._parse_progress_line(line_so_far))
- line_so_far = ''
- else:
- line_so_far += char
- # END process parsed line
- # END while file is not done reading
- return dropped_lines
-
-
- def _finalize_proc(self, proc):
- """Wait for the process (fetch, pull or push) and handle its errors accordingly"""
- try:
- proc.wait()
- except GitCommandError,e:
- # if a push has rejected items, the command has non-zero return status
- # a return status of 128 indicates a connection error - reraise the previous one
- if proc.poll() == 128:
- raise
- pass
- # END exception handling
-
-
- def _get_fetch_info_from_stderr(self, proc, progress):
- # skip first line as it is some remote info we are not interested in
- output = IterableList('name')
-
-
- # lines which are no progress are fetch info lines
- # this also waits for the command to finish
- # Skip some progress lines that don't provide relevant information
- fetch_info_lines = list()
- for line in self._digest_process_messages(proc.stderr, progress):
- if line.startswith('From') or line.startswith('remote: Total'):
- continue
- elif line.startswith('warning:'):
- print >> sys.stderr, line
- continue
- elif line.startswith('fatal:'):
- raise GitCommandError(("Error when fetching: %s" % line,), 2)
- # END handle special messages
- fetch_info_lines.append(line)
- # END for each line
-
- # read head information
- fp = open(join(self.repo.git_dir, 'FETCH_HEAD'),'r')
- fetch_head_info = fp.readlines()
- fp.close()
-
- assert len(fetch_info_lines) == len(fetch_head_info)
-
- output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line)
- for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info))
-
- self._finalize_proc(proc)
- return output
-
- def _get_push_info(self, proc, progress):
- # read progress information from stderr
- # we hope stdout can hold all the data, it should ...
- # read the lines manually as it will use carriage returns between the messages
- # to override the previous one. This is why we read the bytes manually
- self._digest_process_messages(proc.stderr, progress)
-
- output = IterableList('name')
- for line in proc.stdout.readlines():
- try:
- output.append(PushInfo._from_line(self, line))
- except ValueError:
- # if an error happens, additional info is given which we cannot parse
- pass
- # END exception handling
- # END for each line
-
- self._finalize_proc(proc)
- return output
-
-
def fetch(self, refspec=None, progress=None, **kwargs):
"""Fetch the latest changes for this remote
@@ -546,8 +237,7 @@ def fetch(self, refspec=None, progress=None, **kwargs):
:note:
As fetch does not provide progress information to non-ttys, we cannot make
it available here unfortunately as in the 'push' method."""
- proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs)
- return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())
+ return self.repo.fetch(self.name, refspec, progress, **kwargs)
def pull(self, refspec=None, progress=None, **kwargs):
"""Pull changes from the given branch, being the same as a fetch followed
@@ -557,8 +247,7 @@ def pull(self, refspec=None, progress=None, **kwargs):
:param progress: see 'push' method
:param kwargs: Additional arguments to be passed to git-pull
:return: Please see 'fetch' method """
- proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs)
- return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())
+ return self.repo.pull(self.name, refspec, progress, **kwargs)
def push(self, refspec=None, progress=None, **kwargs):
"""Push changes from source branch in refspec to target branch in refspec.
@@ -578,8 +267,7 @@ def push(self, refspec=None, progress=None, **kwargs):
in their flags.
If the operation fails completely, the length of the returned IterableList will
be null."""
- proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs)
- return self._get_push_info(proc, progress or RemoteProgress())
+ return self.repo.push(self.name, refspec, progress, **kwargs)
@property
def config_reader(self):
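After this rewrite the transport methods merely forward to the repository; a minimal sketch of the resulting call pattern, assuming the repository object satisfies the checks done in __init__ (a git property and the TransportDB interface):

    from git.remote import Remote
    from git.repo import Repo

    repo = Repo("/path/to/repo")
    origin = Remote(repo, "origin")
    origin.fetch()            # now simply calls repo.fetch("origin", None, None)
    origin.push("master")     # now simply calls repo.push("origin", "master", None)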
diff --git a/git/repo.py b/git/repo.py
new file mode 100644
index 000000000..8d5c4021e
--- /dev/null
+++ b/git/repo.py
@@ -0,0 +1,45 @@
+# repo.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""This module is just to maintain compatibility to git-python 0.3x"""
+
+from git.db.complex import CmdCompatibilityGitDB
+
+
+import warnings
+
+__all__ = ('Repo', )
+
+
+class Repo(CmdCompatibilityGitDB):
+ """Represents a git repository and allows you to query references,
+ gather commit information, generate diffs, create and clone repositories, and query
+ the log.
+
+ The following attributes are worth using:
+
+ 'working_dir' is the working directory of the git command, which is the working tree
+ directory if available or the .git directory in case of bare repositories
+
+ 'working_tree_dir' is the working tree directory, but will raise AssertionError
+ if we are a bare repository.
+
+ 'git_dir' is the .git repository directory, which is always set."""
+
+ def __init__(self, path=None, odbt = None):
+ """Create a new Repo instance
+
+ :param path: is the path to either the root git directory or the bare git repo::
+
+ repo = Repo("/Users/mtrier/Development/git-python")
+ repo = Repo("/Users/mtrier/Development/git-python.git")
+ repo = Repo("~/Development/git-python.git")
+ repo = Repo("$REPOSITORIES/Development/git-python.git")
+ :raise InvalidDBRoot:
+ :return: git.Repo """
+ if odbt is not None:
+ warnings.warn("deprecated use of odbt", DeprecationWarning)
+ #END handle old parameter
+ super(Repo, self).__init__(path)
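A short sketch of the compatibility shim in use; only a non-None odbt triggers the deprecation warning:

    import warnings
    from git.repo import Repo

    repo = Repo("~/Development/git-python.git")            # normal use, no odbt needed

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        Repo("~/Development/git-python.git", odbt=object)  # any non-None odbt is deprecated
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)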
diff --git a/git/repo/__init__.py b/git/repo/__init__.py
deleted file mode 100644
index 8902a254c..000000000
--- a/git/repo/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Initialize the Repo package"""
-
-from base import *
\ No newline at end of file
diff --git a/git/repo/base.py b/git/repo/base.py
deleted file mode 100644
index 0405a5f9c..000000000
--- a/git/repo/base.py
+++ /dev/null
@@ -1,753 +0,0 @@
-# repo.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from git.exc import InvalidGitRepositoryError, NoSuchPathError
-from git.cmd import Git
-from git.util import Actor
-from git.refs import *
-from git.index import IndexFile
-from git.objects import *
-from git.config import GitConfigParser
-from git.remote import Remote
-from git.db import (
- GitCmdObjectDB,
- GitDB
- )
-
-
-from gitdb.util import (
- join,
- isfile,
- hex_to_bin
- )
-
-from fun import (
- rev_parse,
- is_git_dir,
- touch
- )
-
-import os
-import sys
-import re
-
-DefaultDBType = GitDB
-if sys.version_info[1] < 5: # python 2.4 compatiblity
- DefaultDBType = GitCmdObjectDB
-# END handle python 2.4
-
-
-__all__ = ('Repo', )
-
-
-class Repo(object):
- """Represents a git repository and allows you to query references,
- gather commit information, generate diffs, create and clone repositories query
- the log.
-
- The following attributes are worth using:
-
- 'working_dir' is the working directory of the git command, wich is the working tree
- directory if available or the .git directory in case of bare repositories
-
- 'working_tree_dir' is the working tree directory, but will raise AssertionError
- if we are a bare repository.
-
- 'git_dir' is the .git repository directoy, which is always set."""
- DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
- __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" )
-
- # precompiled regex
- re_whitespace = re.compile(r'\s+')
- re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
- re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
- re_author_committer_start = re.compile(r'^(author|committer)')
- re_tab_full_line = re.compile(r'^\t(.*)$')
-
- # invariants
- # represents the configuration level of a configuration file
- config_level = ("system", "global", "repository")
-
- def __init__(self, path=None, odbt = DefaultDBType):
- """Create a new Repo instance
-
- :param path: is the path to either the root git directory or the bare git repo::
-
- repo = Repo("/Users/mtrier/Development/git-python")
- repo = Repo("/Users/mtrier/Development/git-python.git")
- repo = Repo("~/Development/git-python.git")
- repo = Repo("$REPOSITORIES/Development/git-python.git")
-
- :param odbt: Object DataBase type - a type which is constructed by providing
- the directory containing the database objects, i.e. .git/objects. It will
- be used to access all object data
- :raise InvalidGitRepositoryError:
- :raise NoSuchPathError:
- :return: git.Repo """
- epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd())))
-
- if not os.path.exists(epath):
- raise NoSuchPathError(epath)
-
- self.working_dir = None
- self._working_tree_dir = None
- self.git_dir = None
- curpath = epath
-
- # walk up the path to find the .git dir
- while curpath:
- if is_git_dir(curpath):
- self.git_dir = curpath
- self._working_tree_dir = os.path.dirname(curpath)
- break
- gitpath = join(curpath, '.git')
- if is_git_dir(gitpath):
- self.git_dir = gitpath
- self._working_tree_dir = curpath
- break
- curpath, dummy = os.path.split(curpath)
- if not dummy:
- break
- # END while curpath
-
- if self.git_dir is None:
- raise InvalidGitRepositoryError(epath)
-
- self._bare = False
- try:
- self._bare = self.config_reader("repository").getboolean('core','bare')
- except Exception:
- # lets not assume the option exists, although it should
- pass
-
- # adjust the wd in case we are actually bare - we didn't know that
- # in the first place
- if self._bare:
- self._working_tree_dir = None
- # END working dir handling
-
- self.working_dir = self._working_tree_dir or self.git_dir
- self.git = Git(self.working_dir)
-
- # special handling, in special times
- args = [join(self.git_dir, 'objects')]
- if issubclass(odbt, GitCmdObjectDB):
- args.append(self.git)
- self.odb = odbt(*args)
-
- def __eq__(self, rhs):
- if isinstance(rhs, Repo):
- return self.git_dir == rhs.git_dir
- return False
-
- def __ne__(self, rhs):
- return not self.__eq__(rhs)
-
- def __hash__(self):
- return hash(self.git_dir)
-
- def __repr__(self):
- return "%s(%r)" % (type(self).__name__, self.git_dir)
-
- # Description property
- def _get_description(self):
- filename = join(self.git_dir, 'description')
- return file(filename).read().rstrip()
-
- def _set_description(self, descr):
- filename = join(self.git_dir, 'description')
- file(filename, 'w').write(descr+'\n')
-
- description = property(_get_description, _set_description,
- doc="the project's description")
- del _get_description
- del _set_description
-
-
-
- @property
- def working_tree_dir(self):
- """:return: The working tree directory of our git repository
- :raise AssertionError: If we are a bare repository"""
- if self._working_tree_dir is None:
- raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir )
- return self._working_tree_dir
-
- @property
- def bare(self):
- """:return: True if the repository is bare"""
- return self._bare
-
- @property
- def heads(self):
- """A list of ``Head`` objects representing the branch heads in
- this repo
-
- :return: ``git.IterableList(Head, ...)``"""
- return Head.list_items(self)
-
- @property
- def references(self):
- """A list of Reference objects representing tags, heads and remote references.
-
- :return: IterableList(Reference, ...)"""
- return Reference.list_items(self)
-
- # alias for references
- refs = references
-
- # alias for heads
- branches = heads
-
- @property
- def index(self):
- """:return: IndexFile representing this repository's index."""
- return IndexFile(self)
-
- @property
- def head(self):
- """:return: HEAD Object pointing to the current head reference"""
- return HEAD(self,'HEAD')
-
- @property
- def remotes(self):
- """A list of Remote objects allowing to access and manipulate remotes
- :return: ``git.IterableList(Remote, ...)``"""
- return Remote.list_items(self)
-
- def remote(self, name='origin'):
- """:return: Remote with the specified name
- :raise ValueError: if no remote with such a name exists"""
- return Remote(self, name)
-
- #{ Submodules
-
- @property
- def submodules(self):
- """
- :return: git.IterableList(Submodule, ...) of direct submodules
- available from the current head"""
- return Submodule.list_items(self)
-
- def submodule(self, name):
- """ :return: Submodule with the given name
- :raise ValueError: If no such submodule exists"""
- try:
- return self.submodules[name]
- except IndexError:
- raise ValueError("Didn't find submodule named %r" % name)
- # END exception handling
-
- def create_submodule(self, *args, **kwargs):
- """Create a new submodule
-
- :note: See the documentation of Submodule.add for a description of the
- applicable parameters
- :return: created submodules"""
- return Submodule.add(self, *args, **kwargs)
-
- def iter_submodules(self, *args, **kwargs):
- """An iterator yielding Submodule instances, see Traversable interface
- for a description of args and kwargs
- :return: Iterator"""
- return RootModule(self).traverse(*args, **kwargs)
-
- def submodule_update(self, *args, **kwargs):
- """Update the submodules, keeping the repository consistent as it will
- take the previous state into consideration. For more information, please
- see the documentation of RootModule.update"""
- return RootModule(self).update(*args, **kwargs)
-
- #}END submodules
-
- @property
- def tags(self):
- """A list of ``Tag`` objects that are available in this repo
- :return: ``git.IterableList(TagReference, ...)`` """
- return TagReference.list_items(self)
-
- def tag(self,path):
- """:return: TagReference Object, reference pointing to a Commit or Tag
- :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5 """
- return TagReference(self, path)
-
- def create_head(self, path, commit='HEAD', force=False, logmsg=None ):
- """Create a new head within the repository.
- For more documentation, please see the Head.create method.
-
- :return: newly created Head Reference"""
- return Head.create(self, path, commit, force, logmsg)
-
- def delete_head(self, *heads, **kwargs):
- """Delete the given heads
-
- :param kwargs: Additional keyword arguments to be passed to git-branch"""
- return Head.delete(self, *heads, **kwargs)
-
- def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs):
- """Create a new tag reference.
- For more documentation, please see the TagReference.create method.
-
- :return: TagReference object """
- return TagReference.create(self, path, ref, message, force, **kwargs)
-
- def delete_tag(self, *tags):
- """Delete the given tag references"""
- return TagReference.delete(self, *tags)
-
- def create_remote(self, name, url, **kwargs):
- """Create a new remote.
-
- For more information, please see the documentation of the Remote.create
- methods
-
- :return: Remote reference"""
- return Remote.create(self, name, url, **kwargs)
-
- def delete_remote(self, remote):
- """Delete the given remote."""
- return Remote.remove(self, remote)
-
- def _get_config_path(self, config_level ):
- # we do not support an absolute path of the gitconfig on windows ,
- # use the global config instead
- if sys.platform == "win32" and config_level == "system":
- config_level = "global"
-
- if config_level == "system":
- return "/etc/gitconfig"
- elif config_level == "global":
- return os.path.normpath(os.path.expanduser("~/.gitconfig"))
- elif config_level == "repository":
- return join(self.git_dir, "config")
-
- raise ValueError( "Invalid configuration level: %r" % config_level )
-
- def config_reader(self, config_level=None):
- """
- :return:
- GitConfigParser allowing to read the full git configuration, but not to write it
-
- The configuration will include values from the system, user and repository
- configuration files.
-
- :param config_level:
- For possible values, see config_writer method
- If None, all applicable levels will be used. Specify a level in case
- you know which exact file you whish to read to prevent reading multiple files for
- instance
- :note: On windows, system configuration cannot currently be read as the path is
- unknown, instead the global path will be used."""
- files = None
- if config_level is None:
- files = [ self._get_config_path(f) for f in self.config_level ]
- else:
- files = [ self._get_config_path(config_level) ]
- return GitConfigParser(files, read_only=True)
-
- def config_writer(self, config_level="repository"):
- """
- :return:
- GitConfigParser allowing to write values of the specified configuration file level.
- Config writers should be retrieved, used to change the configuration ,and written
- right away as they will lock the configuration file in question and prevent other's
- to write it.
-
- :param config_level:
- One of the following values
- system = sytem wide configuration file
- global = user level configuration file
- repository = configuration file for this repostory only"""
- return GitConfigParser(self._get_config_path(config_level), read_only = False)
-
- def commit(self, rev=None):
- """The Commit object for the specified revision
- :param rev: revision specifier, see git-rev-parse for viable options.
- :return: ``git.Commit``"""
- if rev is None:
- return self.head.commit
- else:
- return self.rev_parse(str(rev)+"^0")
-
- def iter_trees(self, *args, **kwargs):
- """:return: Iterator yielding Tree objects
- :note: Takes all arguments known to iter_commits method"""
- return ( c.tree for c in self.iter_commits(*args, **kwargs) )
-
- def tree(self, rev=None):
- """The Tree object for the given treeish revision
- Examples::
-
- repo.tree(repo.heads[0])
-
- :param rev: is a revision pointing to a Treeish ( being a commit or tree )
- :return: ``git.Tree``
-
- :note:
- If you need a non-root level tree, find it by iterating the root tree. Otherwise
- it cannot know about its path relative to the repository root and subsequent
- operations might have unexpected results."""
- if rev is None:
- return self.head.commit.tree
- else:
- return self.rev_parse(str(rev)+"^{tree}")
-
- def iter_commits(self, rev=None, paths='', **kwargs):
- """A list of Commit objects representing the history of a given ref/commit
-
- :parm rev:
- revision specifier, see git-rev-parse for viable options.
- If None, the active branch will be used.
-
- :parm paths:
- is an optional path or a list of paths to limit the returned commits to
- Commits that do not contain that path or the paths will not be returned.
-
- :parm kwargs:
- Arguments to be passed to git-rev-list - common ones are
- max_count and skip
-
- :note: to receive only commits between two named revisions, use the
- "revA..revB" revision specifier
-
- :return ``git.Commit[]``"""
- if rev is None:
- rev = self.head.commit
-
- return Commit.iter_items(self, rev, paths, **kwargs)
-
- def _get_daemon_export(self):
- filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
- return os.path.exists(filename)
-
- def _set_daemon_export(self, value):
- filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
- fileexists = os.path.exists(filename)
- if value and not fileexists:
- touch(filename)
- elif not value and fileexists:
- os.unlink(filename)
-
- daemon_export = property(_get_daemon_export, _set_daemon_export,
- doc="If True, git-daemon may export this repository")
- del _get_daemon_export
- del _set_daemon_export
-
- def _get_alternates(self):
- """The list of alternates for this repo from which objects can be retrieved
-
- :return: list of strings being pathnames of alternates"""
- alternates_path = join(self.git_dir, 'objects', 'info', 'alternates')
-
- if os.path.exists(alternates_path):
- try:
- f = open(alternates_path)
- alts = f.read()
- finally:
- f.close()
- return alts.strip().splitlines()
- else:
- return list()
-
- def _set_alternates(self, alts):
- """Sets the alternates
-
- :parm alts:
- is the array of string paths representing the alternates at which
- git should look for objects, i.e. /home/user/repo/.git/objects
-
- :raise NoSuchPathError:
- :note:
- The method does not check for the existence of the paths in alts
- as the caller is responsible."""
- alternates_path = join(self.git_dir, 'objects', 'info', 'alternates')
- if not alts:
- if isfile(alternates_path):
- os.remove(alternates_path)
- else:
- try:
- f = open(alternates_path, 'w')
- f.write("\n".join(alts))
- finally:
- f.close()
- # END file handling
- # END alts handling
-
- alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates")
-
- def is_dirty(self, index=True, working_tree=True, untracked_files=False):
- """
- :return:
- ``True`` if the repository is considered dirty. By default it will react
- like a git-status without untracked files, hence it is dirty if the
- index or the working copy have changes."""
- if self._bare:
- # Bare repositories with no associated working directory are
- # always considered to be clean.
- return False
-
- # start from the one which is fastest to evaluate
- default_args = ('--abbrev=40', '--full-index', '--raw')
- if index:
- # diff index against HEAD
- if isfile(self.index.path) and self.head.is_valid() and \
- len(self.git.diff('HEAD', '--cached', *default_args)):
- return True
- # END index handling
- if working_tree:
- # diff index against working tree
- if len(self.git.diff(*default_args)):
- return True
- # END working tree handling
- if untracked_files:
- if len(self.untracked_files):
- return True
- # END untracked files
- return False
-
- @property
- def untracked_files(self):
- """
- :return:
- list(str,...)
-
- Files currently untracked as they have not been staged yet. Paths
- are relative to the current working directory of the git command.
-
- :note:
- ignored files will not appear here, i.e. files mentioned in .gitignore"""
- # make sure we get all files, not only untracked directories
- proc = self.git.status(untracked_files=True, as_process=True)
- stream = iter(proc.stdout)
- untracked_files = list()
- for line in stream:
- if not line.startswith("# Untracked files:"):
- continue
- # skip two lines
- stream.next()
- stream.next()
-
- for untracked_info in stream:
- if not untracked_info.startswith("#\t"):
- break
- untracked_files.append(untracked_info.replace("#\t", "").rstrip())
- # END for each untracked info line
- # END for each line
- return untracked_files
-
- @property
- def active_branch(self):
- """The name of the currently active branch.
-
- :return: Head to the active branch"""
- return self.head.reference
-
- def blame(self, rev, file):
- """The blame information for the given file at the given revision.
-
- :parm rev: revision specifier, see git-rev-parse for viable options.
- :return:
- list: [git.Commit, list: []]
- A list of tuples associating a Commit object with a list of lines that
- changed within the given commit. The Commit objects will be given in order
- of appearance."""
- data = self.git.blame(rev, '--', file, p=True)
- commits = dict()
- blames = list()
- info = None
-
- for line in data.splitlines(False):
- parts = self.re_whitespace.split(line, 1)
- firstpart = parts[0]
- if self.re_hexsha_only.search(firstpart):
- # handles
- # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
- # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
- digits = parts[-1].split(" ")
- if len(digits) == 3:
- info = {'id': firstpart}
- blames.append([None, []])
- # END blame data initialization
- else:
- m = self.re_author_committer_start.search(firstpart)
- if m:
- # handles:
- # author Tom Preston-Werner
- # author-mail
- # author-time 1192271832
- # author-tz -0700
- # committer Tom Preston-Werner
- # committer-mail
- # committer-time 1192271832
- # committer-tz -0700 - IGNORED BY US
- role = m.group(0)
- if firstpart.endswith('-mail'):
- info["%s_email" % role] = parts[-1]
- elif firstpart.endswith('-time'):
- info["%s_date" % role] = int(parts[-1])
- elif role == firstpart:
- info[role] = parts[-1]
- # END distinguish mail,time,name
- else:
- # handle
- # filename lib/grit.rb
- # summary add Blob
- #
- if firstpart.startswith('filename'):
- info['filename'] = parts[-1]
- elif firstpart.startswith('summary'):
- info['summary'] = parts[-1]
- elif firstpart == '':
- if info:
- sha = info['id']
- c = commits.get(sha)
- if c is None:
- c = Commit( self, hex_to_bin(sha),
- author=Actor._from_string(info['author'] + ' ' + info['author_email']),
- authored_date=info['author_date'],
- committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
- committed_date=info['committer_date'],
- message=info['summary'])
- commits[sha] = c
- # END if commit objects needs initial creation
- m = self.re_tab_full_line.search(line)
- text, = m.groups()
- blames[-1][0] = c
- blames[-1][1].append( text )
- info = None
- # END if we collected commit info
- # END distinguish filename,summary,rest
- # END distinguish author|committer vs filename,summary,rest
- # END distinguish hexsha vs other information
- return blames
-
- @classmethod
- def init(cls, path=None, mkdir=True, **kwargs):
- """Initialize a git repository at the given path if specified
-
- :param path:
- is the full path to the repo (traditionally ends with /.git)
- or None in which case the repository will be created in the current
- working directory
-
- :parm mkdir:
- if specified will create the repository directory if it doesn't
- already exist. Creates the directory with a mode=0755.
- Only effective if a path is explicitly given
-
- :parm kwargs:
- keyword arguments serving as additional options to the git-init command
-
- :return: ``git.Repo`` (the newly created repo)"""
-
- if mkdir and path and not os.path.exists(path):
- os.makedirs(path, 0755)
-
- # git command automatically chdir into the directory
- git = Git(path)
- output = git.init(**kwargs)
- return Repo(path)
-
- @classmethod
- def _clone(cls, git, url, path, odb_default_type, **kwargs):
- # special handling for windows for path at which the clone should be
- # created.
- # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
- # we at least give a proper error instead of letting git fail
- prev_cwd = None
- prev_path = None
- odbt = kwargs.pop('odbt', odb_default_type)
- if os.name == 'nt':
- if '~' in path:
- raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
-
- # on windows, git will think paths like c: are relative and prepend the
- # current working dir ( before it fails ). We temporarily adjust the working
- # dir to make this actually work
- match = re.match("(\w:[/\\\])(.*)", path)
- if match:
- prev_cwd = os.getcwd()
- prev_path = path
- drive, rest_of_path = match.groups()
- os.chdir(drive)
- path = rest_of_path
- kwargs['with_keep_cwd'] = True
- # END cwd preparation
- # END windows handling
-
- try:
- git.clone(url, path, **kwargs)
- finally:
- if prev_cwd is not None:
- os.chdir(prev_cwd)
- path = prev_path
- # END reset previous working dir
- # END bad windows handling
-
- # our git command could have a different working dir than our actual
- # environment, hence we prepend its working dir if required
- if not os.path.isabs(path) and git.working_dir:
- path = join(git._working_dir, path)
-
- # adjust remotes - there may be operating systems which use backslashes,
- # These might be given as initial paths, but when handling the config file
- # that contains the remote from which we were cloned, git stops liking it
- # as it will escape the backslashes. Hence we undo the escaping just to be
- # sure
- repo = cls(os.path.abspath(path), odbt = odbt)
- if repo.remotes:
- repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
- # END handle remote repo
- return repo
-
- def clone(self, path, **kwargs):
- """Create a clone from this repository.
- :param path:
- is the full path of the new repo (traditionally ends with ./.git).
-
- :param kwargs:
- odbt = ObjectDatabase Type, allowing to determine the object database
- implementation used by the returned Repo instance
-
- All remaining keyword arguments are given to the git-clone command
-
- :return: ``git.Repo`` (the newly cloned repo)"""
- return self._clone(self.git, self.git_dir, path, type(self.odb), **kwargs)
-
- @classmethod
- def clone_from(cls, url, to_path, **kwargs):
- """Create a clone from the given URL
- :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
- :param to_path: Path to which the repository should be cloned to
- :param kwargs: see the ``clone`` method
- :return: Repo instance pointing to the cloned directory"""
- return cls._clone(Git(os.getcwd()), url, to_path, GitCmdObjectDB, **kwargs)
-
- def archive(self, ostream, treeish=None, prefix=None, **kwargs):
- """Archive the tree at the given revision.
- :parm ostream: file compatible stream object to which the archive will be written
- :parm treeish: is the treeish name/id, defaults to active branch
- :parm prefix: is the optional prefix to prepend to each filename in the archive
- :parm kwargs:
- Additional arguments passed to git-archive
- NOTE: Use the 'format' argument to define the kind of format. Use
- specialized ostreams to write any format supported by python
-
- :raise GitCommandError: in case something went wrong
- :return: self"""
- if treeish is None:
- treeish = self.head.commit
- if prefix and 'prefix' not in kwargs:
- kwargs['prefix'] = prefix
- kwargs['output_stream'] = ostream
-
- self.git.archive(treeish, **kwargs)
- return self
-
- rev_parse = rev_parse
-
- def __repr__(self):
- return '<git.Repo "%s">' % self.git_dir
diff --git a/git/stream.py b/git/stream.py
new file mode 100644
index 000000000..8010a0551
--- /dev/null
+++ b/git/stream.py
@@ -0,0 +1,694 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from cStringIO import StringIO
+import errno
+import mmap
+import os
+
+from fun import (
+ msb_size,
+ stream_copy,
+ apply_delta_data,
+ connect_deltas,
+ DeltaChunkList,
+ delta_types
+ )
+
+from util import (
+ allocate_memory,
+ LazyMixin,
+ make_sha,
+ write,
+ close,
+ zlib
+ )
+
+has_perf_mod = False
+try:
+ from _perf import apply_delta as c_apply_delta
+ has_perf_mod = True
+except ImportError:
+ pass
+
+__all__ = ( 'DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader',
+ 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', 'FDCompressedSha1Writer',
+ 'FDStream', 'NullStream')
+
+
+#{ RO Streams
+
+class DecompressMemMapReader(LazyMixin):
+ """Reads data in chunks from a memory map and decompresses it. The client sees
+ only the uncompressed data, respective file-like read calls are handling on-demand
+ buffered decompression accordingly
+
+ A constraint on the total size of bytes is activated, simulating
+ a logical file within a possibly larger physical memory area
+
+ To read efficiently, you clearly don't want to read individual bytes; instead,
+ read at least a few kilobytes.
+
+ :note: The chunk-size should be carefully selected as it will involve quite a bit
+ of string copying due to the way zlib is implemented. It's very wasteful,
+ hence we try to find a good tradeoff between allocation time and the number of
+ times we actually allocate. Our own zlib implementation would be good here
+ to better support streamed reading - it would only need to keep the mmap
+ and decompress it into chunks, that's all ... """
+ __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close',
+ '_cbr', '_phi')
+
+ max_read_size = 512*1024 # currently unused
+
+ def __init__(self, m, close_on_deletion, size=None):
+ """Initialize with mmap for stream reading
+ :param m: must be content data - use new if you have object data and no size"""
+ self._m = m
+ self._zip = zlib.decompressobj()
+ self._buf = None # buffer of decompressed bytes
+ self._buflen = 0 # length of bytes in buffer
+ if size is not None:
+ self._s = size # size of uncompressed data to read in total
+ self._br = 0 # num uncompressed bytes read
+ self._cws = 0 # start byte of compression window
+ self._cwe = 0 # end byte of compression window
+ self._cbr = 0 # number of compressed bytes read
+ self._phi = False # is True if we parsed the header info
+ self._close = close_on_deletion # close the memmap on deletion ?
+
+ def _set_cache_(self, attr):
+ assert attr == '_s'
+ # only happens for size, which is a marker to indicate we still
+ # have to parse the header from the stream
+ self._parse_header_info()
+
+ def __del__(self):
+ if self._close:
+ self._m.close()
+ # END handle resource freeing
+
+ def _parse_header_info(self):
+ """If this stream contains object data, parse the header info and skip the
+ stream to a point where each read will yield object content
+
+ :return: parsed type_string, size"""
+ # read header
+ maxb = 512 # should really be enough, cgit uses 8192 I believe
+ self._s = maxb
+ hdr = self.read(maxb)
+ hdrend = hdr.find("\0")
+ type, size = hdr[:hdrend].split(" ")
+ size = int(size)
+ self._s = size
+
+ # adjust internal state to match actual header length that we ignore
+ # The buffer will be depleted first on future reads
+ self._br = 0
+ hdrend += 1 # count terminating \0
+ self._buf = StringIO(hdr[hdrend:])
+ self._buflen = len(hdr) - hdrend
+
+ self._phi = True
+
+ return type, size
+
+ #{ Interface
+
+ @classmethod
+ def new(self, m, close_on_deletion=False):
+ """Create a new DecompressMemMapReader instance for acting as a read-only stream
+ This method parses the object header from m and returns the parsed
+ type and size, as well as the created stream instance.
+
+ :param m: memory map on which to operate. It must be object data ( header + contents )
+ :param close_on_deletion: if True, the memory map will be closed once we are
+ being deleted"""
+ inst = DecompressMemMapReader(m, close_on_deletion, 0)
+ type, size = inst._parse_header_info()
+ return type, size, inst
+
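+ # Illustrative sketch only (not part of the original module): any buffer
+ # compatible object holding zlib deflated loose-object data can stand in
+ # for the memory map, e.g.
+ #
+ # import zlib
+ # raw = zlib.compress("blob 11\0hello world")
+ # typ, size, stream = DecompressMemMapReader.new(raw)
+ # assert (typ, size, stream.read()) == ("blob", 11, "hello world")
+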
+ def data(self):
+ """:return: random access compatible data we are working on"""
+ return self._m
+
+ def compressed_bytes_read(self):
+ """
+ :return: number of compressed bytes read. This includes the bytes it
+ took to decompress the header ( if there was one )"""
+ # ABSTRACT: When decompressing a byte stream, it can be that the first
+ # x bytes which were requested match the first x bytes in the loosely
+ # compressed datastream. This is the worst-case assumption that the reader
+ # does, it assumes that it will get at least X bytes from X compressed bytes
+ # in all cases.
+ # The caveat is that the object, according to our known uncompressed size,
+ # is already complete, but there are still some bytes left in the compressed
+ # stream that contribute to the amount of compressed bytes.
+ # How can we know that we are truly done, and have read all bytes we need
+ # to read ?
+ # Without help, we cannot know, as we need to obtain the status of the
+ # decompression. If it is not finished, we need to decompress more data
+ # until it is finished, to yield the actual number of compressed bytes
+ # belonging to the decompressed object
+ # We are using a custom zlib module for this; if it's not present,
+ # we try to put in additional bytes up for decompression if feasible
+ # and check for the unused_data.
+
+ # Only scrub the stream forward if we are officially done with the
+ # bytes we were to have.
+ if self._br == self._s and not self._zip.unused_data:
+ # manipulate the bytes-read to allow our own read method to continue
+ # but keep the window at its current position
+ self._br = 0
+ if hasattr(self._zip, 'status'):
+ while self._zip.status == zlib.Z_OK:
+ self.read(mmap.PAGESIZE)
+ # END scrub-loop custom zlib
+ else:
+ # pass in additional pages, until we have unused data
+ while not self._zip.unused_data and self._cbr != len(self._m):
+ self.read(mmap.PAGESIZE)
+ # END scrub-loop default zlib
+ # END handle stream scrubbing
+
+ # reset bytes read, just to be sure
+ self._br = self._s
+ # END handle stream scrubbing
+
+ # unused data ends up in the unconsumed tail, which was removed
+ # from the count already
+ return self._cbr
+
+ #} END interface
+
+ def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
+ """Allows to reset the stream to restart reading
+ :raise ValueError: If offset and whence are not 0"""
+ if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
+ raise ValueError("Can only seek to position 0")
+ # END handle offset
+
+ self._zip = zlib.decompressobj()
+ self._br = self._cws = self._cwe = self._cbr = 0
+ if self._phi:
+ self._phi = False
+ del(self._s) # trigger header parsing on first access
+ # END skip header
+
+ def read(self, size=-1):
+ if size < 1:
+ size = self._s - self._br
+ else:
+ size = min(size, self._s - self._br)
+ # END clamp size
+
+ if size == 0:
+ return str()
+ # END handle depletion
+
+
+ # deplete the buffer, then just continue using the decompress object
+ # which has its own buffer. We just need this to transparently parse the
+ # header from the zlib stream
+ dat = str()
+ if self._buf:
+ if self._buflen >= size:
+ # have enough data
+ dat = self._buf.read(size)
+ self._buflen -= size
+ self._br += size
+ return dat
+ else:
+ dat = self._buf.read() # ouch, duplicates data
+ size -= self._buflen
+ self._br += self._buflen
+
+ self._buflen = 0
+ self._buf = None
+ # END handle buffer len
+ # END handle buffer
+
+ # decompress some data
+ # Abstract: zlib needs to operate on chunks of our memory map ( which may
+ # be large ), as it will otherwise always fill in the 'unconsumed_tail'
+ # attribute, which possibly reads our whole map to the end, forcing
+ # everything to be read from disk even though just a portion was requested.
+ # As this would be a no-go, we work around it by passing only chunks of data,
+ # moving the window into the memory map along as we decompress, which keeps
+ # the tail smaller than our chunk-size. This causes 'only' the chunk to be
+ # copied once, and another copy of a part of it when it creates the unconsumed
+ # tail. We have to use it to hand in the appropriate amount of bytes during
+ # the next read.
+ tail = self._zip.unconsumed_tail
+ if tail:
+ # move the window, make it as large as size demands. For code-clarity,
+ # we just take the chunk from our map again instead of reusing the unconsumed
+ # tail. The latter one would save some memory copying, but we could end up
+ # with not getting enough data uncompressed, so we had to sort that out as well.
+ # Now we just assume the worst case, hence the data is uncompressed and the window
+ # needs to be as large as the uncompressed bytes we want to read.
+ self._cws = self._cwe - len(tail)
+ self._cwe = self._cws + size
+ else:
+ cws = self._cws
+ self._cws = self._cwe
+ self._cwe = cws + size
+ # END handle tail
+
+
+ # if window is too small, make it larger so zip can decompress something
+ if self._cwe - self._cws < 8:
+ self._cwe = self._cws + 8
+ # END adjust winsize
+
+ # takes a slice, but doesn't copy the data, it says ...
+ indata = buffer(self._m, self._cws, self._cwe - self._cws)
+
+ # get the actual window end to be sure we don't use it for computations
+ self._cwe = self._cws + len(indata)
+ dcompdat = self._zip.decompress(indata, size)
+ # update the amount of compressed bytes read
+ # We feed possibly overlapping chunks, which is why the unconsumed tail
+ # has to be taken into consideration, as well as the unused data
+ # if we hit the end of the stream
+ self._cbr += len(indata) - len(self._zip.unconsumed_tail)
+ self._br += len(dcompdat)
+
+ if dat:
+ dcompdat = dat + dcompdat
+ # END prepend our cached data
+
+ # it can happen, depending on the compression, that we get fewer bytes
+ # than ordered as it needs the final portion of the data as well.
+ # Recursively resolve that.
+ # Note: dcompdat can be empty even though we still appear to have bytes
+ # to read, if we are called by compressed_bytes_read - it manipulates
+ # us to empty the stream
+ if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s:
+ dcompdat += self.read(size-len(dcompdat))
+ # END handle special case
+ return dcompdat
+
+
+class DeltaApplyReader(LazyMixin):
+ """A reader which dynamically applies pack deltas to a base object, keeping the
+ memory demands to a minimum.
+
+ The size of the final object is only obtainable once all deltas have been
+ applied, unless it is retrieved from a pack index.
+
+ The uncompressed Delta has the following layout (MSB being a most significant
+ bit encoded dynamic size):
+
+ * MSB Source Size - the size of the base against which the delta was created
+ * MSB Target Size - the size of the resulting data after the delta was applied
+ * A list of one byte commands (cmd) which are followed by a specific protocol:
+
+ * cmd & 0x80 - copy delta_data[offset:offset+size]
+
+ * Followed by an encoded offset into the delta data
+ * Followed by an encoded size of the chunk to copy
+
+ * cmd & 0x7f - insert
+
+ * insert cmd bytes from the delta buffer into the output stream
+
+ * cmd == 0 - invalid operation ( or error in delta stream )
+ """
+ __slots__ = (
+ "_bstream", # base stream to which to apply the deltas
+ "_dstreams", # tuple of delta stream readers
+ "_mm_target", # memory map of the delta-applied data
+ "_size", # actual number of bytes in _mm_target
+ "_br" # number of bytes read
+ )
+
+ #{ Configuration
+ k_max_memory_move = 250*1000*1000
+ #} END configuration
+
+ def __init__(self, stream_list):
+ """Initialize this instance with a list of streams, the first stream being
+ the delta to apply on top of all following deltas, the last stream being the
+ base object onto which to apply the deltas"""
+ assert len(stream_list) > 1, "Need at least one delta and one base stream"
+
+ self._bstream = stream_list[-1]
+ self._dstreams = tuple(stream_list[:-1])
+ self._br = 0
+
+ def _set_cache_too_slow_without_c(self, attr):
+ # the direct algorithm is fastest and most direct if there is only one
+ # delta. Also, the extra overhead might not be worth it for items smaller
+ # than X - definitely the case in python, every function call costs
+ # huge amounts of time
+ # if len(self._dstreams) * self._bstream.size < self.k_max_memory_move:
+ if len(self._dstreams) == 1:
+ return self._set_cache_brute_(attr)
+
+ # Aggregate all deltas into one delta in reverse order. Hence we take
+ # the last delta, and reverse-merge its ancestor delta, until we receive
+ # the final delta data stream.
+ # print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams])
+ dcl = connect_deltas(self._dstreams)
+
+ # call len directly, as the (optional) c version doesn't implement the sequence
+ # protocol
+ if dcl.rbound() == 0:
+ self._size = 0
+ self._mm_target = allocate_memory(0)
+ return
+ # END handle empty list
+
+ self._size = dcl.rbound()
+ self._mm_target = allocate_memory(self._size)
+
+ bbuf = allocate_memory(self._bstream.size)
+ stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE)
+
+ # APPLY CHUNKS
+ write = self._mm_target.write
+ dcl.apply(bbuf, write)
+
+ self._mm_target.seek(0)
+
+ def _set_cache_brute_(self, attr):
+ """If we are here, we apply the actual deltas"""
+
+ # TODO: There should be a special case if there is only one stream
+ # Then the default-git algorithm should perform a tad faster, as the
+ # delta is not peeked into, causing less overhead.
+ buffer_info_list = list()
+ max_target_size = 0
+ for dstream in self._dstreams:
+ buf = dstream.read(512) # read the header information + X
+ offset, src_size = msb_size(buf)
+ offset, target_size = msb_size(buf, offset)
+ buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
+ max_target_size = max(max_target_size, target_size)
+ # END for each delta stream
+
+ # sanity check - the first delta to apply should have the same source
+ # size as our actual base stream
+ base_size = self._bstream.size
+ target_size = max_target_size
+
+ # if we have more than 1 delta to apply, we will swap buffers, hence we must
+ # assure that all buffers we use are large enough to hold all the results
+ if len(self._dstreams) > 1:
+ base_size = target_size = max(base_size, max_target_size)
+ # END adjust buffer sizes
+
+
+ # Allocate private memory map big enough to hold the first base buffer
+ # We need random access to it
+ bbuf = allocate_memory(base_size)
+ stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)
+
+ # allocate memory map large enough for the largest (intermediate) target
+ # We will use it as scratch space for all delta ops. If the final
+ # target buffer is smaller than our allocated space, we just use parts
+ # of it upon return.
+ tbuf = allocate_memory(target_size)
+
+ # for each delta to apply, memory map the decompressed delta and
+ # work on the op-codes to reconstruct everything.
+ # For the actual copying, we use a seek and write pattern of buffer
+ # slices.
+ final_target_size = None
+ for (dbuf, offset, src_size, target_size), dstream in reversed(zip(buffer_info_list, self._dstreams)):
+ # allocate a buffer to hold all delta data - fill in the data for
+ # fast access. We do this as we know that reading individual bytes
+ # from our stream would be slower than necessary ( although possible )
+ # The dbuf buffer contains commands after the first two MSB sizes, the
+ # offset specifies the amount of bytes read to get the sizes.
+ ddata = allocate_memory(dstream.size - offset)
+ ddata.write(dbuf)
+ # read the rest from the stream. The size we give is larger than necessary
+ stream_copy(dstream.read, ddata.write, dstream.size, 256*mmap.PAGESIZE)
+
+ #######################################################################
+ if 'c_apply_delta' in globals():
+ c_apply_delta(bbuf, ddata, tbuf);
+ else:
+ apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
+ #######################################################################
+
+ # finally, swap out source and target buffers. The target is now the
+ # base for the next delta to apply
+ bbuf, tbuf = tbuf, bbuf
+ bbuf.seek(0)
+ tbuf.seek(0)
+ final_target_size = target_size
+ # END for each delta to apply
+
+ # it's already seeked to 0, constrain it to the actual size
+ # NOTE: at the end of the loop, it swaps buffers, hence our target buffer
+ # is not tbuf, but bbuf !
+ self._mm_target = bbuf
+ self._size = final_target_size
+
+
+ #{ Configuration
+ if not has_perf_mod:
+ _set_cache_ = _set_cache_brute_
+ else:
+ _set_cache_ = _set_cache_too_slow_without_c
+
+ #} END configuration
+
+ def read(self, count=0):
+ bl = self._size - self._br # bytes left
+ if count < 1 or count > bl:
+ count = bl
+ # NOTE: we could check for certain size limits, and possibly
+ # return buffers instead of strings to prevent byte copying
+ data = self._mm_target.read(count)
+ self._br += len(data)
+ return data
+
+ def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
+ """Allows to reset the stream to restart reading
+
+ :raise ValueError: If offset and whence are not 0"""
+ if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
+ raise ValueError("Can only seek to position 0")
+ # END handle offset
+ self._br = 0
+ self._mm_target.seek(0)
+
+ #{ Interface
+
+ @classmethod
+ def new(cls, stream_list):
+ """
+ Convert the given list of streams into a stream which resolves deltas
+ when reading from it.
+
+ :param stream_list: two or more stream objects, first stream is a Delta
+ to the object that you want to resolve, followed by N additional delta
+ streams. The list's last stream must be a non-delta stream.
+
+ :return: Non-Delta OPackStream object whose stream can be used to obtain
+ the decompressed resolved data
+ :raise ValueError: if the stream list cannot be handled"""
+ if len(stream_list) < 2:
+ raise ValueError("Need at least two streams")
+ # END single object special handling
+
+ if stream_list[-1].type_id in delta_types:
+ raise ValueError("Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type)
+ # END check stream
+
+ return cls(stream_list)
+
+ #} END interface
+
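+ # Usage sketch under assumptions: `pack` is a hypothetical pack access object
+ # able to collect the delta chain of an entry (newest delta first, base stream
+ # last); the reader then exposes the resolved data through the plain stream
+ # interface.
+ #
+ # streams = pack.collect_streams(offset) # hypothetical helper
+ # reader = DeltaApplyReader.new(streams)
+ # data = reader.read()
+ # assert len(data) == reader.size
+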
+
+ #{ OInfo like Interface
+
+ @property
+ def type(self):
+ return self._bstream.type
+
+ @property
+ def type_id(self):
+ return self._bstream.type_id
+
+ @property
+ def size(self):
+ """:return: number of uncompressed bytes in the stream"""
+ return self._size
+
+ #} END oinfo like interface
+
+
+#} END RO streams
+
+
+#{ W Streams
+
+class Sha1Writer(object):
+ """Simple stream writer which produces a sha whenever you like as it degests
+ everything it is supposed to write"""
+ __slots__ = "sha1"
+
+ def __init__(self):
+ self.sha1 = make_sha()
+
+ #{ Stream Interface
+
+ def write(self, data):
+ """:raise IOError: If not all bytes could be written
+ :return: length of incoming data"""
+ self.sha1.update(data)
+ return len(data)
+
+ #} END stream interface
+
+ #{ Interface
+
+ def sha(self, as_hex = False):
+ """:return: sha so far
+ :param as_hex: if True, sha will be hex-encoded, binary otherwise"""
+ if as_hex:
+ return self.sha1.hexdigest()
+ return self.sha1.digest()
+
+ #} END interface
+
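+# Minimal usage sketch (illustration only): the writer merely digests what it
+# sees, so it can compute a sha1 over arbitrary streamed data.
+#
+# w = Sha1Writer()
+# w.write("hello")
+# assert len(w.sha(as_hex=True)) == 40
+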
+
+class FlexibleSha1Writer(Sha1Writer):
+ """Writer producing a sha1 while passing on the written bytes to the given
+ write function"""
+ __slots__ = 'writer'
+
+ def __init__(self, writer):
+ Sha1Writer.__init__(self)
+ self.writer = writer
+
+ def write(self, data):
+ Sha1Writer.write(self, data)
+ self.writer(data)
+
+
+class ZippedStoreShaWriter(Sha1Writer):
+ """Remembers everything someone writes to it and generates a sha"""
+ __slots__ = ('buf', 'zip')
+ def __init__(self):
+ Sha1Writer.__init__(self)
+ self.buf = StringIO()
+ self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
+
+ def __getattr__(self, attr):
+ return getattr(self.buf, attr)
+
+ def write(self, data):
+ alen = Sha1Writer.write(self, data)
+ self.buf.write(self.zip.compress(data))
+ return alen
+
+ def close(self):
+ self.buf.write(self.zip.flush())
+
+ def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
+ """Seeking currently only supports to rewind written data
+ Multiple writes are not supported"""
+ if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
+ raise ValueError("Can only seek to position 0")
+ # END handle offset
+ self.buf.seek(0)
+
+ def getvalue(self):
+ """:return: string value from the current stream position to the end"""
+ return self.buf.getvalue()
+
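+# Usage sketch (illustration only): write data, close to flush the compressor,
+# then obtain the sha over the uncompressed input as well as the zlib
+# compressed bytes kept in the internal buffer.
+#
+# w = ZippedStoreShaWriter()
+# w.write("some payload")
+# w.close()
+# sha_hex = w.sha(as_hex=True)
+# compressed = w.getvalue()
+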
+
+class FDCompressedSha1Writer(Sha1Writer):
+ """Digests data written to it, making the sha available, then compress the
+ data and write it to the file descriptor
+
+ :note: operates on raw file descriptors
+ :note: for this to work, you have to use the close-method of this instance"""
+ __slots__ = ("fd", "sha1", "zip")
+
+ # default exception
+ exc = IOError("Failed to write all bytes to filedescriptor")
+
+ def __init__(self, fd):
+ super(FDCompressedSha1Writer, self).__init__()
+ self.fd = fd
+ self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
+
+ #{ Stream Interface
+
+ def write(self, data):
+ """:raise IOError: If not all bytes could be written
+ :return: length of incoming data"""
+ self.sha1.update(data)
+ cdata = self.zip.compress(data)
+ bytes_written = write(self.fd, cdata)
+ if bytes_written != len(cdata):
+ raise self.exc
+ return len(data)
+
+ def close(self):
+ remainder = self.zip.flush()
+ if write(self.fd, remainder) != len(remainder):
+ raise self.exc
+ return close(self.fd)
+
+ #} END stream interface
+
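+# Usage sketch, assuming a raw descriptor obtained via os.open (the path is
+# hypothetical): data is digested and compressed on write, close() flushes the
+# remainder and closes the descriptor.
+#
+# fd = os.open("/tmp/example.z", os.O_WRONLY | os.O_CREAT)
+# w = FDCompressedSha1Writer(fd)
+# w.write("payload")
+# w.close()
+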
+
+class FDStream(object):
+ """A simple wrapper providing the most basic functions on a file descriptor
+ with the fileobject interface. Cannot use os.fdopen as the resulting stream
+ takes ownership"""
+ __slots__ = ("_fd", '_pos')
+ def __init__(self, fd):
+ self._fd = fd
+ self._pos = 0
+
+ def write(self, data):
+ self._pos += len(data)
+ os.write(self._fd, data)
+
+ def read(self, count=0):
+ if count == 0:
+ # no path is stored on this instance - obtain the file size from the descriptor instead
+ count = os.fstat(self._fd).st_size
+ # END handle read everything
+
+ bytes = os.read(self._fd, count)
+ self._pos += len(bytes)
+ return bytes
+
+ def fileno(self):
+ return self._fd
+
+ def tell(self):
+ return self._pos
+
+ def close(self):
+ close(self._fd)
+
+
+class NullStream(object):
+ """A stream that does nothing but providing a stream interface.
+ Use it like /dev/null"""
+ __slots__ = tuple()
+
+ def read(self, size=0):
+ return ''
+
+ def close(self):
+ pass
+
+ def write(self, data):
+ return len(data)
+
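+# Sketch (illustration only): NullStream can act as a data sink, for instance
+# to drain a source stream via stream_copy without retaining the data.
+# `source` is a hypothetical stream providing read() and a size attribute.
+#
+# stream_copy(source.read, NullStream().write, source.size, mmap.PAGESIZE)
+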
+
+#} END W streams
+
+
diff --git a/git/test/__init__.py b/git/test/__init__.py
index 757cbad1f..a29d96a7b 100644
--- a/git/test/__init__.py
+++ b/git/test/__init__.py
@@ -3,3 +3,12 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+import git.util
+
+def _init_pool():
+ """Assure the pool is actually threaded"""
+ size = 2
+ print "Setting ThreadPool to %i" % size
+ git.util.pool.set_size(size)
+
diff --git a/git/test/db/__init__.py b/git/test/db/__init__.py
new file mode 100644
index 000000000..8a681e428
--- /dev/null
+++ b/git/test/db/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/test_repo.py b/git/test/db/base.py
similarity index 80%
rename from git/test/test_repo.py
rename to git/test/db/base.py
index deadbe9a2..d4c4eaf96 100644
--- a/git/test/test_repo.py
+++ b/git/test/db/base.py
@@ -3,30 +3,41 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import TestDBBase
from git.test.lib import *
-from git import *
+from git.cmd import Git
+from git.objects import *
+from git.exc import *
+from git.index import *
+from git.refs import *
from git.util import join_path_native
from git.exc import BadObject
-from gitdb.util import hex_to_bin, bin_to_hex
+from git.util import hex_to_bin, bin_to_hex
import os, sys
import tempfile
import shutil
from cStringIO import StringIO
+from git.db.compat import RepoCompatibilityInterface
+
+
+class RepoGlobalsItemDeletorMetaCls(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'RepoBase'
+
+
+class RepoBase(TestDBBase):
+ """Basic test for everything a fully implemented repository should support"""
+ __metaclass__ = RepoGlobalsItemDeletorMetaCls
-class TestRepo(TestBase):
-
- @raises(InvalidGitRepositoryError)
def test_new_should_raise_on_invalid_repo_location(self):
- Repo(tempfile.gettempdir())
+ self.failUnlessRaises(InvalidGitRepositoryError, self.RepoCls, tempfile.gettempdir())
- @raises(NoSuchPathError)
def test_new_should_raise_on_non_existant_path(self):
- Repo("repos/foobar")
+ self.failUnlessRaises(NoSuchPathError, self.RepoCls, "repos/foobar")
def test_repo_creation_from_different_paths(self):
- r_from_gitdir = Repo(self.rorepo.git_dir)
+ r_from_gitdir = self.RepoCls(self.rorepo.git_dir)
assert r_from_gitdir.git_dir == self.rorepo.git_dir
assert r_from_gitdir.git_dir.endswith('.git')
assert not self.rorepo.git.working_dir.endswith('.git')
@@ -44,21 +55,21 @@ def test_heads_should_return_array_of_head_objects(self):
def test_heads_should_populate_head_data(self):
for head in self.rorepo.heads:
assert head.name
- assert isinstance(head.commit,Commit)
+ assert isinstance(head.commit, Commit)
# END for each head
-
+
assert isinstance(self.rorepo.heads.master, Head)
assert isinstance(self.rorepo.heads['master'], Head)
-
+
def test_tree_from_revision(self):
tree = self.rorepo.tree('0.1.6')
- assert len(tree.hexsha) == 40
+ assert len(tree.hexsha) == 40
assert tree.type == "tree"
assert self.rorepo.tree(tree) == tree
-
+
# try from invalid revision that does not exist
self.failUnlessRaises(BadObject, self.rorepo.tree, 'hello world')
-
+
def test_commit_from_revision(self):
commit = self.rorepo.commit('0.1.4')
assert commit.type == 'commit'
@@ -68,7 +79,7 @@ def test_commits(self):
mc = 10
commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc))
assert len(commits) == mc
-
+
c = commits[0]
assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.hexsha)
assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.hexsha for p in c.parents])
@@ -76,11 +87,11 @@ def test_commits(self):
assert_equal("Michael Trier", c.author.name)
assert_equal("mtrier@gmail.com", c.author.email)
assert_equal(1232829715, c.authored_date)
- assert_equal(5*3600, c.author_tz_offset)
+ assert_equal(5 * 3600, c.author_tz_offset)
assert_equal("Michael Trier", c.committer.name)
assert_equal("mtrier@gmail.com", c.committer.email)
assert_equal(1232829715, c.committed_date)
- assert_equal(5*3600, c.committer_tz_offset)
+ assert_equal(5 * 3600, c.committer_tz_offset)
assert_equal("Bumped version 0.1.6\n", c.message)
c = commits[1]
@@ -99,30 +110,30 @@ def test_trees(self):
def _assert_empty_repo(self, repo):
# test all kinds of things with an empty, freshly initialized repo.
# It should throw good errors
-
+
# entries should be empty
assert len(repo.index.entries) == 0
-
+
# head is accessible
assert repo.head
assert repo.head.ref
assert not repo.head.is_valid()
-
+
# we can change the head to some other ref
head_ref = Head.from_path(repo, Head.to_full_path('some_head'))
assert not head_ref.is_valid()
repo.head.ref = head_ref
-
+
# is_dirty can handle all kwargs
for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)):
assert not repo.is_dirty(*args)
# END for each arg
-
+
# we can add a file to the index ( if we are not bare )
if not repo.bare:
pass
# END test repos with working tree
-
+
def test_init(self):
prev_cwd = os.getcwd()
@@ -133,19 +144,19 @@ def test_init(self):
try:
# with specific path
for path in (git_dir_rela, git_dir_abs):
- r = Repo.init(path=path, bare=True)
- assert isinstance(r, Repo)
+ r = self.RepoCls.init(path=path, bare=True)
+ assert isinstance(r, self.RepoCls)
assert r.bare == True
assert os.path.isdir(r.git_dir)
-
+
self._assert_empty_repo(r)
-
+
# test clone
clone_path = path + "_clone"
rc = r.clone(clone_path)
self._assert_empty_repo(rc)
-
-
+
+
try:
shutil.rmtree(clone_path)
except OSError:
@@ -153,11 +164,11 @@ def test_init(self):
# of the parent directory
pass
# END exception handling
-
+
# try again, this time with the absolute version
- rc = Repo.clone_from(r.git_dir, clone_path)
+ rc = self.RepoCls.clone_from(r.git_dir, clone_path)
self._assert_empty_repo(rc)
-
+
shutil.rmtree(git_dir_abs)
try:
shutil.rmtree(clone_path)
@@ -166,14 +177,14 @@ def test_init(self):
# of the parent directory
pass
# END exception handling
-
+
# END for each path
-
+
os.makedirs(git_dir_rela)
os.chdir(git_dir_rela)
- r = Repo.init(bare=False)
+ r = self.RepoCls.init(bare=False)
r.bare == False
-
+
self._assert_empty_repo(r)
finally:
try:
@@ -182,17 +193,20 @@ def test_init(self):
pass
os.chdir(prev_cwd)
# END restore previous state
-
+
def test_bare_property(self):
- self.rorepo.bare
+ if isinstance(self.rorepo, RepoCompatibilityInterface):
+ self.rorepo.bare
+ #END handle compatibility
+ self.rorepo.is_bare
def test_daemon_export(self):
orig_val = self.rorepo.daemon_export
self.rorepo.daemon_export = not orig_val
- assert self.rorepo.daemon_export == ( not orig_val )
+ assert self.rorepo.daemon_export == (not orig_val)
self.rorepo.daemon_export = orig_val
assert self.rorepo.daemon_export == orig_val
-
+
def test_alternates(self):
cur_alternates = self.rorepo.alternates
# empty alternates
@@ -204,20 +218,19 @@ def test_alternates(self):
self.rorepo.alternates = cur_alternates
def test_repr(self):
- path = os.path.join(os.path.abspath(GIT_REPO), '.git')
- assert_equal('<git.Repo "%s">' % path, repr(self.rorepo))
+ assert_equal('<git.Repo "%s">' % rorepo_dir(), repr(self.rorepo))
def test_is_dirty_with_bare_repository(self):
orig_value = self.rorepo._bare
self.rorepo._bare = True
assert_false(self.rorepo.is_dirty())
- self.rorepo._bare = orig_value
+ self.rorepo._bare = orig_value
def test_is_dirty(self):
self.rorepo._bare = False
- for index in (0,1):
- for working_tree in (0,1):
- for untracked_files in (0,1):
+ for index in (0, 1):
+ for working_tree in (0, 1):
+ for untracked_files in (0, 1):
assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False)
# END untracked files
# END working tree
@@ -233,27 +246,28 @@ def test_head(self):
def test_index(self):
index = self.rorepo.index
assert isinstance(index, IndexFile)
-
+
def test_tag(self):
+ assert self.rorepo.tag('0.1.5').commit
assert self.rorepo.tag('refs/tags/0.1.5').commit
-
+
def test_archive(self):
tmpfile = os.tmpfile()
self.rorepo.archive(tmpfile, '0.1.5')
assert tmpfile.tell()
-
+
@patch_object(Git, '_call_process')
def test_should_display_blame_information(self, git):
git.return_value = fixture('blame')
- b = self.rorepo.blame( 'master', 'lib/git.py')
+ b = self.rorepo.blame('lib/git.py')
assert_equal(13, len(b))
- assert_equal( 2, len(b[0]) )
+ assert_equal(2, len(b[0]))
# assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b))
assert_equal(hash(b[0][0]), hash(b[9][0]))
c = b[0][0]
assert_true(git.called)
- assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True}))
-
+ assert_equal(git.call_args, (('blame', 'lib/git.py'), {'p': True, 'w': True}))
+
assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.hexsha)
assert_equal('Tom Preston-Werner', c.author.name)
assert_equal('tom@mojombo.com', c.author.email)
@@ -262,26 +276,35 @@ def test_should_display_blame_information(self, git):
assert_equal('tom@mojombo.com', c.committer.email)
assert_equal(1191997100, c.committed_date)
assert_equal('initial grit setup', c.message)
-
+
# test the 'lines per commit' entries
tlist = b[0][1]
- assert_true( tlist )
- assert_true( isinstance( tlist[0], basestring ) )
- assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug
-
+ assert_true(tlist)
+ assert_true(isinstance(tlist[0], basestring))
+ assert_true(len(tlist) < sum(len(t) for t in tlist)) # test for single-char bug
+
+ def test_blame_real(self):
+ c = 0
+ for item in self.rorepo.head.commit.tree.traverse(
+ predicate=lambda i, d: i.type == 'blob' and i.path.endswith('.py')):
+ c += 1
+ b = self.rorepo.blame(self.rorepo.head, item.path)
+ #END for each item to traverse
+ assert c
+
def test_untracked_files(self):
base = self.rorepo.working_tree_dir
- files = ( join_path_native(base, "__test_myfile"),
- join_path_native(base, "__test_other_file") )
+ files = (join_path_native(base, "__test_myfile"),
+ join_path_native(base, "__test_other_file"))
num_recently_untracked = 0
try:
for fpath in files:
- fd = open(fpath,"wb")
+ fd = open(fpath, "wb")
fd.close()
# END for each filename
untracked_files = self.rorepo.untracked_files
num_recently_untracked = len(untracked_files)
-
+
# assure we have all names - they are relative to the git-dir
num_test_untracked = 0
for utfile in untracked_files:
@@ -292,15 +315,15 @@ def test_untracked_files(self):
if os.path.isfile(fpath):
os.remove(fpath)
# END handle files
-
+
assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files))
-
+
def test_config_reader(self):
reader = self.rorepo.config_reader() # all config files
assert reader.read_only
reader = self.rorepo.config_reader("repository") # single config file
assert reader.read_only
-
+
def test_config_writer(self):
for config_level in self.rorepo.config_level:
try:
@@ -309,62 +332,62 @@ def test_config_writer(self):
except IOError:
# its okay not to get a writer for some configuration files if we
# have no permissions
- pass
+ pass
# END for each config level
-
+
def test_creation_deletion(self):
# just a very quick test to assure it generally works. There are
# specialized cases in the test_refs module
head = self.rorepo.create_head("new_head", "HEAD~1")
self.rorepo.delete_head(head)
-
+
tag = self.rorepo.create_tag("new_tag", "HEAD~2")
self.rorepo.delete_tag(tag)
self.rorepo.config_writer()
remote = self.rorepo.create_remote("new_remote", "git@server:repo.git")
self.rorepo.delete_remote(remote)
-
+
def test_comparison_and_hash(self):
# this is only a preliminary test, more testing done in test_index
assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo)
assert len(set((self.rorepo, self.rorepo))) == 1
-
+
def test_git_cmd(self):
# test CatFileContentStream, just to be very sure we have no fencepost errors
# last \n is the terminating newline that it expects
l1 = "0123456789\n"
l2 = "abcdefghijklmnopqrstxy\n"
- l3 = "z\n"
+ l3 = "z\n"
d = "%s%s%s\n" % (l1, l2, l3)
-
+
l1p = l1[:5]
-
+
# full size
# size is without terminating newline
def mkfull():
- return Git.CatFileContentStream(len(d)-1, StringIO(d))
-
+ return Git.CatFileContentStream(len(d) - 1, StringIO(d))
+
ts = 5
def mktiny():
return Git.CatFileContentStream(ts, StringIO(d))
-
+
# readlines no limit
s = mkfull()
lines = s.readlines()
assert len(lines) == 3 and lines[-1].endswith('\n')
assert s._stream.tell() == len(d) # must have scrubbed to the end
-
+
# readlines line limit
s = mkfull()
lines = s.readlines(5)
assert len(lines) == 1
-
+
# readlines on tiny sections
s = mktiny()
lines = s.readlines()
assert len(lines) == 1 and lines[0] == l1p
- assert s._stream.tell() == ts+1
-
+ assert s._stream.tell() == ts + 1
+
# readline no limit
s = mkfull()
assert s.readline() == l1
@@ -372,52 +395,52 @@ def mktiny():
assert s.readline() == l3
assert s.readline() == ''
assert s._stream.tell() == len(d)
-
+
# readline limit
s = mkfull()
assert s.readline(5) == l1p
assert s.readline() == l1[5:]
-
+
# readline on tiny section
s = mktiny()
assert s.readline() == l1p
assert s.readline() == ''
- assert s._stream.tell() == ts+1
-
+ assert s._stream.tell() == ts + 1
+
# read no limit
s = mkfull()
assert s.read() == d[:-1]
assert s.read() == ''
assert s._stream.tell() == len(d)
-
+
# read limit
s = mkfull()
assert s.read(5) == l1p
assert s.read(6) == l1[5:]
assert s._stream.tell() == 5 + 6 # its not yet done
-
+
# read tiny
s = mktiny()
assert s.read(2) == l1[:2]
assert s._stream.tell() == 2
assert s.read() == l1[2:ts]
- assert s._stream.tell() == ts+1
-
+ assert s._stream.tell() == ts + 1
+
def _assert_rev_parse_types(self, name, rev_obj):
rev_parse = self.rorepo.rev_parse
-
+
if rev_obj.type == 'tag':
rev_obj = rev_obj.object
-
+
# tree and blob type
obj = rev_parse(name + '^{tree}')
assert obj == rev_obj.tree
-
+
obj = rev_parse(name + ':CHANGES')
assert obj.type == 'blob' and obj.path == 'CHANGES'
assert rev_obj.tree['CHANGES'] == obj
-
-
+
+
def _assert_rev_parse(self, name):
"""tries multiple different rev-parse syntaxes with the given name
:return: parsed object"""
@@ -428,61 +451,61 @@ def _assert_rev_parse(self, name):
else:
obj = orig_obj
# END deref tags by default
-
+
# try history
rev = name + "~"
obj2 = rev_parse(rev)
assert obj2 == obj.parents[0]
self._assert_rev_parse_types(rev, obj2)
-
+
# history with number
ni = 11
history = [obj.parents[0]]
for pn in range(ni):
history.append(history[-1].parents[0])
# END get given amount of commits
-
+
for pn in range(11):
- rev = name + "~%i" % (pn+1)
+ rev = name + "~%i" % (pn + 1)
obj2 = rev_parse(rev)
assert obj2 == history[pn]
self._assert_rev_parse_types(rev, obj2)
# END history check
-
+
# parent ( default )
rev = name + "^"
obj2 = rev_parse(rev)
assert obj2 == obj.parents[0]
self._assert_rev_parse_types(rev, obj2)
-
+
# parent with number
for pn, parent in enumerate(obj.parents):
- rev = name + "^%i" % (pn+1)
+ rev = name + "^%i" % (pn + 1)
assert rev_parse(rev) == parent
self._assert_rev_parse_types(rev, parent)
# END for each parent
-
+
return orig_obj
-
+
@with_rw_repo('HEAD', bare=False)
def test_rw_rev_parse(self, rwrepo):
# verify it does not confuse branches with hexsha ids
ahead = rwrepo.create_head('aaaaaaaa')
assert(rwrepo.rev_parse(str(ahead)) == ahead.commit)
-
+
def test_rev_parse(self):
rev_parse = self.rorepo.rev_parse
-
+
# try special case: This one failed at some point, make sure its fixed
assert rev_parse("33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781"
-
+
# start from reference
num_resolved = 0
-
+
for ref in Reference.iter_items(self.rorepo):
path_tokens = ref.path.split("/")
for pt in range(len(path_tokens)):
- path_section = '/'.join(path_tokens[-(pt+1):])
+ path_section = '/'.join(path_tokens[-(pt + 1):])
try:
obj = self._assert_rev_parse(path_section)
assert obj.type == ref.object.type
@@ -495,115 +518,124 @@ def test_rev_parse(self):
# END for each token
# END for each reference
assert num_resolved
-
+
# it works with tags !
tag = self._assert_rev_parse('0.1.4')
assert tag.type == 'tag'
-
+
# try full sha directly ( including type conversion )
assert tag.object == rev_parse(tag.object.hexsha)
self._assert_rev_parse_types(tag.object.hexsha, tag.object)
-
-
+
+
# multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
rev = '0.1.4^{tree}^{tree}'
assert rev_parse(rev) == tag.object.tree
- assert rev_parse(rev+':CHANGES') == tag.object.tree['CHANGES']
-
-
+ assert rev_parse(rev + ':CHANGES') == tag.object.tree['CHANGES']
+
+
# try to get parents from first revision - it should fail as no such revision
# exists
first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
commit = rev_parse(first_rev)
assert len(commit.parents) == 0
assert commit.hexsha == first_rev
- self.failUnlessRaises(BadObject, rev_parse, first_rev+"~")
- self.failUnlessRaises(BadObject, rev_parse, first_rev+"^")
-
+ self.failUnlessRaises(BadObject, rev_parse, first_rev + "~")
+ self.failUnlessRaises(BadObject, rev_parse, first_rev + "^")
+
# short SHA1
commit2 = rev_parse(first_rev[:20])
assert commit2 == commit
commit2 = rev_parse(first_rev[:5])
assert commit2 == commit
-
-
+
+
# todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
# needs a tag which points to a blob
-
-
+
+
# ref^0 returns commit being pointed to, same with ref~0, and ^{}
tag = rev_parse('0.1.4')
for token in (('~0', '^0', '^{}')):
assert tag.object == rev_parse('0.1.4%s' % token)
# END handle multiple tokens
-
+
# try partial parsing
max_items = 40
for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
- assert rev_parse(bin_to_hex(binsha)[:8-(i%2)]).binsha == binsha
+ assert rev_parse(bin_to_hex(binsha)[:8 - (i % 2)]).binsha == binsha
if i > max_items:
# this is rather slow currently, as rev_parse returns an object
# which requires accessing packs, it has some additional overhead
break
# END for each binsha in repo
-
+
# missing closing brace commit^{tree
self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')
-
+
# missing starting brace
self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')
-
+
# REVLOG
#######
head = self.rorepo.head
-
+
# need to specify a ref when using the @ syntax
self.failUnlessRaises(BadObject, rev_parse, "%s@{0}" % head.commit.hexsha)
-
+
# uses HEAD.ref by default
assert rev_parse('@{0}') == head.commit
if not head.is_detached:
refspec = '%s@{0}' % head.ref.name
assert rev_parse(refspec) == head.ref.commit
# all additional specs work as well
- assert rev_parse(refspec+"^{tree}") == head.commit.tree
- assert rev_parse(refspec+":CHANGES").type == 'blob'
+ assert rev_parse(refspec + "^{tree}") == head.commit.tree
+ assert rev_parse(refspec + ":CHANGES").type == 'blob'
#END operate on non-detached head
-
- # the last position
- assert rev_parse('@{1}') != head.commit
-
+
+ # the most recent previous position of the currently checked out branch
+
+ try:
+ assert rev_parse('@{1}') != head.commit
+ except IndexError:
+ # on new checkouts, there isn't even a single past branch position
+ # in the log
+ pass
+ #END handle fresh checkouts
+
# position doesn't exist
self.failUnlessRaises(IndexError, rev_parse, '@{10000}')
-
+
# currently, nothing more is supported
self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}")
-
- def test_repo_odbtype(self):
- target_type = GitDB
- if sys.version_info[1] < 5:
- target_type = GitCmdObjectDB
- assert isinstance(self.rorepo.odb, target_type)
-
+
def test_submodules(self):
- assert len(self.rorepo.submodules) == 1 # non-recursive
+ assert len(self.rorepo.submodules) == 2 # non-recursive
+ # in previous configurations we had recursive repositories, so this would compare to 2;
+ # now there is only one of those left, as gitdb was merged, but we have smmap instead
assert len(list(self.rorepo.iter_submodules())) == 2
-
- assert isinstance(self.rorepo.submodule("gitdb"), Submodule)
+
+ assert isinstance(self.rorepo.submodule("async"), Submodule)
self.failUnlessRaises(ValueError, self.rorepo.submodule, "doesn't exist")
-
+
@with_rw_repo('HEAD', bare=False)
def test_submodule_update(self, rwrepo):
# fails in bare mode
rwrepo._bare = True
+ # special handling: there are repo implementations which have a bare attribute. In that case, set it directly
+ if not rwrepo.bare:
+ rwrepo.bare = True
self.failUnlessRaises(InvalidGitRepositoryError, rwrepo.submodule_update)
rwrepo._bare = False
-
+ if rwrepo.bare:
+ rwrepo.bare = False
+ #END special repo handling
+
# test create submodule
sm = rwrepo.submodules[0]
sm = rwrepo.create_submodule("my_new_sub", "some_path", join_path_native(self.rorepo.working_tree_dir, sm.path))
assert isinstance(sm, Submodule)
-
+
# note: the rest of this functionality is tested in test_submodule
-
-
+
+
diff --git a/git/test/db/cmd/__init__.py b/git/test/db/cmd/__init__.py
new file mode 100644
index 000000000..8a681e428
--- /dev/null
+++ b/git/test/db/cmd/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/db/cmd/test_base.py b/git/test/db/cmd/test_base.py
new file mode 100644
index 000000000..cbb4a3391
--- /dev/null
+++ b/git/test/db/cmd/test_base.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.lib import rorepo_dir
+from git.test.db.base import RepoBase
+
+from git.util import bin_to_hex
+from git.exc import BadObject
+
+from git.db.complex import CmdCompatibilityGitDB
+from git.db.cmd.base import *
+
+from git.refs import TagReference, Reference, RemoteReference
+
+class TestBase(RepoBase):
+ RepoCls = CmdCompatibilityGitDB
+
+ def test_basics(self):
+ gdb = self.rorepo
+
+ # partial to complete - works with everything
+ hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
+ assert len(hexsha) == 40
+
+ assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha
+
+ # fails with BadObject
+ for invalid_rev in ("0000", "bad/ref", "super bad"):
+ self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
+
+ def test_fetch_info_invalid(self):
+ self.failUnlessRaises(ValueError, CmdFetchInfo._from_line, self.rorepo, "nonsense", '')
+ self.failUnlessRaises(ValueError, CmdFetchInfo._from_line, self.rorepo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '')
+
+
+ def test_fetch_info(self):
+ # assure we can handle remote-tracking branches
+ fetch_info_line_fmt = "c437ee5deb8d00cf02f03720693e4c802e99f390 not-for-merge %s '0.3' of git://github.com/gitpython-developers/GitPython"
+ remote_info_line_fmt = "* [new branch] nomatter -> %s"
+ fi = CmdFetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "local/master",
+ fetch_info_line_fmt % 'remote-tracking branch')
+
+ # we wouldn't be here if it hadn't worked
+
+ # handles non-default refspecs: One can specify a different path in refs/remotes
+ # or a special path just in refs/something for instance
+
+ fi = CmdFetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "subdir/tagname",
+ fetch_info_line_fmt % 'tag')
+
+ assert isinstance(fi.ref, TagReference)
+ assert fi.ref.path.startswith('refs/tags')
+
+ # it could be in a remote directory though
+ fi = CmdFetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "remotename/tags/tagname",
+ fetch_info_line_fmt % 'tag')
+
+ assert isinstance(fi.ref, TagReference)
+ assert fi.ref.path.startswith('refs/remotes/')
+
+ # it can also be anywhere !
+ tag_path = "refs/something/remotename/tags/tagname"
+ fi = CmdFetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % tag_path,
+ fetch_info_line_fmt % 'tag')
+
+ assert isinstance(fi.ref, TagReference)
+ assert fi.ref.path == tag_path
+
+ # branches default to refs/remotes
+ fi = CmdFetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "remotename/branch",
+ fetch_info_line_fmt % 'branch')
+
+ assert isinstance(fi.ref, RemoteReference)
+ assert fi.ref.remote_name == 'remotename'
+
+ # but you can force it anywhere, in which case we only get a plain Reference
+ fi = CmdFetchInfo._from_line(self.rorepo,
+ remote_info_line_fmt % "refs/something/branch",
+ fetch_info_line_fmt % 'branch')
+
+ assert type(fi.ref) is Reference
+ assert fi.ref.path == "refs/something/branch"
+
+
+
diff --git a/git/test/db/dulwich/__init__.py b/git/test/db/dulwich/__init__.py
new file mode 100644
index 000000000..8a681e428
--- /dev/null
+++ b/git/test/db/dulwich/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/db/dulwich/lib.py b/git/test/db/dulwich/lib.py
new file mode 100644
index 000000000..567340649
--- /dev/null
+++ b/git/test/db/dulwich/lib.py
@@ -0,0 +1,23 @@
+"""dulwich specific utilities, as well as all the default ones"""
+
+from git.test.lib import (
+ InheritedTestMethodsOverrideWrapperMetaClsAutoMixin,
+ needs_module_or_skip
+ )
+
+__all__ = ['needs_dulwich_or_skip', 'DulwichRequiredMetaMixin']
+
+#{ Decorators
+
+def needs_dulwich_or_skip(func):
+ """Skip this test if we have no dulwich - print warning"""
+ return needs_module_or_skip('dulwich')(func)
+
+#}END decorators
+
+#{ MetaClasses
+
+class DulwichRequiredMetaMixin(InheritedTestMethodsOverrideWrapperMetaClsAutoMixin):
+ decorator = [needs_dulwich_or_skip]
+
+#} END metaclasses
diff --git a/git/test/db/dulwich/test_base.py b/git/test/db/dulwich/test_base.py
new file mode 100644
index 000000000..784165183
--- /dev/null
+++ b/git/test/db/dulwich/test_base.py
@@ -0,0 +1,32 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from git.test.lib import TestBase, with_rw_repo
+from git.test.db.base import RepoBase
+
+
+
+try:
+ import dulwich
+except ImportError:
+ # in this case, all other dulwich tests will be skipped
+ # Need to properly initialize the class though, otherwise it would fail
+ from git.db.complex import PureCompatibilityGitDB as DulwichDB
+else:
+ # now we know dulwich is available, so we can do further imports
+ from git.db.dulwich.complex import DulwichCompatibilityGitDB as DulwichDB
+
+#END handle imports
+
+class TestDulwichDBBase(RepoBase):
+ __metaclass__ = DulwichRequiredMetaMixin
+ RepoCls = DulwichDB
+
+ @needs_dulwich_or_skip
+ @with_rw_repo('HEAD', bare=False)
+ def test_basics(self, rw_repo):
+ db = DulwichDB(rw_repo.working_tree_dir)
+
+
diff --git a/git/test/db/lib.py b/git/test/db/lib.py
new file mode 100644
index 000000000..2b3ddde56
--- /dev/null
+++ b/git/test/db/lib.py
@@ -0,0 +1,247 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Base classes for object db testing"""
+from git.test.lib import (
+ with_rw_directory,
+ with_packs_rw,
+ ZippedStoreShaWriter,
+ fixture_path,
+ TestBase,
+ rorepo_dir,
+ )
+
+from git.stream import Sha1Writer
+from git.base import (
+ IStream,
+ OStream,
+ OInfo
+ )
+
+from git.exc import BadObject
+from git.typ import str_blob_type
+
+from async import IteratorReader
+from cStringIO import StringIO
+from struct import pack
+
+
+__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path')
+
+class TestDBBase(TestBase):
+ """Base Class providing default functionality to all tests such as:
+
+ - Utility functions provided by the TestCase base class of the unittest package, such as::
+ self.fail("todo")
+ self.failUnlessRaises(...)
+
+ - Class level repository which is considered read-only as it is shared among
+ all test cases in your type.
+ Access it using::
+ self.rorepo # 'ro' stands for read-only
+
+ The rorepo is in fact your current project's git repo. If you refer to specific
+ shas for your objects, be sure you choose some that are part of the immutable portion
+ of the project history (to ensure tests don't fail for others).
+
+ Derived types can override the default repository type to create a different
+ read-only repo, allowing them to test their specific type
+ """
+
+ # data
+ two_lines = "1234\nhello world"
+ all_data = (two_lines, )
+
+ #{ Configuration
+ # The repository type to instantiate. It takes at least a path to operate upon
+ # during instantiation.
+ RepoCls = None
+
+ # if True, a read-only repo will be provided and RepoCls must be set.
+ # Otherwise it may remain unset
+ needs_ro_repo = True
+ #} END configuration
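+ # A minimal sketch of how a concrete test type is expected to plug in. The class
+ # name below is made up for illustration; PureCompatibilityGitDB is one of the
+ # repository types used elsewhere in this test suite:
+ #
+ #   from git.db.complex import PureCompatibilityGitDB
+ #
+ #   class ExamplePureDBTest(TestDBBase):
+ #       RepoCls = PureCompatibilityGitDB    # any type accepting a repository path
+ #
+ #       def test_example(self):
+ #           assert self.rorepo is not None  # set up once in setUpAll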
+
+ @classmethod
+ def setUpAll(cls):
+ """
+ Dynamically add a read-only repository to our actual type. This way
+ each test type has its own repository
+ """
+ if cls.needs_ro_repo:
+ if cls is not TestDBBase:
+ assert cls.RepoCls is not None, "RepoCls class member must be set in %s" % cls
+ cls.rorepo = cls.RepoCls(rorepo_dir())
+ #END handle rorepo
+
+ def _assert_object_writing_simple(self, db):
+ # write a bunch of objects and query their streams and info
+ null_objs = db.size()
+ ni = 250
+ for i in xrange(ni):
+ data = pack(">L", i)
+ istream = IStream(str_blob_type, len(data), StringIO(data))
+ new_istream = db.store(istream)
+ assert new_istream is istream
+ assert db.has_object(istream.binsha)
+
+ info = db.info(istream.binsha)
+ assert isinstance(info, OInfo)
+ assert info.type == istream.type and info.size == istream.size
+
+ stream = db.stream(istream.binsha)
+ assert isinstance(stream, OStream)
+ assert stream.binsha == info.binsha and stream.type == info.type
+ assert stream.read() == data
+ # END for each item
+
+ assert db.size() == null_objs + ni
+ shas = list(db.sha_iter())
+ assert len(shas) == db.size()
+ assert len(shas[0]) == 20
+
+
+ def _assert_object_writing(self, db):
+ """General tests to verify object writing, compatible to ObjectDBW
+ :note: requires write access to the database"""
+ # start in 'dry-run' mode, using a simple sha1 writer
+ ostreams = (ZippedStoreShaWriter, None)
+ for ostreamcls in ostreams:
+ for data in self.all_data:
+ dry_run = ostreamcls is not None
+ ostream = None
+ if ostreamcls is not None:
+ ostream = ostreamcls()
+ assert isinstance(ostream, Sha1Writer)
+ # END create ostream
+
+ prev_ostream = db.set_ostream(ostream)
+ assert type(prev_ostream) in ostreams or prev_ostream in ostreams
+
+ istream = IStream(str_blob_type, len(data), StringIO(data))
+
+ # store returns same istream instance, with new sha set
+ my_istream = db.store(istream)
+ sha = istream.binsha
+ assert my_istream is istream
+ assert db.has_object(sha) != dry_run
+ assert len(sha) == 20
+
+ # verify data - the slow way, we want to run code
+ if not dry_run:
+ info = db.info(sha)
+ assert str_blob_type == info.type
+ assert info.size == len(data)
+
+ ostream = db.stream(sha)
+ assert ostream.read() == data
+ assert ostream.type == str_blob_type
+ assert ostream.size == len(data)
+ else:
+ self.failUnlessRaises(BadObject, db.info, sha)
+ self.failUnlessRaises(BadObject, db.stream, sha)
+
+ # DIRECT STREAM COPY
+ # our data has been written in object format to the StringIO
+ # we passed as output stream. No physical database representation
+ # was created.
+ # Test direct stream copy of object streams, the result must be
+ # identical to what we fed in
+ ostream.seek(0)
+ istream.stream = ostream
+ assert istream.binsha is not None
+ prev_sha = istream.binsha
+
+ db.set_ostream(ZippedStoreShaWriter())
+ db.store(istream)
+ assert istream.binsha == prev_sha
+ new_ostream = db.ostream()
+
+ # note: only works as long as our store write uses the same compression
+ # level, which is zip_best
+ assert ostream.getvalue() == new_ostream.getvalue()
+ # END for each data set
+ # END for each dry_run mode
+
+ def _assert_object_writing_async(self, db):
+ """Test generic object writing using asynchronous access"""
+ ni = 5000
+ def istream_generator(offset=0, ni=ni):
+ for data_src in xrange(ni):
+ data = str(data_src + offset)
+ yield IStream(str_blob_type, len(data), StringIO(data))
+ # END for each item
+ # END generator utility
+
+ # for now, we are very trusting here as we expect it to work if it worked
+ # in the single-stream case
+
+ # write objects
+ reader = IteratorReader(istream_generator())
+ istream_reader = db.store_async(reader)
+ istreams = istream_reader.read() # read all
+ assert istream_reader.task().error() is None
+ assert len(istreams) == ni
+
+ for stream in istreams:
+ assert stream.error is None
+ assert len(stream.binsha) == 20
+ assert isinstance(stream, IStream)
+ # END assert each stream
+
+ # test has-object-async - we must have all previously added ones
+ reader = IteratorReader( istream.binsha for istream in istreams )
+ hasobject_reader = db.has_object_async(reader)
+ count = 0
+ for sha, has_object in hasobject_reader:
+ assert has_object
+ count += 1
+ # END for each sha
+ assert count == ni
+
+ # read the objects we have just written
+ reader = IteratorReader( istream.binsha for istream in istreams )
+ ostream_reader = db.stream_async(reader)
+
+ # read items individually to prevent hitting possible sys-limits
+ count = 0
+ for ostream in ostream_reader:
+ assert isinstance(ostream, OStream)
+ count += 1
+ # END for each ostream
+ assert ostream_reader.task().error() is None
+ assert count == ni
+
+ # get info about our items
+ reader = IteratorReader( istream.binsha for istream in istreams )
+ info_reader = db.info_async(reader)
+
+ count = 0
+ for oinfo in info_reader:
+ assert isinstance(oinfo, OInfo)
+ count += 1
+ # END for each oinfo instance
+ assert count == ni
+
+
+ # combined read-write using a converter
+ # add 2500 items, and obtain their output streams
+ nni = 2500
+ reader = IteratorReader(istream_generator(offset=ni, ni=nni))
+ istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ]
+
+ istream_reader = db.store_async(reader)
+ istream_reader.set_post_cb(istream_to_sha)
+
+ ostream_reader = db.stream_async(istream_reader)
+
+ count = 0
+ # read it individually, otherwise we might run into the ulimit
+ for ostream in ostream_reader:
+ assert isinstance(ostream, OStream)
+ count += 1
+ # END for each ostream
+ assert count == nni
+
+
diff --git a/git/test/db/py/__init__.py b/git/test/db/py/__init__.py
new file mode 100644
index 000000000..8a681e428
--- /dev/null
+++ b/git/test/db/py/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/db/py/test_base.py b/git/test/db/py/test_base.py
new file mode 100644
index 000000000..6b06bbe92
--- /dev/null
+++ b/git/test/db/py/test_base.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.lib import rorepo_dir
+from git.test.db.base import RepoBase
+
+from git.db.complex import PureCompatibilityGitDB
+
+class TestPyDBBase(RepoBase):
+
+ RepoCls = PureCompatibilityGitDB
+
+ def test_basics(self):
+ pass
+
diff --git a/git/test/db/py/test_git.py b/git/test/db/py/test_git.py
new file mode 100644
index 000000000..ecaa5c8f9
--- /dev/null
+++ b/git/test/db/py/test_git.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.lib import rorepo_dir
+from git.test.db.lib import TestDBBase, with_rw_directory
+from git.exc import BadObject
+from git.db.py.complex import PureGitODB
+from git.base import OStream, OInfo
+from git.util import hex_to_bin, bin_to_hex
+
+import os
+
+class TestGitDB(TestDBBase):
+ needs_ro_repo = False
+
+ def test_reading(self):
+ gdb = PureGitODB(os.path.join(rorepo_dir(), 'objects'))
+
+ # we have packs and loose objects, an alternates file doesn't necessarily exist
+ assert 1 < len(gdb.databases()) < 4
+
+ # access should be possible
+ git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
+ assert isinstance(gdb.info(git_sha), OInfo)
+ assert isinstance(gdb.stream(git_sha), OStream)
+ assert gdb.size() > 200
+ sha_list = list(gdb.sha_iter())
+ assert len(sha_list) == gdb.size()
+
+
+ # This is actually a test for compound functionality, but it doesn't
+ # have a separate test module
+ # test partial shas
+ # this one is uneven and quite short
+ assert gdb.partial_to_complete_sha_hex('5aebcd') == hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
+
+ # mix even/uneven hexshas
+ for i, binsha in enumerate(sha_list[:50]):
+ assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
+ # END for each sha
+
+ self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
+
+ @with_rw_directory
+ def test_writing(self, path):
+ gdb = PureGitODB(path)
+
+ # it's possible to write objects
+ self._assert_object_writing(gdb)
+ self._assert_object_writing_async(gdb)
diff --git a/git/test/db/py/test_loose.py b/git/test/db/py/test_loose.py
new file mode 100644
index 000000000..0c9b4831d
--- /dev/null
+++ b/git/test/db/py/test_loose.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_rw_directory
+from git.db.py.loose import PureLooseObjectODB
+from git.exc import BadObject
+from git.util import bin_to_hex
+
+class TestLooseDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_rw_directory
+ def test_basics(self, path):
+ ldb = PureLooseObjectODB(path)
+
+ # write data
+ self._assert_object_writing(ldb)
+ self._assert_object_writing_async(ldb)
+
+ # verify sha iteration and size
+ shas = list(ldb.sha_iter())
+ assert shas and len(shas[0]) == 20
+
+ assert len(shas) == ldb.size()
+
+ # verify find short object
+ long_sha = bin_to_hex(shas[-1])
+ for short_sha in (long_sha[:20], long_sha[:5]):
+ assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
+ # END for each sha
+
+ self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
+ # raises if no object could be found
+
diff --git a/git/test/db/py/test_mem.py b/git/test/db/py/test_mem.py
new file mode 100644
index 000000000..bc98dc56a
--- /dev/null
+++ b/git/test/db/py/test_mem.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_rw_directory
+from git.db.py.mem import PureMemoryDB
+from git.db.py.loose import PureLooseObjectODB
+
+class TestPureMemoryDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_rw_directory
+ def test_writing(self, path):
+ mdb = PureMemoryDB()
+
+ # write data
+ self._assert_object_writing_simple(mdb)
+
+ # test stream copy
+ ldb = PureLooseObjectODB(path)
+ assert ldb.size() == 0
+ num_streams_copied = mdb.stream_copy(mdb.sha_iter(), ldb)
+ assert num_streams_copied == mdb.size()
+
+ assert ldb.size() == mdb.size()
+ for sha in mdb.sha_iter():
+ assert ldb.has_object(sha)
+ assert ldb.stream(sha).read() == mdb.stream(sha).read()
+ # END verify objects were copied and are equal
diff --git a/git/test/db/py/test_pack.py b/git/test/db/py/test_pack.py
new file mode 100644
index 000000000..5043f446c
--- /dev/null
+++ b/git/test/db/py/test_pack.py
@@ -0,0 +1,76 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_packs_rw
+
+from git.db.py.pack import PurePackedODB
+from git.test.lib import fixture_path
+
+from git.exc import BadObject, AmbiguousObjectName
+
+import os
+import random
+
+class TestPackDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_packs_rw
+ def test_writing(self, path):
+ pdb = PurePackedODB(path)
+
+ # on demand, we init our pack cache
+ num_packs = len(pdb.entities())
+ assert num_packs
+ assert pdb._st_mtime != 0
+
+ # test pack directory changed:
+ # packs removed - rename a file, should affect the glob
+ pack_path = pdb.entities()[0].pack().path()
+ new_pack_path = pack_path + "renamed"
+ os.rename(pack_path, new_pack_path)
+
+ pdb.update_cache(force=True)
+ assert len(pdb.entities()) == num_packs - 1
+
+ # packs added
+ os.rename(new_pack_path, pack_path)
+ pdb.update_cache(force=True)
+ assert len(pdb.entities()) == num_packs
+
+ # bang on the cache
+ # access the Entities directly, as there is no iteration interface
+ # yet ( or required for now )
+ sha_list = list(pdb.sha_iter())
+ assert len(sha_list) == pdb.size()
+
+ # hit all packs in random order
+ random.shuffle(sha_list)
+
+ for sha in sha_list:
+ info = pdb.info(sha)
+ stream = pdb.stream(sha)
+ # END for each sha to query
+
+
+ # test short finding - be a bit more brutal here
+ max_bytes = 19
+ min_bytes = 2
+ num_ambiguous = 0
+ for i, sha in enumerate(sha_list):
+ short_sha = sha[:max((i % max_bytes), min_bytes)]
+ try:
+ assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha
+ except AmbiguousObjectName:
+ num_ambiguous += 1
+ pass # valid, we can have short objects
+ # END exception handling
+ # END for each sha to find
+
+ # we should have at least one ambiguous, considering the small sizes
+ # but in our pack, there is no ambiguous one ...
+ # assert num_ambiguous
+
+ # non-existing
+ self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0", 4)
diff --git a/git/test/db/py/test_ref.py b/git/test/db/py/test_ref.py
new file mode 100644
index 000000000..c5374dc9d
--- /dev/null
+++ b/git/test/db/py/test_ref.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import *
+from git.db.py.ref import PureReferenceDB
+
+from git.util import (
+ NULL_BIN_SHA,
+ hex_to_bin
+ )
+
+import os
+
+class TestPureReferenceDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ def make_alt_file(self, alt_path, alt_list):
+ """Create an alternates file which contains the given alternates.
+ The list can be empty"""
+ alt_file = open(alt_path, "wb")
+ for alt in alt_list:
+ alt_file.write(alt + "\n")
+ alt_file.close()
+
+ @with_rw_directory
+ def test_writing(self, path):
+ NULL_BIN_SHA = '\0' * 20
+
+ alt_path = os.path.join(path, 'alternates')
+ rdb = PureReferenceDB(alt_path)
+ assert len(rdb.databases()) == 0
+ assert rdb.size() == 0
+ assert len(list(rdb.sha_iter())) == 0
+
+ # try empty, non-existing
+ assert not rdb.has_object(NULL_BIN_SHA)
+
+
+ # setup alternate file
+ # add two, one is invalid
+ own_repo_path = fixture_path('../../../.git/objects') # use own repo
+ self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
+ rdb.update_cache()
+ assert len(rdb.databases()) == 1
+
+ # we should now find a default revision of ours
+ git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
+ assert rdb.has_object(git_sha)
+
+ # remove valid
+ self.make_alt_file(alt_path, ["just/one/invalid/path"])
+ rdb.update_cache()
+ assert len(rdb.databases()) == 0
+
+ # add valid
+ self.make_alt_file(alt_path, [own_repo_path])
+ rdb.update_cache()
+ assert len(rdb.databases()) == 1
+
+
diff --git a/git/test/db/pygit2/__init__.py b/git/test/db/pygit2/__init__.py
new file mode 100644
index 000000000..8a681e428
--- /dev/null
+++ b/git/test/db/pygit2/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/db/pygit2/lib.py b/git/test/db/pygit2/lib.py
new file mode 100644
index 000000000..356df9dcb
--- /dev/null
+++ b/git/test/db/pygit2/lib.py
@@ -0,0 +1,23 @@
+"""pygit2 specific utilities, as well as all the default ones"""
+
+from git.test.lib import (
+ InheritedTestMethodsOverrideWrapperMetaClsAutoMixin,
+ needs_module_or_skip
+ )
+
+__all__ = ['needs_pygit2_or_skip', 'Pygit2RequiredMetaMixin']
+
+#{ Decorators
+
+def needs_pygit2_or_skip(func):
+ """Skip this test if we have no pygit2 - print warning"""
+ return needs_module_or_skip('pygit2')(func)
+
+#}END decorators
+
+#{ MetaClasses
+
+class Pygit2RequiredMetaMixin(InheritedTestMethodsOverrideWrapperMetaClsAutoMixin):
+ decorator = [needs_pygit2_or_skip]
+
+#} END metaclasses
diff --git a/git/test/db/pygit2/test_base.py b/git/test/db/pygit2/test_base.py
new file mode 100644
index 000000000..246a16433
--- /dev/null
+++ b/git/test/db/pygit2/test_base.py
@@ -0,0 +1,32 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from git.test.lib import TestBase, with_rw_repo
+from git.test.db.base import RepoBase
+
+
+
+try:
+ import pygit2
+except ImportError:
+ # in this case, all other pygit2 tests will be skipped
+ # Need to properly initialize the class though, otherwise it would fail
+ from git.db.complex import PureCompatibilityGitDB as Pygit2DB
+else:
+ # now we know pygit2 is available, so we can do further imports
+ from git.db.pygit2.complex import Pygit2CompatibilityGitDB as Pygit2DB
+
+#END handle imports
+
+class TestPyGit2DBBase(RepoBase):
+ __metaclass__ = Pygit2RequiredMetaMixin
+ RepoCls = Pygit2DB
+
+ @needs_pygit2_or_skip
+ @with_rw_repo('HEAD', bare=False)
+ def test_basics(self, rw_repo):
+ db = Pygit2DB(rw_repo.working_tree_dir)
+
+
diff --git a/git/test/db/test_base.py b/git/test/db/test_base.py
new file mode 100644
index 000000000..2a882d0af
--- /dev/null
+++ b/git/test/db/test_base.py
@@ -0,0 +1,20 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from git.db import RefSpec
+
+class TestBase(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_rw_directory
+ def test_basics(self, path):
+ self.failUnlessRaises(ValueError, RefSpec, None, None)
+ rs = RefSpec(None, "something")
+ assert rs.force == False
+ assert rs.delete_destination()
+ assert rs.source is None
+ assert rs.destination == "something"
+
diff --git a/git/test/fixtures/git_config b/git/test/fixtures/git_config
index 3c91985fe..ff8e71143 100644
--- a/git/test/fixtures/git_config
+++ b/git/test/fixtures/git_config
@@ -1,22 +1,28 @@
[core]
repositoryformatversion = 0
filemode = true
- bare = false
- logallrefupdates = true
+ bare = false
+ logallrefupdates = true
[remote "origin"]
fetch = +refs/heads/*:refs/remotes/origin/*
url = git://gitorious.org/~byron/git-python/byrons-clone.git
pushurl = git@gitorious.org:~byron/git-python/byrons-clone.git
-[branch "master"]
+# a tab indented section header
+ [branch "master"]
remote = origin
merge = refs/heads/master
-[remote "mainline"]
+# a space indented section header
+ [remote "mainline"]
+ # space indented comment
url = git://gitorious.org/git-python/mainline.git
fetch = +refs/heads/*:refs/remotes/mainline/*
+
[remote "MartinMarcher"]
+ # tab indented comment
url = git://gitorious.org/~martin.marcher/git-python/serverhorror.git
fetch = +refs/heads/*:refs/remotes/MartinMarcher/*
-[gui]
+ # can handle comments - the section name is supposed to be stripped
+[ gui ]
geometry = 1316x820+219+243 207 192
[branch "mainline_performance"]
remote = mainline
diff --git a/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b
new file mode 100644
index 000000000..021c2db34
Binary files /dev/null and b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b differ
diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx
new file mode 100644
index 000000000..fda5969bc
Binary files /dev/null and b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx differ
diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack
new file mode 100644
index 000000000..a3209d2be
Binary files /dev/null and b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack differ
diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx
new file mode 100644
index 000000000..a7d6c7177
Binary files /dev/null and b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx differ
diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack
new file mode 100644
index 000000000..955c424c7
Binary files /dev/null and b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack differ
diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx
new file mode 100644
index 000000000..87c635f48
Binary files /dev/null and b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx differ
diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack
new file mode 100644
index 000000000..a69b28ac6
Binary files /dev/null and b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack differ
diff --git a/git/test/lib/__init__.py b/git/test/lib/__init__.py
index 775127943..a06564383 100644
--- a/git/test/lib/__init__.py
+++ b/git/test/lib/__init__.py
@@ -5,9 +5,14 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import inspect
+# TODO: Separate names - they do repeat unfortunately. Also deduplicate it,
+# redesign decorators to support multiple database types in succession.
+from base import *
+
from mock import *
from asserts import *
from helper import *
+
__all__ = [ name for name, obj in locals().items()
if not (name.startswith('_') or inspect.ismodule(obj)) ]
diff --git a/git/test/lib/base.py b/git/test/lib/base.py
new file mode 100644
index 000000000..bc1607834
--- /dev/null
+++ b/git/test/lib/base.py
@@ -0,0 +1,200 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Utilities used in ODB testing"""
+from git.base import OStream
+from git.stream import (
+ Sha1Writer,
+ ZippedStoreShaWriter
+ )
+
+from git.util import (
+ zlib,
+ dirname
+ )
+
+import sys
+import random
+from array import array
+from cStringIO import StringIO
+
+import glob
+import unittest
+import tempfile
+import shutil
+import os
+import gc
+
+
+#{ Decorators
+
+def with_rw_directory(func):
+ """Create a temporary directory which can be written to, remove it if the
+ test succeeds, but leave it otherwise to aid additional debugging"""
+ def wrapper(self):
+ path = maketemp(prefix=func.__name__)
+ os.mkdir(path)
+ keep = False
+ try:
+ try:
+ return func(self, path)
+ except Exception:
+ print >> sys.stderr, "Test %s.%s failed, output is at %r" % (type(self).__name__, func.__name__, path)
+ keep = True
+ raise
+ finally:
+ # Need to collect here to be sure all handles have been closed. It appears
+ # to be a windows-only issue. In fact things should be deleted, as well as
+ # memory maps closed, once objects go out of scope. For some reason
+ # though this is not the case here unless we collect explicitly.
+ if not keep:
+ gc.collect()
+ shutil.rmtree(path)
+ # END handle exception
+ # END wrapper
+
+ wrapper.__name__ = func.__name__
+ return wrapper
+
+
+def with_rw_repo(func):
+ """Create a copy of our repository and put it into a writable location. It will
+ be removed if the test doesn't result in an error.
+ As we can currently only copy the full working tree, tests must not rely on
+ being on a certain branch or on anything really except for the default tags
+ that should exist.
+ The wrapped function receives a git repository."""
+ def wrapper(self, path):
+ src_dir = dirname(dirname(dirname(__file__)))
+ assert(os.path.isdir(path))
+ os.rmdir(path) # created by wrapper, but must not exist for copy operation
+ shutil.copytree(src_dir, path)
+ target_gitdir = os.path.join(path, '.git')
+ assert os.path.isdir(target_gitdir)
+ return func(self, self.RepoCls(target_gitdir))
+ #END wrapper
+ wrapper.__name__ = func.__name__
+ return with_rw_directory(wrapper)
+
+
+
+def with_packs_rw(func):
+ """Function that provides a path into which the packs for testing should be
+ copied. Will pass on the path to the actual function afterwards
+
+ :note: needs with_rw_directory wrapped around it"""
+ def wrapper(self, path):
+ src_pack_glob = fixture_path('packs/*')
+ print src_pack_glob
+ copy_files_globbed(src_pack_glob, path, hard_link_ok=True)
+ return func(self, path)
+ # END wrapper
+
+ wrapper.__name__ = func.__name__
+ return with_rw_directory(wrapper)
+
+#} END decorators
+
+#{ Routines
+
+def rorepo_dir():
+ """:return: path to our own repository, being our own .git directory.
+ :note: doesn't work in bare repositories"""
+ base = os.path.join(dirname(dirname(dirname(dirname(__file__)))), '.git')
+ assert os.path.isdir(base)
+ return base
+
+def maketemp(*args, **kwargs):
+ """Wrapper around default tempfile.mktemp to fix an osx issue"""
+ tdir = tempfile.mktemp(*args, **kwargs)
+ if sys.platform == 'darwin':
+ tdir = '/private' + tdir
+ return tdir
+
+def fixture_path(relapath=''):
+ """:return: absolute path into the fixture directory
+ :param relapath: relative path into the fixtures directory, or ''
+ to obtain the fixture directory itself"""
+ test_dir = os.path.dirname(os.path.dirname(__file__))
+ return os.path.join(test_dir, "fixtures", relapath)
+
+def fixture(name):
+ return open(fixture_path(name), 'rb').read()
+
+def absolute_project_path():
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+
+def copy_files_globbed(source_glob, target_dir, hard_link_ok=False):
+ """Copy all files found according to the given source glob into the target directory
+ :param hard_link_ok: if True, hard links will be created if possible. Otherwise
+ the files will be copied"""
+ for src_file in glob.glob(source_glob):
+ if hard_link_ok and hasattr(os, 'link'):
+ target = os.path.join(target_dir, os.path.basename(src_file))
+ try:
+ os.link(src_file, target)
+ except OSError:
+ shutil.copy(src_file, target_dir)
+ # END handle cross device links ( and resulting failure )
+ else:
+ shutil.copy(src_file, target_dir)
+ # END try hard link
+ # END for each file to copy
+
+
+def make_bytes(size_in_bytes, randomize=False):
+ """:return: string with given size in bytes
+ :param randomize: try to produce a very random stream"""
+ actual_size = size_in_bytes / 4 # 'i' array items are 4 bytes each
+ producer = xrange(actual_size)
+ if randomize:
+ producer = list(producer)
+ random.shuffle(producer)
+ # END randomize
+ a = array('i', producer)
+ return a.tostring()
+
+def make_object(type, data):
+ """:return: bytes resembling an uncompressed object"""
+ odata = "blob %i\0" % len(data)
+ return odata + data
+
+def make_memory_file(size_in_bytes, randomize=False):
+ """:return: tuple(size_of_stream, stream)
+ :param randomize: try to produce a very random stream"""
+ d = make_bytes(size_in_bytes, randomize)
+ return len(d), StringIO(d)
+
+#} END routines
+
+#{ Stream Utilities
+
+class DummyStream(object):
+ def __init__(self):
+ self.was_read = False
+ self.bytes = 0
+ self.closed = False
+
+ def read(self, size):
+ self.was_read = True
+ self.bytes = size
+
+ def close(self):
+ self.closed = True
+
+ def _assert(self):
+ assert self.was_read
+
+
+class DeriveTest(OStream):
+ def __init__(self, sha, type, size, stream, *args, **kwargs):
+ self.myarg = kwargs.pop('myarg')
+ self.args = args
+
+ def _assert(self):
+ assert self.args
+ assert self.myarg
+
+#} END stream utilities
+
diff --git a/git/test/lib/helper.py b/git/test/lib/helper.py
index 76aaaa382..2045f9d39 100644
--- a/git/test/lib/helper.py
+++ b/git/test/lib/helper.py
@@ -12,26 +12,21 @@
import shutil
import cStringIO
-GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+import warnings
+from nose import SkipTest
-__all__ = (
- 'fixture_path', 'fixture', 'absolute_project_path', 'StringProcessAdapter',
- 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase', 'GIT_REPO'
- )
-
-#{ Routines
+from base import (
+ maketemp,
+ rorepo_dir
+ )
-def fixture_path(name):
- test_dir = os.path.dirname(os.path.dirname(__file__))
- return os.path.join(test_dir, "fixtures", name)
-def fixture(name):
- return open(fixture_path(name), 'rb').read()
+__all__ = (
+ 'StringProcessAdapter', 'GlobalsItemDeletorMetaCls', 'InheritedTestMethodsOverrideWrapperMetaClsAutoMixin',
+ 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase', 'needs_module_or_skip'
+ )
-def absolute_project_path():
- return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
-#} END routines
#{ Adapters
@@ -52,13 +47,6 @@ def wait(self):
#{ Decorators
-def _mktemp(*args):
- """Wrapper around default tempfile.mktemp to fix an osx issue"""
- tdir = tempfile.mktemp(*args)
- if sys.platform == 'darwin':
- tdir = '/private' + tdir
- return tdir
-
def _rmtree_onerror(osremove, fullpath, exec_info):
"""
Handle the case on windows that read-only files cannot be deleted by
@@ -87,7 +75,7 @@ def repo_creator(self):
if bare:
prefix = ''
#END handle prefix
- repo_dir = _mktemp("%sbare_%s" % (prefix, func.__name__))
+ repo_dir = maketemp("%sbare_%s" % (prefix, func.__name__))
rw_repo = self.rorepo.clone(repo_dir, shared=True, bare=bare, n=True)
rw_repo.head.commit = rw_repo.commit(working_tree_ref)
@@ -143,8 +131,8 @@ def case(self, rw_repo, rw_remote_repo)
assert isinstance(working_tree_ref, basestring), "Decorator requires ref name for working tree checkout"
def argument_passer(func):
def remote_repo_creator(self):
- remote_repo_dir = _mktemp("remote_repo_%s" % func.__name__)
- repo_dir = _mktemp("remote_clone_non_bare_repo")
+ remote_repo_dir = maketemp("remote_repo_%s" % func.__name__)
+ repo_dir = maketemp("remote_clone_non_bare_repo")
rw_remote_repo = self.rorepo.clone(remote_repo_dir, shared=True, bare=True)
rw_repo = rw_remote_repo.clone(repo_dir, shared=True, bare=False, n=True) # recursive alternates info ?
@@ -180,9 +168,11 @@ def remote_repo_creator(self):
except GitCommandError,e:
print str(e)
if os.name == 'nt':
- raise AssertionError('git-daemon needs to run this test, but windows does not have one. Otherwise, run: git-daemon "%s"'%tempfile.gettempdir())
+ raise AssertionError('git-daemon needs to run this test, but windows does not have one. Otherwise, run: git-daemon "%s"' % os.path.dirname(maketemp()))
else:
- raise AssertionError('Please start a git-daemon to run this test, execute: git-daemon "%s"'%tempfile.gettempdir())
+ raise AssertionError('Please start a git-daemon to run this test, execute: git-daemon "%s"' % os.path.dirname(maketemp()))
+ # END make assertion
+ #END catch ls remote error
# adjust working dir
prev_cwd = os.getcwd()
@@ -204,33 +194,108 @@ def remote_repo_creator(self):
return argument_passer
+def needs_module_or_skip(module):
+ """Decorator to be used for test cases only.
+ Print a warning if the given module could not be imported, and skip the test.
+ Otherwise run the test as usual
+ :param module: the name of the required module"""
+ def argpasser(func):
+ def wrapper(self, *args, **kwargs):
+ try:
+ __import__(module)
+ except ImportError:
+ msg = "Module %r is required to run this test - skipping" % module
+ warnings.warn(msg)
+ raise SkipTest(msg)
+ #END check import
+ return func(self, *args, **kwargs)
+ #END wrapper
+ wrapper.__name__ = func.__name__
+ return wrapper
+ #END argpasser
+ return argpasser
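+
+# A short usage sketch (illustrative - the class and test names are made up):
+#
+#   class TestWithOptionalDependency(TestCase):
+#       @needs_module_or_skip('dulwich')
+#       def test_optional_feature(self):
+#           import dulwich  # guaranteed to succeed if we get here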
+
#} END decorators
+
+#{ Meta Classes
+class GlobalsItemDeletorMetaCls(type):
+ """Utiltiy to prevent the RepoBase to be picked up by nose as the metacls
+ will delete the instance from the globals"""
+ #{ Configuration
+ # Set this to the name of the type to delete from its module
+ ModuleToDelete = None
+ #} END configuration
+
+ def __new__(metacls, name, bases, clsdict):
+ assert metacls.ModuleToDelete is not None, "Invalid metaclass configuration"
+ new_type = super(GlobalsItemDeletorMetaCls, metacls).__new__(metacls, name, bases, clsdict)
+ if name != metacls.ModuleToDelete:
+ mod = __import__(new_type.__module__, globals(), locals(), new_type.__module__)
+ try:
+ delattr(mod, metacls.ModuleToDelete)
+ except AttributeError:
+ pass
+ #END skip case that people import our base without actually using it
+ #END handle deletion
+ return new_type
+
+
+class InheritedTestMethodsOverrideWrapperMetaClsAutoMixin(object):
+ """Automatically picks up the actual metaclass of the the type to be created,
+ that is the one inherited by one of the bases, and patch up its __new__ to use
+ the InheritedTestMethodsOverrideWrapperInstanceDecorator with our configured decorator"""
+
+ #{ Configuration
+ # decorator function to use when wrapping the inherited methods. Put it into a list as first member
+ # to keep it from being bound as an instance method
+ decorator = []
+ #}END configuration
+
+ @classmethod
+ def _find_metacls(metacls, bases):
+ """emulate pythons lookup"""
+ mcls_attr = '__metaclass__'
+ for base in bases:
+ if hasattr(base, mcls_attr):
+ return getattr(base, mcls_attr)
+ return metacls._find_metacls(base.__bases__)
+ #END for each base
+ raise AssertionError("base class had not metaclass attached")
+
+ @classmethod
+ def _patch_methods_recursive(metacls, bases, clsdict):
+ """depth-first patching of methods"""
+ for base in bases:
+ metacls._patch_methods_recursive(base.__bases__, clsdict)
+ for name, item in base.__dict__.iteritems():
+ if not name.startswith('test_'):
+ continue
+ #END skip non-tests
+ clsdict[name] = metacls.decorator[0](item)
+ #END for each item
+ #END for each base
+
+ def __new__(metacls, name, bases, clsdict):
+ assert metacls.decorator, "'decorator' member needs to be set in subclass"
+ base_metacls = metacls._find_metacls(bases)
+ metacls._patch_methods_recursive(bases, clsdict)
+ return base_metacls.__new__(base_metacls, name, bases, clsdict)
+
+#} END meta classes
class TestBase(TestCase):
"""
Base Class providing default functionality to all tests such as:
-
- Utility functions provided by the TestCase base of the unittest method such as::
self.fail("todo")
self.failUnlessRaises(...)
-
- - Class level repository which is considered read-only as it is shared among
- all test cases in your type.
- Access it using::
- self.rorepo # 'ro' stands for read-only
-
- The rorepo is in fact your current project's git repo. If you refer to specific
- shas for your objects, be sure you choose some that are part of the immutable portion
- of the project history ( to assure tests don't fail for others ).
"""
@classmethod
def setUpAll(cls):
- """
- Dynamically add a read-only repository to our actual type. This way
- each test type has its own repository
- """
- cls.rorepo = Repo(GIT_REPO)
+ """This method is only called to provide the most basic functionality
+ Subclasses may just override it or implement it differently"""
+ cls.rorepo = Repo(rorepo_dir())
def _make_file(self, rela_path, data, repo=None):
"""
diff --git a/git/test/objects/__init__.py b/git/test/objects/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/git/test/objects/__init__.py
@@ -0,0 +1 @@
+
diff --git a/git/test/objects/lib.py b/git/test/objects/lib.py
new file mode 100644
index 000000000..fe1d9f9db
--- /dev/null
+++ b/git/test/objects/lib.py
@@ -0,0 +1,14 @@
+"""Provide customized obhject testing facilities"""
+
+from git.test.lib import (
+ rorepo_dir,
+ TestBase,
+ assert_equal,
+ assert_not_equal,
+ with_rw_repo,
+ StringProcessAdapter,
+ )
+
+class TestObjectBase(TestBase):
+ """Provides a default read-only repository in the rorepo member"""
+ pass
diff --git a/git/test/test_blob.py b/git/test/objects/test_blob.py
similarity index 86%
rename from git/test/test_blob.py
rename to git/test/objects/test_blob.py
index 661c05014..58ac25b74 100644
--- a/git/test/test_blob.py
+++ b/git/test/objects/test_blob.py
@@ -4,11 +4,11 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
-from gitdb.util import hex_to_bin
+from lib import *
+from git.objects.blob import *
+from git.util import hex_to_bin
-class TestBlob(TestBase):
+class TestBlob(TestObjectBase):
def test_mime_type_should_return_mime_type_for_known_types(self):
blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA, 'path': 'foo.png'})
diff --git a/git/test/test_commit.py b/git/test/objects/test_commit.py
similarity index 98%
rename from git/test/test_commit.py
rename to git/test/objects/test_commit.py
index 4a8d8b878..80326fe9a 100644
--- a/git/test/test_commit.py
+++ b/git/test/objects/test_commit.py
@@ -5,10 +5,14 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
-from gitdb import IStream
-from gitdb.util import hex_to_bin
+from lib import *
+from git.objects.commit import *
+from git.base import IStream
+
+from git.util import (
+ hex_to_bin,
+ Actor,
+ )
from cStringIO import StringIO
import time
@@ -65,7 +69,7 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False)
# END handle performance info
-class TestCommit(TestBase):
+class TestCommit(TestObjectBase):
def test_bake(self):
diff --git a/git/test/test_submodule.py b/git/test/objects/test_submodule.py
similarity index 83%
rename from git/test/test_submodule.py
rename to git/test/objects/test_submodule.py
index adb4fb821..2b7c7f407 100644
--- a/git/test/test_submodule.py
+++ b/git/test/objects/test_submodule.py
@@ -1,29 +1,65 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
+from lib import *
from git.exc import *
from git.objects.submodule.base import Submodule
from git.objects.submodule.root import RootModule, RootUpdateProgress
from git.util import to_native_path_linux, join_path_native
+
import shutil
import git
+import sys
import os
+import sys
+
+# Change the configuration, if possible, to prevent the underlying memory manager
+# from keeping file handles open. On windows we get problems as handles are not
+# properly closed, apparently due to mmap bugs on that platform
+if sys.platform == 'win32':
+ try:
+ import smmap.util
+ smmap.util.MapRegion._test_read_into_memory = True
+ except ImportError:
+ sys.stderr.write("The submodule tests will fail as some files cannot be removed due to open file handles.\n")
+ sys.stderr.write("The latest version of gitdb uses a memory map manager which can be configured to work around this problem")
+#END handle windows platform
+
class TestRootProgress(RootUpdateProgress):
"""Just prints messages, for now without checking the correctness of the states"""
- def update(self, op, index, max_count, message=''):
+ def update(self, op, index, max_count, message='', input=''):
print message
prog = TestRootProgress()
-class TestSubmodule(TestBase):
+class TestSubmodule(TestObjectBase):
k_subm_current = "468cad66ff1f80ddaeee4123c24e4d53a032c00d"
k_subm_changed = "394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3"
k_no_subm_tag = "0.1.6"
+ k_github_gitdb_url = 'git://github.com/gitpython-developers/gitdb.git'
+ env_gitdb_local_path = "GITPYTHON_TEST_GITDB_LOCAL_PATH"
+ def _generate_async_local_path(self):
+ return to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, 'git/ext/async'))
+
+ def _rewrite_gitdb_to_local_path(self, smgitdb):
+ """Rewrites the given submodule to point to the local path of the gitdb repository, if possible.
+ Otherwise it leaves it unchanged
+ :return: new clone path, or None if no new path was set"""
+ new_smclone_path = os.environ.get(self.env_gitdb_local_path)
+ if new_smclone_path is not None:
+ writer = smgitdb.config_writer()
+ writer.set_value('url', new_smclone_path)
+ del(writer)
+ assert smgitdb.config_reader().get_value('url') == new_smclone_path
+ assert smgitdb.url == new_smclone_path
+ else:
+ sys.stderr.write("Submodule tests need the gitdb repository. You can specify a local source setting the %s environment variable. Otherwise it will be downloaded from the internet" % self.env_gitdb_local_path)
+ #END handle submodule path
+ return new_smclone_path
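+
+ # Typical use of the override (illustrative - the path is a placeholder): point
+ # the tests at an existing local clone before running them, e.g.
+ #   os.environ['GITPYTHON_TEST_GITDB_LOCAL_PATH'] = '/path/to/local/gitdb'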
def _do_base_tests(self, rwrepo):
"""Perform all tests in the given repository, it may be bare or nonbare"""
@@ -42,7 +78,7 @@ def _do_base_tests(self, rwrepo):
assert sm.path == 'git/ext/gitdb'
assert sm.path != sm.name # in our case, we have ids there, which don't equal the path
- assert sm.url == 'git://github.com/gitpython-developers/gitdb.git'
+ assert sm.url == self.k_github_gitdb_url
assert sm.branch_path == 'refs/heads/master' # the default ...
assert sm.branch_name == 'master'
assert sm.parent_commit == rwrepo.head.commit
@@ -73,13 +109,11 @@ def _do_base_tests(self, rwrepo):
if rwrepo.bare:
self.failUnlessRaises(InvalidGitRepositoryError, sm.config_writer)
else:
- writer = sm.config_writer()
# for faster checkout, set the url to the local path
- new_smclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path))
- writer.set_value('url', new_smclone_path)
- del(writer)
- assert sm.config_reader().get_value('url') == new_smclone_path
- assert sm.url == new_smclone_path
+ # Note: This is nice but doesn't work anymore with the latest git-python
+ # version. This would also mean we need internet for this to work which
+ # is why we allow an override using an environment variable
+ new_smclone_path = self._rewrite_gitdb_to_local_path(sm)
# END handle bare repo
smold.config_reader()
@@ -175,7 +209,8 @@ def _do_base_tests(self, rwrepo):
csm_repopath = csm.path
# adjust the path of the submodules module to point to the local destination
- new_csmclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path, csm.path))
+ # In the current gitpython version, async is used directly by gitpython
+ new_csmclone_path = self._generate_async_local_path()
csm.config_writer().set_value('url', new_csmclone_path)
assert csm.url == new_csmclone_path
@@ -247,6 +282,12 @@ def _do_base_tests(self, rwrepo):
self.failUnlessRaises(InvalidGitRepositoryError, sm.remove, dry_run=True)
sm.module().index.reset(working_tree=True)
+ # make sure sub-submodule is not modified by forcing it to update
+ # to the revision it is supposed to point to.
+ for subitem in sm.traverse():
+ subitem.update()
+ #END checkout to right commit
+
# this would work
assert sm.remove(dry_run=True) is sm
assert sm.module_exists()
@@ -259,10 +300,11 @@ def _do_base_tests(self, rwrepo):
self.failUnlessRaises(InvalidGitRepositoryError, sm.remove)
# forcibly delete the child repository
+ prev_count = len(sm.children())
assert csm.remove(force=True) is csm
assert not csm.exists()
assert not csm.module_exists()
- assert len(sm.children()) == 0
+ assert len(sm.children()) == prev_count - 1
# now we have a changed index, as configuration was altered.
# fix this
sm.module().index.reset(working_tree=True)
@@ -326,8 +368,9 @@ def _do_base_tests(self, rwrepo):
abspmp = nsm.abspath
assert nsm.move(nmp) is nsm
nmp = nmp[:-1] # cut last /
- assert nsm.path == nmp
- assert rwrepo.submodules[0].path == nmp
+ nmpl = to_native_path_linux(nmp)
+ assert nsm.path == nmpl
+ assert rwrepo.submodules[0].path == nmpl
mpath = 'newsubmodule'
absmpath = join_path_native(rwrepo.working_tree_dir, mpath)
@@ -368,15 +411,17 @@ def test_base_bare(self, rwrepo):
@with_rw_repo(k_subm_current, bare=False)
def test_root_module(self, rwrepo):
# Can query everything without problems
- rm = RootModule(self.rorepo)
- assert rm.module() is self.rorepo
+ rm = RootModule(rwrepo)
+ # test new constructor
+ assert rm.parent_commit == RootModule(self.rorepo, self.rorepo.commit(self.k_subm_current)).parent_commit
+ assert rm.module() is rwrepo
# try attributes
rm.binsha
rm.mode
rm.path
assert rm.name == rm.k_root_name
- assert rm.parent_commit == self.rorepo.head.commit
+ assert rm.parent_commit == self.rorepo.commit(self.k_subm_current)
rm.url
rm.branch
@@ -384,9 +429,9 @@ def test_root_module(self, rwrepo):
rm.config_reader()
rm.config_writer()
- # deep traversal gitdb / async
+ # deep traversal git / async
rsmsp = [sm.path for sm in rm.traverse()]
- assert len(rsmsp) == 2 # gitdb and async, async being a child of gitdb
+ assert len(rsmsp) == 1 # gitdb only - it's not yet up to date so it has no submodule
# cannot set the parent commit as root module's path didn't exist
self.failUnlessRaises(ValueError, rm.set_parent_commit, 'HEAD')
@@ -406,8 +451,8 @@ def test_root_module(self, rwrepo):
prep = sm.path
assert not sm.module_exists() # was never updated after rwrepo's clone
- # assure we clone from a local source
- sm.config_writer().set_value('url', to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path)))
+ # assure we clone from a local source
+ self._rewrite_gitdb_to_local_path(sm)
# dry-run does nothing
sm.update(recursive=False, dry_run=True, progress=prog)
@@ -440,7 +485,7 @@ def test_root_module(self, rwrepo):
#================
nsmn = "newsubmodule"
nsmp = "submrepo"
- async_url = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1]))
+ async_url = self._generate_async_local_path()
nsm = Submodule.add(rwrepo, nsmn, nsmp, url=async_url)
csmadded = rwrepo.index.commit("Added submodule").hexsha # make sure we don't keep the repo reference
nsm.set_parent_commit(csmadded)
@@ -482,7 +527,11 @@ def test_root_module(self, rwrepo):
# to the first repository, this way we have a fast checkout, and a completely different
# repository at the different url
nsm.set_parent_commit(csmremoved)
- nsmurl = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0]))
+ nsmurl = os.environ.get(self.env_gitdb_local_path, self.k_github_gitdb_url)
+
+ # Note: We would have liked to have a different url, but we cannot
+ # provoke this case
+ assert nsm.url != nsmurl
nsm.config_writer().set_value('url', nsmurl)
csmpathchange = rwrepo.index.commit("changed url")
nsm.set_parent_commit(csmpathchange)
@@ -535,13 +584,13 @@ def test_root_module(self, rwrepo):
# =================
# finally we recursively update a module, just to run the code at least once
# remove the module so that it has more work
- assert len(nsm.children()) == 1
- assert nsm.exists() and nsm.module_exists() and len(nsm.children()) == 1
+ assert len(nsm.children()) >= 1 # could include smmap
+ assert nsm.exists() and nsm.module_exists() and len(nsm.children()) >= 1
# assure we pull locally only
nsmc = nsm.children()[0]
nsmc.config_writer().set_value('url', async_url)
rm.update(recursive=True, progress=prog, dry_run=True) # just to run the code
rm.update(recursive=True, progress=prog)
- assert len(nsm.children()) == 1 and nsmc.module_exists()
+ assert len(nsm.children()) >= 2 and nsmc.module_exists()
diff --git a/git/test/test_tree.py b/git/test/objects/test_tree.py
similarity index 97%
rename from git/test/test_tree.py
rename to git/test/objects/test_tree.py
index ec10e962a..bc8d3f973 100644
--- a/git/test/test_tree.py
+++ b/git/test/objects/test_tree.py
@@ -4,16 +4,18 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import os
-from git.test.lib import *
-from git import *
+
+from lib import *
from git.objects.fun import (
traverse_tree_recursive,
traverse_trees_recursive
)
+from git.objects.blob import Blob
+from git.objects.tree import Tree
from cStringIO import StringIO
+import os
-class TestTree(TestBase):
+class TestTree(TestObjectBase):
def test_serializable(self):
# tree at the given commit contains a submodule as well
diff --git a/git/test/performance/__init__.py b/git/test/performance/__init__.py
new file mode 100644
index 000000000..6bd117b96
--- /dev/null
+++ b/git/test/performance/__init__.py
@@ -0,0 +1,2 @@
+"""Note: This init file makes the performance tests an integral part of the test suite
+as nose will now pick them up. Previously the init file was intentionally omitted"""
diff --git a/git/test/performance/db/__init__.py b/git/test/performance/db/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/git/test/performance/db/__init__.py
@@ -0,0 +1 @@
+
diff --git a/git/test/performance/db/looseodb_impl.py b/git/test/performance/db/looseodb_impl.py
new file mode 100644
index 000000000..6d3c1fa60
--- /dev/null
+++ b/git/test/performance/db/looseodb_impl.py
@@ -0,0 +1,132 @@
+"""Performance data streaming performance"""
+from git.db.py import *
+from git.base import *
+from git.stream import *
+from async import ChannelThreadTask
+from git.util import (
+ pool,
+ bin_to_hex
+ )
+import os
+import sys
+from time import time
+
+from git.test.lib import (
+ GlobalsItemDeletorMetaCls,
+ make_memory_file,
+ with_rw_repo
+ )
+
+from git.test.performance.lib import TestBigRepoR
+
+
+#{ Utilities
+
+def read_chunked_stream(stream):
+ total = 0
+ while True:
+ chunk = stream.read(chunk_size)
+ total += len(chunk)
+ if len(chunk) < chunk_size:
+ break
+ # END read stream loop
+ assert total == stream.size
+ return stream
+
+
+class TestStreamReader(ChannelThreadTask):
+ """Expects input streams and reads them in chunks. It will read one at a time,
+ requiring a queue chunk of size 1"""
+ def __init__(self, *args):
+ super(TestStreamReader, self).__init__(*args)
+ self.fun = read_chunked_stream
+ self.max_chunksize = 1
+
+
+#} END utilities
+
+class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'TestLooseDBWPerformanceBase'
+
+
+class TestLooseDBWPerformanceBase(TestBigRepoR):
+ __metaclass__ = PerfBaseDeletorMetaClass
+
+ large_data_size_bytes = 1000*1000*10 # some MiB should do it
+ moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
+
+ #{ Configuration
+ LooseODBCls = None
+ #} END configuration
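+ # Illustrative sketch of a concrete subtype (the class name is made up; real
+ # subtypes live in the per-backend performance test modules):
+ #
+ #   from git.db.py.loose import PureLooseObjectODB
+ #
+ #   class TestPureLooseDBPerformance(TestLooseDBWPerformanceBase):
+ #       LooseODBCls = PureLooseObjectODB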
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestLooseDBWPerformanceBase, cls).setUpAll()
+ if cls.LooseODBCls is None:
+ raise AssertionError("LooseODBCls must be set in subtype")
+ #END assert configuration
+ # currently there is no additional configuration
+
+ @with_rw_repo("HEAD")
+ def test_large_data_streaming(self, rwrepo):
+ # TODO: This part overlaps with the same file in git.test.performance.test_stream
+ # It should be shared if possible
+ objects_path = rwrepo.db_path('')
+ ldb = self.LooseODBCls(objects_path)
+
+ for randomize in range(2):
+ desc = (randomize and 'random ') or ''
+ print >> sys.stderr, "Creating %s data ..." % desc
+ st = time()
+ size, stream = make_memory_file(self.large_data_size_bytes, randomize)
+ elapsed = time() - st
+ print >> sys.stderr, "Done (in %f s)" % elapsed
+
+ # writing - due to the compression it will seem faster than it is
+ st = time()
+ binsha = ldb.store(IStream('blob', size, stream)).binsha
+ elapsed_add = time() - st
+ assert ldb.has_object(binsha)
+ hexsha = bin_to_hex(binsha)
+ db_file = os.path.join(objects_path, hexsha[:2], hexsha[2:])
+ fsize_kib = os.path.getsize(db_file) / 1000
+
+
+ size_kib = size / 1000
+ print >> sys.stderr, "%s: Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (self.LooseODBCls.__name__, size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
+
+ # reading all at once
+ st = time()
+ ostream = ldb.stream(binsha)
+ shadata = ostream.read()
+ elapsed_readall = time() - st
+
+ stream.seek(0)
+ assert shadata == stream.getvalue()
+ print >> sys.stderr, "%s: Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (self.LooseODBCls.__name__, size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
+
+
+ # reading in chunks of 512 KB
+ cs = 512*1000
+ chunks = list()
+ st = time()
+ ostream = ldb.stream(binsha)
+ while True:
+ data = ostream.read(cs)
+ chunks.append(data)
+ if len(data) < cs:
+ break
+ # END read in chunks
+ elapsed_readchunks = time() - st
+
+ stream.seek(0)
+ assert ''.join(chunks) == stream.getvalue()
+
+ cs_kib = cs / 1000
+ print >> sys.stderr, "%s: Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (self.LooseODBCls.__name__, size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+
+ # del db file so git has something to do
+ os.remove(db_file)
+ # END for each randomization factor
+
+
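The module above (and the odb/packedodb counterparts that follow) all use the same pattern: an abstract performance test parameterized through a class-level configuration attribute (here LooseODBCls), plus a metaclass that removes the unconfigured base type from its module once a concrete subclass exists, so nose only collects the configured variants. GlobalsItemDeletorMetaCls itself is defined in git.test.lib and is not part of this patch; what follows is only a minimal sketch of the idea, and the real implementation may differ.

    import sys

    class GlobalsItemDeletorMetaCls(type):
        # name of the abstract base type to hide once it has been subclassed
        ModuleToDelete = None

        def __new__(mcls, name, bases, clsdict):
            new_type = super(GlobalsItemDeletorMetaCls, mcls).__new__(mcls, name, bases, clsdict)
            to_delete = getattr(mcls, 'ModuleToDelete', None)
            if to_delete and name != to_delete:
                # a concrete subclass was created: drop the abstract base from its
                # defining module so nose will not collect and run it unconfigured
                for base in bases:
                    if base.__name__ == to_delete:
                        mod = sys.modules.get(base.__module__)
                        if mod is not None and hasattr(mod, to_delete):
                            delattr(mod, to_delete)
                # END for each base
            # END handle concrete subclass
            return new_type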
diff --git a/git/test/performance/db/odb_impl.py b/git/test/performance/db/odb_impl.py
new file mode 100644
index 000000000..677cf6a85
--- /dev/null
+++ b/git/test/performance/db/odb_impl.py
@@ -0,0 +1,72 @@
+"""Performance tests for object store"""
+
+from time import time
+import sys
+import stat
+
+from git.test.performance.lib import (
+ TestBigRepoR,
+ GlobalsItemDeletorMetaCls
+ )
+
+class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'TestObjDBPerformanceBase'
+
+
+class TestObjDBPerformanceBase(TestBigRepoR):
+ __metaclass__ = PerfBaseDeletorMetaClass
+
+ #{ Configuration
+ RepoCls = None # to be set by subclass
+ #} END configuration
+
+ def test_random_access_test(self):
+ repo = self.rorepo
+
+ # GET COMMITS
+ st = time()
+ root_commit = repo.commit(self.head_sha_2k)
+ commits = list(root_commit.traverse())
+ nc = len(commits)
+ elapsed = time() - st
+
+ print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
+
+ # GET TREES
+ # walk all trees of all commits
+ st = time()
+ blobs_per_commit = list()
+ nt = 0
+ for commit in commits:
+ tree = commit.tree
+ blobs = list()
+ for item in tree.traverse():
+ nt += 1
+ if item.type == 'blob':
+ blobs.append(item)
+ # direct access for speed
+ # END while trees are there for walking
+ blobs_per_commit.append(blobs)
+ # END for each commit
+ elapsed = time() - st
+
+ print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
+
+ # GET BLOBS
+ st = time()
+ nb = 0
+ too_many = 15000
+ data_bytes = 0
+ for blob_list in blobs_per_commit:
+ for blob in blob_list:
+ data_bytes += len(blob.data_stream.read())
+ # END for each blobsha
+ nb += len(blob_list)
+ if nb > too_many:
+ break
+ # END for each bloblist
+ elapsed = time() - st
+
+ print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed)
+
+
diff --git a/git/test/performance/db/packedodb_impl.py b/git/test/performance/db/packedodb_impl.py
new file mode 100644
index 000000000..b95a8d13f
--- /dev/null
+++ b/git/test/performance/db/packedodb_impl.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Performance tests for object store"""
+from git.test.performance.lib import (
+ TestBigRepoR,
+ GlobalsItemDeletorMetaCls
+ )
+
+from git.exc import UnsupportedOperation
+
+import sys
+import os
+from time import time
+import random
+
+
+class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'TestPurePackedODBPerformanceBase'
+
+class TestPurePackedODBPerformanceBase(TestBigRepoR):
+ __metaclass__ = PerfBaseDeletorMetaClass
+
+ #{ Configuration
+ PackedODBCls = None
+ #} END configuration
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestPurePackedODBPerformanceBase, cls).setUpAll()
+ if cls.PackedODBCls is None:
+ raise AssertionError("PackedODBCls must be set in subclass")
+ #END assert configuration
+ cls.ropdb = cls.PackedODBCls(cls.rorepo.db_path("pack"))
+
+ def test_pack_random_access(self):
+ pdb = self.ropdb
+
+ # sha lookup
+ st = time()
+ sha_list = list(pdb.sha_iter())
+ elapsed = time() - st
+ ns = len(sha_list)
+ print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
+
+ # sha lookup: best-case and worst case access
+ pdb_pack_info = pdb._pack_info
+ # END shuffle shas
+ st = time()
+ for sha in sha_list:
+ pdb_pack_info(sha)
+ # END for each sha to look up
+ elapsed = time() - st
+
+ # discard cache
+ del(pdb._entities)
+ pdb.entities()
+ print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
+ # END for each random mode
+
+ # query info and streams only
+ max_items = 10000 # can wait longer when testing memory
+ for pdb_fun in (pdb.info, pdb.stream):
+ st = time()
+ for sha in sha_list[:max_items]:
+ pdb_fun(sha)
+ elapsed = time() - st
+ print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
+ # END for each function
+
+ # retrieve stream and read all
+ max_items = 5000
+ pdb_stream = pdb.stream
+ total_size = 0
+ st = time()
+ for sha in sha_list[:max_items]:
+ stream = pdb_stream(sha)
+ stream.read()
+ total_size += stream.size
+ elapsed = time() - st
+ total_kib = total_size / 1000
+ print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
+
+ def test_correctness(self):
+ pdb = self.ropdb
+ # disabled for now as it used to work perfectly; checking big repositories takes a long time
+ print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
+ for crc in range(2):
+ count = 0
+ st = time()
+ for entity in pdb.entities():
+ pack_verify = entity.is_valid_stream
+ sha_by_index = entity.index().sha
+ for index in xrange(entity.index().size()):
+ try:
+ assert pack_verify(sha_by_index(index), use_crc=crc)
+ count += 1
+ except UnsupportedOperation:
+ pass
+ # END ignore old indices
+ # END for each index
+ # END for each entity
+ elapsed = time() - st
+ print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
+ # END for each verify mode
+
diff --git a/git/test/performance/db/test_looseodb_cmd.py b/git/test/performance/db/test_looseodb_cmd.py
new file mode 100644
index 000000000..9738278c5
--- /dev/null
+++ b/git/test/performance/db/test_looseodb_cmd.py
@@ -0,0 +1,11 @@
+from git.db.complex import CmdCompatibilityGitDB
+from looseodb_impl import TestLooseDBWPerformanceBase
+
+import sys
+
+class TestCmdLooseDB(TestLooseDBWPerformanceBase):
+ LooseODBCls = CmdCompatibilityGitDB
+
+ def test_info(self):
+ sys.stderr.write("This test does not check the write performance of the git command as it is implemented in pure python")
+
diff --git a/git/test/performance/db/test_looseodb_dulwich.py b/git/test/performance/db/test_looseodb_dulwich.py
new file mode 100644
index 000000000..e123ebf19
--- /dev/null
+++ b/git/test/performance/db/test_looseodb_dulwich.py
@@ -0,0 +1,13 @@
+try:
+ from git.db.dulwich.complex import DulwichGitODB
+except ImportError:
+ from git.db.py.complex import PureGitODB as DulwichGitODB
+#END handle import
+
+from git.test.db.dulwich.lib import DulwichRequiredMetaMixin
+from looseodb_impl import TestLooseDBWPerformanceBase
+
+class TestPureLooseDB(TestLooseDBWPerformanceBase):
+ __metaclass__ = DulwichRequiredMetaMixin
+ LooseODBCls = DulwichGitODB
+
diff --git a/git/test/performance/db/test_looseodb_pure.py b/git/test/performance/db/test_looseodb_pure.py
new file mode 100644
index 000000000..46f39d5ea
--- /dev/null
+++ b/git/test/performance/db/test_looseodb_pure.py
@@ -0,0 +1,6 @@
+from git.db.py.loose import PureLooseObjectODB
+from looseodb_impl import TestLooseDBWPerformanceBase
+
+class TestPureLooseDB(TestLooseDBWPerformanceBase):
+ LooseODBCls = PureLooseObjectODB
+
diff --git a/git/test/performance/db/test_looseodb_pygit2.py b/git/test/performance/db/test_looseodb_pygit2.py
new file mode 100644
index 000000000..326af9fb4
--- /dev/null
+++ b/git/test/performance/db/test_looseodb_pygit2.py
@@ -0,0 +1,13 @@
+try:
+ from git.db.pygit2.complex import Pygit2GitODB
+except ImportError:
+ from git.db.py.complex import PureGitODB as Pygit2GitODB
+#END handle import
+
+from git.test.db.pygit2.lib import Pygit2RequiredMetaMixin
+from looseodb_impl import TestLooseDBWPerformanceBase
+
+class TestPureLooseDB(TestLooseDBWPerformanceBase):
+ __metaclass__ = Pygit2RequiredMetaMixin
+ LooseODBCls = Pygit2GitODB
+
diff --git a/git/test/performance/db/test_odb_cmd.py b/git/test/performance/db/test_odb_cmd.py
new file mode 100644
index 000000000..acd55cc90
--- /dev/null
+++ b/git/test/performance/db/test_odb_cmd.py
@@ -0,0 +1,6 @@
+from git.db.complex import CmdCompatibilityGitDB
+from odb_impl import TestObjDBPerformanceBase
+
+class TestCmdDB(TestObjDBPerformanceBase):
+ RepoCls = CmdCompatibilityGitDB
+
diff --git a/git/test/performance/db/test_odb_dulwich.py b/git/test/performance/db/test_odb_dulwich.py
new file mode 100644
index 000000000..6802483c3
--- /dev/null
+++ b/git/test/performance/db/test_odb_dulwich.py
@@ -0,0 +1,13 @@
+try:
+ from git.db.dulwich.complex import DulwichCompatibilityGitDB
+except ImportError:
+ from git.db.complex import PureCompatibilityGitDB as DulwichCompatibilityGitDB
+#END handle dulwich compatibility
+
+from git.test.db.dulwich.lib import DulwichRequiredMetaMixin
+from odb_impl import TestObjDBPerformanceBase
+
+class TestDulwichDB(TestObjDBPerformanceBase):
+ __metaclass__ = DulwichRequiredMetaMixin
+ RepoCls = DulwichCompatibilityGitDB
+
diff --git a/git/test/performance/db/test_odb_pure.py b/git/test/performance/db/test_odb_pure.py
new file mode 100644
index 000000000..6ed3585d4
--- /dev/null
+++ b/git/test/performance/db/test_odb_pure.py
@@ -0,0 +1,6 @@
+from git.db.complex import PureCompatibilityGitDB
+from odb_impl import TestObjDBPerformanceBase
+
+class TestPureDB(TestObjDBPerformanceBase):
+ RepoCls = PureCompatibilityGitDB
+
diff --git a/git/test/performance/db/test_odb_pygit2.py b/git/test/performance/db/test_odb_pygit2.py
new file mode 100644
index 000000000..bb7ed8a92
--- /dev/null
+++ b/git/test/performance/db/test_odb_pygit2.py
@@ -0,0 +1,13 @@
+try:
+ from git.db.pygit2.complex import Pygit2CompatibilityGitDB
+except ImportError:
+ from git.db.complex import PureCompatibilityGitDB as Pygit2CompatibilityGitDB
+#END handle pygit2 compatibility
+
+from git.test.db.pygit2.lib import Pygit2RequiredMetaMixin
+from odb_impl import TestObjDBPerformanceBase
+
+class TestPygit2DB(TestObjDBPerformanceBase):
+ __metaclass__ = Pygit2RequiredMetaMixin
+ RepoCls = Pygit2CompatibilityGitDB
+
diff --git a/git/test/performance/db/test_packedodb_pure.py b/git/test/performance/db/test_packedodb_pure.py
new file mode 100644
index 000000000..11497d9d1
--- /dev/null
+++ b/git/test/performance/db/test_packedodb_pure.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from packedodb_impl import TestPurePackedODBPerformanceBase
+from git.db.py.pack import PurePackedODB
+
+from git.stream import NullStream
+
+from git.pack import PackEntity
+
+import os
+import sys
+
+from time import time
+from nose import SkipTest
+
+
+class CountedNullStream(NullStream):
+ __slots__ = '_bw'
+ def __init__(self):
+ self._bw = 0
+
+ def bytes_written(self):
+ return self._bw
+
+ def write(self, d):
+ self._bw += NullStream.write(self, d)
+
+
+class TestPurePackedODB(TestPurePackedODBPerformanceBase):
+ #{ Configuration
+ PackedODBCls = PurePackedODB
+ #} END configuration
+
+ def test_pack_writing_note(self):
+ sys.stderr.write("test_pack_writing should be adjusted to support different databases to read from - see test for more info")
+ raise SkipTest()
+
+ def test_pack_writing(self):
+ # see how fast we can write a pack from object streams.
+ # This will not be fast, as we take time for decompressing the streams as well
+ # For now we test the fast streaming and slow streaming versions manually
+ ostream = CountedNullStream()
+ # NOTE: We use the same repo twice to see whether OS caching helps
+ for rorepo in (self.rorepo, self.rorepo, self.ropdb):
+
+ ni = 5000
+ count = 0
+ total_size = 0
+ st = time()
+ for sha in rorepo.sha_iter():
+ count += 1
+ rorepo.stream(sha)
+ if count == ni:
+ break
+ #END gather objects for pack-writing
+ elapsed = time() - st
+ print >> sys.stderr, "PDB Streaming: Got %i streams from %s by sha in in %f s ( %f streams/s )" % (count, rorepo.__class__.__name__, elapsed, count / elapsed)
+
+ st = time()
+ PackEntity.write_pack((rorepo.stream(sha) for sha in rorepo.sha_iter()), ostream.write, object_count=ni)
+ elapsed = time() - st
+ total_kb = ostream.bytes_written() / 1000
+ print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
+ #END for each rorepo
+
+
+ def test_stream_reading(self):
+ raise SkipTest("This test was only used for --with-profile runs")
+ pdb = self.ropdb
+
+ # streaming only, meant for --with-profile runs
+ ni = 5000
+ count = 0
+ pdb_stream = pdb.stream
+ total_size = 0
+ st = time()
+ for sha in pdb.sha_iter():
+ if count == ni:
+ break
+ stream = pdb_stream(sha)
+ stream.read()
+ total_size += stream.size
+ count += 1
+ elapsed = time() - st
+ total_kib = total_size / 1000
+ print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed)
+
diff --git a/git/test/performance/lib.py b/git/test/performance/lib.py
index d0727b600..758d402df 100644
--- a/git/test/performance/lib.py
+++ b/git/test/performance/lib.py
@@ -1,17 +1,13 @@
"""Contains library functions"""
import os
-from git.test.lib import *
+from git.test.lib import (
+ TestBase,
+ GlobalsItemDeletorMetaCls
+ )
import shutil
import tempfile
-from git.db import (
- GitCmdObjectDB,
- GitDB
- )
-
-from git import (
- Repo
- )
+from git import Repo
#{ Invariants
k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE"
@@ -38,11 +34,7 @@ class TestBigRepoR(TestBase):
* gitrorepo
- * Read-Only git repository - actually the repo of git itself
-
- * puregitrorepo
-
- * As gitrepo, but uses pure python implementation
+ * a big read-only git repository
"""
#{ Invariants
@@ -50,29 +42,33 @@ class TestBigRepoR(TestBase):
head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
#} END invariants
+ #{ Configuration
+ RepoCls = Repo
+ #} END configuration
+
@classmethod
def setUpAll(cls):
super(TestBigRepoR, cls).setUpAll()
- repo_path = resolve_or_fail(k_env_git_repo)
- cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB)
- cls.puregitrorepo = Repo(repo_path, odbt=GitDB)
+ if cls.RepoCls is None:
+ raise AssertionError("Require RepoCls in class %s to be set" % cls)
+ #END assert configuration
+ cls.rorepo = cls.RepoCls(resolve_or_fail(k_env_git_repo))
class TestBigRepoRW(TestBigRepoR):
"""As above, but provides a big repository that we can write to.
- Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``"""
+ Provides ``self.rwrepo``"""
@classmethod
def setUpAll(cls):
super(TestBigRepoRW, cls).setUpAll()
dirname = tempfile.mktemp()
os.mkdir(dirname)
- cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB)
- cls.puregitrwrepo = Repo(dirname, odbt=GitDB)
+ cls.rwrepo = cls.rorepo.clone(dirname, shared=True, bare=True)
@classmethod
def tearDownAll(cls):
- shutil.rmtree(cls.gitrwrepo.working_dir)
+ shutil.rmtree(cls.rwrepo.working_dir)
#} END base classes
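The reworked performance lib resolves the benchmark repository from the GIT_PYTHON_TEST_GIT_REPO_BASE environment variable (k_env_git_repo above) and instantiates the configured RepoCls on it. As a hedged usage sketch, assuming the variable only needs to point at an existing repository on disk (the exact layout expected by resolve_or_fail is not shown in this hunk), a run could be configured like this, with the path purely illustrative:

    import os
    # must be set before the performance test classes are set up
    os.environ["GIT_PYTHON_TEST_GIT_REPO_BASE"] = "/path/to/a/large/repository"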
diff --git a/git/test/performance/objects/__init__.py b/git/test/performance/objects/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/git/test/performance/objects/__init__.py
@@ -0,0 +1 @@
+
diff --git a/git/test/performance/test_commit.py b/git/test/performance/objects/test_commit.py
similarity index 84%
rename from git/test/performance/test_commit.py
rename to git/test/performance/objects/test_commit.py
index 80421aa24..685fba2fc 100644
--- a/git/test/performance/test_commit.py
+++ b/git/test/performance/objects/test_commit.py
@@ -4,18 +4,18 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from lib import *
+from git.test.performance.lib import TestBigRepoRW
from git import *
-from gitdb import IStream
-from git.test.test_commit import assert_commit_serialization
+from git.base import IStream
+from git.test.objects.test_commit import assert_commit_serialization
from cStringIO import StringIO
from time import time
import sys
class TestPerformance(TestBigRepoRW):
-
+
# ref with about 100 commits in its history
- ref_100 = '0.1.6'
+ ref_100 = 'v0.99'
def _query_commit_info(self, c):
c.author
@@ -45,13 +45,14 @@ def test_iteration(self):
# END for each object
# END for each commit
elapsed_time = time() - st
+ assert no, "Should have traversed a few objects"
print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time)
def test_commit_traversal(self):
# bound to cat-file parsing performance
nc = 0
st = time()
- for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False):
+ for c in self.rorepo.commit(self.head_sha_2k).traverse(branch_first=False):
nc += 1
self._query_commit_info(c)
# END for each traversed commit
@@ -62,7 +63,7 @@ def test_commit_iteration(self):
# bound to stream parsing performance
nc = 0
st = time()
- for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k):
+ for c in Commit.iter_items(self.rorepo, self.head_sha_2k):
nc += 1
self._query_commit_info(c)
# END for each traversed commit
@@ -70,10 +71,10 @@ def test_commit_iteration(self):
print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
def test_commit_serialization(self):
- assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True)
+ assert_commit_serialization(self.rwrepo, self.head_sha_2k, True)
- rwrepo = self.gitrwrepo
- make_object = rwrepo.odb.store
+ rwrepo = self.rwrepo
+ make_object = rwrepo.store
# direct serialization - deserialization can be tested afterwards
# serialization is probably limited on IO
hc = rwrepo.commit(self.head_sha_2k)
diff --git a/git/test/performance/test_odb.py b/git/test/performance/test_odb.py
deleted file mode 100644
index 32b70f69a..000000000
--- a/git/test/performance/test_odb.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Performance tests for object store"""
-
-from time import time
-import sys
-import stat
-
-from lib import (
- TestBigRepoR
- )
-
-
-class TestObjDBPerformance(TestBigRepoR):
-
- def test_random_access(self):
- results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ]
- for repo in (self.gitrorepo, self.puregitrorepo):
- # GET COMMITS
- st = time()
- root_commit = repo.commit(self.head_sha_2k)
- commits = list(root_commit.traverse())
- nc = len(commits)
- elapsed = time() - st
-
- print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
- results[0].append(elapsed)
-
- # GET TREES
- # walk all trees of all commits
- st = time()
- blobs_per_commit = list()
- nt = 0
- for commit in commits:
- tree = commit.tree
- blobs = list()
- for item in tree.traverse():
- nt += 1
- if item.type == 'blob':
- blobs.append(item)
- # direct access for speed
- # END while trees are there for walking
- blobs_per_commit.append(blobs)
- # END for each commit
- elapsed = time() - st
-
- print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
- results[1].append(elapsed)
-
- # GET BLOBS
- st = time()
- nb = 0
- too_many = 15000
- data_bytes = 0
- for blob_list in blobs_per_commit:
- for blob in blob_list:
- data_bytes += len(blob.data_stream.read())
- # END for each blobsha
- nb += len(blob_list)
- if nb > too_many:
- break
- # END for each bloblist
- elapsed = time() - st
-
- print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed)
- results[2].append(elapsed)
- # END for each repo type
-
- # final results
- for test_name, a, b in results:
- print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a)
- # END for each result
diff --git a/git/test/performance/test_streams.py b/git/test/performance/test_streams.py
deleted file mode 100644
index 7f17d722d..000000000
--- a/git/test/performance/test_streams.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""Performance data streaming performance"""
-
-from git.test.lib import *
-from gitdb import *
-from gitdb.util import bin_to_hex
-
-from time import time
-import os
-import sys
-import stat
-import subprocess
-
-from gitdb.test.lib import make_memory_file
-
-from lib import (
- TestBigRepoR
- )
-
-
-class TestObjDBPerformance(TestBigRepoR):
-
- large_data_size_bytes = 1000*1000*10 # some MiB should do it
- moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
-
- @with_rw_repo('HEAD', bare=True)
- def test_large_data_streaming(self, rwrepo):
- # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
- # It should be shared if possible
- ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
-
- for randomize in range(2):
- desc = (randomize and 'random ') or ''
- print >> sys.stderr, "Creating %s data ..." % desc
- st = time()
- size, stream = make_memory_file(self.large_data_size_bytes, randomize)
- elapsed = time() - st
- print >> sys.stderr, "Done (in %f s)" % elapsed
-
- # writing - due to the compression it will seem faster than it is
- st = time()
- binsha = ldb.store(IStream('blob', size, stream)).binsha
- elapsed_add = time() - st
- assert ldb.has_object(binsha)
- db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
- fsize_kib = os.path.getsize(db_file) / 1000
-
-
- size_kib = size / 1000
- print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
-
- # reading all at once
- st = time()
- ostream = ldb.stream(binsha)
- shadata = ostream.read()
- elapsed_readall = time() - st
-
- stream.seek(0)
- assert shadata == stream.getvalue()
- print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
-
-
- # reading in chunks of 1 MiB
- cs = 512*1000
- chunks = list()
- st = time()
- ostream = ldb.stream(binsha)
- while True:
- data = ostream.read(cs)
- chunks.append(data)
- if len(data) < cs:
- break
- # END read in chunks
- elapsed_readchunks = time() - st
-
- stream.seek(0)
- assert ''.join(chunks) == stream.getvalue()
-
- cs_kib = cs / 1000
- print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
-
- # del db file so git has something to do
- os.remove(db_file)
-
- # VS. CGIT
- ##########
- # CGIT ! Can using the cgit programs be faster ?
- proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
-
- # write file - pump everything in at once to be a fast as possible
- data = stream.getvalue() # cache it
- st = time()
- proc.stdin.write(data)
- proc.stdin.close()
- gitsha = proc.stdout.read().strip()
- proc.wait()
- gelapsed_add = time() - st
- del(data)
- assert gitsha == bin_to_hex(binsha) # we do it the same way, right ?
-
- # as its the same sha, we reuse our path
- fsize_kib = os.path.getsize(db_file) / 1000
- print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
-
- # compare ...
- print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
-
-
- # read all
- st = time()
- s, t, size, data = rwrepo.git.get_object_data(gitsha)
- gelapsed_readall = time() - st
- print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall)
-
- # compare
- print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
-
-
- # read chunks
- st = time()
- s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
- while True:
- data = stream.read(cs)
- if len(data) < cs:
- break
- # END read stream
- gelapsed_readchunks = time() - st
- print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
-
- # compare
- print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
- # END for each randomization factor
diff --git a/git/test/refs/__init__.py b/git/test/refs/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/git/test/refs/__init__.py
@@ -0,0 +1 @@
+
diff --git a/git/test/test_reflog.py b/git/test/refs/test_reflog.py
similarity index 94%
rename from git/test/test_reflog.py
rename to git/test/refs/test_reflog.py
index 3fdf1faea..afa00d924 100644
--- a/git/test/test_reflog.py
+++ b/git/test/refs/test_reflog.py
@@ -1,4 +1,4 @@
-from git.test.lib import *
+from git.test.lib import TestBase, fixture_path
from git.objects import IndexObject
from git.refs import *
from git.util import Actor
@@ -34,9 +34,7 @@ def test_base(self):
tdir = tempfile.mktemp(suffix="test_reflogs")
os.mkdir(tdir)
- # verify we have a ref - with the creation of a new ref, the reflog
- # will be created as well
- rlp_master_ro = RefLog.path(self.rorepo.heads.master)
+ rlp_master_ro = RefLog.path(self.rorepo.head)
assert os.path.isfile(rlp_master_ro)
# simple read
diff --git a/git/test/test_refs.py b/git/test/refs/test_refs.py
similarity index 88%
rename from git/test/test_refs.py
rename to git/test/refs/test_refs.py
index 2338b4e43..81be173cd 100644
--- a/git/test/test_refs.py
+++ b/git/test/refs/test_refs.py
@@ -4,43 +4,53 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from mock import *
-from git.test.lib import *
-from git import *
-import git.refs as refs
+from git.test.lib import TestBase, with_rw_repo
+from git.refs import *
+import git.refs as ref
+
from git.util import Actor
from git.objects.tag import TagObject
+
+from git.exc import GitCommandError
+
from itertools import chain
import os
+from nose import SkipTest
+
class TestRefs(TestBase):
def test_from_path(self):
# should be able to create any reference directly
- for ref_type in ( Reference, Head, TagReference, RemoteReference ):
+ for ref_type in (Reference, Head, TagReference, RemoteReference):
for name in ('rela_name', 'path/rela_name'):
full_path = ref_type.to_full_path(name)
instance = ref_type.from_path(self.rorepo, full_path)
assert isinstance(instance, ref_type)
# END for each name
# END for each type
+
+ # invalid path
+ self.failUnlessRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag")
+ # works without path check
+ TagReference(self.rorepo, "refs/invalid/tag", check_path=False)
def test_tag_base(self):
tag_object_refs = list()
- for tag in self.rorepo.tags:
+ for tag in TagReference.list_items(self.rorepo):
assert "refs/tags" in tag.path
assert tag.name
- assert isinstance( tag.commit, Commit )
+ assert isinstance(tag.commit, tag.CommitCls)
if tag.tag is not None:
- tag_object_refs.append( tag )
+ tag_object_refs.append(tag)
tagobj = tag.tag
# have no dict
self.failUnlessRaises(AttributeError, setattr, tagobj, 'someattr', 1)
- assert isinstance( tagobj, TagObject )
+ assert isinstance(tagobj, TagObject)
assert tagobj.tag == tag.name
- assert isinstance( tagobj.tagger, Actor )
- assert isinstance( tagobj.tagged_date, int )
- assert isinstance( tagobj.tagger_tz_offset, int )
+ assert isinstance(tagobj.tagger, Actor)
+ assert isinstance(tagobj.tagged_date, int)
+ assert isinstance(tagobj.tagger_tz_offset, int)
assert tagobj.message
assert tag.object == tagobj
# can't assign the object
@@ -48,15 +58,15 @@ def test_tag_base(self):
# END if we have a tag object
# END for tag in repo-tags
assert tag_object_refs
- assert isinstance(self.rorepo.tags['0.1.5'], TagReference)
+ assert isinstance(TagReference.list_items(self.rorepo)['0.1.6'], TagReference)
def test_tags(self):
# tag refs can point to tag objects or to commits
s = set()
ref_count = 0
- for ref in chain(self.rorepo.tags, self.rorepo.heads):
+ for ref in chain(TagReference.list_items(self.rorepo), Head.list_items(self.rorepo)):
ref_count += 1
- assert isinstance(ref, refs.Reference)
+ assert isinstance(ref, Reference)
assert str(ref) == ref.name
assert repr(ref)
assert ref == ref
@@ -66,9 +76,9 @@ def test_tags(self):
assert len(s) == ref_count
assert len(s|s) == ref_count
- @with_rw_repo('HEAD', bare=False)
- def test_heads(self, rwrepo):
- for head in rwrepo.heads:
+ @with_rw_repo("0.1.6")
+ def test_heads(self, rw_repo):
+ for head in Head.iter_items(rw_repo):
assert head.name
assert head.path
assert "refs/heads" in head.path
@@ -88,7 +98,7 @@ def test_heads(self, rwrepo):
# after the clone, we might still have a tracking branch setup
head.set_tracking_branch(None)
assert head.tracking_branch() is None
- remote_ref = rwrepo.remotes[0].refs[0]
+ remote_ref = RemoteReference.list_items(rw_repo)[0]
assert head.set_tracking_branch(remote_ref) is head
assert head.tracking_branch() == remote_ref
head.set_tracking_branch(None)
@@ -96,7 +106,7 @@ def test_heads(self, rwrepo):
# END for each head
# verify REFLOG gets altered
- head = rwrepo.head
+ head = HEAD(rw_repo)
cur_head = head.ref
cur_commit = cur_head.commit
pcommit = cur_head.commit.parents[0].parents[0]
@@ -130,33 +140,33 @@ def test_heads(self, rwrepo):
assert len(cur_head.log()) == blog_len+2
# a new branch has just a single entry
- other_head = Head.create(rwrepo, 'mynewhead', pcommit, logmsg='new head created')
+ other_head = Head.create(rw_repo, 'mynewhead', pcommit, logmsg='new head created')
log = other_head.log()
assert len(log) == 1
assert log[0].oldhexsha == pcommit.NULL_HEX_SHA
assert log[0].newhexsha == pcommit.hexsha
-
def test_refs(self):
types_found = set()
- for ref in self.rorepo.refs:
+ for ref in Reference.list_items(self.rorepo):
types_found.add(type(ref))
assert len(types_found) >= 3
def test_is_valid(self):
assert Reference(self.rorepo, 'refs/doesnt/exist').is_valid() == False
- assert self.rorepo.head.is_valid()
- assert self.rorepo.head.reference.is_valid()
+ assert HEAD(self.rorepo).is_valid()
+ assert HEAD(self.rorepo).reference.is_valid()
assert SymbolicReference(self.rorepo, 'hellothere').is_valid() == False
def test_orig_head(self):
- assert type(self.rorepo.head.orig_head()) == SymbolicReference
+ assert type(HEAD(self.rorepo).orig_head()) == SymbolicReference
- @with_rw_repo('0.1.6')
+ @with_rw_repo("0.1.6")
def test_head_reset(self, rw_repo):
- cur_head = rw_repo.head
+ cur_head = HEAD(rw_repo)
old_head_commit = cur_head.commit
new_head_commit = cur_head.ref.commit.parents[0]
+
cur_head.reset(new_head_commit, index=True) # index only
assert cur_head.reference.commit == new_head_commit
@@ -176,10 +186,9 @@ def test_head_reset(self, rw_repo):
cur_head.reset(new_head_commit)
rw_repo.index.checkout(["lib"], force=True)#
-
# now that we have a writable repo, change the HEAD reference - it's
# like git-reset --soft
- heads = rw_repo.heads
+ heads = Head.list_items(rw_repo)
assert heads
for head in heads:
cur_head.reference = head
@@ -198,7 +207,7 @@ def test_head_reset(self, rw_repo):
self.failUnlessRaises(TypeError, getattr, cur_head, "reference")
# tags are references, hence we can point to them
- some_tag = rw_repo.tags[0]
+ some_tag = TagReference.list_items(rw_repo)[0]
cur_head.reference = some_tag
assert not cur_head.is_detached
assert cur_head.commit == some_tag.commit
@@ -231,7 +240,7 @@ def test_head_reset(self, rw_repo):
old_name = new_head.name
assert new_head.rename("hello").name == "hello"
- assert new_head.rename("hello/world").name == "hello/world"
+ assert new_head.rename("hello/world").name == "hello/world" # yes, this must work
assert new_head.rename(old_name).name == old_name and new_head.path == old_path
# rename with force
@@ -280,6 +289,15 @@ def test_head_reset(self, rw_repo):
assert remotes
for remote in remotes:
refs = remote.refs
+
+ # If a HEAD exists, it must be deleted first. Otherwise it might
+ # end up pointing to an invalid ref if the ref was deleted before.
+ remote_head_name = "HEAD"
+ if remote_head_name in refs:
+ RemoteReference.delete(rw_repo, refs[remote_head_name])
+ del(refs[remote_head_name])
+ #END handle HEAD deletion
+
RemoteReference.delete(rw_repo, *refs)
remote_refs_so_far += len(refs)
for ref in refs:
@@ -414,12 +432,11 @@ def test_head_reset(self, rw_repo):
symbol_ref_path = "refs/symbol_ref"
symref = SymbolicReference(rw_repo, symbol_ref_path)
assert symref.path == symbol_ref_path
- symbol_ref_abspath = os.path.join(rw_repo.git_dir, symref.path)
# set it
symref.reference = new_head
assert symref.reference == new_head
- assert os.path.isfile(symbol_ref_abspath)
+ assert os.path.isfile(symref.abspath)
assert symref.commit == new_head.commit
for name in ('absname','folder/rela_name'):
@@ -471,7 +488,7 @@ def test_head_reset(self, rw_repo):
rw_repo.head.reference = Head.create(rw_repo, "master")
# At least the head should still exist
- assert os.path.isfile(os.path.join(rw_repo.git_dir, 'HEAD'))
+ assert os.path.isfile(rw_repo.head.abspath)
refs = list(SymbolicReference.iter_items(rw_repo))
assert len(refs) == 1
@@ -517,5 +534,7 @@ def test_dereference_recursive(self):
assert SymbolicReference.dereference_recursive(self.rorepo, 'HEAD')
def test_reflog(self):
- assert isinstance(self.rorepo.heads.master.log(), RefLog)
+ assert isinstance(Head.list_items(self.rorepo).master.log(), RefLog)
+ def test_pure_python_rename(self):
+ raise SkipTest("Pure python reference renames cannot properly handle refnames which become a directory after rename")
diff --git a/git/test/test_actor.py b/git/test/test_actor.py
deleted file mode 100644
index b8e5ba3b1..000000000
--- a/git/test/test_actor.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# test_actor.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
-from git.test.lib import *
-from git import *
-
-class TestActor(object):
- def test_from_string_should_separate_name_and_email(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal("Michael Trier", a.name)
- assert_equal("mtrier@example.com", a.email)
-
- # base type capabilities
- assert a == a
- assert not ( a != a )
- m = set()
- m.add(a)
- m.add(a)
- assert len(m) == 1
-
- def test_from_string_should_handle_just_name(self):
- a = Actor._from_string("Michael Trier")
- assert_equal("Michael Trier", a.name)
- assert_equal(None, a.email)
-
- def test_should_display_representation(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal('<git.Actor "Michael Trier <mtrier@example.com>">', repr(a))
-
- def test_str_should_alias_name(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal(a.name, str(a))
\ No newline at end of file
diff --git a/git/test/test_base.py b/git/test/test_base.py
index e630d1513..7488ac6b4 100644
--- a/git/test/test_base.py
+++ b/git/test/test_base.py
@@ -3,18 +3,48 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import (
+ TestBase,
+ with_rw_repo,
+ DummyStream,
+ DeriveTest,
+ with_rw_and_rw_remote_repo
+ )
import git.objects.base as base
+from git.objects import (
+ Blob,
+ Tree,
+ Commit,
+ TagObject
+ )
import git.refs as refs
-import os
-from git.test.lib import *
-from git import *
+
from itertools import chain
from git.objects.util import get_object_type_by_name
-from gitdb.util import hex_to_bin
+from git.util import hex_to_bin
import tempfile
+##################
+
+from git.util import (
+ NULL_BIN_SHA
+ )
+
+from git.typ import str_blob_type
+from git.base import (
+ OInfo,
+ OPackInfo,
+ ODeltaPackInfo,
+ OStream,
+ OPackStream,
+ ODeltaPackStream,
+ IStream,
+ )
+
+import os
+
class TestBase(TestBase):
type_tuples = ( ("blob", "8741fc1d09d61f02ffd8cded15ff603eff1ec070", "blob.py"),
@@ -77,7 +107,7 @@ def test_get_object_type_by_name(self):
assert base.Object in get_object_type_by_name(tname).mro()
# END for each known type
- assert_raises( ValueError, get_object_type_by_name, "doesntexist" )
+ self.failUnlessRaises(ValueError, get_object_type_by_name, "doesntexist")
def test_object_resolution(self):
# objects must be resolved to shas so they compare equal
@@ -98,3 +128,85 @@ def test_with_rw_remote_and_rw_repo(self, rw_repo, rw_remote_repo):
assert not rw_repo.config_reader("repository").getboolean("core", "bare")
assert rw_remote_repo.config_reader("repository").getboolean("core", "bare")
assert os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib'))
+
+
+
+class TestBaseTypes(TestBase):
+
+ def test_streams(self):
+ # test info
+ sha = NULL_BIN_SHA
+ s = 20
+ blob_id = 3
+
+ info = OInfo(sha, str_blob_type, s)
+ assert info.binsha == sha
+ assert info.type == str_blob_type
+ assert info.type_id == blob_id
+ assert info.size == s
+
+ # test pack info
+ # provides type_id
+ pinfo = OPackInfo(0, blob_id, s)
+ assert pinfo.type == str_blob_type
+ assert pinfo.type_id == blob_id
+ assert pinfo.pack_offset == 0
+
+ dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
+ assert dpinfo.type == str_blob_type
+ assert dpinfo.type_id == blob_id
+ assert dpinfo.delta_info == sha
+ assert dpinfo.pack_offset == 0
+
+
+ # test ostream
+ stream = DummyStream()
+ ostream = OStream(*(info + (stream, )))
+ assert ostream.stream is stream
+ ostream.read(15)
+ stream._assert()
+ assert stream.bytes == 15
+ ostream.read(20)
+ assert stream.bytes == 20
+
+ # test packstream
+ postream = OPackStream(*(pinfo + (stream, )))
+ assert postream.stream is stream
+ postream.read(10)
+ stream._assert()
+ assert stream.bytes == 10
+
+ # test deltapackstream
+ dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
+ assert dpostream.stream is stream
+ dpostream.read(5)
+ stream._assert()
+ assert stream.bytes == 5
+
+ # derive with own args
+ DeriveTest(sha, str_blob_type, s, stream, 'mine',myarg = 3)._assert()
+
+ # test istream
+ istream = IStream(str_blob_type, s, stream)
+ assert istream.binsha == None
+ istream.binsha = sha
+ assert istream.binsha == sha
+
+ assert len(istream.binsha) == 20
+ assert len(istream.hexsha) == 40
+
+ assert istream.size == s
+ istream.size = s * 2
+ assert istream.size == s * 2
+ assert istream.type == str_blob_type
+ istream.type = "something"
+ assert istream.type == "something"
+ assert istream.stream is stream
+ istream.stream = None
+ assert istream.stream is None
+
+ assert istream.error is None
+ istream.error = Exception()
+ assert isinstance(istream.error, Exception)
+
+
diff --git a/git/test/test_cmd.py b/git/test/test_cmd.py
new file mode 100644
index 000000000..2d38e0a80
--- /dev/null
+++ b/git/test/test_cmd.py
@@ -0,0 +1,110 @@
+# test_git.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+import os, sys
+from git.test.lib import (
+ TestBase,
+ patch_object,
+ raises,
+ assert_equal,
+ assert_true,
+ assert_match,
+ fixture_path
+ )
+from git import Git, GitCommandError
+
+class TestGit(TestBase):
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestGit, cls).setUpAll()
+ cls.git = Git(cls.rorepo.working_dir)
+
+ @patch_object(Git, 'execute')
+ def test_call_process_calls_execute(self, git):
+ git.return_value = ''
+ self.git.version()
+ assert_true(git.called)
+ assert_equal(git.call_args, ((['git', 'version'],), {}))
+
+ @raises(GitCommandError)
+ def test_it_raises_errors(self):
+ self.git.this_does_not_exist()
+
+
+ def test_it_transforms_kwargs_into_git_command_arguments(self):
+ assert_equal(["-s"], self.git.transform_kwargs(**{'s': True}))
+ assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5}))
+
+ assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True}))
+ assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5}))
+
+ assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True}))
+
+ def test_it_executes_git_to_shell_and_returns_result(self):
+ assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"]))
+
+ def test_it_accepts_stdin(self):
+ filename = fixture_path("cat_file_blob")
+ fh = open(filename, 'r')
+ assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8",
+ self.git.hash_object(istream=fh, stdin=True))
+ fh.close()
+
+ @patch_object(Git, 'execute')
+ def test_it_ignores_false_kwargs(self, git):
+ # a kwarg with a False value must be dropped and not reach the executed command
+ output = self.git.version(pass_this_kwarg=False)
+ assert_true("pass_this_kwarg" not in git.call_args[1])
+
+ def test_persistent_cat_file_command(self):
+ # read header only
+ import subprocess as sp
+ hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167"
+ g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True)
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ obj_info = g.stdout.readline()
+
+ # read header + data
+ g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True)
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ obj_info_two = g.stdout.readline()
+ assert obj_info == obj_info_two
+
+ # read data - have to read it in one large chunk
+ size = int(obj_info.split()[2])
+ data = g.stdout.read(size)
+ terminating_newline = g.stdout.read(1)
+
+ # now we should be able to read a new object
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ assert g.stdout.readline() == obj_info
+
+
+ # same can be achieved using the respective command functions
+ hexsha, typename, size = self.git.get_object_header(hexsha)
+ hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha)
+ assert typename == typename_two and size == size_two
+
+ def test_version(self):
+ v = self.git.version_info
+ assert isinstance(v, tuple)
+ for n in v:
+ assert isinstance(n, int)
+ #END verify number types
+
+ def test_cmd_override(self):
+ prev_cmd = self.git.GIT_PYTHON_GIT_EXECUTABLE
+ try:
+ # set it to something that doesn't exist and ensure it raises
+ type(self.git).GIT_PYTHON_GIT_EXECUTABLE = os.path.join("some", "path", "which", "doesn't", "exist", "gitbinary")
+ self.failUnlessRaises(OSError, self.git.version)
+ finally:
+ type(self.git).GIT_PYTHON_GIT_EXECUTABLE = prev_cmd
+ #END undo adjustment
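test_version and test_cmd_override above exercise the new version_info property and the class-level GIT_PYTHON_GIT_EXECUTABLE attribute. A minimal usage sketch based on those tests follows; the executable path is illustrative only, and restoring the previous value afterwards (as the test does) is advisable:

    from git import Git

    g = Git()
    assert isinstance(g.version_info, tuple)   # tuple of ints, e.g. (1, 7, 6)

    # the executable is configured on the type itself and affects later invocations
    previous = Git.GIT_PYTHON_GIT_EXECUTABLE
    Git.GIT_PYTHON_GIT_EXECUTABLE = "/usr/local/bin/git"
    Git.GIT_PYTHON_GIT_EXECUTABLE = previous   # restore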
diff --git a/git/test/test_config.py b/git/test/test_config.py
index 173e380cb..d2e199e31 100644
--- a/git/test/test_config.py
+++ b/git/test/test_config.py
@@ -4,13 +4,13 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import TestBase, fixture_path
import StringIO
+from git.config import *
from copy import copy
from ConfigParser import NoSectionError
-class TestBase(TestCase):
+class TestConfig(TestBase):
def _to_memcache(self, file_path):
fp = open(file_path, "r")
@@ -30,7 +30,9 @@ def test_read_write(self):
w_config.read() # enforce reading
assert w_config._sections
w_config.write() # enforce writing
- assert file_obj.getvalue() == file_obj_orig.getvalue()
+
+ # we stripped lines when reading, so the results differ
+ assert file_obj.getvalue() != file_obj_orig.getvalue()
# creating an additional config writer must fail due to exclusive access
self.failUnlessRaises(IOError, GitConfigParser, file_obj, read_only = False)
@@ -56,10 +58,10 @@ def test_read_write(self):
file_obj.seek(0)
r_config = GitConfigParser(file_obj, read_only=True)
+ #print file_obj.getvalue()
assert r_config.has_section(sname)
assert r_config.has_option(sname, oname)
assert r_config.get(sname, oname) == val
-
# END for each filename
def test_base(self):
diff --git a/git/test/test_db.py b/git/test/test_db.py
deleted file mode 100644
index db2d79836..000000000
--- a/git/test/test_db.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# test_repo.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git.db import *
-from gitdb.util import bin_to_hex
-from git.exc import BadObject
-import os
-
-class TestDB(TestBase):
-
- def test_base(self):
- gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, 'objects'), self.rorepo.git)
-
- # partial to complete - works with everything
- hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
- assert len(hexsha) == 40
-
- assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha
-
- # fails with BadObject
- for invalid_rev in ("0000", "bad/ref", "super bad"):
- self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 83db2df67..79f038e84 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -4,8 +4,15 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import (
+ TestBase,
+ StringProcessAdapter,
+ fixture,
+ assert_equal,
+ assert_true
+ )
+
+from git.diff import *
class TestDiff(TestBase):
diff --git a/git/test/test_example.py b/git/test/test_example.py
new file mode 100644
index 000000000..dbab31189
--- /dev/null
+++ b/git/test/test_example.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module with examples from the tutorial section of the docs"""
+from lib import TestBase, fixture_path
+from git.base import IStream
+from git.db.py.loose import PureLooseObjectODB
+from git.util import pool
+
+from cStringIO import StringIO
+
+from async import IteratorReader
+
+class TestExamples(TestBase):
+
+ def test_base(self):
+ ldb = PureLooseObjectODB(fixture_path("../../../.git/objects"))
+
+ for sha1 in ldb.sha_iter():
+ oinfo = ldb.info(sha1)
+ ostream = ldb.stream(sha1)
+ assert oinfo[:3] == ostream[:3]
+
+ assert len(ostream.read()) == ostream.size
+ assert ldb.has_object(oinfo.binsha)
+ # END for each sha in database
+ # assure we close all files
+ try:
+ del(ostream)
+ del(oinfo)
+ except UnboundLocalError:
+ pass
+ # END ignore exception if there are no loose objects
+
+ data = "my data"
+ istream = IStream("blob", len(data), StringIO(data))
+
+ # the object does not yet have a sha
+ assert istream.binsha is None
+ ldb.store(istream)
+ # now the sha is set
+ assert len(istream.binsha) == 20
+ assert ldb.has_object(istream.binsha)
+
+
+ # async operation
+ # Create a reader from an iterator
+ reader = IteratorReader(ldb.sha_iter())
+
+ # get reader for object streams
+ info_reader = ldb.stream_async(reader)
+
+ # read one
+ info = info_reader.read(1)[0]
+
+ # read all the rest until depletion
+ ostreams = info_reader.read()
+
+ # set the pool to use two threads
+ pool.set_size(2)
+
+ # synchronize the mode of operation
+ pool.set_size(0)
diff --git a/git/test/test_fun.py b/git/test/test_fun.py
index b7991cdbe..ed0699127 100644
--- a/git/test/test_fun.py
+++ b/git/test/test_fun.py
@@ -1,4 +1,4 @@
-from git.test.lib import *
+from git.test.lib import TestBase, with_rw_repo
from git.objects.fun import (
traverse_tree_recursive,
traverse_trees_recursive,
@@ -9,9 +9,9 @@
aggressive_tree_merge
)
-from gitdb.util import bin_to_hex
-from gitdb.base import IStream
-from gitdb.typ import str_tree_type
+from git.util import bin_to_hex
+from git.base import IStream
+from git.typ import str_tree_type
from stat import (
S_IFDIR,
diff --git a/git/test/test_git.py b/git/test/test_git.py
deleted file mode 100644
index c92a642b6..000000000
--- a/git/test/test_git.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# test_git.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os, sys
-from git.test.lib import *
-from git import Git, GitCommandError
-
-class TestGit(TestCase):
-
- @classmethod
- def setUpAll(cls):
- cls.git = Git(GIT_REPO)
-
- @patch_object(Git, 'execute')
- def test_call_process_calls_execute(self, git):
- git.return_value = ''
- self.git.version()
- assert_true(git.called)
- assert_equal(git.call_args, ((['git', 'version'],), {}))
-
- @raises(GitCommandError)
- def test_it_raises_errors(self):
- self.git.this_does_not_exist()
-
-
- def test_it_transforms_kwargs_into_git_command_arguments(self):
- assert_equal(["-s"], self.git.transform_kwargs(**{'s': True}))
- assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5}))
-
- assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True}))
- assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5}))
-
- assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True}))
-
- def test_it_executes_git_to_shell_and_returns_result(self):
- assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"]))
-
- def test_it_accepts_stdin(self):
- filename = fixture_path("cat_file_blob")
- fh = open(filename, 'r')
- assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8",
- self.git.hash_object(istream=fh, stdin=True))
- fh.close()
-
- @patch_object(Git, 'execute')
- def test_it_ignores_false_kwargs(self, git):
- # this_should_not_be_ignored=False implies it *should* be ignored
- output = self.git.version(pass_this_kwarg=False)
- assert_true("pass_this_kwarg" not in git.call_args[1])
-
- def test_persistent_cat_file_command(self):
- # read header only
- import subprocess as sp
- hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167"
- g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True)
- g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
- g.stdin.flush()
- obj_info = g.stdout.readline()
-
- # read header + data
- g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True)
- g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
- g.stdin.flush()
- obj_info_two = g.stdout.readline()
- assert obj_info == obj_info_two
-
- # read data - have to read it in one large chunk
- size = int(obj_info.split()[2])
- data = g.stdout.read(size)
- terminating_newline = g.stdout.read(1)
-
- # now we should be able to read a new object
- g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
- g.stdin.flush()
- assert g.stdout.readline() == obj_info
-
-
- # same can be achived using the respective command functions
- hexsha, typename, size = self.git.get_object_header(hexsha)
- hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha)
- assert typename == typename_two and size == size_two
diff --git a/git/test/test_import.py b/git/test/test_import.py
new file mode 100644
index 000000000..a5a1d11b4
--- /dev/null
+++ b/git/test/test_import.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""This module's whole purpose is to verify the __all__ descriptions in the respective
+module, by importing using from x import *"""
+
+# perform the actual imports
+import os
+
+from git import *
+
+def import_all(topdir, topmodule='git', skip = "test"):
+ base = os.path.basename
+ join = os.path.join
+ init_script = '__init__.py'
+ prev_cwd = os.getcwd()
+ try:
+ os.chdir(os.path.dirname(topdir))
+ for root, dirs, files in os.walk(base(topdir)):
+ if init_script not in files:
+ del(dirs[:])
+ continue
+ #END ignore non-packages
+
+ if skip in root:
+ continue
+ #END handle ignores
+
+ for relafile in files:
+ if not relafile.endswith('.py'):
+ continue
+ if relafile == init_script:
+ continue
+ module_path = join(root, os.path.splitext(relafile)[0]).replace("/", ".").replace("\\", ".")
+
+ m = __import__(module_path, globals(), locals(), [""])
+ try:
+ attrlist = m.__all__
+ for attr in attrlist:
+ assert hasattr(m, attr), "Invalid item in %s.__all__: %s" % (module_path, attr)
+ #END verify
+ except AttributeError:
+ pass
+ # END try each listed attribute
+ #END for each file in dir
+ #END for each item
+ finally:
+ os.chdir(prev_cwd)
+ #END handle previous currentdir
+
+
+
+class TestDummy(object):
+ def test_base(self):
+ dn = os.path.dirname
+ # NOTE: I don't think this is working, as the __all__ variable is not used in this case
+ import_all(dn(dn(__file__)))
diff --git a/git/test/test_index.py b/git/test/test_index.py
index 5d2278979..7d65cb9bb 100644
--- a/git/test/test_index.py
+++ b/git/test/test_index.py
@@ -4,7 +4,12 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
+from git.test.lib import (
+ TestBase,
+ with_rw_repo,
+ fixture_path,
+ fixture
+ )
from git import *
import inspect
import os
@@ -12,6 +17,7 @@
import tempfile
import glob
import shutil
+import time
from stat import *
class TestIndex(TestBase):
diff --git a/git/test/test_pack.py b/git/test/test_pack.py
new file mode 100644
index 000000000..c398fc568
--- /dev/null
+++ b/git/test/test_pack.py
@@ -0,0 +1,247 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Test everything about packs reading and writing"""
+from lib import (
+ TestBase,
+ with_rw_directory,
+ with_packs_rw,
+ fixture_path
+ )
+from git.stream import DeltaApplyReader
+
+from git.pack import (
+ PackEntity,
+ PackIndexFile,
+ PackFile
+ )
+
+from git.base import (
+ OInfo,
+ OStream,
+ )
+
+from git.fun import delta_types
+from git.exc import UnsupportedOperation
+from git.util import to_bin_sha
+from itertools import izip, chain
+from nose import SkipTest
+
+import os
+import sys
+import tempfile
+
+
+#{ Utilities
+def bin_sha_from_filename(filename):
+ return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:])
+#} END utilities
+
+class TestPack(TestBase):
+
+ packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67)
+ packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30)
+ packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42)
+ packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2])
+ packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2])
+ packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])
+
+
+ def _assert_index_file(self, index, version, size):
+ assert index.packfile_checksum() != index.indexfile_checksum()
+ assert len(index.packfile_checksum()) == 20
+ assert len(index.indexfile_checksum()) == 20
+ assert index.version() == version
+ assert index.size() == size
+ assert len(index.offsets()) == size
+
+ # get all data of all objects
+ for oidx in xrange(index.size()):
+ sha = index.sha(oidx)
+ assert oidx == index.sha_to_index(sha)
+
+ entry = index.entry(oidx)
+ assert len(entry) == 3
+
+ assert entry[0] == index.offset(oidx)
+ assert entry[1] == sha
+ assert entry[2] == index.crc(oidx)
+
+ # verify partial sha
+ for l in (4,8,11,17,20):
+ assert index.partial_sha_to_index(sha[:l], l*2) == oidx
+
+ # END for each object index in indexfile
+ self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2)
+
+
+ def _assert_pack_file(self, pack, version, size):
+ assert pack.version() == 2
+ assert pack.size() == size
+ assert len(pack.checksum()) == 20
+
+ num_obj = 0
+ for obj in pack.stream_iter():
+ num_obj += 1
+ info = pack.info(obj.pack_offset)
+ stream = pack.stream(obj.pack_offset)
+
+ assert info.pack_offset == stream.pack_offset
+ assert info.type_id == stream.type_id
+ assert hasattr(stream, 'read')
+
+ # it should be possible to read from both streams
+ assert obj.read() == stream.read()
+
+ streams = pack.collect_streams(obj.pack_offset)
+ assert streams
+
+ # read the stream
+ try:
+ dstream = DeltaApplyReader.new(streams)
+ except ValueError:
+ # ignore these, old git versions use only ref deltas,
+ # which we haven't resolved ( as we are without an index )
+ # Also ignore non-delta streams
+ continue
+ # END get deltastream
+
+ # read all
+ data = dstream.read()
+ assert len(data) == dstream.size
+
+ # test seek
+ dstream.seek(0)
+ assert dstream.read() == data
+
+
+ # read chunks
+ # NOTE: the current implementation is safe, it basically transfers
+ # all calls to the underlying memory map
+
+ # END for each object
+ assert num_obj == size
+
+
+ def test_pack_index(self):
+ # check version 1 and 2
+ for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2):
+ index = PackIndexFile(indexfile)
+ self._assert_index_file(index, version, size)
+ # END run tests
+
+ def test_pack(self):
+ # there is this special version 3, but apparently it's like 2 ...
+ for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2):
+ pack = PackFile(packfile)
+ self._assert_pack_file(pack, version, size)
+ # END for each pack to test
+
+ @with_rw_directory
+ def test_pack_entity(self, rw_dir):
+ pack_objs = list()
+ for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1),
+ (self.packfile_v2_2, self.packindexfile_v2),
+ (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
+ packfile, version, size = packinfo
+ indexfile, version, size = indexinfo
+ entity = PackEntity(packfile)
+ assert entity.pack().path() == packfile
+ assert entity.index().path() == indexfile
+ pack_objs.extend(entity.stream_iter())
+
+ count = 0
+ for info, stream in izip(entity.info_iter(), entity.stream_iter()):
+ count += 1
+ assert info.binsha == stream.binsha
+ assert len(info.binsha) == 20
+ assert info.type_id == stream.type_id
+ assert info.size == stream.size
+
+ # we return fully resolved items, which is implied by the sha centric access
+ assert not info.type_id in delta_types
+
+ # try all calls
+ assert len(entity.collect_streams(info.binsha))
+ oinfo = entity.info(info.binsha)
+ assert isinstance(oinfo, OInfo)
+ assert oinfo.binsha is not None
+ ostream = entity.stream(info.binsha)
+ assert isinstance(ostream, OStream)
+ assert ostream.binsha is not None
+
+ # verify the stream
+ try:
+ assert entity.is_valid_stream(info.binsha, use_crc=True)
+ except UnsupportedOperation:
+ pass
+ # END ignore version issues
+ assert entity.is_valid_stream(info.binsha, use_crc=False)
+ # END for each info, stream tuple
+ assert count == size
+
+ # END for each entity
+
+ # pack writing - write all packs into one
+ # index path can be None
+ pack_path = tempfile.mktemp('', "pack", rw_dir)
+ index_path = tempfile.mktemp('', 'index', rw_dir)
+ iteration = 0
+ def rewind_streams():
+ for obj in pack_objs:
+ obj.stream.seek(0)
+ #END utility
+ for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)):
+ pfile = open(ppath, 'wb')
+ iwrite = None
+ if ipath:
+ ifile = open(ipath, 'wb')
+ iwrite = ifile.write
+ #END handle ip
+
+ # make sure we rewind the streams ... we work on the same objects over and over again
+ if iteration > 0:
+ rewind_streams()
+ #END rewind streams
+ iteration += 1
+
+ pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
+ pfile.close()
+ assert os.path.getsize(ppath) > 100
+
+ # verify pack
+ pf = PackFile(ppath)
+ assert pf.size() == len(pack_objs)
+ assert pf.version() == PackFile.pack_version_default
+ assert pf.checksum() == pack_sha
+
+ # verify index
+ if ipath is not None:
+ ifile.close()
+ assert os.path.getsize(ipath) > 100
+ idx = PackIndexFile(ipath)
+ assert idx.version() == PackIndexFile.index_version_default
+ assert idx.packfile_checksum() == pack_sha
+ assert idx.indexfile_checksum() == index_sha
+ assert idx.size() == len(pack_objs)
+ #END verify files exist
+ #END for each packpath, indexpath pair
+
+ # verify the packs thoroughly
+ rewind_streams()
+ entity = PackEntity.create(pack_objs, rw_dir)
+ count = 0
+ for info in entity.info_iter():
+ count += 1
+ for use_crc in range(2):
+ assert entity.is_valid_stream(info.binsha, use_crc)
+ # END for each crc mode
+ #END for each info
+ assert count == len(pack_objs)
+
+
+ def test_pack_64(self):
+ # TODO: hex-edit a pack to help verify that we can handle 64 bit offsets
+ # of course without really needing such a huge pack
+ raise SkipTest()
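
For orientation, and outside the patch itself, this is the read path the assertions above exercise: resolve an object through the pack index, then stream it from the pack file. The file names are placeholders; delta objects would additionally need collect_streams() plus DeltaApplyReader, as shown in _assert_pack_file().

from git.pack import PackIndexFile, PackFile

index = PackIndexFile('pack-1234.idx')          # placeholder .idx path
pack = PackFile('pack-1234.pack')               # matching .pack path

sha = index.sha(0)                              # binary sha of the first indexed object
offset = index.offset(index.sha_to_index(sha))  # pack offset for that object
stream = pack.stream(offset)                    # read()-able object stream
data = stream.read()
assert len(data) == stream.size                 # holds for non-delta objects
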
diff --git a/git/test/test_remote.py b/git/test/test_remote.py
index af6915a32..30bd1232c 100644
--- a/git/test/test_remote.py
+++ b/git/test/test_remote.py
@@ -4,9 +4,25 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import (
+ TestBase,
+ with_rw_and_rw_remote_repo,
+ with_rw_repo,
+ )
from git.util import IterableList
+from git.db.interface import PushInfo, FetchInfo, RemoteProgress
+from git.remote import *
+from git.exc import GitCommandError
+from git.refs import (
+ Reference,
+ TagReference,
+ RemoteReference,
+ Head,
+ SymbolicReference
+ )
+
+from nose import SkipTest
+
import tempfile
import shutil
import os
@@ -16,430 +32,427 @@
random.seed(0)
class TestRemoteProgress(RemoteProgress):
- __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages' )
- def __init__(self):
- super(TestRemoteProgress, self).__init__()
- self._seen_lines = list()
- self._stages_per_op = dict()
- self._num_progress_messages = 0
-
- def _parse_progress_line(self, line):
- # we may remove the line later if it is dropped
- # Keep it for debugging
- self._seen_lines.append(line)
- rval = super(TestRemoteProgress, self)._parse_progress_line(line)
- assert len(line) > 1, "line %r too short" % line
- return rval
-
- def line_dropped(self, line):
- try:
- self._seen_lines.remove(line)
- except ValueError:
- pass
-
- def update(self, op_code, cur_count, max_count=None, message=''):
- # check each stage only comes once
- op_id = op_code & self.OP_MASK
- assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING)
-
- self._stages_per_op.setdefault(op_id, 0)
- self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK)
-
- if op_code & (self.WRITING|self.END) == (self.WRITING|self.END):
- assert message
- # END check we get message
-
- self._num_progress_messages += 1
-
-
- def make_assertion(self):
- # we don't always receive messages
- if not self._seen_lines:
- return
-
- # sometimes objects are not compressed which is okay
- assert len(self._seen_ops) in (2,3)
- assert self._stages_per_op
-
- # must have seen all stages
- for op, stages in self._stages_per_op.items():
- assert stages & self.STAGE_MASK == self.STAGE_MASK
- # END for each op/stage
+ __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages')
+ def __init__(self):
+ super(TestRemoteProgress, self).__init__()
+ self._seen_lines = list()
+ self._stages_per_op = dict()
+ self._seen_ops = set()
+ self._num_progress_messages = 0
+
+ def line_dropped(self, line):
+ try:
+ self._seen_lines.remove(line)
+ except ValueError:
+ pass
+
+ def __call__(self, message, input=''):
+ pass
+
+ def update(self, op_code, cur_count, max_count=None, message='', input=''):
+ # check each stage only comes once
+ if input:
+ self._seen_lines.append(input)
+ #END handle input
+ op_id = op_code & self.OP_MASK
+ assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING)
+
+ self._stages_per_op.setdefault(op_id, 0)
+ self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK)
+
+ if op_code & (self.WRITING|self.END) == (self.WRITING|self.END):
+ assert message
+ # END check we get message
+
+ self._num_progress_messages += 1
+
+
+ def make_assertion(self):
+ # we don't always receive messages
+ if not self._seen_lines:
+ return
+
+ # sometimes objects are not compressed which is okay
+ assert len(self._stages_per_op.keys()) in (2,3)
+ assert self._stages_per_op
+
+ # must have seen all stages
+ for op, stages in self._stages_per_op.items():
+ assert stages & self.STAGE_MASK == self.STAGE_MASK
+ # END for each op/stage
- def assert_received_message(self):
- assert self._num_progress_messages
-
+ def assert_received_message(self):
+ assert self._num_progress_messages
+
class TestRemote(TestBase):
-
- def _print_fetchhead(self, repo):
- fp = open(os.path.join(repo.git_dir, "FETCH_HEAD"))
- fp.close()
-
-
- def _do_test_fetch_result(self, results, remote):
- # self._print_fetchhead(remote.repo)
- assert len(results) > 0 and isinstance(results[0], FetchInfo)
- for info in results:
- assert isinstance(info.note, basestring)
- if isinstance(info.ref, Reference):
- assert info.flags != 0
- # END reference type flags handling
- assert isinstance(info.ref, (SymbolicReference, Reference))
- if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD):
- assert isinstance(info.old_commit, Commit)
- else:
- assert info.old_commit is None
- # END forced update checking
- # END for each info
-
- def _do_test_push_result(self, results, remote):
- assert len(results) > 0 and isinstance(results[0], PushInfo)
- for info in results:
- assert info.flags
- assert isinstance(info.summary, basestring)
- if info.old_commit is not None:
- assert isinstance(info.old_commit, Commit)
- if info.flags & info.ERROR:
- has_one = False
- for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE):
- has_one |= bool(info.flags & bitflag)
- # END for each bitflag
- assert has_one
- else:
- # there must be a remote commit
- if info.flags & info.DELETED == 0:
- assert isinstance(info.local_ref, Reference)
- else:
- assert info.local_ref is None
- assert type(info.remote_ref) in (TagReference, RemoteReference)
- # END error checking
- # END for each info
-
-
- def _do_test_fetch_info(self, repo):
- self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "nonsense", '')
- self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '')
-
- def _commit_random_file(self, repo):
- #Create a file with a random name and random data and commit it to repo.
- # Return the commited absolute file path
- index = repo.index
- new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo)
- index.add([new_file])
- index.commit("Committing %s" % new_file)
- return new_file
-
- def _do_test_fetch(self,remote, rw_repo, remote_repo):
- # specialized fetch testing to de-clutter the main test
- self._do_test_fetch_info(rw_repo)
-
- def fetch_and_test(remote, **kwargs):
- progress = TestRemoteProgress()
- kwargs['progress'] = progress
- res = remote.fetch(**kwargs)
- progress.make_assertion()
- self._do_test_fetch_result(res, remote)
- return res
- # END fetch and check
-
- def get_info(res, remote, name):
- return res["%s/%s"%(remote,name)]
-
- # put remote head to master as it is garantueed to exist
- remote_repo.head.reference = remote_repo.heads.master
-
- res = fetch_and_test(remote)
- # all uptodate
- for info in res:
- assert info.flags & info.HEAD_UPTODATE
-
- # rewind remote head to trigger rejection
- # index must be false as remote is a bare repo
- rhead = remote_repo.head
- remote_commit = rhead.commit
- rhead.reset("HEAD~2", index=False)
- res = fetch_and_test(remote)
- mkey = "%s/%s"%(remote,'master')
- master_info = res[mkey]
- assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None
-
- # normal fast forward - set head back to previous one
- rhead.commit = remote_commit
- res = fetch_and_test(remote)
- assert res[mkey].flags & FetchInfo.FAST_FORWARD
-
- # new remote branch
- new_remote_branch = Head.create(remote_repo, "new_branch")
- res = fetch_and_test(remote)
- new_branch_info = get_info(res, remote, new_remote_branch)
- assert new_branch_info.flags & FetchInfo.NEW_HEAD
-
- # remote branch rename ( causes creation of a new one locally )
- new_remote_branch.rename("other_branch_name")
- res = fetch_and_test(remote)
- other_branch_info = get_info(res, remote, new_remote_branch)
- assert other_branch_info.ref.commit == new_branch_info.ref.commit
-
- # remove new branch
- Head.delete(new_remote_branch.repo, new_remote_branch)
- res = fetch_and_test(remote)
- # deleted remote will not be fetched
- self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch)
-
- # prune stale tracking branches
- stale_refs = remote.stale_refs
- assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference)
- RemoteReference.delete(rw_repo, *stale_refs)
-
- # test single branch fetch with refspec including target remote
- res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote)
- assert len(res) == 1 and get_info(res, remote, 'master')
-
- # ... with respec and no target
- res = fetch_and_test(remote, refspec='master')
- assert len(res) == 1
-
- # add new tag reference
- rtag = TagReference.create(remote_repo, "1.0-RV_hello.there")
- res = fetch_and_test(remote, tags=True)
- tinfo = res[str(rtag)]
- assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit
- assert tinfo.flags & tinfo.NEW_TAG
-
- # adjust tag commit
- Reference.set_object(rtag, rhead.commit.parents[0].parents[0])
- res = fetch_and_test(remote, tags=True)
- tinfo = res[str(rtag)]
- assert tinfo.commit == rtag.commit
- assert tinfo.flags & tinfo.TAG_UPDATE
-
- # delete remote tag - local one will stay
- TagReference.delete(remote_repo, rtag)
- res = fetch_and_test(remote, tags=True)
- self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag))
-
- # provoke to receive actual objects to see what kind of output we have to
- # expect. For that we need a remote transport protocol
- # Create a new UN-shared repo and fetch into it after we pushed a change
- # to the shared repo
- other_repo_dir = tempfile.mktemp("other_repo")
- # must clone with a local path for the repo implementation not to freak out
- # as it wants local paths only ( which I can understand )
- other_repo = remote_repo.clone(other_repo_dir, shared=False)
- remote_repo_url = "git://localhost%s"%remote_repo.git_dir
-
- # put origin to git-url
- other_origin = other_repo.remotes.origin
- other_origin.config_writer.set("url", remote_repo_url)
- # it automatically creates alternates as remote_repo is shared as well.
- # It will use the transport though and ignore alternates when fetching
- # assert not other_repo.alternates # this would fail
-
- # assure we are in the right state
- rw_repo.head.reset(remote.refs.master, working_tree=True)
- try:
- self._commit_random_file(rw_repo)
- remote.push(rw_repo.head.reference)
-
- # here I would expect to see remote-information about packing
- # objects and so on. Unfortunately, this does not happen
- # if we are redirecting the output - git explicitly checks for this
- # and only provides progress information to ttys
- res = fetch_and_test(other_origin)
- finally:
- shutil.rmtree(other_repo_dir)
- # END test and cleanup
-
- def _test_push_and_pull(self,remote, rw_repo, remote_repo):
- # push our changes
- lhead = rw_repo.head
- lindex = rw_repo.index
- # assure we are on master and it is checked out where the remote is
- try:
- lhead.reference = rw_repo.heads.master
- except AttributeError:
- # if the author is on a non-master branch, the clones might not have
- # a local master yet. We simply create it
- lhead.reference = rw_repo.create_head('master')
- # END master handling
- lhead.reset(remote.refs.master, working_tree=True)
-
- # push without spec should fail ( without further configuration )
- # well, works nicely
- # self.failUnlessRaises(GitCommandError, remote.push)
-
- # simple file push
- self._commit_random_file(rw_repo)
- progress = TestRemoteProgress()
- res = remote.push(lhead.reference, progress)
- assert isinstance(res, IterableList)
- self._do_test_push_result(res, remote)
- progress.make_assertion()
-
- # rejected - undo last commit
- lhead.reset("HEAD~1")
- res = remote.push(lhead.reference)
- assert res[0].flags & PushInfo.ERROR
- assert res[0].flags & PushInfo.REJECTED
- self._do_test_push_result(res, remote)
-
- # force rejected pull
- res = remote.push('+%s' % lhead.reference)
- assert res[0].flags & PushInfo.ERROR == 0
- assert res[0].flags & PushInfo.FORCED_UPDATE
- self._do_test_push_result(res, remote)
-
- # invalid refspec
- res = remote.push("hellothere")
- assert len(res) == 0
-
- # push new tags
- progress = TestRemoteProgress()
- to_be_updated = "my_tag.1.0RV"
- new_tag = TagReference.create(rw_repo, to_be_updated)
- other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message")
- res = remote.push(progress=progress, tags=True)
- assert res[-1].flags & PushInfo.NEW_TAG
- progress.make_assertion()
- self._do_test_push_result(res, remote)
-
- # update push new tags
- # Rejection is default
- new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True)
- res = remote.push(tags=True)
- self._do_test_push_result(res, remote)
- assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR
-
- # push force this tag
- res = remote.push("+%s" % new_tag.path)
- assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & PushInfo.FORCED_UPDATE
-
- # delete tag - have to do it using refspec
- res = remote.push(":%s" % new_tag.path)
- self._do_test_push_result(res, remote)
- assert res[0].flags & PushInfo.DELETED
- # Currently progress is not properly transferred, especially not using
- # the git daemon
- # progress.assert_received_message()
-
- # push new branch
- new_head = Head.create(rw_repo, "my_new_branch")
- progress = TestRemoteProgress()
- res = remote.push(new_head, progress)
- assert res[0].flags & PushInfo.NEW_HEAD
- progress.make_assertion()
- self._do_test_push_result(res, remote)
-
- # delete new branch on the remote end and locally
- res = remote.push(":%s" % new_head.path)
- self._do_test_push_result(res, remote)
- Head.delete(rw_repo, new_head)
- assert res[-1].flags & PushInfo.DELETED
-
- # --all
- res = remote.push(all=True)
- self._do_test_push_result(res, remote)
-
- remote.pull('master')
-
- # cleanup - delete created tags and branches as we are in an innerloop on
- # the same repository
- TagReference.delete(rw_repo, new_tag, other_tag)
- remote.push(":%s" % other_tag.path)
-
- @with_rw_and_rw_remote_repo('0.1.6')
- def test_base(self, rw_repo, remote_repo):
- num_remotes = 0
- remote_set = set()
- ran_fetch_test = False
-
- for remote in rw_repo.remotes:
- num_remotes += 1
- assert remote == remote
- assert str(remote) != repr(remote)
- remote_set.add(remote)
- remote_set.add(remote) # should already exist
-
- # REFS
- refs = remote.refs
- assert refs
- for ref in refs:
- assert ref.remote_name == remote.name
- assert ref.remote_head
- # END for each ref
-
- # OPTIONS
- # cannot use 'fetch' key anymore as it is now a method
- for opt in ("url", ):
- val = getattr(remote, opt)
- reader = remote.config_reader
- assert reader.get(opt) == val
- assert reader.get_value(opt, None) == val
-
- # unable to write with a reader
- self.failUnlessRaises(IOError, reader.set, opt, "test")
-
- # change value
- writer = remote.config_writer
- new_val = "myval"
- writer.set(opt, new_val)
- assert writer.get(opt) == new_val
- writer.set(opt, val)
- assert writer.get(opt) == val
- del(writer)
- assert getattr(remote, opt) == val
- # END for each default option key
-
- # RENAME
- other_name = "totally_other_name"
- prev_name = remote.name
- assert remote.rename(other_name) == remote
- assert prev_name != remote.name
- # multiple times
- for time in range(2):
- assert remote.rename(prev_name).name == prev_name
- # END for each rename ( back to prev_name )
-
- # PUSH/PULL TESTING
- self._test_push_and_pull(remote, rw_repo, remote_repo)
-
- # FETCH TESTING
- # Only for remotes - local cases are the same or less complicated
- # as additional progress information will never be emitted
- if remote.name == "daemon_origin":
- self._do_test_fetch(remote, rw_repo, remote_repo)
- ran_fetch_test = True
- # END fetch test
-
- remote.update()
- # END for each remote
-
- assert ran_fetch_test
- assert num_remotes
- assert num_remotes == len(remote_set)
-
- origin = rw_repo.remote('origin')
- assert origin == rw_repo.remotes.origin
-
- @with_rw_repo('HEAD', bare=True)
- def test_creation_and_removal(self, bare_rw_repo):
- new_name = "test_new_one"
- arg_list = (new_name, "git@server:hello.git")
- remote = Remote.create(bare_rw_repo, *arg_list )
- assert remote.name == "test_new_one"
- assert remote in bare_rw_repo.remotes
-
- # create same one again
- self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list)
-
- Remote.remove(bare_rw_repo, new_name)
-
- for remote in bare_rw_repo.remotes:
- if remote.name == new_name:
- raise AssertionError("Remote removal failed")
- # END if deleted remote matches existing remote's name
- # END for each remote
-
-
-
+
+ def _print_fetchhead(self, repo):
+ fp = open(os.path.join(repo.git_dir, "FETCH_HEAD"))
+ fp.close()
+
+
+ def _do_test_fetch_result(self, results, remote):
+ # self._print_fetchhead(remote.repo)
+ assert len(results) > 0 and isinstance(results[0], FetchInfo)
+ for info in results:
+ assert isinstance(info.note, basestring)
+ if isinstance(info.ref, Reference):
+ assert info.flags != 0
+ # END reference type flags handling
+ assert isinstance(info.ref, (SymbolicReference, Reference))
+ if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD):
+ assert isinstance(info.old_commit_binsha, str) and len(info.old_commit_binsha) == 20
+ else:
+ assert info.old_commit_binsha is None
+ # END forced update checking
+ # END for each info
+
+ def _do_test_push_result(self, results, remote):
+ assert len(results) > 0 and isinstance(results[0], PushInfo)
+ for info in results:
+ assert info.flags
+ assert isinstance(info.summary, basestring)
+ if info.old_commit_binsha is not None:
+ assert isinstance(info.old_commit_binsha, str) and len(info.old_commit_binsha) == 20
+ if info.flags & info.ERROR:
+ has_one = False
+ for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE):
+ has_one |= bool(info.flags & bitflag)
+ # END for each bitflag
+ assert has_one
+ else:
+ # there must be a remote commit
+ if info.flags & info.DELETED == 0:
+ assert isinstance(info.local_ref, Reference)
+ else:
+ assert info.local_ref is None
+ assert type(info.remote_ref) in (TagReference, RemoteReference)
+ # END error checking
+ # END for each info
+
+ def _commit_random_file(self, repo):
+ # Create a file with a random name and random data and commit it to repo.
+ # Return the committed absolute file path
+ index = repo.index
+ new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo)
+ index.add([new_file])
+ index.commit("Committing %s" % new_file)
+ return new_file
+
+ def _do_test_fetch(self,remote, rw_repo, remote_repo):
+ def fetch_and_test(remote, **kwargs):
+ progress = TestRemoteProgress()
+ kwargs['progress'] = progress
+ res = remote.fetch(**kwargs)
+ progress.make_assertion()
+ self._do_test_fetch_result(res, remote)
+ return res
+ # END fetch and check
+
+ def get_info(res, remote, name):
+ return res["%s/%s"%(remote,name)]
+
+ # put remote head to master as it is guaranteed to exist
+ remote_repo.head.reference = remote_repo.heads.master
+
+ res = fetch_and_test(remote)
+ # all uptodate
+ for info in res:
+ assert info.flags & info.HEAD_UPTODATE
+
+ # rewind remote head to trigger rejection
+ # index must be false as remote is a bare repo
+ rhead = remote_repo.head
+ remote_commit = rhead.commit
+ rhead.reset("HEAD~2", index=False)
+ res = fetch_and_test(remote)
+ mkey = "%s/%s"%(remote,'master')
+ master_info = res[mkey]
+ assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None
+
+ # normal fast forward - set head back to previous one
+ rhead.commit = remote_commit
+ res = fetch_and_test(remote)
+ assert res[mkey].flags & FetchInfo.FAST_FORWARD
+
+ # new remote branch
+ new_remote_branch = Head.create(remote_repo, "new_branch")
+ res = fetch_and_test(remote)
+ new_branch_info = get_info(res, remote, new_remote_branch)
+ assert new_branch_info.flags & FetchInfo.NEW_HEAD
+
+ # remote branch rename ( causes creation of a new one locally )
+ new_remote_branch.rename("other_branch_name")
+ res = fetch_and_test(remote)
+ other_branch_info = get_info(res, remote, new_remote_branch)
+ assert other_branch_info.ref.commit == new_branch_info.ref.commit
+
+ # remove new branch
+ Head.delete(new_remote_branch.repo, new_remote_branch)
+ res = fetch_and_test(remote)
+ # deleted remote will not be fetched
+ self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch)
+
+ # prune stale tracking branches
+ stale_refs = remote.stale_refs
+ assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference)
+ RemoteReference.delete(rw_repo, *stale_refs)
+
+ # test single branch fetch with refspec including target remote
+ res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote)
+ assert len(res) == 1 and get_info(res, remote, 'master')
+
+ # ... with refspec and no target
+ res = fetch_and_test(remote, refspec='master')
+ assert len(res) == 1
+
+ # add new tag reference
+ rtag = TagReference.create(remote_repo, "1.0-RV_hello.there")
+ res = fetch_and_test(remote, tags=True)
+ tinfo = res[str(rtag)]
+ assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit
+ assert tinfo.flags & tinfo.NEW_TAG
+
+ # adjust tag commit
+ Reference.set_object(rtag, rhead.commit.parents[0].parents[0])
+ res = fetch_and_test(remote, tags=True)
+ tinfo = res[str(rtag)]
+ assert tinfo.commit == rtag.commit
+ assert tinfo.flags & tinfo.TAG_UPDATE
+
+ # delete remote tag - local one will stay
+ TagReference.delete(remote_repo, rtag)
+ res = fetch_and_test(remote, tags=True)
+ self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag))
+
+ # provoke to receive actual objects to see what kind of output we have to
+ # expect. For that we need a remote transport protocol
+ # Create a new UN-shared repo and fetch into it after we pushed a change
+ # to the shared repo
+ other_repo_dir = tempfile.mktemp("other_repo")
+ # must clone with a local path for the repo implementation not to freak out
+ # as it wants local paths only ( which I can understand )
+ other_repo = remote_repo.clone(other_repo_dir, shared=False)
+ remote_repo_url = "git://localhost%s"%remote_repo.git_dir
+
+ # put origin to git-url
+ other_origin = other_repo.remotes.origin
+ other_origin.config_writer.set("url", remote_repo_url)
+ # it automatically creates alternates as remote_repo is shared as well.
+ # It will use the transport though and ignore alternates when fetching
+ # assert not other_repo.alternates # this would fail
+
+ # assure we are in the right state
+ rw_repo.head.reset(remote.refs.master, working_tree=True)
+ try:
+ self._commit_random_file(rw_repo)
+ remote.push(rw_repo.head.reference)
+
+ # here I would expect to see remote-information about packing
+ # objects and so on. Unfortunately, this does not happen
+ # if we are redirecting the output - git explicitly checks for this
+ # and only provides progress information to ttys
+ res = fetch_and_test(other_origin)
+ finally:
+ shutil.rmtree(other_repo_dir)
+ # END test and cleanup
+
+ def _verify_push_and_pull(self,remote, rw_repo, remote_repo):
+ # push our changes
+ lhead = rw_repo.head
+ lindex = rw_repo.index
+ # assure we are on master and it is checked out where the remote is
+ try:
+ lhead.reference = rw_repo.heads.master
+ except AttributeError:
+ # if the author is on a non-master branch, the clones might not have
+ # a local master yet. We simply create it
+ lhead.reference = rw_repo.create_head('master')
+ # END master handling
+ lhead.reset(remote.refs.master, working_tree=True)
+
+ # push without spec should fail ( without further configuration )
+ # well, works nicely
+ # self.failUnlessRaises(GitCommandError, remote.push)
+
+ # simple file push
+ self._commit_random_file(rw_repo)
+ progress = TestRemoteProgress()
+ res = remote.push(lhead.reference, progress)
+ assert isinstance(res, IterableList)
+ self._do_test_push_result(res, remote)
+ progress.make_assertion()
+
+ # rejected - undo last commit
+ lhead.reset("HEAD~1")
+ res = remote.push(lhead.reference)
+ assert res[0].flags & PushInfo.ERROR
+ assert res[0].flags & PushInfo.REJECTED
+ self._do_test_push_result(res, remote)
+
+ # force-push over the rejected update
+ res = remote.push('+%s' % lhead.reference)
+ assert res[0].flags & PushInfo.ERROR == 0
+ assert res[0].flags & PushInfo.FORCED_UPDATE
+ self._do_test_push_result(res, remote)
+
+ # invalid refspec
+ res = remote.push("hellothere")
+ assert len(res) == 0
+
+ # push new tags
+ progress = TestRemoteProgress()
+ to_be_updated = "my_tag.1.0RV"
+ new_tag = TagReference.create(rw_repo, to_be_updated)
+ other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message")
+ res = remote.push(progress=progress, tags=True)
+ assert res[-1].flags & PushInfo.NEW_TAG
+ progress.make_assertion()
+ self._do_test_push_result(res, remote)
+
+ # update push new tags
+ # Rejection is default
+ new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True)
+ res = remote.push(tags=True)
+ self._do_test_push_result(res, remote)
+ assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR
+
+ # push force this tag
+ res = remote.push("+%s" % new_tag.path)
+ assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & PushInfo.FORCED_UPDATE
+
+ # delete tag - have to do it using refspec
+ res = remote.push(":%s" % new_tag.path)
+ self._do_test_push_result(res, remote)
+ assert res[0].flags & PushInfo.DELETED
+ # Currently progress is not properly transferred, especially not using
+ # the git daemon
+ # progress.assert_received_message()
+
+ # push new branch
+ new_head = Head.create(rw_repo, "my_new_branch")
+ progress = TestRemoteProgress()
+ res = remote.push(new_head, progress)
+ assert res[0].flags & PushInfo.NEW_HEAD
+ progress.make_assertion()
+ self._do_test_push_result(res, remote)
+
+ # delete new branch on the remote end and locally
+ res = remote.push(":%s" % new_head.path)
+ self._do_test_push_result(res, remote)
+ Head.delete(rw_repo, new_head)
+ assert res[-1].flags & PushInfo.DELETED
+
+ # --all
+ res = remote.push(all=True)
+ self._do_test_push_result(res, remote)
+
+ remote.pull('master')
+
+ # cleanup - delete created tags and branches as we are in an inner loop on
+ # the same repository
+ TagReference.delete(rw_repo, new_tag, other_tag)
+ remote.push(":%s" % other_tag.path)
+
+ def test_todo(self):
+ # If you see this, please remind yourself that all of this needs to be run
+ # per repository type!
+ raise SkipTest("todo")
+
+
+ @with_rw_and_rw_remote_repo('0.1.6')
+ def test_base(self, rw_repo, remote_repo):
+ num_remotes = 0
+ remote_set = set()
+ ran_fetch_test = False
+
+ for remote in rw_repo.remotes:
+ num_remotes += 1
+ assert remote == remote
+ assert str(remote) != repr(remote)
+ remote_set.add(remote)
+ remote_set.add(remote) # should already exist
+
+ # REFS
+ refs = remote.refs
+ assert refs
+ for ref in refs:
+ assert ref.remote_name == remote.name
+ assert ref.remote_head
+ # END for each ref
+
+ # OPTIONS
+ # cannot use 'fetch' key anymore as it is now a method
+ for opt in ("url", ):
+ val = getattr(remote, opt)
+ reader = remote.config_reader
+ assert reader.get(opt) == val
+ assert reader.get_value(opt, None) == val
+
+ # unable to write with a reader
+ self.failUnlessRaises(IOError, reader.set, opt, "test")
+
+ # change value
+ writer = remote.config_writer
+ new_val = "myval"
+ writer.set(opt, new_val)
+ assert writer.get(opt) == new_val
+ writer.set(opt, val)
+ assert writer.get(opt) == val
+ del(writer)
+ assert getattr(remote, opt) == val
+ # END for each default option key
+
+ # RENAME
+ other_name = "totally_other_name"
+ prev_name = remote.name
+ assert remote.rename(other_name) == remote
+ assert prev_name != remote.name
+ # multiple times
+ for time in range(2):
+ assert remote.rename(prev_name).name == prev_name
+ # END for each rename ( back to prev_name )
+
+ # PUSH/PULL TESTING
+ self._verify_push_and_pull(remote, rw_repo, remote_repo)
+
+ # FETCH TESTING
+ # Only for remotes - local cases are the same or less complicated
+ # as additional progress information will never be emitted
+ if remote.name == "daemon_origin":
+ self._do_test_fetch(remote, rw_repo, remote_repo)
+ ran_fetch_test = True
+ # END fetch test
+
+ remote.update()
+ # END for each remote
+
+ assert ran_fetch_test
+ assert num_remotes
+ assert num_remotes == len(remote_set)
+
+ origin = rw_repo.remote('origin')
+ assert origin == rw_repo.remotes.origin
+
+ @with_rw_repo('HEAD', bare=True)
+ def test_creation_and_removal(self, bare_rw_repo):
+ new_name = "test_new_one"
+ arg_list = (new_name, "git@server:hello.git")
+ remote = Remote.create(bare_rw_repo, *arg_list )
+ assert remote.name == "test_new_one"
+ assert remote in bare_rw_repo.remotes
+
+ # create same one again
+ self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list)
+
+ Remote.remove(bare_rw_repo, new_name)
+
+ for remote in bare_rw_repo.remotes:
+ if remote.name == new_name:
+ raise AssertionError("Remote removal failed")
+ # END if deleted remote matches existing remote's name
+ # END for each remote
+
+
+
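
As a usage sketch outside the patch: a progress handler such as the one tested above is passed to fetch(), push() or clone(); only update() needs to be overridden. The repository path is a placeholder.

from git.db.interface import RemoteProgress

class PrintProgress(RemoteProgress):
    def update(self, op_code, cur_count, max_count=None, message='', input=''):
        # op_code combines an operation id (COUNTING, COMPRESSING, WRITING)
        # with BEGIN/END stage bits, as asserted in TestRemoteProgress.update above
        print("%s: %s/%s %s" % (op_code, cur_count, max_count, message))

# repo = Repo('/path/to/repo')                          # placeholder repository
# repo.remotes.origin.fetch(progress=PrintProgress())
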
diff --git a/git/test/test_stats.py b/git/test/test_stats.py
index 2bdb0a89b..27be6a777 100644
--- a/git/test/test_stats.py
+++ b/git/test/test_stats.py
@@ -4,8 +4,12 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import (
+ TestBase,
+ fixture,
+ assert_equal
+ )
+from git.util import Stats
class TestStats(TestBase):
diff --git a/git/test/test_stream.py b/git/test/test_stream.py
new file mode 100644
index 000000000..8d7a5f9a1
--- /dev/null
+++ b/git/test/test_stream.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Test for object db"""
+from lib import (
+ TestBase,
+ DummyStream,
+ Sha1Writer,
+ make_bytes,
+ make_object,
+ fixture_path
+ )
+
+from git.stream import *
+from git.util import (
+ NULL_HEX_SHA,
+ hex_to_bin
+ )
+
+from git.util import zlib
+from git.typ import (
+ str_blob_type
+ )
+
+from git.db.py.loose import PureLooseObjectODB
+import time
+import tempfile
+import os
+
+
+
+
+class TestStream(TestBase):
+ """Test stream classes"""
+
+ data_sizes = (15, 10000, 1000*1024+512)
+
+ def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None):
+ """Make stream tests - the orig_stream is seekable, allowing it to be
+ rewound and reused
+ :param cdata: the data we expect to read from stream, the contents
+ :param rewind_stream: function called to rewind the stream to make it ready
+ for reuse"""
+ ns = 10
+ assert len(cdata) > ns-1, "Data must be larger than %i, was %i" % (ns, len(cdata))
+
+ # read in small steps
+ ss = len(cdata) / ns
+ for i in range(ns):
+ data = stream.read(ss)
+ chunk = cdata[i*ss:(i+1)*ss]
+ assert data == chunk
+ # END for each step
+ rest = stream.read()
+ if rest:
+ assert rest == cdata[-len(rest):]
+ # END handle rest
+
+ if isinstance(stream, DecompressMemMapReader):
+ assert len(stream.data()) == stream.compressed_bytes_read()
+ # END handle special type
+
+ rewind_stream(stream)
+
+ # read everything
+ rdata = stream.read()
+ assert rdata == cdata
+
+ if isinstance(stream, DecompressMemMapReader):
+ assert len(stream.data()) == stream.compressed_bytes_read()
+ # END handle special type
+
+ def test_decompress_reader(self):
+ for close_on_deletion in range(2):
+ for with_size in range(2):
+ for ds in self.data_sizes:
+ cdata = make_bytes(ds, randomize=False)
+
+ # zdata = zipped actual data
+ # cdata = original content data
+
+ # create reader
+ if with_size:
+ # need object data
+ zdata = zlib.compress(make_object(str_blob_type, cdata))
+ type, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion)
+ assert size == len(cdata)
+ assert type == str_blob_type
+
+ # even if we don't set the size, it will be set automatically on first read
+ test_reader = DecompressMemMapReader(zdata, close_on_deletion=False)
+ assert test_reader._s == len(cdata)
+ else:
+ # here we need content data
+ zdata = zlib.compress(cdata)
+ reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata))
+ assert reader._s == len(cdata)
+ # END get reader
+
+ self._assert_stream_reader(reader, cdata, lambda r: r.seek(0))
+
+ # put in a dummy stream for closing
+ dummy = DummyStream()
+ reader._m = dummy
+
+ assert not dummy.closed
+ del(reader)
+ assert dummy.closed == close_on_deletion
+ # END for each datasize
+ # END whether size should be used
+ # END whether stream should be closed when deleted
+
+ def test_sha_writer(self):
+ writer = Sha1Writer()
+ assert 2 == writer.write("hi")
+ assert len(writer.sha(as_hex=1)) == 40
+ assert len(writer.sha(as_hex=0)) == 20
+
+ # make sure it does something ;)
+ prev_sha = writer.sha()
+ writer.write("hi again")
+ assert writer.sha() != prev_sha
+
+ def test_compressed_writer(self):
+ for ds in self.data_sizes:
+ fd, path = tempfile.mkstemp()
+ ostream = FDCompressedSha1Writer(fd)
+ data = make_bytes(ds, randomize=False)
+
+ # for now, just a single write, code doesn't care about chunking
+ assert len(data) == ostream.write(data)
+ ostream.close()
+
+ # it's closed already
+ self.failUnlessRaises(OSError, os.close, fd)
+
+ # read everything back, compare to data we zip
+ fd = os.open(path, os.O_RDONLY|getattr(os, 'O_BINARY', 0))
+ written_data = os.read(fd, os.path.getsize(path))
+ assert len(written_data) == os.path.getsize(path)
+ os.close(fd)
+ assert written_data == zlib.compress(data, 1) # best speed
+
+ os.remove(path)
+ # END for each data size
+
+ def test_decompress_reader_special_case(self):
+ odb = PureLooseObjectODB(fixture_path('objects'))
+ ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))
+
+ # if there is a bug, we will be missing exactly one byte!
+ data = ostream.read()
+ assert len(data) == ostream.size
+
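
A compact sketch, outside the patch, of the round trip these stream tests cover: build a zlib-compressed loose-object buffer (the standard "type size\0content" layout, which make_object() in the test lib presumably mirrors) and read it back through DecompressMemMapReader.

from git.stream import DecompressMemMapReader
from git.util import zlib
from git.typ import str_blob_type

content = "hello" * 100
# loose-object layout: "<type> <size>\0<content>", then zlib-compressed
zdata = zlib.compress("%s %i\0%s" % (str_blob_type, len(content), content))

typ, size, reader = DecompressMemMapReader.new(zdata, False)
assert typ == str_blob_type and size == len(content)
assert reader.read() == content
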
diff --git a/git/test/test_util.py b/git/test/test_util.py
index e55a6d15b..7cfcad3f8 100644
--- a/git/test/test_util.py
+++ b/git/test/test_util.py
@@ -7,7 +7,7 @@
import os
import tempfile
-from git.test.lib import *
+from lib import TestBase
from git.util import *
from git.objects.util import *
from git import *
@@ -15,6 +15,24 @@
import time
+from git.util import (
+ to_hex_sha,
+ to_bin_sha,
+ NULL_HEX_SHA,
+ LockedFD,
+ Actor,
+ IterableList
+ )
+
+
+class TestIterableMember(object):
+ """A member of an iterable list"""
+ __slots__ = ("name", "prefix_name")
+
+ def __init__(self, name):
+ self.name = name
+ self.prefix_name = name
+
class TestUtils(TestBase):
def setup(self):
@@ -25,8 +43,8 @@ def setup(self):
}
def test_it_should_dashify(self):
- assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
- assert_equal('foo', dashify('foo'))
+ assert 'this-is-my-argument' == dashify('this_is_my_argument')
+ assert 'foo' == dashify('foo')
def test_lock_file(self):
@@ -107,3 +125,165 @@ def test_actor(self):
assert isinstance(Actor.committer(cr), Actor)
assert isinstance(Actor.author(cr), Actor)
#END assure config reader is handled
+
+ def test_basics(self):
+ assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA
+ assert len(to_bin_sha(NULL_HEX_SHA)) == 20
+ assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA
+
+ def _cmp_contents(self, file_path, data):
+ # raise if data from file at file_path
+ # does not match data string
+ fp = open(file_path, "rb")
+ try:
+ assert fp.read() == data
+ finally:
+ fp.close()
+
+ def test_lockedfd(self):
+ my_file = tempfile.mktemp()
+ orig_data = "hello"
+ new_data = "world"
+ my_file_fp = open(my_file, "wb")
+ my_file_fp.write(orig_data)
+ my_file_fp.close()
+
+ try:
+ lfd = LockedFD(my_file)
+ lockfilepath = lfd._lockfilepath()
+
+ # cannot end before it was started
+ self.failUnlessRaises(AssertionError, lfd.rollback)
+ self.failUnlessRaises(AssertionError, lfd.commit)
+
+ # open for writing
+ assert not os.path.isfile(lockfilepath)
+ wfd = lfd.open(write=True)
+ assert lfd._fd is wfd
+ assert os.path.isfile(lockfilepath)
+
+ # write data and fail
+ os.write(wfd, new_data)
+ lfd.rollback()
+ assert lfd._fd is None
+ self._cmp_contents(my_file, orig_data)
+ assert not os.path.isfile(lockfilepath)
+
+ # additional call doesn't fail
+ lfd.commit()
+ lfd.rollback()
+
+ # test reading
+ lfd = LockedFD(my_file)
+ rfd = lfd.open(write=False)
+ assert os.read(rfd, len(orig_data)) == orig_data
+
+ assert os.path.isfile(lockfilepath)
+ # deletion rolls back
+ del(lfd)
+ assert not os.path.isfile(lockfilepath)
+
+
+ # write data - concurrently
+ lfd = LockedFD(my_file)
+ olfd = LockedFD(my_file)
+ assert not os.path.isfile(lockfilepath)
+ wfdstream = lfd.open(write=True, stream=True) # this time as stream
+ assert os.path.isfile(lockfilepath)
+ # another one fails
+ self.failUnlessRaises(IOError, olfd.open)
+
+ wfdstream.write(new_data)
+ lfd.commit()
+ assert not os.path.isfile(lockfilepath)
+ self._cmp_contents(my_file, new_data)
+
+ # could test automatic _end_writing on destruction
+ finally:
+ os.remove(my_file)
+ # END final cleanup
+
+ # try non-existing file for reading
+ lfd = LockedFD(tempfile.mktemp())
+ try:
+ lfd.open(write=False)
+ except OSError:
+ assert not os.path.exists(lfd._lockfilepath())
+ else:
+ self.fail("expected OSError")
+ # END handle exceptions
+
+ def test_iterable_list(self):
+ for args in (('name',), ('name', 'prefix_')):
+ l = IterableList('name')
+
+ m1 = TestIterableMember('one')
+ m2 = TestIterableMember('two')
+
+ l.extend((m1, m2))
+
+ assert len(l) == 2
+
+ # contains works with name and identity
+ assert m1.name in l
+ assert m2.name in l
+ assert m2 in l
+ assert m2 in l
+ assert 'invalid' not in l
+
+ # with string index
+ assert l[m1.name] is m1
+ assert l[m2.name] is m2
+
+ # with int index
+ assert l[0] is m1
+ assert l[1] is m2
+
+ # with getattr
+ assert l.one is m1
+ assert l.two is m2
+
+ # test exceptions
+ self.failUnlessRaises(AttributeError, getattr, l, 'something')
+ self.failUnlessRaises(IndexError, l.__getitem__, 'something')
+
+ # delete by name and index
+ self.failUnlessRaises(IndexError, l.__delitem__, 'something')
+ del(l[m2.name])
+ assert len(l) == 1
+ assert m2.name not in l and m1.name in l
+ del(l[0])
+ assert m1.name not in l
+ assert len(l) == 0
+
+ self.failUnlessRaises(IndexError, l.__delitem__, 0)
+ self.failUnlessRaises(IndexError, l.__delitem__, 'something')
+ #END for each possible mode
+
+
+class TestActor(TestBase):
+ def test_from_string_should_separate_name_and_email(self):
+ a = Actor._from_string("Michael Trier ")
+ assert "Michael Trier" == a.name
+ assert "mtrier@example.com" == a.email
+
+ # base type capabilities
+ assert a == a
+ assert not ( a != a )
+ m = set()
+ m.add(a)
+ m.add(a)
+ assert len(m) == 1
+
+ def test_from_string_should_handle_just_name(self):
+ a = Actor._from_string("Michael Trier")
+ assert "Michael Trier" == a.name
+ assert None == a.email
+
+ def test_should_display_representation(self):
+ a = Actor._from_string("Michael Trier ")
+ assert '">' == repr(a)
+
+ def test_str_should_alias_name(self):
+ a = Actor._from_string("Michael Trier ")
+ assert a.name == str(a)
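
Outside the patch, the lockfile protocol exercised by test_lockedfd above boils down to the following write path; the target path is a placeholder. Writes go to '<path>.lock' and only replace the original on commit(), while rollback() discards them.

import os
from git.util import LockedFD

lfd = LockedFD('/tmp/example.cfg')       # placeholder target file
fd = lfd.open(write=True)                # creates /tmp/example.cfg.lock
try:
    os.write(fd, "new content")
    lfd.commit()                         # renames the lock file over the original
except Exception:
    lfd.rollback()                       # removes the lock file, original untouched
    raise
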
diff --git a/git/typ.py b/git/typ.py
new file mode 100644
index 000000000..a2e719be6
--- /dev/null
+++ b/git/typ.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module containing information about types known to the database"""
+
+#{ String types
+
+# For compatibility only, use ObjectType instead
+str_blob_type = "blob"
+str_commit_type = "commit"
+str_tree_type = "tree"
+str_tag_type = "tag"
+
+class ObjectType(object):
+ """Enumeration providing object types as strings and ids"""
+ blob = str_blob_type
+ commit = str_commit_type
+ tree = str_tree_type
+ tag = str_tag_type
+
+ commit_id = 1
+ tree_id = 2
+ blob_id = 3
+ tag_id = 4
+
+#} END string types
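
For illustration only, a few lines showing how the string constants and ids above relate in use:

from git.typ import ObjectType, str_blob_type

assert ObjectType.blob == str_blob_type == "blob"
assert ObjectType.blob_id == 3

# a small mapping between the two representations, built from the enumeration
id_by_type = {ObjectType.commit: ObjectType.commit_id,
              ObjectType.tree: ObjectType.tree_id,
              ObjectType.blob: ObjectType.blob_id,
              ObjectType.tag: ObjectType.tag_id}
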
diff --git a/git/util.py b/git/util.py
index 7cbef07fb..0e7e4cbaa 100644
--- a/git/util.py
+++ b/git/util.py
@@ -4,28 +4,179 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+import platform
+import binascii
import os
-import re
+import mmap
import sys
+import errno
+import re
import time
+import stat
+import shutil
import tempfile
-import platform
+from smmap import (
+ StaticWindowMapManager,
+ SlidingWindowMapManager,
+ SlidingWindowMapBuffer
+ )
+
-from gitdb.util import (
- make_sha,
- LockedFD,
- file_contents_ro,
- LazyMixin,
- to_hex_sha,
- to_bin_sha
- )
__all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux",
"join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList",
"BlockingLockFile", "LockFile", 'Actor', 'get_user_id', 'assure_directory_exists',
- 'RemoteProgress')
+ 'RepoAliasMixin', 'LockedFD', 'LazyMixin', 'rmtree' )
+
+from cStringIO import StringIO
+
+# in py 2.4, StringIO is only StringI, without write support.
+# Hence we must use the python implementation for this
+if sys.version_info[1] < 5:
+ from StringIO import StringIO
+# END handle python 2.4
+
+try:
+ import async.mod.zlib as zlib
+except ImportError:
+ import zlib
+# END try async zlib
+
+from async import ThreadPool
+
+try:
+ import hashlib
+except ImportError:
+ import sha
+
+try:
+ from struct import unpack_from
+except ImportError:
+ from struct import unpack, calcsize
+ __calcsize_cache = dict()
+ def unpack_from(fmt, data, offset=0):
+ try:
+ size = __calcsize_cache[fmt]
+ except KeyError:
+ size = calcsize(fmt)
+ __calcsize_cache[fmt] = size
+ # END exception handling
+ return unpack(fmt, data[offset : offset + size])
+ # END own unpack_from implementation
+
+
+#{ Globals
-#{ Utility Methods
+# A pool distributing tasks, initially with zero threads, hence everything
+# will be handled in the main thread
+pool = ThreadPool(0)
+
+# initialize our global memory manager instance
+# Use it to free cached (and unused) resources.
+if sys.version_info[1] < 6:
+ mman = StaticWindowMapManager()
+else:
+ mman = SlidingWindowMapManager()
+#END handle mman
+
+#} END globals
+
+
+#{ Aliases
+
+hex_to_bin = binascii.a2b_hex
+bin_to_hex = binascii.b2a_hex
+
+# errors
+ENOENT = errno.ENOENT
+
+# os shortcuts
+exists = os.path.exists
+mkdir = os.mkdir
+chmod = os.chmod
+isdir = os.path.isdir
+isfile = os.path.isfile
+rename = os.rename
+remove = os.remove
+dirname = os.path.dirname
+basename = os.path.basename
+normpath = os.path.normpath
+expandvars = os.path.expandvars
+expanduser = os.path.expanduser
+abspath = os.path.abspath
+join = os.path.join
+read = os.read
+write = os.write
+close = os.close
+fsync = os.fsync
+
+# constants
+NULL_HEX_SHA = "0"*40
+NULL_BIN_SHA = "\0"*20
+
+#} END Aliases
+
+#{ compatibility stuff ...
+
+class _RandomAccessStringIO(object):
+ """Wrapper to provide required functionality in case memory maps cannot or may
+ not be used. This is only really required in python 2.4"""
+ __slots__ = '_sio'
+
+ def __init__(self, buf=''):
+ self._sio = StringIO(buf)
+
+ def __getattr__(self, attr):
+ return getattr(self._sio, attr)
+
+ def __len__(self):
+ return len(self.getvalue())
+
+ def __getitem__(self, i):
+ return self.getvalue()[i]
+
+ def __getslice__(self, start, end):
+ return self.getvalue()[start:end]
+
+#} END compatibility stuff ...
+
+#{ Routines
+
+def get_user_id():
+ """:return: string identifying the currently active system user as name@node
+ :note: user can be set with the 'USER' environment variable, usually set on windows"""
+ ukn = 'UNKNOWN'
+ username = os.environ.get('USER', os.environ.get('USERNAME', ukn))
+ if username == ukn and hasattr(os, 'getlogin'):
+ username = os.getlogin()
+ # END get username from login
+ return "%s@%s" % (username, platform.node())
+
+def is_git_dir(d):
+ """ This is taken from the git setup.c:is_git_directory
+ function."""
+ if isdir(d) and \
+ isdir(join(d, 'objects')) and \
+ isdir(join(d, 'refs')):
+ headref = join(d, 'HEAD')
+ return isfile(headref) or \
+ (os.path.islink(headref) and
+ os.readlink(headref).startswith('refs'))
+ return False
+
+def rmtree(path):
+ """Remove the given recursively.
+ :note: we use shutil rmtree but adjust its behaviour to see whether files that
+ couldn't be deleted are read-only. Windows will not remove them in that case"""
+ def onerror(func, path, exc_info):
+ if not os.access(path, os.W_OK):
+ # Is the error an access error ?
+ os.chmod(path, stat.S_IWUSR)
+ func(path)
+ else:
+ raise
+ # END end onerror
+ return shutil.rmtree(path, False, onerror)
def stream_copy(source, destination, chunk_size=512*1024):
"""Copy all data from the source stream into the destination stream in chunks
@@ -41,18 +192,102 @@ def stream_copy(source, destination, chunk_size=512*1024):
break
# END reading output stream
return br
+
+def make_sha(source=''):
+ """A python2.4 workaround for the sha/hashlib module fiasco
+ :note: From the dulwich project """
+ try:
+ return hashlib.sha1(source)
+ except NameError:
+ sha1 = sha.sha(source)
+ return sha1
+
+def allocate_memory(size):
+ """:return: a file-protocol accessible memory block of the given size"""
+ if size == 0:
+ return _RandomAccessStringIO('')
+ # END handle empty chunks gracefully
+
+ try:
+ return mmap.mmap(-1, size) # read-write by default
+ except EnvironmentError:
+ # setup real memory instead
+ # this of course may fail if the amount of memory is not available in
+ # one chunk - would only be the case in python 2.4, being more likely on
+ # 32 bit systems.
+ return _RandomAccessStringIO("\0"*size)
+ # END handle memory allocation
+
+
+def file_contents_ro(fd, stream=False, allow_mmap=True):
+ """:return: read-only contents of the file represented by the file descriptor fd
+ :param fd: file descriptor opened for reading
+ :param stream: if False, random access is provided, otherwise the stream interface
+ is provided.
+ :param allow_mmap: if True, it's allowed to map the contents into memory, which
+ allows large files to be handled and accessed efficiently. The file-descriptor
+ will change its position if this is False"""
+ try:
+ if allow_mmap:
+ # supports stream and random access
+ try:
+ return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ except EnvironmentError:
+ # python 2.4 issue, 0 wants to be the actual size
+ return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
+ # END handle python 2.4
+ except OSError:
+ pass
+ # END exception handling
+
+ # read manually
+ contents = os.read(fd, os.fstat(fd).st_size)
+ if stream:
+ return _RandomAccessStringIO(contents)
+ return contents
+
+def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
+ """Get the file contents at filepath as fast as possible
+ :return: random access compatible memory of the given filepath
+ :param stream: see ``file_contents_ro``
+ :param allow_mmap: see ``file_contents_ro``
+ :param flags: additional flags to pass to os.open
+ :raise OSError: If the file could not be opened
+ :note: for now we don't try to use O_NOATIME directly as the right value needs to be
+ shared per database in fact. It only makes a real difference for loose object
+ databases anyway, and they use it with the help of the ``flags`` parameter"""
+ fd = os.open(filepath, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags)
+ try:
+ return file_contents_ro(fd, stream, allow_mmap)
+ finally:
+ close(fd)
+ # END assure file is closed
+
+def to_hex_sha(sha):
+ """:return: hexified version of sha"""
+ if len(sha) == 40:
+ return sha
+ return bin_to_hex(sha)
+
+def to_bin_sha(sha):
+ if len(sha) == 20:
+ return sha
+ return hex_to_bin(sha)
def join_path(a, *p):
"""Join path tokens together similar to os.path.join, but always use
'/' instead of possibly '\' on windows."""
path = a
for b in p:
+ if len(b) == 0:
+ continue
if b.startswith('/'):
path += b[1:]
elif path == '' or path.endswith('/'):
path += b
else:
path += '/' + b
+ # END for each path token to add
return path
def to_native_path_windows(path):
@@ -61,6 +296,7 @@ def to_native_path_windows(path):
def to_native_path_linux(path):
return path.replace('\\','/')
+
if sys.platform.startswith('win'):
to_native_path = to_native_path_windows
else:
@@ -75,7 +311,7 @@ def join_path_native(a, *p):
needed to play it safe on my dear windows and to assure nice paths that only
use '\'"""
return to_native_path(join_path(a, *p))
-
+
def assure_directory_exists(path, is_file=False):
"""Assure that the directory pointed to by path exists.
@@ -89,138 +325,287 @@ def assure_directory_exists(path, is_file=False):
os.makedirs(path)
return True
return False
-
-def get_user_id():
- """:return: string identifying the currently active system user as name@node
- :note: user can be set with the 'USER' environment variable, usually set on windows"""
- ukn = 'UNKNOWN'
- username = os.environ.get('USER', os.environ.get('USERNAME', ukn))
- if username == ukn and hasattr(os, 'getlogin'):
- username = os.getlogin()
- # END get username from login
- return "%s@%s" % (username, platform.node())
-#} END utilities
-#{ Classes
+#} END routines
+
-class RemoteProgress(object):
+#{ Utilities
+
+class LazyMixin(object):
"""
- Handler providing an interface to parse progress information emitted by git-push
- and git-fetch and to dispatch callbacks allowing subclasses to react to the progress.
+ Base class providing an interface to lazily retrieve attribute values upon
+ first access. If slots are used, memory will only be reserved once the attribute
+ is actually accessed and retrieved the first time. All future accesses will
+ return the cached value as stored in the instance's dict or slot.
"""
- _num_op_codes = 5
- BEGIN, END, COUNTING, COMPRESSING, WRITING = [1 << x for x in range(_num_op_codes)]
- STAGE_MASK = BEGIN|END
- OP_MASK = ~STAGE_MASK
-
- __slots__ = ("_cur_line", "_seen_ops")
- re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)")
- re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
-
- def __init__(self):
- self._seen_ops = list()
-
- def _parse_progress_line(self, line):
- """Parse progress information from the given line as retrieved by git-push
- or git-fetch
-
- :return: list(line, ...) list of lines that could not be processed"""
- # handle
- # Counting objects: 4, done.
- # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done.
- self._cur_line = line
- sub_lines = line.split('\r')
- failed_lines = list()
- for sline in sub_lines:
- # find esacpe characters and cut them away - regex will not work with
- # them as they are non-ascii. As git might expect a tty, it will send them
- last_valid_index = None
- for i,c in enumerate(reversed(sline)):
- if ord(c) < 32:
- # its a slice index
- last_valid_index = -i-1
- # END character was non-ascii
- # END for each character in sline
- if last_valid_index is not None:
- sline = sline[:last_valid_index]
- # END cut away invalid part
- sline = sline.rstrip()
-
- cur_count, max_count = None, None
- match = self.re_op_relative.match(sline)
- if match is None:
- match = self.re_op_absolute.match(sline)
-
- if not match:
- self.line_dropped(sline)
- failed_lines.append(sline)
- continue
- # END could not get match
-
- op_code = 0
- remote, op_name, percent, cur_count, max_count, message = match.groups()
-
- # get operation id
- if op_name == "Counting objects":
- op_code |= self.COUNTING
- elif op_name == "Compressing objects":
- op_code |= self.COMPRESSING
- elif op_name == "Writing objects":
- op_code |= self.WRITING
- else:
- raise ValueError("Operation name %r unknown" % op_name)
-
- # figure out stage
- if op_code not in self._seen_ops:
- self._seen_ops.append(op_code)
- op_code |= self.BEGIN
- # END begin opcode
-
- if message is None:
- message = ''
- # END message handling
-
- message = message.strip()
- done_token = ', done.'
- if message.endswith(done_token):
- op_code |= self.END
- message = message[:-len(done_token)]
- # END end message handling
-
- self.update(op_code, cur_count, max_count, message)
- # END for each sub line
- return failed_lines
- def line_dropped(self, line):
- """Called whenever a line could not be understood and was therefore dropped."""
+ __slots__ = tuple()
+
+ def __getattr__(self, attr):
+ """
+ Whenever an attribute is requested that we do not know, we allow it
+ to be created and set. Next time the same attribute is requested, it is simply
+ returned from our dict/slots. """
+ self._set_cache_(attr)
+ # will raise in case the cache was not created
+ return object.__getattribute__(self, attr)
+
+ def _set_cache_(self, attr):
+ """
+ This method should be overridden in the derived class.
+ It should check whether the attribute named by attr can be created
+ and cached. Do nothing if you do not know the attribute, or delegate the call to your base class.
+
+ The derived class may create as many additional attributes as it deems
+ necessary in case a git command returns more information than represented
+ in the single attribute."""
pass
+
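+# A minimal usage sketch of LazyMixin (illustrative only; 'Example' and
+# 'compute_size' are hypothetical names, not part of this module):
+#
+# class Example(LazyMixin):
+#     __slots__ = ('size',)
+#     def _set_cache_(self, attr):
+#         if attr == 'size':
+#             self.size = compute_size()  # expensive work happens only once
+#         else:
+#             super(Example, self)._set_cache_(attr)
+#
+# Example().size  # computed on first access, afterwards served from the slot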
- def update(self, op_code, cur_count, max_count=None, message=''):
- """Called whenever the progress changes
+class LockedFD(object):
+ """
+ This class facilitates a safe read and write operation to a file on disk.
+ If we write to 'file', we obtain a lock file at 'file.lock' and write to
+ that instead. If we succeed, the lock file will be renamed to overwrite
+ the original file.
+
+ When reading, we obtain a lock file as well, to prevent other writers from
+ succeeding while we are reading the file.
+
+ This type handles errors correctly in that it will assure a consistent state
+ on destruction.
+
+ :note: with this setup, parallel reading is not possible"""
+ __slots__ = ("_filepath", '_fd', '_write')
+
+ def __init__(self, filepath):
+ """Initialize an instance with the givne filepath"""
+ self._filepath = filepath
+ self._fd = None
+ self._write = None # if True, we write a file
+
+ def __del__(self):
+ # will do nothing if the file descriptor is already closed
+ if self._fd is not None:
+ self.rollback()
+
+ def _lockfilepath(self):
+ return "%s.lock" % self._filepath
+
+ def open(self, write=False, stream=False):
+ """
+ Open the file descriptor for reading or writing, both in binary mode.
+
+ :param write: if True, the file descriptor will be opened for writing.
+ Otherwise it will be opened read-only.
+ :param stream: if True, the file descriptor will be wrapped into a simple stream
+ object which supports only reading or writing
+ :return: fd to read from or write to. It is still maintained by this instance
+ and must not be closed directly
+ :raise IOError: if the lock could not be retrieved
+ :raise OSError: If the actual file could not be opened for reading
+ :note: must only be called once"""
+ if self._write is not None:
+ raise AssertionError("Called %s multiple times" % self.open)
+
+ self._write = write
+
+ # try to open the lock file
+ binary = getattr(os, 'O_BINARY', 0)
+ lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
+ try:
+ fd = os.open(self._lockfilepath(), lockmode, 0600)
+ if not write:
+ os.close(fd)
+ else:
+ self._fd = fd
+ # END handle file descriptor
+ except OSError:
+ raise IOError("Lock at %r could not be obtained" % self._lockfilepath())
+ # END handle lock retrieval
+
+ # open actual file if required
+ if self._fd is None:
+ # we could specify exclusive here, as we obtained the lock anyway
+ try:
+ self._fd = os.open(self._filepath, os.O_RDONLY | binary)
+ except:
+ # assure we release our lockfile
+ os.remove(self._lockfilepath())
+ raise
+ # END handle lockfile
+ # END open descriptor for reading
+
+ if stream:
+ # need delayed import
+ from stream import FDStream
+ return FDStream(self._fd)
+ else:
+ return self._fd
+ # END handle stream
+
+ def commit(self):
+ """When done writing, call this function to commit your changes into the
+ actual file.
+ The file descriptor will be closed, and the lock file will replace the original file.
+ :note: can be called multiple times"""
+ self._end_writing(successful=True)
+
+ def rollback(self):
+ """Abort your operation without any changes. The file descriptor will be
+ closed, and the lock released.
+ :note: can be called multiple times"""
+ self._end_writing(successful=False)
+
+ def _end_writing(self, successful=True):
+ """Handle the lock according to the write mode """
+ if self._write is None:
+ raise AssertionError("Cannot end operation if it wasn't started yet")
- :param op_code:
- Integer allowing to be compared against Operation IDs and stage IDs.
+ if self._fd is None:
+ return
+
+ os.close(self._fd)
+ self._fd = None
+
+ lockfile = self._lockfilepath()
+ if self._write and successful:
+ # on windows, rename does not silently overwrite the existing one
+ if sys.platform == "win32":
+ if isfile(self._filepath):
+ os.remove(self._filepath)
+ # END remove if exists
+ # END win32 special handling
+ os.rename(lockfile, self._filepath)
- Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation
- ID as well as END. It may be that BEGIN and END are set at once in case only
- one progress message was emitted due to the speed of the operation.
- Between BEGIN and END, none of these flags will be set
+ # assure others can at least read the file - the tmpfile left it at rw--
+ # We may also write that file, on windows that boils down to a remove-
+ # protection as well
+ chmod(self._filepath, 0644)
+ else:
+ # just delete the file so far, we failed
+ os.remove(lockfile)
+ # END successful handling
+
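+# A minimal usage sketch for LockedFD (illustrative; 'config_path' is a
+# hypothetical file path, not something defined in this module):
+#
+# lfd = LockedFD(config_path)
+# fd = lfd.open(write=True)    # creates and holds config_path + '.lock'
+# os.write(fd, 'new contents')
+# lfd.commit()                 # closes fd and renames the lock file onto config_path
+# # on failure, lfd.rollback() (or garbage collection) removes the lock file instead
+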
+
+class LockFile(object):
+ """Provides methods to obtain, check for, and release a file based lock which
+ should be used to handle concurrent access to the same file.
+
+ As we are a utility class to be derived from, we only use protected methods.
+
+ Locks will automatically be released on destruction"""
+ __slots__ = ("_file_path", "_owns_lock")
+
+ def __init__(self, file_path):
+ self._file_path = file_path
+ self._owns_lock = False
+
+ def __del__(self):
+ self._release_lock()
+
+ def _lock_file_path(self):
+ """:return: Path to lockfile"""
+ return "%s.lock" % (self._file_path)
+
+ def _has_lock(self):
+ """:return: True if we have a lock and if the lockfile still exists
+ :raise AssertionError: if our lock-file does not exist"""
+ if not self._owns_lock:
+ return False
+
+ return True
+
+ def _obtain_lock_or_raise(self):
+ """Create a lock file as flag for other instances, mark our instance as lock-holder
+
+ :raise IOError: if a lock was already present or a lock file could not be written"""
+ if self._has_lock():
+ return
+ lock_file = self._lock_file_path()
+ if os.path.isfile(lock_file):
+ raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file))
- Operation IDs are all held within the OP_MASK. Only one Operation ID will
- be active per call.
- :param cur_count: Current absolute count of items
+ try:
+ fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
+ os.close(fd)
+ except OSError,e:
+ raise IOError(str(e))
+
+ self._owns_lock = True
+
+ def _obtain_lock(self):
+ """The default implementation will raise if a lock cannot be obtained.
+ Subclasses may override this method to provide a different implementation"""
+ return self._obtain_lock_or_raise()
+
+ def _release_lock(self):
+ """Release our lock if we have one"""
+ if not self._has_lock():
+ return
- :param max_count:
- The maximum count of items we expect. It may be None in case there is
- no maximum number of items or if it is (yet) unknown.
+ # if someone removed our file beforehand, let's just flag this issue
+ # instead of failing, to make it more usable.
+ lfp = self._lock_file_path()
+ try:
+ # on bloody windows, the file needs write permissions to be removable.
+ # Why ...
+ if os.name == 'nt':
+ os.chmod(lfp, 0777)
+ # END handle win32
+ os.remove(lfp)
+ except OSError:
+ pass
+ self._owns_lock = False
+
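+# Illustrative sketch of how a derived type might use the protected methods
+# ('ConfigWriter' is hypothetical, not part of this module):
+#
+# class ConfigWriter(LockFile):
+#     def write(self, data):
+#         self._obtain_lock_or_raise()   # creates 'file.lock' or raises IOError
+#         try:
+#             open(self._file_path, 'wb').write(data)
+#         finally:
+#             self._release_lock()
+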
+
+class BlockingLockFile(LockFile):
+ """The lock file will block until a lock could be obtained, or fail after
+ a specified timeout.
+
+ :note: If the directory containing the lock was removed, an exception will
+ be raised during the blocking period, preventing hangs as the lock
+ can never be obtained."""
+ __slots__ = ("_check_interval", "_max_block_time")
+ def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
+ """Configure the instance
- :param message:
- In case of the 'WRITING' operation, it contains the amount of bytes
- transferred. It may possibly be used for other purposes as well.
+ :param check_interval_s:
+ Period of time to sleep between attempts to obtain the lock.
- You may read the contents of the current line in self._cur_line"""
- pass
+ :param max_block_time_s: Maximum amount of seconds we may wait for the lock; by default nearly unlimited (sys.maxint)"""
+ super(BlockingLockFile, self).__init__(file_path)
+ self._check_interval = check_interval_s
+ self._max_block_time = max_block_time_s
+
+ def _obtain_lock(self):
+ """This method blocks until it obtained the lock, or raises IOError if
+ it ran out of time or if the parent directory was not available anymore.
+ If this method returns, you are guranteed to own the lock"""
+ starttime = time.time()
+ maxtime = starttime + float(self._max_block_time)
+ while True:
+ try:
+ super(BlockingLockFile, self)._obtain_lock()
+ except IOError:
+ # sanity check: if the directory leading to the lockfile is not
+ # readable anymore, raise an exception
+ curtime = time.time()
+ if not os.path.isdir(os.path.dirname(self._lock_file_path())):
+ msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
+ raise IOError(msg)
+ # END handle missing directory
+
+ if curtime >= maxtime:
+ msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
+ raise IOError(msg)
+ # END abort if we wait too long
+ time.sleep(self._check_interval)
+ else:
+ break
+ # END endless loop
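+# Illustrative sketch: poll every 0.25 seconds and give up after roughly 5 seconds
+# ('shared_path' is a hypothetical file path):
+#
+# lock = BlockingLockFile(shared_path, check_interval_s=0.25, max_block_time_s=5)
+# lock._obtain_lock()  # returns once the competing '.lock' file is gone, raises IOError on timeout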
class Actor(object):
@@ -274,20 +659,20 @@ def _from_string(cls, string):
m = cls.name_email_regex.search(string)
if m:
name, email = m.groups()
- return Actor(name, email)
+ return cls(name, email)
else:
m = cls.name_only_regex.search(string)
if m:
- return Actor(m.group(1), None)
+ return cls(m.group(1), None)
else:
# assume best and use the whole string as name
- return Actor(string, None)
+ return cls(string, None)
# END special case name
# END handle name/email matching
@classmethod
def _main_actor(cls, env_name, env_email, config_reader=None):
- actor = Actor('', '')
+ actor = cls('', '')
default_email = get_user_id()
default_name = default_email.split('@')[0]
@@ -324,6 +709,127 @@ def author(cls, config_reader=None):
return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader)
+class Iterable(object):
+ """Defines an interface for iterable items which is to assure a uniform
+ way to retrieve and iterate items within the git repository"""
+ __slots__ = tuple()
+ _id_attribute_ = "attribute that most suitably identifies your instance"
+
+ @classmethod
+ def list_items(cls, repo, *args, **kwargs):
+ """
+ Find all items of this type - subclasses can specify args and kwargs differently.
+ If no additional arguments are given, subclasses are obliged to return all items.
+
+ :note: Favor the iter_items method as it will avoid loading all items into memory at once.
+
+ :return: list(Item,...) list of item instances"""
+ out_list = IterableList( cls._id_attribute_ )
+ out_list.extend(cls.iter_items(repo, *args, **kwargs))
+ return out_list
+
+
+ @classmethod
+ def iter_items(cls, repo, *args, **kwargs):
+ """For more information about the arguments, see list_items
+ :return: iterator yielding Items"""
+ raise NotImplementedError("To be implemented by Subclass")
+
+
+class IterableList(list):
+ """
+ List of iterable objects allowing its items to be queried by id or by named index::
+
+ heads = repo.heads
+ heads.master
+ heads['master']
+ heads[0]
+
+ It requires an id_attribute name to be set which will be queried from its
+ contained items to have a means for comparison.
+
+ A prefix can be specified in case the ids returned by the items always
+ carry a common prefix that does not matter to the user, so it can be
+ left out when querying.
+ __slots__ = ('_id_attr', '_prefix')
+
+ def __new__(cls, id_attr, prefix=''):
+ return super(IterableList,cls).__new__(cls)
+
+ def __init__(self, id_attr, prefix=''):
+ self._id_attr = id_attr
+ self._prefix = prefix
+ if not isinstance(id_attr, basestring):
+ raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
+ # END help debugging !
+
+ def __contains__(self, attr):
+ # first try identity match for performance
+ rval = list.__contains__(self, attr)
+ if rval:
+ return rval
+ #END handle match
+
+ # otherwise make a full name search
+ try:
+ getattr(self, attr)
+ return True
+ except (AttributeError, TypeError):
+ return False
+ #END handle membership
+
+ def __getattr__(self, attr):
+ attr = self._prefix + attr
+ for item in self:
+ if getattr(item, self._id_attr) == attr:
+ return item
+ # END for each item
+ return list.__getattribute__(self, attr)
+
+ def __getitem__(self, index):
+ if isinstance(index, int):
+ return list.__getitem__(self,index)
+
+ try:
+ return getattr(self, index)
+ except AttributeError:
+ raise IndexError( "No item found with id %r" % (self._prefix + index) )
+ # END handle getattr
+
+ def __delitem__(self, index):
+ delindex = index
+ if not isinstance(index, int):
+ delindex = -1
+ name = self._prefix + index
+ for i, item in enumerate(self):
+ if getattr(item, self._id_attr) == name:
+ delindex = i
+ break
+ #END search index
+ #END for each item
+ if delindex == -1:
+ raise IndexError("Item with name %s not found" % name)
+ #END handle error
+ #END get index to delete
+ list.__delitem__(self, delindex)
+
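+# Illustrative behaviour of IterableList, mirroring the docstring example above
+# ('heads' stands for any sequence of items exposing a 'name' attribute):
+#
+# il = IterableList('name', prefix='refs/heads/')
+# il.extend(heads)
+# il[0]          # plain list indexing still works
+# il['master']   # finds the item whose name == 'refs/heads/master'
+# il.master      # the same lookup via attribute access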
+
+#} END utilities
+
+#{ Classes
+
+class RepoAliasMixin(object):
+ """Simple utility providing a repo-property which resolves to the 'odb' attribute
+ of the actual type. This is for api compatibility only, as the types previously
+ held repository instances, whereas now they hold odb instances instead"""
+ __slots__ = tuple()
+
+ @property
+ def repo(self):
+ return self.odb
+
+
class Stats(object):
"""
Represents stat information as presented by git at the end of a merge. It is
@@ -407,195 +913,4 @@ def tell(self):
return self.f.tell()
-class LockFile(object):
- """Provides methods to obtain, check for, and release a file based lock which
- should be used to handle concurrent access to the same file.
-
- As we are a utility class to be derived from, we only use protected methods.
-
- Locks will automatically be released on destruction"""
- __slots__ = ("_file_path", "_owns_lock")
-
- def __init__(self, file_path):
- self._file_path = file_path
- self._owns_lock = False
-
- def __del__(self):
- self._release_lock()
-
- def _lock_file_path(self):
- """:return: Path to lockfile"""
- return "%s.lock" % (self._file_path)
-
- def _has_lock(self):
- """:return: True if we have a lock and if the lockfile still exists
- :raise AssertionError: if our lock-file does not exist"""
- if not self._owns_lock:
- return False
-
- return True
-
- def _obtain_lock_or_raise(self):
- """Create a lock file as flag for other instances, mark our instance as lock-holder
-
- :raise IOError: if a lock was already present or a lock file could not be written"""
- if self._has_lock():
- return
- lock_file = self._lock_file_path()
- if os.path.isfile(lock_file):
- raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file))
-
- try:
- fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
- os.close(fd)
- except OSError,e:
- raise IOError(str(e))
-
- self._owns_lock = True
-
- def _obtain_lock(self):
- """The default implementation will raise if a lock cannot be obtained.
- Subclasses may override this method to provide a different implementation"""
- return self._obtain_lock_or_raise()
-
- def _release_lock(self):
- """Release our lock if we have one"""
- if not self._has_lock():
- return
-
- # if someone removed our file beforhand, lets just flag this issue
- # instead of failing, to make it more usable.
- lfp = self._lock_file_path()
- try:
- # on bloody windows, the file needs write permissions to be removable.
- # Why ...
- if os.name == 'nt':
- os.chmod(lfp, 0777)
- # END handle win32
- os.remove(lfp)
- except OSError:
- pass
- self._owns_lock = False
-
-
-class BlockingLockFile(LockFile):
- """The lock file will block until a lock could be obtained, or fail after
- a specified timeout.
-
- :note: If the directory containing the lock was removed, an exception will
- be raised during the blocking period, preventing hangs as the lock
- can never be obtained."""
- __slots__ = ("_check_interval", "_max_block_time")
- def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
- """Configure the instance
-
- :parm check_interval_s:
- Period of time to sleep until the lock is checked the next time.
- By default, it waits a nearly unlimited time
-
- :parm max_block_time_s: Maximum amount of seconds we may lock"""
- super(BlockingLockFile, self).__init__(file_path)
- self._check_interval = check_interval_s
- self._max_block_time = max_block_time_s
-
- def _obtain_lock(self):
- """This method blocks until it obtained the lock, or raises IOError if
- it ran out of time or if the parent directory was not available anymore.
- If this method returns, you are guranteed to own the lock"""
- starttime = time.time()
- maxtime = starttime + float(self._max_block_time)
- while True:
- try:
- super(BlockingLockFile, self)._obtain_lock()
- except IOError:
- # synity check: if the directory leading to the lockfile is not
- # readable anymore, raise an execption
- curtime = time.time()
- if not os.path.isdir(os.path.dirname(self._lock_file_path())):
- msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
- raise IOError(msg)
- # END handle missing directory
-
- if curtime >= maxtime:
- msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
- raise IOError(msg)
- # END abort if we wait too long
- time.sleep(self._check_interval)
- else:
- break
- # END endless loop
-
-
-class IterableList(list):
- """
- List of iterable objects allowing to query an object by id or by named index::
-
- heads = repo.heads
- heads.master
- heads['master']
- heads[0]
-
- It requires an id_attribute name to be set which will be queried from its
- contained items to have a means for comparison.
-
- A prefix can be specified which is to be used in case the id returned by the
- items always contains a prefix that does not matter to the user, so it
- can be left out."""
- __slots__ = ('_id_attr', '_prefix')
-
- def __new__(cls, id_attr, prefix=''):
- return super(IterableList,cls).__new__(cls)
-
- def __init__(self, id_attr, prefix=''):
- self._id_attr = id_attr
- self._prefix = prefix
- if not isinstance(id_attr, basestring):
- raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
- # END help debugging !
-
- def __getattr__(self, attr):
- attr = self._prefix + attr
- for item in self:
- if getattr(item, self._id_attr) == attr:
- return item
- # END for each item
- return list.__getattribute__(self, attr)
-
- def __getitem__(self, index):
- if isinstance(index, int):
- return list.__getitem__(self,index)
-
- try:
- return getattr(self, index)
- except AttributeError:
- raise IndexError( "No item found with id %r" % (self._prefix + index) )
-
-
-class Iterable(object):
- """Defines an interface for iterable items which is to assure a uniform
- way to retrieve and iterate items within the git repository"""
- __slots__ = tuple()
- _id_attribute_ = "attribute that most suitably identifies your instance"
-
- @classmethod
- def list_items(cls, repo, *args, **kwargs):
- """
- Find all items of this type - subclasses can specify args and kwargs differently.
- If no args are given, subclasses are obliged to return all items if no additional
- arguments arg given.
-
- :note: Favor the iter_items method as it will
-
- :return:list(Item,...) list of item instances"""
- out_list = IterableList( cls._id_attribute_ )
- out_list.extend(cls.iter_items(repo, *args, **kwargs))
- return out_list
-
-
- @classmethod
- def iter_items(cls, repo, *args, **kwargs):
- """For more information about the arguments, see list_items
- :return: iterator yielding Items"""
- raise NotImplementedError("To be implemented by Subclass")
-
#} END classes