diff --git a/.gitignore b/.gitignore index eec80860b..8dd671052 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ /dist /doc/_build nbproject +.nosebazinga diff --git a/.gitmodules b/.gitmodules index 83a5207ef..8535685a3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,6 @@ -[submodule "gitdb"] - path = git/ext/gitdb - url = git://github.com/gitpython-developers/gitdb.git - branch = master +[submodule "async"] + path = git/ext/async + url = git://github.com/gitpython-developers/async.git +[submodule "smmap"] + path = git/ext/smmap + url = git://github.com/Byron/smmap.git diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 2433d00ee..ad0f2530e 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -4,8 +4,36 @@ Changelog NEXT ==== -* Blob Type - * Added mode constants to ease the manual creation of blobs +* ### Class Renames ### + + * Renamed **GitCmdObjectDB** to **CmdGitDB** (analogous to **PureCmdDB**) + +* ### Interface Changes ### + + * **SymbolicReference** + + * object_binsha property added + + * **Blob** Type + + * Added mode constants to ease the manual creation of blobs + + * **Repo** (i.e. **HighLevelRepository**) now supports a progress instance to be provided for its ``clone()`` and ``clone_from()`` methods. + +* ### Module Changes ### + + * Removed rev_parse function from git.repo.fun - the respective functionality is available only through the repository's rev_parse method, which might in turn translate to any implementation. + +* ### Git Cmd ### + + * Added ``version_info`` property to git command, returning a tuple of version numbers. + * Added GIT_PYTHON_GIT_EXECUTABLE environment variable, which can be used to set the git executable to use, regardless of what would be found in the path. + * GIT_PYTHON_TRACE is now set on class level of the Git type; previously it was a module level global variable. + * GIT_PYTHON_GIT_EXECUTABLE is a class level variable as well. 
+ +* ### Exceptions ### + + * There is a new common base for all exceptions git-python will throw, namely `GitPythonError`. 0.3.1 Beta 2 ============ diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index 5530cedd6..4dcfad4aa 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -412,6 +412,20 @@ The special notion ``git.command(flag=True)`` will create a flag without value l If ``None`` is found in the arguments, it will be dropped silently. Lists and tuples passed as arguments will be unpacked recursively to individual arguments. Objects are converted to strings using the str(...) function. +Git Command Debugging and Customization +*************************************** + +Using environment variables, you can further adjust the behaviour of the git command. + +* **GIT_PYTHON_TRACE** + + * If set to non-0, all executed git commands will be printed to stdout. + * If set to *full*, the executed git command will be printed along with its output. + +* **GIT_PYTHON_GIT_EXECUTABLE** + + * If set, it should contain the full path to the git executable, e.g. *c:\\Program Files (x86)\\Git\\bin\\git.exe* on Windows or */usr/bin/git* on Linux. + And even more ... 
***************** diff --git a/git/__init__.py b/git/__init__.py index 0658c3306..adc5487e6 100644 --- a/git/__init__.py +++ b/git/__init__.py @@ -14,13 +14,15 @@ #{ Initialization def _init_externals(): """Initialize external projects by putting them into the path""" - sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'gitdb')) - - try: - import gitdb - except ImportError: - raise ImportError("'gitdb' could not be found in your PYTHONPATH") - #END verify import + ext_base = os.path.join(os.path.dirname(__file__), 'ext') + for package in ('async', 'smmap'): + sys.path.append(os.path.join(ext_base, package)) + try: + __import__(package) + except ImportError: + raise ImportError("%r could not be found in your PYTHONPATH" % package) + #END verify import + #END handle external import #} END initialization @@ -37,9 +39,9 @@ def _init_externals(): from git.exc import * from git.db import * from git.cmd import Git -from git.repo import Repo from git.remote import * from git.index import * +from git.repo import Repo from git.util import ( LockFile, BlockingLockFile, diff --git a/git/base.py b/git/base.py new file mode 100644 index 000000000..ff1062bf6 --- /dev/null +++ b/git/base.py @@ -0,0 +1,311 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module with basic data structures - they are designed to be lightweight and fast""" +from util import ( + bin_to_hex, + zlib + ) + +from fun import ( + type_id_to_type_map, + type_to_type_id_map + ) + +__all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo', + 'OStream', 'OPackStream', 'ODeltaPackStream', + 'IStream', 'InvalidOInfo', 'InvalidOStream' ) + +#{ ODB Bases + +class OInfo(tuple): + """Carries information about an object in an ODB, provding information + about the binary sha of the object, the type_string as well as the uncompressed size + in 
bytes. + + It can be accessed using tuple notation and using attribute access notation:: + + assert dbi[0] == dbi.binsha + assert dbi[1] == dbi.type + assert dbi[2] == dbi.size + + The type is designed to be as lighteight as possible.""" + __slots__ = tuple() + + def __new__(cls, sha, type, size): + return tuple.__new__(cls, (sha, type, size)) + + def __init__(self, *args): + tuple.__init__(self) + + #{ Interface + @property + def binsha(self): + """:return: our sha as binary, 20 bytes""" + return self[0] + + @property + def hexsha(self): + """:return: our sha, hex encoded, 40 bytes""" + return bin_to_hex(self[0]) + + @property + def type(self): + return self[1] + + @property + def type_id(self): + return type_to_type_id_map[self[1]] + + @property + def size(self): + return self[2] + #} END interface + + +class OPackInfo(tuple): + """As OInfo, but provides a type_id property to retrieve the numerical type id, and + does not include a sha. + + Additionally, the pack_offset is the absolute offset into the packfile at which + all object information is located. 
The data_offset property points to the abosolute + location in the pack at which that actual data stream can be found.""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size): + return tuple.__new__(cls, (packoffset,type, size)) + + def __init__(self, *args): + tuple.__init__(self) + + #{ Interface + + @property + def pack_offset(self): + return self[0] + + @property + def type(self): + return type_id_to_type_map[self[1]] + + @property + def type_id(self): + return self[1] + + @property + def size(self): + return self[2] + + #} END interface + + +class ODeltaPackInfo(OPackInfo): + """Adds delta specific information, + Either the 20 byte sha which points to some object in the database, + or the negative offset from the pack_offset, so that pack_offset - delta_info yields + the pack offset of the base object""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size, delta_info): + return tuple.__new__(cls, (packoffset, type, size, delta_info)) + + #{ Interface + @property + def delta_info(self): + return self[3] + #} END interface + + +class OStream(OInfo): + """Base for object streams retrieved from the database, providing additional + information about the stream. 
+ Generally, ODB streams are read-only as objects are immutable""" + __slots__ = tuple() + + def __new__(cls, sha, type, size, stream, *args, **kwargs): + """Helps with the initialization of subclasses""" + return tuple.__new__(cls, (sha, type, size, stream)) + + + def __init__(self, *args, **kwargs): + tuple.__init__(self) + + #{ Stream Reader Interface + + def read(self, size=-1): + return self[3].read(size) + + @property + def stream(self): + return self[3] + + #} END stream reader interface + + +class ODeltaStream(OStream): + """Uses size info of its stream, delaying reads""" + + def __new__(cls, sha, type, size, stream, *args, **kwargs): + """Helps with the initialization of subclasses""" + return tuple.__new__(cls, (sha, type, size, stream)) + + #{ Stream Reader Interface + + @property + def size(self): + return self[3].size + + #} END stream reader interface + + +class OPackStream(OPackInfo): + """Next to pack object information, a stream outputting an undeltified base object + is provided""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size, stream, *args): + """Helps with the initialization of subclasses""" + return tuple.__new__(cls, (packoffset, type, size, stream)) + + #{ Stream Reader Interface + def read(self, size=-1): + return self[3].read(size) + + @property + def stream(self): + return self[3] + #} END stream reader interface + + +class ODeltaPackStream(ODeltaPackInfo): + """Provides a stream outputting the uncompressed offset delta information""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size, delta_info, stream): + return tuple.__new__(cls, (packoffset, type, size, delta_info, stream)) + + + #{ Stream Reader Interface + def read(self, size=-1): + return self[4].read(size) + + @property + def stream(self): + return self[4] + #} END stream reader interface + + +class IStream(list): + """Represents an input content stream to be fed into the ODB. 
It is mutable to allow + the ODB to record information about the operations outcome right in this instance. + + It provides interfaces for the OStream and a StreamReader to allow the instance + to blend in without prior conversion. + + The only method your content stream must support is 'read'""" + __slots__ = tuple() + + def __new__(cls, type, size, stream, sha=None): + return list.__new__(cls, (sha, type, size, stream, None)) + + def __init__(self, type, size, stream, sha=None): + list.__init__(self, (sha, type, size, stream, None)) + + #{ Interface + @property + def hexsha(self): + """:return: our sha, hex encoded, 40 bytes""" + return bin_to_hex(self[0]) + + def _error(self): + """:return: the error that occurred when processing the stream, or None""" + return self[4] + + def _set_error(self, exc): + """Set this input stream to the given exc, may be None to reset the error""" + self[4] = exc + + error = property(_error, _set_error) + + #} END interface + + #{ Stream Reader Interface + + def read(self, size=-1): + """Implements a simple stream reader interface, passing the read call on + to our internal stream""" + return self[3].read(size) + + #} END stream reader interface + + #{ interface + + def _set_binsha(self, binsha): + self[0] = binsha + + def _binsha(self): + return self[0] + + binsha = property(_binsha, _set_binsha) + + + def _type(self): + return self[1] + + def _set_type(self, type): + self[1] = type + + type = property(_type, _set_type) + + def _size(self): + return self[2] + + def _set_size(self, size): + self[2] = size + + size = property(_size, _set_size) + + def _stream(self): + return self[3] + + def _set_stream(self, stream): + self[3] = stream + + stream = property(_stream, _set_stream) + + #} END odb info interface + + +class InvalidOInfo(tuple): + """Carries information about a sha identifying an object which is invalid in + the queried database. 
The exception attribute provides more information about + the cause of the issue""" + __slots__ = tuple() + + def __new__(cls, sha, exc): + return tuple.__new__(cls, (sha, exc)) + + def __init__(self, sha, exc): + tuple.__init__(self, (sha, exc)) + + @property + def binsha(self): + return self[0] + + @property + def hexsha(self): + return bin_to_hex(self[0]) + + @property + def error(self): + """:return: exception instance explaining the failure""" + return self[1] + + +class InvalidOStream(InvalidOInfo): + """Carries information about an invalid ODB stream""" + __slots__ = tuple() + +#} END ODB Bases + diff --git a/git/cmd.py b/git/cmd.py index 60887f5da..576a5300a 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -5,7 +5,10 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os, sys -from util import * +from util import ( + LazyMixin, + stream_copy + ) from exc import GitCommandError from subprocess import ( @@ -14,9 +17,6 @@ PIPE ) -# Enables debugging of GitPython's git commands -GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) - execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', 'with_exceptions', 'as_process', 'output_stream' ) @@ -26,7 +26,8 @@ def dashify(string): return string.replace('_', '-') -class Git(object): + +class Git(LazyMixin): """ The Git class manages communication with the Git binary. @@ -41,12 +42,23 @@ class Git(object): of the command to stdout. Set its value to 'full' to see details about the returned values. 
""" - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") + __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info") # CONFIGURATION # The size in bytes read from stdout when copying git's output to another stream max_chunk_size = 1024*64 + git_exec_name = "git" # default that should work on linux and windows + git_exec_name_win = "git.cmd" # alternate command name, windows only + + # Enables debugging of GitPython's git commands + GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) + + # Provide the full path to the git executable. Otherwise it assumes git is in the path + _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" + GIT_PYTHON_GIT_EXECUTABLE = os.environ.get(_git_exec_env_var, git_exec_name) + + class AutoInterrupt(object): """Kill/Interrupt the stored process instance once this instance goes out of scope. It is used to prevent processes piling up in case iterators stop reading. @@ -214,14 +226,32 @@ def __getattr__(self, name): """A convenience method as it allows to call the command as if it was an object. 
:return: Callable object that will execute call _call_process with your arguments.""" - if name[:1] == '_': - raise AttributeError(name) + if name[0] == '_': + return LazyMixin.__getattr__(self, name) return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + def _set_cache_(self, attr): + if attr == '_version_info': + # We only use the first 4 numbers, as everthing else could be strings in fact (on windows) + version_numbers = self._call_process('version').split(' ')[2] + self._version_info = tuple(int(n) for n in version_numbers.split('.')[:4]) + else: + super(Git, self)._set_cache_(attr) + #END handle version info + + @property def working_dir(self): """:return: Git directory we are working on""" return self._working_dir + + @property + def version_info(self): + """ + :return: tuple(int, int, int, int) tuple with integers representing the major, minor + and additional version numbers as parsed from git version. + This value is generated on demand and is cached""" + return self._version_info def execute(self, command, istream=None, @@ -290,7 +320,7 @@ def execute(self, command, :note: If you add additional keyword arguments to the signature of this method, you must update the execute_kwargs tuple housed in this module.""" - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': + if self.GIT_PYTHON_TRACE and not self.GIT_PYTHON_TRACE == 'full': print ' '.join(command) # Allow the user to have the command executed in their working dir. 
@@ -337,7 +367,7 @@ def execute(self, command, proc.stdout.close() proc.stderr.close() - if GIT_PYTHON_TRACE == 'full': + if self.GIT_PYTHON_TRACE == 'full': cmdstr = " ".join(command) if stderr_value: print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value) @@ -423,11 +453,40 @@ def _call_process(self, method, *args, **kwargs): ext_args = self.__unpack_args([a for a in args if a is not None]) args = opt_args + ext_args - - call = ["git", dashify(method)] - call.extend(args) - - return self.execute(call, **_kwargs) + + def make_call(): + call = [self.GIT_PYTHON_GIT_EXECUTABLE, dashify(method)] + call.extend(args) + return call + #END utility to recreate call after changes + + if sys.platform == 'win32': + try: + try: + return self.execute(make_call(), **_kwargs) + except WindowsError: + # did we switch to git.cmd already, or was it changed from default ? permanently fail + if self.GIT_PYTHON_GIT_EXECUTABLE != self.git_exec_name: + raise + #END handle overridden variable + type(self).GIT_PYTHON_GIT_EXECUTABLE = self.git_exec_name_win + call = [self.GIT_PYTHON_GIT_EXECUTABLE] + list(args) + + try: + return self.execute(make_call(), **_kwargs) + finally: + import warnings + msg = "WARNING: Automatically switched to use git.cmd as git executable, which reduces performance by ~70%." 
+ msg += "Its recommended to put git.exe into the PATH or to set the %s environment variable to the executable's location" % self._git_exec_env_var + warnings.warn(msg) + #END print of warning + #END catch first failure + except WindowsError: + raise WindowsError("The system cannot find or execute the file at %r" % self.GIT_PYTHON_GIT_EXECUTABLE) + #END provide better error message + else: + return self.execute(make_call(), **_kwargs) + #END handle windows default installation def _parse_object_header(self, header_line): """ diff --git a/git/config.py b/git/config.py index f1a8832e1..c71bb8ca4 100644 --- a/git/config.py +++ b/git/config.py @@ -120,11 +120,12 @@ class GitConfigParser(cp.RawConfigParser, object): # They must be compatible to the LockFile interface. # A suitable alternative would be the BlockingLockFile t_lock = LockFile + re_comment = re.compile('^\s*[#;]') #} END configuration OPTCRE = re.compile( - r'\s?(?P