# -*- coding: iso-8859-1 -*-
#
# Copyright (C) 2005 Edgewall Software
# Copyright (C) 2005,2006 Lele Gaifax <lele@metapensiero.it>
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.com/license.html.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://projects.edgewall.com/trac/.
#
# Author: Lele Gaifax <lele@metapensiero.it>

from __future__ import generators

# Python 2.3+ compatibility
try:
    reversed = reversed
except:
    def reversed(x):
        if hasattr(x, 'keys'):
            raise ValueError("mappings do not support reverse iteration")
        i = len(x)
        while i > 0:
            i -= 1
            yield x[i]

from os import listdir, makedirs, utime, stat
from os.path import join, isdir, split, exists
from time import gmtime, mktime, strftime, timezone
from shutil import copyfile

from trac.util import TracError, NaivePopen
from trac.versioncontrol import Repository

from tracdarcs.node import Node, DarcsNode
from tracdarcs.changeset import Changeset, DarcsChangeset, \
                                changesets_from_darcschanges
from tracdarcs.cache import DarcsCachedChangeset

class DarcsRepository(Repository):
    """
    Darcs concrete implementation of a Repository.

    Darcs (http://www.abridgegame.org/darcs/) is a distribuited SCM,
    patch-centric instead of snapshot-centric like Subversion. The
    approach here is to assume that the history in the repository
    followed by Trac is immutable, which is not a requirement for the
    underlaying darcs. This means that on that repository should never
    be executed a `darcs unpull`, or `darcs unrecord` and the like.

    Given it's nature, this class tries to cache as much as possible
    the requests coming from the above Trac machinery, to avoid or
    minimize the external calls to Darcs that tend to be somewhat time
    expensive.  In particular, _getNodeContent() caches any particular
    version of any file Trac asks to it, kept on the filesystem,
    inside the ``_darcs`` metadir of the controlled repository in the
    directory ``trac_cache`` (or where specified by the option
    ``cachedir`` in the section ``darcs`` of the configuration.  This
    may require some kind of control over the size of the cache, that
    OTOH may be completely removed whenever needed, it will be
    recreated as soon as the first request come in.
    """

    def __init__(self, db, path, log, config):
        Repository.__init__(self, path, None, log)
        self.path = path
        self.db = db
        self.__youngest_rev = 0
        self.__history = None
        self.__history_start = 0
        self.__no_pristine = exists(join(self.path, '_darcs', 'current.none'))
        self.__darcs = config.get('darcs', 'command', 'darcs')
        self.__encoding = config.get('darcs', 'encoding', None)
        if config.get('darcs', 'dont_escape_8bit'):
            self.__darcs = "DARCS_DONT_ESCAPE_8BIT=1 " + self.__darcs
        self.__cachedir = config.get('darcs', 'cachedir',
                                     join(self.path, '_darcs', 'trac_cache'))

    ## Low level stuff: CamelCase is used to avoid clash with inherited
    ## interface namespace.

    def _darcs (self, command, args=''):
        """
        Execute a some query on the repository running an external darcs.

        Return the XML output of the command.
        """

        command = "cd %r; TZ=UTC %s %s %s" % \
                  (self.path, self.__darcs, command, args)
        self.log.debug(command)
        np = NaivePopen (command, capturestderr=True)
        if np.errorlevel:
            err = 'Running (%s) failed: %s, %s: %s' % (command,
                                                       np.errorlevel,
                                                       np.err, np.out)
            raise TracError (err, title='Darcs execution failed')
        return np.out

    def _changes(self, args, startfrom=1):
        """
        Get changes information parsing the XML output of ``darcs changes``.

        Return a sequence of Changesets.
        """

        return changesets_from_darcschanges(
            self._darcs('changes --reverse --summary --xml', args),
            self, startfrom, force_encoding=self.__encoding)

    def _diff(self, path, rev1, rev2=None, patch=None):
        """
        Return a darcs diff between two revisions.
        """

        diff = "diff --unified"
        rev1 = self.normalize_rev(rev1)
        cset1 = DarcsCachedChangeset(rev1, self.db)
        diff += " --from-match 'hash %s'" % cset1.hash
        if rev2:
            rev2 = self.normalize_rev(rev2)
            cset2 = DarcsCachedChangeset(rev2, self.db)
            diff += "  --to-match 'hash %s'" % cset2.hash
        if patch:
            path = path + patch
        return self._darcs(diff, path)

    def _parseInventory(self, inventory):
        """
        Parse an inventory, and return a dictionary of its content.
        """

        from sha import new

        csmap = {}
        start = 0
        length = len(inventory)
        index = self.__youngest_rev
        while start<length:
            start = inventory.find('[', start)
            if start<0:
                break
            end = inventory.index('\n', start)
            patchname = inventory[start+1:end]
            start = end
            end = inventory.index('*', start)
            author = inventory[start+1:end]
            inv = inventory[end+1] == '*' and 'f' or 't'
            date = inventory[end+2:end+16]
            y = int(date[:4])
            m = int(date[4:6])
            d = int(date[6:8])
            hh = int(date[8:10])
            mm = int(date[10:12])
            ss = int(date[12:14])
            unixtime = int(mktime((y, m, d, hh, mm, ss, 0, 0, 0)))-timezone
            end += 16
            log = ''
            if inventory[end] <> ']':
                start = end
                end = inventory.index('\n', start+1)
                while True:
                    log += inventory[start+2:end]
                    start = end
                    if inventory[end+1] == ']':
                        break
                    end = inventory.index('\n', start+1)
                end += 1

            index += 1
            phash = new()
            phash.update(patchname)
            phash.update(author)
            phash.update(date)
            phash.update(log)
            phash.update(inv)
            patchid = '%s-%s-%s.gz' % (date,
                                       new(author).hexdigest()[:5],
                                       phash.hexdigest())
            csmap[index] = patchid
            csmap[patchid] = index
            start = end+1
        self.__youngest_rev = index
        return csmap

    def _loadChangesetsIndex(self):
        """
        Load the index of changesets in the repository, assigning an unique
        integer number to each one, ala Subversion, for easier references.

        This is done by parsing the ``_darcs/inventory`` file using the
        position of each changeset as its `revision number`, **assuming**
        that **nobody's never** going to do anything that alters its order,
        such as ``darcs optimize`` or ``darcs unpull``.
        """

        self.__youngest_rev = 0
        inventories = [join(self.path, '_darcs', 'inventories', i) for i
                       in listdir(join(self.path, '_darcs', 'inventories'))]
        inventories.sort()
        inventories.append(join(self.path, '_darcs', 'inventory'))
        for inventory in inventories:
            f = open(inventory, 'rU')
            try:
                index = self._parseInventory(f.read())
            finally:
                f.close()

    ## Medium level, work-horse methods

    def _getCachedContentLocation(self, node):
        """
        Return the location of the cache for the given node. If it does
        not exist, compute it by applying a diff to the current version.
        This may return None, if the node does not actually exist.
        """

        rev = self.normalize_rev(node.rev)

        if self.__no_pristine:
            current = join(self.path, node.path)
        else:
            current = join(self.path, '_darcs', 'current', node.path)

        # Iterate over history to find out which is the revision of
        # the given path that last changed the it. We need to find
        # both a 'last revision' and 'second last', because later
        # we may apply either a r1:last diff or a 2nd:current diff.
        history = self._getPathHistory(node.path, None)
        try:
            lastnode = history.next()
        except StopIteration:
            lastnode = None

        if lastnode is None:
            return None
        elif lastnode.rev <= rev:
            # Content hasn't changed, return current version
            if exists(current):
                return current

        prevlast = lastnode
        for oldnode in history:
            if oldnode.rev <= rev:
                lastnode = oldnode
                break
            prevlast = oldnode

        cachedir = join(self.__cachedir, str(lastnode.rev))
        cache = join(cachedir, lastnode.path)

        # One may never know: should by any chance an absolute path survived
        # in lastnode.path, or in some clever way introduced some trick like
        # 'somepath/../../../etc/passwd'...
        from os.path import normpath
        assert normpath(cache).startswith(cachedir)

        if not exists(cache):
            self.log.debug('Caching revision %d of %s' % (lastnode.rev,
                                                          lastnode.path))
            dir = split(cache)[0]
            if not exists(dir):
                makedirs(dir)

            # If the file doesn't current exist, create an empty file
            # and apply a patch from revision 1 to the node revision,
            # otherwise apply a reversed patch from the current revision
            # and to node revision+1.
            try:
                if not exists(current):
                    self.log.debug('Applying a direct patch from revision 1 up'
                                   ' to %d to %s' % (lastnode.rev, node.path))
                    open(cache, 'w').close()
                    patch = "| patch -p1 -d %s" % cachedir
                    self._diff(node.path, 1, lastnode.rev, patch=patch)
                else:
                    self.log.debug('Applying a reverse patch from current'
                                   ' revision back to %d to %s' %
                                   (lastnode.rev, node.path))
                    copyfile(current, cache)
                    patch = "| patch -p1 -R -d %s" % cachedir
                    self._diff(node.path, prevlast.rev, patch=patch)
            except TracError, exc:
                if 'Only garbage was found in the patch input' in exc.message:
                    pass
                else:
                    raise

            # Adjust the times of the just created cache file, to match
            # the timestamp of the associated changeset.
            cursor = self.db.cursor()
            cursor.execute("SELECT time FROM revision "
                           "WHERE rev = %s", (lastnode.rev,))
            cstimestamp = int(cursor.fetchone()[0])
            utime(cache, (cstimestamp, cstimestamp))

        if exists(cache):
            return cache
        else:
            return None

    def _getNodeContent(self, node):
        """
        Return the content of the node, loading it from the cache.
        """

        from cStringIO import StringIO

        location = self._getCachedContentLocation(node)
        if location:
            return file(location)
        else:
            return StringIO('')

    def _getNodeSize(self, node):
        """
        Return the content of the node, loading it from the cache.
        """

        location = self._getCachedContentLocation(node)
        if location:
            return stat(location).st_size
        else:
            return None

    def _getNodeEntries(self, node):
        """
        Generate the the immediate child entries of a directory at given
        revision, in alpha order.
        """

        from trac.versioncontrol.cache import _actionmap

        # Loop over nodes touched before given rev that falls in the
        # given path. We effectively want to look at the whole subtree,
        # because when a child is a directory we annotate it with the
        # latest change happened below that, instead with the revision
        # that actually touched the directory itself.

        cursor = self.db.cursor()
        path = node.path.strip('/')
        cursor.execute("SELECT rev, path, change_type, base_path, base_rev "
                       "  FROM node_change "
                       "WHERE ((LENGTH(rev)<LENGTH(%s)) OR "
                       "       (LENGTH(rev)=LENGTH(%s) AND rev<=%s)) "
                       "  AND (path GLOB %s OR base_path GLOB %s) "
                       "ORDER BY -LENGTH(rev),rev DESC,path ",
                       (node.rev, node.rev, node.rev, path+'*', path+'*'))

        if path:
            path += '/'

        done = {}
        for entry in cursor:
            rev, subtreepath, change, oldpath, oldrev = entry
            if oldpath:
                # This is a move/copy: it may be either a new item
                # (something copied/moved to the directory we are
                # listing) or a desaparecidos (something that was
                # there and moved away).
                if oldpath.startswith(path):
                    subpath = oldpath[len(path):].lstrip('/')
                else:
                    subpath = subtreepath[len(path):].lstrip('/')
            elif subtreepath.startswith(path):
                subpath = subtreepath[len(path):].lstrip('/')
            else:
                # The GLOB above may return either entries in the
                # directory or entries in other directories (usually
                # the one that contains the folder we are listing)
                # that share a common chunk of the name with the
                # directory itself.
                continue

            if '/' in subpath:
                child, rest = subpath.split('/', 1)
            else:
                child, rest = subpath, None

            if not child or child in done:
                if oldpath and not oldpath.startswith(path):
                    # Ok, it's something new: recursively list its
                    # children, adjusting their paths
                    oldnode = self.get_node(oldpath, oldrev)
                    for newnode in self._getNodeEntries(oldnode):
                        newnode.path = subtreepath + newnode.path[len(oldpath):]
                        yield newnode
                continue

            done[child] = True

            # Return the node only if the entry is not a direct child
            # of the directory, otherwise only if it's not a deletion
            # or a rename.
            if rest or not ((_actionmap[change]==Changeset.MOVE
                             and oldpath and oldpath.startswith(path)) or
                            _actionmap[change]==Changeset.DELETE):
                yield self.get_node(path + child, rev)

    def _getNodeLastModified(self, node):
        """
        Return the timestamp of the last modification to the given node.
        """

        location = self._getCachedContentLocation(node)
        if location:
            return stat(location).st_mtime
        else:
            return 0

    def _getPathHistory(self, path, rev=None, limit=None):
        """
        Iterate over the nodes that compose the history of the given
        path not newer than rev.
        """

        from trac.versioncontrol.cache import _kindmap, _actionmap

        rev = self.normalize_rev(rev)

        # Start with the concrete history, if present
        if self.__history and rev>self.__history_start:
            node = None
            for cs in reversed(self.__history):
                if cs.rev > rev:
                    continue
                node = cs.get_node(path)
                if node:
                    yield node
                    if limit:
                        limit -= 1
                        if limit==0:
                            break
                    # Expand renames
                    if node.change == Changeset.MOVE:
                        for node in self._getPathHistory(node.ancestor_path,
                                                         node.ancestor_rev,
                                                         limit):
                            yield node
                            if limit:
                                limit -= 1
            if node is not None:
                rev = node.rev-1

        # Keep going with the cache stored in the DB
        kind = self._getNodeKind(path, rev)
        cursor = self.db.cursor()

        path = path.rstrip('/')
        if kind == Node.DIRECTORY:
            revdone = {}
            sql = "SELECT path,rev,node_type,change_type,base_path,base_rev " \
                  "FROM node_change " \
                  "WHERE ((LENGTH(rev)<LENGTH(%s)) OR " \
                  "       (LENGTH(rev)=LENGTH(%s) AND rev<=%s)) "
            if path:
                sql += "  AND (path=%s OR path GLOB %s) "
            sql += "ORDER BY -LENGTH(rev),rev DESC "
            if path:
                cursor.execute(sql, (rev, rev, rev, path, path+'/*'))
            else:
                cursor.execute(sql, (rev, rev, rev))
            for row in cursor:
                path, rev, kind, change, base_path, base_rev = row
                if not rev in revdone:
                    revdone[rev] = True
                    node = DarcsNode(path, rev, _kindmap[kind],
                                     _actionmap[change], self,
                                     base_path, base_rev)
                    yield node
                    if limit:
                        limit -= 1
                        if limit==0:
                            break
        else:
            while rev>=1:
                cursor.execute("SELECT node_type,change_type,"
                               "       base_path,base_rev "
                               "FROM node_change WHERE rev=%s AND path=%s",
                               (rev, path))
                base_rev = None
                for row in cursor:
                    kind, change, base_path, base_rev = row
                    node = DarcsNode(path, rev, _kindmap[kind],
                                     _actionmap[change], self,
                                     base_path, base_rev)
                    yield node
                    if limit:
                        limit -= 1
                        if limit==0:
                            break
                    base_rev = base_rev and int(base_rev) or 0
                    # Expand renames
                    if node.change == Changeset.MOVE:
                        for node in self._getPathHistory(base_path,
                                                         base_rev, limit):
                            yield node
                            if limit:
                                limit -= 1

                if base_rev is None:
                    rev -= 1
                else:
                    rev = base_rev

    def _getNodeKind(self, path, rev):
        """
        Determine the kind of the path at given revision.
        """

        # Determine if the path is really a directory, except when it's
        # already known, that is when its name ends with a slash (a fake
        # one introduced by changesets_from_darcschanges()).
        if not path.endswith("/"):
            cursor = self.db.cursor()
            cursor.execute("SELECT path "
                           "FROM node_change "
                           "WHERE ((LENGTH(rev)<LENGTH(%s)) OR "
                           "       (LENGTH(rev)=LENGTH(%s) AND rev<=%s)) "
                           "  AND path=%s "
                           "ORDER BY -LENGTH(rev),rev DESC "
                           "LIMIT 1", (rev, rev, rev, path+'/'))
            if cursor.fetchone():
                kind = Node.DIRECTORY
            else:
                kind = Node.FILE
        else:
            kind = Node.DIRECTORY
        return kind

    ## Interface API

    def close(self):
        """
        Close the connection to the repository.

        Darcs: no-op.
        """

    def get_changeset(self, rev):
        """
        Retrieve a Changeset object that describes the changes made in
        revision 'rev'.
        """

        if not self.__history:
            youngest = self.normalize_rev(self.youngest_rev)
            youngest_cache = self.get_youngest_rev_in_cache(self.db) or '0'
            self.__history_start = int(youngest_cache)
            self.__history = self._changes('--last=%d' %
                                           (youngest - self.__history_start,),
                                           self.__history_start+1)
        rev = self.normalize_rev(rev)
        return self.__history[rev-self.__history_start-1]

    def get_node(self, path, rev=None):
        """
        Retrieve a Node (directory or file) from the repository at the
        given path. If the rev parameter is specified, the version of the
        node at that revision is returned, otherwise the latest version
        of the node is returned.
        """

        rev = self.normalize_rev(rev)
        path = self.normalize_path(path)

        if path == '/':
            node = DarcsNode('/', rev, Node.DIRECTORY, None, self,
                             '/', self.previous_rev(rev))
            return node

        kind = self._getNodeKind(path, rev)

        if kind == Node.DIRECTORY:
            cursor = self.db.cursor()
            path = path.rstrip('/')
            cursor.execute("SELECT rev, change_type "
                           "FROM node_change "
                           "WHERE ((LENGTH(rev)<LENGTH(%s)) OR "
                           "       (LENGTH(rev)=LENGTH(%s) AND rev<=%s)) "
                           "  AND (path=%s OR path GLOB %s) "
                           "ORDER BY -LENGTH(rev),rev DESC "
                           "LIMIT 1", (rev, rev, rev, path, path+'/*'))
            lastchange = cursor.fetchone()
            if lastchange:
                rev, change = lastchange
            else:
                raise TracError, "No node at %r in revision %s" % (path, rev)
            node = DarcsNode(path, rev, Node.DIRECTORY, change, self,
                             path, self.previous_rev(rev))
        else:
            history = self._getPathHistory(path, rev, limit=1)
            try:
                node = history.next()
            except StopIteration:
                raise TracError, "No node at %r in revision %s" % (path, rev)

        return node

    def get_oldest_rev(self):
        """
        Return the oldest revision stored in the repository.
        """

        # If the repository is empty, return None
        if not self.__youngest_rev:
            return None
        return 1

    def get_youngest_rev(self):
        """
        Return the youngest revision in the repository.
        """

        if not self.__youngest_rev:
            self._loadChangesetsIndex()
        return self.__youngest_rev

    def previous_rev(self, rev):
        """
        Return the revision immediately preceding the specified revision.
        """

        rev = self.normalize_rev(rev)
        if rev == 1:
            return None
        else:
            return rev-1

    def next_rev(self, rev, path=''):
        """
        Return the revision immediately following the specified revision.
        """

        rev = self.normalize_rev(rev)
        if rev < self.get_youngest_rev():
            return rev+1
        else:
            return None

    def rev_older_than(self, rev1, rev2):
        """
        Return True if rev1 is older than rev2, i.e. if rev1 comes before rev2
        in the revision sequence.
        """

        return self.normalize_rev(rev1) < self.normalize_rev(rev2)

    def get_youngest_rev_in_cache(self, db):
        """
        Return the youngest revision currently cached.

        For darcs, this is the last applied revision, not necessarily
        the youngest one. We are numbering darcs patches in order of
        application.
        """

        cursor = db.cursor()
        cursor.execute("SELECT rev "
                       "FROM revision "
                       "ORDER BY -LENGTH(rev),rev DESC " # rev is a string
                       "LIMIT 1")                        # in the database
        row = cursor.fetchone()
        return row and row[0] or None

    def get_path_history(self, path, rev=None, limit=None):
        """
        Retrieve all the revisions containing this path (no newer than 'rev').
        The result format should be the same as the one of Node.get_history()
        """

        path = self.normalize_path(path)
        rev = self.normalize_rev(rev)
        for node in self._getPathHistory(path, rev, limit):
            yield (node.path, node.rev, node.change)

    def normalize_path(self, path):
        """
        Return a canonical representation of path in the repos.
        """

        return path and path.strip('/') or '/'

    def normalize_rev(self, rev):
        """
        Return a canonical representation of a revision in the repos.
        'None' is a valid revision value and represents the youngest revision.
        """

        try:
            rev = int(rev)
        except (ValueError, TypeError):
            rev = None
        if rev is None:
            rev = self.get_youngest_rev()
        elif rev > self.get_youngest_rev():
            rev = self.get_youngest_rev()
        return rev

    def get_changes(self, old_path, old_rev, new_path, new_rev,
                    ignore_ancestry=1):
        """
        Generator that yields change tuples (old_node, new_node, kind, change)
        for each node change between the two arbitrary (path,rev) pairs.

        The old_node is assumed to be None when the change is an ADD,
        the new_node is assumed to be None when the change is a DELETE.
        """

        from trac.versioncontrol.cache import _actionmap

        old_node = new_node = None
        old_rev = self.normalize_rev(old_rev)
        new_rev = self.normalize_rev(new_rev)
        if self.has_node(old_path, old_rev):
            old_node = self.get_node(old_path, old_rev)
        else:
            raise TracError, ('The Base for Diff is invalid: path %s'
                              ' doesn\'t exist in revision %s' \
                              % (old_path, old_rev))
        if self.has_node(new_path, new_rev):
            new_node = self.get_node(new_path, new_rev)
        else:
            raise TracError, ('The Target for Diff is invalid: path %s'
                              ' doesn\'t exist in revision %s' \
                              % (new_path, new_rev))
        if new_node.kind != old_node.kind:
            raise TracError, ('Diff mismatch: Base is a %s (%s in revision %s) '
                              'and Target is a %s (%s in revision %s).' \
                              % (old_node.kind, old_path, old_rev,
                                 new_node.kind, new_path, new_rev))

        if new_node.isdir:
            old_path = self.normalize_path(old_path)
            new_path = self.normalize_path(new_path)

            cursor = self.db.cursor()
            sql = "SELECT path,rev,change_type,base_path,base_rev " \
                  "FROM node_change " \
                  "WHERE ((LENGTH(rev)<LENGTH(%s)) OR " \
                  "       (LENGTH(rev)=LENGTH(%s) AND rev<=%s)) " \
                  "  AND ((LENGTH(rev)>LENGTH(%s)) OR " \
                  "       (LENGTH(rev)=LENGTH(%s) AND rev>=%s)) " \
                  "ORDER BY -LENGTH(rev),rev "
            cursor.execute(sql, (new_rev, new_rev, new_rev,
                                 old_rev, old_rev, old_rev))
            for row in cursor:
                path, rev, change, base_path, base_rev = row
                if path.startswith(new_path) or path.startswith(old_path):
                    if change==Changeset.ADD:
                        old_node = None
                    else:
                        old_node = self.get_node(base_path or path, base_rev)
                    if change==Changeset.DELETE:
                        new_node = None
                        kind = old_node.kind
                    else:
                        new_node = self.get_node(path, rev)
                        kind = new_node.kind
                    yield (old_node, new_node,
                           kind, _actionmap[change])
        else:
            if old_node.rev <> new_node.rev:
                yield (old_node, new_node, Node.FILE, Changeset.EDIT)