source: trunk/moap/vcs/vcs.py @ 403

Revision 403, 17.0 KB checked in by thomas, 4 years ago (diff)
  • moap/test/test_vcs_svn.py:
  • moap/vcs/vcs.py: Fix for Python 2.3 by avoiding extractall, and working around a hardlinking bug in tarfile in python 2.3
  • Property property set to property
Line 
1# -*- Mode: Python -*-
2# vi:si:et:sw=4:sts=4:ts=4
3
4"""
5Version Control System functionality.
6"""
7
8import re
9import os
10import sys
11import glob
12import tarfile
13import tempfile
14import commands
15
16from moap.util import util, log
17
def getNames():
    """
    List the names of the version control systems moap can work with.

    Every module in the C{moap.vcs} package other than this one is loaded,
    and the C{name} attribute of its C{VCSClass} is collected.

    @returns: the VCS names, sorted in ascending order
    @rtype:   list of str
    """
    # load all backend modules first, then read their names
    backends = []
    for moduleName in util.getPackageModules('moap.vcs', ignore=['vcs', ]):
        backends.append(util.namedModule('moap.vcs.%s' % moduleName))

    result = [backend.VCSClass.name for backend in backends]
    result.sort()
    return result
27
def detect(path=None):
    """
    Detect which version control system is being used in the source tree.

    Each module in the C{moap.vcs} package (other than this one) is asked,
    through its module-level C{detect(path)} function, whether it recognises
    the tree.  The first module that does is used to instantiate its
    C{VCSClass} for the path.

    Modules that lack C{detect} or C{VCSClass} are reported on stderr and
    skipped.  Errors raised from inside a backend's C{detect} or constructor
    are not swallowed.

    @param path: the path to the source tree; defaults to the current
                 working directory when empty or None
    @type  path: str

    @returns: an instance of a subclass of L{VCS}, or None.
    """
    # default the path first, so the debug message shows the real directory
    if not path:
        path = os.getcwd()
    log.debug('vcs', 'detecting VCS in %s' % path)

    systems = util.getPackageModules('moap.vcs', ignore=['vcs', ])
    log.debug('vcs', 'trying vcs modules %r' % systems)

    for s in systems:
        m = util.namedModule('moap.vcs.%s' % s)

        # look the attribute up explicitly instead of catching AttributeError
        # around the call, which would also hide bugs inside the backend
        detector = getattr(m, 'detect', None)
        if detector is None:
            sys.stderr.write('moap.vcs.%s is missing detect()\n' % s)
            continue

        if not detector(path):
            log.debug('vcs', 'did not find %s' % s)
            continue

        vcsClass = getattr(m, 'VCSClass', None)
        if vcsClass is None:
            sys.stderr.write('moap.vcs.%s is missing VCSClass()\n' % s)
            continue

        o = vcsClass(path)
        log.debug('vcs', 'detected VCS %s' % s)
        return o

    return None
62   
# FIXME: add stdout and stderr parameters, so that all spawned commands write
#        their output there instead of to the process-wide streams
class VCS(log.Loggable):
    """
    Base class for version control system backends.

    Subclasses provide the VCS-specific operations (diff, getUnknown,
    getIgnored, getCheckoutCommands, ...); this class implements the generic
    functionality built on top of them: diff parsing, backup and restore.

    @cvar name:        human-readable name of the VCS
    @cvar logCategory: category used for log messages of this object
    @ivar path: the path to the top of the source tree
    @ivar meta: paths that contain VCS metadata
    @type meta: list of str
    """
    name = 'Some Version Control System'
    logCategory = 'VCS'

    path = None
    meta = None

    def __init__(self, path=None):
        """
        @param path: the path to the top of the source tree; defaults to the
                     current working directory when empty or None
        @type  path: str
        """
        self.path = path
        if not path:
            self.path = os.getcwd()

    def getAdded(self, path):
        """
        Get a list of paths newly added under the given path and relative to it.

        The base implementation only logs that the subclass should implement
        this method, and returns None.

        @param path: the path under which to check for files
        @type  path: str

        @returns: list of paths
        @rtype:   list of str
        """
        log.info('vcs',
            "subclass %r should implement getAdded" % self.__class__)

    def getDeleted(self, path):
        """
        Get a list of deleted paths under the given path and relative to it.

        The base implementation only logs that the subclass should implement
        this method, and returns None.

        @param path: the path under which to check for files
        @type  path: str

        @returns: list of paths
        @rtype:   list of str
        """
        log.info('vcs',
            "subclass %r should implement getDeleted" % self.__class__)

    def getIgnored(self, path):
        """
        Get a list of ignored paths under the given path and relative to it.

        @param path: the path under which to check for files
        @type  path: str

        @returns: list of paths
        @rtype:   list of str

        @raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            'subclass %s should implement getIgnored' % self.__class__)

    def getUnknown(self, path):
        """
        Get a list of unknown paths under the given path and relative to it.

        @param path: the path under which to check for files
        @type  path: str

        @returns: list of paths
        @rtype:   list of str

        @raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            'subclass %s should implement getUnknown' % self.__class__)

    def ignore(self, paths, commit=True):
        """
        Make the VCS ignore the given list of paths.

        @param paths:  list of paths, relative to the checkout directory
        @type  paths:  list of str
        @param commit: if True, commit the ignore updates.
        @type  commit: boolean

        @raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            'subclass %s should implement ignore' % self.__class__)

    def commit(self, paths, message):
        """
        Commit the given list of paths, with the given message.
        Note that depending on the VCS, parents that were just added
        may need to be commited as well.

        The base implementation does nothing and returns None.

        @type paths:   list
        @type message: str

        @rtype: bool
        """

    def createTree(self, paths):
        """
        Given the list of paths, create a dict of parentPath -> [child, ...]
        If the path is in the root of the repository, parentPath will be ''

        @param paths: the paths to group by parent directory; may be empty
                      or None
        @type  paths: list of str

        @rtype: dict of str -> list of str
        """
        result = {}

        if not paths:
            return result

        for p in paths:
            # os.path.basename('test/') returns '', so strip possibly trailing /
            if p.endswith(os.path.sep):
                p = p[:-1]
            dirname, base = os.path.split(p)
            result.setdefault(dirname, []).append(base)

        return result

    def diff(self, path):
        """
        Return a diff for the given path.

        The diff should not end in a newline; an empty diff should
        be an empty string.

        The diff should also be relative to the working directory; no
        absolute paths.

        @rtype:   str
        @returns: the diff

        @raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            'subclass %s should implement diff' % self.__class__)

    def getFileMatcher(self):
        """
        Return an re matcher object that will expand to the file being
        changed.

        Group 1 of a match is the path of the file.

        The default implementation works for CVS and SVN.
        """
        return re.compile(r'^Index: (\S+)$')

    def _trimContext(self, block, oldLine, oldCount, newLine, newCount):
        """
        Narrow a hunk's line ranges down to the lines that really changed.

        The diff has 3 lines of context by default, but when a line was
        added/removed at the beginning or end of a file that context is not
        always there, so the context actually present is counted.

        @param block: the lines of the hunk, without the @@ header
        @type  block: list of str

        @returns: tuple of (oldLine, oldCount, newLine, newCount)
        """
        # leading context lines move the start forward and shrink the count;
        # slicing instead of indexing keeps empty lines from raising
        leading = 0
        for line in block:
            if line[:1] != ' ':
                break
            leading += 1

        # trailing context lines only shrink the count
        trailing = 0
        for line in block[::-1]:
            if line[:1] != ' ':
                break
            trailing += 1

        context = leading + trailing
        return (oldLine + leading, oldCount - context,
            newLine + leading, newCount - context)

    def getChanges(self, path, diff=None):
        """
        Get a list of changes for the given path and subpaths.

        @type  diff: str
        @param diff: the diff to use instead of a local vcs diff
                     (only useful for testing)

        @returns: dict of path -> list of (oldLine, oldCount, newLine, newCount)
        """
        if not diff:
            self.debug('getting changes from diff in %s' % path)
            diff = self.diff(path)

        changes = {}
        fileMatcher = self.getFileMatcher()

        # cvs diff can put a function name after the final @@ pair
        # svn diff on a one-line change in a one-line file looks like this:
        # @@ -1 +1 @@
        changeMatcher = re.compile(
            r'^\@\@\s+'         # start of line
            r'(-)(\d+),?(\d*)'  # -x,y or -x
            r'\s+'
            r'(\+)(\d+),?(\d*)'
            r'\s+\@\@'          # end of line
        )
        # We rstrip so that we don't end up with a dangling '' line
        lines = diff.rstrip('\n').split("\n")
        self.debug('diff is %d lines' % len(lines))

        current = None  # the file whose section is being parsed
        i = 0
        while i < len(lines):
            fm = fileMatcher.search(lines[i])
            if fm:
                # found a file being diffed, now get changes
                current = fm.expand('\\1')
                self.debug('Found file %s with deltas on line %d' % (
                    current, i + 1))
                changes[current] = []
                i += 1
                continue

            if current is None:
                # anything before the first file header is ignored
                i += 1
                continue

            self.log('Looking at line %d for file match' % (i + 1))
            m = changeMatcher.search(lines[i])
            if not m:
                i += 1
                continue

            self.debug('Found change on line %d' % (i + 1))
            oldLine = int(m.group(2))
            # a missing count means it's 1
            oldCount = int(m.group(3) or '1')
            newLine = int(m.group(5))
            newCount = int(m.group(6) or '1')
            i += 1

            # collect the hunk body: everything up to the next hunk header,
            # the next file header, or the end of the diff
            block = []
            while i < len(lines) \
                and not changeMatcher.search(lines[i]) \
                and not fileMatcher.search(lines[i]):
                block.append(lines[i])
                i += 1

            # now we have the whole block
            self.log('Found change block of %d lines at line %d' % (
                len(block), i - len(block) + 1))

            changes[current].append(self._trimContext(
                block, oldLine, oldCount, newLine, newCount))
            # i already points at the next header line, so don't advance

        log.debug('vcs', '%d files changed' % len(changes))
        return changes

    def getPropertyChanges(self, path):
        """
        Get a list of property changes for the given path and subpaths.
        These are metadata changes to files, not content changes.

        The base implementation only logs that the subclass should implement
        this method, and returns None.

        @rtype: dict of str -> list of str
        @returns: dict of path -> list of property names
        """
        log.info('vcs',
            "subclass %r should implement getPropertyChanges" % self.__class__)

    def update(self, path):
        """
        Update the given path to the latest version.

        @raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            'subclass %s should implement update' % self.__class__)

    def getCheckoutCommands(self):
        """
        Return shell commands necessary to do a fresh checkout of the current
        checkout into a directory called 'checkout'.

        @returns: newline-terminated string of commands.
        @rtype:   str

        @raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            'subclass %s should implement getCheckoutCommands' % self.__class__)

    def backup(self, archive):
        """
        Back up the given VCS checkout into an archive.

        This stores all unignored files, as well as a checkout command and
        a diff, so the working directory can be fully restored.

        The archive will contain:
         - a subdirectory called unignored
         - a file called diff
         - an executable file called checkout.sh

        After writing, the archive is restored into a temporary directory and
        compared against the working directory to verify it.

        @param archive: path of the archive to create; a name ending in .gz
                        or .bz2 selects that compression, anything else
                        produces an uncompressed tar
        @type  archive: str

        @raises VCSBackupException: if for some reason it can't guarantee
                                    a correct backup
        """
        mode = 'w:'
        if archive.endswith('.gz'):
            mode = 'w:gz'
        if archive.endswith('.bz2'):
            mode = 'w:bz2'

        # P2.4
        # Pre-2.5, tarfile has a bug, creating hardlinks for temporary files
        # if the temporary files get deleted right after adding.
        # See http://mail.python.org/pipermail/python-bugs-list/2005-October/030793.html
        # the workaround chosen is to keep the temporary files until after
        # closing
        tar = tarfile.TarFile.open(name=archive, mode=mode)
        tempPaths = []
        try:
            try:
                # store the diff
                (fd, diffpath) = tempfile.mkstemp(prefix='moap.backup.diff.')
                tempPaths.append(diffpath)
                try:
                    diff = self.diff('')
                    if diff:
                        os.write(fd, diff + '\n')
                finally:
                    os.close(fd)
                tar.add(diffpath, arcname='diff')

                # store the checkout commands
                (fd, checkoutpath) = tempfile.mkstemp(
                    prefix='moap.backup.checkout.')
                tempPaths.append(checkoutpath)
                try:
                    os.write(fd, "#!/bin/sh\n" + self.getCheckoutCommands())
                finally:
                    os.close(fd)
                # rwxr-xr-x; parsed with int() so the octal value is spelled
                # the same way on every Python version
                os.chmod(checkoutpath, int('755', 8))
                tar.add(checkoutpath, arcname='checkout.sh')

                # store the unignored files
                tar.add(self.path, 'unignored', recursive=False)

                unignoreds = self.getUnknown(self.path)

                for rel in unignoreds:
                    abspath = os.path.join(self.path, rel)
                    self.debug('Adding unignored path %s', rel)
                    tar.add(abspath, 'unignored/' + rel)
            finally:
                # always close before unlinking; see the hardlink note above
                tar.close()
        finally:
            for tempPath in tempPaths:
                os.unlink(tempPath)

        # now verify the backup
        # restore() refuses to write into an existing path, so only reserve
        # a unique name here
        restoreDir = tempfile.mkdtemp(prefix="moap.test.restore.")
        os.rmdir(restoreDir)
        self.restore(archive, restoreDir)

        diff = self.diffCheckout(restoreDir)
        if diff:
            msg = "Unexpected diff output between %s and %s:\n%s" % (
                self.path, restoreDir, diff)
            self.debug(msg)
            raise VCSBackupException(msg)
        else:
            self.debug('No important difference between '
                'extracted archive and original directory')

        os.system('rm -rf %s' % restoreDir)

    def restore(self, archive, path):
        """
        Restore from the given archive to the given path.

        The archive is unpacked into path, its checkout.sh is run to recreate
        the checkout, the stored diff is applied to it, and finally the
        checkout and the unignored files are moved up into path itself.

        The current working directory is changed while restoring and put
        back afterwards, also when restoring fails.

        @param archive: path to an archive as created by L{backup}
        @type  archive: str
        @param path:    directory to restore into; must not exist yet
        @type  path:    str

        @raises VCSException: if path already exists, or if the checkout
                              script fails
        """
        self.debug('Restoring from archive %s to path %s' % (
            archive, path))

        if os.path.exists(path):
            raise VCSException('path %s already exists' % path)

        oldPath = os.getcwd()
        # the shell fallback changes directory before reading the archive,
        # so a relative archive name has to be resolved up front
        archivePath = os.path.abspath(archive)

        # P2.3: tarfile.extractall only exists since 2.5
        tar = tarfile.TarFile.open(name=archivePath)
        try:
            try:
                tar.extractall(path)
            except AttributeError:
                # do it the shell way
                self.debug('Restoring by using tar directly')
                os.system('mkdir -p %s' % path)
                if archive.endswith('.gz'):
                    flags = 'xzf'
                elif archive.endswith('.bz2'):
                    flags = 'xjf'
                else:
                    # backup() writes an uncompressed tar for any other name
                    flags = 'xf'
                os.system('cd %s; tar %s %s' % (path, flags, archivePath))
        finally:
            tar.close()

        os.chdir(path)
        try:
            # start with the checkout
            status, output = commands.getstatusoutput('./checkout.sh')
            if status:
                raise VCSException('checkout failed with status %r: %r' % (
                    status, output))
            os.unlink('checkout.sh')

            # apply the diff
            os.chdir('checkout')
            # FIXME: check errors ?
            os.system('patch -p0 < ../diff > /dev/null')
            os.chdir('..')
            os.unlink('diff')

            # move to parent directory
            # FIXME: make sure we handle . directories (like .svn)
            for entry in glob.glob('checkout/*') + glob.glob('checkout/.*'):
                os.rename(entry, os.path.basename(entry))
            os.rmdir('checkout')

            # move all unignored files to parent directory
            # FIXME: make sure we handle . directories (like .svn)
            for entry in glob.glob('unignored/*') + glob.glob('unignored/.*'):
                # there is no good equivalent to mv;
                # os.rename doesn't work on trees
                # shutil.move actually invokes copytree
                cmd = 'mv %s %s' % (entry, os.path.basename(entry))
                self.debug(cmd)
                os.system(cmd)
            os.rmdir('unignored')
        finally:
            os.chdir(oldPath)

    def diffCheckout(self, checkoutDir):
        """
        Diff our checkout to the given checkout directory.

        Only complains about diffs in files we're interested in, which are
        tracked or unignored files.

        @param checkoutDir: the directory to compare our checkout against
        @type  checkoutDir: str

        @returns: the output of diff with the lines for ignored paths
                  removed; an empty string if nothing is left
        @rtype:   str
        """
        options = ""
        if self.meta:
            metaPattern = [s.replace('.', '\\.') for s in self.meta]
            # one separate "-x pattern" option per metadata path
            options = ' '.join(['-x %s' % p for p in metaPattern])
        cmd = 'diff -aur %s %s %s 2>&1' % (options, self.path, checkoutDir)
        self.debug('diffCheckout: running %s' % cmd)
        output = commands.getoutput(cmd)
        lines = output.split('\n')

        # we can't use diff -x for excluding ignored files, because -x
        # takes a pattern for basename only, so we need to scrub output
        ignored = {}
        for ignoredPath in self.getIgnored(self.path):
            ignored[ignoredPath] = True

        matcher = re.compile('Only in (.*): (.*)$')

        def isIgnored(line):
            # True for "Only in" lines that are about an ignored path
            m = matcher.search(line)
            if m:
                found = os.path.join(m.group(1), m.group(2))
                if found in ignored:
                    self.debug('Removing ignored path %s from diff' % found)
                    return True

            return False

        # keep everything except the lines about ignored paths
        lines = [l for l in lines if not isIgnored(l)]

        return "\n".join(lines)
512
class VCSException(Exception):
    """
    Raised when an operation on a version control system fails.

    More specific VCS errors derive from this class.
    """
518
class VCSBackupException(VCSException):
    """
    Raised when the VCS cannot back up the working directory.
    """
521
522
523
Note: See TracBrowser for help on using the repository browser.