Package buildbot :: Package changes :: Module svnpoller
[hide private]
[frames | no frames]

Source Code for Module buildbot.changes.svnpoller

  1  # -*- test-case-name: buildbot.test.test_svnpoller -*- 
  2   
  3  # Based on the work of Dave Peticolas for the P4poll 
  4  # Changed to svn (using xml.dom.minidom) by Niklaus Giger 
  5  # Hacked beyond recognition by Brian Warner 
  6   
  7  from twisted.python import log 
  8  from twisted.internet import defer, reactor, utils 
  9  from twisted.internet.task import LoopingCall 
 10   
 11  from buildbot import util 
 12  from buildbot.changes import base 
 13  from buildbot.changes.changes import Change 
 14   
 15  import xml.dom.minidom 
 16  import urllib 
 17   
18 -def _assert(condition, msg):
19 if condition: 20 return True 21 raise AssertionError(msg)
22
def dbgMsg(myString):
    """Emit 'myString' through the Twisted log and return 1."""
    log.msg(myString)
    return 1
# These split_file_* functions are available for use as values for the
# split_file= argument of SVNPoller.
def split_file_alwaystrunk(path):
    """Treat every path as living on the trunk.

    Returns the tuple (None, path): no branch, and the path unchanged.
    """
    branch = None
    return (branch, path)
31
def split_file_branches(path):
    """Split a repository-relative path using a trunk/branches layout.

    'trunk/subdir/file.c' becomes (None, 'subdir/file.c') and
    'branches/1.5.x/subdir/file.c' becomes
    ('branches/1.5.x', 'subdir/file.c'). Any path whose first component is
    neither 'trunk' nor 'branches' yields None.
    """
    top, sep, remainder = path.partition('/')
    if top == 'trunk':
        return (None, remainder)
    if top == 'branches':
        # The branch is the first two path components; everything after
        # that is the file path within the branch.
        branch_name, _, filepath = remainder.partition('/')
        return (top + sep + branch_name, filepath)
    return None
42 43
class SVNPoller(base.ChangeSource, util.ComparableMixin):
    """This source will poll a Subversion repository for changes and submit
    them to the change master.

    Each poll runs 'svn log --xml' (preceded, until the prefix is known, by
    'svn info --xml'), turns the log entries newer than the last one seen
    into Change objects, and hands them to self.parent.addChange().
    """

    compare_attrs = ["svnurl", "split_file_function",
                     "svnuser", "svnpasswd",
                     "pollinterval", "histmax",
                     "svnbin"]

    parent = None # filled in when we're added
    last_change = None # newest revision number seen so far (int or None)
    loop = None # the LoopingCall that drives checksvn()
    working = False # True while a poll is in progress
    _loop_start_call = None # pending reactor.callLater() made by startService

    def __init__(self, svnurl, split_file=None,
                 svnuser=None, svnpasswd=None,
                 pollinterval=10*60, histmax=100,
                 svnbin='svn', revlinktmpl=''):
        """
        @type  svnurl: string
        @param svnurl: the SVN URL that describes the repository and
                       subdirectory to watch. If this ChangeSource should
                       only pay attention to a single branch, this should
                       point at the repository for that branch, like
                       svn://svn.twistedmatrix.com/svn/Twisted/trunk . If it
                       should follow multiple branches, point it at the
                       repository directory that contains all the branches
                       like svn://svn.twistedmatrix.com/svn/Twisted and also
                       provide a branch-determining function.

                       Each file in the repository has a SVN URL in the form
                       (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) could be
                       empty or not, depending upon your branch-determining
                       function. Only files that start with (SVNURL)/(BRANCH)
                       will be monitored. The Change objects that are sent to
                       the Schedulers will see (FILEPATH) for each modified
                       file.

                       A single trailing slash is stripped.

        @type  split_file: callable or None
        @param split_file: a function that is called with a string of the
                           form (BRANCH)/(FILEPATH) and should return a tuple
                           (BRANCH, FILEPATH). This function should match
                           your repository's branch-naming policy. Each
                           changed file has a fully-qualified URL that can be
                           split into a prefix (which equals the value of the
                           'svnurl' argument) and a suffix; it is this suffix
                           which is passed to the split_file function.

                           If the function returns None, the file is ignored.
                           Use this to indicate that the file is not a part
                           of this project.

                           For example, if your repository puts the trunk in
                           trunk/... and branches are in places like
                           branches/1.5/..., your split_file function could
                           look like the following (this function is
                           available as svnpoller.split_file_branches)::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                return (None, '/'.join(pieces[1:]))
                            elif pieces[0] == 'branches':
                                return ('/'.join(pieces[0:2]),
                                        '/'.join(pieces[2:]))
                            else:
                                return None

                           If instead your repository layout puts the trunk
                           for ProjectA in trunk/ProjectA/... and the 1.5
                           branch in branches/1.5/ProjectA/..., your
                           split_file function could look like::

                            pieces = path.split('/')
                            if pieces[0] == 'trunk':
                                branch = None
                                pieces.pop(0) # remove 'trunk'
                            elif pieces[0] == 'branches':
                                pieces.pop(0) # remove 'branches'
                                # grab branch name
                                branch = 'branches/' + pieces.pop(0)
                            else:
                                return None # something weird
                            projectname = pieces.pop(0)
                            if projectname != 'ProjectA':
                                return None # wrong project
                            return (branch, '/'.join(pieces))

                           The default of split_file= is None, which
                           indicates that no splitting should be done. This
                           is equivalent to the following function::

                            return (None, path)

                           If you wish, you can override the split_file
                           method with the same sort of function instead of
                           passing in a split_file= argument.


        @type  svnuser:      string
        @param svnuser:      If set, the --username option will be added to
                             the 'svn log' command. You may need this to get
                             access to a private repository.
        @type  svnpasswd:    string
        @param svnpasswd:    If set, the --password option will be added.

        @type  pollinterval: int
        @param pollinterval: interval in seconds between polls. The default
                             is 600 seconds (10 minutes). Smaller values
                             decrease the latency between the time a change
                             is recorded and the time the buildbot notices
                             it, but it also increases the system load.

        @type  histmax:      int
        @param histmax:      maximum number of changes to look back through.
                             The default is 100. Smaller values decrease
                             system load, but if more than histmax changes
                             are recorded between polls, the extra ones will
                             be silently lost.

        @type  svnbin:       string
        @param svnbin:       path to svn binary, defaults to just 'svn'. Use
                             this if your subversion command lives in an
                             unusual location.

        @type  revlinktmpl:  string
        @param revlinktmpl:  A format string to use for hyperlinks to revision
                             information. For example, setting this to
                             "http://reposerver/websvn/revision.php?rev=%s"
                             would create suitable links on the build pages
                             to information in websvn on each revision.
        """

        if svnurl.endswith("/"):
            svnurl = svnurl[:-1] # strip the trailing slash
        self.svnurl = svnurl
        self.split_file_function = split_file or split_file_alwaystrunk
        self.svnuser = svnuser
        self.svnpasswd = svnpasswd

        self.revlinktmpl = revlinktmpl

        self.svnbin = svnbin
        self.pollinterval = pollinterval
        self.histmax = histmax
        # None means "not determined yet"; "" is a valid prefix (svnurl is
        # the repository root).
        self._prefix = None
        self.overrun_counter = 0
        self.loop = LoopingCall(self.checksvn)

    def split_file(self, path):
        """Split a (BRANCH)/(FILEPATH) string into (BRANCH, FILEPATH).

        Delegates to the callable given as split_file= at construction time.
        Returns whatever that callable returns: a (branch, filepath) tuple,
        or None if the file should be ignored.
        """
        # use getattr() to avoid turning this function into a bound method,
        # which would require it to have an extra 'self' argument
        f = getattr(self, "split_file_function")
        return f(path)

    def startService(self):
        """Start the service and schedule the polling loop."""
        log.msg("SVNPoller(%s) starting" % self.svnurl)
        base.ChangeSource.startService(self)
        # Don't start the loop just yet because the reactor isn't running.
        # Give it a chance to go and install our SIGCHLD handler before
        # spawning processes. Remember the delayed call so that stopService
        # can cancel it if we are shut down before it fires.
        self._loop_start_call = reactor.callLater(0, self.loop.start,
                                                  self.pollinterval)

    def stopService(self):
        """Stop polling and shut the service down.

        Returns the result of base.ChangeSource.stopService().
        """
        log.msg("SVNPoller(%s) shutting down" % self.svnurl)
        start_call = self._loop_start_call
        self._loop_start_call = None
        if start_call is not None and start_call.active():
            # the loop has not been started yet: make sure it never is
            start_call.cancel()
        # LoopingCall.stop() must only be called on a running loop
        if self.loop.running:
            self.loop.stop()
        return base.ChangeSource.stopService(self)

    def describe(self):
        """Return a one-line human-readable description of this source."""
        return "SVNPoller watching %s" % self.svnurl

    def checksvn(self):
        """Run one poll of the repository.

        Returns a Deferred that fires when the poll has finished (or
        immediately, with None, if the previous poll is still running).
        """
        # Our return value is only used for unit testing.

        # we need to figure out the repository root, so we can figure out
        # repository-relative pathnames later. Each SVNURL is in the form
        # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something
        # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a
        # physical repository at /svn/Twisted on that host), (PROJECT) is
        # something like Projects/Twisted (i.e. within the repository's
        # internal namespace, everything under Projects/Twisted/ has
        # something to do with Twisted, but these directory names do not
        # actually appear on the repository host), (BRANCH) is something like
        # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative
        # filename like "twisted/internet/defer.py".

        # our self.svnurl attribute contains (ROOT)/(PROJECT) combined
        # together in a way that we can't separate without svn's help. If the
        # user is not using the split_file= argument, then self.svnurl might
        # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will
        # get back from 'svn log' will be of the form
        # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove
        # that (PROJECT) prefix from them. To do this without requiring the
        # user to tell us how svnurl is split into ROOT and PROJECT, we do an
        # 'svn info --xml' command at startup. This command will include a
        # <root> element that tells us ROOT. We then strip this prefix from
        # self.svnurl to determine PROJECT, and then later we strip the
        # PROJECT prefix from the filenames reported by 'svn log --xml' to
        # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to
        # turn into separate BRANCH and FILEPATH values.

        # whew.

        if self.working:
            log.msg("SVNPoller(%s) overrun: timer fired but the previous "
                    "poll had not yet finished." % self.svnurl)
            self.overrun_counter += 1
            return defer.succeed(None)
        self.working = True

        log.msg("SVNPoller polling")
        # Compare against None rather than testing truthiness: an empty
        # prefix is a valid, already-determined value (svnurl is the
        # repository root) and must not trigger 'svn info' on every poll.
        if self._prefix is None:
            # this sets self._prefix when it finishes. It fires with
            # self._prefix as well, because that makes the unit tests easier
            # to write.
            d = self.get_root()
            d.addCallback(self.determine_prefix)
        else:
            d = defer.succeed(self._prefix)

        d.addCallback(self.get_logs)
        d.addCallback(self.parse_logs)
        d.addCallback(self.get_new_logentries)
        d.addCallback(self.create_changes)
        d.addCallback(self.submit_changes)
        d.addCallbacks(self.finished_ok, self.finished_failure)
        return d

    def getProcessOutput(self, args):
        """Run the svn binary with 'args' and an empty environment.

        Returns the Deferred produced by twisted's utils.getProcessOutput.
        """
        # this exists so we can override it during the unit tests
        d = utils.getProcessOutput(self.svnbin, args, {})
        return d

    def _auth_args(self):
        """Return the --username/--password options, if configured."""
        args = []
        if self.svnuser:
            args.append("--username=%s" % self.svnuser)
        if self.svnpasswd:
            args.append("--password=%s" % self.svnpasswd)
        return args

    def _parse_xml(self, output, caller):
        """Parse svn's XML 'output' into a minidom document.

        'caller' is only used to label the log message. On malformed XML
        the error is logged and the ExpatError is re-raised.
        """
        # imported explicitly instead of relying on xml.dom.minidom having
        # loaded xml.parsers.expat as a side effect
        from xml.parsers.expat import ExpatError
        try:
            return xml.dom.minidom.parseString(output)
        except ExpatError:
            dbgMsg("_process_changes: ExpatError in %s" % output)
            log.msg("SVNPoller.%s: ExpatError in '%s'" % (caller, output))
            raise

    def get_root(self):
        """Run 'svn info --xml' on svnurl; return a Deferred with its output."""
        args = ["info", "--xml", "--non-interactive", self.svnurl]
        args.extend(self._auth_args())
        d = self.getProcessOutput(args)
        return d

    def determine_prefix(self, output):
        """Compute self._prefix from the output of 'svn info --xml'.

        The prefix is the part of svnurl that follows the repository root
        (without a leading slash). It is stored in self._prefix and also
        returned. Raises ExpatError if 'output' is not well-formed XML and
        AssertionError if svnurl does not start with the reported root.
        """
        doc = self._parse_xml(output, "_determine_prefix_2")
        rootnodes = doc.getElementsByTagName("root")
        if not rootnodes:
            # this happens if the URL we gave was already the root. In this
            # case, our prefix is empty.
            self._prefix = ""
            return self._prefix
        rootnode = rootnodes[0]
        root = "".join([c.data for c in rootnode.childNodes])
        # root will be a unicode string
        _assert(self.svnurl.startswith(root),
                "svnurl='%s' doesn't start with <root>='%s'" %
                (self.svnurl, root))
        self._prefix = self.svnurl[len(root):]
        if self._prefix.startswith("/"):
            self._prefix = self._prefix[1:]
        log.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" %
                (self.svnurl, root, self._prefix))
        return self._prefix

    def get_logs(self, ignored_prefix=None):
        """Run 'svn log --xml --verbose' limited to histmax entries.

        The argument is ignored; it only exists so this method can be used
        as a Deferred callback. Returns a Deferred with the command output.
        """
        args = ["log", "--xml", "--verbose", "--non-interactive"]
        args.extend(self._auth_args())
        args.extend(["--limit=%d" % (self.histmax), self.svnurl])
        d = self.getProcessOutput(args)
        return d

    def parse_logs(self, output):
        """Parse 'svn log --xml' output; return the list of <logentry> nodes.

        Raises ExpatError if 'output' is not well-formed XML.
        """
        doc = self._parse_xml(output, "_parse_changes")
        logentries = doc.getElementsByTagName("logentry")
        return logentries


    def _filter_new_logentries(self, logentries, last_change):
        """Select the log entries that are newer than 'last_change'.

        'logentries' is expected newest-first (the first entry supplies the
        most recent revision). Returns a tuple (new_last_change,
        new_logentries) where new_logentries is ordered oldest-first.
        """
        if not logentries:
            # nothing was reported, so nothing is new. Keep whatever we had
            # (normally None): forgetting a known revision here would make
            # the next poll look like a first run and discard real changes.
            return (last_change, [])

        mostRecent = int(logentries[0].getAttribute("revision"))

        if last_change is None:
            # if this is the first time we've been run, ignore any changes
            # that occurred before now. This prevents a build at every
            # startup.
            log.msg('svnPoller: starting at change %s' % mostRecent)
            return (mostRecent, [])

        if last_change == mostRecent:
            # an unmodified repository will hit this case
            log.msg('svnPoller: _process_changes last %s mostRecent %s' % (
                     last_change, mostRecent))
            return (mostRecent, [])

        new_logentries = []
        for el in logentries:
            if last_change == int(el.getAttribute("revision")):
                break
            new_logentries.append(el)
        new_logentries.reverse() # return oldest first
        return (mostRecent, new_logentries)

    def get_new_logentries(self, logentries):
        """Update self.last_change and return only the unseen log entries."""
        last_change = self.last_change
        (new_last_change,
         new_logentries) = self._filter_new_logentries(logentries,
                                                       self.last_change)
        self.last_change = new_last_change
        log.msg('svnPoller: _process_changes %s .. %s' %
                (last_change, new_last_change))
        return new_logentries


    def _get_text(self, element, tag_name):
        """Return the text of the first 'tag_name' child of 'element'.

        This is best-effort: if the tag is missing or its content cannot be
        read, the string "<unknown>" is returned instead of raising.
        """
        try:
            child_nodes = element.getElementsByTagName(tag_name)[0].childNodes
            text = "".join([t.data for t in child_nodes])
        except Exception:
            # deliberately broad (but not a bare except, which would also
            # swallow KeyboardInterrupt/SystemExit): one odd log entry
            # should not abort the whole poll
            text = "<unknown>"
        return text

    def _transform_path(self, path):
        """Turn a repository-relative path into (branch, filepath).

        Strips self._prefix (and one following slash) from 'path' and passes
        the remainder to split_file(). Returns split_file()'s result: either
        None or a (branch, final_path) tuple. Raises AssertionError if
        'path' does not start with the prefix.
        """
        _assert(path.startswith(self._prefix),
                "filepath '%s' should start with prefix '%s'" %
                (path, self._prefix))
        relative_path = path[len(self._prefix):]
        if relative_path.startswith("/"):
            relative_path = relative_path[1:]
        where = self.split_file(relative_path)
        # 'where' is either None or (branch, final_path)
        return where

    def create_changes(self, new_logentries):
        """Build Change objects from <logentry> nodes.

        One Change is created per (log entry, branch) pair; files for which
        split_file() returns None are skipped. A log entry that merely
        deletes a branch produces no Change for that branch. Returns the
        list of Change objects, in the order of 'new_logentries'.
        """
        changes = []

        for el in new_logentries:
            revision = str(el.getAttribute("revision"))

            revlink = ''
            if self.revlinktmpl and revision:
                revlink = self.revlinktmpl % urllib.quote_plus(revision)

            dbgMsg("Adding change revision %s" % (revision,))
            # TODO: the rest of buildbot may not be ready for unicode 'who'
            # values
            author = self._get_text(el, "author")
            comments = self._get_text(el, "msg")
            # there is a "date" field, but it provides localtime in the
            # repository's timezone, whereas we care about buildmaster's
            # localtime (since this will get used to position the boxes on
            # the Waterfall display, etc). So ignore the date field and use
            # our local clock instead.
            #when = self._get_text(el, "date")
            #when = time.mktime(time.strptime("%.19s" % when,
            #                                 "%Y-%m-%dT%H:%M:%S"))

            # maps branch -> {'files': [...], 'action': action of the first
            # path seen on that branch}
            branches = {}
            pathlist = el.getElementsByTagName("paths")[0]
            for p in pathlist.getElementsByTagName("path"):
                action = p.getAttribute("action")
                path = "".join([t.data for t in p.childNodes])
                # the rest of buildbot is certainly not yet ready to handle
                # unicode filenames, because they get put in RemoteCommands
                # which get sent via PB to the buildslave, and PB doesn't
                # handle unicode.
                path = path.encode("ascii")
                if path.startswith("/"):
                    path = path[1:]
                where = self._transform_path(path)

                # if 'where' is None, the file was outside any project that
                # we care about and we should ignore it
                if where:
                    branch, filename = where
                    if branch not in branches:
                        branches[branch] = {'files': [], 'action': action}
                    branches[branch]['files'].append(filename)

            for branch in branches.keys():
                action = branches[branch]['action']
                files = branches[branch]['files']
                number_of_files_changed = len(files)

                # a deletion whose only "file" is the branch directory
                # itself (empty relative filename) is a branch removal
                if (action == u'D' and number_of_files_changed == 1
                        and files[0] == ''):
                    log.msg("Ignoring deletion of branch '%s'" % branch)
                else:
                    c = Change(who=author,
                               files=files,
                               comments=comments,
                               revision=revision,
                               branch=branch,
                               revlink=revlink)
                    changes.append(c)

        return changes

    def submit_changes(self, changes):
        """Hand every Change in 'changes' to self.parent.addChange()."""
        for c in changes:
            self.parent.addChange(c)

    def finished_ok(self, res):
        """Callback for a successful poll: clear the 'working' flag.

        Returns 'res' unchanged.
        """
        log.msg("SVNPoller finished polling")
        dbgMsg('_finished : %s' % res)
        assert self.working
        self.working = False
        return res

    def finished_failure(self, f):
        """Errback for a failed poll: log it and clear the 'working' flag.

        Returns None, so the failure does not propagate any further.
        """
        log.msg("SVNPoller failed")
        dbgMsg('_finished : %s' % f)
        assert self.working
        self.working = False
        return None # eat the failure
482