| 1 | # -*- Mode: Python; test-case-name: morituri.test.test_program_cdparanoia -*- |
|---|
| 2 | # vi:si:et:sw=4:sts=4:ts=4 |
|---|
| 3 | |
|---|
| 4 | # Morituri - for those about to RIP |
|---|
| 5 | |
|---|
| 6 | # Copyright (C) 2009 Thomas Vander Stichele |
|---|
| 7 | |
|---|
| 8 | # This file is part of morituri. |
|---|
| 9 | # |
|---|
| 10 | # morituri is free software: you can redistribute it and/or modify |
|---|
| 11 | # it under the terms of the GNU General Public License as published by |
|---|
| 12 | # the Free Software Foundation, either version 3 of the License, or |
|---|
| 13 | # (at your option) any later version. |
|---|
| 14 | # |
|---|
| 15 | # morituri is distributed in the hope that it will be useful, |
|---|
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 18 | # GNU General Public License for more details. |
|---|
| 19 | # |
|---|
| 20 | # You should have received a copy of the GNU General Public License |
|---|
| 21 | # along with morituri. If not, see <http://www.gnu.org/licenses/>. |
|---|
| 22 | |
|---|
| 23 | import os |
|---|
| 24 | import re |
|---|
| 25 | import stat |
|---|
| 26 | import shutil |
|---|
| 27 | import subprocess |
|---|
| 28 | import tempfile |
|---|
| 29 | |
|---|
| 30 | from morituri.common import task, log, common |
|---|
| 31 | from morituri.extern import asyncsub |
|---|
| 32 | |
|---|
class FileSizeError(Exception):
    """
    The given path does not have the expected size.

    @ivar path:    the path of the file that has the unexpected size
    @ivar message: a description of the size mismatch
    """

    # class-level default so the attribute exists even before __init__ ran
    message = None

    def __init__(self, path, message):
        """
        @param path:    the path of the file that has the unexpected size
        @param message: a description of the size mismatch
        """
        self.args = (path, message)
        self.path = path
        self.message = message
|---|
| 44 | |
|---|
class ReturnCodeError(Exception):
    """
    The program exited with a non-zero return code.

    @ivar returncode: the return code the program exited with
    """

    def __init__(self, returncode):
        """
        @param returncode: the return code the program exited with
        """
        # keep the code both as a named attribute and as the sole
        # exception argument
        self.returncode = returncode
        self.args = (self.returncode, )
|---|
| 52 | |
|---|
# Matches progress lines of the shape
#     ##: <code> [<function>] @ <offset>
# and exposes the three parts as the named groups 'code', 'function' and
# 'offset'.
# NOTE: the pattern is compiled with re.VERBOSE, so unescaped whitespace in it
# is ignored; the blank after "##:" is therefore not matched literally, and
# any whitespace following "##:" in the input ends up inside the 'code' group.
_PROGRESS_RE = re.compile(r"""
^\#\#: (?P<code>.+)\s # function code
\[(?P<function>.*)\]\s@\s # function name
(?P<offset>\d+) # offset
""", re.VERBOSE)

# Matches lines that report a scsi_read error; ProgressParser counts these.
_ERROR_RE = re.compile("^scsi_read error:")
|---|
| 60 | |
|---|
# From reading the cdparanoia source code, it looks like the offset is
# reported as a number of single-channel samples (i.e. 2 bytes per unit),
# and that it is an absolute offset.
|---|
| 63 | |
|---|
class ProgressParser(object):
    """
    I parse cdparanoia progress output line by line, and keep track of
    the last frame read, the last frame written, the number of scsi errors
    and the total number of frames read.

    @ivar read:   last [read] frame
    @ivar wrote:  last [wrote] frame
    @ivar errors: count of number of scsi errors
    @ivar reads:  total number of frames read within the start-stop range
    """
    read = 0 # last [read] frame
    wrote = 0 # last [wrote] frame
    errors = 0 # count of number of scsi errors
    _nframes = None # number of frames read on each [read]
    _firstFrames = None # number of frames read on first [read]
    reads = 0 # total number of reads

    def __init__(self, start, stop):
        """
        @param start:  first frame to rip
        @type  start:  int
        @param stop:   last frame to rip (inclusive)
        @type  stop:   int
        """
        self.start = start
        self.stop = stop

        # FIXME: privatize
        self.read = start

        # read count for each sector; currently never filled in because
        # per-frame bookkeeping is disabled (see _parse_read)
        self._reads = {}

    def parse(self, line):
        """
        Parse a line.

        Progress lines for the 'read' and 'wrote' functions update the read
        and write pointers; scsi_read error lines increase the error count.
        All other lines are ignored.

        @param line: a single line of cdparanoia output, without newline
        """
        m = _PROGRESS_RE.search(line)
        if m:
            function = m.group('function')
            wordOffset = int(m.group('offset'))
            if function == 'read':
                self._parse_read(wordOffset)
            elif function == 'wrote':
                self._parse_wrote(wordOffset)

        if _ERROR_RE.search(line):
            self.errors += 1

    def _parse_read(self, wordOffset):
        """
        Handle a [read] progress report.

        @param wordOffset: the reported offset, in words
        @type  wordOffset: int
        """
        if wordOffset % common.WORDS_PER_FRAME != 0:
            # parenthesised so the statement is valid with and without
            # the print statement; the output is unchanged
            print('THOMAS: not a multiple of %d: %d' % (
                common.WORDS_PER_FRAME, wordOffset))
            return

        # explicit floor division: the offset is a whole number of frames
        frameOffset = wordOffset // common.WORDS_PER_FRAME

        # set nframes if not yet set
        if self._nframes is None and self.read != 0:
            self._nframes = frameOffset - self.read

        # set firstFrames if not yet set
        if self._firstFrames is None:
            self._firstFrames = frameOffset - self.start

        if frameOffset > self.read:
            # read further ahead; the number of frames per read is not
            # verified against _nframes since it varies between reads
            # (the original author's drive either reads 7 or 13 frames)
            markStart = self.read
            markEnd = frameOffset
        else:
            # went back to verify
            # we could use firstFrames as an estimate on how many frames this
            # read, but this lowers our track quality needlessly where
            # EAC still reports 100% track quality
            markStart = markEnd = frameOffset

        # FIXME: counting the reads for each individual frame in self._reads
        # is way too slow even for a testcase, so it is not done

        # cdparanoia reads quite a bit beyond the current track before it
        # goes back to verify; don't count those
        markEnd = min(markEnd, self.stop)
        markStart = min(markStart, self.stop)

        self.reads += markEnd - markStart

        # update our read pointer
        self.read = frameOffset

    def _parse_wrote(self, wordOffset):
        """
        Handle a [wrote] progress report.

        @param wordOffset: the reported offset, in words
        @type  wordOffset: int
        """
        # cdparanoia outputs most [wrote] calls with one word less than a frame
        self.wrote = (wordOffset + 1) // common.WORDS_PER_FRAME

    def getTrackQuality(self):
        """
        Each frame gets read twice.
        More than two reads for a frame reduce track quality.

        @returns: the track quality, between 0.0 and 1.0; 0.0 if no reads
                  were counted at all
        @rtype:   float
        """
        frames = self.stop - self.start + 1

        # nothing was read, so there is nothing to base a quality on;
        # avoid dividing by zero
        if not self.reads:
            return 0.0

        # don't go over a 100%; we know cdparanoia reads each frame at least
        # twice
        return min(frames * 2.0 / self.reads, 1.0)
|---|
| 180 | |
|---|
| 181 | |
|---|
| 182 | # FIXME: handle errors |
|---|
| 183 | class ReadTrackTask(task.Task): |
|---|
| 184 | """ |
|---|
| 185 | I am a task that reads a track using cdparanoia. |
|---|
| 186 | |
|---|
| 187 | @ivar reads: how many reads were done to rip the track |
|---|
| 188 | """ |
|---|
| 189 | |
|---|
| 190 | description = "Reading Track" |
|---|
| 191 | quality = None # set at end of reading |
|---|
| 192 | |
|---|
| 193 | _MAXERROR = 100 # number of errors detected by parser |
|---|
| 194 | |
|---|
| 195 | def __init__(self, path, table, start, stop, offset=0, device=None): |
|---|
| 196 | """ |
|---|
| 197 | Read the given track. |
|---|
| 198 | |
|---|
| 199 | @param path: where to store the ripped track |
|---|
| 200 | @type path: unicode |
|---|
| 201 | @param table: table of contents of CD |
|---|
| 202 | @type table: L{table.Table} |
|---|
| 203 | @param start: first frame to rip |
|---|
| 204 | @type start: int |
|---|
| 205 | @param stop: last frame to rip (inclusive) |
|---|
| 206 | @type stop: int |
|---|
| 207 | @param offset: read offset, in samples |
|---|
| 208 | @type offset: int |
|---|
| 209 | @param device: the device to rip from |
|---|
| 210 | @type device: str |
|---|
| 211 | """ |
|---|
| 212 | assert type(path) is unicode, "%r is not unicode" % path |
|---|
| 213 | |
|---|
| 214 | self.path = path |
|---|
| 215 | self._table = table |
|---|
| 216 | self._start = start |
|---|
| 217 | self._stop = stop |
|---|
| 218 | self._offset = offset |
|---|
| 219 | self._parser = ProgressParser(start, stop) |
|---|
| 220 | self._device = device |
|---|
| 221 | |
|---|
| 222 | self._buffer = "" # accumulate characters |
|---|
| 223 | self._errors = [] |
|---|
| 224 | |
|---|
| 225 | def start(self, runner): |
|---|
| 226 | task.Task.start(self, runner) |
|---|
| 227 | |
|---|
| 228 | # find on which track the range starts and stops |
|---|
| 229 | startTrack = 0 |
|---|
| 230 | startOffset = 0 |
|---|
| 231 | stopTrack = 0 |
|---|
| 232 | stopOffset = self._stop |
|---|
| 233 | |
|---|
| 234 | for i, t in enumerate(self._table.tracks): |
|---|
| 235 | if self._table.getTrackStart(i + 1) <= self._start: |
|---|
| 236 | startTrack = i + 1 |
|---|
| 237 | startOffset = self._start - self._table.getTrackStart(i + 1) |
|---|
| 238 | if self._table.getTrackEnd(i + 1) <= self._stop: |
|---|
| 239 | stopTrack = i + 1 |
|---|
| 240 | stopOffset = self._stop - self._table.getTrackStart(i + 1) |
|---|
| 241 | |
|---|
| 242 | self.debug('Ripping from %d to %d (inclusive)', |
|---|
| 243 | self._start, self._stop) |
|---|
| 244 | self.debug('Starting at track %d, offset %d', |
|---|
| 245 | startTrack, startOffset) |
|---|
| 246 | self.debug('Stopping at track %d, offset %d', |
|---|
| 247 | stopTrack, stopOffset) |
|---|
| 248 | |
|---|
| 249 | bufsize = 1024 |
|---|
| 250 | argv = ["cdparanoia", "--stderr-progress", |
|---|
| 251 | "--sample-offset=%d" % self._offset, ] |
|---|
| 252 | if self._device: |
|---|
| 253 | argv.extend(["--force-cdrom-device", self._device, ]) |
|---|
| 254 | argv.extend(["%d[%s]-%d[%s]" % ( |
|---|
| 255 | startTrack, common.framesToHMSF(startOffset), |
|---|
| 256 | stopTrack, common.framesToHMSF(stopOffset)), |
|---|
| 257 | self.path]) |
|---|
| 258 | self.debug('Running %s' % (" ".join(argv), )) |
|---|
| 259 | try: |
|---|
| 260 | self._popen = asyncsub.Popen(argv, |
|---|
| 261 | bufsize=bufsize, |
|---|
| 262 | stdin=subprocess.PIPE, stdout=subprocess.PIPE, |
|---|
| 263 | stderr=subprocess.PIPE, close_fds=True) |
|---|
| 264 | except OSError, e: |
|---|
| 265 | import errno |
|---|
| 266 | if e.errno == errno.ENOENT: |
|---|
| 267 | raise common.MissingDependencyException('cdparanoia') |
|---|
| 268 | |
|---|
| 269 | raise |
|---|
| 270 | |
|---|
| 271 | self.runner.schedule(1.0, self._read, runner) |
|---|
| 272 | |
|---|
| 273 | def _read(self, runner): |
|---|
| 274 | ret = self._popen.recv_err() |
|---|
| 275 | if not ret: |
|---|
| 276 | if self._popen.poll() is not None: |
|---|
| 277 | self._done() |
|---|
| 278 | return |
|---|
| 279 | self.runner.schedule(0.01, self._read, runner) |
|---|
| 280 | return |
|---|
| 281 | |
|---|
| 282 | self._buffer += ret |
|---|
| 283 | |
|---|
| 284 | # parse buffer into lines if possible, and parse them |
|---|
| 285 | if "\n" in self._buffer: |
|---|
| 286 | lines = self._buffer.split('\n') |
|---|
| 287 | if lines[-1] != "\n": |
|---|
| 288 | # last line didn't end yet |
|---|
| 289 | self._buffer = lines[-1] |
|---|
| 290 | del lines[-1] |
|---|
| 291 | else: |
|---|
| 292 | self._buffer = "" |
|---|
| 293 | |
|---|
| 294 | for line in lines: |
|---|
| 295 | self._parser.parse(line) |
|---|
| 296 | |
|---|
| 297 | # fail if too many errors |
|---|
| 298 | if self._parser.errors > self._MAXERROR: |
|---|
| 299 | self.debug('%d errors, terminating', self._parser.errors) |
|---|
| 300 | self._popen.terminate() |
|---|
| 301 | |
|---|
| 302 | num = float(self._parser.wrote) - self._start |
|---|
| 303 | den = float(self._stop) - self._start |
|---|
| 304 | progress = num / den |
|---|
| 305 | if progress < 1.0: |
|---|
| 306 | self.setProgress(progress) |
|---|
| 307 | |
|---|
| 308 | # 0 does not give us output before we complete, 1.0 gives us output |
|---|
| 309 | # too late |
|---|
| 310 | self.runner.schedule(0.01, self._read, runner) |
|---|
| 311 | |
|---|
| 312 | def _poll(self, runner): |
|---|
| 313 | if self._popen.poll() is None: |
|---|
| 314 | self.runner.schedule(1.0, self._poll, runner) |
|---|
| 315 | return |
|---|
| 316 | |
|---|
| 317 | self._done() |
|---|
| 318 | |
|---|
| 319 | def _done(self): |
|---|
| 320 | self.setProgress(1.0) |
|---|
| 321 | |
|---|
| 322 | # check if the length matches |
|---|
| 323 | size = os.stat(self.path)[stat.ST_SIZE] |
|---|
| 324 | # wav header is 44 bytes |
|---|
| 325 | offsetLength = self._stop - self._start + 1 |
|---|
| 326 | expected = offsetLength * common.BYTES_PER_FRAME + 44 |
|---|
| 327 | if size != expected: |
|---|
| 328 | # FIXME: handle errors better |
|---|
| 329 | self.warning('file size %d did not match expected size %d', |
|---|
| 330 | size, expected) |
|---|
| 331 | if (size - expected) % common.BYTES_PER_FRAME == 0: |
|---|
| 332 | self.warning('%d frames difference' % ( |
|---|
| 333 | (size - expected) / common.BYTES_PER_FRAME)) |
|---|
| 334 | else: |
|---|
| 335 | self.warning('non-integral amount of frames difference') |
|---|
| 336 | |
|---|
| 337 | self.setAndRaiseException(FileSizeError(self.path, |
|---|
| 338 | "File size %d did not match expected size %d" % ( |
|---|
| 339 | size, expected))) |
|---|
| 340 | |
|---|
| 341 | if not self.exception and self._popen.returncode != 0: |
|---|
| 342 | if self._errors: |
|---|
| 343 | print "\n".join(self._errors) |
|---|
| 344 | else: |
|---|
| 345 | self.warning('exit code %r', self._popen.returncode) |
|---|
| 346 | self.exception = ReturnCodeError(self._popen.returncode) |
|---|
| 347 | |
|---|
| 348 | self.quality = self._parser.getTrackQuality() |
|---|
| 349 | |
|---|
| 350 | self.stop() |
|---|
| 351 | return |
|---|
| 352 | |
|---|
| 353 | class ReadVerifyTrackTask(task.MultiSeparateTask): |
|---|
| 354 | """ |
|---|
| 355 | I am a task that reads and verifies a track using cdparanoia. |
|---|
| 356 | |
|---|
| 357 | @ivar path: the path where the file is to be stored. |
|---|
| 358 | @ivar checksum: the checksum of the track; set if they match. |
|---|
| 359 | @ivar testchecksum: the test checksum of the track. |
|---|
| 360 | @ivar copychecksum: the copy checksum of the track. |
|---|
| 361 | @ivar peak: the peak level of the track |
|---|
| 362 | """ |
|---|
| 363 | |
|---|
| 364 | checksum = None |
|---|
| 365 | testchecksum = None |
|---|
| 366 | copychecksum = None |
|---|
| 367 | peak = None |
|---|
| 368 | quality = None |
|---|
| 369 | |
|---|
| 370 | _tmpwavpath = None |
|---|
| 371 | _tmppath = None |
|---|
| 372 | |
|---|
| 373 | def __init__(self, path, table, start, stop, offset=0, device=None, |
|---|
| 374 | profile=None, taglist=None): |
|---|
| 375 | """ |
|---|
| 376 | @param path: where to store the ripped track |
|---|
| 377 | @type path: str |
|---|
| 378 | @param table: table of contents of CD |
|---|
| 379 | @type table: L{table.Table} |
|---|
| 380 | @param start: first frame to rip |
|---|
| 381 | @type start: int |
|---|
| 382 | @param stop: last frame to rip (inclusive) |
|---|
| 383 | @type stop: int |
|---|
| 384 | @param offset: read offset, in samples |
|---|
| 385 | @type offset: int |
|---|
| 386 | @param device: the device to rip from |
|---|
| 387 | @type device: str |
|---|
| 388 | @param profile: the encoding profile |
|---|
| 389 | @type profile: L{encode.Profile} |
|---|
| 390 | @param taglist: a list of tags |
|---|
| 391 | @param taglist: L{gst.TagList} |
|---|
| 392 | """ |
|---|
| 393 | task.MultiSeparateTask.__init__(self) |
|---|
| 394 | |
|---|
| 395 | self.path = path |
|---|
| 396 | |
|---|
| 397 | if taglist: |
|---|
| 398 | self.debug('read and verify with taglist %r', taglist) |
|---|
| 399 | # FIXME: choose a dir on the same disk/dir as the final path |
|---|
| 400 | fd, tmppath = tempfile.mkstemp(suffix='.morituri.wav') |
|---|
| 401 | tmppath = unicode(tmppath) |
|---|
| 402 | os.close(fd) |
|---|
| 403 | self._tmpwavpath = tmppath |
|---|
| 404 | |
|---|
| 405 | # here to avoid import gst eating our options |
|---|
| 406 | from morituri.common import checksum |
|---|
| 407 | |
|---|
| 408 | self.tasks = [] |
|---|
| 409 | self.tasks.append( |
|---|
| 410 | ReadTrackTask(tmppath, table, start, stop, |
|---|
| 411 | offset=offset, device=device)) |
|---|
| 412 | self.tasks.append(checksum.CRC32Task(tmppath)) |
|---|
| 413 | t = ReadTrackTask(tmppath, table, start, stop, |
|---|
| 414 | offset=offset, device=device) |
|---|
| 415 | t.description = 'Verifying track...' |
|---|
| 416 | self.tasks.append(t) |
|---|
| 417 | self.tasks.append(checksum.CRC32Task(tmppath)) |
|---|
| 418 | |
|---|
| 419 | fd, tmpoutpath = tempfile.mkstemp(suffix='.morituri.%s' % |
|---|
| 420 | profile.extension) |
|---|
| 421 | tmpoutpath = unicode(tmpoutpath) |
|---|
| 422 | os.close(fd) |
|---|
| 423 | self._tmppath = tmpoutpath |
|---|
| 424 | |
|---|
| 425 | # here to avoid import gst eating our options |
|---|
| 426 | from morituri.common import encode |
|---|
| 427 | |
|---|
| 428 | self.tasks.append(encode.EncodeTask(tmppath, tmpoutpath, profile, |
|---|
| 429 | taglist=taglist)) |
|---|
| 430 | # make sure our encoding is accurate |
|---|
| 431 | self.tasks.append(checksum.CRC32Task(tmpoutpath)) |
|---|
| 432 | |
|---|
| 433 | self.checksum = None |
|---|
| 434 | |
|---|
| 435 | umask = os.umask(0) |
|---|
| 436 | os.umask(umask) |
|---|
| 437 | self.file_mode = 0666 - umask |
|---|
| 438 | |
|---|
| 439 | def stop(self): |
|---|
| 440 | if not self.exception: |
|---|
| 441 | self.quality = max(self.tasks[0].quality, self.tasks[2].quality) |
|---|
| 442 | self.peak = self.tasks[4].peak |
|---|
| 443 | self.debug('peak: %r', self.peak) |
|---|
| 444 | |
|---|
| 445 | self.testchecksum = c1 = self.tasks[1].checksum |
|---|
| 446 | self.copychecksum = c2 = self.tasks[3].checksum |
|---|
| 447 | if c1 == c2: |
|---|
| 448 | self.info('Checksums match, %08x' % c1) |
|---|
| 449 | self.checksum = self.testchecksum |
|---|
| 450 | else: |
|---|
| 451 | # FIXME: detect this before encoding |
|---|
| 452 | self.error('read and verify failed') |
|---|
| 453 | |
|---|
| 454 | if self.tasks[5].checksum != self.checksum: |
|---|
| 455 | self.error('Encoding failed, checksum does not match') |
|---|
| 456 | |
|---|
| 457 | # delete the unencoded file |
|---|
| 458 | os.unlink(self._tmpwavpath) |
|---|
| 459 | |
|---|
| 460 | os.chmod(self._tmppath, self.file_mode) |
|---|
| 461 | |
|---|
| 462 | try: |
|---|
| 463 | shutil.move(self._tmppath, self.path) |
|---|
| 464 | except Exception, e: |
|---|
| 465 | self._exception = e |
|---|
| 466 | else: |
|---|
| 467 | self.debug('stop: exception %r', self.exception) |
|---|
| 468 | |
|---|
| 469 | task.MultiSeparateTask.stop(self) |
|---|