source: trunk/morituri/program/cdparanoia.py @ 432

Revision 432, 15.2 KB checked in by thomas, 2 years ago (diff)
  • morituri/program/cdparanoia.py: cdparanoia can hang indefinitely on SCSI read errors, for example on some drives when trying negative offsets. Detect and count these errors, and fail after 100.
  • morituri/test/test_program_cdparanoia.py:
  • morituri/test/cdparanoia.progress.error (added): Add a test for this output.
Line 
1# -*- Mode: Python; test-case-name: morituri.test.test_program_cdparanoia -*-
2# vi:si:et:sw=4:sts=4:ts=4
3
4# Morituri - for those about to RIP
5
6# Copyright (C) 2009 Thomas Vander Stichele
7
8# This file is part of morituri.
9#
10# morituri is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# morituri is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with morituri.  If not, see <http://www.gnu.org/licenses/>.
22
23import os
24import re
25import stat
26import shutil
27import subprocess
28import tempfile
29
30from morituri.common import task, log, common
31from morituri.extern import asyncsub
32
class FileSizeError(Exception):
    """
    The given path does not have the expected size.

    @ivar path:    path of the file whose size was checked
    @ivar message: description of the size mismatch
    """

    # class-level default, overridden per instance in __init__
    message = None

    def __init__(self, path, message):
        """
        @param path:    path of the file whose size was checked
        @param message: description of the size mismatch
        """
        # set args explicitly so str()/repr() of the exception show both
        # values
        self.args = (path, message)
        self.path = path
        self.message = message
44
class ReturnCodeError(Exception):
    """
    Raised when the external program exited with a non-zero return code.

    @ivar returncode: the return code the program exited with
    """

    def __init__(self, returncode):
        """
        @param returncode: the return code the program exited with
        """
        # the base initializer stores its arguments as self.args
        Exception.__init__(self, returncode)
        self.returncode = returncode
52
# Matches one progress report line as parsed from cdparanoia's stderr when
# it is run with --stderr-progress; such a line has the general shape
#   ##: <code> [<function>] @ <offset>
# The named groups capture the function code, the function name and the
# offset.
_PROGRESS_RE = re.compile(r"""
    ^\#\#: (?P<code>.+)\s      # function code
    \[(?P<function>.*)\]\s@\s     # function name
    (?P<offset>\d+)        # offset
""", re.VERBOSE)

# Matches a line that starts with cdparanoia's scsi read error report.
_ERROR_RE = re.compile("^scsi_read error:")

# From reading the cdparanoia source code, it looks like the offset is
# reported as an absolute number of single-channel samples, i.e. 2 bytes
# per unit.
63
class ProgressParser(object):
    """
    I parse cdparanoia progress output, one line at a time, and keep track
    of how far reading and writing have progressed.

    @ivar read:   frame offset reported by the last [read] line
    @ivar wrote:  frame offset reported by the last [wrote] line
    @ivar errors: number of scsi read error lines seen
    @ivar reads:  total number of frames read, counting only forward reads
                  and ignoring anything beyond the last frame to rip
    """
    read = 0 # last [read] frame
    wrote = 0 # last [wrote] frame
    errors = 0 # count of number of scsi errors
    _nframes = None # number of frames read on each [read]
    _firstFrames = None # number of frames read on first [read]
    reads = 0 # total number of reads

    def __init__(self, start, stop):
        """
        @param start:  first frame to rip
        @type  start:  int
        @param stop:   last frame to rip (inclusive)
        @type  stop:   int
        """
        self.start = start
        self.stop = stop

        # FIXME: privatize
        self.read = start

        # read count for each sector; never filled in at the moment because
        # per-frame bookkeeping is disabled (see _parse_read)
        self._reads = {}

    def parse(self, line):
        """
        Parse a line.

        Progress lines update the read/wrote offsets; scsi read error lines
        increase the error count.  Any other line is ignored.

        @param line: one line of cdparanoia output, without the newline
        """
        m = _PROGRESS_RE.search(line)
        if m:
            # the function code (group 'code') is currently not used
            function = m.group('function')
            wordOffset = int(m.group('offset'))
            if function == 'read':
                self._parse_read(wordOffset)
            elif function == 'wrote':
                self._parse_wrote(wordOffset)

        if _ERROR_RE.search(line):
            self.errors += 1

    def _parse_read(self, wordOffset):
        """
        Handle a [read] progress line.

        @param wordOffset: the offset reported on the line, in words
        @type  wordOffset: int
        """
        if wordOffset % common.WORDS_PER_FRAME != 0:
            print('THOMAS: not a multiple of %d: %d' % (
                common.WORDS_PER_FRAME, wordOffset))
            return

        # explicit floor division; exact because of the check above
        frameOffset = wordOffset // common.WORDS_PER_FRAME

        # set nframes if not yet set
        if self._nframes is None and self.read != 0:
            self._nframes = frameOffset - self.read

        # set firstFrames if not yet set
        if self._firstFrames is None:
            self._firstFrames = frameOffset - self.start

        if frameOffset > self.read:
            # read forward; the number of frames read is not verified
            # against nframes since it is not constant (my drive either
            # reads 7 or 13 frames)
            markStart = self.read
            markEnd = frameOffset
        else:
            # went back to verify
            # we could use firstFrames as an estimate on how many frames this
            # read, but this lowers our track quality needlessly where
            # EAC still reports 100% track quality
            markStart = frameOffset
            markEnd = frameOffset

        # FIXME: counting reads for each individual frame in self._reads
        # is way too slow even for a testcase, so it is not done

        # cdparanoia reads quite a bit beyond the current track before it
        # goes back to verify; don't count those
        markEnd = min(markEnd, self.stop)
        markStart = min(markStart, self.stop)

        self.reads += markEnd - markStart

        # update our read pointer
        self.read = frameOffset

    def _parse_wrote(self, wordOffset):
        """
        Handle a [wrote] progress line.

        @param wordOffset: the offset reported on the line, in words
        @type  wordOffset: int
        """
        # cdparanoia outputs most [wrote] calls with one word less than a frame
        self.wrote = (wordOffset + 1) // common.WORDS_PER_FRAME

    def getTrackQuality(self):
        """
        Each frame gets read twice.
        More than two reads for a frame reduce track quality.

        @returns: the track quality, between 0.0 and 1.0; 0.0 if no reads
                  were counted at all
        @rtype:   float
        """
        frames = self.stop - self.start + 1
        reads = self.reads

        # without any counted read there is nothing to base a quality on;
        # report the worst quality instead of dividing by zero
        if reads <= 0:
            return 0.0

        # don't go over a 100%; we know cdparanoia reads each frame at least
        # twice
        return min(frames * 2.0 / reads, 1.0)
180
181
182# FIXME: handle errors
class ReadTrackTask(task.Task):
    """
    I am a task that reads a track using cdparanoia.

    @ivar path:    where the ripped track is stored
    @ivar quality: the track quality as computed by the progress parser;
                   set at end of reading
    """

    description = "Reading Track"
    quality = None # set at end of reading

    _MAXERROR = 100 # number of errors detected by parser

    def __init__(self, path, table, start, stop, offset=0, device=None):
        """
        Read the given track.

        @param path:   where to store the ripped track
        @type  path:   unicode
        @param table:  table of contents of CD
        @type  table:  L{table.Table}
        @param start:  first frame to rip
        @type  start:  int
        @param stop:   last frame to rip (inclusive)
        @type  stop:   int
        @param offset: read offset, in samples
        @type  offset: int
        @param device: the device to rip from
        @type  device: str
        """
        assert type(path) is unicode, "%r is not unicode" % path

        self.path = path
        self._table = table
        self._start = start
        self._stop = stop
        self._offset = offset
        self._parser = ProgressParser(start, stop)
        self._device = device

        self._buffer = "" # accumulate characters
        self._errors = []

    def start(self, runner):
        """
        Start cdparanoia for the requested frame range and schedule the
        first read of its progress output.

        @raises common.MissingDependencyException: if cdparanoia cannot be
                                                   found
        """
        task.Task.start(self, runner)

        # find on which track the range starts and stops
        startTrack = 0
        startOffset = 0
        stopTrack = 0
        stopOffset = self._stop

        for i, _track in enumerate(self._table.tracks):
            number = i + 1 # track numbers are 1-based
            trackStart = self._table.getTrackStart(number)
            if trackStart <= self._start:
                startTrack = number
                startOffset = self._start - trackStart
            if self._table.getTrackEnd(number) <= self._stop:
                stopTrack = number
                stopOffset = self._stop - trackStart

        self.debug('Ripping from %d to %d (inclusive)',
            self._start, self._stop)
        self.debug('Starting at track %d, offset %d',
            startTrack, startOffset)
        self.debug('Stopping at track %d, offset %d',
            stopTrack, stopOffset)

        bufsize = 1024
        argv = ["cdparanoia", "--stderr-progress",
            "--sample-offset=%d" % self._offset, ]
        if self._device:
            argv.extend(["--force-cdrom-device", self._device, ])
        # the span to rip, as track[offset]-track[offset]
        argv.extend(["%d[%s]-%d[%s]" % (
                startTrack, common.framesToHMSF(startOffset),
                stopTrack, common.framesToHMSF(stopOffset)),
            self.path])
        self.debug('Running %s' % (" ".join(argv), ))
        try:
            self._popen = asyncsub.Popen(argv,
                bufsize=bufsize,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE, close_fds=True)
        except OSError as e:
            import errno
            if e.errno == errno.ENOENT:
                # the cdparanoia executable could not be found
                raise common.MissingDependencyException('cdparanoia')

            raise

        self.runner.schedule(1.0, self._read, runner)

    def _read(self, runner):
        """
        Read pending stderr output from cdparanoia, feed complete lines to
        the progress parser, update progress and reschedule myself until
        the process has exited.
        """
        ret = self._popen.recv_err()
        if not ret:
            if self._popen.poll() is not None:
                # no more output and the process exited
                self._done()
                return
            self.runner.schedule(0.01, self._read, runner)
            return

        self._buffer += ret

        # parse buffer into lines if possible, and parse them
        if "\n" in self._buffer:
            lines = self._buffer.split('\n')
            # the last element is the line that didn't end yet; it is empty
            # when the buffer ended on a newline
            self._buffer = lines.pop()

            for line in lines:
                self._parser.parse(line)

            # fail if too many errors
            if self._parser.errors > self._MAXERROR:
                self.debug('%d errors, terminating', self._parser.errors)
                self._popen.terminate()

            num = float(self._parser.wrote) - self._start
            den = float(self._stop) - self._start
            # a single-frame range gives a zero denominator; there is no
            # intermediate progress to report in that case
            if den != 0:
                progress = num / den
                if progress < 1.0:
                    self.setProgress(progress)

        # 0 does not give us output before we complete, 1.0 gives us output
        # too late
        self.runner.schedule(0.01, self._read, runner)

    def _poll(self, runner):
        """
        Check once a second whether the process has exited, and finish up
        when it has.
        """
        if self._popen.poll() is None:
            self.runner.schedule(1.0, self._poll, runner)
            return

        self._done()

    def _done(self):
        """
        Finish the task: verify the size of the ripped file, record a
        non-zero return code, set the track quality and stop.
        """
        self.setProgress(1.0)

        # check if the length matches
        size = os.stat(self.path)[stat.ST_SIZE]
        # wav header is 44 bytes
        offsetLength = self._stop - self._start + 1
        expected = offsetLength * common.BYTES_PER_FRAME + 44
        if size != expected:
            # FIXME: handle errors better
            self.warning('file size %d did not match expected size %d',
                size, expected)
            if (size - expected) % common.BYTES_PER_FRAME == 0:
                self.warning('%d frames difference' % (
                    (size - expected) // common.BYTES_PER_FRAME))
            else:
                self.warning('non-integral amount of frames difference')

            self.setAndRaiseException(FileSizeError(self.path,
                "File size %d did not match expected size %d" % (
                    size, expected)))

        if not self.exception and self._popen.returncode != 0:
            if self._errors:
                print("\n".join(self._errors))
            else:
                self.warning('exit code %r', self._popen.returncode)
                self.exception = ReturnCodeError(self._popen.returncode)

        self.quality = self._parser.getTrackQuality()

        self.stop()
        return
352
class ReadVerifyTrackTask(task.MultiSeparateTask):
    """
    I am a task that reads and verifies a track using cdparanoia.

    @ivar path:         the path where the file is to be stored.
    @ivar checksum:     the checksum of the track; set if they match.
    @ivar testchecksum: the test checksum of the track.
    @ivar copychecksum: the copy checksum of the track.
    @ivar peak:         the peak level of the track
    @ivar quality:      the best track quality of the two reads
    """

    checksum = None
    testchecksum = None
    copychecksum = None
    peak = None
    quality = None

    _tmpwavpath = None
    _tmppath = None

    def __init__(self, path, table, start, stop, offset=0, device=None,
                 profile=None, taglist=None):
        """
        @param path:    where to store the ripped track
        @type  path:    str
        @param table:   table of contents of CD
        @type  table:   L{table.Table}
        @param start:   first frame to rip
        @type  start:   int
        @param stop:    last frame to rip (inclusive)
        @type  stop:    int
        @param offset:  read offset, in samples
        @type  offset:  int
        @param device:  the device to rip from
        @type  device:  str
        @param profile: the encoding profile; its extension is used for the
                        temporary output file, so it must be given
        @type  profile: L{encode.Profile}
        @param taglist: a list of tags
        @type  taglist: L{gst.TagList}
        """
        task.MultiSeparateTask.__init__(self)

        self.path = path

        if taglist:
            self.debug('read and verify with taglist %r', taglist)
        # FIXME: choose a dir on the same disk/dir as the final path
        fd, tmppath = tempfile.mkstemp(suffix='.morituri.wav')
        tmppath = unicode(tmppath)
        os.close(fd)
        self._tmpwavpath = tmppath

        # here to avoid import gst eating our options
        from morituri.common import checksum

        # the order matters; stop() looks the tasks up by index:
        # 0: read, 1: checksum of read, 2: verify, 3: checksum of verify,
        # 4: encode, 5: checksum of encoded file
        self.tasks = []
        self.tasks.append(
            ReadTrackTask(tmppath, table, start, stop,
                offset=offset, device=device))
        self.tasks.append(checksum.CRC32Task(tmppath))
        t = ReadTrackTask(tmppath, table, start, stop,
            offset=offset, device=device)
        t.description = 'Verifying track...'
        self.tasks.append(t)
        self.tasks.append(checksum.CRC32Task(tmppath))

        fd, tmpoutpath = tempfile.mkstemp(suffix='.morituri.%s' %
            profile.extension)
        tmpoutpath = unicode(tmpoutpath)
        os.close(fd)
        self._tmppath = tmpoutpath

        # here to avoid import gst eating our options
        from morituri.common import encode

        self.tasks.append(encode.EncodeTask(tmppath, tmpoutpath, profile,
            taglist=taglist))
        # make sure our encoding is accurate
        self.tasks.append(checksum.CRC32Task(tmpoutpath))

        self.checksum = None

        # mkstemp creates files readable by the owner only; work out the
        # mode a regular new file would get.  The umask can only be read
        # by setting it, so set it and restore it right away.
        umask = os.umask(0)
        os.umask(umask)
        # the umask is a bit mask, so clear its bits instead of subtracting
        self.file_mode = 0o666 & ~umask

    def stop(self):
        """
        Collect quality, peak and checksums from the subtasks, delete the
        unencoded file and move the encoded file to its final path.

        If any subtask failed, only log the exception.
        """
        if not self.exception:
            self.quality = max(self.tasks[0].quality, self.tasks[2].quality)
            self.peak = self.tasks[4].peak
            self.debug('peak: %r', self.peak)

            self.testchecksum = c1 = self.tasks[1].checksum
            self.copychecksum = c2 = self.tasks[3].checksum
            if c1 == c2:
                self.info('Checksums match, %08x' % c1)
                self.checksum = self.testchecksum
            else:
                # FIXME: detect this before encoding
                self.error('read and verify failed')

            # the encoded file should have the same checksum as the reads
            if self.tasks[5].checksum != self.checksum:
                self.error('Encoding failed, checksum does not match')

            # delete the unencoded file
            os.unlink(self._tmpwavpath)

            os.chmod(self._tmppath, self.file_mode)

            try:
                shutil.move(self._tmppath, self.path)
            except Exception as e:
                # record the failure so it is noticed instead of silently
                # dropped
                self.exception = e
        else:
            self.debug('stop: exception %r', self.exception)

        task.MultiSeparateTask.stop(self)
Note: See TracBrowser for help on using the repository browser.