N elegant way to split a file by chapter using ffmpeg

ffmpeg

In this page, Albert Armea share a code to split videos by chapter using ffmpeg. The code is straight forward, but not quite good-looking.

ffmpeg -i "$SOURCE.$EXT" 2>&1 |
grep Chapter |
sed -E "s/ *Chapter #([0-9]+\.[0-9]+): start ([0-9]+\.[0-9]+), end ([0-9]+\.[0-9]+)/-i \"$SOURCE.$EXT\" -vcodec copy -acodec copy -ss \2 -to \3 \"$SOURCE-\1.$EXT\"/" |
xargs -n 11 ffmpeg

Is there an elegant way to do this job?

Best Solution

(Edit: This tip came from https://github.com/phiresky via this issue: https://github.com/harryjackson/ffmpeg_split/issues/2)

You can get chapters using:

ffprobe -i fname -print_format json -show_chapters -loglevel error

If I was writing this again I'd use ffprobe's json options

(Original answer follows)

This is a working python script. I tested it on several videos and it worked well. Python isn't my first language but I noticed you use it so I figure writing it in Python might make more sense. I've added it to Github. If you want to improve please submit pull requests.

#!/usr/bin/env python
import os
import re
import subprocess as sp
from subprocess import *
from optparse import OptionParser

def parseChapters(filename):
  chapters = []
  command = [ "ffmpeg", '-i', filename]
  output = ""
  try:
    # ffmpeg requires an output file and so it errors 
    # when it does not get one so we need to capture stderr, 
    # not stdout.
    output = sp.check_output(command, stderr=sp.STDOUT, universal_newlines=True)
  except CalledProcessError, e:
    output = e.output 

  for line in iter(output.splitlines()):
    m = re.match(r".*Chapter #(\d+:\d+): start (\d+\.\d+), end (\d+\.\d+).*", line)
    num = 0 
    if m != None:
      chapters.append({ "name": m.group(1), "start": m.group(2), "end": m.group(3)})
      num += 1
  return chapters

def getChapters():
  parser = OptionParser(usage="usage: %prog [options] filename", version="%prog 1.0")
  parser.add_option("-f", "--file",dest="infile", help="Input File", metavar="FILE")
  (options, args) = parser.parse_args()
  if not options.infile:
    parser.error('Filename required')
  chapters = parseChapters(options.infile)
  fbase, fext = os.path.splitext(options.infile)
  for chap in chapters:
    print "start:" +  chap['start']
    chap['outfile'] = fbase + "-ch-"+ chap['name'] + fext
    chap['origfile'] = options.infile
    print chap['outfile']
  return chapters

def convertChapters(chapters):
  for chap in chapters:
    print "start:" +  chap['start']
    print chap
    command = [
        "ffmpeg", '-i', chap['origfile'],
        '-vcodec', 'copy',
        '-acodec', 'copy',
        '-ss', chap['start'],
        '-to', chap['end'],
        chap['outfile']]
    output = ""
    try:
      # ffmpeg requires an output file and so it errors 
      # when it does not get one
      output = sp.check_output(command, stderr=sp.STDOUT, universal_newlines=True)
    except CalledProcessError, e:
      output = e.output
      raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))

if __name__ == '__main__':
  chapters = getChapters()
  convertChapters(chapters)