On 01/20/2018 05:47 AM, Sebastian Ochmann wrote:
Hello,
I would like to describe a real-world use case where btrfs does not
perform well for me. I'm recording 60 fps, larger-than-1080p video using
OBS Studio [1] where it is important that the video stream is encoded
and written out to disk in real-time for a prolonged period of time (2-5
hours). The result is a H264 video encoded on the GPU with a data rate
ranging from approximately 10-50 MB/s.
The hardware used is powerful enough to handle this task. When I use a
XFS volume for recording, no matter whether it's a SSD or HDD, the
recording is smooth and no frame drops are reported (OBS has a nice
Stats window where it shows the number of frames dropped due to encoding
lag which seemingly also includes writing the data out to disk).
However, when using a btrfs volume I quickly observe severe, periodic
frame drops. It's not single frames but larger chunks of frames that are
dropped at a time. I tried mounting the volume with nobarrier but to no
avail.
Of course, the simple fix is to use a FS that works for me(TM). However
I thought since this is a common real-world use case I'd describe the
symptoms here in case anyone is interested in analyzing this behavior.
It's not immediately obvious that the FS makes such a difference. Also,
if anyone has an idea what I could try to mitigate this issue (mount or
mkfs options?) I can try that.
I saw this behavior on two different machines with kernels 4.14.13 and
4.14.5, both Arch Linux. btrfs-progs 4.14, OBS 20.1.3-241-gf5c3af1b
built from git.
This could be a few different things, trying without the space cache was
already suggested, and that's a top suspect.
How does the application do the writes? Are they always 4K aligned or
does it send them out in odd sizes?
The easiest way to nail it down is to use offcputime from the iovisor
project:
https://github.com/iovisor/bcc/blob/master/tools/offcputime.py
If you haven't already configured this it may take a little while, but
it's the perfect tool for this problem.
Otherwise, if the stalls are long enough you can try to catch it with
/proc/<pid>/stack. I've attached a helper script I often use to dump
the stack trace of all the tasks in D state.
Just run walker.py and it should give you something useful. You can use
walker.py -a to see all the tasks instead of just D state. This just
walks /proc/<pid>/stack, so you'll need to run it as someone with
permissions to see the stack traces of the procs you care about.
-chris
#!/usr/bin/env python3
#
# this walks all the tasks on the system and prints out a stack trace
# of any tasks waiting in D state. If you pass -a, it will print out
# the stack of every task it finds.
#
# It also makes a histogram of the common stacks so you can see where
# more of the tasks are.
#
import sys
import os
from optparse import OptionParser
# Command line handling. optparse is deprecated in favor of argparse,
# but it keeps this script self-contained on older systems.
usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
# -a: dump the stack of every task, not just tasks in D state
parser.add_option("-a", "--all_tasks", help="Dump all stacks", default=False,
                  action="store_true")
# -s: switch to /proc/<pid>/smaps memory accounting mode instead of stacks
parser.add_option("-s", "--smaps", help="Dump /proc/pid/smaps", default=False,
                  action="store_true")
# -S: which smaps counter the final report is sorted by (size, rss, pss, ...)
parser.add_option("-S", "--sort", help="/proc/pid/smaps sort key",
                  default="size", type="str")
# -p: restrict the walk to a single pid
parser.add_option("-p", "--pid", help="Filter on pid", default=None,
                  type="str")
# -c: restrict the walk to tasks with this command name
parser.add_option("-c", "--command", help="Filter on command name",
                  default=None, type="str")
# -f: report file-backed mappings individually instead of one [files] bucket
parser.add_option("-f", "--files", help="don't collapse open files",
                  default=False, action="store_true")
# -v: more detail in the output
parser.add_option("-v", "--verbose", help="details++", default=False,
                  action="store_true")
(options, args) = parser.parse_args()

# stack text -> number of tasks currently showing that exact stack
stacks = {}
# smaps counters aggregated by command/mapping name (see add_record)
maps = {}
# NOTE(review): 'lines' is never used anywhere in this script
lines = []
# parse the units from a number and normalize into KB
def parse_number(s):
    """Parse one "Tag: N unit" line from smaps into (tag, kb).

    Returns (None, None) when the line does not look like a counter
    line.  Bug fix: the TB branch used the GB multiplier (1024*1024);
    it is now 1024**3.  The bare except is narrowed to the exceptions
    the parsing can actually raise.
    """
    try:
        words = s.split()
        unit = words[-1].lower()
        number = int(words[1])
        tag = words[0].lower().rstrip(':')
    except (IndexError, ValueError):
        return (None, None)
    # we store everything in KB; smaps reports "kB" so that needs no scaling
    if unit == "mb":
        number *= 1024
    elif unit == "gb":
        number *= 1024 * 1024
    elif unit == "tb":
        number *= 1024 * 1024 * 1024
    return (tag, number)
# pretty print a number in KB with units
def print_number(num):
    """Scale a KB count into a (value, unit) pair for display.

    Bug fix: the original returned float(num + num / 1024), which
    inflated every value (e.g. 512 KB printed as 512.50 KB).  The loop
    is also bounded so a value beyond TB cannot index past the end of
    the units table.
    """
    units = ['KB', 'MB', 'GB', 'TB']
    index = 0
    while num > 1024 and index < len(units) - 1:
        num /= 1024
        index += 1
    return (float(num), units[index])
# find a given line in the record and pretty print it
def print_line(header, record, tag):
    """Pretty print one counter from an aggregated smaps record.

    Bug fix: counters missing from a record are treated as 0 instead
    of raising KeyError (not every kernel emits every smaps field,
    e.g. 'anonymous' or 'locked' on older kernels).
    """
    num, unit = print_number(record.get(tag, 0))
    # in verbose mode prefix each counter with the mapping name
    if options.verbose:
        header = header + " "
    else:
        header = ""
    print("\t%s%s: %.2f %s" % (header, tag.capitalize(), num, unit))
# print all the lines we care about in a given record
def print_record(record):
    """Emit one record: the mapping name, then its interesting counters."""
    name = record['tag']
    print(name)
    for field in ("size", "rss", "pss", "anonymous", "swap", "locked"):
        print_line(name, record, field)
# stuff a record with a group of lines from /proc/pid/smaps
# into the smaps hash. This does all the collapsing by cmd name
# unless we're in -a mode
#
def add_record(cmd, pid, record):
    """Fold one parsed smaps record into the global maps hash.

    Mapping names are collapsed ([anonymous], [files], [stack]) unless
    the relevant command line options ask for more detail, and the
    counters of records sharing a name are summed.
    """
    global maps
    if not record:
        return
    name = record["tag"]
    if name[0] != '[':
        # pathless mappings are anonymous memory; file mappings are
        # collapsed into a single bucket unless -f was given
        if '/' not in name:
            name = "[anonymous]"
        elif options.files:
            name = "[files:%s]" % name
        else:
            name = "[files]"
    if not options.verbose and name.startswith('[stack:'):
        # fold per-thread stacks into one [stack] bucket
        name = name[:6] + "]"
    # -a keeps each pid separate, otherwise aggregate by command name
    if options.all_tasks:
        name = "%s:%s:%s" % (cmd, pid, name)
    else:
        name = "%s:%s" % (cmd, name)
    entry = maps.setdefault(name, {"tag": name})
    for field in record:
        if field != "tag":
            entry[field] = entry.get(field, 0) + record[field]
# open the smap file for a given pid and collect all the records
def add_smap(path, pid, cmd):
    """Parse /proc/<pid>/smaps and fold every mapping into the maps hash.

    Bug fix: the file descriptor was never closed; a context manager
    now releases it even if parsing fails part way through.
    """
    try:
        fd = open(os.path.join(path, "smaps"), "r")
    except OSError:
        # process is gone or we lack permission; skip it quietly
        return
    with fd:
        record = {}
        for l in fd:
            l = l.lstrip().rstrip()
            if l.find('-') >= 0:
                # an "addr_start-addr_end perms offset dev inode [name]"
                # header starts a new mapping: flush the previous record
                add_record(cmd, pid, record)
                record = {}
                words = l.split()
                # if the tag includes information about filenames
                # add them in
                #
                if len(words) > 5:
                    record['tag'] = " ".join(words[5:])
                else:
                    record['tag'] = words[-1]
                continue
            tag, number = parse_number(l)
            if tag:
                record[tag] = number
        # flush the final record in the file
        add_record(cmd, pid, record)
# read /proc/pid/stack and add it to the hashes
def add_stack(path, pid, cmd, status):
    """Print one task's kernel stack and count it in the histogram.

    Only D-state (uninterruptible sleep) tasks are reported unless -a
    was given.  Bug fixes: the status filter now runs before opening
    the file, and the stack file is closed promptly instead of being
    leaked to the garbage collector.
    """
    global stacks
    if status != "D" and not options.all_tasks:
        return
    try:
        with open(os.path.join(path, "stack"), "r") as f:
            stack = f.read()
    except OSError:
        # task exited, or we lack permission to read its stack
        return
    print("%s %s %s" % (pid, cmd, status))
    print(stack)
    # histogram identical stack texts for the summary at the end
    stacks[stack] = stacks.get(stack, 0) + 1
# worker to read all the files for one individual task
def run_one_task(path):
    """Read /proc/<pid>/stat for one task and dispatch on the mode.

    Bug fix: the original did 'except: pass' and then used 'stat'
    unconditionally, raising NameError whenever the open failed (e.g.
    the task exited between listdir and open); we now return instead.
    The stat file is also closed via a context manager.
    """
    try:
        with open(os.path.join(path, "stat"), "r") as f:
            stat = f.read()
    except OSError:
        return
    words = stat.split()
    pid, cmd, status = words[0:3]
    # comm is reported as "(name)"; strip the parens
    cmd = cmd.lstrip('(')
    cmd = cmd.rstrip(')')
    if options.command and options.command != cmd:
        return
    if options.smaps:
        add_smap(path, pid, cmd)
    else:
        add_stack(path, pid, cmd, status)
def print_usage():
    """Write a brief usage message to stderr and exit non-zero."""
    message = "Usage: %s [-a]\n" % sys.argv[0]
    sys.stderr.write(message)
    sys.exit(1)
# for a given pid in string form, read the files from proc
def run_pid(name):
    """Process one /proc entry if its name looks like a pid.

    Non-numeric /proc entries (meminfo, sys, ...) are skipped.
    Fixes: the bare except around int() is narrowed to ValueError, the
    unused 'pid' local is dropped, and the pointless 'try/except:
    raise' no-op around the task walk is removed (re-raising is
    identical to not catching).
    """
    try:
        int(name)
    except ValueError:
        return
    if options.smaps:
        # smaps are per-process, so read the top level pid directory
        p = os.path.join("/proc", name)
        try:
            run_one_task(p)
        except Exception:
            # best effort: the process can vanish while we read it
            pass
    else:
        # stacks are per-thread, so walk every task in the thread group
        p = os.path.join("/proc", name, "task")
        if not os.path.exists(p):
            return
        for t in os.listdir(p):
            run_one_task(os.path.join(p, t))
# Top level: walk a single pid (-p) or every entry in /proc.
if options.pid:
    run_pid(options.pid)
else:
    for name in os.listdir("/proc"):
        run_pid(name)
if options.smaps:
    # Report mode 1: aggregated smaps records, sorted by the -S key.
    key = options.sort.lower()
    # bucket record names by their value for the sort key
    sort = {}
    for x in maps:
        record = maps[x]
        val = record.get(key, 0)
        sort.setdefault(val, []).append(x)
    ar = list(sort.keys())
    ar.sort()
    # ascending sort, so the biggest consumers print last
    for x in ar:
        bucket = sort[x]
        for x in bucket:
            print_record(maps[x])
else:
    # Report mode 2: histogram of identical stack traces.
    # invert stacks{} into count -> [stack text, ...]
    values = {}
    for stack, count in stacks.items():
        l = values.setdefault(count, [])
        l.append(stack)
    counts = list(values.keys())
    counts.sort(reverse=True)
    if counts:
        print("-----\nstack summary\n")
        # most common stacks first
        for x in counts:
            if x == 1:
                print("1 hit:")
            else:
                print("%d hits: " % x)
            l = values[x]
            for stack in l:
                print(stack)
                print("-----")