#!/bin/bash

# Merge all given openstack log files into a single, sorted output stream. Each
# output line has the source filename prepended, e.g.:

# screen-n-cond.txt: 2015-07-14 16:33:24.669 DEBUG nova.o...
# screen-n-cond.txt: 2015-07-14 16:33:24.669 DEBUG nova.o...
# screen-n-cond.txt: 2015-07-14 16:33:24.670 DEBUG nova.o...
# screen-n-cell-child.txt: 2015-07-14 16:33:24.708 DEBUG ...
# screen-n-cond.txt: 2015-07-14 16:33:24.710 DEBUG nova.o...
# screen-n-cond.txt: 2015-07-14 16:33:24.710 DEBUG oslo_d...

# Usage: log_merge.sh screen-*.txt.gz

# An awk script to tidy up openstack logs.
# A line counts as timestamped when its second whitespace-separated field
# contains a time of the form HH:MM:SS.mmm.
# Leading lines without a timestamp are discarded.
# Later lines without a timestamp are appended to the previous line.
# Consequently, all output lines have a timestamp, and each one is prefixed
# with the name of the file it came from.
LOG_TIDY='
# Set ORS to "" so we only output newlines explicitly
BEGIN { found = 0; ORS = "" }

# Does this line contain a timestamp? The digit classes are spelled out
# instead of using {n} repetition counts, because not every awk implementation
# supports interval expressions in regular expressions.
{
    if ($2 ~ /[0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9][0-9][0-9]/)
        timestamped = 1
    else
        timestamped = 0
}

# Ignore leading un-timestamped lines
found == 0 && timestamped == 0 { next }

# Timestamped lines are displayed prefixed with the filename.
# We output a preceding newline to terminate the previous log entry.
timestamped == 1 {
    if (found == 1) print "\n";

    found = 1;
    print FILENAME": "$0
}

# Untimestamped lines are output with no trailing newline, which appends them
# to the previous line.
timestamped == 0 { print }

# Terminate the final log entry, so the output is a well-formed text stream.
# Nothing is printed when the input had no timestamped line at all.
END { if (found == 1) print "\n" }
'

# Merge the tidied output of all given log files into one sorted stream.
#
# Globals:   LOG_TIDY (read) - awk program applied to every input file
# Arguments: one or more log file names
# Outputs:   the merged log lines on stdout; a usage message on stderr when
#            no file is given
# Returns:   2 when called without arguments, otherwise the status of the
#            evaluated sort command
log_merge() {
    # Merge sort (-m) on the second and third fields. Each tidied line starts
    # with the source filename, so those fields are the date and the time.
    local cmd='sort -m -k2,3'
    local logfile quoted

    # Without input files sort would silently wait on stdin.
    if (( $# == 0 )); then
        printf 'Usage: %s LOGFILE...\n' "${0##*/}" >&2
        return 2
    fi

    # Add a bash process substitution argument, the <(cmd) syntax, for each
    # input file. The process substitution creates a file descriptor attached
    # to the output of the awk script for the given input file.
    for logfile in "$@"; do
        # The command line is evaluated a second time by eval below, so the
        # filename is shell-quoted here. Otherwise quotes, $(...) or backticks
        # in a name would break the command or be executed.
        printf -v quoted '%q' "$logfile"
        cmd+=" <(awk \"\$LOG_TIDY\" ${quoted})"
    done

    # Quoted, so the command string reaches eval unmodified by word splitting
    # and pathname expansion.
    eval "$cmd"
}

log_merge "$@"
