I've had this idea that we could make more efficient use of the nodes by grouping them into pools...
This would hopefully balance out the load on the nodes (most of the armhf nodes' CPUs are idle roughly 25% of the time) a little better. Perhaps more importantly, it should be much more resilient if one node is down, as a given build job can use one of several build machines for the second build. You can still group pools into categories to ensure diversity in kernel version, CPU type, operating date, etc. The load check isn't perfect, but it's better than nothing, and may be good enough as is. Adding a check for available RAM should be simple enough. Another option is to only use pools for the second build, which still gets most of the benefits, but is perhaps a little simpler configuration-wise. Patch below! No idea if it works, given that I don't have a spare jenkins.debian.net or build network to test on, but hopefully it demonstrates the idea, and is mostly there. My biggest concern with the code is not knowing whether setting NODE1, PORT1, NODE2 and PORT2 within the function will work correctly and be available outside of the function for the remainder of the process, or to other functions that run outside of reproducible_build.sh and need to know those variables. live well, vagrant commit 200c45bbb5768dce5649b05ad599c85c6bb14b50 Author: Vagrant Cascadian <vagr...@debian.org> Date: Fri Dec 18 15:02:32 2015 -0800 Implement support for build pools, and add an example pool. 
--- bin/reproducible_build.sh | 55 +++++++++++++++++++++++++++++++++++++++++++++-- job-cfg/reproducible.yaml | 3 ++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/bin/reproducible_build.sh b/bin/reproducible_build.sh index 338c207..c9d725f 100755 --- a/bin/reproducible_build.sh +++ b/bin/reproducible_build.sh @@ -688,9 +688,53 @@ check_buildinfo() { rm -f $TMPFILE1 $TMPFILE2 } +select_least_loaded_node() { + local pool_nodes + local node + local port + local load + local best_load + local selected + # default to the first node + selected="$1" + pool_nodes="$@" + if [ "$selected" = "$pool_nodes" ]; then + echo $selected + return 0 + fi + load = 0 + best_load = 0 + for this_node in $pool_nodes ; do + node=$(echo $this_node | cut -d : -f 1) + port=$(echo $this_node | cut -d : -f 2) + # Compare the number of processors against the load, and add + # 1000 so we don't need to bother comparing negative numbers. + # + # TODO: account for available memory. + # + # TODO: this could be improved upon and simplified by calling + # a shell script on the remote end. + load=$(echo \ + $(ssh $node -p $port \ + "grep ^processor /proc/cpuinfo | wc -l ; echo ' \* X 100 + 1000 - 100 X ' ; cut -d ' ' -f 1 /proc/loadavg") | \ + tr 'X' '*' | \ + bc | \ + cut -d . -f 1) + if [ "$load" -gt "$best_load" ]; then + selected="$this_node" + best_load="$load" + fi + done + echo $selected +} + build_rebuild() { FTBFS=1 mkdir b1 b2 + local selected_node + selected_node=$(select_least_loaded_node $NODE1_POOL) + NODE1=$(echo $selected_node | cut -d : -f 1) + PORT1=$(echo $selected_node | cut -d : -f 2) remote_build 1 $NODE1 $PORT1 if [ ! -f b1/${SRCPACKAGE}_${EVERSION}_${ARCH}.changes ] && [ -f b1/${SRCPACKAGE}_*_${ARCH}.changes ] ; then echo "Version mismatch between main node (${SRCPACKAGE}_${EVERSION}_${ARCH}.dsc expected) and first build node ($(ls b1/*dsc)) for $SUITE/$ARCH, aborting. Please upgrade the schroots..." 
| tee -a ${RBUILDLOG} @@ -700,6 +744,9 @@ build_rebuild() { exit 0 elif [ -f b1/${SRCPACKAGE}_${EVERSION}_${ARCH}.changes ] ; then # the first build did not FTBFS, try rebuild it. + selected_node=$(select_least_loaded_node $NODE2_POOL) + NODE2=$(echo $selected_node | cut -d : -f 1) + PORT2=$(echo $selected_node | cut -d : -f 2) remote_build 2 $NODE2 $PORT2 if [ -f b2/${SRCPACKAGE}_${EVERSION}_${ARCH}.changes ] ; then # both builds were fine, i.e., they did not FTBFS. @@ -750,10 +797,14 @@ elif [ "$1" = "1" ] || [ "$1" = "2" ] ; then exit 0 elif [ "$2" != "" ] ; then MODE="master" + NODE1_POOL="$1" + NODE2_POOL="$2" + # FIXME: postpone setting NODE1/PORT1 and NODE2/PORT2 until the builds + # run NODE1="$(echo $1 | cut -d ':' -f1).debian.net" NODE2="$(echo $2 | cut -d ':' -f1).debian.net" - PORT1="$(echo $1 | cut -d ':' -f2)" - PORT2="$(echo $2 | cut -d ':' -f2)" + PORT1="$(echo $1 | cut -d ':' -f2 | cut -d ' ' -f 1)" + PORT2="$(echo $2 | cut -d ':' -f2 | cut -d ' ' -f 1)" # if no port is given, assume 22 if [ "$NODE1" = "${PORT1}.debian.net" ] ; then PORT1=22 ; fi if [ "$NODE2" = "${PORT2}.debian.net" ] ; then PORT2=22 ; fi diff --git a/job-cfg/reproducible.yaml b/job-cfg/reproducible.yaml index 97df606..5963206 100644 --- a/job-cfg/reproducible.yaml +++ b/job-cfg/reproducible.yaml @@ -481,7 +481,8 @@ - '13': { my_node1: 'cbxi4pro0-armhf-rb:2226', my_node2: 'rpi2b-armhf-rb:2230' } - '14': { my_node1: 'rpi2b-armhf-rb:2230', my_node2: 'wbq0-armhf-rb:2225' } - '15': { my_node1: 'wbd0-armhf-rb:2223', my_node2: 'hb0-armhf-rb:2224' } - my_shell: '/srv/jenkins/bin/reproducible_build.sh {my_node1} {my_node2}' + - '16': { my_node1: 'wbd0-armhf-rb:2223 wbq0-armhf-r:2225', my_node2: 'bpi0-armhf-rb:2222 odxu4-armhf-rb:2229' } + my_shell: '/srv/jenkins/bin/reproducible_build.sh "{my_node1}" "{my_node2}"' my_timed: '* * * * *' my_hname: ''
signature.asc
Description: PGP signature
_______________________________________________ Reproducible-builds mailing list Reproducible-builds@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/reproducible-builds