whitelist command adds all existing files in the repo dir
Project: http://git-wip-us.apache.org/repos/asf/brooklyn-client/repo
Commit: http://git-wip-us.apache.org/repos/asf/brooklyn-client/commit/02f010b0
Tree: http://git-wip-us.apache.org/repos/asf/brooklyn-client/tree/02f010b0
Diff: http://git-wip-us.apache.org/repos/asf/brooklyn-client/diff/02f010b0

Branch: refs/heads/master
Commit: 02f010b0eadd1c18518b5b8d7e170147a71ffab7
Parents: e9c070a
Author: Alex Heneveld <[email protected]>
Authored: Thu Jan 28 05:25:17 2016 +0000
Committer: Alex Heneveld <[email protected]>
Committed: Thu Jan 28 05:25:17 2016 +0000

----------------------------------------------------------------------
 make-whitelist.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/brooklyn-client/blob/02f010b0/make-whitelist.sh
----------------------------------------------------------------------
diff --git a/make-whitelist.sh b/make-whitelist.sh
index eb31841..1b7ebef 100755
--- a/make-whitelist.sh
+++ b/make-whitelist.sh
@@ -30,6 +30,9 @@ pushd $REPO > /dev/null
 echo scanning $REPO for relevant files in history for $OUTPUT_FILENAME starting with `cat $TODO_REMAINING | wc -l` paths including $SAMPLE_PATHS
+# first seed with existing files, dropping an extra / which find might put in, in case any existing files aren't picked up by the log (e.g. on merge)
+cat ${TODO_REMAINING} | xargs -J % -n1 find % -type file 2> /dev/null | sed s/\\/\\//\\// >> ${OUTPUT}
 while [ -s $TODO_REMAINING ] ; do
  echo current pass has `cat $TODO_REMAINING | wc -l` paths including `( gshuf $TODO_REMAINING 2> /dev/null || cat $TODO_REMAINING ) | head -4`
@@ -40,7 +43,7 @@ while [ -s $TODO_REMAINING ] ; do
  rm -f $TODO_HERE
  echo collecting relevant commits...
- cat $TODO_REMAINING | xargs -L -n100 git log --format='%H' --diff-filter=A -- >> ${TODO_HERE}_ids
+ cat $TODO_REMAINING | xargs -n100 git log --format='%H' --diff-filter=A -- >> ${TODO_HERE}_ids
  sort -u ${TODO_HERE}_ids -o ${TODO_HERE}_ids
 # echo IDS | cat - ${TODO_HERE}_ids >> ${ORIG_DIR}/log
@@ -48,7 +51,7 @@ while [ -s $TODO_REMAINING ] ; do
  rm -f ${TODO_HERE}_allpaths
  echo gathering files from `cat ${TODO_HERE}_ids | wc -l` commits...
  # 50% match is a bit low but better safe than sorry for moves; for copies we go higher
- cat ${TODO_HERE}_ids | xargs -L -n100 git show -l99999 -M50 -C90 --name-status --format="ID: %H" | grep -v ^ID: | awk -F $'\t' '{ if ($3) print $3"\t"$2; else print $2; }' | sort -u >> ${TODO_HERE}_allpaths
+ cat ${TODO_HERE}_ids | xargs -n100 git show -l99999 -M50 -C90 --name-status --format="ID: %H" | grep -v ^ID: | awk -F $'\t' '{ if ($3) print $3"\t"$2; else print $2; }' | sort -u >> ${TODO_HERE}_allpaths
  echo comparing `cat ${TODO_HERE}_allpaths | wc -l` candidate files against paths...
  ${ORIG_DIR}/grep-lines-starting.sh ${TODO_REMAINING} ${TODO_HERE}_allpaths | awk -F $'\t' '{print $1; if ($2) print $2;}' | sort -u -o ${TODO_HERE}
