Author: moeller
Date: 2014-11-26 10:50:56 +0000 (Wed, 26 Nov 2014)
New Revision: 18517

Added:
   trunk/community/edam/test/
   trunk/community/edam/test/Makefile
   trunk/community/edam/test/test_01.sh
Modified:
   trunk/community/edam/debian2edam
Log:
Rough skeleton of a translator into EDAM ontology


I crafted a BASH script to help with the transition from
the Debian-typical YAML-like EDAM-description of our packages
into something that is closer to the expected JSON.

To play with it, please enter the test directory and type
'make'.


debian-med/community/edam/test $ make
./test_01.sh
Package: muscle
Description: Multiple alignment program of protein sequences
(
 Package summary
 Version 1:3.8.31
 Description
 Topic topic_0182
 (function
  (input data_2044)
  (input format_1929)
  (output data_1916)
W: found colon in ID of line ' output: format:1984 FASTA-aln' - transscribing to underscore
  (output format_1984)
  (output format_1982)
  (output format_1997)
 )# openfunction(1) after loop
)# openscope(1) after loop




Modified: trunk/community/edam/debian2edam
===================================================================
--- trunk/community/edam/debian2edam    2014-11-25 17:44:08 UTC (rev 18516)
+++ trunk/community/edam/debian2edam    2014-11-26 10:50:56 UTC (rev 18517)
@@ -1,30 +1,219 @@
 #!/bin/bash -e
 
+
+# A routine to facilitate the output to STDERR instead of the default STDOUT
+function STDERR () {
+       cat - 1>&2
+}
+
+function echoindent () {
+       for i in $(seq 1 $1)
+       do
+               echo -n " "
+       done
+}
+
+# helper to properly close an open parenthesis
+function closeParenthesis () {
+                       level=$((level-1))
+                       echoindent $level
+                       echo -n ")"
+                       if [ -n "$1" ]; then
+                               echo "# $1"
+                       else
+                               echo
+                       fi
+}
+
+# Key argument indicating the debian directory from which to retrieve all the
+# information
 pathToDebian=$1
+#verbose="yes"
+verbose=""
 
+# Variable keeping usage information
 USAGE=<<EOUSAGE
 debian2edam <path to 'debian' directory>
 EOUSAGE
 
+filename=$(basename "$pathToDebian")
+if [ "edam" = "$filename" ]; then
+       pathToDebian=$(dirname "$pathToDebian") # upstream
+       pathToDebian=$(dirname "$pathToDebian") # debian
+fi
+
+if [ -z "$pathToDebian" ]; then
+       echo "$USAGE" | STDERR
+       echo "E: Please specify debian directory in which to find EDAM annotation." | STDERR
+       exit -1
+fi
+
 if [ ! -d "$pathToDebian" ]; then
-       echo "$USAGE"
-       echo "Could not find directory 'pathToDebian'"
+       echo "$USAGE" | STDERR
+       echo "E: Could not find directory '$pathToDebian'" | STDERR
        exit -1
 fi
+
+if [ ! -r "$pathToDebian/changelog" ]; then
+       echo "$USAGE" | STDERR
+       echo "E: Could not find a changelog file expected at '$pathToDebian/changelog'" | STDERR
+       exit -1
+fi
+
 cd $(dirname "$pathToDebian")
 
 edamfile="debian/upstream/edam"
 if [ ! -r "$edamfile" ]; then
-       echo "$USAGE"
-       echo "Could not access file '$edamfile' from $(pwd)"
+       echo "$USAGE" | STDERR
+       echo "E: Could not access file '$edamfile' from $(pwd)" | STDERR
        exit -1
 fi
 
-package=$(dpkg-parsechangelog |grep ^Source | sed -e 's/`^Source: //' )
+sourcepackage=$(dpkg-parsechangelog |grep ^Source | sed -e 's/`^Source: //' )
 version=$(dpkg-parsechangelog |grep ^Version | cut -f2  -d\  | sed -e 's/-[^-][^-]*//' )
 
-echo "Package $package"
-echo "Version $version"
+declare -a descriptions
+#cat debian/control
 
+grep "^Package:" debian/control | \
+while read desc; do
+       d=$(echo "$desc"|sed -e 's/^[^:]*: *//')
+       echo Package: $d
+       packages[${#packages[*]}]="$d"
+done
 
+grep "^Description:" debian/control | \
+while read desc; do
+       d=$(echo "$desc"|sed -e 's/^[^:]*: *//')
+       echo Description: $d
+       descriptions[${#descriptions[*]}]="$d"
+done
 
+if [ ${#packagesp[*]} != ${#descriptions[*]} ]; then
+       echo "E: Internal error - expected same number of packages (${#packagesp[*]}) as for their descriptions (${#descriptions[*]})" | STDERR
+       exit -1
+fi
+
+for packageno in $(seq 1 ${#descriptions[*]})
+do
+       echo Packages: ${packages[$packageno]}
+       echo Descriptions: ${descriptions[$packageno]}
+done
+prevstate="start";
+previndent=0
+currentscope=""
+currenttopic=""
+opentopic=0
+openfunction=0
+openscope=0
+indentlen=0
+level=0
+
+# Core part of the program
+# It reads every line of the EDAM file (see end of loop for the redirection)
+# and decides what to print to STDOUT.
+
+while IFS='' read -r line 
+do
+       if [ -z "$line" ]; then
+               echo "Read empty line"
+               continue
+       fi
+
+       if [ -n "$verbose" ]; then
+               echo "line: '$line'" | STDERR
+       fi
+
+       # retrieve different parts of the description
+       blanks=$(echo "$line"|sed -e 's/^\( *\)\([^ :]\+\): *\([^ ]\+\).*$/\1/')
+       type=$(echo   "$line"|sed -e 's/^\( *\)\([^ :]\+\): *\([^ ]\+\).*$/\2/')
+       val=$(echo    "$line"|sed -e 's/^\( *\)\([^ :]\+\): *\([^ ]\+\).*$/\3/')
+
+       if echo "$val" | grep -q : ; then
+               echo "W: found colon in ID of line '$line' - transscribing to underscore"|STDERR
+               val=$(echo "$val"|tr ":" "_")
+       fi
+
+       #echo "Indent='$blanks'"
+       #echo "Indentlength='$indentlen'"
+       #echo "Type='$type'"
+       #echo "Val='$val'"
+
+       if [  -n "$currentscope" -a "*" != "$currentscope" -a "summary" != "$currentscope" -a "scope" != "$type" ]; then
+               echo "I: Wrong scope ($currentscope) - ignored '$line'" | STDERR
+               continue
+       fi
+       indentlen=${#blanks}
+
+       if [ "scope" = "$type" ]; then
+               if [ $openfunction -gt 0 ]; then closeParenthesis "openfunction($openfunction) in scope"; fi
+               currentscope="$val"
+               resourcename=$sourcepackage
+               if [ "*"!=$val -a "summary"!="$val" ];then
+                       resourcename=$val
+               fi
+
+               if [ "summary" != "$val" -a "*" != "$val" ]; then
+                       echo "I: treatment of multiple scopes not yet implemented" | STDERR
+               else
+                       echo "("
+                       level=$((level+1))
+                       echoindent
+                       echo "Package $resourcename"
+                       echoindent
+                       echo "Version $version"
+                       echoindent
+                       echo "Description ${descriptions[1]}"
+                       echoindent
+                       echo "Topic $currenttopic"
+                       openscope=1
+               fi
+       elif [ "topic" = "$type" ]; then
+               if [ $openfunction -gt 0 ]; then closeParenthesis "openfunction($openfunction) in topic"; openfunction=0; fi
+               if [ $openscope -gt 0 ]; then closeParenthesis "openscope($openscope) after loop"; openscope=0; fi
+               if [ "start" != "$prevstate" ]; then
+                       closeParenthesis "topic with prior state - weird"
+               fi
+               currenttopic="$val"
+               # at some later implementation point, bits generated here would be cached and then distributed
+               # to various lower-level scopes
+       elif [ "function" = "$type" ]; then
+               if [ $openfunction -gt 0 ]; then
+                       closeParenthesis "openfunction($openfunction) in function"
+                       openfunction=0
+               fi
+               echoindent $level
+               echo "(function";
+               level=$((level+1))
+               openfunction=1
+       elif [ "input" = "$type" -o "output" = "$type" ]; then
+               echoindent $level
+               echo "($type $val)"
+       else
+               echo "W: unknown type '$type' - ignored" | STDERR
+       fi
+       prevstate=$type
+       #echo "indentlen='$indentlen'"
+done < $edamfile 
+
+if [ $openfunction -gt 0 ]; then
+       closeParenthesis "openfunction($openfunction) after loop"
+       openfunction=0
+fi
+
+if [ $openscope -gt 0 ]; then
+       #echo "I: treatment of multiple scopes not yet implemented"|STDERR
+       closeParenthesis "openscope($openscope) after loop"
+       openscope=0
+fi
+
+#echo "indentlen='$indentlen'" | STDERR
+
+if [ $opentopic -gt 0 ]; then
+       opentopic=0
+fi
+
+#for i in $(seq $(($indentlen-$openfunction-$openscope-$opentopic)) -1 1)
+#do
+#      closeParenthesis "indent $i"
+#done

Added: trunk/community/edam/test/Makefile
===================================================================
--- trunk/community/edam/test/Makefile                          (rev 0)
+++ trunk/community/edam/test/Makefile  2014-11-26 10:50:56 UTC (rev 18517)
@@ -0,0 +1,2 @@
+test:
+       ./test_01.sh

Added: trunk/community/edam/test/test_01.sh
===================================================================
--- trunk/community/edam/test/test_01.sh                                (rev 0)
+++ trunk/community/edam/test/test_01.sh        2014-11-26 10:50:56 UTC (rev 18517)
@@ -0,0 +1 @@
+../debian2edam ../../../packages/muscle/trunk/debian/upstream/edam


Property changes on: trunk/community/edam/test/test_01.sh
___________________________________________________________________
Added: svn:executable
   + *


_______________________________________________
debian-med-commit mailing list
debian-med-commit@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

Reply via email to