Hi,

Can you review the patch below? I haven't replaced '#' with '\#' yet (see my
previous mail). I now have commit permissions for collab-qa, so if everything
looks fine I can commit these changes to the repo.

On Thu, Apr 23, 2015 at 11:17 AM, Akshita Jha <[email protected]> wrote:

> ---
>  config-ullmann.yaml           |   4 +
>  scripts/cron_ftpnew_blends.sh |   1 +
>  udd/bibref_gatherer.py        | 112 +-------------------
>  udd/generate_bibtex.py        | 230
> ++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 237 insertions(+), 110 deletions(-)
>  create mode 100644 udd/generate_bibtex.py
>
> diff --git a/config-ullmann.yaml b/config-ullmann.yaml
> index 901550e..06c8a19 100644
> --- a/config-ullmann.yaml
> +++ b/config-ullmann.yaml
> @@ -45,6 +45,7 @@ general:
>      i18n-apps: module udd.i18n_apps_gatherer
>      hints: module udd.hints_gatherer
>      deferred: module udd.deferred_gatherer
> +    generate-bibtex: module udd.generate_bibtex
>    timestamp-dir: /srv/udd.debian.org/timestamps
>    lock-dir: /srv/udd.debian.org/locks
>    archs:
> @@ -471,3 +472,6 @@ vcswatch:
>
>  reproducible:
>    type: reproducible
> +
> +generate-bibtex:
> +  type: generate-bibtex
> diff --git a/scripts/cron_ftpnew_blends.sh b/scripts/cron_ftpnew_blends.sh
> index fc0d087..c38c076 100755
> --- a/scripts/cron_ftpnew_blends.sh
> +++ b/scripts/cron_ftpnew_blends.sh
> @@ -11,3 +11,4 @@ $UAR ftpnew
>  $UAR blends-prospective
>  # $UAR blends-metadata
>  $UAR blends-all
> +$UAR generate-bibtex
> diff --git a/udd/bibref_gatherer.py b/udd/bibref_gatherer.py
> index 654d7e7..41f9618 100644
> --- a/udd/bibref_gatherer.py
> +++ b/udd/bibref_gatherer.py
> @@ -6,8 +6,7 @@ This script imports bibliographic references from
> upstream-metadata.debian.net.
>
>  from gatherer import gatherer
>  from sys import stderr, exit
> -from os import listdir, unlink, rename, access, X_OK
> -from os.path import isfile
> +from os import listdir
>  from fnmatch import fnmatch
>  import yaml
>  from psycopg2 import IntegrityError, InternalError
> @@ -23,43 +22,9 @@ debug=0
>  def get_gatherer(connection, config, source):
>    return bibref_gatherer(connection, config, source)
>
> -def rm_f(file):
> -  try:
> -    unlink(file)
> -  except OSError:
> -    pass
> -
> -def cleanup_tex_logs(basetexfile):
> -  rm_f(basetexfile+'.aux')
> -  rm_f(basetexfile+'.bbl')
> -  rm_f(basetexfile+'.blg')
> -  rm_f(basetexfile+'.log')
> -
>  # seek for authors separated by ',' rather than by ' and '
>  seek_broken_authors_re =
> re.compile('^[^\s^,]+\s+[^\s^,]+\s*,\s*[^\s^,]+\s+[^\s^,]')
>
> -def open_tex_process(texexe, basetexfile):
> -  if texexe == 'pdflatex':
> -    ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile],
> shell=False, stdout=PIPE)
> -  elif texexe == 'bibtex':
> -    ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE)
> -  else:
> -    return(False, 'Wrong exe: '+texexe)
> -  errstring=""
> -  if ptex.wait():
> -    if texexe == 'pdflatex':
> -      for logrow in ptex.communicate()[0].splitlines():
> -        if logrow.startswith('!'):
> -          errstring += logrow
> -      return(False, errstring)
> -    else:
> -      for logrow in ptex.communicate()[0].splitlines():
> -        if logrow.startswith('This is BibTeX'):
> -          continue
> -        errstring += logrow + '\n'
> -      return(True, errstring)
> -  return(True, errstring)
> -
>  other_known_keys = ('Archive',
>                      'Bug-Database',
>                      'Cite-As',
> @@ -297,10 +262,6 @@ class bibref_gatherer(gatherer):
>      handler.setFormatter(formatter)
>      self.log.addHandler(handler)
>
> -
> -    self.bibtexfile = 'debian.bib'
> -    self.bibtex_example_tex = 'debian.tex'
> -
>    def run(self):
>      my_config = self.my_config
>      #start harassing the DB, preparing the final inserts and making place
> @@ -364,76 +325,7 @@ class bibref_gatherer(gatherer):
>      # commit before check to make sure the table is not locked in case
> LaTeX run will fail for whatever reason
>      self.connection.commit()
>
> -    # if there is a working LaTeX installation try to build a BibTeX
> database and test it by creating a debian.pdf file
> -    if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK)
> and \
> -       isfile('/usr/bin/bibtex')   and access('/usr/bin/bibtex', X_OK)
> and \
> -       (
> isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or
> \
> -
>  isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) :
> -      # create BibTeX file
> -      bf = open(self.bibtexfile, 'w')
> -      cur.execute("SELECT * FROM bibtex()")
> -      for row in cur.fetchall():
> -       print >>bf, row[0]
> -      bf.close()
> -
> -      # create LaTeX file to test BibTeX functionality
> -      bf = open(self.bibtex_example_tex, 'w')
> -      print >>bf, """\documentclass[10]{article}
> -\usepackage[T1]{fontenc}
> -\usepackage[utf8]{inputenc}
> -\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry}
> -\usepackage{longtable}
> -\usepackage[super]{natbib}
> -\setlongtables
> -\\begin{document}
> -\small
> -\\begin{longtable}{llp{70mm}l}
> -\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline"""
> -
> -      cur.execute("SELECT * FROM bibtex_example_data() AS (package text,
> source text, bibkey text, description text)")
> -      for row in cur.fetchall():
> -       print >>bf, row[0], '&', row[1], '&', row[3] , '&',
> row[2]+'\cite{'+row[2]+'} \\\\'
> -
> -      print >>bf, """\end{longtable}
> -
> -% \\bibliographystyle{plain}
> -% Try a bit harder by also including URL+DOI
> -\\bibliographystyle{plainnat}
> -\\bibliography{debian}
> -
> -\end{document}
> -"""
> -      bf.close()
> -
> -      # try to build debian.pdf file to test aboc LaTeX file
> -      basetexfile = self.bibtex_example_tex.replace('.tex','')
> -      cleanup_tex_logs(basetexfile)
> -      try:
> -        rename(basetexfile+'.pdf', basetexfile+'.pdf~')
> -      except OSError:
> -        pass
> -
> -      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
> -      if not retcode:
> -        self.log.error("Problem in 1. PdfLaTeX run of %s.tex: `%s` -->
> please inspect %s.log" % (basetexfile, errstring, basetexfile))
> -        exit(1)
> -      (retcode,errstring) = open_tex_process('bibtex', basetexfile)
> -      if errstring != "":
> -        if not retcode:
> -          self.log.error("Problem in BibTeX run of %s.bib: `%s`" %
> (basetexfile, errstring))
> -          exit(1)
> -        self.log.error("Ignore the following problems in BibTeX run of
> %s.bib: `%s`" % (basetexfile, errstring))
> -      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
> -      if not retcode:
> -        self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` -->
> please inspect %s.log" % (basetexfile, errstring, basetexfile))
> -        exit(1)
> -      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
> -      if not retcode:
> -        self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` -->
> please inspect %s.log" % (basetexfile, errstring, basetexfile))
> -        exit(1)
> -
> -      cleanup_tex_logs(basetexfile)
> -
> +
>  if __name__ == '__main__':
>    main()
>
> diff --git a/udd/generate_bibtex.py b/udd/generate_bibtex.py
> new file mode 100644
> index 0000000..6ddc03a
> --- /dev/null
> +++ b/udd/generate_bibtex.py
> @@ -0,0 +1,230 @@
> +from gatherer import  gatherer
> +from os import unlink, rename, access, X_OK
> +from os.path import isfile
> +from subprocess import Popen, PIPE
> +import logging
> +import logging.handlers
> +
> +debug = 0
> +
> +def get_gatherer(connection, config, source):
> +  return generate_bibtex(connection, config, source)
> +
> +def rm_f(file):
> +  try:
> +    unlink(file)
> +  except OSError:
> +    pass
> +
> +
> +def cleanup_tex_logs(basetexfile):
> +  rm_f(basetexfile+'.aux')
> +  rm_f(basetexfile+'.bbl')
> +  rm_f(basetexfile+'.blg')
> +  rm_f(basetexfile+'.log')
> +
> +
> +def open_tex_process(texexe, basetexfile):
> +  if texexe == 'pdflatex':
> +    ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile],
> shell=False, stdout=PIPE)
> +  elif texexe == 'bibtex':
> +    ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE)
> +  else:
> +    return(False, 'Wrong exe: '+texexe)
> +  errstring=""
> +  if ptex.wait():
> +    if texexe == 'pdflatex':
> +      for logrow in ptex.communicate()[0].splitlines():
> +        if logrow.startswith('!'):
> +          errstring += logrow
> +      return(False, errstring)
> +    else:
> +      for logrow in ptex.communicate()[0].splitlines():
> +        if logrow.startswith('This is BibTeX'):
> +          continue
> +        errstring += logrow + '\n'
> +      return(True, errstring)
> +  return(True, errstring)
> +
> +
> +class generate_bibtex(gatherer):
> +  """
> +  Generate a debian.bib and debian.tex files
> +  """
> +
> +  def __init__(self, connection, config, source):
> +    gatherer.__init__(self, connection, config, source)
> +
> +    self.log = logging.getLogger(self.__class__.__name__)
> +    if debug==1:
> +        self.log.setLevel(logging.DEBUG)
> +    else:
> +        self.log.setLevel(logging.INFO)
> +    handler =
> logging.handlers.RotatingFileHandler(filename=self.__class__.__name__+'.log',mode='w')
> +    formatter = logging.Formatter("%(asctime)s - %(levelname)s -
> (%(lineno)d): %(message)s")
> +    handler.setFormatter(formatter)
> +    self.log.addHandler(handler)
> +
> +    self.bibtexfile = 'debian.bib'
> +    self.bibtex_example_tex = 'debian.tex'
> +    self.all_ref = 0   # to include all references from bibref table set
> it to 1
> +
> +  def run(self):
> +    cur = self.cursor()
> +
> +    # if there is a working LaTeX installation try to build a BibTeX
> database and test it by creating a debian.pdf file
> +    if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK)
> and \
> +       isfile('/usr/bin/bibtex')   and access('/usr/bin/bibtex', X_OK)
> and \
> +       (
> isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or
> \
> +
>  isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) :
> +
> +      # create BibTeX file
> +      bf = open(self.bibtexfile, 'w')
> +
> +      if self.all_ref == 1:
> +        query = "SELECT * FROM bibtex()"
> +      else:
> +       query = """ SELECT DISTINCT
> +                        CASE WHEN bibjournal.value IS NULL AND
> bibin.value IS NOT NULL AND bibpublisher.value IS NOT NULL THEN '@Book{' ||
> bibkey.value
> +                            ELSE CASE WHEN bibauthor.value IS NULL OR
> bibjournal.value IS NULL THEN '@Misc{'|| bibkey.value ||
> +                                 CASE WHEN bibauthor.value IS NULL THEN
> E',\n  Key     = "' || bibkey.value || '"' ELSE '' END -- without author we
> need a sorting key
> +                            ELSE '@Article{' || bibkey.value END END  ||
> +                        CASE WHEN bibauthor.value  IS NOT NULL THEN
> E',\n  Author  = {' || bibauthor.value  || '}' ELSE '' END ||
> +                        CASE WHEN bibtitle.value   IS NOT NULL THEN
> E',\n  Title   = "{' ||
> +                          replace(replace(replace(bibtitle.value,
> +                                          '_', E'\\_'),            --
> +                                          '%', E'\\%'),            --
> +                                          E'\xe2\x80\x89', E'\\,') -- TeX
> syntax for '_' and UTF-8 "thin space"
> +                                          -- see
> http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=128&utf8=string-literal
> +                                  || '}"'
> +                        ELSE '' END ||
> +                        CASE WHEN bibbooktitle.value IS NOT NULL THEN
> E',\n  Booktitle = "{' || bibbooktitle.value || '}"' ELSE '' END ||
> +                        CASE WHEN bibyear.value    IS NOT NULL THEN
> E',\n  Year    = {' || bibyear.value    || '}' ELSE '' END ||
> +                        CASE WHEN bibmonth.value   IS NOT NULL THEN
> E',\n  Month   = {' || bibmonth.value   || '}' ELSE '' END ||
> +                        CASE WHEN bibjournal.value IS NOT NULL THEN
> E',\n  Journal = {' || replace(bibjournal.value, '&', E'\\&') || '}' ELSE
> '' END ||
> +                        CASE WHEN bibaddress.value IS NOT NULL THEN
> E',\n  Address = {' || bibaddress.value || '}' ELSE '' END ||
> +                        CASE WHEN bibpublisher.value IS NOT NULL THEN
> E',\n  Publisher = {' || bibpublisher.value || '}' ELSE '' END ||
> +                        CASE WHEN bibvolume.value  IS NOT NULL THEN
> E',\n  Volume  = {' || bibvolume.value  || '}' ELSE '' END ||
> +                        CASE WHEN bibnumber.value  IS NOT NULL THEN
> E',\n  Number  = {' || bibnumber.value  || '}' ELSE '' END ||
> +                        CASE WHEN bibpages.value   IS NOT NULL THEN
> E',\n  Pages   = {' || regexp_replace(bibpages.value, E'(\\d)-([\\d])',
> E'\\1--\\2')   || '}' ELSE '' END ||
> +                        CASE WHEN biburl.value     IS NOT NULL THEN
> E',\n  URL     = {' ||
> +                          replace(replace(replace(replace(biburl.value,
> +                                          '_', E'\\_'),           --
> +                                          '%', E'\\%'),           --
> +                                          '&', E'\\&'),           --
> +                                          '~', E'\\~{}')          --
> +                                  || '}'
> +                        ELSE '' END ||
> +                        CASE WHEN bibdoi.value     IS NOT NULL THEN
> E',\n  DOI     = {' ||
> +                          replace(replace(bibdoi.value,
> +                                          '_', E'\\_'),           --
> +                                          '&', E'\\&')            --
> +                                  || '}'
> +                        ELSE '' END ||
> +                        CASE WHEN bibpmid.value    IS NOT NULL THEN
> E',\n  PMID    = {' || bibpmid.value    || '}' ELSE '' END ||
> +                        CASE WHEN bibeprint.value  IS NOT NULL THEN
> E',\n  EPrint  = {' ||
> +                          replace(replace(replace(replace(bibeprint.value,
> +                                         '_', E'\\_'),           --
> +                                         '%', E'\\%'),           --
> +                                         '&', E'\\&'),           --
> +                                         '~', E'\\~{}')          --
> +                                  || '}'
> +                        ELSE '' END ||
> +                        CASE WHEN bibin.value      IS NOT NULL THEN
> E',\n  In      = {' || bibin.value      || '}' ELSE '' END ||
> +                        CASE WHEN bibissn.value    IS NOT NULL THEN
> E',\n  ISSN    = {' || bibissn.value    || '}' ELSE '' END ||
> +                        E',\n}\n'
> +                        AS bibentry
> +                        --         p.source         AS source,
> +                        --         p.rank           AS rank,
> +                FROM (SELECT DISTINCT source, package, rank FROM bibref) p
> +                INNER JOIN sources s ON s.source = p.source
> +                LEFT OUTER JOIN bibref bibkey     ON p.source =
> bibkey.source     AND bibkey.rank     = p.rank AND bibkey.package     =
> p.package AND bibkey.key     = 'bibtex'
> +                LEFT OUTER JOIN bibref bibyear    ON p.source =
> bibyear.source    AND bibyear.rank    = p.rank AND bibyear.package    =
> p.package AND bibyear.key    = 'year'
> +                LEFT OUTER JOIN bibref bibmonth   ON p.source =
> bibmonth.source   AND bibmonth.rank   = p.rank AND bibmonth.package   =
> p.package AND bibmonth.key   = 'month'
> +                LEFT OUTER JOIN bibref bibtitle   ON p.source =
> bibtitle.source   AND bibtitle.rank   = p.rank AND bibtitle.package   =
> p.package AND bibtitle.key   = 'title'
> +                LEFT OUTER JOIN bibref bibbooktitle ON p.source =
> bibbooktitle.source AND bibbooktitle.rank = p.rank AND bibbooktitle.package
> = p.package AND bibbooktitle.key = 'booktitle'
> +                LEFT OUTER JOIN bibref bibauthor  ON p.source =
> bibauthor.source  AND bibauthor.rank  = p.rank AND bibauthor.package  =
> p.package AND bibauthor.key  = 'author'
> +                LEFT OUTER JOIN bibref bibjournal ON p.source =
> bibjournal.source AND bibjournal.rank = p.rank AND bibjournal.package =
> p.package AND bibjournal.key = 'journal'
> +                LEFT OUTER JOIN bibref bibaddress ON p.source =
> bibaddress.source AND bibaddress.rank = p.rank AND bibaddress.package =
> p.package AND bibaddress.key = 'address'
> +                LEFT OUTER JOIN bibref bibpublisher ON p.source =
> bibpublisher.source AND bibpublisher.rank = p.rank AND bibpublisher.package
> = p.package AND bibpublisher.key = 'publisher'
> +                LEFT OUTER JOIN bibref bibvolume  ON p.source =
> bibvolume.source  AND bibvolume.rank  = p.rank AND bibvolume.package  =
> p.package AND bibvolume.key  = 'volume'
> +                LEFT OUTER JOIN bibref bibdoi     ON p.source =
> bibdoi.source     AND bibdoi.rank     = p.rank AND bibdoi.package     =
> p.package AND bibdoi.key     = 'doi'
> +                LEFT OUTER JOIN bibref bibpmid    ON p.source =
> bibpmid.source    AND bibpmid.rank    = p.rank AND bibpmid.package    =
> p.package AND bibpmid.key    = 'pmid'LEFT OUTER JOIN bibref biburl     ON
> p.source = biburl.source     AND biburl.rank     = p.rank AND
> biburl.package     = p.package AND biburl.key     = 'url'
> +                LEFT OUTER JOIN bibref bibnumber  ON p.source =
> bibnumber.source  AND bibnumber.rank  = p.rank AND bibnumber.package  =
> p.package AND bibnumber.key  = 'number'
> +                LEFT OUTER JOIN bibref bibpages   ON p.source =
> bibpages.source   AND bibpages.rank   = p.rank AND bibpages.package   =
> p.package AND bibpages.key   = 'pages'
> +                LEFT OUTER JOIN bibref bibeprint  ON p.source =
> bibeprint.source  AND bibeprint.rank  = p.rank AND bibeprint.package  =
> p.package AND bibeprint.key  = 'eprint'
> +                LEFT OUTER JOIN bibref bibin      ON p.source =
> bibin.source      AND bibin.rank      = p.rank AND bibin.package      =
> p.package AND bibin.key      = 'in'
> +                LEFT OUTER JOIN bibref bibissn    ON p.source =
> bibissn.source    AND bibissn.rank    = p.rank AND bibissn.package    =
> p.package AND bibissn.key    = 'issn'
> +                ORDER BY bibentry -- p.source
> +                ;"""
> +
> +      cur.execute(query)
> +      for row in cur.fetchall():
> +          print >>bf, row[0]
> +
> +      bf.close()
> +
> +      # create LaTeX file to test BibTeX functionality
> +      bf = open(self.bibtex_example_tex, 'w')
> +      print >>bf, """\documentclass[10]{article}
> +\usepackage[T1]{fontenc}
> +\usepackage[utf8]{inputenc}
> +\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry}
> +\usepackage{longtable}
> +\usepackage[super]{natbib}
> +\setlongtables
> +\\begin{document}
> +\small
> +\\begin{longtable}{llp{70mm}l}
> +\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline"""
> +
> +      cur.execute("SELECT * FROM bibtex_example_data() AS (package text,
> source text, bibkey text, description text)")
> +      for row in cur.fetchall():
> +       print >>bf, row[0], '&', row[1], '&', row[3] , '&',
> row[2]+'\cite{'+row[2]+'} \\\\'
> +
> +      print >>bf, """\end{longtable}
> +
> +% \\bibliographystyle{plain}
> +% Try a bit harder by also including URL+DOI
> +\\bibliographystyle{plainnat}
> +\\bibliography{debian}
> +
> +\end{document}
> +"""
> +      bf.close()
> +
> +      # try to build debian.pdf file to test aboc LaTeX file
> +      basetexfile = self.bibtex_example_tex.replace('.tex','')
> +      cleanup_tex_logs(basetexfile)
> +      try:
> +        rename(basetexfile+'.pdf', basetexfile+'.pdf~')
> +      except OSError:
> +        pass
> +
> +      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
> +      if not retcode:
> +        self.log.error("Problem in 1. PdfLaTeX run of %s.tex: `%s` -->
> please inspect %s.log" % (basetexfile, errstring, basetexfile))
> +        exit(1)
> +
> +      (retcode,errstring) = open_tex_process('bibtex', basetexfile)
> +      if errstring != "":
> +        if not retcode:
> +          self.log.error("Problem in BibTeX run of %s.bib: `%s`" %
> (basetexfile, errstring))
> +          exit(1)
> +        self.log.error("Ignore the following problems in BibTeX run of
> %s.bib: `%s`" % (basetexfile, errstring))
> +
> +      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
> +      if not retcode:
> +        self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` -->
> please inspect %s.log" % (basetexfile, errstring, basetexfile))
> +        exit(1)
> +
> +      (retcode,errstring) = open_tex_process('pdflatex', basetexfile)
> +      if not retcode:
> +        self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` -->
> please inspect %s.log" % (basetexfile, errstring, basetexfile))
> +        exit(1)
> +
> +      cleanup_tex_logs(basetexfile)
> +
> +if __name__ == '__main__':
> +  main()
> +
> --
> 1.9.1
>
>


-- 
Akshita Jha

Reply via email to