Added: uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html URL: http://svn.apache.org/viewvc/uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html?rev=1710669&view=auto ============================================================================== --- uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html (added) +++ uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html Mon Oct 26 18:25:16 2015 @@ -0,0 +1,14241 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"> +<html > +<head><title>Distributed UIMA Cluster Computing</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="generator" content="TeX4ht (http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> +<meta name="originator" content="TeX4ht (http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> +<!-- html --> +<meta name="src" content="duccbook.tex"> +<meta name="date" content="2015-10-15 13:07:00"> +<link rel="stylesheet" type="text/css" href="duccbook.css"> +</head><body +> + + +<div class="maketitle"> + + + + + +<h2 class="titleHead">Distributed UIMA Cluster Computing</h2> +<div class="author" ><span +class="cmr-12">Written and maintained by the Apache</span> +<br /> <span +class="cmr-12">UIMA</span><sup class="textsuperscript"><span +class="cmr-9">TM</span></sup><span +class="cmr-12">Development Community</span><br /><br /><br /> +<br /> <span +class="cmr-12">Version 2.0.1</span></div> +<br /> +<div class="date" ></div> + + +</div> +<!--l. 18--><p class="noindent" >Copyright <span +class="cmsy-10">©</span>  2012 The Apache Software Foundation +<!--l. 20--><p class="noindent" >Copyright <span +class="cmsy-10">©</span>  2012 International Business Machines Corporation + <!--l. 
23--><p class="noindent" ><span class="paragraphHead"><a + id="x1-1000"></a><span +class="cmbx-10">License and Disclaimer</span></span> + The ASF licenses this documentation to you under the Apache License, Version 2.0 (the ”License”); you may not + use this documentation except in compliance with the License. You may obtain a copy of the License + at + <!--l. 28--><p class="noindent" ><a +href="http://www.apache.org/licenses/LICENSE-2.0" class="url" ><span +class="cmtt-10">http://www.apache.org/licenses/LICENSE-2.0</span></a> + <!--l. 30--><p class="noindent" >Unless required by applicable law or agreed to in writing, this documentation and its contents are distributed under + the License on an ”AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + or implied. See the License for the specific language governing permissions and limitations under the + License. + <!--l. 35--><p class="noindent" ><span class="paragraphHead"><a + id="x1-2000"></a><span +class="cmbx-10">Trademarks</span></span> + All terms mentioned in the text that are known to be trademarks or service marks have been appropriately capitalized. + Use of such terms in this book should not be regarded as affecting the validity of the trademark or service + mark. +<!--l. 
47--><p class="noindent" >Publication date: October 2015 + + +<h2 class="likechapterHead"><a + id="x1-3000"></a>Table of Contents</h2> <div class="tableofcontents"> +<span class="partToc" >I  <a +href="#x1-5000I" id="QQ2-1-5">DUCC Concepts</a></span> +<br /><span class="chapterToc" >1 <a +href="#x1-60001" id="QQ2-1-6">DUCC Overview</a></span> +<br /> <span class="sectionToc" >1.1 <a +href="#x1-70001.1" id="QQ2-1-7">What is DUCC?</a></span> +<br /> <span class="sectionToc" >1.2 <a +href="#x1-80001.2" id="QQ2-1-8">DUCC Job Model</a></span> +<br /> <span class="sectionToc" >1.3 <a +href="#x1-90001.3" id="QQ2-1-9">DUCC From UIMA to Full Scale-out</a></span> +<br /> <span class="sectionToc" >1.4 <a +href="#x1-140001.4" id="QQ2-1-18">Error Management </a></span> +<br /> <span class="sectionToc" >1.5 <a +href="#x1-150001.5" id="QQ2-1-19">Cluster and Job Management</a></span> +<br /> <span class="sectionToc" >1.6 <a +href="#x1-160001.6" id="QQ2-1-20">Security Measures</a></span> +<br />  <span class="subsectionToc" >1.6.1 <a +href="#x1-170001.6.1" id="QQ2-1-21">ducc_ling</a></span> +<br /> <span class="sectionToc" >1.7 <a +href="#x1-180001.7" id="QQ2-1-22">Security Issues</a></span> +<br /><span class="chapterToc" >2 <a +href="#x1-190002" id="QQ2-1-23">Glossary</a></span> +<br /><span class="partToc" >II  <a +href="#x1-20000II" id="QQ2-1-24">Ducc Users Guide</a></span> +<br /><span class="chapterToc" >3 <a +href="#x1-210003" id="QQ2-1-25">Command Line Interface</a></span> +<br /> <span class="sectionToc" >3.1 <a +href="#x1-230003.1" id="QQ2-1-27">The DUCC Job Descriptor</a></span> +<br /> <span class="sectionToc" >3.2 <a +href="#x1-240003.2" id="QQ2-1-28">Operating System Limit Support</a></span> +<br /> <span class="sectionToc" >3.3 <a +href="#x1-250003.3" id="QQ2-1-29">Command Line Forms</a></span> +<br /> <span class="sectionToc" >3.4 <a +href="#x1-260003.4" id="QQ2-1-30">DUCC Commands</a></span> +<br /> <span class="sectionToc" >3.5 <a +href="#x1-270003.5" 
id="QQ2-1-31">ducc_submit</a></span> +<br /> <span class="sectionToc" >3.6 <a +href="#x1-320003.6" id="QQ2-1-36">ducc_cancel</a></span> +<br /> <span class="sectionToc" >3.7 <a +href="#x1-370003.7" id="QQ2-1-41">ducc_reserve</a></span> +<br /> <span class="sectionToc" >3.8 <a +href="#x1-420003.8" id="QQ2-1-46">ducc_unreserve</a></span> +<br /> <span class="sectionToc" >3.9 <a +href="#x1-470003.9" id="QQ2-1-51">ducc_process_submit</a></span> +<br /> <span class="sectionToc" >3.10 <a +href="#x1-520003.10" id="QQ2-1-56">ducc_process_cancel</a></span> +<br /> <span class="sectionToc" >3.11 <a +href="#x1-570003.11" id="QQ2-1-61">ducc_services</a></span> +<br />  <span class="subsectionToc" >3.11.1 <a +href="#x1-610003.11.1" id="QQ2-1-65">Common Options</a></span> +<br />  <span class="subsectionToc" >3.11.2 <a +href="#x1-620003.11.2" id="QQ2-1-66">ducc_services –register [specification file] [options]</a></span> +<br />  <span class="subsectionToc" >3.11.3 <a +href="#x1-630003.11.3" id="QQ2-1-67">ducc_services –start options</a></span> +<br />  <span class="subsectionToc" >3.11.4 <a +href="#x1-640003.11.4" id="QQ2-1-68">ducc_services –stop options</a></span> +<br />  <span class="subsectionToc" >3.11.5 <a +href="#x1-650003.11.5" id="QQ2-1-69">ducc_services –enable options</a></span> +<br />  <span class="subsectionToc" >3.11.6 <a +href="#x1-660003.11.6" id="QQ2-1-70">ducc_services –disable options</a></span> + + +<br />  <span class="subsectionToc" >3.11.7 <a +href="#x1-670003.11.7" id="QQ2-1-71">ducc_services –observe_references options</a></span> +<br />  <span class="subsectionToc" >3.11.8 <a +href="#x1-680003.11.8" id="QQ2-1-72">ducc_services –ignore_references options</a></span> +<br />  <span class="subsectionToc" >3.11.9 <a +href="#x1-690003.11.9" id="QQ2-1-73">ducc_services –modify options</a></span> +<br />  <span class="subsectionToc" >3.11.10 <a +href="#x1-700003.11.10" id="QQ2-1-74">ducc_services –query options</a></span> +<br /> <span class="sectionToc" 
>3.12 <a +href="#x1-720003.12" id="QQ2-1-76">viaducc and java_viaducc</a></span> +<br /><span class="chapterToc" >4 <a +href="#x1-760004" id="QQ2-1-80">The DUCC Public API</a></span> +<br /> <span class="sectionToc" >4.1 <a +href="#x1-770004.1" id="QQ2-1-81">Overview Of The DUCC API</a></span> +<br /> <span class="sectionToc" >4.2 <a +href="#x1-780004.2" id="QQ2-1-82">Compiling and Running With the DUCC API</a></span> +<br /> <span class="sectionToc" >4.3 <a +href="#x1-790004.3" id="QQ2-1-83">Java API</a></span> +<br /><span class="chapterToc" >5 <a +href="#x1-800005" id="QQ2-1-84">Service Management</a></span> +<br /> <span class="sectionToc" >5.1 <a +href="#x1-810005.1" id="QQ2-1-85">Overview.</a></span> +<br /> <span class="sectionToc" >5.2 <a +href="#x1-820005.2" id="QQ2-1-86">Service Types.</a></span> +<br /> <span class="sectionToc" >5.3 <a +href="#x1-830005.3" id="QQ2-1-87">Service Instance IDs</a></span> +<br /> <span class="sectionToc" >5.4 <a +href="#x1-840005.4" id="QQ2-1-88">Service References and Endpoints</a></span> +<br /> <span class="sectionToc" >5.5 <a +href="#x1-850005.5" id="QQ2-1-89">Service Management Policies</a></span> +<br /> <span class="sectionToc" >5.6 <a +href="#x1-870005.6" id="QQ2-1-91">Service Pingers</a></span> +<br />  <span class="subsectionToc" >5.6.1 <a +href="#x1-880005.6.1" id="QQ2-1-92">The Pinger API</a></span> +<br />  <span class="subsectionToc" >5.6.2 <a +href="#x1-910005.6.2" id="QQ2-1-95">Declaring a Pinger in A Service</a></span> +<br />  <span class="subsectionToc" >5.6.3 <a +href="#x1-920005.6.3" id="QQ2-1-96">Implementing a Pinger</a></span> +<br />  <span class="subsectionToc" >5.6.4 <a +href="#x1-930005.6.4" id="QQ2-1-98">Building And Testing Your Pinger</a></span> +<br />  <span class="subsectionToc" >5.6.5 <a +href="#x1-980005.6.5" id="QQ2-1-103">Globally Registered Pingers</a></span> +<br /> <span class="sectionToc" >5.7 <a +href="#x1-990005.7" id="QQ2-1-104">Sample Pinger</a></span> +<br />  <span 
class="subsectionToc" >5.7.1 <a +href="#x1-1000005.7.1" id="QQ2-1-105">Using the Sample Pinger</a></span> +<br />  <span class="subsectionToc" >5.7.2 <a +href="#x1-1010005.7.2" id="QQ2-1-106">Understanding Sample Pinger</a></span> +<br />  <span class="subsectionToc" >5.7.3 <a +href="#x1-1140005.7.3" id="QQ2-1-119">Calculating New Deployments in the Pinger</a></span> +<br />  <span class="subsectionToc" >5.7.4 <a +href="#x1-1250005.7.4" id="QQ2-1-130">Summary of Sample Pinger</a></span> +<br /><span class="chapterToc" >6 <a +href="#x1-1260006" id="QQ2-1-131">Job Logs</a></span> +<br /><span class="chapterToc" >7 <a +href="#x1-1320007" id="QQ2-1-137">DUCC Web Server</a></span> +<br /> <span class="sectionToc" >7.1 <a +href="#x1-1370007.1" id="QQ2-1-144">Common Links</a></span> +<br /> <span class="sectionToc" >7.2 <a +href="#x1-1380007.2" id="QQ2-1-145">Jobs Page</a></span> +<br /> <span class="sectionToc" >7.3 <a +href="#x1-1390007.3" id="QQ2-1-147">Job Details Page</a></span> +<br />  <span class="subsectionToc" >7.3.1 <a +href="#x1-1400007.3.1" id="QQ2-1-148">Processes</a></span> +<br />  <span class="subsectionToc" >7.3.2 <a +href="#x1-1410007.3.2" id="QQ2-1-150">Work Items</a></span> +<br />  <span class="subsectionToc" >7.3.3 <a +href="#x1-1420007.3.3" id="QQ2-1-152">Performance</a></span> +<br />  <span class="subsectionToc" >7.3.4 <a +href="#x1-1430007.3.4" id="QQ2-1-154">Specification</a></span> +<br /> <span class="sectionToc" >7.4 <a +href="#x1-1440007.4" id="QQ2-1-156">Reservation Page</a></span> +<br /> <span class="sectionToc" >7.5 <a +href="#x1-1450007.5" id="QQ2-1-158">Managed Reservation Details Page</a></span> + + +<br />  <span class="subsectionToc" >7.5.1 <a +href="#x1-1460007.5.1" id="QQ2-1-159">Processes</a></span> +<br />  <span class="subsectionToc" >7.5.2 <a +href="#x1-1470007.5.2" id="QQ2-1-160">Specification</a></span> +<br /> <span class="sectionToc" >7.6 <a +href="#x1-1480007.6" id="QQ2-1-161">Services Page</a></span> +<br /> <span 
class="sectionToc" >7.7 <a +href="#x1-1490007.7" id="QQ2-1-162">Service Details Page</a></span> +<br />  <span class="subsectionToc" >7.7.1 <a +href="#x1-1500007.7.1" id="QQ2-1-163">Processes</a></span> +<br />  <span class="subsectionToc" >7.7.2 <a +href="#x1-1510007.7.2" id="QQ2-1-164">Specification</a></span> +<br /> <span class="sectionToc" >7.8 <a +href="#x1-1520007.8" id="QQ2-1-165">System Details Page</a></span> +<br />  <span class="subsectionToc" >7.8.1 <a +href="#x1-1530007.8.1" id="QQ2-1-166">Administration</a></span> +<br />  <span class="subsectionToc" >7.8.2 <a +href="#x1-1540007.8.2" id="QQ2-1-167">Classes</a></span> +<br />  <span class="subsectionToc" >7.8.3 <a +href="#x1-1550007.8.3" id="QQ2-1-168">Daemons</a></span> +<br />  <span class="subsectionToc" >7.8.4 <a +href="#x1-1560007.8.4" id="QQ2-1-169">Machines</a></span> +<br /> <span class="sectionToc" >7.9 <a +href="#x1-1570007.9" id="QQ2-1-170">Visualization</a></span> +<br /><span class="partToc" >III  <a +href="#x1-158000III" id="QQ2-1-172">Programming Model And Applications</a></span> +<br /><span class="chapterToc" >8 <a +href="#x1-1590008" id="QQ2-1-173">Building and Testing Jobs</a></span> +<br /> <span class="sectionToc" >8.1 <a +href="#x1-1600008.1" id="QQ2-1-174">Overview</a></span> +<br />  <span class="subsectionToc" >8.1.1 <a +href="#x1-1610008.1.1" id="QQ2-1-175">Basic Job Process Threading Model</a></span> +<br />  <span class="subsectionToc" >8.1.2 <a +href="#x1-1620008.1.2" id="QQ2-1-176">Alternate Pipeline Threading Model</a></span> +<br />  <span class="subsectionToc" >8.1.3 <a +href="#x1-1630008.1.3" id="QQ2-1-177">Overriding UIMA Configuration Parameters</a></span> +<br /> <span class="sectionToc" >8.2 <a +href="#x1-1640008.2" id="QQ2-1-178">Collection Segmentation and Artifact Extraction</a></span> +<br /> <span class="sectionToc" >8.3 <a +href="#x1-1650008.3" id="QQ2-1-179">CAS Consumer Changes for DUCC</a></span> +<br /> <span class="sectionToc" >8.4 <a 
+href="#x1-1660008.4" id="QQ2-1-180">Job Development for an Existing Pipeline Design</a></span> +<br /> <span class="sectionToc" >8.5 <a +href="#x1-1670008.5" id="QQ2-1-181">Job Development for a New Pipeline Design</a></span> +<br />  <span class="subsectionToc" >8.5.1 <a +href="#x1-1680008.5.1" id="QQ2-1-182">Collection Reader (CR) Characteristics</a></span> +<br />  <span class="subsectionToc" >8.5.2 <a +href="#x1-1690008.5.2" id="QQ2-1-183">DUCC built-in Flow Controller</a></span> +<br />  <span class="subsectionToc" >8.5.3 <a +href="#x1-1700008.5.3" id="QQ2-1-184">Workitem Feature Structure</a></span> +<br />  <span class="subsectionToc" >8.5.4 <a +href="#x1-1710008.5.4" id="QQ2-1-185">Deployment Descriptor (DD) Jobs</a></span> +<br />  <span class="subsectionToc" >8.5.5 <a +href="#x1-1720008.5.5" id="QQ2-1-186">Debugging</a></span> +<br /><span class="chapterToc" >9 <a +href="#x1-1730009" id="QQ2-1-187">Sample Application: Raw Text Processing</a></span> +<br /> <span class="sectionToc" >9.1 <a +href="#x1-1740009.1" id="QQ2-1-188">Application Function and Design</a></span> +<br /> <span class="sectionToc" >9.2 <a +href="#x1-1750009.2" id="QQ2-1-189">Configuration Parameters</a></span> +<br /> <span class="sectionToc" >9.3 <a +href="#x1-1760009.3" id="QQ2-1-190">Set up a working directory</a></span> +<br /> <span class="sectionToc" >9.4 <a +href="#x1-1770009.4" id="QQ2-1-191">Download and Install OpenNLP</a></span> +<br /> <span class="sectionToc" >9.5 <a +href="#x1-1780009.5" id="QQ2-1-192">Get some Input Text</a></span> +<br /> <span class="sectionToc" >9.6 <a +href="#x1-1790009.6" id="QQ2-1-193">Run the Job</a></span> +<br /> <span class="sectionToc" >9.7 <a +href="#x1-1800009.7" id="QQ2-1-194">Job Output</a></span> +<br /> <span class="sectionToc" >9.8 <a +href="#x1-1810009.8" id="QQ2-1-195">Job Performance Details</a></span> +<br /><span class="chapterToc" >10 <a +href="#x1-18200010" id="QQ2-1-198">Sample Application: CAS Input Processing</a></span> + + 
+<br /> <span class="sectionToc" >10.1 <a +href="#x1-18300010.1" id="QQ2-1-199">Application Function and Design</a></span> +<br /> <span class="sectionToc" >10.2 <a +href="#x1-18400010.2" id="QQ2-1-200">Configuration Parameters</a></span> +<br /> <span class="sectionToc" >10.3 <a +href="#x1-18500010.3" id="QQ2-1-201">Run the Job</a></span> +<br /> <span class="sectionToc" >10.4 <a +href="#x1-18600010.4" id="QQ2-1-202">Job Performance Details</a></span> +<br /> <span class="sectionToc" >10.5 <a +href="#x1-18700010.5" id="QQ2-1-204">Limiting Job Resources</a></span> +<br /><span class="partToc" >IV  <a +href="#x1-188000IV" id="QQ2-1-205">Ducc Administrators Guide</a></span> +<br /><span class="chapterToc" >11 <a +href="#x1-18900011" id="QQ2-1-206">Installation, Configuration, and Verification</a></span> +<br /> <span class="sectionToc" >11.1 <a +href="#x1-19000011.1" id="QQ2-1-207">Overview</a></span> +<br /> <span class="sectionToc" >11.2 <a +href="#x1-19100011.2" id="QQ2-1-208">Software Prerequisites</a></span> +<br /> <span class="sectionToc" >11.3 <a +href="#x1-19200011.3" id="QQ2-1-209">Building from Source</a></span> +<br /> <span class="sectionToc" >11.4 <a +href="#x1-19300011.4" id="QQ2-1-210">Documentation</a></span> +<br /> <span class="sectionToc" >11.5 <a +href="#x1-19400011.5" id="QQ2-1-211">Single System Installation and Verification</a></span> +<br /> <span class="sectionToc" >11.6 <a +href="#x1-19500011.6" id="QQ2-1-212">Minimal Hardware Requirements for Single System Installation</a></span> +<br /> <span class="sectionToc" >11.7 <a +href="#x1-19600011.7" id="QQ2-1-213">Single System Installation</a></span> +<br /> <span class="sectionToc" >11.8 <a +href="#x1-19700011.8" id="QQ2-1-214">Initial System Verification</a></span> +<br /> <span class="sectionToc" >11.9 <a +href="#x1-19800011.9" id="QQ2-1-215">Add additional nodes to the DUCC cluster</a></span> +<br /> <span class="sectionToc" >11.10 <a +href="#x1-19900011.10" id="QQ2-1-216">Ducc_ling 
Configuration - Running with credentials of submitting user</a></span> +<br /> <span class="sectionToc" >11.11 <a +href="#x1-20000011.11" id="QQ2-1-217">CGroups Installation and Configuration</a></span> +<br /> <span class="sectionToc" >11.12 <a +href="#x1-20100011.12" id="QQ2-1-218">Full DUCC Verification</a></span> +<br /> <span class="sectionToc" >11.13 <a +href="#x1-20200011.13" id="QQ2-1-219">Enable DUCC webserver login</a></span> +<br /><span class="chapterToc" >12 <a +href="#x1-20300012" id="QQ2-1-220">Administration</a></span> +<br /> <span class="sectionToc" >12.1 <a +href="#x1-20400012.1" id="QQ2-1-221">WebServer Authentication</a></span> +<br />  <span class="subsectionToc" >12.1.1 <a +href="#x1-20500012.1.1" id="QQ2-1-222">Example Implementation</a></span> +<br />  <span class="subsectionToc" >12.1.2 <a +href="#x1-20600012.1.2" id="QQ2-1-223">IAuthenticationManager</a></span> +<br />  <span class="subsectionToc" >12.1.3 <a +href="#x1-20700012.1.3" id="QQ2-1-224">IAuthenticationResult</a></span> +<br />  <span class="subsectionToc" >12.1.4 <a +href="#x1-20800012.1.4" id="QQ2-1-225">Example ANT script to build jar</a></span> +<br />  <span class="subsectionToc" >12.1.5 <a +href="#x1-20900012.1.5" id="QQ2-1-226">Example ducc.properties entries</a></span> +<br />  <span class="subsectionToc" >12.1.6 <a +href="#x1-21000012.1.6" id="QQ2-1-227">Example ducc.administrators</a></span> +<br /> <span class="sectionToc" >12.2 <a +href="#x1-21100012.2" id="QQ2-1-228">Properties</a></span> +<br /> <span class="sectionToc" >12.3 <a +href="#x1-21200012.3" id="QQ2-1-229">Properties merging</a></span> +<br /> <span class="sectionToc" >12.4 <a +href="#x1-21300012.4" id="QQ2-1-230">ducc.properties</a></span> +<br />  <span class="subsectionToc" >12.4.1 <a +href="#x1-21400012.4.1" id="QQ2-1-231">General DUCC Properties</a></span> +<br />  <span class="subsectionToc" >12.4.2 <a +href="#x1-21500012.4.2" id="QQ2-1-232">Web Server Properties</a></span> +<br />  <span 
class="subsectionToc" >12.4.3 <a +href="#x1-21600012.4.3" id="QQ2-1-233">Job Driver Properties</a></span> +<br />  <span class="subsectionToc" >12.4.4 <a +href="#x1-21700012.4.4" id="QQ2-1-234">Service Manager Properties</a></span> +<br />  <span class="subsectionToc" >12.4.5 <a +href="#x1-21800012.4.5" id="QQ2-1-235">Orchestrator Properties</a></span> +<br />  <span class="subsectionToc" >12.4.6 <a +href="#x1-21900012.4.6" id="QQ2-1-236">Resource Manager Properties</a></span> + + +<br />  <span class="subsectionToc" >12.4.7 <a +href="#x1-22000012.4.7" id="QQ2-1-237">Agent Properties</a></span> +<br />  <span class="subsectionToc" >12.4.8 <a +href="#x1-22100012.4.8" id="QQ2-1-238">Process Manager Properties</a></span> +<br />  <span class="subsectionToc" >12.4.9 <a +href="#x1-22200012.4.9" id="QQ2-1-239">Job Process Properties</a></span> +<br /> <span class="sectionToc" >12.5 <a +href="#x1-22300012.5" id="QQ2-1-240">ducc.private.properties</a></span> +<br />  <span class="subsectionToc" >12.5.1 <a +href="#x1-22400012.5.1" id="QQ2-1-241">Web Server Properties</a></span> +<br /> <span class="sectionToc" >12.6 <a +href="#x1-22500012.6" id="QQ2-1-242">Resource Manager Configuration: Classes and Nodepools</a></span> +<br />  <span class="subsectionToc" >12.6.1 <a +href="#x1-22600012.6.1" id="QQ2-1-243">Nodepools</a></span> +<br />  <span class="subsectionToc" >12.6.2 <a +href="#x1-23000012.6.2" id="QQ2-1-251">Class Definitions</a></span> +<br />  <span class="subsectionToc" >12.6.3 <a +href="#x1-23100012.6.3" id="QQ2-1-253">Validation</a></span> +<br /> <span class="sectionToc" >12.7 <a +href="#x1-23400012.7" id="QQ2-1-256">Ducc Node Definitions</a></span> +<br /> <span class="sectionToc" >12.8 <a +href="#x1-23500012.8" id="QQ2-1-258">Ducc User Definitions</a></span> +<br /> <span class="sectionToc" >12.9 <a +href="#x1-23600012.9" id="QQ2-1-260">Administrative Commands</a></span> +<br />  <span class="subsectionToc" >12.9.1 <a +href="#x1-23700012.9.1" 
id="QQ2-1-261">start_ducc</a></span> +<br />  <span class="subsectionToc" >12.9.2 <a +href="#x1-24400012.9.2" id="QQ2-1-268">stop_ducc</a></span> +<br />  <span class="subsectionToc" >12.9.3 <a +href="#x1-24900012.9.3" id="QQ2-1-273">check_ducc</a></span> +<br />  <span class="subsectionToc" >12.9.4 <a +href="#x1-25300012.9.4" id="QQ2-1-277">rm_reconfigure</a></span> +<br />  <span class="subsectionToc" >12.9.5 <a +href="#x1-25600012.9.5" id="QQ2-1-280">rm_qload</a></span> +<br />  <span class="subsectionToc" >12.9.6 <a +href="#x1-26100012.9.6" id="QQ2-1-285">rm_qoccupancy</a></span> +<br />  <span class="subsectionToc" >12.9.7 <a +href="#x1-26400012.9.7" id="QQ2-1-288">vary_off</a></span> +<br />  <span class="subsectionToc" >12.9.8 <a +href="#x1-26700012.9.8" id="QQ2-1-291">vary_on</a></span> +<br />  <span class="subsectionToc" >12.9.9 <a +href="#x1-27000012.9.9" id="QQ2-1-294">ducc_properties_manager</a></span> +<br /><span class="chapterToc" >13 <a +href="#x1-27500013" id="QQ2-1-299">Resource Management</a></span> +<br /> <span class="sectionToc" >13.1 <a +href="#x1-27600013.1" id="QQ2-1-300">Overview</a></span> +<br /> <span class="sectionToc" >13.2 <a +href="#x1-27700013.2" id="QQ2-1-301">Preemption vs Eviction</a></span> +<br /> <span class="sectionToc" >13.3 <a +href="#x1-27800013.3" id="QQ2-1-302">Scheduling Policies</a></span> +<br /> <span class="sectionToc" >13.4 <a +href="#x1-27900013.4" id="QQ2-1-303">Allotment</a></span> +<br /> <span class="sectionToc" >13.5 <a +href="#x1-28000013.5" id="QQ2-1-304">Priority vs Weight</a></span> +<br /> <span class="sectionToc" >13.6 <a +href="#x1-28300013.6" id="QQ2-1-307">Node Pools</a></span> +<br /> <span class="sectionToc" >13.7 <a +href="#x1-28400013.7" id="QQ2-1-308">Scheduling Classes</a></span> +<br /><span class="chapterToc" >14 <a +href="#x1-28500014" id="QQ2-1-309">Service Management</a></span> +<br /><span class="chapterToc" >15 <a +href="#x1-28600015" id="QQ2-1-310">Simulation and System 
Testing</a></span> +<br /> <span class="sectionToc" >15.1 <a +href="#x1-28700015.1" id="QQ2-1-311">Cluster Simulation</a></span> +<br />  <span class="subsectionToc" >15.1.1 <a +href="#x1-28800015.1.1" id="QQ2-1-312">Overview</a></span> +<br />  <span class="subsectionToc" >15.1.2 <a +href="#x1-28900015.1.2" id="QQ2-1-313">Node Configuration</a></span> +<br />  <span class="subsectionToc" >15.1.3 <a +href="#x1-29000015.1.3" id="QQ2-1-314">Setting up Test Mode</a></span> +<br />  <span class="subsectionToc" >15.1.4 <a +href="#x1-29100015.1.4" id="QQ2-1-315">Starting a Simulated Cluster</a></span> +<br />  <span class="subsectionToc" >15.1.5 <a +href="#x1-29500015.1.5" id="QQ2-1-319">Stopping a Simulated Cluster</a></span> + + +<br /> <span class="sectionToc" >15.2 <a +href="#x1-29900015.2" id="QQ2-1-323">Job Simulation</a></span> +<br />  <span class="subsectionToc" >15.2.1 <a +href="#x1-30000015.2.1" id="QQ2-1-324">Overview</a></span> +<br />  <span class="subsectionToc" >15.2.2 <a +href="#x1-30100015.2.2" id="QQ2-1-325">Job meta-descriptors</a></span> +<br />  <span class="subsectionToc" >15.2.3 <a +href="#x1-30200015.2.3" id="QQ2-1-326"><span +class="cmti-10">Prepare </span>Descriptors</a></span> +<br />  <span class="subsectionToc" >15.2.4 <a +href="#x1-30300015.2.4" id="QQ2-1-327">Services</a></span> +<br />  <span class="subsectionToc" >15.2.5 <a +href="#x1-30500015.2.5" id="QQ2-1-329">Generating a Job Set</a></span> +<br />  <span class="subsectionToc" >15.2.6 <a +href="#x1-30600015.2.6" id="QQ2-1-330">Running the Test Driver</a></span> +<br /> <span class="sectionToc" >15.3 <a +href="#x1-30700015.3" id="QQ2-1-331">Pre-Packaged Tests</a></span> +<br /><span class="chapterToc" >16 <a +href="#x1-30800016" id="QQ2-1-332">DUCC Web Server Customization</a></span> +<br /> <span class="sectionToc" >16.1 <a +href="#x1-30900016.1" id="QQ2-1-333">Server Side</a></span> +<br /> <span class="sectionToc" >16.2 <a +href="#x1-31000016.2" id="QQ2-1-334">Client 
Side</a></span> +<br /> <span class="sectionToc" >16.3 <a +href="#x1-31100016.3" id="QQ2-1-335">Build and Install</a></span> +<br /><span class="chapterToc" >17 <a +href="#x1-31200017" id="QQ2-1-336">Understanding the DUCC logs</a></span> +<br /> <span class="sectionToc" >17.1 <a +href="#x1-31300017.1" id="QQ2-1-337">Overview</a></span> +<br /> <span class="sectionToc" >17.2 <a +href="#x1-31400017.2" id="QQ2-1-338">Resource Manager Log (rm.log)</a></span> +<br />  <span class="subsectionToc" >17.2.1 <a +href="#x1-31500017.2.1" id="QQ2-1-339">Bootstrap Configuration</a></span> +<br />  <span class="subsectionToc" >17.2.2 <a +href="#x1-32000017.2.2" id="QQ2-1-344">Node Arrival and Missed Heartbeats</a></span> +<br />  <span class="subsectionToc" >17.2.3 <a +href="#x1-32300017.2.3" id="QQ2-1-347">Node Occupancy</a></span> +<br />  <span class="subsectionToc" >17.2.4 <a +href="#x1-32400017.2.4" id="QQ2-1-348">Job Arrival and Status Updates</a></span> +<br />  <span class="subsectionToc" >17.2.5 <a +href="#x1-32700017.2.5" id="QQ2-1-351">Calculation Of Job Caps</a></span> +<br />  <span class="subsectionToc" >17.2.6 <a +href="#x1-32800017.2.6" id="QQ2-1-352">The “how much” calculations</a></span> +<br />  <span class="subsectionToc" >17.2.7 <a +href="#x1-32900017.2.7" id="QQ2-1-353">The “what of” calculations</a></span> +<br />  <span class="subsectionToc" >17.2.8 <a +href="#x1-33000017.2.8" id="QQ2-1-354">Defragmentation</a></span> +<br />  <span class="subsectionToc" >17.2.9 <a +href="#x1-33100017.2.9" id="QQ2-1-355">Published Schedule</a></span> +<br /> <span class="sectionToc" >17.3 <a +href="#x1-33400017.3" id="QQ2-1-358">Service Manager Log (sm.log)</a></span> +<br />  <span class="subsectionToc" >17.3.1 <a +href="#x1-33500017.3.1" id="QQ2-1-359">Bootstrap configuration</a></span> +<br />  <span class="subsectionToc" >17.3.2 <a +href="#x1-34000017.3.2" id="QQ2-1-364">Receipt and analysis of Orchestrator State</a></span> +<br />  <span class="subsectionToc" >17.3.3 
<a +href="#x1-34100017.3.3" id="QQ2-1-365">CLI Requests</a></span> +<br />  <span class="subsectionToc" >17.3.4 <a +href="#x1-34200017.3.4" id="QQ2-1-366">Dispatching / Startup of Service Instances</a></span> +<br />  <span class="subsectionToc" >17.3.5 <a +href="#x1-34300017.3.5" id="QQ2-1-367">Progression of Service State</a></span> +<br />  <span class="subsectionToc" >17.3.6 <a +href="#x1-34400017.3.6" id="QQ2-1-368">Starting and Logging Pingers</a></span> +<br />  <span class="subsectionToc" >17.3.7 <a +href="#x1-34500017.3.7" id="QQ2-1-369">Publishing State</a></span> +<br /> <span class="sectionToc" >17.4 <a +href="#x1-34600017.4" id="QQ2-1-370">Orchestrator Log (or.log)</a></span> +<br /> <span class="sectionToc" >17.5 <a +href="#x1-34700017.5" id="QQ2-1-371">Process Manager Log (pm.log)</a></span> +<br /> <span class="sectionToc" >17.6 <a +href="#x1-34800017.6" id="QQ2-1-372">Agent Log (hostname.agent.log)</a></span> +</div> + + +<h2 class="likechapterHead"><a + id="x1-4000"></a>List of Figures</h2><div class="tableofcontents"><span class="lofToc" >1.1 <a +href="#x1-10001r1">Standard UIMA Pipeline</a></span><br /><span class="lofToc" >1.2 <a +href="#x1-11001r2">UIMA Pipeline As Scaled by +UIMA-AS</a></span><br /><span class="lofToc" >1.3 <a +href="#x1-12001r3">UIMA Pipeline As Automatically Scaled Out By DUCC</a></span><br /><span class="lofToc" >1.4 <a +href="#x1-13001r4">UIMA Pipeline +With User-Supplied DD as Automatically Scaled Out By DUCC</a></span><br /><span class="lofToc" >5.1 <a +href="#x1-92001r1">Sample UIMA-AS +Service Pinger</a></span><br /><span class="lofToc" >7.1 <a +href="#x1-132001r1">Sample Webserver Page</a></span><br /><span class="lofToc" >7.2 <a +href="#x1-136001r2">Preferences Page</a></span><br /><span class="lofToc" >7.3 <a +href="#x1-138001r3">Jobs Page</a></span><br /><span class="lofToc" >7.4 <a +href="#x1-140004r4">Processes +Tab</a></span><br /><span class="lofToc" >7.5 <a +href="#x1-141001r5">Work Items 
Tab</a></span><br /><span class="lofToc" >7.6 <a +href="#x1-142001r6">Performance Tab</a></span><br /><span class="lofToc" >7.7 <a +href="#x1-143001r7">Specification Tab</a></span><br /><span class="lofToc" >7.8 <a +href="#x1-144001r8">Reservations +Page</a></span><br /><span class="lofToc" >7.9 <a +href="#x1-157001r9">Visualization</a></span><br /><span class="lofToc" >9.1 <a +href="#x1-181001r1">OpenNLP Process Measurements</a></span><br /><span class="lofToc" >9.2 <a +href="#x1-181002r2">OpenNLP +Process Breakdown</a></span><br /><span class="lofToc" >10.1 <a +href="#x1-186001r1">CAS Input Processing Performance</a></span><br /><span class="lofToc" >12.1 <a +href="#x1-227004r1">Nodepool +Example</a></span><br /><span class="lofToc" >12.2 <a +href="#x1-227007r2">Nodepools: Overlapping Pools are Incorrect</a></span><br /><span class="lofToc" >12.3 <a +href="#x1-227008r3">Nodepools: Multiple +top-level Nodepools</a></span><br /><span class="lofToc" >12.4 <a +href="#x1-229007r4">Sample Nodepool Configuration</a></span><br /><span class="lofToc" >12.5 <a +href="#x1-230001r5">Sample Class +Configuration</a></span><br /><span class="lofToc" >12.6 <a +href="#x1-234001r6">Sample Node Configuration</a></span><br /><span class="lofToc" >12.7 <a +href="#x1-235001r7">Sample User Registration</a></span><br /> +</div> + + + + +<!--l. 79--><p class="noindent" > + + +<h1 class="partHead"><span class="titlemark">Part I<br /></span><a + id="x1-5000I"></a>DUCC Concepts</h1> +<!--l. 22--><p class="noindent" ><a name='DUCC_OVERVIEW'></a> + + +<h2 class="chapterHead"><span class="titlemark">Chapter 1</span><br /><a + id="x1-60001"></a>DUCC Overview</h2> +<h3 class="sectionHead"><span class="titlemark">1.1 </span> <a + id="x1-70001.1"></a>What is DUCC?</h3> +<!--l. 28--><p class="noindent" >DUCC stands for Distributed UIMA Cluster Computing. 
DUCC is a cluster management system providing +tooling, management, and scheduling facilities to automate the scale-out of applications written to the UIMA +framework. +<!--l. 32--><p class="noindent" >Core UIMA provides a generalized framework for applications that process unstructured information such as human +language, but does not provide a scale-out mechanism. UIMA-AS provides a scale-out mechanism to distribute UIMA +pipelines over a cluster of computing resources, but does not provide job or cluster management of the resources. +DUCC defines a formal job model that closely maps to a standard UIMA pipeline. Around this job model +DUCC provides cluster management services to automate the scale-out of UIMA pipelines over computing +clusters. +<!--l. 39--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.2 </span> <a + id="x1-80001.2"></a>DUCC Job Model</h3> +<!--l. 41--><p class="noindent" >The Job Model defines the steps necessary to scale-up a UIMA pipeline using DUCC. The goal of DUCC is to +scale-up any UIMA pipeline, including pipelines that must be deployed across multiple machines using shared +services. +<!--l. 45--><p class="noindent" >The DUCC Job model consists of standard UIMA components: a Collection Reader (CR), a CAS Multiplier (CM), +application logic as implemented by one or more Analysis Engines (AE), and a CAS Consumer (CC). +<!--l. 49--><p class="noindent" >The Collection Reader builds input CASs and forwards them to the UIMA pipelines. In the DUCC model, the CR is run in a +process separate from the rest of the pipeline. In fact, in all but the smallest clusters it is run on a different physical machine +than the rest of the pipeline. To achieve scalability, the CR must create very small CASs that do not contain application +data, but which contain references to data; for instance, file names. Ideally, the CR should be runnable in a process +not much larger than the smallest Java virtual machine. 
Later sections demonstrate methods for achieving +this. +<!--l. 57--><p class="noindent" >Each pipeline must contain at least one CAS Multiplier which receives the CASs from the CR. The CMs encapsulate the +knowledge of how to receive the data references in the small CASs received from the CRs and deliver the referenced data to +the application pipeline. DUCC packages the CM, AE(s), and CC into a single process, multiple instances of which are then +deployed over the cluster. +<!--l. 63--><p class="noindent" >A DUCC job therefore consists of a small specification containing the following items: + <ul class="itemize1"> + <li class="itemize">The name of a resource containing the CR descriptor. + </li> + <li class="itemize">The name of a resource containing the CM descriptor. + </li> + <li class="itemize">The name of a resource containing the AE descriptor. + </li> + <li class="itemize">The name of a resource containing the CC descriptor. + </li> + <li class="itemize">Other information required to parameterize the above and identify the job such as log directory, working + directory, desired scale-out, classpath, etc. These are described in detail in subsequent sections.</li></ul> + + +<!--l. 75--><p class="noindent" >On job submission, DUCC creates a single process executing the CR and one or more processes containing the analysis +pipeline. +<!--l. 78--><p class="noindent" >DUCC provides other facilities in support of scale-out: + <ul class="itemize1"> + <li class="itemize">The ability to reserve all or part of a node in the cluster. + </li> + <li class="itemize">Automated management of services required in support of jobs. + </li> + <li class="itemize">The ability to schedule and execute arbitrary processes on nodes in the cluster. + </li> + <li class="itemize">Debugging tools and support. + </li> + <li class="itemize">A web server to display and manage work and cluster status. 
+ </li> + <li class="itemize">A CLI and a Java API to support the above.</li></ul> +<!--l. 89--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.3 </span> <a + id="x1-90001.3"></a>DUCC From UIMA to Full Scale-out</h3> +<!--l. 91--><p class="noindent" >In this section we demonstrate the progression of a simple UIMA pipeline to a fully scaled-out job running under +DUCC. +<!--l. 94--><p class="noindent" ><span class="paragraphHead"><a + id="x1-100001.3"></a><span +class="cmbx-10">UIMA Pipelines</span></span> +A normal UIMA pipeline contains a Collection Reader (CR), one or more Analysis Engines (AE) connected in a pipeline, and +a CAS Consumer (CC) as shown in <a +href="#x1-10001r1">Figure  1.1</a>. +<!--l. 99--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-10001r1"></a> + + + +<!--l. 101--><p class="noindent" ><img +src="images/uima-pipeline.jpg" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.1: </span><span +class="content">Standard UIMA Pipeline</span></div><!--tex4ht:label?: x1-10001r1 --> + + +<!--l. 104--><p class="noindent" ></div><hr class="endfigure"> +<!--l. 106--><p class="noindent" ><span class="paragraphHead"><a + id="x1-110001.3"></a><span +class="cmbx-10">UIMA-AS Scaled Pipeline</span></span> +With UIMA-AS the CR is separated into a discrete process and a CAS Multiplier (CM) is introduced into the pipeline as an +interface between the CR and the pipeline, as shown in <a +href="#x1-11001r2">Figure  1.2</a> below. Multiple pipelines are serviced by the CR and are +scaled-out over a computing cluster. The difficulty with this model is that each user is individually responsible for finding and +scheduling computing nodes, installing communication software such as ActiveMQ, and generally managing the distributed +job and associated hardware. +<!--l. 116--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-11001r2"></a> + + + +<!--l. 
118--><p class="noindent" ><img +src="images/uima-as-pipeline.png" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.2: </span><span +class="content">UIMA Pipeline As Scaled by UIMA-AS</span></div><!--tex4ht:label?: x1-11001r2 --> + + +<!--l. 121--><p class="noindent" ></div><hr class="endfigure"> +<!--l. 123--><p class="noindent" ><span class="paragraphHead"><a + id="x1-120001.3"></a><span +class="cmbx-10">UIMA Pipeline Scaled By DUCC</span></span> +DUCC is a UIMA and UIMA-AS-aware cluster manager. To scale out work under DUCC the developer tells DUCC what +the parts of the application are, and DUCC does the work to build the scale-out via UIMA/AS, to find and +schedule resources, to deploy the parts of the application over the cluster, and to manage the job while it +executes. +<!--l. 129--><p class="noindent" >On job submission, the CR is wrapped with a DUCC main class and launched as a Job Driver (or JD). The DUCC main +class establishes communication with other DUCC components and instantiates the CR. If the CR initializes +successfully, and indicates that there are greater than 0 work items to process, the specified CM, AE and CC +components are assembled into an aggregate, wrapped with a DUCC main class, and launched as a Job Process (or +JP). +<!--l. 135--><p class="noindent" >The JP will replicate the aggregate as many times as specified, each aggregate instance running in a single thread. When the +aggregate initializes, and whenever an aggregate thread needs work, the JP wrapper will fetch the next work item from the +JD, as shown in <a +href="#x1-12001r3">Figure  1.3</a> below. +<!--l. 140--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-12001r3"></a> + + + +<!--l. 
142--><p class="noindent" ><img +src="images/ducc-sequential.png" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.3: </span><span +class="content">UIMA Pipeline As Automatically Scaled Out By DUCC</span></div><!--tex4ht:label?: x1-12001r3 --> + + +<!--l. 145--><p class="noindent" ></div><hr class="endfigure"> +<!--l. 147--><p class="noindent" ><span class="paragraphHead"><a + id="x1-130001.3"></a><span +class="cmbx-10">UIMA Pipeline with User-Supplied DD Scaled By DUCC</span></span> +Application programmers may supply their own Deployment Descriptors to control intra-process threading and scale-out. If a +DD is specified in the job parameters, DUCC will launch each JP with the specified UIMA-AS service instantiated in-process, +as depicted in <a +href="#x1-13001r4">Figure  1.4</a> below. In this case the user can still specify how many work items to deliver to the service +concurrently. +<!--l. 155--><p class="noindent" ><hr class="figure"><div class="figure" +> + + +<a + id="x1-13001r4"></a> + + + +<!--l. 157--><p class="noindent" ><img +src="images/ducc-parallel.png" alt="PIC" +> +<br /> <div class="caption" +><span class="id">Figure 1.4: </span><span +class="content">UIMA Pipeline With User-Supplied DD as Automatically Scaled Out By DUCC</span></div><!--tex4ht:label?: x1-13001r4 --> + + +<!--l. 160--><p class="noindent" ></div><hr class="endfigure"> +<h3 class="sectionHead"><span class="titlemark">1.4 </span> <a + id="x1-140001.4"></a>Error Management </h3> +<!--l. 164--><p class="noindent" >DUCC provides a number of facilities to assist error management: + <ul class="itemize1"> + <li class="itemize">DUCC captures exceptions in the JPs and delivers them to the Job Drivers. The JD wrappers implement logic + to enforce error thresholds, to identify and log errors, and to reflect job problems in the DUCC Web Server. + Error thresholds are configurable both globally and on a per-job basis. 
+ </li> + <li class="itemize">Error and timeout thresholds are implemented for both the initialization phase of a pipeline and the execution + phase. + </li> + <li class="itemize">Retry-after-error is supported: if a process has a failure on some CAS after initialization is successful, the + process is terminated and all affected CASs are retried, up to some configurable threshold. + </li> + <li class="itemize">To avoid disrupting existing workloads by a job that will fail to run, DUCC ensures that JD and JP processes + can successfully initialize before fully scaling out a job. + </li> + <li class="itemize">Various error conditions encountered while a job is running will prevent a problematic job from continuing + scale out, and can result in termination of the job.</li></ul> +<!--l. 186--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.5 </span> <a + id="x1-150001.5"></a>Cluster and Job Management</h3> +<!--l. 187--><p class="noindent" >DUCC supports management of multiple jobs and multiple users in a distributed cluster: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">Multiple User Support</span> </dt><dd +class="description">When properly configured, DUCC runs all work under the identity of the submitting + user. Logs are written with the user’s credentials into the user’s file space designated at job submission. + </dd><dt class="description"> +<span +class="cmbx-10">Fair-Share Scheduling</span> </dt><dd +class="description">DUCC provides a Fair-Share scheduler to equitably share resources among multiple users. + The scheduler also supports semi-permanent reservation of full or partial machines. + </dd><dt class="description"> +<span +class="cmbx-10">Service Management</span> </dt><dd +class="description">DUCC provides a Service Manager capable of automatically starting, stopping, and + otherwise managing and querying both UIMA-AS and non-UIMA-AS services in support of jobs. 
+ </dd><dt class="description"> +<span +class="cmbx-10">Job Lifetime Management and Orchestration</span> </dt><dd +class="description">DUCC includes an Orchestrator to manage the lifetimes of all + entities in the system. + </dd><dt class="description"> +<span +class="cmbx-10">Node Sharing</span> </dt><dd +class="description">DUCC allocates processes from one or more users on a node, each with a specified amount of + memory. DUCC’s preferred mechanism for constraining memory use is Linux Control Groups, or CGroups. For + nodes that do not support CGroups, DUCC agents monitor RAM use and kill processes that exceed their share + size by a settable fudge factor. + + + </dd><dt class="description"> +<span +class="cmbx-10">DUCC Agents</span> </dt><dd +class="description">DUCC Agents manage each node’s local resources and all processes started by DUCC. Each node in a + cluster has exactly one Agent. The Agent + <ul class="itemize1"> + <li class="itemize">Monitors and reports node capabilities (memory, etc) and performance data (CPU busy, swap, etc). + </li> + <li class="itemize">Starts, stops, and monitors all processes on behalf of users. + </li> + <li class="itemize">Patrols the node for “foreign” (non-DUCC) processes, reporting them to the Web Server, and optionally + reaping them. + </li> + <li class="itemize">Ensures job processes do not exceed their declared memory requirements through the use of Linux CGroups.</li></ul> + </dd><dt class="description"> +<span +class="cmbx-10">DUCC Web server</span> </dt><dd +class="description">DUCC provides a web server displaying all aspects of the system: + <ul class="itemize1"> + <li class="itemize">All jobs in the system, their current state, resource usage, etc. + </li> + <li class="itemize">All reserved resources and associated information (owner, etc.), including the ability to request and cancel + reservations. + </li> + <li class="itemize">All services, including the ability to start, stop, and modify service definitions. 
+ </li> + <li class="itemize">All nodes in the system and their status, usage, etc. + </li> + <li class="itemize">The status of all DUCC management processes. + </li> + <li class="itemize">Access to documentation.</li></ul> + </dd><dt class="description"> +<span +class="cmbx-10">Cluster Management Support</span> </dt><dd +class="description">DUCC provides system management support to: + <ul class="itemize1"> + <li class="itemize">Start, stop, and query full DUCC systems. + </li> + <li class="itemize">Start, stop, and quiesce individual DUCC components. + </li> + <li class="itemize">Add and delete nodes from the DUCC system. + </li> + <li class="itemize">Discover DUCC processes (e.g. after partial failures). + </li> + <li class="itemize">Find and kill errant job processes belonging to individual users. + </li> + <li class="itemize">Monitor and display inter-DUCC messages.</li></ul> + </dd></dl> + + +<!--l. 257--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.6 </span> <a + id="x1-160001.6"></a>Security Measures</h3> +<!--l. 258--><p class="noindent" >The following DUCC security measures are provided: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">user credentials</span> </dt><dd +class="description">DUCC instantiates user processes using a setuid root executable named ducc_ling. See more at + <a +href="#x1-170001.6.1"><span +class="cmti-10">ducc</span><span +class="cmti-10">_ling</span></a>. + </dd><dt class="description"> +<span +class="cmbx-10">command line interface</span> </dt><dd +class="description">The CLI employs HTTP to send requests to the DUCC controller. The CLI creates and + employs public and private security keys in the user’s home directory for authentication of HTTP requests. + The controller validates requests via these same security keys. 
+ </dd><dt class="description"> +<span +class="cmbx-10">webserver</span> </dt><dd +class="description">The webserver facilitates operational control and therefore authentication is desirable. + <ul class="itemize1"> + <li class="itemize">Each user has the ability to control certain aspects of only his/her active submissions. + </li> + <li class="itemize">Each administrator has the ability to control certain aspects of any user’s active submissions, as well as + modification of some DUCC operational characteristics.</li></ul> + <!--l. 276--><p class="noindent" >A simple interface is provided so that an installation can plug in a site-specific authentication mechanism comprising + userid and password. + </dd><dt class="description"> +<span +class="cmbx-10">ActiveMQ</span> </dt><dd +class="description">DUCC uses ActiveMQ for administrative communication. AMQ authentication is used to prevent arbitrary + processes from participating.</dd></dl> +<!--l. 283--><p class="noindent" > +<h4 class="subsectionHead"><span class="titlemark">1.6.1 </span> <a + id="x1-170001.6.1"></a>ducc_ling</h4> +<!--l. 285--><p class="noindent" >ducc_ling contains the following functions, which the security-conscious may verify by examining the source in +$DUCC_HOME/duccling. All sensitive operations are performed only AFTER switching userids, to prevent unauthorized +root access to the system. + <ul class="itemize1"> + <li class="itemize">Changes its real and effective userid to that of the user invoking the job. + </li> + <li class="itemize">Optionally redirects its stdout and stderr to the DUCC log for the current job. + </li> + <li class="itemize">Optionally redirects its stdio to a port set by the CLI, when a job is submitted. + </li> + <li class="itemize">“Nice”s itself to a “worse” priority than the default, to reduce the chances that a runaway DUCC job could + monopolize a system. + </li> + <li class="itemize">Optionally sets user limits. 
+ </li> + <li class="itemize">Prints the effective limits for a job to both the user’s log, and the DUCC agent’s log. + </li> + <li class="itemize">Changes to the user’s working directory, as specified by the job. + + + </li> + <li class="itemize">Optionally establishes LD_LIBRARY_PATH for the job from the environment variable <span +class="cmtt-10">DUCC</span><span +class="cmtt-10">_LD</span><span +class="cmtt-10">_LIBRARY</span><span +class="cmtt-10">_PATH</span> + if set in the DUCC job specification. (Secure Linux systems will prevent LD_LIBRARY_PATH from being set + by a program with root authority, so this is done AFTER changing userids). + </li> + <li class="itemize">ONLY user <span +class="cmti-10">ducc </span>may use the ducc_ling program in a privileged way. Ducc_ling contains checks to prevent even + user <span +class="cmti-10">root </span>from using it for privileged operations. + </li></ul> +<!--l. 310--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">1.7 </span> <a + id="x1-180001.7"></a>Security Issues</h3> +<!--l. 311--><p class="noindent" >The following DUCC security issues should be considered: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">submit transmission ’sniffed’</span> </dt><dd +class="description">In the event that the DUCC submit command is ’sniffed’ then the user + authentication mechanism is compromised and user masquerading is possible. That is, the userid encryption + mechanism can be exploited such that user A can submit a job pretending to be user B. + </dd><dt class="description"> +<span +class="cmbx-10">user </span><span +class="cmbxti-10">ducc </span><span +class="cmbx-10">password compromised</span> </dt><dd +class="description">In the event that the <span +class="cmti-10">ducc </span>user password is compromised then the root + privileged command <span +class="cmbx-10">ducc</span><span +class="cmbx-10">_ling </span>can be used to become any other user except root. 
+ </dd><dt class="description"> +<span +class="cmbx-10">user </span><span +class="cmbxti-10">root </span><span +class="cmbx-10">password compromised</span> </dt><dd +class="description">In the event that the <span +class="cmti-10">root </span>user password is compromised DUCC provides no + protection. That is, compromising the root user is equivalent to compromising the DUCC user password.</dd></dl> +<!--l. 22--><p class="noindent" ><a name='DUCC_TERMINOLOGY'></a> + + +<h2 class="chapterHead"><span class="titlemark">Chapter 2</span><br /><a + id="x1-190002"></a>Glossary</h2> + <dl class="description"><dt class="description"> +<span +class="cmbx-10">Autostarted Service</span> </dt><dd +class="description">An autostarted service is a registered service that is started automatically by DUCC when + the DUCC system is booted. + </dd><dt class="description"> +<span +class="cmbx-10">Dependent service or job</span> </dt><dd +class="description">A dependent service or job is a service or job that specifies one or more service + dependencies in its job specification. The service or job is dependent upon the referenced service being + operational before being started by DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">DUCC</span> </dt><dd +class="description">Distributed UIMA Cluster Computing. + </dd><dt class="description"> +<span +class="cmbx-10">Registered service</span> </dt><dd +class="description">A registered service is a service that is registered with DUCC. DUCC saves the service + specification and fully manages the service, ensuring it is running when needed, and shut down when not. + </dd><dt class="description"> +<span +class="cmbx-10">Service Instance</span> </dt><dd +class="description">A service instance is one physical process which runs a CUSTOM or UIMA-AS service. UIMA-AS + services are usually scaled-out with multiple instances implementing the same underlying service logic. 
+ </dd><dt class="description"> +<span +class="cmbx-10">Orchestrator (OR)</span> </dt><dd +class="description">The Orchestrator manages the life cycle of all entities within DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">Process Manager (PM)</span> </dt><dd +class="description">The Process Manager coordinates distribution of work among the Agents. + </dd><dt class="description"> +<span +class="cmbx-10">Resource Manager (RM)</span> </dt><dd +class="description">The Resource Manager schedules physical resources for DUCC work. + </dd><dt class="description"> +<span +class="cmbx-10">Service Endpoint</span> </dt><dd +class="description">In DUCC, the service endpoint provides a unique identifier for a service. In the case of UIMA-AS + services, the endpoint also serves as a well-known address for contacting the service. + </dd><dt class="description"> +<span +class="cmbx-10">Service Manager (SM)</span> </dt><dd +class="description">The Service Manager manages the life-cycles of UIMA-AS and CUSTOM services. It + coordinates registration of services, starting and stopping of services, and ensures that services are available + and remain available for the lifetime of the jobs. + </dd><dt class="description"> +<span +class="cmbx-10">Agent</span> </dt><dd +class="description">DUCC Agent processes run on every node in the system. The Agent receives orders to start and stop processes + on each node. Agents monitor nodes, sending heartbeat packets with node statistics to interested components + (such as the RM and web-server). If CGroups are installed in the cluster, the Agent is responsible for managing + the CGroups for each job process. All processes other than the DUCC management processes are managed + as children of the agents. + </dd><dt class="description"> +<span +class="cmbx-10">DUCC-MON</span> </dt><dd +class="description">DUCC-MON is the DUCC web-server. 
+ </dd><dt class="description"> +<span +class="cmbx-10">Job Driver (JD)</span> </dt><dd +class="description">The Job Driver is a thin wrapper that encapsulates a Job’s Collection Reader. The JD executes + as a process that is scheduled and deployed by DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">Job Process (JP)</span> </dt><dd +class="description">The Job Process is a thin wrapper that encapsulates a job’s pipeline components. The JP + executes in a process that is scheduled and deployed by DUCC. + </dd><dt class="description"> +<span +class="cmbx-10">Job specification</span> </dt><dd +class="description">The Job Specification is a collection of properties that describe work to be scheduled and deployed + by DUCC. It identifies the UIMA components (CR, AE, etc) that comprise the job and the system-wide + properties of the job (CLASSPATHs, RAM requirements, etc). + + + </dd><dt class="description"> +<span +class="cmbx-10">Job</span> </dt><dd +class="description">A DUCC job consists of the components required to deploy and execute a UIMA pipeline over a computing + cluster. It consists of a JD to run the Collection Reader, a set of JPs to run the UIMA AEs, and a Job + Specification to describe how the parts fit together. + </dd><dt class="description"> +<span +class="cmbx-10">Share Quantum</span> </dt><dd +class="description">The DUCC scheduler abstracts the nodes in the cluster as a single large conglomerate of resources: + memory, processor cores, etc. The scheduler logically decomposes the collection of resources into some number + of equal-sized atomic units. Each unit of work requiring resources is apportioned one or more of these atomic + units. The smallest possible atomic unit is called the <span +class="cmti-10">share quantum</span>, or simply, <span +class="cmti-10">share</span>. 
+ </dd><dt class="description"> +<span +class="cmbx-10">Process</span> </dt><dd +class="description">A process is one physical process executing on a machine in the DUCC cluster. DUCC jobs are comprised + of one or more processes (JDs and JPs). Each process is assigned one or more <span +class="cmti-10">shares </span>by the DUCC scheduler. + </dd><dt class="description"> +<span +class="cmbx-10">Weighted Fair Share</span> </dt><dd +class="description">A weighted fair share calculation is used to apportion resources equitably to the outstanding + work in the system. In a non-weighted fair-share system, all work requests are given equal consideration to all + resources. To provide some (“more important”) work more than equal resources, weights are used to bias the + allotment of shares in favor of some classes of work. + </dd><dt class="description"> +<span +class="cmbx-10">Work Items</span> </dt><dd +class="description">A DUCC work item is one unit of work to be completed in a single DUCC process. It is usually + initiated by the submission of a single CAS from the JD to one of the JPs. It could be thought of as a single + “question” to be answered by a UIMA analytic, or a single “task” to complete. Usually each DUCC JP executes + many work items per job. + </dd><dt class="description"> +<span +class="cmbx-10">$DUCC</span><span +class="cmbx-10">_HOME</span> </dt><dd +class="description">The root of the installed DUCC runtime, e.g. /home/ducc/ducc_runtime. It need not be set in + the environment, although the examples in this document assume that it has been. + </dd></dl> + + +<!--l. 83--><p class="noindent" > + + +<h1 class="partHead"><span class="titlemark">Part II<br /></span><a + id="x1-20000II"></a>Ducc Users Guide</h1> +<!--l. 23--><p class="noindent" ><a name='DUCC_CLI'></a> + + +<h2 class="chapterHead"><span class="titlemark">Chapter 3</span><br /><a + id="x1-210003"></a>Command Line Interface</h2> +<!--l. 
28--><p class="noindent" ><span class="paragraphHead"><a + id="x1-220003"></a><span +class="cmbx-10">Overview</span></span> +The DUCC CLI is the primary means of communication with DUCC. Work is submitted, work is canceled, work is +monitored, and work is queried with this interface. +<!--l. 32--><p class="noindent" >All parameters may be passed to all the CLI commands in the form of Unix-like “long-form” (key, value) pairs, in which the +key is preceded by the characters “<span +class="cmsy-10">--</span>”. As well, the parameters may be saved in a standard Java Properties file, without +the leading “<span +class="cmsy-10">--</span>” characters. Both a properties file and command-line parameters may be passed to each CLI. +When both are present, the parameters on the command line take precedence. Take, for example, the following +simple job properties file, call it <span +class="cmtt-10">1.job</span>, where the environment variable “DH” has been set to the location of +$DUCC_HOME. + + +<div class="verbatim" id="verbatim-1"> +description                    Test job 1 + <br /> + <br />classpath                      ${DH}/lib/uima-ducc/examples/* + <br />environment                    AE_INIT_TIME=5 AE_INIT_RANGE=5 LD_LIBRARY_PATH=/a/nother/path + <br />scheduling_class               normal + <br /> + <br />driver_descriptor_CR           org.apache.uima.ducc.test.randomsleep.FixedSleepCR + <br />driver_descriptor_CR_overrides jobfile=${DH}/lib/examples/simple/1.inputs compression=10 + <br />error_rate=0.0 + <br /> + <br />driver_jvm_args                -Xmx500M + <br /> + <br />process_descriptor_AE          org.apache.uima.ducc.test.randomsleep.FixedSleepAE + <br />process_memory_size            4 + <br />process_jvm_args               -Xmx100M + <br />process_thread_count           2 + <br />process_per_item_time_max      5 + <br />process_deployments_max        999 + <br /></div> +<!--l. 59--><p class="nopar" > +<!--l. 
61--><p class="noindent" >This can be submitted, overriding the scheduling class and memory, thus: + + +<div class="verbatim" id="verbatim-2"> +ducc_submit --specification 1.job --process_memory_size 16 --scheduling_class high</div> +<!--l. 64--><p class="nopar" > +<!--l. 66--><p class="noindent" >The DUCC CLI parameters are now described in detail. +<!--l. 68--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.1 </span> <a + id="x1-230003.1"></a>The DUCC Job Descriptor</h3> +<!--l. 69--><p class="noindent" >The DUCC Job Descriptor includes properties to enable automated management and scale-out over large computing clusters. +The job descriptor includes + <ul class="itemize1"> + <li class="itemize">References to the various UIMA components required by the job (CR, CM, AE, CC, and maybe DD) + </li> + <li class="itemize">Scale-out requirements: number of processes, number of threads per process, etc + </li> + <li class="itemize">Environment requirements: log directory, working directory, environment variables, etc, + </li> + <li class="itemize">JVM parameters + </li> + <li class="itemize">Scheduling class + </li> + <li class="itemize">Error-handling preferences: acceptable failure counts, timeouts, etc + </li> + <li class="itemize">Debugging and monitoring requirements and preferences</li></ul> +<!--l. 81--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.2 </span> <a + id="x1-240003.2"></a>Operating System Limit Support</h3> +<!--l. 82--><p class="noindent" >The CLI supports specification of operating system limits applied to the various job processes. To specify a limit, pass the +name of the limit and its value in the <span +class="cmti-10">environment </span>specified in the job. Limits are named with the string +“DUCC_RLIMIT_name” where “name” is the name of a specific limit. 
Supported limits include: + <ul class="itemize1"> + <li class="itemize">DUCC_RLIMIT_CORE + </li> + <li class="itemize">DUCC_RLIMIT_CPU + </li> + <li class="itemize">DUCC_RLIMIT_DATA + </li> + <li class="itemize">DUCC_RLIMIT_FSIZE + + + </li> + <li class="itemize">DUCC_RLIMIT_MEMLOCK + </li> + <li class="itemize">DUCC_RLIMIT_NOFILE + </li> + <li class="itemize">DUCC_RLIMIT_NPROC + </li> + <li class="itemize">DUCC_RLIMIT_RSS + </li> + <li class="itemize">DUCC_RLIMIT_STACK + </li> + <li class="itemize">DUCC_RLIMIT_AS + </li> + <li class="itemize">DUCC_RLIMIT_LOCKS + </li> + <li class="itemize">DUCC_RLIMIT_SIGPENDING + </li> + <li class="itemize">DUCC_RLIMIT_MSGQUEUE + </li> + <li class="itemize">DUCC_RLIMIT_NICE + </li> + <li class="itemize">DUCC_RLIMIT_STACK + </li> + <li class="itemize">DUCC_RLIMIT_RTPRIO</li></ul> +<!--l. 104--><p class="noindent" >See the Linux documentation for details on the meanings of these limits and their values. +<!--l. 106--><p class="noindent" >For example, to set the maximum number of open files allowed in any job process, specify an environment similar to this +when submitting the job: + + +<div class="verbatim" id="verbatim-3"> +     ducc_submit .... --environment="DUCC_RLIMIT_NOFILE=1024" ...</div> +<!--l. 110--><p class="nopar" > +<!--l. 112--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.3 </span> <a + id="x1-250003.3"></a>Command Line Forms</h3> +<!--l. 113--><p class="noindent" >The Command Line Interface is provided in several forms: +<!--l. 116--><p class="noindent" > + <dl class="enumerate-enumitem"><dt class="enumerate-enumitem"> + 1. </dt><dd +class="enumerate-enumitem">A wrapper script around the uima-ducc-cli.jar. + </dd><dt class="enumerate-enumitem"> + 2. </dt><dd +class="enumerate-enumitem">Direct invocation of each command’s <span +class="cmtt-10">class </span>with the <span +class="cmtt-10">java </span>command.</dd></dl> +<!--l. 
120--><p class="noindent" >When using the scripts the full execution environment is established silently. When invoking a command’s <span +class="cmtt-10">class </span>directly, the +java <span +class="cmtt-10">CLASSPATH </span>must include the uima-ducc-cli.jar, as illustrated in the wrapper scripts. +<!--l. 124--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.4 </span> <a + id="x1-260003.4"></a>DUCC Commands</h3> +<!--l. 125--><p class="noindent" >The following commands are provided: + <dl class="description"><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_submit</span> </dt><dd +class="description">Submit a job for execution. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_cancel</span> </dt><dd +class="description">Cancel a job in progress. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_reserve</span> </dt><dd +class="description">Request a reservation of a full machine. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_unreserve</span> </dt><dd +class="description">Cancel a reservation. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_monitor</span> </dt><dd +class="description">Monitor the progress of a job that is already submitted. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_process</span><span +class="cmbx-10">_submit</span> </dt><dd +class="description">Submit an arbitrary process (managed reservation) for execution. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_process</span><span +class="cmbx-10">_cancel</span> </dt><dd +class="description">Cancel an arbitrary process. 
+ </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_services</span> </dt><dd +class="description">Register, unregister, start, stop, modify, disable, enable, ignore references, observe references, and + query a service. + </dd><dt class="description"> +<span +class="cmbx-10">ducc</span><span +class="cmbx-10">_view</span><span +class="cmbx-10">_perf</span> </dt><dd +class="description">Fetch performance data from the log and history files for analysis by spreadsheets, etc. + + + </dd><dt class="description"> +<span +class="cmbx-10">viaducc</span> </dt><dd +class="description">This is a script wrapper to facilitate execution of Eclipse workspaces as DUCC jobs as well as general + execution of arbitrary processes in DUCC-managed resources.</dd></dl> +<!--l. 142--><p class="noindent" >The next section describes these commands in detail. +<!--l. 22--><p class="noindent" ><a name='DUCC_CLI_SUBMIT'></a> +<!--l. 25--><p class="noindent" > +<h3 class="sectionHead"><span class="titlemark">3.5 </span> <a + id="x1-270003.5"></a>ducc_submit</h3> +<!--l. 27--><p class="noindent" >The source for this section is ducc_duccbook/documents/part-user/cli/submit.xml. +<!--l. 28--><p class="noindent" ><span class="paragraphHead"><a + id="x1-280003.5"></a><span +class="cmbx-10">Description:</span></span> +The submit CLI is used to submit work for execution by DUCC. DUCC assigns a unique id to the job and schedules it for +execution. The submitter may optionally request that the progress of the job is monitored, in which case the state of the job +as it progresses through its lifetime is printed on the console. +<!--l. 
33--><p class="noindent" ><span class="paragraphHead"><a + id="x1-290003.5"></a><span +class="cmbx-10">Usage:</span></span> + <dl class="description"><dt class="description"> +<span +class="cmbx-10">Script wrapper</span> </dt><dd +class="description">$DUCC_HOME/bin/ducc_submit <span +class="cmti-10">options</span> + </dd><dt class="description"> +<span +class="cmbx-10">Java Main</span> </dt><dd +class="description">java -cp $DUCC_HOME/lib/uima-ducc-cli.jar org.apache.uima.ducc.cli.DuccJobSubmit <span +class="cmti-10">options</span></dd></dl> +<!--l. 39--><p class="noindent" ><span class="paragraphHead"><a + id="x1-300003.5"></a><span +class="cmbx-10">Options:</span></span> + <dl class="description"><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">all</span><span +class="cmbx-10">_in</span><span +class="cmbx-10">_one </span><span +class="cmmi-10">&lt;</span><span +class="cmbx-10">local </span><span +class="cmsy-10">| </span><span +class="cmbx-10">remote </span><span +class="cmmi-10">&gt;</span> </dt><dd +class="description">Run driver and pipeline in a single process. If <span +class="cmti-10">local </span>is specified, the process is + executed on the local machine, for example, in the current Eclipse session. If <span +class="cmti-10">remote </span>is specified, the job is + submitted to DUCC as a <span +class="cmti-10">managed reservation </span>and run on some (presumably larger) machine allocated by + DUCC. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">attach</span><span +class="cmbx-10">_console</span> </dt><dd +class="description">If specified, redirect remote stdout and stderr to the local submitting console. 
+ </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">cancel</span><span +class="cmbx-10">_on</span><span +class="cmbx-10">_interrupt</span> </dt><dd +class="description">If specified, the job is monitored and will be canceled if the submit command is + interrupted, e.g. with CTRL-C. This option always implies <span +class="cmsy-10">--</span>wait_for_completion. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">classpath [path-string]</span> </dt><dd +class="description">The CLASSPATH used for the job. If specified, this is used for both the Job Driver + and each Job Process. If not specified, the CLASSPATH of the process invoking this request is used. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">debug</span> </dt><dd +class="description">Enable debugging messages. This is primarily for debugging DUCC itself. + + + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">description [text]</span> </dt><dd +class="description">The text is any string used to describe the job. It is displayed in the Web Server. When + specified on a command-line the text usually must be surrounded by quotes to protect it from the shell. The + default is “none”. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">driver</span><span +class="cmbx-10">_debug [debug-port]</span> </dt><dd +class="description">Append JVM debug flags to the JVM arguments to start the JobDriver in remote + debug mode. The remote process debugger will attempt to contact the specified port. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">driver</span><span +class="cmbx-10">_descriptor</span><span +class="cmbx-10">_CR [descriptor.xml]</span> </dt><dd +class="description">This is the XML descriptor for the Collection Reader. 
This + descriptor is a resource that is searched for in the filesystem or Java classpath as described in the  <a +href="#x1-310003.5">notes below</a>. + (Required) + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">driver</span><span +class="cmbx-10">_descriptor</span><span +class="cmbx-10">_CR</span><span +class="cmbx-10">_overrides [list]</span> </dt><dd +class="description">These are the Job Driver collection reader configuration overrides. They are + specified as name/value pairs in a whitespace-delimited list. For example: + + + <div class="verbatim" id="verbatim-4"> + --driver_descriptor_CR_overrides name1=value1 name2=value2... +  <br />             </div> + <!--l. 79--><p class="nopar" > + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">driver</span><span +class="cmbx-10">_exception</span><span +class="cmbx-10">_handler [classname]</span> </dt><dd +class="description">This specifies a developer-supplied exception handler for the Job Driver. It + must implement org.apache.uima.ducc.IErrorHandler or extend org.apache.uima.ducc.ErrorHandler. A default handler + is provided. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">driver</span><span +class="cmbx-10">_exception</span><span +class="cmbx-10">_handler</span><span +class="cmbx-10">_arguments [argument-string]</span> </dt><dd +class="description">This is a string containing arguments for the exception + handler. The content of the string is entirely a function of the specified handler. If not specified, a <span +class="cmti-10">null </span>is passed + in. + <!--l. 93--><p class="noindent" >Note: When used as a CLI option, the string must usually be quoted to protect it from the shell, if it contains + blanks. + <!--l. 96--><p class="noindent" >The built-in default exception handler supports an argument string of the following form (with NO embedded + blanks): + + + <div class="verbatim" id="verbatim-5"> +      max_job_errors=15</div> + <!--l. 
100--><p class="nopar" > + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">driver</span><span +class="cmbx-10">_jvm</span><span +class="cmbx-10">_args [list]</span> </dt><dd +class="description"> + <!--l. 104--><p class="noindent" >This specifies extra JVM arguments to be provided to the Job Driver process. It is a blank-delimited list of strings. + Example: + + + <div class="verbatim" id="verbatim-6"> + --driver_jvm_args -Xmx100M -Xms50M +  <br />             </div> + <!--l. 108--><p class="nopar" > + <!--l. 110--><p class="noindent" >Note: When used as a CLI option, the list must usually be quoted to protect it from the shell. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">environment [env vars]</span> </dt><dd +class="description">Blank-delimited list of environment variables and variable assignments. Entries will be copied + from the user’s environment if just the variable name is specified, optionally with a final ’*’ for those with the same + prefix. If specified, this is used for all DUCC processes in the job. Example: + + + <div class="verbatim" id="verbatim-7"> + --environment TERM=xterm DISPLAY=:1.0 LANG UIMA_*</div> + <!--l. 120--><p class="nopar" > + <!--l. 122--><p class="noindent" >Additional entries may be copied from the user’s environment based on the setting of + + + <div class="verbatim" id="verbatim-8"> + ducc.submit.environment.propagated</div> + <!--l. 125--><p class="nopar" > in the global DUCC configuration ducc.properties. + <!--l. 128--><p class="noindent" >Note: When used as a CLI option, the environment string must usually be quoted to protect it from the + shell. + </dd><dt class="description"> +<span +class="cmsy-10">--</span><span +class="cmbx-10">help</span> </dt><dd
[... 12808 lines stripped ...]