Added: uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html
URL: 
http://svn.apache.org/viewvc/uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html?rev=1710669&view=auto
==============================================================================
--- uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html (added)
+++ uima/site/trunk/uima-website/docs/d/uima-ducc-2.0.1/duccbook.html Mon Oct 
26 18:25:16 2015
@@ -0,0 +1,14241 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"  
+  "http://www.w3.org/TR/html4/loose.dtd">  
+<html > 
+<head><title>Distributed UIMA Cluster Computing</title> 
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
+<meta name="generator" content="TeX4ht 
(http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> 
+<meta name="originator" content="TeX4ht 
(http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> 
+<!-- html --> 
+<meta name="src" content="duccbook.tex"> 
+<meta name="date" content="2015-10-15 13:07:00"> 
+<link rel="stylesheet" type="text/css" href="duccbook.css"> 
+</head><body 
+>
+                                                                               
                 
+                                                                               
                 
+<div class="maketitle">
+                                                                               
                 
+                                                                               
                 
+                                                                               
                 
+                                                                               
                 
+
+<h2 class="titleHead">Distributed UIMA Cluster Computing</h2>
+<div class="author" ><span 
+class="cmr-12">Written and maintained by the Apache</span>
+<br />  <span 
+class="cmr-12">UIMA</span><sup class="textsuperscript"><span 
+class="cmr-9">TM</span></sup><span 
+class="cmr-12"> Development Community</span><br /><br /><br />
+<br />             <span 
+class="cmr-12">Version 2.0.1</span></div>
+<br />
+<div class="date" ></div>
+                                                                               
                 
+                                                                               
                 
+</div>
+<!--l. 18--><p class="noindent" >Copyright <span 
+class="cmsy-10">©</span>&#x00A0; 2012 The Apache Software Foundation
+<!--l. 20--><p class="noindent" >Copyright <span 
+class="cmsy-10">©</span>&#x00A0; 2012 International Business Machines 
Corporation
+     <!--l. 23--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-1000"></a><span 
+class="cmbx-10">License and Disclaimer</span></span>
+     The ASF licenses this documentation to you under the Apache License, 
Version 2.0 (the &#8220;License&#8221;); you may not
+     use this documentation except in compliance with the License. You may 
obtain a copy of the License
+     at
+     <!--l. 28--><p class="noindent" ><a 
+href="http://www.apache.org/licenses/LICENSE-2.0" class="url" ><span 
+class="cmtt-10">http://www.apache.org/licenses/LICENSE-2.0</span></a>
+     <!--l. 30--><p class="noindent" >Unless required by applicable law or 
agreed to in writing, this documentation and its contents are distributed under
+     the License on an &#8220;AS IS&#8221; BASIS, WITHOUT WARRANTIES OR 
CONDITIONS OF ANY KIND, either express
+     or implied. See the License for the specific language governing 
permissions and limitations under the
+     License.
+     <!--l. 35--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-2000"></a><span 
+class="cmbx-10">Trademarks</span></span>
+     All terms mentioned in the text that are known to be trademarks or 
service marks have been appropriately capitalized.
+     Use of such terms in this book should not be regarded as affecting the 
validity of the trademark or service
+     mark.
+<!--l. 47--><p class="noindent" >Publication date: October&#x00A0;2015
+                                                                               
                 
+                                                                               
                 
+<h2 class="likechapterHead"><a 
+ id="x1-3000"></a>Table of Contents</h2> <div class="tableofcontents">
+<span class="partToc" >I&#x00A0;&#x00A0;<a 
+href="#x1-5000I" id="QQ2-1-5">DUCC Concepts</a></span>
+<br /><span class="chapterToc" >1 <a 
+href="#x1-60001" id="QQ2-1-6">DUCC Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.1 <a 
+href="#x1-70001.1" id="QQ2-1-7">What is DUCC?</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.2 <a 
+href="#x1-80001.2" id="QQ2-1-8">DUCC Job Model</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.3 <a 
+href="#x1-90001.3" id="QQ2-1-9">DUCC From UIMA to Full Scale-out</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.4 <a 
+href="#x1-140001.4" id="QQ2-1-18">Error Management </a></span>
+<br />&#x00A0;<span class="sectionToc" >1.5 <a 
+href="#x1-150001.5" id="QQ2-1-19">Cluster and Job Management</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.6 <a 
+href="#x1-160001.6" id="QQ2-1-20">Security Measures</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >1.6.1 <a 
+href="#x1-170001.6.1" id="QQ2-1-21">ducc_ling</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.7 <a 
+href="#x1-180001.7" id="QQ2-1-22">Security Issues</a></span>
+<br /><span class="chapterToc" >2 <a 
+href="#x1-190002" id="QQ2-1-23">Glossary</a></span>
+<br /><span class="partToc" >II&#x00A0;&#x00A0;<a 
+href="#x1-20000II" id="QQ2-1-24">Ducc Users Guide</a></span>
+<br /><span class="chapterToc" >3 <a 
+href="#x1-210003" id="QQ2-1-25">Command Line Interface</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.1 <a 
+href="#x1-230003.1" id="QQ2-1-27">The DUCC Job Descriptor</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.2 <a 
+href="#x1-240003.2" id="QQ2-1-28">Operating System Limit Support</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.3 <a 
+href="#x1-250003.3" id="QQ2-1-29">Command Line Forms</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.4 <a 
+href="#x1-260003.4" id="QQ2-1-30">DUCC Commands</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.5 <a 
+href="#x1-270003.5" id="QQ2-1-31">ducc_submit</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.6 <a 
+href="#x1-320003.6" id="QQ2-1-36">ducc_cancel</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.7 <a 
+href="#x1-370003.7" id="QQ2-1-41">ducc_reserve</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.8 <a 
+href="#x1-420003.8" id="QQ2-1-46">ducc_unreserve</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.9 <a 
+href="#x1-470003.9" id="QQ2-1-51">ducc_process_submit</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.10 <a 
+href="#x1-520003.10" id="QQ2-1-56">ducc_process_cancel</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.11 <a 
+href="#x1-570003.11" id="QQ2-1-61">ducc_services</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.1 <a 
+href="#x1-610003.11.1" id="QQ2-1-65">Common Options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.2 <a 
+href="#x1-620003.11.2" id="QQ2-1-66">ducc_services &#8211;register 
[specification file] [options]</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.3 <a 
+href="#x1-630003.11.3" id="QQ2-1-67">ducc_services &#8211;start 
options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.4 <a 
+href="#x1-640003.11.4" id="QQ2-1-68">ducc_services &#8211;stop 
options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.5 <a 
+href="#x1-650003.11.5" id="QQ2-1-69">ducc_services &#8211;enable 
options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.6 <a 
+href="#x1-660003.11.6" id="QQ2-1-70">ducc_services &#8211;disable 
options</a></span>
+                                                                               
                 
+                                                                               
                 
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.7 <a 
+href="#x1-670003.11.7" id="QQ2-1-71">ducc_services &#8211;observe_references 
options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.8 <a 
+href="#x1-680003.11.8" id="QQ2-1-72">ducc_services &#8211;ignore_references 
options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.9 <a 
+href="#x1-690003.11.9" id="QQ2-1-73">ducc_services &#8211;modify 
options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.10 <a 
+href="#x1-700003.11.10" id="QQ2-1-74">ducc_services &#8211;query 
options</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.12 <a 
+href="#x1-720003.12" id="QQ2-1-76">viaducc and java_viaducc</a></span>
+<br /><span class="chapterToc" >4 <a 
+href="#x1-760004" id="QQ2-1-80">The DUCC Public API</a></span>
+<br />&#x00A0;<span class="sectionToc" >4.1 <a 
+href="#x1-770004.1" id="QQ2-1-81">Overview Of The DUCC API</a></span>
+<br />&#x00A0;<span class="sectionToc" >4.2 <a 
+href="#x1-780004.2" id="QQ2-1-82">Compiling and Running With the DUCC 
API</a></span>
+<br />&#x00A0;<span class="sectionToc" >4.3 <a 
+href="#x1-790004.3" id="QQ2-1-83">Java API</a></span>
+<br /><span class="chapterToc" >5 <a 
+href="#x1-800005" id="QQ2-1-84">Service Management</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.1 <a 
+href="#x1-810005.1" id="QQ2-1-85">Overview.</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.2 <a 
+href="#x1-820005.2" id="QQ2-1-86">Service Types.</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.3 <a 
+href="#x1-830005.3" id="QQ2-1-87">Service Instance IDs</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.4 <a 
+href="#x1-840005.4" id="QQ2-1-88">Service References and Endpoints</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.5 <a 
+href="#x1-850005.5" id="QQ2-1-89">Service Management Policies</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.6 <a 
+href="#x1-870005.6" id="QQ2-1-91">Service Pingers</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.1 <a 
+href="#x1-880005.6.1" id="QQ2-1-92">The Pinger API</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.2 <a 
+href="#x1-910005.6.2" id="QQ2-1-95">Declaring a Pinger in A Service</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.3 <a 
+href="#x1-920005.6.3" id="QQ2-1-96">Implementing a Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.4 <a 
+href="#x1-930005.6.4" id="QQ2-1-98">Building And Testing Your Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.5 <a 
+href="#x1-980005.6.5" id="QQ2-1-103">Globally Registered Pingers</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.7 <a 
+href="#x1-990005.7" id="QQ2-1-104">Sample Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.1 <a 
+href="#x1-1000005.7.1" id="QQ2-1-105">Using the Sample Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.2 <a 
+href="#x1-1010005.7.2" id="QQ2-1-106">Understanding Sample Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.3 <a 
+href="#x1-1140005.7.3" id="QQ2-1-119">Calculating New Deployments in the 
Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.4 <a 
+href="#x1-1250005.7.4" id="QQ2-1-130">Summary of Sample Pinger</a></span>
+<br /><span class="chapterToc" >6 <a 
+href="#x1-1260006" id="QQ2-1-131">Job Logs</a></span>
+<br /><span class="chapterToc" >7 <a 
+href="#x1-1320007" id="QQ2-1-137">DUCC Web Server</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.1 <a 
+href="#x1-1370007.1" id="QQ2-1-144">Common Links</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.2 <a 
+href="#x1-1380007.2" id="QQ2-1-145">Jobs Page</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.3 <a 
+href="#x1-1390007.3" id="QQ2-1-147">Job Details Page</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.3.1 <a 
+href="#x1-1400007.3.1" id="QQ2-1-148">Processes</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.3.2 <a 
+href="#x1-1410007.3.2" id="QQ2-1-150">Work Items</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.3.3 <a 
+href="#x1-1420007.3.3" id="QQ2-1-152">Performance</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.3.4 <a 
+href="#x1-1430007.3.4" id="QQ2-1-154">Specification</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.4 <a 
+href="#x1-1440007.4" id="QQ2-1-156">Reservation Page</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.5 <a 
+href="#x1-1450007.5" id="QQ2-1-158">Managed Reservation Details Page</a></span>
+                                                                               
                 
+                                                                               
                 
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.5.1 <a 
+href="#x1-1460007.5.1" id="QQ2-1-159">Processes</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.5.2 <a 
+href="#x1-1470007.5.2" id="QQ2-1-160">Specification</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.6 <a 
+href="#x1-1480007.6" id="QQ2-1-161">Services Page</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.7 <a 
+href="#x1-1490007.7" id="QQ2-1-162">Service Details Page</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.7.1 <a 
+href="#x1-1500007.7.1" id="QQ2-1-163">Processes</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.7.2 <a 
+href="#x1-1510007.7.2" id="QQ2-1-164">Specification</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.8 <a 
+href="#x1-1520007.8" id="QQ2-1-165">System Details Page</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.8.1 <a 
+href="#x1-1530007.8.1" id="QQ2-1-166">Administration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.8.2 <a 
+href="#x1-1540007.8.2" id="QQ2-1-167">Classes</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.8.3 <a 
+href="#x1-1550007.8.3" id="QQ2-1-168">Daemons</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >7.8.4 <a 
+href="#x1-1560007.8.4" id="QQ2-1-169">Machines</a></span>
+<br />&#x00A0;<span class="sectionToc" >7.9 <a 
+href="#x1-1570007.9" id="QQ2-1-170">Visualization</a></span>
+<br /><span class="partToc" >III&#x00A0;&#x00A0;<a 
+href="#x1-158000III" id="QQ2-1-172">Programming Model And 
Applications</a></span>
+<br /><span class="chapterToc" >8 <a 
+href="#x1-1590008" id="QQ2-1-173">Building and Testing Jobs</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.1 <a 
+href="#x1-1600008.1" id="QQ2-1-174">Overview</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.1.1 <a 
+href="#x1-1610008.1.1" id="QQ2-1-175">Basic Job Process Threading 
Model</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.1.2 <a 
+href="#x1-1620008.1.2" id="QQ2-1-176">Alternate Pipeline Threading 
Model</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.1.3 <a 
+href="#x1-1630008.1.3" id="QQ2-1-177">Overriding UIMA Configuration 
Parameters</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.2 <a 
+href="#x1-1640008.2" id="QQ2-1-178">Collection Segmentation and Artifact 
Extraction</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.3 <a 
+href="#x1-1650008.3" id="QQ2-1-179">CAS Consumer Changes for DUCC</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.4 <a 
+href="#x1-1660008.4" id="QQ2-1-180">Job Development for an Existing Pipeline 
Design</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.5 <a 
+href="#x1-1670008.5" id="QQ2-1-181">Job Development for a New Pipeline 
Design</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.1 <a 
+href="#x1-1680008.5.1" id="QQ2-1-182">Collection Reader (CR) 
Characteristics</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.2 <a 
+href="#x1-1690008.5.2" id="QQ2-1-183">DUCC built-in Flow Controller</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.3 <a 
+href="#x1-1700008.5.3" id="QQ2-1-184">Workitem Feature Structure</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.4 <a 
+href="#x1-1710008.5.4" id="QQ2-1-185">Deployment Descriptor (DD) 
Jobs</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.5 <a 
+href="#x1-1720008.5.5" id="QQ2-1-186">Debugging</a></span>
+<br /><span class="chapterToc" >9 <a 
+href="#x1-1730009" id="QQ2-1-187">Sample Application: Raw Text 
Processing</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.1 <a 
+href="#x1-1740009.1" id="QQ2-1-188">Application Function and Design</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.2 <a 
+href="#x1-1750009.2" id="QQ2-1-189">Configuration Parameters</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.3 <a 
+href="#x1-1760009.3" id="QQ2-1-190">Set up a working directory</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.4 <a 
+href="#x1-1770009.4" id="QQ2-1-191">Download and Install OpenNLP</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.5 <a 
+href="#x1-1780009.5" id="QQ2-1-192">Get some Input Text</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.6 <a 
+href="#x1-1790009.6" id="QQ2-1-193">Run the Job</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.7 <a 
+href="#x1-1800009.7" id="QQ2-1-194">Job Output</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.8 <a 
+href="#x1-1810009.8" id="QQ2-1-195">Job Performance Details</a></span>
+<br /><span class="chapterToc" >10 <a 
+href="#x1-18200010" id="QQ2-1-198">Sample Application: CAS Input 
Processing</a></span>
+                                                                               
                 
+                                                                               
                 
+<br />&#x00A0;<span class="sectionToc" >10.1 <a 
+href="#x1-18300010.1" id="QQ2-1-199">Application Function and Design</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.2 <a 
+href="#x1-18400010.2" id="QQ2-1-200">Configuration Parameters</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.3 <a 
+href="#x1-18500010.3" id="QQ2-1-201">Run the Job</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.4 <a 
+href="#x1-18600010.4" id="QQ2-1-202">Job Performance Details</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.5 <a 
+href="#x1-18700010.5" id="QQ2-1-204">Limiting Job Resources</a></span>
+<br /><span class="partToc" >IV&#x00A0;&#x00A0;<a 
+href="#x1-188000IV" id="QQ2-1-205">Ducc Administrators Guide</a></span>
+<br /><span class="chapterToc" >11 <a 
+href="#x1-18900011" id="QQ2-1-206">Installation, Configuration, and 
Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.1 <a 
+href="#x1-19000011.1" id="QQ2-1-207">Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.2 <a 
+href="#x1-19100011.2" id="QQ2-1-208">Software Prerequisites</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.3 <a 
+href="#x1-19200011.3" id="QQ2-1-209">Building from Source</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.4 <a 
+href="#x1-19300011.4" id="QQ2-1-210">Documentation</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.5 <a 
+href="#x1-19400011.5" id="QQ2-1-211">Single System Installation and 
Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.6 <a 
+href="#x1-19500011.6" id="QQ2-1-212">Minimal Hardware Requirements for Single 
System Installation</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.7 <a 
+href="#x1-19600011.7" id="QQ2-1-213">Single System Installation</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.8 <a 
+href="#x1-19700011.8" id="QQ2-1-214">Initial System Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.9 <a 
+href="#x1-19800011.9" id="QQ2-1-215">Add additional nodes to the DUCC 
cluster</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.10 <a 
+href="#x1-19900011.10" id="QQ2-1-216">Ducc_ling Configuration - Running with 
credentials of submitting user</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.11 <a 
+href="#x1-20000011.11" id="QQ2-1-217">CGroups Installation and 
Configuration</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.12 <a 
+href="#x1-20100011.12" id="QQ2-1-218">Full DUCC Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.13 <a 
+href="#x1-20200011.13" id="QQ2-1-219">Enable DUCC webserver login</a></span>
+<br /><span class="chapterToc" >12 <a 
+href="#x1-20300012" id="QQ2-1-220">Administration</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.1 <a 
+href="#x1-20400012.1" id="QQ2-1-221">WebServer Authentication</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.1.1 <a 
+href="#x1-20500012.1.1" id="QQ2-1-222">Example Implementation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.1.2 <a 
+href="#x1-20600012.1.2" id="QQ2-1-223">IAuthenticationManager</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.1.3 <a 
+href="#x1-20700012.1.3" id="QQ2-1-224">IAuthenticationResult</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.1.4 <a 
+href="#x1-20800012.1.4" id="QQ2-1-225">Example ANT script to build 
jar</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.1.5 <a 
+href="#x1-20900012.1.5" id="QQ2-1-226">Example ducc.properties 
entries</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.1.6 <a 
+href="#x1-21000012.1.6" id="QQ2-1-227">Example ducc.administrators</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.2 <a 
+href="#x1-21100012.2" id="QQ2-1-228">Properties</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.3 <a 
+href="#x1-21200012.3" id="QQ2-1-229">Properties merging</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.4 <a 
+href="#x1-21300012.4" id="QQ2-1-230">ducc.properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.1 <a 
+href="#x1-21400012.4.1" id="QQ2-1-231">General DUCC Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.2 <a 
+href="#x1-21500012.4.2" id="QQ2-1-232">Web Server Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.3 <a 
+href="#x1-21600012.4.3" id="QQ2-1-233">Job Driver Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.4 <a 
+href="#x1-21700012.4.4" id="QQ2-1-234">Service Manager Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.5 <a 
+href="#x1-21800012.4.5" id="QQ2-1-235">Orchestrator Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.6 <a 
+href="#x1-21900012.4.6" id="QQ2-1-236">Resource Manager Properties</a></span>
+                                                                               
                 
+                                                                               
                 
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.7 <a 
+href="#x1-22000012.4.7" id="QQ2-1-237">Agent Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.8 <a 
+href="#x1-22100012.4.8" id="QQ2-1-238">Process Manager Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.4.9 <a 
+href="#x1-22200012.4.9" id="QQ2-1-239">Job Process Properties</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.5 <a 
+href="#x1-22300012.5" id="QQ2-1-240">ducc.private.properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.5.1 <a 
+href="#x1-22400012.5.1" id="QQ2-1-241">Web Server Properties</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.6 <a 
+href="#x1-22500012.6" id="QQ2-1-242">Resource Manager Configuration: Classes 
and Nodepools</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.6.1 <a 
+href="#x1-22600012.6.1" id="QQ2-1-243">Nodepools</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.6.2 <a 
+href="#x1-23000012.6.2" id="QQ2-1-251">Class Definitions</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.6.3 <a 
+href="#x1-23100012.6.3" id="QQ2-1-253">Validation</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.7 <a 
+href="#x1-23400012.7" id="QQ2-1-256">Ducc Node Definitions</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.8 <a 
+href="#x1-23500012.8" id="QQ2-1-258">Ducc User Definitions</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.9 <a 
+href="#x1-23600012.9" id="QQ2-1-260">Administrative Commands</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.1 <a 
+href="#x1-23700012.9.1" id="QQ2-1-261">start_ducc</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.2 <a 
+href="#x1-24400012.9.2" id="QQ2-1-268">stop_ducc</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.3 <a 
+href="#x1-24900012.9.3" id="QQ2-1-273">check_ducc</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.4 <a 
+href="#x1-25300012.9.4" id="QQ2-1-277">rm_reconfigure</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.5 <a 
+href="#x1-25600012.9.5" id="QQ2-1-280">rm_qload</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.6 <a 
+href="#x1-26100012.9.6" id="QQ2-1-285">rm_qoccupancy</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.7 <a 
+href="#x1-26400012.9.7" id="QQ2-1-288">vary_off</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.8 <a 
+href="#x1-26700012.9.8" id="QQ2-1-291">vary_on</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >12.9.9 <a 
+href="#x1-27000012.9.9" id="QQ2-1-294">ducc_properties_manager</a></span>
+<br /><span class="chapterToc" >13 <a 
+href="#x1-27500013" id="QQ2-1-299">Resource Management</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.1 <a 
+href="#x1-27600013.1" id="QQ2-1-300">Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.2 <a 
+href="#x1-27700013.2" id="QQ2-1-301">Preemption vs Eviction</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.3 <a 
+href="#x1-27800013.3" id="QQ2-1-302">Scheduling Policies</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.4 <a 
+href="#x1-27900013.4" id="QQ2-1-303">Allotment</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.5 <a 
+href="#x1-28000013.5" id="QQ2-1-304">Priority vs Weight</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.6 <a 
+href="#x1-28300013.6" id="QQ2-1-307">Node Pools</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.7 <a 
+href="#x1-28400013.7" id="QQ2-1-308">Scheduling Classes</a></span>
+<br /><span class="chapterToc" >14 <a 
+href="#x1-28500014" id="QQ2-1-309">Service Management</a></span>
+<br /><span class="chapterToc" >15 <a 
+href="#x1-28600015" id="QQ2-1-310">Simulation and System Testing</a></span>
+<br />&#x00A0;<span class="sectionToc" >15.1 <a 
+href="#x1-28700015.1" id="QQ2-1-311">Cluster Simulation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.1.1 <a 
+href="#x1-28800015.1.1" id="QQ2-1-312">Overview</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.1.2 <a 
+href="#x1-28900015.1.2" id="QQ2-1-313">Node Configuration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.1.3 <a 
+href="#x1-29000015.1.3" id="QQ2-1-314">Setting up Test Mode</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.1.4 <a 
+href="#x1-29100015.1.4" id="QQ2-1-315">Starting a Simulated Cluster</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.1.5 <a 
+href="#x1-29500015.1.5" id="QQ2-1-319">Stopping a Simulated Cluster</a></span>
+                                                                               
                 
+                                                                               
                 
+<br />&#x00A0;<span class="sectionToc" >15.2 <a 
+href="#x1-29900015.2" id="QQ2-1-323">Job Simulation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.2.1 <a 
+href="#x1-30000015.2.1" id="QQ2-1-324">Overview</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.2.2 <a 
+href="#x1-30100015.2.2" id="QQ2-1-325">Job meta-descriptors</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.2.3 <a 
+href="#x1-30200015.2.3" id="QQ2-1-326"><span 
+class="cmti-10">Prepare </span>Descriptors</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.2.4 <a 
+href="#x1-30300015.2.4" id="QQ2-1-327">Services</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.2.5 <a 
+href="#x1-30500015.2.5" id="QQ2-1-329">Generating a Job Set</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >15.2.6 <a 
+href="#x1-30600015.2.6" id="QQ2-1-330">Running the Test Driver</a></span>
+<br />&#x00A0;<span class="sectionToc" >15.3 <a 
+href="#x1-30700015.3" id="QQ2-1-331">Pre-Packaged Tests</a></span>
+<br /><span class="chapterToc" >16 <a 
+href="#x1-30800016" id="QQ2-1-332">DUCC Web Server Customization</a></span>
+<br />&#x00A0;<span class="sectionToc" >16.1 <a 
+href="#x1-30900016.1" id="QQ2-1-333">Server Side</a></span>
+<br />&#x00A0;<span class="sectionToc" >16.2 <a 
+href="#x1-31000016.2" id="QQ2-1-334">Client Side</a></span>
+<br />&#x00A0;<span class="sectionToc" >16.3 <a 
+href="#x1-31100016.3" id="QQ2-1-335">Build and Install</a></span>
+<br /><span class="chapterToc" >17 <a 
+href="#x1-31200017" id="QQ2-1-336">Understanding the DUCC logs</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.1 <a 
+href="#x1-31300017.1" id="QQ2-1-337">Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.2 <a 
+href="#x1-31400017.2" id="QQ2-1-338">Resource Manager Log (rm.log)</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.1 <a 
+href="#x1-31500017.2.1" id="QQ2-1-339">Bootstrap Configuration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.2 <a 
+href="#x1-32000017.2.2" id="QQ2-1-344">Node Arrival and Missed 
Heartbeats</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.3 <a 
+href="#x1-32300017.2.3" id="QQ2-1-347">Node Occupancy</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.4 <a 
+href="#x1-32400017.2.4" id="QQ2-1-348">Job Arrival and Status 
Updates</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.5 <a 
+href="#x1-32700017.2.5" id="QQ2-1-351">Calculation Of Job Caps</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.6 <a 
+href="#x1-32800017.2.6" id="QQ2-1-352">The &#8220;how much&#8221; 
calculations</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.7 <a 
+href="#x1-32900017.2.7" id="QQ2-1-353">The &#8220;what of&#8221; 
calculations</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.8 <a 
+href="#x1-33000017.2.8" id="QQ2-1-354">Defragmentation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.9 <a 
+href="#x1-33100017.2.9" id="QQ2-1-355">Published Schedule</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.3 <a 
+href="#x1-33400017.3" id="QQ2-1-358">Service Manager Log (sm.log)</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.3.1 <a 
+href="#x1-33500017.3.1" id="QQ2-1-359">Bootstrap configuration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.3.2 <a 
+href="#x1-34000017.3.2" id="QQ2-1-364">Receipt and analysis of Orchestrator 
State</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.3.3 <a 
+href="#x1-34100017.3.3" id="QQ2-1-365">CLI Requests</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.3.4 <a 
+href="#x1-34200017.3.4" id="QQ2-1-366">Dispatching / Startup of Service 
Instances</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.3.5 <a 
+href="#x1-34300017.3.5" id="QQ2-1-367">Progression of Service State</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.3.6 <a 
+href="#x1-34400017.3.6" id="QQ2-1-368">Starting and Logging Pingers</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.3.7 <a 
+href="#x1-34500017.3.7" id="QQ2-1-369">Publishing State</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.4 <a 
+href="#x1-34600017.4" id="QQ2-1-370">Orchestrator Log (or.log)</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.5 <a 
+href="#x1-34700017.5" id="QQ2-1-371">Process Manager Log (pm.log)</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.6 <a 
+href="#x1-34800017.6" id="QQ2-1-372">Agent Log 
(hostname.agent.log)</a></span>
+</div>
+                                                                               
                 
+                                                                               
                 
+<h2 class="likechapterHead"><a 
+ id="x1-4000"></a>List of Figures</h2><div class="tableofcontents"><span 
class="lofToc" >1.1&#x00A0;<a 
+href="#x1-10001r1">Standard UIMA Pipeline</a></span><br /><span class="lofToc" 
>1.2&#x00A0;<a 
+href="#x1-11001r2">UIMA Pipeline As Scaled by
+UIMA-AS</a></span><br /><span class="lofToc" >1.3&#x00A0;<a 
+href="#x1-12001r3">UIMA Pipeline As Automatically Scaled Out By 
DUCC</a></span><br /><span class="lofToc" >1.4&#x00A0;<a 
+href="#x1-13001r4">UIMA Pipeline
+With User-Supplied DD as Automatically Scaled Out By DUCC</a></span><br 
/><span class="lofToc" >5.1&#x00A0;<a 
+href="#x1-92001r1">Sample UIMA-AS
+Service Pinger</a></span><br /><span class="lofToc" >7.1&#x00A0;<a 
+href="#x1-132001r1">Sample Webserver Page</a></span><br /><span class="lofToc" 
>7.2&#x00A0;<a 
+href="#x1-136001r2">Preferences Page</a></span><br /><span class="lofToc" 
>7.3&#x00A0;<a 
+href="#x1-138001r3">Jobs Page</a></span><br /><span class="lofToc" 
>7.4&#x00A0;<a 
+href="#x1-140004r4">Processes
+Tab</a></span><br /><span class="lofToc" >7.5&#x00A0;<a 
+href="#x1-141001r5">Work Items Tab</a></span><br /><span class="lofToc" 
>7.6&#x00A0;<a 
+href="#x1-142001r6">Performance Tab</a></span><br /><span class="lofToc" 
>7.7&#x00A0;<a 
+href="#x1-143001r7">Specification Tab</a></span><br /><span class="lofToc" 
>7.8&#x00A0;<a 
+href="#x1-144001r8">Reservations
+Page</a></span><br /><span class="lofToc" >7.9&#x00A0;<a 
+href="#x1-157001r9">Visualization</a></span><br /><span class="lofToc" 
>9.1&#x00A0;<a 
+href="#x1-181001r1">OpenNLP Process Measurements</a></span><br /><span 
class="lofToc" >9.2&#x00A0;<a 
+href="#x1-181002r2">OpenNLP
+Process Breakdown</a></span><br /><span class="lofToc" >10.1&#x00A0;<a 
+href="#x1-186001r1">CAS Input Processing Performance</a></span><br /><span 
class="lofToc" >12.1&#x00A0;<a 
+href="#x1-227004r1">Nodepool
+Example</a></span><br /><span class="lofToc" >12.2&#x00A0;<a 
+href="#x1-227007r2">Nodepools: Overlapping Pools are Incorrect</a></span><br 
/><span class="lofToc" >12.3&#x00A0;<a 
+href="#x1-227008r3">Nodepools: Multiple
+top-level Nodepools</a></span><br /><span class="lofToc" >12.4&#x00A0;<a 
+href="#x1-229007r4">Sample Nodepool Configuration</a></span><br /><span 
class="lofToc" >12.5&#x00A0;<a 
+href="#x1-230001r5">Sample Class
+Configuration</a></span><br /><span class="lofToc" >12.6&#x00A0;<a 
+href="#x1-234001r6">Sample Node Configuration</a></span><br /><span 
class="lofToc" >12.7&#x00A0;<a 
+href="#x1-235001r7">Sample User Registration</a></span><br />
+</div>
+                                                                               
                 
+                                                                               
                 
+                                                                               
                 
+                                                                               
                 
+<!--l. 79--><p class="noindent" >
+                                                                               
                 
+                                                                               
                 
+<h1 class="partHead"><span class="titlemark">Part&#x00A0;I<br /></span><a 
+ id="x1-5000I"></a>DUCC Concepts</h1>
+<!--l. 22--><p class="noindent" ><a name='DUCC_OVERVIEW'></a>
+                                                                               
                 
+                                                                               
                 
+<h2 class="chapterHead"><span class="titlemark">Chapter&#x00A0;1</span><br 
/><a 
+ id="x1-60001"></a>DUCC Overview</h2>
+<h3 class="sectionHead"><span class="titlemark">1.1   </span> <a 
+ id="x1-70001.1"></a>What is DUCC?</h3>
+<!--l. 28--><p class="noindent" >DUCC stands for Distributed UIMA Cluster 
Computing. DUCC is a cluster management system providing
+tooling, management, and scheduling facilities to automate the scale-out of 
applications written to the UIMA
+framework.
+<!--l. 32--><p class="noindent" >Core UIMA provides a generalized framework 
for applications that process unstructured information such as human
+language, but does not provide a scale-out mechanism. UIMA-AS provides a 
scale-out mechanism to distribute UIMA
+pipelines over a cluster of computing resources, but does not provide job or 
cluster management of the resources.
+DUCC defines a formal job model that closely maps to a standard UIMA pipeline. 
Around this job model
+DUCC provides cluster management services to automate the scale-out of UIMA 
pipelines over computing
+clusters.
+<!--l. 39--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.2   </span> <a 
+ id="x1-80001.2"></a>DUCC Job Model</h3>
+<!--l. 41--><p class="noindent" >The Job Model defines the steps necessary to 
scale up a UIMA pipeline using DUCC. The goal of DUCC is to
+scale up any UIMA pipeline, including pipelines that must be deployed across 
multiple machines using shared
+services.
+<!--l. 45--><p class="noindent" >The DUCC Job model consists of standard UIMA 
components: a Collection Reader (CR), a CAS Multiplier (CM),
+application logic as implemented by one or more Analysis Engines (AE), and a CAS 
Consumer (CC).
+<!--l. 49--><p class="noindent" >The Collection Reader builds input CASs and 
forwards them to the UIMA pipelines. In the DUCC model, the CR is run in a
+process separate from the rest of the pipeline. In fact, in all but the 
smallest clusters it is run on a different physical machine
+than the rest of the pipeline. To achieve scalability, the CR must create very 
small CASs that do not contain application
+data, but which contain references to data; for instance, file names. Ideally, 
the CR should be runnable in a process
+not much larger than the smallest Java virtual machine. Later sections 
demonstrate methods for achieving
+this.
+<!--l. 57--><p class="noindent" >Each pipeline must contain at least one CAS 
Multiplier which receives the CASs from the CR. The CMs encapsulate the
+knowledge of how to receive the data references in the small CASs received 
from the CRs and deliver the referenced data to
+the application pipeline. DUCC packages the CM, AE(s), and CC into a single 
process, multiple instances of which are then
+deployed over the cluster.
+<!--l. 63--><p class="noindent" >A DUCC job therefore consists of a small 
specification containing the following items:
+     <ul class="itemize1">
+     <li class="itemize">The name of a resource containing the CR descriptor.
+     </li>
+     <li class="itemize">The name of a resource containing the CM descriptor.
+     </li>
+     <li class="itemize">The name of a resource containing the AE descriptor.
+     </li>
+     <li class="itemize">The name of a resource containing the CC descriptor.
+     </li>
+     <li class="itemize">Other information required to parameterize the above 
and identify the job such as log directory, working
+     directory, desired scale-out, classpath, etc. These are described in 
detail in subsequent sections.</li></ul>
+                                                                               
                 
+                                                                               
                 
+<!--l. 75--><p class="noindent" >On job submission, DUCC creates a single 
process executing the CR and one or more processes containing the analysis
+pipeline.
+<!--l. 78--><p class="noindent" >DUCC provides other facilities in support of 
scale-out:
+     <ul class="itemize1">
+     <li class="itemize">The ability to reserve all or part of a node in the 
cluster.
+     </li>
+     <li class="itemize">Automated management of services required in support 
of jobs.
+     </li>
+     <li class="itemize">The ability to schedule and execute arbitrary 
processes on nodes in the cluster.
+     </li>
+     <li class="itemize">Debugging tools and support.
+     </li>
+     <li class="itemize">A web server to display and manage work and cluster 
status.
+     </li>
+     <li class="itemize">A CLI and a Java API to support the above.</li></ul>
+<!--l. 89--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.3   </span> <a 
+ id="x1-90001.3"></a>DUCC From UIMA to Full Scale-out</h3>
+<!--l. 91--><p class="noindent" >In this section we demonstrate the 
progression of a simple UIMA pipeline to a fully scaled-out job running under
+DUCC.
+<!--l. 94--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-100001.3"></a><span 
+class="cmbx-10">UIMA Pipelines</span></span>
+A normal UIMA pipeline contains a Collection Reader (CR), one or more Analysis 
Engines (AE) connected in a pipeline, and
+a CAS Consumer (CC) as shown in <a 
+href="#x1-10001r1">Figure &#x00A0;1.1</a>.
+<!--l. 99--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                               
                 
+                                                                               
                 
+<a 
+ id="x1-10001r1"></a>
+                                                                               
                 
+                                                                               
                 
+
+<!--l. 101--><p class="noindent" ><img 
+src="images/uima-pipeline.jpg" alt="Standard UIMA Pipeline"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.1: </span><span  
+class="content">Standard UIMA Pipeline</span></div><!--tex4ht:label?: 
x1-10001r1 -->
+                                                                               
                 
+                                                                               
                 
+<!--l. 104--><p class="noindent" ></div><hr class="endfigure">
+<!--l. 106--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-110001.3"></a><span 
+class="cmbx-10">UIMA-AS Scaled Pipeline</span></span>
+With UIMA-AS the CR is separated into a discrete process and a CAS Multiplier 
(CM) is introduced into the pipeline as an
+interface between the CR and the pipeline, as shown in <a 
+href="#x1-11001r2">Figure &#x00A0;1.2</a> below. Multiple pipelines are 
serviced by the CR and are
+scaled-out over a computing cluster. The difficulty with this model is that 
each user is individually responsible for finding and
+scheduling computing nodes, installing communication software such as 
ActiveMQ, and generally managing the distributed
+job and associated hardware.
+<!--l. 116--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                               
                 
+                                                                               
                 
+<a 
+ id="x1-11001r2"></a>
+                                                                               
                 
+                                                                               
                 
+
+<!--l. 118--><p class="noindent" ><img 
+src="images/uima-as-pipeline.png" alt="UIMA Pipeline As Scaled by UIMA-AS"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.2: </span><span  
+class="content">UIMA Pipeline As Scaled by 
UIMA-AS</span></div><!--tex4ht:label?: x1-11001r2 -->
+                                                                               
                 
+                                                                               
                 
+<!--l. 121--><p class="noindent" ></div><hr class="endfigure">
+<!--l. 123--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-120001.3"></a><span 
+class="cmbx-10">UIMA Pipeline Scaled By DUCC</span></span>
+DUCC is a UIMA and UIMA-AS-aware cluster manager. To scale out work under DUCC 
the developer tells DUCC what
+the parts of the application are, and DUCC does the work to build the 
scale-out via UIMA/AS, to find and
+schedule resources, to deploy the parts of the application over the cluster, 
and to manage the job while it
+executes.
+<!--l. 129--><p class="noindent" >On job submission, the CR is wrapped with a 
DUCC main class and launched as a Job Driver (or JD). The DUCC main
+class establishes communication with other DUCC components and instantiates 
the CR. If the CR initializes
+successfully, and indicates that there are greater than 0 work items to 
process, the specified CM, AE and CC
+components are assembled into an aggregate, wrapped with a DUCC main class, 
and launched as a Job Process (or
+JP).
+<!--l. 135--><p class="noindent" >The JP will replicate the aggregate as many 
times as specified, each aggregate instance running in a single thread. When the
+aggregate initializes, and whenever an aggregate thread needs work, the JP 
wrapper will fetch the next work item from the
+JD, as shown in <a 
+href="#x1-12001r3">Figure &#x00A0;1.3</a> below.
+<!--l. 140--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                               
                 
+                                                                               
                 
+<a 
+ id="x1-12001r3"></a>
+                                                                               
                 
+                                                                               
                 
+
+<!--l. 142--><p class="noindent" ><img 
+src="images/ducc-sequential.png" alt="UIMA Pipeline As Automatically Scaled Out By DUCC"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.3: </span><span  
+class="content">UIMA Pipeline As Automatically Scaled Out By 
DUCC</span></div><!--tex4ht:label?: x1-12001r3 -->
+                                                                               
                 
+                                                                               
                 
+<!--l. 145--><p class="noindent" ></div><hr class="endfigure">
+<!--l. 147--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-130001.3"></a><span 
+class="cmbx-10">UIMA Pipeline with User-Supplied DD Scaled By 
DUCC</span></span>
+Application programmers may supply their own Deployment Descriptors to control 
intra-process threading and scale-out. If a
+DD is specified in the job parameters, DUCC will launch each JP with the 
specified UIMA-AS service instantiated in-process,
+as depicted in <a 
+href="#x1-13001r4">Figure &#x00A0;1.4</a> below. In this case the user can 
still specify how many work items to deliver to the service
+concurrently.
+<!--l. 155--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                               
                 
+                                                                               
                 
+<a 
+ id="x1-13001r4"></a>
+                                                                               
                 
+                                                                               
                 
+
+<!--l. 157--><p class="noindent" ><img 
+src="images/ducc-parallel.png" alt="UIMA Pipeline With User-Supplied DD as Automatically Scaled Out By DUCC"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.4: </span><span  
+class="content">UIMA Pipeline With User-Supplied DD as Automatically Scaled 
Out By DUCC</span></div><!--tex4ht:label?: x1-13001r4 -->
+                                                                               
                 
+                                                                               
                 
+<!--l. 160--><p class="noindent" ></div><hr class="endfigure">
+<h3 class="sectionHead"><span class="titlemark">1.4   </span> <a 
+ id="x1-140001.4"></a>Error Management </h3>
+<!--l. 164--><p class="noindent" >DUCC provides a number of facilities to 
assist error management:
+     <ul class="itemize1">
+     <li class="itemize">DUCC captures exceptions in the JPs and delivers them 
to the Job Drivers. The JD wrappers implement logic
+     to enforce error thresholds, to identify and log errors, and to reflect 
job problems in the DUCC Web Server.
+     Error thresholds are configurable both globally and on a per-job basis.
+     </li>
+     <li class="itemize">Error and timeout thresholds are implemented for both 
the initialization phase of a pipeline and the execution
+     phase.
+     </li>
+     <li class="itemize">Retry-after-error is supported: if a process has a 
failure on some CAS after initialization is successful, the
+     process is terminated and all affected CASs are retried, up to some 
configurable threshold.
+     </li>
+     <li class="itemize">To avoid disrupting existing workloads by a job that 
will fail to run, DUCC ensures that JD and JP processes
+     can successfully initialize before fully scaling out a job.
+     </li>
+     <li class="itemize">Various error conditions encountered while a job is 
running will prevent a problematic job from continuing
+     scale out, and can result in termination of the job.</li></ul>
+<!--l. 186--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.5   </span> <a 
+ id="x1-150001.5"></a>Cluster and Job Management</h3>
+<!--l. 187--><p class="noindent" >DUCC supports management of multiple jobs 
and multiple users in a distributed cluster:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">Multiple User Support</span> </dt><dd 
+class="description">When properly configured, DUCC runs all work under the 
identity of the submitting
+     user. Logs are written with the user&#8217;s credentials into the 
user&#8217;s file space designated at job submission.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Fair-Share Scheduling</span> </dt><dd 
+class="description">DUCC provides a Fair-Share scheduler to equitably share 
resources among multiple users.
+     The scheduler also supports semi-permanent reservation of full or partial 
machines.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Management</span> </dt><dd 
+class="description">DUCC  provides  a  Service  Manager  capable  of  
automatically  starting,  stopping,  and
+     otherwise managing and querying both UIMA-AS and non-UIMA-AS services in 
support of jobs.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job Lifetime Management and Orchestration</span> </dt><dd 
+class="description">DUCC includes an Orchestrator to manage the lifetimes of 
all
+     entities in the system.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Node Sharing</span> </dt><dd 
+class="description">DUCC allocates processes from one or more users on a node, 
each with a specified amount of
+     memory. DUCC&#8217;s preferred mechanism for constraining memory use is 
Linux Control Groups, or CGroups. For
+     nodes that do not support CGroups, DUCC agents monitor RAM use and kill 
processes that exceed their share
+     size by a settable fudge factor.
+                                                                               
                 
+                                                                               
                 
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC Agents</span> </dt><dd 
+class="description">DUCC Agents manage each node&#8217;s local resources and 
all processes started by DUCC. Each node in a
+     cluster has exactly one Agent. The Agent
+         <ul class="itemize1">
+         <li class="itemize">Monitors and reports node capabilities (memory, 
etc) and performance data (CPU busy, swap, etc).
+         </li>
+         <li class="itemize">Starts, stops, and monitors all processes on 
behalf of users.
+         </li>
+         <li class="itemize">Patrols the node for &#8220;foreign&#8221; 
(non-DUCC) processes, reporting them to the Web Server, and optionally
+         reaping them.
+         </li>
+         <li class="itemize">Ensures job processes do not exceed their 
declared memory requirements through the use of Linux CGroups.</li></ul>
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC Web server</span> </dt><dd 
+class="description">DUCC provides a web server displaying all aspects of the 
system:
+         <ul class="itemize1">
+         <li class="itemize">All jobs in the system, their current state, 
resource usage, etc.
+         </li>
+         <li class="itemize">All reserved resources and associated information 
(owner, etc.), including the ability to request and cancel
+         reservations.
+         </li>
+         <li class="itemize">All services, including the ability to start, 
stop, and modify service definitions.
+         </li>
+         <li class="itemize">All nodes in the system and their status, usage, 
etc.
+         </li>
+         <li class="itemize">The status of all DUCC management processes.
+         </li>
+         <li class="itemize">Access to documentation.</li></ul>
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Cluster Management Support</span> </dt><dd 
+class="description">DUCC provides system management support to:
+         <ul class="itemize1">
+         <li class="itemize">Start, stop, and query full DUCC systems.
+         </li>
+         <li class="itemize">Start, stop, and quiesce individual DUCC 
components.
+         </li>
+         <li class="itemize">Add and delete nodes from the DUCC system.
+         </li>
+         <li class="itemize">Discover DUCC processes (e.g. after partial 
failures).
+         </li>
+         <li class="itemize">Find and kill errant job processes belonging to 
individual users.
+         </li>
+         <li class="itemize">Monitor and display inter-DUCC messages.</li></ul>
+     </dd></dl>
+                                                                               
                 
+                                                                               
                 
+<!--l. 257--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.6   </span> <a 
+ id="x1-160001.6"></a>Security Measures</h3>
+<!--l. 258--><p class="noindent" >The following DUCC security measures are 
provided:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">user credentials</span> </dt><dd 
+class="description">DUCC instantiates user processes using a setuid root 
executable named ducc_ling. See more at
+     <a 
+href="#x1-170001.6.1"><span 
+class="cmti-10">ducc</span><span 
+class="cmti-10">_ling</span></a>.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">command line interface</span> </dt><dd 
+class="description">The CLI employs HTTP to send requests to the DUCC 
controller. The CLI creates and
+     employs public and private security keys in the user&#8217;s home 
directory for authentication of HTTP requests.
+     The controller validates requests via these same security keys.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">webserver</span> </dt><dd 
+class="description">The webserver facilitates operational control and 
therefore authentication is desirable.
+         <ul class="itemize1">
+         <li class="itemize">Each user has the ability to control certain 
aspects of only his/her active submissions.
+         </li>
+         <li class="itemize">Each administrator has the ability to control 
certain aspects of any user&#8217;s active submissions, as well as
+         modification of some DUCC operational characteristics.</li></ul>
+     <!--l. 276--><p class="noindent" >A simple interface is provided so that 
an installation can plug in a site-specific authentication mechanism comprising
+     userid and password.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ActiveMQ</span> </dt><dd 
+class="description">DUCC uses ActiveMQ for administrative communication. AMQ 
authentication is used to prevent arbitrary
+     processes from participating.</dd></dl>
+<!--l. 283--><p class="noindent" >
+<h4 class="subsectionHead"><span class="titlemark">1.6.1   </span> <a 
+ id="x1-170001.6.1"></a>ducc_ling</h4>
+<!--l. 285--><p class="noindent" >ducc_ling contains the following functions, 
which the security-conscious may verify by examining the source in
+$DUCC_HOME/duccling. All sensitive operations are performed only AFTER 
switching userids, to prevent unauthorized
+root access to the system.
+     <ul class="itemize1">
+     <li class="itemize">Changes its real and effective userid to that 
of the user invoking the job.
+     </li>
+     <li class="itemize">Optionally redirects its stdout and stderr to the 
DUCC log for the current job.
+     </li>
+     <li class="itemize">Optionally redirects its stdio to a port set by the 
CLI, when a job is submitted.
+     </li>
+     <li class="itemize">&#8220;Nice&#8221;s itself to a &#8220;worse&#8221; 
priority than the default, to reduce the chances that a runaway DUCC job could
+     monopolize a system.
+     </li>
+     <li class="itemize">Optionally sets user limits.
+     </li>
+     <li class="itemize">Prints the effective limits for a job to both the 
user&#8217;s log, and the DUCC agent&#8217;s log.
+     </li>
+     <li class="itemize">Changes to the user&#8217;s working directory, as 
specified by the job.
+                                                                               
                 
+                                                                               
                 
+     </li>
+     <li class="itemize">Optionally establishes LD_LIBRARY_PATH for the job 
from the environment variable <span 
+class="cmtt-10">DUCC</span><span 
+class="cmtt-10">_LD</span><span 
+class="cmtt-10">_LIBRARY</span><span 
+class="cmtt-10">_PATH</span>
+     if set in the DUCC job specification. (Secure Linux systems will prevent 
LD_LIBRARY_PATH from being set
+     by a program with root authority, so this is done AFTER changing userids).
+     </li>
+     <li class="itemize">ONLY user <span 
+class="cmti-10">ducc </span>may use the ducc_ling program in a privileged way. 
Ducc_ling contains checks to prevent even
+     user <span 
+class="cmti-10">root </span>from using it for privileged operations.
+     </li></ul>
+<!--l. 310--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.7   </span> <a 
+ id="x1-180001.7"></a>Security Issues</h3>
+<!--l. 311--><p class="noindent" >The following DUCC security issues should be 
considered:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">submit transmission &#8217;sniffed&#8217;</span> </dt><dd 
+class="description">In  the  event  that  the  DUCC  submit  command  is  
&#8217;sniffed&#8217;  then  the  user
+     authentication mechanism is compromised and user masquerading is 
possible. That is, the userid encryption
+     mechanism can be exploited such that user A can submit a job pretending 
to be user B.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">user </span><span 
+class="cmbxti-10">ducc </span><span 
+class="cmbx-10">password compromised</span> </dt><dd 
+class="description">In  the  event  that  the  <span 
+class="cmti-10">ducc  </span>user  password  is  compromised  then  the  root
+     privileged command <span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_ling </span>can be used to become any other user except root.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">user </span><span 
+class="cmbxti-10">root </span><span 
+class="cmbx-10">password compromised</span> </dt><dd 
+class="description">In the event that the <span 
+class="cmti-10">root </span>user password is compromised DUCC provides no
+     protection. That is, compromising the root user is equivalent to 
compromising the DUCC user password.</dd></dl>
+<!--l. 22--><p class="noindent" ><a name='DUCC_TERMINOLOGY'></a>
+                                                                               
                 
+                                                                               
                 
+<h2 class="chapterHead"><span class="titlemark">Chapter&#x00A0;2</span><br 
/><a 
+ id="x1-190002"></a>Glossary</h2>
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">Autostarted Service</span> </dt><dd 
+class="description">An autostarted service is a registered service that is 
started automatically by DUCC when
+     the DUCC system is booted.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Dependent service or job</span> </dt><dd 
+class="description">A  dependent  service  or  job  is  a  service  or  job  
that  specifies  one  or  more  service
+     dependencies in its job specification. The service or job is dependent 
upon the referenced service being
+     operational before being started by DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC</span> </dt><dd 
+class="description">Distributed UIMA Cluster Computing.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Registered service</span> </dt><dd 
+class="description">A  registered  service  is  a  service  that  is  
registered  with  DUCC.  DUCC  saves  the  service
+     specification and fully manages the service, ensuring it is running when 
needed, and shutdown when not.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Instance</span> </dt><dd 
+class="description">A service instance is one physical process which runs a 
CUSTOM or UIMA-AS service. UIMA-AS
+     services are usually scaled-out with multiple instances implementing the 
same underlying service logic.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Orchestrator (OR)</span> </dt><dd 
+class="description">The Orchestrator manages the life cycle of all entities 
within DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Process Manager (PM)</span>  </dt><dd 
+class="description">The Process Manager coordinates distribution of work among 
the Agents.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Resource Manager (RM)</span>  </dt><dd 
+class="description">The Resource Manager schedules physical resources for DUCC 
work.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Endpoint</span> </dt><dd 
+class="description">In DUCC, the service endpoint provides a unique identifier 
for a service. In the case of UIMA-AS
+     services, the endpoint also serves as a well-known address for contacting 
the service.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Manager (SM)</span> </dt><dd 
+class="description">The Service Manager manages the life-cycles of UIMA-AS and 
CUSTOM services. It
+     coordinates registration of services, starting and stopping of services, 
and ensures that services are available
+     and remain available for the lifetime of the jobs.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Agent</span> </dt><dd 
+class="description">DUCC Agent processes run on every node in the system. The 
Agent receives orders to start and stop processes
+     on each node. Agents monitor nodes, sending heartbeat packets with node 
statistics to interested components
+     (such as the RM and web-server). If CGroups are installed in the cluster, 
the Agent is responsible for managing
+     the CGroups for each job process. All processes other than the DUCC 
management processes are managed
+     as children of the agents.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC-MON</span> </dt><dd 
+class="description">DUCC-MON is the DUCC web-server.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job Driver (JD)</span> </dt><dd 
+class="description">The Job Driver is a thin wrapper that encapsulates a 
Job&#8217;s Collection Reader. The JD executes
+     as a process that is scheduled and deployed by DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job Process (JP)</span> </dt><dd 
+class="description">The Job Process is a thin wrapper that encapsulates a 
job&#8217;s pipeline components. The JP
+     executes in a process that is scheduled and deployed by DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job specification</span> </dt><dd 
+class="description">The Job Specification is a collection of properties that 
describe work to be scheduled and deployed
+     by DUCC. It identifies the UIMA components (CR, AE, etc) that comprise 
the job and the system-wide
+     properties of the job (CLASSPATHs, RAM requirements, etc).
+                                                                               
                 
+                                                                               
                 
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job</span> </dt><dd 
+class="description">A DUCC job consists of the components required to deploy 
and execute a UIMA pipeline over a computing
+     cluster. It consists of a JD to run the Collection Reader, a set of JPs 
to run the UIMA AEs, and a Job
+     Specification to describe how the parts fit together.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Share Quantum</span> </dt><dd 
+class="description">The DUCC scheduler abstracts the nodes in the cluster as a 
single large conglomerate of resources:
+     memory, processor cores, etc. The scheduler logically decomposes the 
collection of resources into some number
+     of equal-sized atomic units. Each unit of work requiring resources is 
apportioned one or more of these atomic
+     units. The smallest possible atomic unit is called the <span 
+class="cmti-10">share quantum</span>, or simply, <span 
+class="cmti-10">share</span>.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Process</span> </dt><dd 
+class="description">A process is one physical process executing on a machine 
in the DUCC cluster. DUCC jobs are comprised
+     of one or more processes (JDs and JPs). Each process is assigned one or 
more <span 
+class="cmti-10">shares </span>by the DUCC scheduler.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Weighted Fair Share</span> </dt><dd 
+class="description">A weighted fair share calculation is used to apportion 
resources equitably to the outstanding
+     work in the system. In a non-weighted fair-share system, all work 
requests are given equal consideration to all
+     resources. To provide some (&#8220;more important&#8221;) work more than 
equal resources, weights are used to bias the
+     allotment of shares in favor of some classes of work.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Work Items</span> </dt><dd 
+class="description">A DUCC work item is one unit of work to be completed in a 
single DUCC process. It is usually
+     initiated by the submission of a single CAS from the JD to one of the 
JPs. It could be thought of as a single
+     &#8220;question&#8221; to be answered by a UIMA analytic, or a single 
&#8220;task&#8221; to complete. Usually each DUCC JP executes
+     many work items per job.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">$DUCC</span><span 
+class="cmbx-10">_HOME</span> </dt><dd 
+class="description">The root of the installed DUCC runtime, e.g. 
/home/ducc/ducc_runtime. It need not be set in
+     the environment, although the examples in this document assume that it 
has been.
+     </dd></dl>
+                                                                               
                 
+                                                                               
                 
+<!--l. 83--><p class="noindent" >
+                                                                               
                 
+                                                                               
                 
+<h1 class="partHead"><span class="titlemark">Part&#x00A0;II<br /></span><a 
+ id="x1-20000II"></a>Ducc Users Guide</h1>
+<!--l. 23--><p class="noindent" ><a name='DUCC_CLI'></a>
+                                                                               
                 
+                                                                               
                 
+<h2 class="chapterHead"><span class="titlemark">Chapter&#x00A0;3</span><br 
/><a 
+ id="x1-210003"></a>Command Line Interface</h2>
+<!--l. 28--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-220003"></a><span 
+class="cmbx-10">Overview</span></span>
+The DUCC CLI is the primary means of communication with DUCC. Work is 
submitted, work is canceled, work is
+monitored, and work is queried with this interface.
+<!--l. 32--><p class="noindent" >All parameters may be passed to all the CLI 
commands in the form of Unix-like &#8220;long-form&#8221; (key, value) pairs, 
in which the
+key is preceded by the characters &#8220;<span 
+class="cmsy-10">--</span>&#8221;. As well, the parameters may be saved in a 
standard Java Properties file, without
+the leading &#8220;<span 
+class="cmsy-10">--</span>&#8221; characters. Both a properties file and 
command-line parameters may be passed to each CLI.
+When both are present, the parameters on the command line take precedence. 
Take, for example, the following
+simple job properties file, call it <span 
+class="cmtt-10">1.job</span>, where the environment variable &#8220;DH&#8221; 
has been set to the location of
+$DUCC_HOME.
+                                                                               
                 
+                                                                               
                 
+<div class="verbatim" id="verbatim-1">
+description&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;Test&#x00A0;job&#x00A0;1
+&#x00A0;<br />
+&#x00A0;<br 
/>classpath&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;${DH}/lib/uima-ducc/examples/*
+&#x00A0;<br 
/>environment&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;AE_INIT_TIME=5&#x00A0;AE_INIT_RANGE=5&#x00A0;LD_LIBRARY_PATH=/a/nother/path
+&#x00A0;<br 
/>scheduling_class&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;normal
+&#x00A0;<br />
+&#x00A0;<br 
/>driver_descriptor_CR&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;org.apache.uima.ducc.test.randomsleep.FixedSleepCR
+&#x00A0;<br 
/>driver_descriptor_CR_overrides&#x00A0;jobfile=${DH}/lib/examples/simple/1.inputs&#x00A0;compression=10
+&#x00A0;<br />error_rate=0.0
+&#x00A0;<br />
+&#x00A0;<br 
/>driver_jvm_args&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;-Xmx500M
+&#x00A0;<br />
+&#x00A0;<br 
/>process_descriptor_AE&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;org.apache.uima.ducc.test.randomsleep.FixedSleepAE
+&#x00A0;<br 
/>process_memory_size&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;4
+&#x00A0;<br 
/>process_jvm_args&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;-Xmx100M
+&#x00A0;<br 
/>process_thread_count&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;2
+&#x00A0;<br 
/>process_per_item_time_max&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;5
+&#x00A0;<br 
/>process_deployments_max&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;999
+&#x00A0;<br /></div>
+<!--l. 59--><p class="nopar" >
+<!--l. 61--><p class="noindent" >This can be submitted, overriding the 
scheduling class and memory, thus:
+                                                                               
                 
+                                                                               
                 
+<div class="verbatim" id="verbatim-2">
+ducc_submit&#x00A0;--specification&#x00A0;1.job&#x00A0;--process_memory_size&#x00A0;16&#x00A0;--scheduling_class&#x00A0;high</div>
+<!--l. 64--><p class="nopar" >
+<!--l. 66--><p class="noindent" >The DUCC CLI parameters are now described in 
detail.
+<!--l. 68--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.1   </span> <a 
+ id="x1-230003.1"></a>The DUCC Job Descriptor</h3>
+<!--l. 69--><p class="noindent" >The DUCC Job Descriptor includes properties 
to enable automated management and scale-out over large computing clusters.
+The job descriptor includes
+     <ul class="itemize1">
+     <li class="itemize">References to the various UIMA components required by 
the job (CR, CM, AE, CC, and maybe DD)
+     </li>
+     <li class="itemize">Scale-out requirements: number of processes, number 
of threads per process, etc
+     </li>
+     <li class="itemize">Environment requirements: log directory, working 
directory, environment variables, etc
+     </li>
+     <li class="itemize">JVM parameters
+     </li>
+     <li class="itemize">Scheduling class
+     </li>
+     <li class="itemize">Error-handling preferences: acceptable failure 
counts, timeouts, etc
+     </li>
+     <li class="itemize">Debugging and monitoring requirements and 
preferences</li></ul>
+<!--l. 81--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.2   </span> <a 
+ id="x1-240003.2"></a>Operating System Limit Support</h3>
+<!--l. 82--><p class="noindent" >The CLI supports specification of operating 
system limits applied to the various job processes. To specify a limit, pass the
+name of the limit and its value in the <span 
+class="cmti-10">environment </span>specified in the job. Limits are named with 
the string
+&#8220;DUCC_RLIMIT_name&#8221; where &#8220;name&#8221; is the name of a 
specific limit. Supported limits include:
+     <ul class="itemize1">
+     <li class="itemize">DUCC_RLIMIT_CORE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_CPU
+     </li>
+     <li class="itemize">DUCC_RLIMIT_DATA
+     </li>
+     <li class="itemize">DUCC_RLIMIT_FSIZE
+                                                                               
                 
+                                                                               
                 
+     </li>
+     <li class="itemize">DUCC_RLIMIT_MEMLOCK
+     </li>
+     <li class="itemize">DUCC_RLIMIT_NOFILE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_NPROC
+     </li>
+     <li class="itemize">DUCC_RLIMIT_RSS
+     </li>
+     <li class="itemize">DUCC_RLIMIT_STACK
+     </li>
+     <li class="itemize">DUCC_RLIMIT_AS
+     </li>
+     <li class="itemize">DUCC_RLIMIT_LOCKS
+     </li>
+     <li class="itemize">DUCC_RLIMIT_SIGPENDING
+     </li>
+     <li class="itemize">DUCC_RLIMIT_MSGQUEUE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_NICE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_STACK
+     </li>
+     <li class="itemize">DUCC_RLIMIT_RTPRIO</li></ul>
+<!--l. 104--><p class="noindent" >See the Linux documentation for details on 
the meanings of these limits and their values.
+<!--l. 106--><p class="noindent" >For example, to set the maximum number of 
open files allowed in any job process, specify an environment similar to this
+when submitting the job:
+                                                                               
                 
+                                                                               
                 
+<div class="verbatim" id="verbatim-3">
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ducc_submit&#x00A0;....&#x00A0;--environment="DUCC_RLIMIT_NOFILE=1024"&#x00A0;...</div>
+<!--l. 110--><p class="nopar" >
+<!--l. 112--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.3   </span> <a 
+ id="x1-250003.3"></a>Command Line Forms</h3>
+<!--l. 113--><p class="noindent" >The Command Line Interface is provided in 
several forms:
+<!--l. 116--><p class="noindent" >
+     <dl class="enumerate-enumitem"><dt class="enumerate-enumitem">
+  1. </dt><dd 
+class="enumerate-enumitem">A wrapper script around the uima-ducc-cli.jar.
+     </dd><dt class="enumerate-enumitem">
+  2. </dt><dd 
+class="enumerate-enumitem">Direct invocation of each command&#8217;s <span 
+class="cmtt-10">class </span>with the <span 
+class="cmtt-10">java </span>command.</dd></dl>
+<!--l. 120--><p class="noindent" >When using the scripts the full execution 
environment is established silently. When invoking a command&#8217;s <span 
+class="cmtt-10">class </span>directly, the
+java <span 
+class="cmtt-10">CLASSPATH </span>must include the uima-ducc-cli.jar, as 
illustrated in the wrapper scripts.
+<!--l. 124--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.4   </span> <a 
+ id="x1-260003.4"></a>DUCC Commands</h3>
+<!--l. 125--><p class="noindent" >The following commands are provided:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_submit</span> </dt><dd 
+class="description">Submit a job for execution.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_cancel</span> </dt><dd 
+class="description">Cancel a job in progress.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_reserve</span> </dt><dd 
+class="description">Request a reservation of a full machine.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_unreserve</span> </dt><dd 
+class="description">Cancel a reservation.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_monitor</span> </dt><dd 
+class="description">Monitor the progress of a job that is already submitted.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_process</span><span 
+class="cmbx-10">_submit</span> </dt><dd 
+class="description">Submit an arbitrary process (managed reservation) for 
execution.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_process</span><span 
+class="cmbx-10">_cancel</span> </dt><dd 
+class="description">Cancel an arbitrary process.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_services</span> </dt><dd 
+class="description">Register, unregister, start, stop, modify, disable, 
enable, ignore references, observe references, and
+     query a service.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_view</span><span 
+class="cmbx-10">_perf</span> </dt><dd 
+class="description">Fetch performance data from the log and history files for 
analysis by spreadsheets, etc.
+                                                                               
                 
+                                                                               
                 
+     </dd><dt class="description">
+<span 
+class="cmbx-10">viaducc</span> </dt><dd 
+class="description">This is a script wrapper to facilitate execution of 
Eclipse workspaces as DUCC jobs as well as general
+     execution of arbitrary processes in DUCC-managed resources.</dd></dl>
+<!--l. 142--><p class="noindent" >The next section describes these commands in 
detail.
+<!--l. 22--><p class="noindent" ><a name='DUCC_CLI_SUBMIT'></a>
+<!--l. 25--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.5   </span> <a 
+ id="x1-270003.5"></a>ducc_submit</h3>
+<!--l. 27--><p class="noindent" >The source for this section is 
ducc_duccbook/documents/part-user/cli/submit.xml.
+<!--l. 28--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-280003.5"></a><span 
+class="cmbx-10">Description:</span></span>
+The submit CLI is used to submit work for execution by DUCC. DUCC assigns a 
unique id to the job and schedules it for
+execution. The submitter may optionally request that the progress of the job 
is monitored, in which case the state of the job
+as it progresses through its lifetime is printed on the console.
+<!--l. 33--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-290003.5"></a><span 
+class="cmbx-10">Usage:</span></span>
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">Script wrapper</span> </dt><dd 
+class="description">$DUCC_HOME/bin/ducc_submit <span 
+class="cmti-10">options</span>
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Java Main</span> </dt><dd 
+class="description">java -cp $DUCC_HOME/lib/uima-ducc-cli.jar 
org.apache.uima.ducc.cli.DuccJobSubmit <span 
+class="cmti-10">options</span></dd></dl>
+<!--l. 39--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-300003.5"></a><span 
+class="cmbx-10">Options:</span></span>
+     <dl class="description"><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">all</span><span 
+class="cmbx-10">_in</span><span 
+class="cmbx-10">_one </span><span 
+class="cmmi-10">&#x003C;</span><span 
+class="cmbx-10">local </span><span 
+class="cmsy-10">| </span><span 
+class="cmbx-10">remote </span><span 
+class="cmmi-10">&#x003E;</span> </dt><dd 
+class="description">Run driver and pipeline in single process. If <span 
+class="cmti-10">local </span>is specified, the process is
+     executed on the local machine, for example, in the current Eclipse 
session. If <span 
+class="cmti-10">remote </span>is specified, the job is
+     submitted to DUCC as a <span 
+class="cmti-10">managed reservation </span>and run on some (presumably larger) 
machine allocated by
+     DUCC.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">attach</span><span 
+class="cmbx-10">_console</span> </dt><dd 
+class="description">If specified, redirect remote stdout and stderr to the 
local submitting console.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">cancel</span><span 
+class="cmbx-10">_on</span><span 
+class="cmbx-10">_interrupt</span> </dt><dd 
+class="description">If  specified,  the  job  is  monitored  and  will  be  
canceled  if  the  submit  command  is
+     interrupted, e.g. with CTRL-C. This option always implies <span 
+class="cmsy-10">--</span>wait_for_completion.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">classpath [path-string]</span> </dt><dd 
+class="description">The CLASSPATH used for the job. If specified, this is used 
for both the Job Driver
+     and each Job Process. If not specified, the CLASSPATH of the process 
invoking this request is used.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">debug</span> </dt><dd 
+class="description">Enable debugging messages. This is primarily for debugging 
DUCC itself.
+                                                                               
                 
+                                                                               
                 
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">description [text]</span> </dt><dd 
+class="description">The text is any string used to describe the job. It is 
displayed in the Web Server. When
+     specified on a command-line the text usually must be surrounded by quotes 
to protect it from the shell. The
+     default is &#8220;none&#8221;.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_debug [debug-port]</span> </dt><dd 
+class="description">Append JVM debug flags to the JVM arguments to start the 
JobDriver in remote
+     debug mode. The remote process debugger will attempt to contact the 
specified port.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_descriptor</span><span 
+class="cmbx-10">_CR [descriptor.xml]</span>  </dt><dd 
+class="description">This  is  the  XML  descriptor  for  the  Collection  
Reader.  This
+     descriptor is a resource that is searched for in the filesystem or Java 
classpath as described in the &#x00A0;<a 
+href="#x1-310003.5">notes below</a>.
+     (Required)
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_descriptor</span><span 
+class="cmbx-10">_CR</span><span 
+class="cmbx-10">_overrides [list]</span>  </dt><dd 
+class="description">This is the Job Driver collection reader configuration 
overrides. They are
+     specified as name/value pairs in a whitespace-delimited list. For example:
+                                                                               
                 
+                                                                               
                 
+     <div class="verbatim" id="verbatim-4">
+     
--driver_descriptor_CR_overrides&#x00A0;name1=value1&#x00A0;name2=value2...
+     &#x00A0;<br 
/>&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;</div>
+     <!--l. 79--><p class="nopar" >
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_exception</span><span 
+class="cmbx-10">_handler [classname]</span> </dt><dd 
+class="description">This specifies a 
developer-supplied exception handler for the Job Driver. It
+     must implement org.apache.uima.ducc.IErrorHandler or extend 
org.apache.uima.ducc.ErrorHandler. A default handler
+     is provided.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_exception</span><span 
+class="cmbx-10">_handler</span><span 
+class="cmbx-10">_arguments [argument-string]</span> </dt><dd 
+class="description">This is a string containing arguments for the exception
+     handler. The contents of the string are entirely a function of the 
specified handler. If not specified, a <span 
+class="cmti-10">null </span>is passed
+     in.
+     <!--l. 93--><p class="noindent" >Note: When used as a CLI option, the 
string must usually be quoted to protect it from the shell, if it contains
+     blanks.
+     <!--l. 96--><p class="noindent" >The built-in default exception handler 
supports an argument string of the following form (with NO embedded
+     blanks):
+                                                                               
                 
+                                                                               
                 
+     <div class="verbatim" id="verbatim-5">
+     &#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;max_job_errors=15</div>
+     <!--l. 100--><p class="nopar" >
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_jvm</span><span 
+class="cmbx-10">_args [list]</span>  </dt><dd 
+class="description">
+     <!--l. 104--><p class="noindent" >This specifies extra JVM arguments to 
be provided to the Job Driver process. It is a blank-delimited list of strings.
+     Example:
+                                                                               
                 
+                                                                               
                 
+     <div class="verbatim" id="verbatim-6">
+     --driver_jvm_args&#x00A0;-Xmx100M&#x00A0;-Xms50M
+     &#x00A0;<br 
/>&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;</div>
+     <!--l. 108--><p class="nopar" >
+     <!--l. 110--><p class="noindent" >Note: When used as a CLI option, the 
list must usually be quoted to protect it from the shell.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">environment [env vars]</span> </dt><dd 
+class="description">Blank-delimited list of environment variables and variable 
assignments. Entries will be copied
+     from the user&#8217;s environment if just the variable name is specified, 
optionally with a final &#8217;*&#8217; for those with the same
+     prefix. If specified, this is used for all DUCC processes in the job. 
Example:
+                                                                               
                 
+                                                                               
                 
+     <div class="verbatim" id="verbatim-7">
+     
--environment&#x00A0;TERM=xterm&#x00A0;DISPLAY=:1.0&#x00A0;LANG&#x00A0;UIMA_*</div>
+     <!--l. 120--><p class="nopar" >
+     <!--l. 122--><p class="noindent" >Additional entries may be copied from 
the user&#8217;s environment based on the setting of
+                                                                               
                 
+                                                                               
                 
+     <div class="verbatim" id="verbatim-8">
+     ducc.submit.environment.propagated</div>
+     <!--l. 125--><p class="nopar" > in the global DUCC configuration 
ducc.properties.
+     <!--l. 128--><p class="noindent" >Note: When used as a CLI option, the 
environment string must usually be quoted to protect it from the
+     shell.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">help</span>  </dt><dd 

[... 12808 lines stripped ...]


Reply via email to