Added: uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/JournalVolumeMarker.txt URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/JournalVolumeMarker.txt?rev=1478378&view=auto ============================================================================== --- uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/JournalVolumeMarker.txt (added) +++ uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/JournalVolumeMarker.txt Thu May 2 14:11:58 2013 @@ -0,0 +1,6 @@ +VOL. +vol. +vol +VOL +volume +Volume \ No newline at end of file
Added: uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Months.txt URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Months.txt?rev=1478378&view=auto ============================================================================== --- uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Months.txt (added) +++ uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Months.txt Thu May 2 14:11:58 2013 @@ -0,0 +1,54 @@ +Januar +Jan +January +Jan +Februar +Feb +February +Feb +März +mär +Mrz +March +Mar +April +Apr +April +Apr +Mai +Mai +May +May +Juni +Jun +June +Jun +Juli +Jul +July +Jul +August +Aug +August +Aug +September +Sep +September +Sep +Sept +Oktober +Okt +October +Oct +November +Nov +November +Nov +Dezember +Dez +December +Dec +Summer +Fall +Spring +Winter Added: uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/PagesMarker.txt URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/PagesMarker.txt?rev=1478378&view=auto ============================================================================== --- uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/PagesMarker.txt (added) +++ uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/PagesMarker.txt Thu May 2 14:11:58 2013 @@ -0,0 +1,7 @@ +p. +pp. +PP. 
+pp +PP +pages +Pages \ No newline at end of file Added: uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Publishers.txt URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Publishers.txt?rev=1478378&view=auto ============================================================================== --- uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Publishers.txt (added) +++ uima/sandbox/ruta/trunk/example-projects/TextRulerExample/resources/Publishers.txt Thu May 2 14:11:58 2013 @@ -0,0 +1,512 @@ +Springer Verlag +Springer-Verlag +Springer +Morgan Kaufmann +Kluwer Academic Publishers +The MIT Press +MIT Press +The AAAI Press +AAAI Press +Addison-Wesley +Addison Wesley +Association for Computing Machinery +Africa Journals Online +Bentham Science Publishers +Blackwell Publishing +BioMed Central +Brill +Chinese Electronic Periodical Services +Cambridge University Press +Walter de Gruyter GmbH & Co. KG +Elsevier Science +Emerald Insight +Haworth Press Inc +Portal of Scientific Journals of Croatia +IEEE +Inderscience Publishers +Informa plc +Japan Science and Technology Information Aggregator, Electronic +S. 
Karger AG +Lippincott Williams and Wilkins +Maney Publishing +NewsRX +OECD Publications Centre +Oxford University Press +Reed Business +Sage Publications +SciELO +Springer +Taylor and Francis Group +Thieme Medical Publishers +John Wiley and Sons +World of Information +Medium Publishers (30 to 100 journals) +American Anthropological Association +Academic Journals +American City Business Journals +American Chemical Society +Adis International Limited (now part of Wolters Kluwer Health) +Academic Electronic Press +American Institute of Physics +American Library Association +Allerton Press Inc +American Psychological Association +Allen Press Online Publishing +Annual Reviews +American Society of Civil Engineers +Academy of Sciences Library, Czech Republic +John Benjamins Publishing Company +The Berkeley Electronic Press +Bioline International +Bioone +BMJ Publishing Group +Richard Boorberg Verlag +Bohn Stafleu Van Loghum +Copernicus Group +Consejo Superior de Investigaciones Cientificas (Spanish National Research Council) +Commonwealth Scientific and Industrial Research Organisation Publishing + +Dove Press Ltd. +Duke University Press +Eclipse Group +EDP Sciences +Elsevier Health +Emap +The European Mathematical Information Service +Equinox Publishing Ltd +Lawrence Erlbaum Associates, Inc. +E. Schweizerbart Science Publishers +Edinburgh University Press +Frank Cass & Co. 
Ltd +Giuffre Editore +Global Science Books +Carl Hanser Verlag +Harrassowitz Verlag +Heldref Publishing +Hindawi Publishing Corporation +Idea Group Inc +The Institution of Electrical Engineers + imbiomed +Intellect Ltd +Institute of Physics +IOS Press +Internet Scientific Publications LLC +Jane's Information Group +Johns Hopkins University Press +Editions Lavoisier +Libertas Academica +Mary Ann Liebert +Masson Editeur +Meditsina Publishers +Medknow Publications +Medwell Journals +Edizioni Minerva Medica +MIT Press +il Mulino +Multi-Science Publishing Co Ltd +Nova Science Publishers +Nature Publishing Group +Oldenbourg Wissenschaftsverlag +Palgrave Publishers Ltd. +Peeters Publishers +PennWell Corporation +Penton Media +Presses Universitaires de France +International Academic Publishing Co (Nauka/Interperiodica) +Research Trends +Rodopi Publishers +Royal Society of Chemistry +Sabinet Online +M. E. Sharpe Inc. +Sweet & Maxwell +University of Tehran +Ten Hagen & Stam BV +Thomson American Health Consultants +Tehran University of Medical Sciences Publications +Universitat de Barcelona +University of California Press +University of Chicago Press +Universidad Complutense de Madrid +World Academy of Science, Engineering and Technology +WITPress +World Scientific Publishing Company + +Small Publishers (3 to 29 journals) +American Accounting Association +American Association for the Advancement of Science +American Association for Cancer Research +Association for the Advancement in Combinatorial Sciences +American Association for Health Education +American Association for Laboratory Animal Science +American Association of Neurological Surgeons +American Academy of Pediatrics +American Association of Pharmaceutical Scientists +American Astronomical Society +AB Academic Publishers +American Bar Association +King Abdulaziz University – Scientific Publishing Center +American Counseling Association +Academy Publisher, Finland +America's Community Bankers +Association for Childhood 
Education International +Australian Council for Educational Research +ACG Publishing +Academy of Chemistry of Globe Publications +American College of Healthcare Executives +American Concrete Institute +Academy of Natural Sciences +Australian Council of Social Service +American College of Physicians +American Congress on Surveying and Mapping +ACTA Press +American Diabetes Association +Addleton Academic Publishers +Adenine Press +Astrophysical Data Service +Advanstar Communications +American Economic Association +AEGIS Publications +Association for Education in Journalism and Mass Communication +American Fisheries Society +American Foundry Society +American Federation of Teachers +Academy of General Dentistry +Guttmacher Institute +Agra Informa Ltd +The American Society of Agronomy +American Geophysical Union +American Institute of Aeronautics and Astronautics +Agricultural Institute of Canada +American Institute for Conservation of Historic and Artistic Works +American Institute of Chemical Engineers +American Institute of Mathematical Sciences +American Institute of Musicology +Academy & Industry Research Collaboration Center ( AIRCC ) +Anthony J. Jannetti, Inc. 
+Andrew John Publishing Inc +Akademie Verlag +A K Peters Ltd +Verlag Karl Alber +Allied Academies +Allured Publishing Corp +Alpe Editores +American Marketing Association +American Medical Association +American Correctional Association +Amateur Entomologists' Society (AES) +American Geographical Society +American Meteorological Society +American Nuclear Society +American Nurses Association +The American Physiological Society +American Mathematical Society +Association for the Advancement of Modelling and Simulation Techniques in Enterprises (AMSE) +American Statistical Association +The Analytic Press (now part of Taylor & Francis) +American Nephrology Nurses' Association +ANSInet +Association of Official Analytical Chemists +The American Oil Chemists' Society +Academy of Management +American Pharmacists Association +American Psychiatric Publishing Inc +Applied Probability Trust +The American Physical Society +The American Phytopathological Society +American Physical Therapy Association +Association for Preservation Technology International (APT) +Thomson Aranzadi +Agricultural Research Communication Centre +Archaeological Institute of America +Africa Resource Center, Inc. +American Real Estate Society +American Risk and Insurance Association +Armand Colin +Arnold Publishers +Array Publications +American Sociological Association +American Society for Agricultural Engineers +The American Society for Biochemistry and Molecular Biology +American Society for Cell Biology +American Society for Engineering Education +American Society for Education Science Research +American Society of Hematology +American Speech-Language-Hearing Association. 
+Ashdin Publishing +Ashgate Publishing +American Society for Horticultural Science +American Society for Information Science and Technology +American Society of Limnology and Oceanography +American Society for Microbiology +The American Society of Mechanical Engineers +ASM International +American Schools of Oriental Research +American Scientific Publishers +Aspen Publishers +American Society for Pharmacology and Experimental Therapeutics +American Society for Quality +Association for the Advancement of Computing in Education +Association for Science Education +ASTM International +AstonJournals +American Thoracic Society +Amsterdam University Press +Australian Academic Press +Australian Mathematical Society +Air and Waste Management Association +Association of the Scientific Medical Societies in Germany (AWMF) +American Water Works Association +Baird Publications +A.A.Balkema Publishers +Bangladesh Journals Online +Behavior Analysis Online +Bulgarian Academy of Sciences +Bauer Media +Bayard Inc +Bayerische Staatsbibliothek +Baywood Publishing Company +BC Decker +Boston College Law School +Verlag C.H. Beck oHG +Beech Tree Publishing +Begell House Publishing Inc +Editions Belin +Les Belles Lettres +Bellwether Publishing +Berg Publishers +Berghahn Books +University of California, Berkeley, Law School +British Herpetological Society +Business Information Group +Bioinfo Publications +The Company of Biologists Ltd +Biolife +Biochemical Society, London +British Institute of Radiology +Blackhorse Publishing International +Business News Publishing Company (BNP Media) +Boehlau Verlag +BoomSun +Borgis Publishing House +NSW Bureau of Crime Statistics and Research (BOSCAR) +R.R. 
Bowker +British Psychological Society +Council for British Archaeology +Brookings Institution +Brunton Business Publications +Bruylant +CABI Publishing +College of Aerospace Doctrine, Research and Education, Maxwell Air Force Base, Alabama +Canadian Agricultural Economics Society +Cairn +Cameron May +Canadian Medical Association +Conference Board of Canada +CCH +Canadian Center of Science and Education +CCS Publishing +Canadian Dental Association +Centers for Disease Control and Prevention +California Digital Library +Council for Exceptional Children +CEDAM +Centaur Communications Ltd. +Centro de Estudios Pol +Council for Economic Planning and Development +Centre for Environment, Social & Economic Research (CESER), India +Chadwyck-Healey +CIG Media Group, LP. +Canadian Institute of Mining, Metallurgy and Petroleum +Carden Jennings Publishing +Czech Medical Association +Clute Institute for Academic Research +CMP Media, LLC +Canadian Mathematical Society +Centre National de la Recherche Scientifique +Co-Action Publishing +Cognizant Communication Corporation +Law School, Columbia University +Common Ground Publishing +Continuum +Cooper Ornithological Society +Cornell University +Cornetis Publishing House +Canadian Psychiatric Association +CQ Press +Crain Communications Inc +CRC Press +Centre for Reformation and Renaissance Studies +Cambridge Scientific Abstracts +Canadian Research & Development Center of Sciences and Cultures +CSF Medical Communications Ltd +Cold Spring Harbor Laboratory Press +Chemical Society of Japan +CTSNet +Catholic University of America Press +Cuneiform Digital Library Initiative +Current Medicine Group LLC +Curtin University of Technology +School of Law, Case Western Reserve University +Dalloz +Universitatea Danubius, Romania +Data Trace Publishing +Groupe De Boeck +Editions La Decouverte +Dempa Publications, Inc +Diesel & Gas Turbine Publications +Digital Library and Archives +DMG World Media (UK) Ltd +Dovetail Communications Inc +Duke 
University School of Law +Duncker & Humblot +Dustri Verlag +Dynamic Publishers +Euro-American Association of Economic Development Studies +Early Music America +Earthscan +Eashwar Publications +The Electrochemical Society +Ecomed Verlagsgesellschaft AG +eContent Management Pty Ltd +Editio Cantor Verlag (ECV) +Editecom (Editions Techniques et Economiques) +Educause +Education Publishing Company Ltd +L'Ecole des Hautes Etudes en Sciences Sociales (EHESS) +Euromoney Institutional Investor Plc +Pharma Publishing & Media Europe (PPM Europe) +Elmer Press +EMC Consulting Group BVBA +E-Med +EMH +Emory University School of Law +Endocrine Society +Society for Endocrinology +Engineers Australia +Engg Journals Publications +The Norwegian Entomological Society +Editions Eres +Erich Schmidt Verlag GmbH +Edizioni Erickson +European Respiratory Society +Ecological Society of America +Executive Sciences Institute +ESKA Editions +L'Esprit du Temps +Eurail Press +European Journals Inc. +Evangelische Verlagsanstalt GmbH +Deutscher Fachverlag +Fairmont Press, Inc. 
+Food and Agriculture Organization of the United Nations +FBCommunication +Expert Reviews +FDC Reports Inc +FDI World Dental Federation +Giovanni Fioriti Editore +Florida State University College of Law +Firearms Marketing Group Publications +Fordham University School of Law +Freund Publishing House +Fuji Technology Press +Fordham University Press +Future Medicine +Future Science +Editions Gabalda +Gangemi Editore +Gauthier-Villars Editeur +Gentner Verlag +Geographical Association +Institute of Geosciences, University of Debrecen, Hungary +The Geology Society of America +The Geological Society (London) +Institute of Geophysics, Polish Academy of Sciences +Geothermal Resources Council +Georgetown University +The Gerontological Society of America +Garden History Society +Giesel Verlag +Drukkerij Giethoorn Ten Brink +Geodetical Information & Trading Centre +Global Journals +Global Science Press +Greenleaf Publishing +Grosse Verlag +Grupo Aran +GeoScienceWorld +University of Guelph +Guildford Publishing Inc +Hart Energy Publishing +Hatton-Brown Publishers +Haymarket Business Publications Ltd +Hayward Group plc +Harvard Business School Publishing +Healthcare Bulletin +HealthCare Marketplace +Heldermann Verlag +Henry Stewart Publications +Higher Education Press and Springer +Verlag Herder +Carl Heymanns Verlag KG +Hogrefe & Huber Publishers +Human Kinetics Publishers, Inc +Harvard Law School +HMP Communications +Hokkaido University, Japan +Horizon Scientific Press +Verlag Hans Huber +Huss Medien +HVG-DGG +Hamburg Institute of International Economics (HWWA) +HyperSciences Publisher +International and American Association of Dental Research +International Association of Engineers +The Institute for Advanced Interdisciplinary Research +International Agency for Research on Cancer (IARC) +Indian Academy of Sciences +International Bar Association +International Bureau of Fiscal Documentation, The Netherlands +The International Bee Research Association +International Consortium 
for Alternative Academic Publication +Indian Council of Agricultural Research +Institute of Chartered Financial Analysts of India (ICFAI) University Press +Institution of Chemical Engineers +Indian Council of Medical Research +Indian Dental Association +Institution of Engineers (India) +Industrial Fabrics Association International +Igaku-Shoin, Japan +Igitur - Utrecht Archiving and Publishing Services, Netherlands +Institute of Industrial Engineers +Institute for International Cooperation of the German Adult Education Association +IJENS Publishers +International Labour Organisation +Institute of Marine Engineering, Science and Technology +International Monetary Fund +Institute of Materials, Minerals and Mining +Institute of Mathematics of the Polish Academy of Sciences +Imprint Academic +IM Publications +Institute of Mathematical Statistics +Incisive Media plc +Info Presse +INFO Project, Johns Hopkins School of Public Health +Informing Science Institute +INFORMS +Institut National de Recherche Pedagogique +Indian National Science Academy +INSInet Publication +Institutional Investor +World Textile Publications Ltd +Intellectbase International Consortium +Intersentia +International Press +Inter-Research +Institute of Pure and Applied Physics +IP Publishing +International Reading Association +The Instrumentation, Systems, and Automation Society +Indian Society for Development and Environment Research +Institute of Systematics and Evolution of Animals, Polish Academy of Sciences +Intercollegiate Studies Institute +The Islamic Society of Scientific Research (ISSR) \ No newline at end of file Added: uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Base.ruta URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Base.ruta?rev=1478378&view=auto ============================================================================== --- 
uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Base.ruta (added)
+++ uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Base.ruta Thu May 2 14:11:58 2013
@@ -0,0 +1,4 @@
+PACKAGE uima.ruta.example; // Base script: only imports type systems; Features.ruta pulls it in via SCRIPT
+
+TYPESYSTEM types.Bibtex;
+TYPESYSTEM types.Boundaries;
Added: uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Features.ruta
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Features.ruta?rev=1478378&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Features.ruta (added)
+++ uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/Features.ruta Thu May 2 14:11:58 2013
@@ -0,0 +1,85 @@
+PACKAGE uima.ruta.example; // Features script: adds helper annotations (word-list hits, Reference lines, years, initials, quotes, separators)
+
+SCRIPT uima.ruta.example.Base; // imports the Base script of this package
+
+ENGINE utils.PlainTextAnnotator; // run below via EXEC to obtain Line annotations
+TYPESYSTEM utils.PlainTextTypeSystem;
+
+WORDLIST EditorMarkerList = 'EditorMarker.txt'; // each WORDLIST binds a name to a resource file
+WORDLIST EnglishStopWordList = 'EnglishStopWords.txt';
+WORDLIST FirstNameList = 'FirstNames.txt';
+WORDLIST JournalVolumeMarkerList = 'JournalVolumeMarker.txt';
+WORDLIST MonthList = 'Months.txt';
+WORDLIST PagesMarkerList = 'PagesMarker.txt';
+WORDLIST PublisherList = 'Publishers.txt';
+
+DECLARE EditorMarker, EnglishStopWord, FirstName, JournalVolumeMarker, Month, PagesMarker, PublisherInd; // one annotation type per word list
+Document{ -> MARKFAST(EditorMarker, EditorMarkerList)}; // MARKFAST: annotate every word-list entry found inside the matched Document
+Document{ -> MARKFAST(EnglishStopWord,EnglishStopWordList)};
+Document{ -> MARKFAST(FirstName, FirstNameList)};
+Document{ -> MARKFAST(JournalVolumeMarker, JournalVolumeMarkerList)};
+Document{ -> MARKFAST(Month, MonthList)};
+Document{ -> MARKFAST(PagesMarker, PagesMarkerList)};
+Document{ -> MARKFAST(PublisherInd, PublisherList)};
+
+
+DECLARE Reference; // a Line whose text does not match "CORA:.*"
+Document{-> EXEC(PlainTextAnnotator, {Line})}; // run the engine and take over its Line annotations
+Document{-> RETAINTYPE(SPACE, BREAK)}; // make whitespace/line-break tokens visible for the next rules
+Line{-REGEXP("CORA:.*") -> MARK(Reference)};
+Reference{-> TRIM(SPACE, BREAK)}; // strip leading/trailing whitespace from the Reference span
+Document{-> RETAINTYPE}; // reset: nothing is retained any more
+
+DECLARE LParen, RParen; // opening / closing round bracket
+SPECIAL{REGEXP("[(]") -> MARK(LParen)};
+SPECIAL{REGEXP("[)]") -> MARK(RParen)};
+
+DECLARE YearInd; // year indicator
+NUM{REGEXP("19..|20..") -> MARK(YearInd, 1, 2)} SW?{REGEXP("a|b|c|d", true)}; // 19xx/20xx plus optional suffix letter a-d (second REGEXP argument: ignore case); the annotation spans both elements
+Document{-> RETAINTYPE(SPACE)};
+CAP YearInd{-> UNMARK(YearInd)}; // SPACE is visible here, so this only hits a year glued directly to an all-caps token
+Document{-> RETAINTYPE};
+
+
+DECLARE NameLinker; // tokens that join names: "and", ",", ";", "&"
+W{-PARTOF(NameLinker), REGEXP("and", true) -> MARK(NameLinker)};
+COMMA{-PARTOF(NameLinker) -> MARK(NameLinker)};
+SEMICOLON{-PARTOF(NameLinker) -> MARK(NameLinker)};
+SPECIAL{-PARTOF(NameLinker), REGEXP("&") -> MARK(NameLinker)};
+
+DECLARE FirstNameInd, FirstNameInitial, SingleChar; // NOTE(review): FirstNameInd is declared but never marked in this script
+CW{-PARTOF(FirstNameInitial), REGEXP(".")} SPECIAL{-PARTOF(FirstNameInitial), REGEXP("-")} CW{REGEXP(".") -> MARK(FirstNameInitial,1,2,3,4)} PERIOD; // hyphenated initials such as "J-P."
+SPECIAL{-PARTOF(FirstNameInitial), REGEXP("-")} CW{REGEXP(".") -> MARK(FirstNameInitial,1,2,3)} PERIOD; // "-P."
+CW{-PARTOF(FirstNameInitial), REGEXP(".") -> MARK(FirstNameInitial,1,2)} PERIOD; // single capital letter plus period
+CW{-PARTOF(FirstNameInitial), REGEXP(".") -> MARK(FirstNameInitial)} COMMA; // single capital letter followed by a comma (comma not included)
+CW{-PARTOF(FirstNameInitial), REGEXP(".") -> MARK(SingleChar)}; // remaining one-letter capitalized words
+
+DECLARE Quote, QuotedStuff;
+SPECIAL[1,2]{REGEXP("[\"'´`‘’“”„]"), -PARTOF(Quote) -> MARK(Quote)}; // straight, accent-style and typographic quote characters
+Document{-> RETAINTYPE(SPACE)};
+W Quote{-> UNMARK(Quote)} W; // SPACE is visible here: a quote glued between two words (apostrophe use) is not a Quote
+Document{-> RETAINTYPE};
+BLOCK(InRef) Reference{}{
+    Quote ANY+{-PARTOF(Quote) -> MARK(QuotedStuff, 1, 2, 3)} Quote; // QuotedStuff spans opening quote, content and closing quote
+}
+
+DECLARE InInd; // the word "in" (case-insensitive)
+W{REGEXP("In", true)-> MARK(InInd)};
+
+DECLARE FirstToken, LastToken; // first / last token of each Reference
+BLOCK(InRef) Reference{}{
+    ANY{POSITION(Reference,1) -> MARK(FirstToken)};
+    Document{-> MARKLAST(LastToken)};
+}
+
+
+DECLARE NumPeriod, NumComma, NumColon; // punctuation directly between two numbers
+Document{-> RETAINTYPE(SPACE, BREAK)}; // whitespace visible, so the rules below need NUM, punctuation, NUM with nothing in between
+NUM PERIOD{-> MARKONCE(NumPeriod)} NUM;
+NUM COMMA{-> MARKONCE(NumComma)} NUM;
+NUM COLON{-> MARKONCE(NumColon)} NUM;
+Document{-> RETAINTYPE};
+DECLARE PeriodSep, CommaSep, ColonSep;
+PERIOD{-PARTOF(FirstNameInitial), -PARTOF(NumPeriod), -PARTOF(FirstToken) -> MARKONCE (PeriodSep)}; // separator = punctuation that is not inside an initial, a number, or the first token
+COMMA{-PARTOF(FirstNameInitial), -PARTOF(NumComma), -PARTOF(FirstToken) -> MARKONCE (CommaSep)};
+COLON{-PARTOF(FirstNameInitial), -PARTOF(NumColon), -PARTOF(FirstToken) -> MARKONCE (ColonSep)};
Added: uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/kep/KEP_Base_Single.ruta
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/kep/KEP_Base_Single.ruta?rev=1478378&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/kep/KEP_Base_Single.ruta (added)
+++ uima/sandbox/ruta/trunk/example-projects/TextRulerExample/script/uima/ruta/example/kep/KEP_Base_Single.ruta Thu May 2 14:11:58 2013
@@ -0,0 +1,186 @@
+PACKAGE uima.textmarker.example.kep; // NOTE(review): file lives under script/uima/ruta/example/kep but still uses the uima.textmarker namespace - confirm
+
+TYPESYSTEM uima.textmarker.example.BaseTypeSystem; // NOTE(review): sibling scripts use uima.ruta.example.* - verify this descriptor resolves
+
+Document{->FILTERTYPE(SPACE, BREAK, MARKUP, NBSP)}; // hide whitespace, line breaks, markup and NBSP from all rule matching below
+
+BLOCK(DocumentAnnotation) DocumentAnnotation{} { // pass 1: Author, Date, Pages, Publisher, Institution, Volume (slots and their START/END boundaries)
+// uima.textmarker.example.Author RULES - MARKONCE(T, i, j) spans rule elements i..j; "p=..; n=.." look like the learner's positive/negative match counts (TODO confirm)
+    PERIOD CW[1,2]?{->MARKONCE(Author, 2, 11)} COMMA CW PERIOD COMMA SPECIAL CW COMMA CW PERIOD SPECIAL; // p=5; n=0
+    PERIOD CW{->MARKONCE(Author, 2, 5)} COMMA CW[1,2]? PERIOD SPECIAL; // p=4; n=0
+    PERIOD CW{->MARKONCE(Author, 2, 7)} COMMA CW PERIOD CW PERIOD SPECIAL; // p=2; n=0
+    PERIOD CW{->MARKONCE(Author, 2, 13)} COMMA CW PERIOD CW PERIOD COMMA SPECIAL CW COMMA CW PERIOD SPECIAL; // p=2; n=0
+    CW{->MARKONCE(Author, 1, 20)} COMMA CW PERIOD COMMA CW COMMA CW PERIOD COMMA CW COMMA CW PERIOD COMMA SPECIAL CW COMMA CW PERIOD; // p=1; n=0
+    CW{->MARKONCE(Author, 1, 21)} COMMA CW PERIOD CW PERIOD COMMA CW COMMA CW PERIOD CW PERIOD COMMA SPECIAL CW COMMA CW PERIOD CW PERIOD; // p=1; n=0
+    CW{->MARKONCE(Author, 1, 19)} COMMA CW PERIOD CW PERIOD COMMA CW COMMA CW PERIOD CW PERIOD COMMA SPECIAL CW COMMA CW PERIOD SPECIAL; // p=1; n=0
+    PERIOD CW{->MARKONCE(Author, 2, 16)} COMMA CW PERIOD COMMA CW COMMA CW PERIOD COMMA SPECIAL CW COMMA CW PERIOD SPECIAL; // p=1; n=0
+    PERIOD CW[1,2]?{->MARKONCE(Author, 2, 9)} COMMA CW PERIOD CW PERIOD CW PERIOD; // p=1; n=0
+    COLON CW{->MARKONCE(Author, 2, 15)} COMMA CW PERIOD CW PERIOD COMMA SPECIAL CW[1,2]? COMMA CW PERIOD CW PERIOD SPECIAL; // p=1; n=0
+    PERIOD CW{->MARKONCE(Author, 2, 15)} COMMA CW PERIOD CW PERIOD COMMA SPECIAL CW[1,2]? COMMA CW PERIOD CW PERIOD SPECIAL; // p=1; n=0
+
+// uima.textmarker.example.AuthorSTART RULES
+    PERIOD CW{->MARKONCE(AuthorSTART)}; // p=19; n=54
+    NUM PERIOD CW{->MARKONCE(AuthorSTART)}; // p=14; n=0
+    COLON CW{->MARKONCE(AuthorSTART)} COMMA; // p=1; n=0
+
+// uima.textmarker.example.AuthorEND RULES
+    PERIOD{->MARKONCE(AuthorEND)} SPECIAL; // p=20; n=0
+
+// uima.textmarker.example.Date RULES
+    SPECIAL{->MARKONCE(Date, 1, 4)} NUM SPECIAL PERIOD; // p=19; n=0
+    SPECIAL{->MARKONCE(Date, 1, 5)} NUM SW SPECIAL PERIOD; // p=1; n=0
+
+// uima.textmarker.example.DateSTART RULES
+    AuthorEND SPECIAL{->MARKONCE(DateSTART)} NUM; // p=20; n=0
+
+// uima.textmarker.example.DateEND RULES
+    SPECIAL PERIOD{->MARKONCE(DateEND)} CW; // p=20; n=0
+
+// uima.textmarker.example.Pages RULES
+    NUM{->MARKONCE(Pages, 1, 4)} SPECIAL NUM PERIOD; // p=15; n=0
+
+// uima.textmarker.example.PagesSTART RULES
+    COMMA NUM{->MARKONCE(PagesSTART)} SPECIAL NUM PERIOD; // p=15; n=0
+
+// uima.textmarker.example.PagesEND RULES
+    NUM PERIOD{->MARKONCE(PagesEND)}; // p=15; n=0
+
+// uima.textmarker.example.Publisher RULES
+    CW{->MARKONCE(Publisher, 1, 4)} SPECIAL CW COMMA; // p=1; n=0
+    PERIOD CW{->MARKONCE(Publisher, 2, 5)} SW CW[1,2]? COMMA CW COMMA; // p=1; n=0
+    CW[1,2]? PERIOD CW{->MARKONCE(Publisher, 3, 4)} COMMA CW[1,2]? PERIOD Author; // p=1; n=0
+
+// uima.textmarker.example.PublisherSTART RULES
+    CW PERIOD CW{->MARKONCE(PublisherSTART)}; // p=3; n=20
+    PERIOD CW{->MARKONCE(PublisherSTART)} SPECIAL; // p=1; n=0
+    CW PERIOD CW{->MARKONCE(PublisherSTART)} SW CW; // p=1; n=0
+
+// uima.textmarker.example.PublisherEND RULES
+    CW COMMA{->MARKONCE(PublisherEND)} CW; // p=3; n=42
+    CW[1,2]? COMMA{->MARKONCE(PublisherEND)} CW COMMA CW; // p=1; n=0
+
+// uima.textmarker.example.Institution RULES
+    CW{->MARKONCE(Institution, 1, 9)} SW CW COMMA CW PERIOD SW CW PERIOD; // p=1; n=0
+
+// uima.textmarker.example.InstitutionSTART RULES
+    COMMA CW{->MARKONCE(InstitutionSTART)} SW; // p=1; n=0
+
+// uima.textmarker.example.InstitutionEND RULES
+    SW CW PERIOD{->MARKONCE(InstitutionEND)} Author Date; // p=1; n=0
+
+// uima.textmarker.example.Volume RULES
+    COMMA NUM{->MARKONCE(Volume, 2, 3)} COMMA PagesSTART; // p=8; n=0
+    NUM{->MARKONCE(Volume, 1, 5)} SPECIAL NUM SPECIAL COMMA; // p=5; n=0
+    NUM{->MARKONCE(Volume, 1, 9)} SPECIAL NUM COMMA NUM COMMA NUM SPECIAL COMMA; // p=1; n=0
+
+// uima.textmarker.example.VolumeSTART RULES
+    COMMA NUM{->MARKONCE(VolumeSTART)}; // p=14; n=17
+    CW COMMA NUM{->MARKONCE(VolumeSTART)}; // p=11; n=0
+
+// uima.textmarker.example.VolumeEND RULES
+    COMMA{->MARKONCE(VolumeEND)} Pages; // p=14; n=1
+    SPECIAL COMMA{->MARKONCE(VolumeEND)} Pages; // p=6; n=0
+}
+BLOCK(DocumentAnnotation) DocumentAnnotation{} { // pass 2: Title and Booktitle; rules here reference Date, DateEND, Publisher and PagesSTART from pass 1 (same BLOCK name reused)
+// uima.textmarker.example.Title RULES
+    PERIOD CW{->MARKONCE(Title, 2, 4)} SW[1,9]? PERIOD CW; // p=10; n=0
+    CW{->MARKONCE(Title, 1, 5)} SW[1,5]? SPECIAL SW[1,5]? PERIOD; // p=2; n=0
+    Date CW[1,2]?{->MARKONCE(Title, 2, 5)} SW[1,2]? CW[1,3]? PERIOD CW; // p=2; n=0
+    CW{->MARKONCE(Title, 1, 7)} SW[1,2]? CW SPECIAL CW SW PERIOD; // p=1; n=0
+    CW{->MARKONCE(Title, 1, 6)} SW[1,3]? NUM SPECIAL SW[1,3]? PERIOD; // p=1; n=0
+    CW{->MARKONCE(Title, 1, 5)} SW[1,6]? CAP SW PERIOD; // p=1; n=0
+    CW{->MARKONCE(Title, 1, 7)} COMMA CW COMMA SW CW PERIOD; // p=1; n=0
+    CW{->MARKONCE(Title, 1, 8)} SW[1,8]? COLON CW SW[1,2]? CW SW PERIOD; // p=1; n=0
+    DateEND CW[1,3]?{->MARKONCE(Title, 2, 3)} PERIOD Publisher; // p=1; n=0
+
+// uima.textmarker.example.TitleSTART RULES
+    Date CW{->MARKONCE(TitleSTART)}; // p=20; n=0
+
+// uima.textmarker.example.TitleEND RULES
+    PERIOD{->MARKONCE(TitleEND)} CW; // p=20; n=53
+    SW PERIOD{->MARKONCE(TitleEND)} CW; // p=16; n=0
+    PERIOD{->MARKONCE(TitleEND)} Publisher; // p=3; n=0
+
+// uima.textmarker.example.Booktitle RULES
+    CW{->MARKONCE(Booktitle, 1, 6)} SW CAP SPECIAL NUM COMMA; // p=1; n=0
+
+// uima.textmarker.example.BooktitleSTART RULES
+    Title CW{->MARKONCE(BooktitleSTART)} SW CAP; // p=1; n=0
+
+// uima.textmarker.example.BooktitleEND RULES
+    SPECIAL NUM COMMA{->MARKONCE(BooktitleEND)} PagesSTART SPECIAL; // p=1; n=0
+}
+BLOCK(DocumentAnnotation) DocumentAnnotation{} { // pass 3: Journal, Location, Tech; rules here reference Title, Volume, Publisher, Author, DateSTART and InstitutionSTART from earlier passes
+// uima.textmarker.example.Journal RULES
+    PERIOD CW[1,3]?{->MARKONCE(Journal, 2, 3)} COMMA Volume; // p=7; n=0
+    CW{->MARKONCE(Journal, 1, 4)} SW[1,2]? CW[1,3]? COMMA NUM; // p=4; n=0
+    CW{->MARKONCE(Journal, 1, 4)} SW[1,2]? CAP COMMA; // p=1; n=0
+    CW{->MARKONCE(Journal, 1, 8)} SW CW[1,2]? COMMA CW PERIOD CAP COMMA; // p=1; n=0
+    Title CW{->MARKONCE(Journal, 2, 5)} PERIOD CAP COMMA; // p=1; n=0
+
+// uima.textmarker.example.JournalSTART RULES
+    Title CW{->MARKONCE(JournalSTART)}; // p=14; n=6
+    Title CW{->MARKONCE(JournalSTART)} CW; // p=6; n=0
+
+// uima.textmarker.example.JournalEND RULES
+    COMMA{->MARKONCE(JournalEND)} Volume; // p=14; n=0
+
+// uima.textmarker.example.Location RULES
+    Publisher CW[1,2]?{->MARKONCE(Location, 2, 3)} PERIOD Author; // p=2; n=0
+    CW{->MARKONCE(Location, 1, 4)} COMMA CW PERIOD Author; // p=1; n=0
+
+// uima.textmarker.example.LocationSTART RULES
+    Publisher CW{->MARKONCE(LocationSTART)}; // p=3; n=0
+
+// uima.textmarker.example.LocationEND RULES
+    COMMA CW[1,2]? PERIOD{->MARKONCE(LocationEND)} Author DateSTART; // p=3; n=0
+
+// uima.textmarker.example.Tech RULES
+    CW{->MARKONCE(Tech, 1, 5)} PERIOD SW PERIOD COMMA; // p=1; n=0
+
+// uima.textmarker.example.TechSTART RULES
+    Title CW{->MARKONCE(TechSTART)} PERIOD SW; // p=1; n=0
+
+// uima.textmarker.example.TechEND RULES
+    COMMA{->MARKONCE(TechEND)} InstitutionSTART; // p=1; n=0
+}
+
+    // BOUNDARY CORRECTION RULES: drop a boundary annotation when it contains one of the listed slot/boundary annotations
+    AuthorSTART{OR(CONTAINS(Title), CONTAINS(TitleSTART), CONTAINS(Journal), CONTAINS(Publisher), CONTAINS(Booktitle), CONTAINS(BooktitleSTART), CONTAINS(Tech), CONTAINS(TechSTART))->UNMARK(AuthorSTART)}; // p=0; n=39
+PublisherSTART{OR(CONTAINS(Author), CONTAINS(Booktitle), CONTAINS(BooktitleSTART))->UNMARK(PublisherSTART)}; // p=0; n=5
+VolumeSTART{OR(CONTAINS(Pages), CONTAINS(PagesSTART))->UNMARK(VolumeSTART)}; // p=0; n=15
+VolumeEND{CONTAINS(BooktitleEND)->UNMARK(VolumeEND)}; // p=0; n=1
+TitleEND{OR(CONTAINS(DateEND), CONTAINS(PagesEND), CONTAINS(LocationEND), CONTAINS(InstitutionEND))->UNMARK(TitleEND)}; // p=0; n=38
+JournalSTART{OR(CONTAINS(Author), CONTAINS(Publisher), CONTAINS(Booktitle), CONTAINS(BooktitleSTART), CONTAINS(Tech), CONTAINS(TechSTART))->UNMARK(JournalSTART)}; // p=0; n=6
+
+    // CONNECTORS: build each slot from its boundaries - START ... END spans elements 1..3; the IS(END) variant covers a START that is itself the END
+    BLOCK(DocumentAnnotationConnectors) DocumentAnnotation{} {
+    AuthorSTART{->MARKONCE(Author,1,3)} ANY*? AuthorEND;
+    AuthorSTART{IS(AuthorEND)->MARKONCE(Author)} ;
+    DateSTART{->MARKONCE(Date,1,3)} ANY*? DateEND;
+    DateSTART{IS(DateEND)->MARKONCE(Date)} ;
+    PagesSTART{->MARKONCE(Pages,1,3)} ANY*? PagesEND;
+    PagesSTART{IS(PagesEND)->MARKONCE(Pages)} ;
+    PublisherSTART{->MARKONCE(Publisher,1,3)} ANY*? PublisherEND;
+    PublisherSTART{IS(PublisherEND)->MARKONCE(Publisher)} ;
+    InstitutionSTART{->MARKONCE(Institution,1,3)} ANY*? InstitutionEND;
+    InstitutionSTART{IS(InstitutionEND)->MARKONCE(Institution)} ;
+    VolumeSTART{->MARKONCE(Volume,1,3)} ANY*? VolumeEND;
+    VolumeSTART{IS(VolumeEND)->MARKONCE(Volume)} ;
+    TitleSTART{->MARKONCE(Title,1,3)} ANY*? TitleEND;
+    TitleSTART{IS(TitleEND)->MARKONCE(Title)} ;
+    BooktitleSTART{->MARKONCE(Booktitle,1,3)} ANY*? BooktitleEND;
+    BooktitleSTART{IS(BooktitleEND)->MARKONCE(Booktitle)} ;
+    JournalSTART{->MARKONCE(Journal,1,3)} ANY*? JournalEND;
+    JournalSTART{IS(JournalEND)->MARKONCE(Journal)} ;
+    LocationSTART{->MARKONCE(Location,1,3)} ANY*? LocationEND;
+    LocationSTART{IS(LocationEND)->MARKONCE(Location)} ;
+    TechSTART{->MARKONCE(Tech,1,3)} ANY*? TechEND;
+    TechSTART{IS(TechEND)->MARKONCE(Tech)} ;
+}
+EditorSTART{->MARKONCE(Editor,1,3)} ANY*? EditorEND; // NOTE(review): Editor/Note connectors sit outside the connector BLOCK and no rule in this file marks EditorSTART/EditorEND/NoteSTART/NoteEND - confirm intended
+EditorSTART{IS(EditorEND)->MARKONCE(Editor)} ;
+NoteSTART{->MARKONCE(Note,1,3)} ANY*? NoteEND;
+NoteSTART{IS(NoteEND)->MARKONCE(Note)} ;
+
+    // CORRECTION RULES: (none follow in this file)
\ No newline at end of file