001/* 002 * Copyright © 2012, 2013, 2014 Royal Botanic Gardens, Kew. 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 005 * 006 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 007 * 008 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 009 */ 010package org.kew.rmf.transformers.collations.ipni; 011 012import java.io.BufferedReader; 013import java.io.BufferedWriter; 014import java.io.FileInputStream; 015import java.io.FileWriter; 016import java.io.InputStreamReader; 017import java.util.Arrays; 018 019import org.kew.rmf.transformers.collations.CollationStructureTransformer; 020 021public class CollationUtils { 022 023 public static int SERIES_INDEX = 0; 024 public static int VOL_INDEX = 1; 025 public static int ISSUE_INDEX = 2; 026 public static int PAGE_INDEX = 3; 027 public static int TAB_OR_FIG_INDEX = 4; 028 public static int YEAR_INDEX = 5; 029 public static int RULE_INDEX = 6; 030 031 private static String[] NOTES_PATTERNS={", a.", ", a. a.", ", a a.", ", a a a.", ", a 'a'"}; 032 033 // This pair of methods provides a way to match collation patterns which have notes, such as ', in obs.' 034 // at the end, without always explicitly stating these in each test below. 035 // It has not been used in all the code in the parseCollation method but would make this much more succinct if done so. 036 037 //private static boolean patternMatchesWithNotes(String suppliedPattern, String testPattern){ 038 // return patternMatchesWithNotes(suppliedPattern, testPattern, false); 039 //} 040 private static boolean patternMatchesWithNotes(String suppliedPattern, String testPattern, boolean normaliseSpace){ 041 boolean matches = false; 042 for (String suffix :NOTES_PATTERNS){ 043 if (normaliseSpace){ 044 matches = suppliedPattern.replaceAll(" ", "").equals((testPattern+suffix).replaceAll(" ", "")); 045 } 046 else{ 047 matches = suppliedPattern.equals(testPattern+suffix); 048 } 049 if (matches) break; 050 } 051 // if the submitted pattern matches the test pattern with the addition of notes flags, then return true: 052 return matches; 053 } 054 055 private static String[] splitCollation(String collation){ 056 return CollationStructureTransformer.splitCollation(collation); 057 } 058 059 public static String assessCollationStructure(String collation){ 060 return CollationStructureTransformer.assessCollationStructure(collation); 061 } 062 063 public static boolean parsableCollation(String collation){ 064 return !Arrays.toString(parseCollation(collation)).equals("[, , , , , , ]"); 065 } 066 067 public static String[] parseCollation(String collation){ 068 069 String pattern = assessCollationStructure(collation); 070 String series=""; 071 String volume=""; 072 String issue=""; 073 String page=""; 074 String tab_or_fig_or_no=""; 075 String year=""; 076 String rule=""; 077 078 boolean parsed = false; 079 080 // Just pages, maybe followed by in obs etc 081 if (pattern.equals("d") 082 || pattern.equals("d.") 083 || pattern.equals("d, a.") 084 || pattern.equals("d, a a.") 085 || pattern.equals("d, a. a.") 086 || pattern.equals("d, a a a.") 087 || pattern.equals("d, a 'a'") 088 || pattern.equals("d (-d)") 089 || pattern.equals("yyyy") 090 || pattern.equals("yyyy.") 091 || pattern.equals("yyyy, a.") 092 || pattern.equals("yyyy, a a.") 093 || pattern.equals("yyyy, a. a.") 094 || pattern.equals("yyyy, a a a.") 095 || pattern.equals("yyyy, a 'a'") 096 || pattern.equals("r") 097 || pattern.equals("r.") 098 || pattern.equals("r, a.") 099 || pattern.equals("r, a a.") 100 || pattern.equals("r, a. a.") 101 || pattern.equals("r, a a a.") 102 || pattern.equals("r, a 'a'")){ 103 //Example: "88" 104 //Series: "" 105 series = ""; 106 //Volume: "" 107 volume = ""; 108 //Issue: "" 109 issue = ""; 110 //Page: "88" 111 page = splitCollation(collation)[0]; 112 //Tab/Fig/No: "" 113 tab_or_fig_or_no = ""; 114 //Year: "" 115 year = ""; 116 rule = "1"; 117 parsed = true; 118 } 119 // Things that are vol: page, possibly followed by in obs / nom nov etc 120 if (!parsed && (pattern.replaceAll(" ","").equals("d:d") 121 ||pattern.replaceAll(" ","").equals("d:d.") 122 ||pattern.replaceAll(" ","").equals("d:d,") 123 ||pattern.replaceAll(" ","").equals("d:d,a") 124 ||pattern.replaceAll(" ","").equals("d:d.a.") 125 ||pattern.replaceAll(" ","").equals("d:d.aa.") 126 ||pattern.replaceAll(" ","").equals("d:d.a.a.") 127 ||pattern.replaceAll(" ","").equals("d:d.aaa.") 128 ||pattern.replaceAll(" ","").equals("d:d.a'a'") 129 ||pattern.replaceAll(" ","").equals("d:d,a.") 130 ||pattern.replaceAll(" ","").equals("d:d,aa.") 131 ||pattern.replaceAll(" ","").equals("d:d,a.a.") 132 ||pattern.replaceAll(" ","").equals("d:d,aaa.") 133 ||pattern.replaceAll(" ","").equals("d:d,a'a'") 134 ||pattern.replaceAll(" ","").equals("d:yyyy") 135 ||pattern.replaceAll(" ","").equals("d:yyyy.") 136 ||pattern.replaceAll(" ","").equals("d:yyyy,") 137 ||pattern.replaceAll(" ","").equals("d:yyyy,a") 138 ||pattern.replaceAll(" ","").equals("d:yyyy.a.") 139 ||pattern.replaceAll(" ","").equals("d:yyyy.aa.") 140 ||pattern.replaceAll(" ","").equals("d:yyyy.a.a.") 141 ||pattern.replaceAll(" ","").equals("d:yyyy.aaa.") 142 ||pattern.replaceAll(" ","").equals("d:yyyy.a'a'") 143 ||pattern.replaceAll(" ","").equals("d:yyyy,a.") 144 ||pattern.replaceAll(" ","").equals("d:yyyy,aa.") 145 ||pattern.replaceAll(" ","").equals("d:yyyy,a.a.") 146 ||pattern.replaceAll(" ","").equals("d:yyyy,aaa.") 147 ||pattern.replaceAll(" ","").equals("d:yyyy,a'a'") 148 // 149 ||pattern.replaceAll(" ","").equals("yyyy-d:d") 150 // As above, but with roman numeral volume: 151 ||pattern.replaceAll(" ","").equals("r:d") 152 ||pattern.replaceAll(" ","").equals("r:d.") 153 ||pattern.replaceAll(" ","").equals("r:d,") 154 ||pattern.replaceAll(" ","").equals("r:d,a") 155 ||pattern.replaceAll(" ","").equals("r:d.a.") 156 ||pattern.replaceAll(" ","").equals("r:d.aa.") 157 ||pattern.replaceAll(" ","").equals("r:d.a.a.") 158 ||pattern.replaceAll(" ","").equals("r:d.aaa.") 159 ||pattern.replaceAll(" ","").equals("r:d.a'a'") 160 ||pattern.replaceAll(" ","").equals("r:d,a.") 161 ||pattern.replaceAll(" ","").equals("r:d,aa.") 162 ||pattern.replaceAll(" ","").equals("r:d,a.a.") 163 ||pattern.replaceAll(" ","").equals("r:d,aaa.") 164 ||pattern.replaceAll(" ","").equals("r:d,a'a'") 165 ||pattern.replaceAll(" ","").equals("r:yyyy") 166 ||pattern.replaceAll(" ","").equals("r:yyyy.") 167 ||pattern.replaceAll(" ","").equals("r:yyyy,") 168 ||pattern.replaceAll(" ","").equals("r:yyyy,a") 169 ||pattern.replaceAll(" ","").equals("r:yyyy.a.") 170 ||pattern.replaceAll(" ","").equals("r:yyyy.aa.") 171 ||pattern.replaceAll(" ","").equals("r:yyyy.a.a.") 172 ||pattern.replaceAll(" ","").equals("r:yyyy.aaa.") 173 ||pattern.replaceAll(" ","").equals("r:yyyy.a'a'") 174 ||pattern.replaceAll(" ","").equals("r:yyyy,a.") 175 ||pattern.replaceAll(" ","").equals("r:yyyy,aa.") 176 ||pattern.replaceAll(" ","").equals("r:yyyy,a.a.") 177 ||pattern.replaceAll(" ","").equals("r:yyyy,aaa.") 178 ||pattern.replaceAll(" ","").equals("r:yyyy,a'a'") 179 // As above but with 4 digit volume 180 ||pattern.replaceAll(" ","").equals("yyyy:d") 181 ||pattern.replaceAll(" ","").equals("yyyy:d.") 182 ||pattern.replaceAll(" ","").equals("yyyy:d,") 183 ||pattern.replaceAll(" ","").equals("yyyy:d,a") 184 ||pattern.replaceAll(" ","").equals("yyyy:d.a.") 185 ||pattern.replaceAll(" ","").equals("yyyy:d.aa.") 186 ||pattern.replaceAll(" ","").equals("yyyy:d.a.a.") 187 ||pattern.replaceAll(" ","").equals("yyyy:d.aaa.") 188 ||pattern.replaceAll(" ","").equals("yyyy:d.a'a'") 189 ||pattern.replaceAll(" ","").equals("yyyy:d,a.") 190 ||pattern.replaceAll(" ","").equals("yyyy:d,aa.") 191 ||pattern.replaceAll(" ","").equals("yyyy:d,a.a.") 192 ||pattern.replaceAll(" ","").equals("yyyy:d,aaa.") 193 ||pattern.replaceAll(" ","").equals("yyyy:d,a'a'") 194 ||pattern.replaceAll(" ","").equals("yyyy:yyyy") 195 ||pattern.replaceAll(" ","").equals("yyyy:yyyy.") 196 ||pattern.replaceAll(" ","").equals("yyyy:yyyy,") 197 ||pattern.replaceAll(" ","").equals("yyyy:yyyy,a") 198 ||pattern.replaceAll(" ","").equals("yyyy:yyyy.a.") 199 ||pattern.replaceAll(" ","").equals("yyyy:yyyy.aa.") 200 ||pattern.replaceAll(" ","").equals("yyyy:yyyy.a.a.") 201 ||pattern.replaceAll(" ","").equals("yyyy:yyyy.aaa.") 202 ||pattern.replaceAll(" ","").equals("yyyy:yyyy.a'a'") 203 ||pattern.replaceAll(" ","").equals("yyyy:yyyy,a.") 204 ||pattern.replaceAll(" ","").equals("yyyy:yyyy,aa.") 205 ||pattern.replaceAll(" ","").equals("yyyy:yyyy,a.a.") 206 ||pattern.replaceAll(" ","").equals("yyyy:yyyy,aaa.") 207 ||pattern.replaceAll(" ","").equals("yyyy:yyyy,a'a'") 208 )){ 209 //Example: "1: 1" 210 String[] c = splitCollation(collation.replaceAll(" ", "")); 211 //Series: "" 212 series = ""; 213 //Volume: "1" 214 volume = c[0]; 215 //Issue: "" 216 issue = ""; 217 //Page: "1" 218 page = c[1]; 219 //Tab/Fig/No: "" 220 tab_or_fig_or_no = ""; 221 //Year: "" 222 year = ""; 223 rule = "3"; 224 parsed = true; 225 } 226 // Things that are vol (issue): page, possibly followed by "in obs." / "nom. nov." / "sphalm." / "figs." / "without basionym ref." etc 227 if (!parsed && 228 (pattern.replaceAll(" ","").equals("d(d):d") 229 ||pattern.replaceAll(" ","").equals("d(d):d,") 230 ||pattern.replaceAll(" ","").equals("d(d):d.") 231 ||pattern.replaceAll(" ","").equals("d(d):daa.") 232 ||pattern.replaceAll(" ","").equals("d(d):d,aa.") 233 ||pattern.replaceAll(" ","").equals("d(d):d.aa.") 234 ||pattern.replaceAll(" ","").equals("d(d):d,a.a.") 235 ||pattern.replaceAll(" ","").equals("d(d):d.a.a.") 236 ||pattern.replaceAll(" ","").equals("d(d):d,a.") 237 ||pattern.replaceAll(" ","").equals("d(d):d.a.") 238 ||pattern.replaceAll(" ","").equals("d(d):daaa.") 239 ||pattern.replaceAll(" ","").equals("d(d):d,aaa.") 240 ||pattern.replaceAll(" ","").equals("d(d):d.aaa.") 241 ||pattern.replaceAll(" ","").equals("d(d):yyyy") 242 ||pattern.replaceAll(" ","").equals("d(d):yyyy,") 243 ||pattern.replaceAll(" ","").equals("d(d):yyyy.") 244 ||pattern.replaceAll(" ","").equals("d(d):yyyy,aa.") 245 ||pattern.replaceAll(" ","").equals("d(d):yyyy.aa.") 246 ||pattern.replaceAll(" ","").equals("d(d):yyyy,a.a.") 247 ||pattern.replaceAll(" ","").equals("d(d):yyyy.a.a.") 248 ||pattern.replaceAll(" ","").equals("d(d):yyyy,a.") 249 ||pattern.replaceAll(" ","").equals("d(d):yyyy.a.") 250 ||pattern.replaceAll(" ","").equals("d(d):yyyy,aaa.") 251 ||pattern.replaceAll(" ","").equals("d(d):yyyy.aaa.") 252 // Prev handledunder rule 7 253 ||pattern.replaceAll(" ","").equals("d[d]:d") 254 ||pattern.replaceAll(" ","").equals("d[d]:yyyy") 255 // Prev handled under rule #39 256 ||pattern.replaceAll(" ","").equals("d(d):d(-d)") 257 ||pattern.replaceAll(" ","").equals("d(d):d(-d)a") 258 ||pattern.replaceAll(" ","").equals("d(d):d(-d),a") 259 ||pattern.replaceAll(" ","").equals("d(d):d(-d).a") 260 ||pattern.replaceAll(" ","").equals("d(d):d(-d)aaa.") 261 ||pattern.replaceAll(" ","").equals("d(d):d(-d),aaa.") 262 ||pattern.replaceAll(" ","").equals("d(d):d(-d).aaa.") 263 ||pattern.replaceAll(" ","").equals("d(d):yyyy(-d)") 264 ||pattern.replaceAll(" ","").equals("d(d):yyyy(-d)a") 265 ||pattern.replaceAll(" ","").equals("d(d):yyyy(-d),a") 266 ||pattern.replaceAll(" ","").equals("d(d):yyyy(-d).a") 267 ||pattern.replaceAll(" ","").equals("d(d):yyyy(-d)aaa.") 268 ||pattern.replaceAll(" ","").equals("d(d):yyyy(-d),aaa.") 269 ||pattern.replaceAll(" ","").equals("d(d):yyyy(-d).aaa.") 270 // Roman numeral volume: 271 ||pattern.replaceAll(" ","").equals("r(d):d") 272 ||pattern.replaceAll(" ","").equals("r(d):d(-d)") 273 ||pattern.replaceAll(" ","").equals("r(d):d(-d)a") 274 ||pattern.replaceAll(" ","").equals("r(d):d(-d),a") 275 ||pattern.replaceAll(" ","").equals("r(d):d(-d).a") 276 ||pattern.replaceAll(" ","").equals("r(d):d(-d)aaa.") 277 ||pattern.replaceAll(" ","").equals("r(d):d(-d),aaa.") 278 ||pattern.replaceAll(" ","").equals("r(d):d(-d).aaa.") 279 ||pattern.replaceAll(" ","").equals("r(d):yyyy(-d)") 280 ||pattern.replaceAll(" ","").equals("r(d):yyyy(-d)a") 281 ||pattern.replaceAll(" ","").equals("r(d):yyyy(-d),a") 282 ||pattern.replaceAll(" ","").equals("r(d):yyyy(-d).a") 283 ||pattern.replaceAll(" ","").equals("r(d):yyyy(-d)aaa.") 284 ||pattern.replaceAll(" ","").equals("r(d):yyyy(-d),aaa.") 285 ||pattern.replaceAll(" ","").equals("r(d):yyyy(-d).aaa.") 286 // Prev handled under rule #57.7 287 ||pattern.replaceAll(" ","").equals("yyyy(d):d") 288 ||pattern.replaceAll(" ","").equals("yyyy(d):d,") 289 ||pattern.replaceAll(" ","").equals("yyyy(d):d.") 290 ||pattern.replaceAll(" ","").equals("yyyy(d):d,aa.") 291 ||pattern.replaceAll(" ","").equals("yyyy(d):d.aa.") 292 ||pattern.replaceAll(" ","").equals("yyyy(d):d,a.a.") 293 ||pattern.replaceAll(" ","").equals("yyyy(d):d.a.a.") 294 ||pattern.replaceAll(" ","").equals("yyyy(d):da.") 295 ||pattern.replaceAll(" ","").equals("yyyy(d):d,a.") 296 ||pattern.replaceAll(" ","").equals("yyyy(d):d.a.") 297 ||pattern.replaceAll(" ","").equals("yyyy(d):d,aaa.") 298 ||pattern.replaceAll(" ","").equals("yyyy(d):d.aaa.") 299 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d)") 300 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d)a") 301 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d),a") 302 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d),aa.") 303 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d).a") 304 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d)aaa.") 305 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d),aaa.") 306 ||pattern.replaceAll(" ","").equals("yyyy(d):d(-d).aaa.") 307 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy") 308 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy,a.a.") 309 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy,aa.") 310 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy,aaa.") 311 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy(-d)") 312 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy(-d)a") 313 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy(-d),a") 314 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy(-d).a") 315 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy(-d)aaa.") 316 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy(-d),aaa.") 317 ||pattern.replaceAll(" ","").equals("yyyy(d):yyyy(-d).aaa."))){ 318 319 //Example: "1(1): 1" or "1(1):1" 320 String[] c = splitCollation(collation.replaceAll(" ", "")); 321 //Series: "" 322 series = ""; 323 //Volume: "1" 324 volume = c[0]; 325 //Issue: "1" 326 issue = c[1]; 327 //Page: "1" 328 page = c[2]; 329 //Tab/Fig/No: "" 330 tab_or_fig_or_no = ""; 331 //Year: "" 332 year = ""; 333 rule = "4"; 334 parsed = true; 335 } 336 // vol(issue): page (year) 337 if (!parsed && (pattern.replaceAll(" ","").equals("d(d):d(yyyy)") 338 || pattern.replaceAll(" ","").equals("d(d):d(yyyy).") 339 || pattern.replaceAll(" ","").equals("d(d):d(yyyy),") 340 || pattern.replaceAll(" ","").equals("d(d):d(yyyy):") 341 || pattern.replaceAll(" ","").equals("d(d):d(yyyy);") 342 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy)") 343 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy).") 344 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy),") 345 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy):") 346 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy);") 347 // as above w. "sphalm." or "fig." 348 || pattern.replaceAll(" ","").equals("d(d):d(yyyy)a.") 349 || pattern.replaceAll(" ","").equals("d(d):d(yyyy).a.") 350 || pattern.replaceAll(" ","").equals("d(d):d(yyyy),a.") 351 || pattern.replaceAll(" ","").equals("d(d):d(yyyy):a.") 352 || pattern.replaceAll(" ","").equals("d(d):d(yyyy);a.") 353 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy)a.") 354 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy).a.") 355 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy),a.") 356 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy):a.") 357 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy);a.") 358 // as above w. "in obs." 359 || pattern.replaceAll(" ","").equals("d(d):d(yyyy)aa.") 360 || pattern.replaceAll(" ","").equals("d(d):d(yyyy).aa.") 361 || pattern.replaceAll(" ","").equals("d(d):d(yyyy),aa.") 362 || pattern.replaceAll(" ","").equals("d(d):d(yyyy):aa.") 363 || pattern.replaceAll(" ","").equals("d(d):d(yyyy);aa.") 364 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy)aa.") 365 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy).aa.") 366 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy),aa.") 367 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy):aa.") 368 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy);aa.") 369 // as above w. "nom. nov." 370 || pattern.replaceAll(" ","").equals("d(d):d(yyyy)a.a.") 371 || pattern.replaceAll(" ","").equals("d(d):d(yyyy).a.a.") 372 || pattern.replaceAll(" ","").equals("d(d):d(yyyy),a.a.") 373 || pattern.replaceAll(" ","").equals("d(d):d(yyyy):a.a.") 374 || pattern.replaceAll(" ","").equals("d(d):d(yyyy);a.a.") 375 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy)a.a.") 376 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy).a.a.") 377 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy),a.a.") 378 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy):a.a.") 379 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy);a.a.") 380 // as above w. "without basionym ref." 381 || pattern.replaceAll(" ","").equals("d(d):d(yyyy)aaa.") 382 || pattern.replaceAll(" ","").equals("d(d):d(yyyy).aaa.") 383 || pattern.replaceAll(" ","").equals("d(d):d(yyyy),aaa.") 384 || pattern.replaceAll(" ","").equals("d(d):d(yyyy):aaa.") 385 || pattern.replaceAll(" ","").equals("d(d):d(yyyy);aaa.") 386 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy)aaa.") 387 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy).aaa.") 388 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy),aaa.") 389 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy):aaa.") 390 || pattern.replaceAll(" ","").equals("d(d):yyyy(yyyy);aaa.") 391 392 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy)") 393 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy).") 394 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy),") 395 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy):") 396 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy);") 397 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy)") 398 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy).") 399 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy),") 400 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy):") 401 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy);") 402 // as above w. "sphalm." or "fig." yyyy 403 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy)a.") 404 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy).a.") 405 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy),a.") 406 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy):a.") 407 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy);a.") 408 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy)a.") 409 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy).a.") 410 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy),a.") 411 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy):a.") 412 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy);a.") 413 // as above w. "in obs." yyyy 414 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy)aa.") 415 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy).aa.") 416 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy),aa.") 417 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy):aa.") 418 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy);aa.") 419 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy)aa.") 420 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy).aa.") 421 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy),aa.") 422 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy):aa.") 423 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy);aa.") 424 // as above w. "nom. nov." yyyy 425 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy)a.a.") 426 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy).a.a.") 427 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy),a.a.") 428 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy):a.a.") 429 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy);a.a.") 430 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy)a.a.") 431 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy).a.a.") 432 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy),a.a.") 433 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy):a.a.") 434 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy);a.a.") 435 // as above w. "without basionym ref."yyyy 436 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy)aaa.") 437 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy).aaa.") 438 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy),aaa.") 439 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy):aaa.") 440 || pattern.replaceAll(" ","").equals("yyyy(d):d(yyyy);aaa.") 441 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy)aaa.") 442 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy).aaa.") 443 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy),aaa.") 444 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy):aaa.") 445 || pattern.replaceAll(" ","").equals("yyyy(d):yyyy(yyyy);aaa.") 446 )){ 447 //Example: "1(1): 1 (1988)" 448 String[] c = splitCollation(collation.replaceAll(" ","")); 449 //Series: "" 450 series = ""; 451 //Volume: "1" 452 volume = c[0]; 453 //Issue: "1" 454 issue = c[1]; 455 //Page: "1" 456 page = c[2]; 457 //Tab/Fig/No: "" 458 tab_or_fig_or_no = ""; 459 //Year: "1988" 460 year = c[3]; 461 rule = "5"; 462 parsed = true; 463 } 464 // vol: page (year) 465 if ( !parsed && 466 (pattern.replaceAll(" ","").equals("d:d(yyyy)") 467 || pattern.replaceAll(" ","").equals("d:d(yyyy).") 468 || pattern.replaceAll(" ","").equals("d:d(yyyy),") 469 || pattern.replaceAll(" ","").equals("d:d(yyyy):") 470 || pattern.replaceAll(" ","").equals("d:d(yyyy);") 471 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy)") 472 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy).") 473 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy),") 474 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy):") 475 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy);") 476 // as above w. "sphalm." or "fig." 477 || pattern.replaceAll(" ","").equals("d:d(yyyy)a.") 478 || pattern.replaceAll(" ","").equals("d:d(yyyy).a.") 479 || pattern.replaceAll(" ","").equals("d:d(yyyy),a.") 480 || pattern.replaceAll(" ","").equals("d:d(yyyy):a.") 481 || pattern.replaceAll(" ","").equals("d:d(yyyy);a.") 482 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy)a.") 483 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy).a.") 484 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy),a.") 485 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy):a.") 486 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy);a.") 487 // as above w. "in obs." 488 || pattern.replaceAll(" ","").equals("d:d(yyyy)aa.") 489 || pattern.replaceAll(" ","").equals("d:d(yyyy).aa.") 490 || pattern.replaceAll(" ","").equals("d:d(yyyy),aa.") 491 || pattern.replaceAll(" ","").equals("d:d(yyyy):aa.") 492 || pattern.replaceAll(" ","").equals("d:d(yyyy);aa.") 493 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy)aa.") 494 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy).aa.") 495 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy),aa.") 496 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy):aa.") 497 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy);aa.") 498 // as above w. "nom. nov." 499 || pattern.replaceAll(" ","").equals("d:d(yyyy)a.a.") 500 || pattern.replaceAll(" ","").equals("d:d(yyyy).a.a.") 501 || pattern.replaceAll(" ","").equals("d:d(yyyy),a.a.") 502 || pattern.replaceAll(" ","").equals("d:d(yyyy):a.a.") 503 || pattern.replaceAll(" ","").equals("d:d(yyyy);a.a.") 504 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy)a.a.") 505 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy).a.a.") 506 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy),a.a.") 507 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy):a.a.") 508 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy);a.a.") 509 // as above w. "without basionym ref." 510 || pattern.replaceAll(" ","").equals("d:d(yyyy)aaa.") 511 || pattern.replaceAll(" ","").equals("d:d(yyyy).aaa.") 512 || pattern.replaceAll(" ","").equals("d:d(yyyy),aaa.") 513 || pattern.replaceAll(" ","").equals("d:d(yyyy):aaa.") 514 || pattern.replaceAll(" ","").equals("d:d(yyyy);aaa.") 515 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy)aaa.") 516 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy).aaa.") 517 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy),aaa.") 518 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy):aaa.") 519 || pattern.replaceAll(" ","").equals("d:yyyy(yyyy);aaa.") 520 // as above w. roman numeral volume 521 || pattern.replaceAll(" ","").equals("r:d(yyyy)") 522 || pattern.replaceAll(" ","").equals("r:d(yyyy)a.a.") 523 || pattern.replaceAll(" ","").equals("r:d(yyyy).a.a.") 524 || pattern.replaceAll(" ","").equals("r:d(yyyy),a.a.") 525 || pattern.replaceAll(" ","").equals("r:d(yyyy):a.a.") 526 || pattern.replaceAll(" ","").equals("r:d(yyyy);a.a.") 527 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy)a.a.") 528 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy).a.a.") 529 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy),a.a.") 530 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy):a.a.") 531 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy);a.a.") 532 // as above w. "without basionym ref."r 533 || pattern.replaceAll(" ","").equals("r:d(yyyy)aaa.") 534 || pattern.replaceAll(" ","").equals("r:d(yyyy).aaa.") 535 || pattern.replaceAll(" ","").equals("r:d(yyyy),aaa.") 536 || pattern.replaceAll(" ","").equals("r:d(yyyy):aaa.") 537 || pattern.replaceAll(" ","").equals("r:d(yyyy);aaa.") 538 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy)aaa.") 539 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy).aaa.") 540 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy),aaa.") 541 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy):aaa.") 542 || pattern.replaceAll(" ","").equals("r:yyyy(yyyy);aaa.") 543 // as above w. roman numeral volume, page separated by . 544 || pattern.replaceAll(" ","").equals("r.d(yyyy)") 545 || pattern.replaceAll(" ","").equals("r.d(yyyy).") 546 || pattern.replaceAll(" ","").equals("r.d(yyyy),a.") 547 || pattern.replaceAll(" ","").equals("r.d(yyyy)a.a.") 548 || pattern.replaceAll(" ","").equals("r.d(yyyy).a.a.") 549 || pattern.replaceAll(" ","").equals("r.d(yyyy),a.a.") 550 || pattern.replaceAll(" ","").equals("r.d(yyyy):a.a.") 551 || pattern.replaceAll(" ","").equals("r.d(yyyy);a.a.") 552 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy)a.a.") 553 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy).a.a.") 554 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy),a.a.") 555 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy):a.a.") 556 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy);a.a.") 557 // as above w. "without basionym ref."r 558 || pattern.replaceAll(" ","").equals("r.d(yyyy)aaa.") 559 || pattern.replaceAll(" ","").equals("r.d(yyyy).aaa.") 560 || pattern.replaceAll(" ","").equals("r.d(yyyy),aaa.") 561 || pattern.replaceAll(" ","").equals("r.d(yyyy):aaa.") 562 || pattern.replaceAll(" ","").equals("r.d(yyyy);aaa.") 563 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy)aaa.") 564 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy).aaa.") 565 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy),aaa.") 566 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy):aaa.") 567 || pattern.replaceAll(" ","").equals("r.yyyy(yyyy);aaa.") 568 )){ 569 //Example: "1: 1 (1988)" 570 String[] c = splitCollation(collation.replaceAll(" ","")); 571 //Series: "" 572 series = ""; 573 //Volume: "1" 574 volume = c[0]; 575 //Issue: "1" 576 issue = ""; 577 //Page: "1" 578 page = c[1]; 579 //Tab/Fig/No: "" 580 tab_or_fig_or_no = ""; 581 //Year: "1988" 582 year = c[2]; 583 rule = "5.1"; 584 parsed = true; 585 } 586 587 588 if (!parsed && 589 ( 590 (pattern.equals("r. d") || patternMatchesWithNotes(pattern, "r. d", true)) 591 || 592 (pattern.equals("r. d.") || patternMatchesWithNotes(pattern, "r. d.", true)) 593 ) 594 ){ 595 //Example: "x. 89." 596 //Series: "" 597 series = ""; 598 //Volume: "x" 599 volume = splitCollation(collation)[0]; 600 //Issue: "" 601 issue = ""; 602 //Page: "89" 603 page = splitCollation(collation)[1]; 604 //Tab/Fig/No: "" 605 tab_or_fig_or_no = ""; 606 //Year: "" 607 year = ""; 608 rule = "6"; 609 parsed = true; 610 } 611 if (!parsed && (pattern.replaceAll(" ","").equals("d,a.d:d"))){ 612 String[] c = splitCollation(collation.replaceAll(" ","")); 613 //"5, pt. 1: 123" 614 volume = c[0]; // 5 615 issue = c[2]; // 1 616 page = c[3]; // 123 617 rule = "9"; 618 parsed = true; 619 } 620 if (!parsed && (pattern.replaceAll(" ","").equals("yyyy,r.d.") 621 || pattern.replaceAll(" ","").equals("yyyy,r.d"))){ 622 String[] c = splitCollation(collation.replaceAll(" ","")); 623 //Example: "1901, xv. 58" 624 volume = c[1]; // xv 625 page = c[2]; // 58 626 year = c[0]; // 1901 627 rule = "15"; 628 parsed = true; 629 } 630 if (!parsed && (pattern.replaceAll(" ","").equals("(yyyy)r.d.") 631 || pattern.replaceAll(" ","").equals("(yyyy),r.d.") 632 || pattern.replaceAll(" ","").equals("(yyyy)r.d") 633 || pattern.replaceAll(" ","").equals("(yyyy),r.d"))){ 634 String[] c = splitCollation(collation.replaceAll(" ","")); 635 //Example: "(1901), xv. 58 636 volume = c[2]; // xv 637 page = c[3]; // 58 638 year = c[1]; // 1901 639 rule = "17"; 640 parsed = true; 641 } 642 if (!parsed && (pattern.replaceAll(" ","").equals("yyyy,d.") 643 || pattern.replaceAll(" ","").equals("yyyy,d"))){ 644 String[] c = splitCollation(collation.replaceAll(" ","")); 645 //Example: "1901, 58 646 page = c[1]; // 58 647 year = c[0]; // 1901 648 rule = "20"; 649 parsed = true; 650 } 651 if (!parsed && (pattern.replaceAll(" ","").equals("d:d(-d)") 652 || pattern.replaceAll(" ","").equals("d:d(-d)."))){ 653 String[] c=splitCollation(collation.replaceAll(" ","")); 654 // Example: "12: 34(-7)" 655 volume=c[0]; // 12 656 page=c[1]; // 34 657 parsed=true; 658 rule = "22"; 659 } 660 if (!parsed && (pattern.replaceAll(" ","").equals("d.d.yyyy") 661 ||pattern.replaceAll(" ","").equals("d.d.yyyy."))){ 662 String[] c=splitCollation(collation.replaceAll(" ","")); 663 volume=c[0]; 664 page=c[1]; 665 year=c[2]; 666 parsed=true; 667 rule = "24"; 668 } 669 if (!parsed && (pattern.replaceAll(" ","").equals("d(d,a.d):d"))){ 670 String[] c=splitCollation(collation.replaceAll(" ","")); 671 volume=c[0]; 672 issue=c[1]+", "+c[2]+". "+c[3]; 673 page=c[4]; 674 parsed=true; 675 rule = "25"; 676 } 677 if (!parsed && (pattern.replaceAll(" ","").equals("d:d,a.d") 678 ||pattern.replaceAll(" ","").equals("d:d(a.d)") 679 || pattern.replaceAll(" ","").equals("d:yyyy,a.d") 680 ||pattern.replaceAll(" ","").equals("yyyy:d(a.d)") 681 || pattern.replaceAll(" ","").equals("r.d,a.d") 682 || pattern.replaceAll(" ","").equals("r.yyyy,a.d") 683 || pattern.replaceAll(" ","").equals("r.d.a.d.") 684 || pattern.replaceAll(" ","").equals("r.d.a.d") 685 || pattern.replaceAll(" ","").equals("r.yyyy.a.d") 686 || pattern.replaceAll(" ","").equals("r.yyyy.a.d."))){ 687 String[] c=splitCollation(collation.replaceAll(" ","")); 688 volume=c[0]; 689 page=c[1]; 690 tab_or_fig_or_no=c[2]+". "+c[3]; 691 parsed=true; 692 rule = "28"; 693 } 694 if (!parsed && (pattern.replaceAll(" ","").equals("d:d,a.d,d") 695 ||pattern.replaceAll(" ","").equals("d:d(a.d,d)") 696 || pattern.replaceAll(" ","").equals("d:yyyy,a.d,d") 697 ||pattern.replaceAll(" ","").equals("yyyy:d(a.d,d)") 698 || pattern.replaceAll(" ","").equals("r.d,a.d,d") 699 || pattern.replaceAll(" ","").equals("r.yyyy,a.d,d") 700 || pattern.replaceAll(" ","").equals("r.d.a.d,d.") 701 || pattern.replaceAll(" ","").equals("r.d.a.d,d") 702 || pattern.replaceAll(" ","").equals("r.yyyy.a.d,d") 703 || pattern.replaceAll(" ","").equals("r.yyyy.a.d,d."))){ 704 String[] c=splitCollation(collation.replaceAll(" ","")); 705 volume=c[0]; 706 page=c[1]; 707 tab_or_fig_or_no=c[2]+". "+c[3]+", "+c[4]; 708 parsed=true; 709 rule = "28.1"; 710 } 711 if (!parsed && (pattern.replaceAll(" ","").equals("d:d(-d;a.d)"))){ 712 String[] c=splitCollation(collation.replaceAll(" ","")); 713 volume=c[0]; 714 page=c[1]+c[2]; 715 tab_or_fig_or_no=c[3]+". "+c[4]; 716 parsed=true; 717 rule = "28.2"; 718 } 719 if (!parsed && (pattern.replaceAll(" ","").equals("r.(yyyy)d.") 720 ||pattern.replaceAll(" ","").equals("r.(yyyy)d") 721 ||pattern.replaceAll(" ","").equals("r(yyyy)d"))){ 722 String[] c=splitCollation(collation.replaceAll(" ","")); 723 volume=c[0]; 724 year=c[1]; 725 page=c[2]; 726 parsed=true; 727 rule = "29"; 728 } 729 if (!parsed && pattern.replaceAll(" ","").equals("yyyy,d(yyyy)")){ 730 String[] c=splitCollation(collation.replaceAll(" ","")); 731 volume=c[0]; 732 page=c[1]; 733 year=c[2]; 734 parsed=true; 735 rule = "30"; 736 } 737 if (!parsed && pattern.replaceAll(" ","").equals("d:d(-d),a")){ 738 String[] c=splitCollation(collation.replaceAll(" ","")); 739 volume=c[0]; 740 page=c[1]; 741 parsed=true; 742 rule = "32"; 743 } 744 if (!parsed && pattern.replaceAll(" ","").equals("d:d,d")){ 745 String[] c=splitCollation(collation.replaceAll(" ","")); 746 volume=c[0]; 747 page=c[1]; 748 parsed=true; 749 rule = "33"; 750 } 751 752 if (!parsed && pattern.replaceAll(" ","").equals("d(d):d(yyyy),a.a.:")){ 753 String[] c=splitCollation(collation.replaceAll(" ","")); 754 volume=c[0]; 755 issue=c[1]; 756 page=c[2]; 757 year=c[3]; 758 parsed=true; 759 rule = "34"; 760 } 761 if (!parsed && pattern.replaceAll(" ","").equals("d(d):d(-d;a.d)")){ 762 String[] c=splitCollation(collation.replaceAll(" ","")); 763 // Example: 1(1): 217 (-219; fig. 11) 764 volume=c[0]; // 1 765 issue=c[1]; // 1 766 page=c[2]; // 217 767 tab_or_fig_or_no=c[4]+". "+c[5]; // fig. 11 768 parsed=true; 769 rule = "35"; 770 } 771 if (!parsed && pattern.replaceAll(" ","").equals("d:d(yyyy-d)")){ 772 String[] c=splitCollation(collation.replaceAll(" ","")); 773 // Example: 30: 101 (1859-61) 774 volume=c[0]; 775 page=c[1]; 776 year=c[2].split("\\-")[0]; 777 parsed=true; 778 rule = "36"; 779 } 780 // Rule #37 - see rule #29 781 if (!parsed && pattern.replaceAll(" ","").equals("d,a.d:d,a.d,a.d")){ 782 String[] c=splitCollation(collation.replaceAll(" ","")); 783 // Example: 1, pt. 2: 112, tab. 5, fig. 2 784 volume=c[0]; // 1 785 issue=c[2]; // 2 786 page=c[3]; // 112 787 tab_or_fig_or_no=c[4]+". "+c[5]+", "+c[6]+". " + c[7]; // tab. 5, fig. 2 788 parsed=true; 789 rule = "38"; 790 } 791 if (!parsed && pattern.replaceAll(" ","").equals("d:d,a.d,a.d")){ 792 String[] c=splitCollation(collation.replaceAll(" ","")); 793 volume=c[0]; 794 page=c[1]; 795 tab_or_fig_or_no=c[2]+". "+c[3]+", "+c[4]+". "+c[5]; 796 parsed=true; 797 rule = "38.1"; 798 } 799 800 // Rule 39 handled under rule #4 801 802 if (!parsed && (pattern.replaceAll(" ","").equals("r.d(yyyy),aa.") 803 || pattern.replaceAll(" ","").equals("r.d(yyyy)aa."))){ 804 String[] c=splitCollation(collation.replaceAll(" ","")); 805 // Example: C. 433 (1895), in obs. 806 volume=c[0]; 807 page=c[1]; 808 year=c[2]; 809 parsed=true; 810 rule = "40"; 811 } 812 if (!parsed && (pattern.replaceAll(" ","").equals("r.a.d,d"))){ 813 String[] c=splitCollation(collation.replaceAll(" ","")); 814 volume=c[0]; 815 issue=c[2]; 816 page=c[3]; 817 parsed=true; 818 rule = "41"; 819 } 820 if (!parsed && ( 821 (pattern.replaceAll(" ","").equals("r.a.d,d(yyyy)") 822 ||pattern.replaceAll(" ","").equals("r.a.d,d(yyyy).")))){ 823 String[] c=splitCollation(collation.replaceAll(" ","")); 824 volume=c[0]; 825 issue=c[2]; 826 page=c[3]; 827 page=c[4]; 828 parsed=true; 829 rule = "41.1"; 830 } 831 832 // Rule #42 - see rule #5 833 834 // Rule #43 - see rule #24 835 836 // Rule #44 - see rule #3 837 // See rule #24 838 839 if (!parsed && (pattern.replaceAll(" ","").equals("d(a.):d"))){ 840 String[] c=splitCollation(collation.replaceAll(" ","")); 841 // Example: 24(Misc.): 71 842 volume=c[0]+"("+c[1]+".)"; // 24(Misc.) 843 page=c[2]; // 71 844 parsed=true; 845 rule = "45"; 846 } 847 if (!parsed && (pattern.replaceAll(" ","").equals("d(yyyy).") 848 || pattern.replaceAll(" ","").equals("d(yyyy)") 849 || pattern.replaceAll(" ","").equals("d.yyyy."))){ 850 String[] c=splitCollation(collation.replaceAll(" ","")); 851 // Example: 7 (1932). 852 page=c[0]; // 7 853 year=c[1]; // 1932 854 parsed=true; 855 rule = "46"; 856 } 857 858 // Rule #47 - see rule #5 859 860 if (!parsed && (pattern.replaceAll(" ","").equals("d(d):d,a.d") 861 ||pattern.replaceAll(" ","").equals("d(d):d(a.d)"))){ 862 String[] c=splitCollation(collation.replaceAll(" ","")); 863 volume=c[0]; 864 issue=c[1]; 865 page=c[2]; 866 tab_or_fig_or_no=c[3]+". "+c[4]; 867 parsed=true; 868 rule = "48"; 869 } 870 871 // Rule #49 - see rule 3 872 873 // Rule #50 - see rule #46 874 875 if (!parsed && (pattern.replaceAll(" ","").equals("d,a.d:d,a.d"))){ 876 String[] c=splitCollation(collation.replaceAll(" ","")); 877 volume=c[0]; 878 issue=c[2]; 879 page=c[3]; 880 tab_or_fig_or_no=c[4]+". "+c[5]; 881 parsed=true; 882 rule = "51"; 883 } 884 885 if (!parsed && (pattern.replaceAll(" ","").equals("(yyyy)d.") 886 || pattern.replaceAll(" ","").equals("(yyyy)d"))){ 887 String[] c=splitCollation(collation.replaceAll(" ","")); 888 // Example: (1756) 249. 889 year=c[1]; // 1756 890 page=c[2]; // 249 891 parsed=true; 892 rule = "52"; 893 } 894 895 // Rule #53 - see rule #52 896 897 // Rule #54 - see rule #4 898 899 if (!parsed && (pattern.replaceAll(" ","").equals("a.d,d:d" ) || pattern.replaceAll(" ","").equals("a.d,d:yyyy"))){ 900 String[] c=splitCollation(collation.replaceAll(" ","")); 901 if (c[0].equalsIgnoreCase("ser")){ 902 series=c[1]; 903 volume=c[2]; 904 page=c[3]; 905 parsed=true; 906 rule = "55"; 907 } 908 } 909 if (!parsed && (pattern.replaceAll(" ","").equals("a.d,d(d):d" ) || pattern.replaceAll(" ","").equals("a.d,d(d):yyyy"))){ 910 String[] c=splitCollation(collation.replaceAll(" ","")); 911 if (c[0].equalsIgnoreCase("ser")){ 912 series=c[1]; 913 volume=c[2]; 914 issue=c[3]; 915 page=c[4]; 916 parsed=true; 917 rule = "56"; 918 } 919 } 920 if (!parsed && (pattern.replaceAll(" ","").equals("a.d"))){ 921 String[] c=splitCollation(collation.replaceAll(" ","")); 922 // Example: Acerac. 22 923 page = c[1]; 924 parsed = true; 925 rule = "56.1"; 926 } 927 if (!parsed && (pattern.replaceAll(" ","").equals("r.r.(yyyy)d"))){ 928 String[] c=splitCollation(collation.replaceAll(" ","")); 929 if(c[0].toUpperCase().equals(c[0])){ 930 series=c[0]; 931 volume=c[1]; 932 year=c[2]; 933 page=c[3]; 934 parsed = true; 935 rule = "56.111"; 936 } 937 else{ 938 volume=c[0]; 939 issue=c[1]; 940 year=c[2]; 941 page=c[3]; 942 parsed = true; 943 rule = "56.112"; 944 } 945 } 946 947 if (!parsed && (pattern.replaceAll(" ","").equals("a.d,d(yyyy)"))){ 948 String[] c=splitCollation(collation.replaceAll(" ","")); 949 // Example: No. 154, 65 (1963) 950 if (c[0].equals("No")||c[0].equals("Fasc")||c[0].equals("Pt")){ 951 volume=c[1]; 952 page=c[2]; 953 year=c[3]; 954 rule="56.3"; 955 parsed=true; 956 } 957 } 958 959 if (!parsed && (pattern.replaceAll(" ","").equals("a.r,r.d(yyyy).") 960 || pattern.replaceAll(" ","").equals("a.r,r.d(yyyy)"))){ 961 // Example: Ser. I, ix. 16 (1911). 962 String[] c=splitCollation(collation.replaceAll(" ","")); 963 series = c[1]; 964 volume = c[2]; 965 page = c[3]; 966 year = c[4]; 967 parsed = true; 968 rule = "56.4"; 969 } 970 971 if (!parsed && (pattern.replaceAll(" ","").equals("r.r.d(yyyy)"))){ 972 String[] c=splitCollation(collation.replaceAll(" ","")); 973 volume=c[0]; 974 issue=c[1]; 975 page=c[2]; 976 year=c[3]; 977 rule="56.13"; 978 parsed=true; 979 } 980 if (!parsed && (pattern.replaceAll(" ","").equals("d:a.d") 981 || pattern.replaceAll(" ","").equals("d:a.yyyy"))){ 982 // 4: t. 334 983 String[] c=splitCollation(collation.replaceAll(" ","")); 984 volume=c[0]; 985 tab_or_fig_or_no=c[1]+". "+c[2]; 986 parsed = true; 987 rule = "56.15"; 988 } 989 990 if (!parsed && (pattern.replaceAll(" ","").equals("d.yyyy"))){ 991 // 2. 1857 992 String[] c=splitCollation(collation.replaceAll(" ","")); 993 page=c[0]; 994 year=c[1]; 995 parsed = true; 996 rule = "56.8"; 997 } 998 999 if (!parsed && (pattern.replaceAll(" ","").equals("r.(yyyy)r.d") 1000 ||pattern.replaceAll(" ","").equals("r.(yyyy)r.d."))){ 1001 // Example: xxx. (1857) II. 67. 1002 String[] c=splitCollation(collation.replaceAll(" ","")); 1003 volume=c[0]; 1004 year=c[1]; 1005 issue=c[2]; 1006 page=c[3]; 1007 parsed = true; 1008 rule = "56.9"; 1009 } 1010 1011 1012 if (!parsed && (pattern.replaceAll(" ","").equals("r.d(yyyy)d"))){ 1013 String[] c=splitCollation(collation.replaceAll(" ","")); 1014 if(c[1].matches(".*[A-Za-z].*")){ 1015 series=c[0]; 1016 volume=c[1]; 1017 year=c[2]; 1018 page=c[3]; 1019 rule="62.5"; 1020 parsed=true; 1021 } 1022 else{ 1023 volume=c[0]; 1024 issue=c[1]; 1025 year=c[2]; 1026 page=c[3]; 1027 rule = "62.4"; 1028 parsed=true; 1029 } 1030 } 1031 if (pattern.replaceAll(" ","").equals("d,a.d") 1032 ||pattern.replaceAll(" ","").equals("d.a.d") 1033 ||pattern.replaceAll(" ","").equals("d.a.d.")){ 1034 String[] c=splitCollation(collation.replaceAll(" ","")); 1035 page=c[0]; 1036 tab_or_fig_or_no=c[1]+". "+c[2]; 1037 rule = "63.1"; 1038 parsed=true; 1039 } 1040 //17, Heft 74: 36 1041 if (pattern.equals("d, a d: d")){ 1042 String[] c=splitCollation(collation); 1043 volume=c[0]; 1044 issue=c[1] + " " + c[2]; 1045 page=c[3]; 1046 rule = "63.5"; 1047 parsed=true; 1048 } 1049 1050 //94: sub t. 5720 1051 if (pattern.equals("d: a a. d")||pattern.equals("d: a a. yyyy")){ 1052 String[] c=splitCollation(collation); 1053 volume=c[0]; 1054 tab_or_fig_or_no=c[1]+" " + c[2] + ". "+c[3]; 1055 rule = "65.15"; 1056 parsed=true; 1057 } 1058 1059 1060 if (pattern.replaceAll(" ","").equals("a.d:d")){ 1061 String[] c=splitCollation(collation.replaceAll(" ","")); 1062 volume=c[0]+". "+c[1]; 1063 page=c[2]; 1064 rule = "65.2"; 1065 parsed=true; 1066 } 1067 if (!parsed && (pattern.replaceAll(" ","").equals("d(d.d):d") 1068 ||pattern.replaceAll(" ","").equals("d(d,d):d") 1069 ||pattern.replaceAll(" ","").equals("d(d:d):d") 1070 ||pattern.replaceAll(" ","").equals("d(d/d):d"))){ 1071 String[] c=splitCollation(collation.replaceAll(" ","")); 1072 volume=c[0]; 1073 issue=collation.replaceAll(" ","").split("[\\(\\)]")[1]; 1074 page=c[3]; 1075 rule = "65.4"; 1076 parsed=true; 1077 } 1078 if (pattern.replaceAll(" ","").equals("d(a.d):d") 1079 || pattern.replaceAll(" ","").equals("d(d,a.):d") 1080 || pattern.replaceAll(" ","").equals("d(d,a.a.):d")){ 1081 String[] c=splitCollation(collation.replaceAll(" ","")); 1082 issue=collation.split("[\\(\\)]")[1]; 1083 volume=c[0]; 1084 page=collation.replaceAll(" ","").split(":")[1]; 1085 rule = "65.5"; 1086 parsed=true; 1087 } 1088 if (pattern.replaceAll(" ","").equals("r.a.d")){ 1089 String[] c=splitCollation(collation.replaceAll(" ","")); 1090 if (c[1].equals("t") 1091 || c[1].equals("fig") 1092 || c[1].equals("n")){ 1093 volume=c[0]; 1094 tab_or_fig_or_no=c[1]+". "+c[2]; 1095 rule = "65.1"; 1096 parsed=true; 1097 } 1098 else{ 1099 volume=c[0]+"."+c[1]; 1100 page=c[2]; 1101 rule = "65.1"; 1102 parsed=true; 1103 } 1104 } 1105 if (pattern.replaceAll(" ","").equals("a.r.r.d.")){ 1106 // Example: Ser. II. i. 1074. 1107 String[] c=splitCollation(collation.replaceAll(" ","")); 1108 series=c[1]; // II 1109 volume=c[2]; // i 1110 page=c[3]; // 1074 1111 parsed = true; 1112 rule = "56.5"; 1113 } 1114 if (pattern.replaceAll(" ","").equals("a.-a.d(yyyy)")){ 1115 String[] c=splitCollation(collation.replaceAll(" ","")); 1116 series=c[0]+"."+c[1]+"."; 1117 page=c[2]; 1118 year=c[3]; 1119 parsed = true; 1120 rule = "56.7"; 1121 } 1122 if (pattern.replaceAll(" ","").equals("r.r.d")){ 1123 String[] c=splitCollation(collation.replaceAll(" ","")); 1124 volume=c[0]; 1125 issue=c[1]; 1126 page=c[2]; 1127 parsed = true; 1128 rule = "56.12"; 1129 } 1130 if (pattern.replaceAll(" ","").equals("yyyy,r.d(yyyy).")){ 1131 // Example: 1851, iii. 160 (1855). 1132 String[] c=splitCollation(collation.replaceAll(" ","")); 1133 volume=c[1]; 1134 page=c[2]; 1135 year=c[3]; 1136 parsed = true; 1137 rule = "56.14"; 1138 } 1139 if (pattern.replaceAll(" ","").equals("d:a.d") 1140 || pattern.replaceAll(" ","").equals("d:a.yyyy")){ 1141 // 4: t. 334 1142 String[] c=splitCollation(collation.replaceAll(" ","")); 1143 volume=c[0]; 1144 tab_or_fig_or_no=c[1]+". "+c[2]; 1145 parsed = true; 1146 rule = "56.15"; 1147 } 1148 if (pattern.equals("(a yyyy) d")){ 1149 // Example: (Aout 1819) 98 1150 String[] c=splitCollation(collation); 1151 year=c[2]; 1152 page=c[3]; 1153 parsed = true; 1154 rule = "56.17"; 1155 } 1156 1157 if (pattern.replaceAll(" ","").equals("a.d,d")){ 1158 String[] c=splitCollation(collation.replaceAll(" ","")); 1159 if (c[0].equals("ed")){ 1160 series=c[0]+". "+c[1]; 1161 page=c[2]; 1162 rule="56.21"; 1163 parsed=true; 1164 } 1165 else if (c[0].equals("no")){ 1166 volume=c[1]; 1167 page=c[2]; 1168 rule="56.22"; 1169 parsed=true; 1170 } 1171 else if (c[0].equals("t")){ 1172 tab_or_fig_or_no=c[0]+". "+c[1]+", "+c[2]; 1173 rule="56.23"; 1174 parsed=true; 1175 } 1176 } 1177 if (pattern.replaceAll(" ","").equals("r.d.d.yyyy")){ 1178 String[] c=splitCollation(collation.replaceAll(" ","")); 1179 series=c[0]; 1180 volume=c[1]; 1181 page=c[2]; 1182 year=c[3]; 1183 rule="62"; 1184 parsed=true; 1185 } 1186 //r. d, d (yyyy). 1187 if (pattern.replaceAll(" ","").equals("r.d,d(yyyy).")){ 1188 String[] c=splitCollation(collation.replaceAll(" ","")); 1189 volume=c[0]; 1190 page=c[1] + ", " + c[2]; 1191 year=c[3]; 1192 rule="62.3"; 1193 parsed=true; 1194 } 1195 // a., d(d): d 1196 if (pattern.replaceAll(" ","").equals("a.,d(d):d")){ 1197 String[] c=splitCollation(collation.replaceAll(" ","")); 1198 volume=c[0] + "., " + c[1]; 1199 issue=c[2]; 1200 page=c[3]; 1201 rule="65.10"; 1202 parsed=true; 1203 } 1204 //yyyy, a. r. r. d. 1205 if (pattern.replaceAll(" ","").equals("yyyy,a.r.r.d.")){ 1206 String[] c=splitCollation(collation.replaceAll(" ","")); 1207 year=c[0]; 1208 series=c[2]; 1209 volume=c[3]; 1210 page=c[4]; 1211 rule="67"; 1212 parsed=true; 1213 } 1214 //d; a. a. r. d 1215 if (pattern.replaceAll(" ","").equals("d;a.a.r.d")){ 1216 String[] c=splitCollation(collation.replaceAll(" ","")); 1217 page=c[0]; 1218 rule="70"; 1219 parsed=true; 1220 } 1221 //a.-a.-a. d (yyyy) 1222 if (pattern.replaceAll(" ","").equals("a.-a.-a.d(yyyy)")){ 1223 String[] c=splitCollation(collation.replaceAll(" ","")); 1224 series=c[0] + "." + c[1] + "." + c[2] + "."; 1225 page=c[3]; 1226 year=c[4]; 1227 rule="71"; 1228 parsed=true; 1229 } 1230 //a. r. d (yyyy) 1231 if (pattern.replaceAll(" ","").equals("a.r.d(yyyy)")){ 1232 String[] c=splitCollation(collation.replaceAll(" ","")); 1233 if (c[0].equalsIgnoreCase("Suppl")){ 1234 volume="Suppl. " + c[1]; 1235 page=c[2]; 1236 year=c[3]; 1237 rule="75.2"; 1238 parsed = true; 1239 } 1240 else{ 1241 volume=c[1]; 1242 page=c[2]; 1243 year=c[3]; 1244 rule="75.1"; 1245 parsed = true; 1246 } 1247 } 1248 //d(a): d 1249 if (pattern.replaceAll(" ","").equals("d(a):d")||pattern.replaceAll(" ","").equals("d(a):yyyy")){ 1250 String[] c=splitCollation(collation.replaceAll(" ","")); 1251 volume=c[0]+"("+c[1]+")"; 1252 page=c[2]; 1253 rule="65.17"; 1254 parsed=true; 1255 } 1256 //a. r. r. d (yyyy). 1257 if (pattern.replaceAll(" ","").equals("a.r.r.d(yyyy).")){ 1258 String[] c=splitCollation(collation.replaceAll(" ","")); 1259 if (c[0].equalsIgnoreCase("Ser")){ 1260 series=c[1]; 1261 volume=c[2]; 1262 page=c[3]; 1263 year=c[4]; 1264 rule="77"; 1265 parsed=true; 1266 } 1267 } 1268 if (pattern.replaceAll(" ","").equals("d(d):a.d")||pattern.replaceAll(" ","").equals("d(d):a.yyyy")){ 1269 String[] c=splitCollation(collation.replaceAll(" ","")); 1270 volume=c[0]; 1271 issue=c[1]; 1272 tab_or_fig_or_no=c[2] + ". " + c[3]; 1273 rule="78"; 1274 parsed=true; 1275 } 1276 //yyyy, r. d (yyyy) 1277 if(pattern.replaceAll(" ","").equals("yyyy,r.d(yyyy)")){ 1278 String[] c=splitCollation(collation.replaceAll(" ","")); 1279 volume=c[1]; 1280 page=c[2]; 1281 year=c[3]; 1282 parsed=true; 1283 rule="65.21"; 1284 } 1285 //r a. d (yyyy) 1286 if(pattern.equals("r a. d (yyyy)")){ 1287 String[] c=splitCollation(collation); 1288 volume=c[0] + " " + c[1]; 1289 page=c[2]; 1290 year=c[3]; 1291 parsed=true; 1292 rule="80"; 1293 } 1294 // a. d, d, a. d 1295 if(pattern.replaceAll(" ","").equals("a.d,d,a.d")){ 1296 String[] c=splitCollation(collation.replaceAll(" ","")); 1297 if (c[0].equalsIgnoreCase("Ser")){ 1298 series=c[1]; 1299 volume=c[2]; 1300 tab_or_fig_or_no=c[3] + ". " + c[4]; 1301 rule="81"; 1302 parsed=true; 1303 } 1304 } 1305 //a. r, r. (yyyy) d 1306 if(pattern.replaceAll(" ","").equals("a.r,r.(yyyy)d")){ 1307 String[] c=splitCollation(collation.replaceAll(" ","")); 1308 if (c[0].equalsIgnoreCase("Ser")){ 1309 series=c[1]; 1310 volume=c[2]; 1311 year=c[3]; 1312 page=c[4]; 1313 rule="82"; 1314 parsed=true; 1315 } 1316 } 1317 //r. (yyyy-d) d. 1318 if(pattern.replaceAll(" ","").equals("r.(yyyy-d)d.")){ 1319 String[] c=splitCollation(collation.replaceAll(" ","")); 1320 volume=c[0]; 1321 year=c[1].split("\\-")[0]+","+c[1].split("\\-")[0].substring(0,2)+c[1].split("\\-")[1]; 1322 page=c[2]; 1323 parsed=true; 1324 rule="83"; 1325 } 1326 //r-r. (yyyy) d 1327 if(pattern.replaceAll(" ","").equals("r-r.(yyyy)d")){ 1328 String[] c=splitCollation(collation.replaceAll(" ","")); 1329 volume=c[0]; 1330 year=c[1]; 1331 page=c[2]; 1332 parsed=true; 1333 rule="84"; 1334 } 1335 //r.d.a.d(yyyy) 1336 if(pattern.replaceAll(" ","").equals("r.d.a.d(yyyy)")){ 1337 String[] c=splitCollation(collation.replaceAll(" ","")); 1338 volume=c[0]; 1339 page=c[1]; 1340 tab_or_fig_or_no=c[2] + ". " + c[3]; 1341 year=c[4]; 1342 parsed=true; 1343 rule="62.6"; 1344 } 1345 //r. d(a d): d 1346 if(pattern.equals("r. d(a d): d")){ 1347 String[] c=splitCollation(collation); 1348 series=c[0]; 1349 volume=c[1]; 1350 issue=c[2] + " " + c[3]; 1351 page=c[4]; 1352 parsed=true; 1353 rule="62.8"; 1354 } 1355 //r. d (yyyy); a. a a. a a. 1356 if(pattern.equals("r. d (yyyy); a. a a. a a.")){ 1357 String[] c=splitCollation(collation); 1358 volume=c[0]; 1359 page=c[1]; 1360 year=c[2]; 1361 parsed=true; 1362 rule="62.9"; 1363 } 1364 //r. d, d (yyyy) 1365 if(pattern.replaceAll(" ", "").equals("r.d,d(yyyy)")){ 1366 String[] c=splitCollation(collation.replaceAll(" ", "")); 1367 volume=c[0]; 1368 page=c[1] + ", " + c[2]; 1369 year=c[3]; 1370 parsed=true; 1371 rule="62.10"; 1372 } 1373 //r. d. a. d (yyyy). 1374 if(pattern.replaceAll(" ", "").equals("r.d.a.d(yyyy).")){ 1375 String[] c=splitCollation(collation.replaceAll(" ", "")); 1376 volume=c[0]; 1377 page=c[1]; 1378 tab_or_fig_or_no=c[2] + ". " + c[3]; 1379 year=c[4]; 1380 parsed=true; 1381 rule="62.11"; 1382 } 1383 //r. d, d 1384 if(pattern.replaceAll(" ", "").equals("r.d,d")){ 1385 String[] c=splitCollation(collation.replaceAll(" ", "")); 1386 volume=c[0]; 1387 page=c[1] + ", " + c[2]; 1388 parsed=true; 1389 rule="62.12"; 1390 } 1391 //d(d):d,d 1392 if(pattern.replaceAll(" ", "").equals("d(d):d,d")){ 1393 String[] c=splitCollation(collation.replaceAll(" ", "")); 1394 volume=c[0]; 1395 issue=c[1]; 1396 page=c[2]+", "+c[3]; 1397 parsed=true; 1398 rule="62.12"; 1399 } 1400 //d,a.d,r:d 1401 if(pattern.replaceAll(" ", "").equals("d,a.d,r:d")){ 1402 String[] c=splitCollation(collation.replaceAll(" ", "")); 1403 series=c[0]; 1404 volume=c[1]+". "+c[2]+", "+c[3]; 1405 page=c[4]; 1406 parsed=true; 1407 rule="63.3"; 1408 } 1409 1410 // Final catches if not yet parsed: 1411 if (!parsed){ 1412 // do some final catches - LOOK AT THIS 1413 if (pattern.startsWith("d: d")){ 1414 String[] c=collation.split("[^a-z0-9]+"); 1415 volume=c[0]; 1416 page=c[1]; 1417 parsed=true; 1418 rule = "58"; 1419 } 1420 if (pattern.startsWith("d(d): d")){ 1421 String[] c=collation.split("[^a-z0-9]+"); 1422 volume=c[0]; 1423 issue=c[1]; 1424 page=c[2]; 1425 parsed=true; 1426 rule = "59"; 1427 } 1428 if (pattern.startsWith("r(d): d")){ 1429 String[] c=collation.split("[^a-z0-9]+"); 1430 volume=c[0]; 1431 issue=c[1]; 1432 page=c[2]; 1433 parsed=true; 1434 rule = "60"; 1435 } 1436 if (pattern.startsWith("r: d")){ 1437 String[] c=collation.split("[^a-z0-9]+"); 1438 volume=c[0]; 1439 page=c[1]; 1440 parsed=true; 1441 rule = "61"; 1442 } 1443 if (pattern.startsWith("d (yyyy)")){ 1444 page=collation.split(" ")[0]; 1445 year=collation.split("\\(")[1].split("\\)")[0]; 1446 parsed=true; 1447 rule = "64"; 1448 } 1449 } 1450// // Final attempt - if we don't have a page at this point: 1451// if (!parsed){ 1452// String[] locators = {": d", ": yyyy"}; 1453// for (String locator : locators){ 1454// if (c_patt.contains(locator)){ 1455// if (c_patt.endsWith(locator)){ 1456// page = collation.split(": ")[collation.split(": ").length-1]; 1457// } 1458// else{ 1459// Pattern pattern = Pattern.compile(": [0-9]"); 1460// Matcher matcher = pattern.matcher(collation); 1461// if(matcher.find()){ 1462// String rem = collation.substring(matcher.start()+2); 1463// page = rem.split("[^0-9]")[0]; 1464// } 1465// } 1466// rule = "65"; 1467// } 1468// } 1469// } 1470 1471 // Put the components into a list for return 1472 String[] c_parsed={series,volume,issue,page,tab_or_fig_or_no,year,rule}; 1473 return c_parsed; 1474 } 1475 1476 public static void main(String[] args) { 1477 // Tab separated file of id and collation 1478 String inputfile = args[0]; 1479 String outputfile = args[1]; 1480 1481 try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(inputfile),"utf8")); 1482 FileWriter fw = new FileWriter(outputfile); BufferedWriter bw = new BufferedWriter(fw);) { 1483 int count = 0; 1484 1485 String line = null; 1486 while ((line = br.readLine()) != null) { 1487 if ((count++ % 10000) == 0){ 1488 System.out.println(count); 1489 } 1490 String[] elems = line.split("\t"); 1491 String id = elems[0]; 1492 if (elems.length > 1){ 1493 String collation = elems[1]; 1494 1495 String structure = new CollationStructureTransformer().transform(collation); 1496 1497 String[] parsed = CollationUtils.parseCollation(collation); 1498 1499 bw.write(id 1500 + "\t" + collation 1501 + "\t" + structure 1502 + "\t" + parsed[SERIES_INDEX] 1503 + "\t" + parsed[VOL_INDEX] 1504 + "\t" + parsed[ISSUE_INDEX] 1505 + "\t" + parsed[PAGE_INDEX] 1506 + "\t" + parsed[TAB_OR_FIG_INDEX] 1507 + "\t" + parsed[YEAR_INDEX] 1508 + "\t" + CollationUtils.parsableCollation(collation) 1509 + "\t" + parsed[RULE_INDEX] 1510 + "\n"); 1511 } 1512 else{ 1513 bw.write(id + "\n"); 1514 } 1515 } 1516 bw.flush(); 1517 bw.close(); 1518 } 1519 catch(Exception e){ 1520 e.printStackTrace(); 1521 } 1522 } 1523 1524}