001/*
002 * Copyright © 2012, 2013, 2014 Royal Botanic Gardens, Kew.
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
005 *
006 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
007 *
008 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
009 */
010package org.kew.rmf.transformers.collations.wcs;
011
012import java.io.BufferedReader;
013import java.io.BufferedWriter;
014import java.io.FileInputStream;
015import java.io.FileWriter;
016import java.io.InputStreamReader;
017import java.util.Arrays;
018
019import org.kew.rmf.transformers.RomanNumeralTransformer;
020import org.kew.rmf.transformers.collations.CollationStructureTransformer;
021
022public class CollationUtils {
023
024        public static int SERIES_INDEX = 0;
025        public static int VOL_INDEX = 1;
026        public static int ISSUE_INDEX = 2;
027        public static int PAGE_INDEX = 3;
028        public static int TAB_OR_FIG_INDEX = 4;
029        public static int YEAR_INDEX = 5;
030        public static int RULE_INDEX = 6;
031
032        private static String convertRoman(String r){
033                return new RomanNumeralTransformer().transform(r);
034        }
035        
036        private static String[] splitCollation(String collation){
037                return CollationStructureTransformer.splitCollation(collation);
038        }
039        
040        public static String assessCollationStructure(String collation){
041                return CollationStructureTransformer.assessCollationStructure(collation);
042        }
043        
044        public static boolean parsableCollation(String collation){
045                return !Arrays.toString(parseCollation(collation)).equals("[, , , , , , ]");
046        }
047        public static String[] parseCollation(String collation){
048                String pattern = assessCollationStructure(collation);
049                String series="";
050                String volume="";
051                String issue="";
052                String page="";
053                String tab_or_fig_or_no="";
054                String year="";
055                String rule="";
056
057                if (pattern.equals("d: d") 
058                                || pattern.equals("d: r")
059                                || pattern.equals("yyyy: d") 
060                                || pattern.equals("d: yyyy") 
061                                || pattern.equals("yyyy: yyyy")){
062                        //Example: "12: 89"
063                        //Series: ""
064                        series = "";
065                        //Volume: "12"
066                        volume = splitCollation(collation)[0];
067                        //Issue: ""
068                        issue = "";
069                        //Page: "89"
070                        page = splitCollation(collation)[1];
071                        //Tab/Fig/No: ""
072                        tab_or_fig_or_no = "";
073                        //Year: ""
074                        year = "";
075                        rule = "1";
076                }
077                if (pattern.equals(", d: d") || pattern.equals(", yyyy: d") || pattern.equals(", d: yyyy") || pattern.equals(", yyyy: yyyy")){
078                        //Example: ", 12: 89"
079                        //Series: ""
080                        series = "";
081                        //Volume: "12"
082                        volume = convertRoman(splitCollation(collation)[1]);
083                        //Issue: ""
084                        issue = "";
085                        //Page: "89"
086                        page = convertRoman(splitCollation(collation)[2]);
087                        //Tab/Fig/No: ""
088                        tab_or_fig_or_no = "";
089                        //Year: ""
090                        year = "";
091                        rule = "1.1";
092                }               
093                if (pattern.equals(": d") || pattern.equals(": yyyy") || pattern.equals(": r")){
094                        //Example: ": 121"
095                        //Series: ""
096                        series = "";
097                        //Volume: ""
098                        volume = "";
099                        //Issue: ""
100                        issue = "";
101                        //Page: "121"
102                        page = collation.split(" ")[1];
103                        //Tab/Fig/No: ""
104                        tab_or_fig_or_no = "";
105                        //Year: ""
106                        year = "";
107                        rule = "2";
108                }
109                if (pattern.equals("d(d): d") || pattern.equals("d(d): yyyy") || pattern.equals("yyyy(d): d")){
110                        //Example: "12(2): 61"
111                        //Series: ""
112                        series = "";
113                        //Volume: "12"
114                        volume = splitCollation(collation)[0];
115                        //Issue: "2"
116                        issue = splitCollation(collation)[1];
117                        //Page: "61"
118                        page = splitCollation(collation)[2];
119                        //Tab/Fig/No: ""
120                        tab_or_fig_or_no = "";
121                        //Year: ""
122                        year = "";
123                        rule = "3";
124                }
125                if (pattern.equals("d: d (yyyy)")){
126                        //Example: "12: 99 (1898)"
127                        //Series: ""
128                        series = "";
129                        //Volume: "12"
130                        volume = splitCollation(collation)[0];
131                        //Issue: ""
132                        issue = "";
133                        //Page: "99"
134                        page = splitCollation(collation)[1];
135                        //Tab/Fig/No: ""
136                        tab_or_fig_or_no = "";
137                        //Year: "1898"
138                        year = splitCollation(collation)[2];
139                        rule = "3";
140                }
141                if (pattern.equals(", r, d: d")){
142                        //Example: ", III, 102: 414"
143                        //Series: "III"
144                        series = splitCollation(collation)[1];
145                        //Volume: "102"
146                        volume = splitCollation(collation)[2];
147                        //Issue: ""
148                        issue = "";
149                        //Page: "4141"
150                        page = splitCollation(collation)[3];
151                        //Tab/Fig/No: ""
152                        tab_or_fig_or_no = "";
153                        //Year: ""
154                        year = "";
155                        rule = "65.1";
156                }
157                if (pattern.equals(", a. d: d")){
158                        //Example: ", ed. 3: 273"
159                        // if "a" = "ed" OR "Nachtr" OR "Suppl" OR "Beib" OR "Beih" OR "Reimpr"
160                        if (splitCollation(collation)[1].equals("ed")
161                                        || splitCollation(collation)[1].equals("Nachtr")
162                                        || splitCollation(collation)[1].equals("Suppl")
163                                        || splitCollation(collation)[1].equals("Beib")
164                                        || splitCollation(collation)[1].equals("Beih")
165                                        || splitCollation(collation)[1].equals("Reimpr"))
166                        //Series: "ed. 3"
167                        series = splitCollation(collation)[1] + ". " + splitCollation(collation)[2];
168                        //Volume: ""
169                        volume = "";
170                        //Issue: ""
171                        issue = "";
172                        //Page: "2731"
173                        page = splitCollation(collation)[3];
174                        //Tab/Fig/No: ""
175                        tab_or_fig_or_no = "";
176                        //Year: ""
177                        year = "";
178                        rule = "65.2";
179                }
180                if (pattern.equals(", a.a., d: d")){
181                        //Example: ", a.s., 3: 508"
182                        // if "a" = "a.s." OR "n.f." OR "n.s."
183                        if ((splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".").equals("a.s.")
184                                        || (splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".").equals("n.f.")
185                                        || (splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".").equals("n.s."))
186                                series = splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".";
187                        //Volume: "3"
188                        volume = splitCollation(collation)[3];
189                        //Issue: ""
190                        issue = "";
191                        //Page: "508"
192                        page = splitCollation(collation)[4];
193                        //Tab/Fig/No: ""
194                        tab_or_fig_or_no = "";
195                        //Year: ""
196                        year = "";
197                        rule = "65.3";
198                }
199                if (pattern.equals(", a.a., d(d): d")){
200                        //Example: ", a.s., 3(2): 508"
201                        // if "a" = "a.s." OR "n.f." OR "n.s."
202                        if ((splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".").equals("a.s.")
203                                        || (splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".").equals("n.f.")
204                                        || (splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".").equals("n.s."))
205                                series = splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".";
206                        //Volume: "3"
207                        volume = splitCollation(collation)[3];
208                        //Issue: ""
209                        issue = splitCollation(collation)[4];
210                        //Page: "508"
211                        page = splitCollation(collation)[5];
212                        //Tab/Fig/No: ""
213                        tab_or_fig_or_no = "";
214                        //Year: ""
215                        year = "";
216                        rule = "65.12";
217                }               
218                if (pattern.equals(", a. d, d: d")){
219                        String[] c=splitCollation(collation.replaceAll(" ", ""));
220                        if (c[1].equals("ed")){
221                                //Example: ", ed. 9, 1: 197"
222                                series = c[1]+". "+c[2];
223                                //Volume: "3"
224                                volume = c[3];
225                                //Issue: ""
226                                issue = "";
227                                //Page: "508"
228                                page = c[4];
229                                //Tab/Fig/No: ""
230                                tab_or_fig_or_no = "";
231                                //Year: ""
232                                year = "";
233                                rule = "65.4";
234                        }
235                        else{
236                                series=c[2];
237                                volume=c[3];
238                                page=c[4];
239                                rule="65.4a";
240                        }
241                }
242                if (pattern.equals(", a. d, d(d): d")){
243                        String[] c=splitCollation(collation.replaceAll(" ", ""));
244                        if (c[1].equals("ed")){
245                                //Example: ", ed. 9, 1(1): 197"
246                                series = c[1]+". "+c[2];
247                                //Volume: "3"
248                                volume = c[3];
249                                //Issue: ""
250                                issue = c[4];
251                                //Page: "508"
252                                page = c[5];
253                                //Tab/Fig/No: ""
254                                tab_or_fig_or_no = "";
255                                //Year: ""
256                                year = "";
257                                rule = "65.49";
258                        }
259                        else{
260                                series=c[2];
261                                volume=c[3];
262                                issue=c[4];
263                                page=c[5];
264                                rule="65.49a";
265                        }
266                }
267                
268                if (pattern.equals(", r, d(d): d")){
269                        // Example: ", II, 1(1): 28"
270                        //Series: ""
271                        series = splitCollation(collation)[1];
272                        //Volume: "C"
273                        volume = splitCollation(collation)[2];
274                        //Issue: ""
275                        issue = splitCollation(collation)[3];
276                        //Page: "433"
277                        page = splitCollation(collation)[4];
278                        //Tab/Fig/No: ""
279                        tab_or_fig_or_no = "";
280                        //Year: ""
281                        year = "";
282                        rule = "65.5";                  
283                }
284                // 1(Conif.): 42        d(a.): d                1 (Conif.)              42                      TRUE    65.6
285                if (pattern.equals("d(a.): d")){
286                        // Example: "1(Conif.): 42"
287                        //Series: ""
288                        series = "";
289                        //Volume: "1(Conif.)"
290                        volume = collation.split(":")[0];
291                        //Issue: ""
292                        issue = "";
293                        //Page: "42"
294                        page = splitCollation(collation)[2];
295                        //Tab/Fig/No: ""
296                        tab_or_fig_or_no = "";
297                        //Year: ""
298                        year = "";
299                        rule = "65.6";                  
300                }
301                // , Aloac.: 41 , a.: d                         41                      true    65.8
302                if (pattern.equals(", a.: d")){
303                        // Example: ", Aloac.: 41"
304                        //Series: ""
305                        series = "";
306                        //Volume: ""
307                        volume = "";
308                        //Issue: ""
309                        issue = "";
310                        //Page: "41"
311                        page = splitCollation(collation)[2];
312                        //Tab/Fig/No: ""
313                        tab_or_fig_or_no = "";
314                        //Year: ""
315                        year = "";
316                        rule = "65.8";                  
317                }
318                // 65.9 - see rule 3
319                
320                if (pattern.equals(": a. d") && splitCollation(collation)[1].equals("t")){
321                        //Example: : t. 10
322                        //Series:
323                        series="";
324                        //Volume:
325                        volume="";
326                        //Issue:
327                        issue="";
328                        //Page:
329                        page="";
330                        //Tab/Fig/Pl/Nos: t. 10
331                        tab_or_fig_or_no="t. " + splitCollation(collation)[2];
332                        //Year:
333                        year="";
334                        //Rule: 67
335                        rule="67";
336                }
337                if ((pattern.equals("d: a. d") || pattern.equals("d: a. yyyy") )&& splitCollation(collation)[1].equals("t")){
338                        //Example: 13: t. 1291
339                        //Series:
340                        series="";
341                        //Volume: 13
342                        volume=splitCollation(collation)[0];
343                        //Issue:
344                        issue="";
345                        //Page:
346                        page="";
347                        //Tab/Fig/Pl/Nos: t. 1291
348                        tab_or_fig_or_no="t. " + splitCollation(collation)[2];
349                        //Year:
350                        year="";
351                        //Rule: 7
352                        rule="7";
353                }
354                if (pattern.equals("d(a. d): d")){
355                        //Example:  19(Suppl. 1): 131
356                        //Series:
357                        series="";
358                        //Volume: 19(Suppl. 1)
359                        volume=collation.split(":")[0];
360                        //Issue:
361                        issue="";
362                        //Page: 131
363                        page=splitCollation(collation)[3];
364                        //Tab/Fig/Pl/Nos:
365                        tab_or_fig_or_no="";
366                        //Year:
367                        year="";
368                        //Rule: 65.13
369                        rule="65.13";
370                }
371                if (pattern.equals("d(d, a.): d")){
372                        //Example: 118(1291, Suppl.): 61
373                        //Series:
374                        series="";
375                        //Volume: 118
376                        volume=splitCollation(collation)[0];
377                        //Issue: 1291, Suppl.
378                        issue=splitCollation(collation)[1] + ", " + splitCollation(collation)[2] + ".";
379                        //Page: 61
380                        page=splitCollation(collation)[3];
381                        //Tab/Fig/Pl/Nos:
382                        tab_or_fig_or_no="";
383                        //Year:
384                        year="";
385                        //Rule: 65.14
386                        rule="65.14";
387                }
388                if (pattern.equals("d: d, d")){
389                        //Example: 2: 128, 331
390                        //Series:
391                        series="";
392                        //Volume: 2
393                        volume=splitCollation(collation)[0];
394                        //Issue:
395                        issue="";
396                        //Page: 128, 331
397                        page=splitCollation(collation)[1] + ", " + splitCollation(collation)[2];
398                        //Tab/Fig/Pl/Nos:
399                        tab_or_fig_or_no="";
400                        //Year:
401                        year="";
402                        //Rule: 65.15
403                        rule="65.15";
404                }
405                if (pattern.equals("d(d) a: d") && splitCollation(collation)[2].equals("cppo")){
406                        //Example: 93(1097) cppo: 12
407                        //Series:
408                        series="";
409                        //Volume: 93
410                        volume=splitCollation(collation)[0];
411                        //Issue: 1097 cppo
412                        issue=splitCollation(collation)[1] + " " + splitCollation(collation)[2];
413                        //Page: 12
414                        page=splitCollation(collation)[3];
415                        //Tab/Fig/Pl/Nos:
416                        tab_or_fig_or_no="";
417                        //Year:
418                        year="";
419                        //Rule: 65.17
420                        rule="65.17";
421                }
422                if (pattern.equals(", d(d): d")){
423                        //Example: , 54(5): 88
424                        //Series:
425                        series="";
426                        //Volume: 54
427                        volume=splitCollation(collation)[1];
428                        //Issue: 5
429                        issue=splitCollation(collation)[2];
430                        //Page: 88
431                        page=splitCollation(collation)[3];
432                        //Tab/Fig/Pl/Nos:
433                        tab_or_fig_or_no="";
434                        //Year:
435                        year="";
436                        //Rule: 65.18
437                        rule="65.18";
438                }
439                if (pattern.equals(", r: d")){
440                        //Example: , C: 96
441                        //Series: C
442                        series=splitCollation(collation)[1];
443                        //Volume:
444                        volume="";
445                        //Issue:
446                        issue="";
447                        //Page: 96
448                        page=splitCollation(collation)[2];;
449                        //Tab/Fig/Pl/Nos:
450                        tab_or_fig_or_no="";
451                        //Year:
452                        year="";
453                        //Rule: 65.19
454                        rule="65.19";
455                }
456                if (pattern.equals("d(d a.): d")){
457                        //Example: 40(1 Anh.): 88
458                        //Series:
459                        series="";
460                        //Volume: 40
461                        volume=splitCollation(collation)[0];
462                        //Issue: 1 Anh.
463                        issue=splitCollation(collation)[1] + " " + splitCollation(collation)[2] + ".";
464                        //Page: 88
465                        page=splitCollation(collation)[3];
466                        //Tab/Fig/Pl/Nos:
467                        tab_or_fig_or_no="";
468                        //Year:
469                        year="";
470                        //Rule: 65.22
471                        rule="65.22";
472                }
473                if (pattern.equals(", a.a., a.a., d: d")){
474                        //Example: , n.s., f.m., 1: 107
475                        //Series: n.s., f.m.
476                        series=splitCollation(collation)[1]+"."+splitCollation(collation)[2]+"., "+splitCollation(collation)[3]+"."+splitCollation(collation)[4]+".";
477                        //Volume: 1
478                        volume=splitCollation(collation)[5];
479                        //Issue:
480                        issue="";
481                        //Page: 107
482                        page=splitCollation(collation)[6];
483                        //Tab/Fig/Pl/Nos:
484                        tab_or_fig_or_no="";
485                        //Year:
486                        year="";
487                        //Rule: 65.23
488                        rule="65.23";
489                }
490                if (pattern.equals(", a d: d")){
491                        //Example: , Texte 3: 1149
492                        //Series:
493                        series="";
494                        //Volume: Texte 3
495                        volume=splitCollation(collation)[1] + " " + splitCollation(collation)[2];
496                        //Issue:
497                        issue="";
498                        //Page: 1149
499                        page=splitCollation(collation)[3];
500                        //Tab/Fig/Pl/Nos:
501                        tab_or_fig_or_no="";
502                        //Year:
503                        year="";
504                        //Rule: 65.24
505                        rule="65.24";
506                }
507                if (pattern.equals("d: d, a. d")){
508                        //Example: 9: 388, t. 116
509                        //Series:
510                        series="";
511                        //Volume: 9
512                        volume=splitCollation(collation)[0];
513                        //Issue:
514                        issue="";
515                        //Page: 388
516                        page=splitCollation(collation)[1];
517                        //Tab/Fig/Pl/Nos: t. 116
518                        tab_or_fig_or_no=splitCollation(collation)[2]+". "+splitCollation(collation)[3];
519                        //Year:
520                        year="";
521                        //Rule: 65.25
522                        rule="65.25";
523                }
524                if (pattern.equals(": d, d")){
525                        //Example: : 487, 701
526                        //Series:
527                        series="";
528                        //Volume:
529                        volume="";
530                        //Issue:
531                        issue="";
532                        //Page: 487, 701
533                        page=splitCollation(collation)[1]+", "+splitCollation(collation)[2];
534                        //Tab/Fig/Pl/Nos:
535                        tab_or_fig_or_no="";
536                        //Year:
537                        year="";
538                        //Rule: 65.26
539                        rule="65.26";
540                }
541                if (pattern.equals("yyyy(a.): d")){
542                        //Example: 1857(App.): 4
543                        //Series:
544                        series="";
545                        //Volume: 1857(App.)
546                        volume=collation.split(": ")[0];
547                        //Issue:
548                        issue="";
549                        //Page: 4
550                        page=collation.split(": ")[1];
551                        //Tab/Fig/Pl/Nos:
552                        tab_or_fig_or_no="";
553                        //Year:
554                        year="";
555                        //Rule: 65.29
556                        rule="65.29";
557                }
558                if (pattern.equals(", a. d, d(d): d")){
559                        if (splitCollation(collation)[1].equals("ed")){
560                                //Example: , ed. 3, 1(11-12): 677
561                                //Series: ed. 3
562                                series=splitCollation(collation)[1]+". "+splitCollation(collation)[2];
563                                //Volume: 1
564                                volume=splitCollation(collation)[3];
565                                //Issue: 11-12
566                                issue=splitCollation(collation)[4];
567                                //Page: 677
568                                page=splitCollation(collation)[5];
569                                //Tab/Fig/Pl/Nos:
570                                tab_or_fig_or_no="";
571                                //Year:
572                                year="";
573                                //Rule: 65.30
574                                rule="65.30";
575                        }
576                        else{
577                                //Example: , ser. 3, 1(11-12): 677
578                                //Series: 3
579                                series=splitCollation(collation)[2];
580                                //Volume: 1
581                                volume=splitCollation(collation)[3];
582                                //Issue: 11-12
583                                issue=splitCollation(collation)[4];
584                                //Page: 677
585                                page=splitCollation(collation)[5];
586                                //Tab/Fig/Pl/Nos:
587                                tab_or_fig_or_no="";
588                                //Year:
589                                year="";
590                                //Rule: 65.30
591                                rule="65.30a";                          
592                        }
593                }
594                if (pattern.equals("yyyy-d: d")){
595                        //Example: 2012-39: 35
596                        //Series:
597                        series="";
598                        //Volume: 2012-39
599                        volume=collation.split(": ")[0];
600                        //Issue:
601                        issue="";
602                        //Page: 35
603                        page=collation.split(": ")[1];
604                        //Tab/Fig/Pl/Nos:
605                        tab_or_fig_or_no="";
606                        //Year:
607                        year="";
608                        //Rule: 65.31
609                        rule="65.31";
610                }
611                if (pattern.equals(", a. a.: d") && (splitCollation(collation)[1]+". "+splitCollation(collation)[2]).equals("Spec. No")){
612                        //Example: , Spec. No.: 566
613                        //Series:
614                        series="";
615                        //Volume:
616                        volume="";
617                        //Issue:
618                        issue="";
619                        //Page:
620                        page="";
621                        //Tab/Fig/Pl/Nos: Spec. No. 566
622                        tab_or_fig_or_no="Spec. No. " + splitCollation(collation)[3];
623                        //Year:
624                        year="";
625                        //Rule: 65.32
626                        rule="65.32";
627                }
628                if (pattern.equals(", a. a.: d") && !(splitCollation(collation)[1]+". "+splitCollation(collation)[2]).equals("Spec. No")){
629                        //Example: , ed. rev.: 1439
630                        //Series: ed. rev.
631                        series=splitCollation(collation)[1]+". "+splitCollation(collation)[2]+".";
632                        //Volume:
633                        volume="";
634                        //Issue:
635                        issue="";
636                        //Page: 1439
637                        page=splitCollation(collation)[3];
638                        //Tab/Fig/Pl/Nos:
639                        tab_or_fig_or_no="";
640                        //Year:
641                        year="";
642                        //Rule: 65.33
643                        rule="65.33";
644                }
645                if (pattern.equals(", r, d r a d: d")){
646                        //Example: , IV, 50 II B 21: 242
647                        //Series: 4
648                        series=splitCollation(collation)[1];
649                        //Volume: 50 II B 21
650                        volume=splitCollation(collation)[2]+" "+splitCollation(collation)[3]+" "+splitCollation(collation)[4]+" "+splitCollation(collation)[5];
651                        //Issue:
652                        issue="";
653                        //Page: 242
654                        page=splitCollation(collation)[6];
655                        //Tab/Fig/Pl/Nos:
656                        tab_or_fig_or_no="";
657                        //Year:
658                        year="";
659                        //Rule: 65.34
660                        rule="65.34";
661                }
662                if (pattern.equals(", a. d(d): d")){
663                        //Example: , Suppl. 115(1276): 18
664                        //Series:
665                        series="";
666                        //Volume: Suppl. 115
667                        volume=splitCollation(collation)[1]+". "+splitCollation(collation)[2];
668                        //Issue: 1276
669                        issue=splitCollation(collation)[3];
670                        //Page: 18
671                        page=splitCollation(collation)[4];
672                        //Tab/Fig/Pl/Nos:
673                        tab_or_fig_or_no="";
674                        //Year:
675                        year="";
676                        //Rule: 65.35
677                        rule="65.35";
678                }
679                if (pattern.equals(", a.a., yyyy(d): d")){
680                        //Example: , n.s., 1883(2): 328
681                        //Series: n.s.
682                        series=splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".";
683                        //Volume: 1883
684                        volume=splitCollation(collation)[3];
685                        //Issue: 2
686                        issue=splitCollation(collation)[4];
687                        //Page: 328
688                        page=splitCollation(collation)[5];
689                        //Tab/Fig/Pl/Nos:
690                        tab_or_fig_or_no="";
691                        //Year:
692                        year="";
693                        //Rule: 65.36
694                        rule="65.36";
695                }
696                if (pattern.equals(", r, d: a. d")){
697                        //Example: , V, 7: t. 44
698                        //Series: 5
699                        series=splitCollation(collation)[1];
700                        //Volume: 7
701                        volume=splitCollation(collation)[2];
702                        //Issue:
703                        issue="";
704                        //Page:
705                        page="";
706                        //Tab/Fig/Pl/Nos: t. 44
707                        tab_or_fig_or_no=splitCollation(collation)[3]+". "+splitCollation(collation)[4];
708                        //Year:
709                        year="";
710                        //Rule: 70
711                        rule="70";
712                }
713                if (pattern.equals("d(d,  a.): d")){
714                        //Example: 120(1299,  Suppl.): 49
715                        //Series:
716                        series="";
717                        //Volume: 120
718                        volume=splitCollation(collation)[0];
719                        //Issue: 1299, Suppl.
720                        issue=splitCollation(collation)[1]+", "+splitCollation(collation)[2]+".";
721                        //Page: 49
722                        page=splitCollation(collation)[3];
723                        //Tab/Fig/Pl/Nos:
724                        tab_or_fig_or_no="";
725                        //Year:
726                        year="";
727                        //Rule: 65.37
728                        rule="65.37";
729                }
730                if (pattern.equals("yyyy-yyyy: d")){
731                        //Example: 1927-1929: 9
732                        //Series:
733                        series="";
734                        //Volume: 1927-1929
735                        volume=collation.split(": ")[0];
736                        //Issue:
737                        issue="";
738                        //Page: 9
739                        page=collation.split(": ")[1];
740                        //Tab/Fig/Pl/Nos:
741                        tab_or_fig_or_no="";
742                        //Year:
743                        year="";
744                        //Rule: 65.38
745                        rule="65.38";
746                }
747                if (pattern.equals("d(d; d): d")){
748                        //Example: 24(3; 10): 23
749                        //Series:
750                        series="";
751                        //Volume: 24
752                        volume=splitCollation(collation)[0];
753                        //Issue: 3; 10
754                        issue=splitCollation(collation)[1]+"; "+splitCollation(collation)[2];
755                        //Page: 23
756                        page=splitCollation(collation)[3];
757                        //Tab/Fig/Pl/Nos:
758                        tab_or_fig_or_no="";
759                        //Year:
760                        year="";
761                        //Rule: 65.39
762                        rule="65.39";
763                }
764                if (pattern.equals("d(a. a.): d")){
765                        //Example: 9(Suppl. Bot.): 33
766                        //Series:
767                        series="";
768                        //Volume: 9(Suppl. Bot.)
769                        volume=collation.split(": ")[0];
770                        //Issue:
771                        issue="";
772                        //Page: 33
773                        page=collation.split(": ")[1];
774                        //Tab/Fig/Pl/Nos:
775                        tab_or_fig_or_no="";
776                        //Year:
777                        year="";
778                        //Rule: 65.40
779                        rule="65.40";
780                }
781                if (pattern.equals("d: a.\u00ba d")
782                                || pattern.equals("d: a.\u00ba da")
783                                || pattern.equals("d: a.\u00ba yyyy")
784                                ||pattern.equals("yyyy: a.\u00ba d")
785                                ||pattern.equals("yyyy: a.\u00ba da")
786                                ||pattern.equals("yyyy: a.\u00ba yyyy")){
787                        //Example: 17: n.o 4
788                        //Series:
789                        series="";
790                        //Volume: 17
791                        volume=splitCollation(collation)[0];
792                        //Issue:
793                        issue="";
794                        //Page: 
795                        page="";
796                        //Tab/Fig/Pl/Nos: n.o 4
797                        tab_or_fig_or_no="n.\u00ba " + splitCollation(collation)[2];
798                        //Year:
799                        year="";
800                        //Rule: 73
801                        rule="73";
802                }
803                if (pattern.equals(": a.\u00ba d")
804                                || pattern.equals(": a.\u00ba yyyy")
805                                || pattern.equals(": a.\u00ba da")
806                                || pattern.equals(": a.\u00ba yyyya")){
807                        //Example: ": n.o 4"
808                        //Series:
809                        series="";
810                        //Volume: 
811                        volume="";
812                        //Issue:
813                        issue="";
814                        //Page: 
815                        page="";
816                        //Tab/Fig/Pl/Nos: n.o 4
817                        tab_or_fig_or_no="n.\u00ba " + splitCollation(collation)[2];
818                        //Year:
819                        year="";
820                        //Rule: 73.1
821                        rule="73.1";
822                }
823                
824                
825                if (pattern.equals(", r, yyyy(d): d")){
826                        //Example: , III, 1893(1): 413
827                        //Series: 3
828                        series=splitCollation(collation)[1];
829                        //Volume: 1893
830                        volume=splitCollation(collation)[2];
831                        //Issue: 1
832                        issue=splitCollation(collation)[3];
833                        //Page: 413
834                        page=splitCollation(collation)[4];
835                        //Tab/Fig/Pl/Nos:
836                        tab_or_fig_or_no="";
837                        //Year:
838                        year="";
839                        //Rule: 65.41
840                        rule="65.41";
841                }
842                if (pattern.equals("d: a.a.")){
843                        //Example: 339: s.p.
844                        //Series:
845                        series="";
846                        //Volume: 339
847                        volume=splitCollation(collation)[0];
848                        //Issue:
849                        issue="";
850                        //Page: s.p.
851                        page=splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".";
852                        //Tab/Fig/Pl/Nos:
853                        tab_or_fig_or_no="";
854                        //Year:
855                        year="";
856                        //Rule: 74
857                        rule="74";
858                }
859                if (pattern.equals("d: a. d, a. d")){
860                        //Example: 47: t. 2113, p. 5
861                        //Series:
862                        series="";
863                        //Volume: 47
864                        volume=splitCollation(collation)[0];
865                        //Issue:
866                        issue="";
867                        //Page: t. 2113, p. 5
868                        // CHECK!!
869                        page=collation.split(": ")[1];
870                        //Tab/Fig/Pl/Nos:
871                        tab_or_fig_or_no="";
872                        //Year:
873                        year="";
874                        //Rule: 75
875                        rule="75";
876                }
877                if (pattern.equals(", a. a. a.: d")){
878                        //Example: , Prodr. Fl. Cap.: 5
879                        //Series: Prodr. Fl. Cap.
880                        series=splitCollation(collation)[1]+". "+splitCollation(collation)[2]+". "+splitCollation(collation)[3]+".";
881                        //Volume:
882                        volume="";
883                        //Issue:
884                        issue="";
885                        //Page: 5
886                        page=splitCollation(collation)[4];
887                        //Tab/Fig/Pl/Nos:
888                        tab_or_fig_or_no="";
889                        //Year:
890                        year="";
891                        //Rule: 65.42
892                        rule="65.42";
893                }
894                if (pattern.equals("d(d: d): d")){
895                        //Example: 9(225: 1): 267
896                        //Series:
897                        series="";
898                        //Volume: 9
899                        volume=splitCollation(collation)[0];
900                        //Issue: 225:1
901                        issue=splitCollation(collation)[1]+":"+splitCollation(collation)[2];
902                        //Page: 267
903                        page=splitCollation(collation)[3];
904                        //Tab/Fig/Pl/Nos:
905                        tab_or_fig_or_no="";
906                        //Year:
907                        year="";
908                        //Rule: 65.44
909                        rule="65.44";
910                }
911                if (pattern.equals(", a: d")){
912                        //Example: , Atlas: 13
913                        //Series: Atlas
914                        series=splitCollation(collation)[1];
915                        //Volume:
916                        volume="";
917                        //Issue:
918                        issue="";
919                        //Page: 13
920                        page=splitCollation(collation)[2];
921                        //Tab/Fig/Pl/Nos:
922                        tab_or_fig_or_no="";
923                        //Year:
924                        year="";
925                        //Rule: 65.45
926                        rule="65.45";
927                }
928                if (pattern.equals("d(d): a. d")){
929                        //Example: 1(2): t. 20
930                        //Series:
931                        series="";
932                        //Volume: 1
933                        volume=splitCollation(collation)[0];
934                        //Issue: 2
935                        issue=splitCollation(collation)[1];
936                        //Page:
937                        page="";
938                        //Tab/Fig/Pl/Nos: t. 20
939                        tab_or_fig_or_no=splitCollation(collation)[2]+". "+splitCollation(collation)[3];
940                        //Year:
941                        year="";
942                        //Rule: 77
943                        rule="77";
944                }
945                if (pattern.equals(", a.a., yyyy: d")){
946                        //Example: , n.s., 1875: 162
947                        //Series: n.s.
948                        series=splitCollation(collation)[1]+"."+splitCollation(collation)[2]+".";
949                        //Volume: 1875
950                        volume=splitCollation(collation)[3];
951                        //Issue:
952                        issue="";
953                        //Page: 162
954                        page=splitCollation(collation)[4];
955                        //Tab/Fig/Pl/Nos:
956                        tab_or_fig_or_no="";
957                        //Year:
958                        year="";
959                        //Rule: 65.46
960                        rule="65.46";
961                }
962                if (pattern.equals(", r, yyyy: d")){
963                        //Example: , III, 1893: 629
964                        //Series: 3
965                        series=splitCollation(collation)[1];
966                        //Volume: 1893
967                        volume=splitCollation(collation)[2];
968                        //Issue:
969                        issue="";
970                        //Page: 629
971                        page=splitCollation(collation)[3];
972                        //Tab/Fig/Pl/Nos:
973                        tab_or_fig_or_no="";
974                        //Year:
975                        year="";
976                        //Rule: 65.47
977                        rule="65.47";
978                }
979                if (pattern.equals("d(a): d")){
980                        //Example: 15(Extra): 408
981                        //Series:
982                        series="";
983                        //Volume: 15(Extra)
984                        volume=collation.split(": ")[0];
985                        //Issue:
986                        issue="";
987                        //Page: 408
988                        page=collation.split(": ")[1];
989                        //Tab/Fig/Pl/Nos:
990                        tab_or_fig_or_no="";
991                        //Year:
992                        year="";
993                        //Rule: 65.50
994                        rule="65.50";
995                }
996                if (pattern.equals(", r, d r: d")){
997                        //Example: , IV, 243 II: 95
998                        //Series: 4
999                        series=splitCollation(collation)[1];
1000                        //Volume: 243 II
1001                        volume=splitCollation(collation)[2]+" "+splitCollation(collation)[3];
1002                        //Issue:
1003                        issue="";
1004                        //Page: 95
1005                        page=splitCollation(collation)[4];
1006                        //Tab/Fig/Pl/Nos:
1007                        tab_or_fig_or_no="";
1008                        //Year:
1009                        year="";
1010                        //Rule: 65.51
1011                        rule="65.51";
1012                }
1013                if (pattern.equals("d:")){
1014                        //Example: 7:
1015                        //Series:
1016                        series="";
1017                        //Volume: 7
1018                        volume=splitCollation(collation)[0];
1019                        //Issue:
1020                        issue="";
1021                        //Page:
1022                        page="";
1023                        //Tab/Fig/Pl/Nos:
1024                        tab_or_fig_or_no="";
1025                        //Year:
1026                        year="";
1027                        //Rule: 65.53
1028                        rule="65.53";
1029                }
1030                
1031                String [] values = {series, volume, issue, page, tab_or_fig_or_no, year, rule};
1032                return values;
1033        }
1034        
1035        public static void main(String[] args) {
1036                // Tab separated file of id and collation
1037                String inputfile = args[0];
1038                String outputfile = args[1];
1039
1040                try(BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(inputfile),"utf8"));
1041                                FileWriter fw = new FileWriter(outputfile);
1042                                BufferedWriter bw = new BufferedWriter(fw);) {
1043                int count = 0;
1044
1045                
1046                        String line = null;
1047                while ((line = br.readLine()) != null) {
1048                        if ((count++ % 10000) == 0){
1049                                System.out.println(count);
1050                        }
1051                        String[] elems = line.split("\t");
1052                        String id = elems[0];
1053                        if (elems.length > 1){
1054                                String collation = elems[1];
1055                                
1056                                String structure = new CollationStructureTransformer().transform(collation);
1057                                
1058                                String[] parsed = CollationUtils.parseCollation(collation);
1059                                
1060                                bw.write(id 
1061                                                + "\t" + collation
1062                                                + "\t" + structure
1063                                                + "\t" + parsed[SERIES_INDEX]
1064                                                + "\t" + parsed[VOL_INDEX]
1065                                                + "\t" + parsed[ISSUE_INDEX]
1066                                                + "\t" + parsed[PAGE_INDEX]
1067                                                + "\t" + parsed[TAB_OR_FIG_INDEX]
1068                                                + "\t" + parsed[YEAR_INDEX]
1069                                                + "\t" + CollationUtils.parsableCollation(collation)
1070                                                + "\t" + parsed[RULE_INDEX]
1071                                                + "\n");
1072                        }
1073                        else{
1074                                bw.write(id + "\n");
1075                        }
1076                }
1077                bw.flush();
1078                bw.close();
1079                }
1080                catch(Exception e){
1081                        e.printStackTrace();
1082                }
1083        }
1084
1085}