001/*
002 * Copyright © 2012, 2013, 2014 Royal Botanic Gardens, Kew.
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
005 *
006 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
007 *
008 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
009 */
010package org.kew.rmf.transformers;
011
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
015
016/**
017 * This transformer splits a string into a series of words. The word delimiter is any 
018 * sequence of non-alphanumeric characters. It then iterates over the "words" and 
019 * converts any Roman numerals to their Arabic equivalent, then concatenates these 
020 * converted words back into a single string, using the space character to separate words.
021 */
022public class RomanNumeralTransformer implements Transformer {
023
024        private final Pattern nonAsciiAlphanumeric = Pattern.compile("[^A-Za-z0-9]");
025        private final Pattern multipleWhitespace = Pattern.compile("\\s+");
026
027        private final Map<String,String> map = new HashMap<String,String>();
028        private final static String[] BASIC_ROMAN_NUMBERS = { "M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I" };
029        private final static int[] BASIC_VALUES = { 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 };
030
031        public RomanNumeralTransformer() {
032                for (int i = 1; i <= 5000; i++) {
033                        map.put(toRomanValue(i), Integer.toString(i));
034                }
035        }
036
037        public static String toRomanValue(int arabicValue) {
038                StringBuffer sb = new StringBuffer();
039                int remainder = arabicValue;
040                for (int i = 0; i < BASIC_VALUES.length; i++) {
041                        while (remainder >= BASIC_VALUES[i]) {
042                                sb.append(BASIC_ROMAN_NUMBERS[i]);
043                                remainder -= BASIC_VALUES[i];
044                        }
045                }
046                return sb.toString();
047        }
048
049        @Override
050        public String transform(String s) {
051                String[] words = multipleWhitespace.matcher(nonAsciiAlphanumeric.matcher(s).replaceAll(" ")).replaceAll(" ").split(" ");
052                String[] converted_words = new String[words.length];
053                for (int i = 0; i < words.length; i++) {
054                        String roman = map.get(words[i].toUpperCase());
055                        if (roman != null) {
056                                converted_words[i] = roman;
057                        }
058                        else {
059                                converted_words[i] = words[i];
060                        }
061                }
062                StringBuffer sb = new StringBuffer();
063                for (String converted_word : converted_words) {
064                        if (sb.length() > 0) {
065                                sb.append(" ");
066                        }
067                        sb.append(converted_word);
068                }
069                return sb.toString();
070        }
071}