001/*
002 * Copyright © 2012, 2013, 2014 Royal Botanic Gardens, Kew.
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
005 *
006 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
007 *
008 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
009 */
010package org.kew.rmf.transformers;
011
012/**
013 * Removes all text in brackets (round, square and curly) incl. the brackets.
014 */
015public class RemoveBracketedTextTransformer implements Transformer {
016
017        private SqueezeWhitespaceTransformer whitespaceSqueezer = new SqueezeWhitespaceTransformer();
018
019        @Override
020        public String transform(String s) {
021                if (s == null) return null;
022
023                char[] original = s.toCharArray();
024                char[] stripped = new char[original.length];
025
026                int pos = 0;
027
028                int count = 0;
029
030                char starter = 0;
031                char terminator = 0;
032
033                for (int i = 0; i < original.length; i++) {
034                        boolean closed = false;
035
036                        // If I'm inside some brackets...
037                        if (count > 0) {
038
039                                // Check for opening brackets of the same kind
040                                if (original[i] == starter) {
041                                        count++;
042                                }
043                                // Or closing brackets of the right kind
044                                else if (original[i] == terminator) {
045                                        // Brackets are only ending if this reduces the count to zero
046                                        count--;
047
048                                        // But that's only the end if we're out of nested brackets
049                                        if (count == 0) {
050                                                closed = true;
051                                        }
052                                }
053                        }
054                        // If I'm not inside, check for a new sequence and set the terminator.
055                        else {
056                                switch (original[i]) {
057                                case '(':
058                                        count++;
059                                        starter = '(';
060                                        terminator = ')';
061                                        break;
062                                case '[':
063                                        count++;
064                                        starter = '[';
065                                        terminator = ']';
066                                        break;
067                                case '{':
068                                        count++;
069                                        starter = '{';
070                                        terminator = '}';
071                                        break;
072                                default:
073                                }
074                        }
075
076                        if (count > 0 || closed) {
077                                // Ignore this character
078                        }
079                        else {
080                                stripped[pos++] = original[i];
081                        }
082                }
083
084                String strippedString = new String(stripped);
085
086                return whitespaceSqueezer.transform(strippedString);
087        }
088}