001/* 002 * Copyright © 2012, 2013, 2014 Royal Botanic Gardens, Kew. 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 005 * 006 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 007 * 008 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 009 */ 010package org.kew.rmf.transformers; 011 012/** 013 * Removes all text in brackets (round, square and curly) incl. the brackets. 014 */ 015public class RemoveBracketedTextTransformer implements Transformer { 016 017 private SqueezeWhitespaceTransformer whitespaceSqueezer = new SqueezeWhitespaceTransformer(); 018 019 @Override 020 public String transform(String s) { 021 if (s == null) return null; 022 023 char[] original = s.toCharArray(); 024 char[] stripped = new char[original.length]; 025 026 int pos = 0; 027 028 int count = 0; 029 030 char starter = 0; 031 char terminator = 0; 032 033 for (int i = 0; i < original.length; i++) { 034 boolean closed = false; 035 036 // If I'm inside some brackets... 037 if (count > 0) { 038 039 // Check for opening brackets of the same kind 040 if (original[i] == starter) { 041 count++; 042 } 043 // Or closing brackets of the right kind 044 else if (original[i] == terminator) { 045 // Brackets are only ending if this reduces the count to zero 046 count--; 047 048 // But that's only the end if we're out of nested brackets 049 if (count == 0) { 050 closed = true; 051 } 052 } 053 } 054 // If I'm not inside, check for a new sequence and set the terminator. 055 else { 056 switch (original[i]) { 057 case '(': 058 count++; 059 starter = '('; 060 terminator = ')'; 061 break; 062 case '[': 063 count++; 064 starter = '['; 065 terminator = ']'; 066 break; 067 case '{': 068 count++; 069 starter = '{'; 070 terminator = '}'; 071 break; 072 default: 073 } 074 } 075 076 if (count > 0 || closed) { 077 // Ignore this character 078 } 079 else { 080 stripped[pos++] = original[i]; 081 } 082 } 083 084 String strippedString = new String(stripped); 085 086 return whitespaceSqueezer.transform(strippedString); 087 } 088}