/**-
 * Copyright (c) 2006 Hugo Zaragoza and Jose R. Pérez-Agüera
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package jcolibri.extensions.textual.lucene.spanish;

import java.io.IOException;

import net.sf.snowball.ext.SpanishStemmer;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

/**
 * Token filter that replaces the term text of every token coming from the
 * wrapped stream with the result of running it through the Snowball
 * {@link SpanishStemmer}.
 * <p>
 * One stemmer instance is created per filter and reused for every token, so
 * the filter carries mutable state between calls to {@link #next()}.
 *
 * @author Hugo Zaragoza and Jose R. Pérez-Agüera
 */
public class SpanishStemmerFilter extends TokenFilter {

    /** Stemmer reused for every token produced by this filter. */
    private final SpanishStemmer stemmer;

    /**
     * Creates a filter that stems the tokens produced by the given stream.
     *
     * @param in the token stream whose tokens are to be stemmed
     */
    public SpanishStemmerFilter(TokenStream in)
    {
        super(in);
        this.stemmer = new SpanishStemmer();
    }

    /**
     * Reads the next token from the wrapped stream and returns a new token
     * carrying the stemmed term text.
     * <p>
     * The returned token keeps the start offset, end offset, type and
     * position increment of the token it was derived from.
     *
     * @return the stemmed token, or {@code null} when the wrapped stream has
     *         no more tokens
     * @throws IOException if the wrapped stream fails while producing a token
     */
    public final Token next() throws IOException {
        Token source = input.next();
        if (source == null) {
            return null;
        }

        stemmer.setCurrent(source.termText());
        stemmer.stem();

        Token stemmed = new Token(stemmer.getCurrent(),
                                  source.startOffset(),
                                  source.endOffset(),
                                  source.type());
        // A freshly built Token starts with the default position increment;
        // copy the upstream value so gaps or stacked tokens set by earlier
        // filters are not silently discarded.
        stemmed.setPositionIncrement(source.getPositionIncrement());
        return stemmed;
    }
}