001    /**-
002     * Copyright (c) 2006 Hugo Zaragoza and Jose R. Pérez-Agüera
003     * All rights reserved.
004     *
005     * Redistribution and use in source and binary forms, with or without
006     * modification, are permitted provided that the following conditions
007     * are met:
008     * 1. Redistributions of source code must retain the above copyright
009     *    notice, this list of conditions and the following disclaimer.
010     * 2. Redistributions in binary form must reproduce the above copyright
011     *    notice, this list of conditions and the following disclaimer in the
012     *    documentation and/or other materials provided with the distribution.
013     * 3. Neither the name of copyright holders nor the names of its
014     *    contributors may be used to endorse or promote products derived
015     *    from this software without specific prior written permission.
016     *
017     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
018     * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019     * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
020     * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
021     * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
022     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
023     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
024     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
025     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
026     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027     * POSSIBILITY OF SUCH DAMAGE.
028     */
029    package jcolibri.extensions.textual.lucene.spanish;
030    
031    import java.io.IOException;
032    
033    import net.sf.snowball.ext.SpanishStemmer;
034    
035    import org.apache.lucene.analysis.Token;
036    import org.apache.lucene.analysis.TokenFilter;
037    import org.apache.lucene.analysis.TokenStream;
038    
039    /**
040     * Spanish Stemmer Filter
041     * @author Hugo Zaragoza and Jose R. Pérez-Agüera
042     */
043    public class SpanishStemmerFilter extends TokenFilter {
044    
045            private SpanishStemmer stemmer;
046            
047            public SpanishStemmerFilter(TokenStream in)
048            {
049                    super(in);
050                    this.stemmer = new SpanishStemmer();
051            }
052    
053            public final Token next() throws IOException {
054                    Token t = input.next();
055    
056                    if (t == null)
057                            return null;
058                    
059                this.stemmer.setCurrent(t.termText());
060                this.stemmer.stem();            
061                    String text = stemmer.getCurrent();
062                    String type = t.type();
063                    Token tokenStem = new Token(text, t.startOffset(), t.endOffset(), type);
064                    
065                    return tokenStem;
066              }
067    }