/**********************************************************************
 *
 * GS2Analyzer.java 
 *
 * Copyright 2004 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
package org.greenstone.LuceneWrapper4;


import java.io.*;
import java.util.Set;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.*;  // StopFilter, LowerCaseFilter, (StandardFilter)
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.*;

import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;

import org.apache.lucene.util.Version;


/**
 * Analyzer that builds on {@link GS2StandardAnalyzer} and appends an
 * {@link ASCIIFoldingFilter} to the end of the token filter chain.
 *
 * <p>The chain assembled by {@link #createComponents(String, Reader)} is:
 * {@link StandardTokenizer} -> {@link StandardFilter} -> {@link LowerCaseFilter}
 * -> {@link StopFilter} -> {@link ASCIIFoldingFilter}.
 */
public class GS2Analyzer extends GS2StandardAnalyzer
{
    /**
     * Creates an analyzer by passing only {@code GSLuceneConstants.MATCH_VERSION}
     * to the superclass constructor.
     */
    public GS2Analyzer()
    {
        super(GSLuceneConstants.MATCH_VERSION);
    }

    /**
     * Creates an analyzer whose stop set is built from the supplied words via
     * {@link StopFilter#makeStopSet}.
     *
     * @param stopwords the words to turn into the stop set handed to the superclass
     */
    public GS2Analyzer(String [] stopwords)
    {
        super(GSLuceneConstants.MATCH_VERSION,
              StopFilter.makeStopSet(GSLuceneConstants.MATCH_VERSION, stopwords));
    }

    /**
     * Assembles the tokenizer and the filter chain used for a field.
     *
     * @param fieldName name of the field being analysed (not consulted here)
     * @param reader    source of the text to tokenize
     * @return the tokenizer paired with the last filter of the chain
     */
    @Override
    protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        final StandardTokenizer tokenizer =
            new StandardTokenizer(GSLuceneConstants.MATCH_VERSION, reader);
        tokenizer.setMaxTokenLength(maxTokenLength);
        // setReplaceInvalidAcronym(replaceInvalidAcronym) is no longer called on the tokenizer:
        // it is now true by default, see
        // http://lucene.apache.org/core/3_0_3/api/all/org/apache/lucene/analysis/standard/StandardTokenizer.html

        // Each stage wraps the previous one; the construction order is the processing order.
        final TokenStream standardised =
            new StandardFilter(GSLuceneConstants.MATCH_VERSION, tokenizer);
        final TokenStream lowerCased =
            new LowerCaseFilter(GSLuceneConstants.MATCH_VERSION, standardised);
        final TokenStream withoutStopwords =
            new StopFilter(GSLuceneConstants.MATCH_VERSION, lowerCased, stopwords);

        // Accent folding is applied last, on top of the standard chain.
        final TokenStream accentFolded = new ASCIIFoldingFilter(withoutStopwords);

        return new TokenStreamComponents(tokenizer, accentFolded) {
            /**
             * Re-applies the analyzer's current maximum token length to the
             * tokenizer, then delegates to the superclass.
             *
             * <p>This hook was called reset(Reader) in earlier Lucene versions and is
             * now setReader(Reader), with mostly the same method description:
             * https://lucene.apache.org/core/3_6_0/api/all/org/apache/lucene/analysis/ReusableAnalyzerBase.TokenStreamComponents.html
             * http://lucene.apache.org/core/4_8_1/core/org/apache/lucene/analysis/Analyzer.TokenStreamComponents.html
             * The new method should throw an exception, rather than return false,
             * if it is unable to reset.
             */
            @Override
            protected void setReader(final Reader newReader) throws IOException {
                tokenizer.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);
                super.setReader(newReader);
            }
        };
    }

}


