Lucene 词法分析器代码
作者:admin 日期:2006-09-07
/*
* Created on 2006-8-26
*
*/
package com.xdtech.util.lucene;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
/**
 * Lucene {@link Analyzer} that tokenizes input with {@code XDChineseTokenizer}
 * and then drops stop words from the resulting stream via {@link StopFilter}.
 *
 * <p>The stop-word set is built once in the constructor and never reassigned.
 *
 * @author jaddy0302
 */
public class XDChineseAnalyzer extends Analyzer {

    /**
     * Default stop words: common English function words plus a few
     * single-character Chinese words.
     */
    public static final String[] STOP_WORDS = {
        "a", "as", "at", "be", "but",
        "by", "these", "they", "this", "to", "was", "for", "if", "in", "into", "is", "it",
        "no", "such", "t", "that", "the", "or", "an", "and", "are", "s", "there",
        "their", "then", "not", "of", "on", "will", "with",
        "是", "的", "不", "可", "好", "无"
    };

    /** Stop-word set handed to every {@link StopFilter} this analyzer creates. */
    private final Set stopWords;

    /**
     * Creates an analyzer that filters the default {@link #STOP_WORDS}.
     */
    public XDChineseAnalyzer() {
        this(STOP_WORDS);
    }

    /**
     * Creates an analyzer that filters a caller-supplied list of stop words.
     *
     * @param stopWords the words to remove from the token stream; converted to
     *                  a set with {@link StopFilter#makeStopSet(String[])}
     */
    public XDChineseAnalyzer(String[] stopWords) {
        this.stopWords = StopFilter.makeStopSet(stopWords);
    }

    /**
     * Builds the token stream for the given reader: an {@code XDChineseTokenizer}
     * wrapped in a {@link StopFilter} using this analyzer's stop-word set.
     *
     * @param filename not used by this implementation.
     *                 NOTE(review): Lucene's {@code Analyzer.tokenStream} calls this
     *                 argument {@code fieldName}, so it is presumably the index field
     *                 name rather than a file name - confirm before renaming.
     * @param reader   source of the text to tokenize
     * @return the stop-word-filtered token stream
     */
    public final TokenStream tokenStream(String filename, Reader reader) {
        TokenStream tokens = new XDChineseTokenizer(reader);
        return new StopFilter(tokens, stopWords);
    }
}
Tags: Lucene 词法 分析器 代码
北京线点科技 致力于以数据和搜索为核心的业务 (全文检索、舆情监控、搜索引擎产品)http://www.xd-tech.com
- 1







