public abstract class CompoundWordTokenFilterBase
extends org.apache.lucene.analysis.TokenFilter
| Modifier and Type | Field and Description |
|---|---|
| static int | DEFAULT_MAX_SUBWORD_SIZE<br>The default for maximal length of subwords that get propagated to the output of this filter |
| static int | DEFAULT_MIN_SUBWORD_SIZE<br>The default for minimal length of subwords that get propagated to the output of this filter |
| static int | DEFAULT_MIN_WORD_SIZE<br>The default for minimal word length that gets decomposed |
| protected org.apache.lucene.analysis.CharArraySet | dictionary |
| protected int | maxSubwordSize |
| protected int | minSubwordSize |
| protected int | minWordSize |
| protected boolean | onlyLongestMatch |
| protected java.util.LinkedList | tokens |
| Modifier | Constructor and Description |
|---|---|
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, java.util.Set dictionary) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, java.util.Set dictionary, boolean onlyLongestMatch) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, java.util.Set dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, java.lang.String[] dictionary) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, java.lang.String[] dictionary, boolean onlyLongestMatch) |
| protected | CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input, java.lang.String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) |
| Modifier and Type | Method and Description |
|---|---|
| protected static void | addAllLowerCase(java.util.Set target, java.util.Collection col) |
| protected org.apache.lucene.analysis.Token | createToken(int offset, int length, org.apache.lucene.analysis.Token prototype) |
| protected void | decompose(org.apache.lucene.analysis.Token token) |
| protected abstract void | decomposeInternal(org.apache.lucene.analysis.Token token) |
| boolean | incrementToken() |
| static java.util.Set | makeDictionary(java.lang.String[] dictionary)<br>Create a set of words from an array. The resulting Set does case-insensitive matching. TODO: We should look for a faster dictionary lookup approach. |
| protected static char[] | makeLowerCaseCopy(char[] buffer) |
| org.apache.lucene.analysis.Token | next()<br>Deprecated. Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer. |
| org.apache.lucene.analysis.Token | next(org.apache.lucene.analysis.Token reusableToken)<br>Deprecated. Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer. |
| void | reset() |
getOnlyUseNewAPI, setOnlyUseNewAPI
addAttribute, addAttributeImpl, captureState, clearAttributes, cloneAttributes, equals, getAttribute, getAttributeClassesIterator, getAttributeFactory, getAttributeImplsIterator, hasAttribute, hasAttributes, hashCode, restoreState, toString
public static final int DEFAULT_MIN_WORD_SIZE
public static final int DEFAULT_MIN_SUBWORD_SIZE
public static final int DEFAULT_MAX_SUBWORD_SIZE
protected final org.apache.lucene.analysis.CharArraySet dictionary
protected final java.util.LinkedList tokens
protected final int minWordSize
protected final int minSubwordSize
protected final int maxSubwordSize
protected final boolean onlyLongestMatch
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
java.lang.String[] dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
java.lang.String[] dictionary,
boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
java.util.Set dictionary,
boolean onlyLongestMatch)
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
java.lang.String[] dictionary)
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
java.util.Set dictionary)
protected CompoundWordTokenFilterBase(org.apache.lucene.analysis.TokenStream input,
java.util.Set dictionary,
int minWordSize,
int minSubwordSize,
int maxSubwordSize,
boolean onlyLongestMatch)
public static final java.util.Set makeDictionary(java.lang.String[] dictionary)
Parameters: dictionary - the array of words to build the set from
Returns: Set of lowercased terms
public final boolean incrementToken()
throws java.io.IOException
Overrides: incrementToken in class org.apache.lucene.analysis.TokenStream
Throws: java.io.IOException
public final org.apache.lucene.analysis.Token next(org.apache.lucene.analysis.Token reusableToken)
throws java.io.IOException
Overrides: next in class org.apache.lucene.analysis.TokenStream
Throws: java.io.IOException
public final org.apache.lucene.analysis.Token next()
throws java.io.IOException
Overrides: next in class org.apache.lucene.analysis.TokenStream
Throws: java.io.IOException
protected static final void addAllLowerCase(java.util.Set target,
java.util.Collection col)
protected static char[] makeLowerCaseCopy(char[] buffer)
protected final org.apache.lucene.analysis.Token createToken(int offset,
int length,
org.apache.lucene.analysis.Token prototype)
protected void decompose(org.apache.lucene.analysis.Token token)
protected abstract void decomposeInternal(org.apache.lucene.analysis.Token token)
public void reset()
throws java.io.IOException
Overrides: reset in class org.apache.lucene.analysis.TokenFilter
Throws: java.io.IOException
Copyright © 2000-2016 Apache Software Foundation. All Rights Reserved.