Class: Ferret::Analysis::StandardAnalyzer
Summary
The StandardAnalyzer is the most advanced of the available analyzers. If it were implemented in Ruby it would look like this:
# Ruby sketch of the analyzer: a tokenizer wrapped in a chain of filters.
class StandardAnalyzer
  # stop_words:: list of words handed to the StopFilter
  # lower::      when true, tokens are passed through a LowerCaseFilter
  def initialize(stop_words = FULL_ENGLISH_STOP_WORDS, lower = true)
    @stop_words = stop_words
    @lower = lower
  end

  # Builds the token stream for +str+. The +field+ argument is accepted
  # but not used by this sketch.
  def token_stream(field, str)
    stream = StandardTokenizer.new(str)
    stream = LowerCaseFilter.new(stream) if @lower
    # Stop-word removal runs before hyphen handling; the outermost filter
    # is what the caller receives.
    HyphenFilter.new(StopFilter.new(stream, @stop_words))
  end
end
As you can see, it makes use of the StandardTokenizer, and you can also supply your own list of stop-words if you wish.
Public Class Methods
StandardAnalyzer.new(stop_words = FULL_ENGLISH_STOP_WORDS, lower=true)
→ analyzer
Create a new StandardAnalyzer which downcases tokens by default but can optionally leave case as is. Lowercasing will be done based on the current locale. You can also set the list of stop-words to be used by the StopFilter.
| lower: | set to false if you don't want the field's tokens to be downcased |
| stop_words: | list of stop-words to pass to the StopFilter |
/*
 *  call-seq:
 *     StandardAnalyzer.new(stop_words = FULL_ENGLISH_STOP_WORDS, lower=true)
 *     -> analyzer
 *
 *  Build a StandardAnalyzer. Tokens are downcased unless +lower+ is given as
 *  false; downcasing follows the current locale. A custom list of stop-words
 *  may also be supplied for the StopFilter to use.
 *
 *  stop_words:: list of stop-words to pass to the StopFilter
 *  lower::      set to false if you don't want the field's tokens to be
 *               downcased
 */
static VALUE
frt_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rstop_words, rlower;
    Analyzer *analyzer;
    bool downcase = true;

#ifndef POSH_OS_WIN32
    /* Adopt the environment's LC_CTYPE locale if none has been recorded. */
    if (!frt_locale) {
        frt_locale = setlocale(LC_CTYPE, "");
    }
#endif

    /* Zero required and up to two optional arguments. */
    rb_scan_args(argc, argv, "02", &rstop_words, &rlower);

    /* A nil +lower+ keeps the default of downcasing. */
    if (rlower != Qnil) {
        downcase = RTEST(rlower);
    }

    if (rstop_words == Qnil) {
        analyzer = mb_standard_analyzer_new(downcase);
    } else {
        char **words = get_stopwords(rstop_words);

        analyzer = mb_standard_analyzer_new_with_words((const char **)words,
                                                       downcase);
        /* Only the pointer array itself is released here. */
        free(words);
    }

    /* Bind the analyzer to the Ruby object, with frt_analyzer_free as its
     * free callback, then record the analyzer/object pairing. */
    Frt_Wrap_Struct(self, NULL, &frt_analyzer_free, analyzer);
    object_add(analyzer, self);

    return self;
}