Class: Ferret::Analysis::StopFilter

Summary

A StopFilter filters *stop-words* from a TokenStream. Stop-words are words that you don't wish to be indexed. Usually they will be common words like "the" and "and", although you can specify whichever words you want.

Example

  ["the", "pig", "and", "whistle"] => ["pig", "whistle"]

Public Class Methods


StopFilter.new(token_stream) → token_stream
StopFilter.new(token_stream, ["the", "and", "it"]) → token_stream

Create a StopFilter which removes *stop-words* from a TokenStream. You can optionally specify the stop-words you wish to have removed.

token_stream: TokenStream to be filtered
stop_words: Array of *stop-words* you wish to be filtered out. This defaults to a list of English stop-words. Ferret::Analysis contains a number of stop-word lists.
/*
 *  call-seq:
 *     StopFilter.new(token_stream) -> token_stream
 *     StopFilter.new(token_stream, ["the", "and", "it"]) -> token_stream
 *
 *  Create a StopFilter which removes *stop-words* from a TokenStream. The
 *  list of stop-words to remove may optionally be supplied; passing +nil+
 *  (or omitting the argument) selects the default list.
 *
 *  token_stream:: TokenStream to be filtered
 *  stop_words::   Array of *stop-words* you wish to be filtered out. This
 *                 defaults to a list of English stop-words. The
 *                 Ferret::Analysis contains a number of stop-word lists.
 */
static VALUE
frt_stop_filter_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rsub_ts;
    VALUE rstop_words;
    TokenStream *sub_ts;
    TokenStream *ts;

    /* One required argument (the stream) and one optional (the word list). */
    rb_scan_args(argc, argv, "11", &rsub_ts, &rstop_words);
    sub_ts = frt_get_cwrapped_rts(rsub_ts);

    if (rstop_words == Qnil) {
        /* No word list supplied: build the filter with its default list. */
        ts = stop_filter_new(sub_ts);
    } else {
        char **words = get_stopwords(rstop_words);

        ts = stop_filter_new_with_words(sub_ts, (const char **)words);
        /*
         * Only the array itself is released here, not the strings it points
         * to. NOTE(review): presumably the filter keeps its own copies of the
         * words and the strings are owned elsewhere -- confirm against
         * get_stopwords() and stop_filter_new_with_words().
         */
        free(words);
    }

    /*
     * Register the Ruby sub-stream against the filter's sub_ts slot, then
     * attach the filter to self and register self against the filter.
     */
    object_add(&(TkFilt(ts)->sub_ts), rsub_ts);
    Frt_Wrap_Struct(self, &frt_tf_mark, &frt_tf_free, ts);
    object_add(ts, self);

    return self;
}