Class: Ferret::Analysis::TokenStream
Summary
A TokenStream enumerates the sequence of tokens, either from fields of a document or from query text.
This is an abstract class. Concrete subclasses are:
| Tokenizer: | a TokenStream whose input is a string |
| TokenFilter: | a TokenStream whose input is another TokenStream |
Public Instance Methods
token_stream.next → token
Return the next token from the TokenStream or nil if there are no more tokens.
/*
 *  call-seq:
 *     token_stream.next -> token
 *
 *  Advance the TokenStream by one token and return it. Returns nil once the
 *  stream has no more tokens to give.
 */
static VALUE
frt_ts_next(VALUE self)
{
    TokenStream *stream;
    Token *tk;

    /* Recover the C TokenStream behind the Ruby object. */
    GET_TS(stream, self);

    /* A NULL result from the stream's next() callback means it is exhausted;
     * otherwise the C token is converted to a Ruby value by get_token(). */
    tk = stream->next(stream);
    return (tk == NULL) ? Qnil : get_token(tk);
}
token_stream.text → text
Return the text that the TokenStream is tokenizing.
/*
 *  call-seq:
 *     token_stream.text -> text
 *
 *  Return the text that the TokenStream is tokenizing, or nil if the stream
 *  has no text.
 */
static VALUE
frt_ts_get_text(VALUE self)
{
    TokenStream *ts;
    VALUE found;

    Data_Get_Struct(self, TokenStream, ts);

    /* Look for a Ruby object already registered under the address of the
     * stream's text field and hand it back if there is one.
     * NOTE(review): presumably object_get/object_set form a C-pointer to
     * VALUE registry -- confirm against their definitions. */
    found = object_get(&ts->text);
    if (found != Qnil) {
        return found;
    }

    /* Nothing registered and no C text to wrap: the result is nil. */
    if (!ts->text) {
        return Qnil;
    }

    /* Build a Ruby String from the C text and register it under the same
     * key so that later calls find it via object_get(). */
    found = rb_str_new2(ts->text);
    object_set(&ts->text, found);
    return found;
}
token_stream.text = text → text
Set the text attribute of the TokenStream to the text you wish to be tokenized. For example, you may do this:
token_stream.text = File.read(file_name)
/*
 *  call-seq:
 *     token_stream.text = text -> text
 *
 *  Point the TokenStream at new text to be tokenized and return that text.
 *  For example, to tokenize the contents of a file:
 *
 *     token_stream.text = File.read(file_name)
 */
static VALUE
frt_ts_set_text(VALUE self, VALUE rtext)
{
    TokenStream *stream;

    Data_Get_Struct(self, TokenStream, stream);

    /* Make sure we really have a Ruby String (StringValue converts rtext in
     * place) before handing its contents to the stream's reset() callback. */
    StringValue(rtext);
    stream->reset(stream, rs2s(rtext));

    /* Prevent garbage collection: keep the Ruby string referenced from self.
     * NOTE(review): presumably the stream keeps using the buffer returned by
     * rs2s() -- confirm. Also verify whether a value previously registered
     * for &stream->text by the text getter needs refreshing here. */
    rb_ivar_set(self, id_text, rtext);

    return rtext;
}