Changeset a249f609ee1784b37b8bce802dfa88406cc2c3fd

Show
Ignore:
Timestamp:
06/22/08 15:15:45 (7 months ago)
Author:
David Balmain <dbalmain@…>
Parents:
97564968f9aed076ddedd60a97b504b972e3f7fc
Children:
ff6f080b9c3c3ca054c9bbce77571f813aee457a
git-committer:
David Balmain <dbalmain@gmail.com> / 2008-06-22T15:15:45Z+1000
Message:

Added term frequency field to TVTerm

* You can now find out the frequency of a term in a document without having to store offsets or positions.

Location:
ruby
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • ruby/ext/r_index.c

    rb41949 ra249f6  
    12281228        RARRAY(rpositions)->len = freq; 
    12291229    } 
    1230     return rb_struct_new(cTVTerm, rtext, rpositions, NULL); 
     1230    return rb_struct_new(cTVTerm, rtext, INT2FIX(freq), rpositions, NULL); 
    12311231} 
    12321232 
     
    30733073    cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject); 
    30743074    */ 
    3075     cTVTerm = rb_struct_define(tv_term_class, "text", "positions", NULL); 
     3075    cTVTerm = rb_struct_define(tv_term_class, "text", "freq", "positions", NULL); 
    30763076    rb_set_class_path(cTVTerm, cTermVector, tv_term_class); 
    30773077    rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm); 
  • ruby/lib/ferret/index.rb

    r975649 ra249f6  
    491491          term_doc_enum = @reader.term_docs_for(@id_field, id.to_s) 
    492492          if term_doc_enum.next? 
    493             id = @reader[term_doc_enum.doc] 
     493            id = term_doc_enum.doc 
    494494          else 
    495495            return nil 
  • ruby/test/unit/index/tc_index_reader.rb

    r905f16 ra249f6  
    192192    expected_tv = TermVector.new(:body, 
    193193      [ 
    194         TVTerm.new("word1", [2, 4, 7]), 
    195         TVTerm.new("word2", [3]), 
    196         TVTerm.new("word3", [0, 5, 8, 9]), 
    197         TVTerm.new("word4", [1, 6]) 
     194        TVTerm.new("word1", 3, [2, 4, 7]), 
     195        TVTerm.new("word2", 1, [3]), 
     196        TVTerm.new("word3", 4, [0, 5, 8, 9]), 
     197        TVTerm.new("word4", 2, [1, 6]) 
    198198      ], 
    199199      [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)}) 
     
    210210    tv = tvs[:author] 
    211211    assert_equal(:author, tv.field) 
    212     assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms) 
     212    assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms) 
    213213    assert(tv.offsets.nil?) 
    214214 
     
    216216    tv = tvs[:title] 
    217217    assert_equal(:title, tv.field) 
    218     assert_equal([TVTerm.new("War And Peace", nil)], tv.terms) 
     218    assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms) 
    219219    assert_equal([TVOffsets.new(0, 13)], tv.offsets) 
    220220  end 
     
    609609    expected_tv = TermVector.new(:body, 
    610610      [ 
    611         TVTerm.new("word1", [2, 4, 7]), 
    612         TVTerm.new("word2", [3]), 
    613         TVTerm.new("word3", [0, 5, 8, 9]), 
    614         TVTerm.new("word4", [1, 6]) 
     611        TVTerm.new("word1", 3, [2, 4, 7]), 
     612        TVTerm.new("word2", 1, [3]), 
     613        TVTerm.new("word3", 4, [0, 5, 8, 9]), 
     614        TVTerm.new("word4", 2, [1, 6]) 
    615615      ], 
    616616      [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)}) 
     
    627627    tv = tvs[:author] 
    628628    assert_equal(:author, tv.field) 
    629     assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms) 
     629    assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms) 
    630630    assert(tv.offsets.nil?) 
    631631 
     
    633633    tv = tvs[:title] 
    634634    assert_equal(:title, tv.field) 
    635     assert_equal([TVTerm.new("War And Peace", nil)], tv.terms) 
     635    assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms) 
    636636    assert_equal([TVOffsets.new(0, 13)], tv.offsets) 
    637637  end