Changeset a249f609ee1784b37b8bce802dfa88406cc2c3fd
- Timestamp:
- 06/22/08 15:15:45 (7 months ago)
- Author:
- David Balmain <dbalmain@…>
- Parents:
- 97564968f9aed076ddedd60a97b504b972e3f7fc
- Children:
- ff6f080b9c3c3ca054c9bbce77571f813aee457a
- git-committer:
- David Balmain <dbalmain@gmail.com> / 2008-06-22T15:15:45Z+1000
- Message:
-
Added term frequency field to TVTerm
* You can now find out the frequency of a term in a document without having to
store offsets or positions.
- Location:
- ruby
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
rb41949
|
ra249f6
|
|
| 1228 | 1228 | RARRAY(rpositions)->len = freq; |
| 1229 | 1229 | } |
| 1230 | | return rb_struct_new(cTVTerm, rtext, rpositions, NULL); |
| | 1230 | return rb_struct_new(cTVTerm, rtext, INT2FIX(freq), rpositions, NULL); |
| 1231 | 1231 | } |
| 1232 | 1232 | |
| … |
… |
|
| 3073 | 3073 | cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject); |
| 3074 | 3074 | */ |
| 3075 | | cTVTerm = rb_struct_define(tv_term_class, "text", "positions", NULL); |
| | 3075 | cTVTerm = rb_struct_define(tv_term_class, "text", "freq", "positions", NULL); |
| 3076 | 3076 | rb_set_class_path(cTVTerm, cTermVector, tv_term_class); |
| 3077 | 3077 | rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm); |
-
|
r975649
|
ra249f6
|
|
| 491 | 491 | term_doc_enum = @reader.term_docs_for(@id_field, id.to_s) |
| 492 | 492 | if term_doc_enum.next? |
| 493 | | id = @reader[term_doc_enum.doc] |
| | 493 | id = term_doc_enum.doc |
| 494 | 494 | else |
| 495 | 495 | return nil |
-
|
r905f16
|
ra249f6
|
|
| 192 | 192 | expected_tv = TermVector.new(:body, |
| 193 | 193 | [ |
| 194 | | TVTerm.new("word1", [2, 4, 7]), |
| 195 | | TVTerm.new("word2", [3]), |
| 196 | | TVTerm.new("word3", [0, 5, 8, 9]), |
| 197 | | TVTerm.new("word4", [1, 6]) |
| | 194 | TVTerm.new("word1", 3, [2, 4, 7]), |
| | 195 | TVTerm.new("word2", 1, [3]), |
| | 196 | TVTerm.new("word3", 4, [0, 5, 8, 9]), |
| | 197 | TVTerm.new("word4", 2, [1, 6]) |
| 198 | 198 | ], |
| 199 | 199 | [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)}) |
| … |
… |
|
| 210 | 210 | tv = tvs[:author] |
| 211 | 211 | assert_equal(:author, tv.field) |
| 212 | | assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms) |
| | 212 | assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms) |
| 213 | 213 | assert(tv.offsets.nil?) |
| 214 | 214 | |
| … |
… |
|
| 216 | 216 | tv = tvs[:title] |
| 217 | 217 | assert_equal(:title, tv.field) |
| 218 | | assert_equal([TVTerm.new("War And Peace", nil)], tv.terms) |
| | 218 | assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms) |
| 219 | 219 | assert_equal([TVOffsets.new(0, 13)], tv.offsets) |
| 220 | 220 | end |
| … |
… |
|
| 609 | 609 | expected_tv = TermVector.new(:body, |
| 610 | 610 | [ |
| 611 | | TVTerm.new("word1", [2, 4, 7]), |
| 612 | | TVTerm.new("word2", [3]), |
| 613 | | TVTerm.new("word3", [0, 5, 8, 9]), |
| 614 | | TVTerm.new("word4", [1, 6]) |
| | 611 | TVTerm.new("word1", 3, [2, 4, 7]), |
| | 612 | TVTerm.new("word2", 1, [3]), |
| | 613 | TVTerm.new("word3", 4, [0, 5, 8, 9]), |
| | 614 | TVTerm.new("word4", 2, [1, 6]) |
| 615 | 615 | ], |
| 616 | 616 | [*(0...10)].collect {|i| TVOffsets.new(i*6, (i+1)*6 - 1)}) |
| … |
… |
|
| 627 | 627 | tv = tvs[:author] |
| 628 | 628 | assert_equal(:author, tv.field) |
| 629 | | assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], tv.terms) |
| | 629 | assert_equal([TVTerm.new("Leo", 1, [0]), TVTerm.new("Tolstoy", 1, [1])], tv.terms) |
| 630 | 630 | assert(tv.offsets.nil?) |
| 631 | 631 | |
| … |
… |
|
| 633 | 633 | tv = tvs[:title] |
| 634 | 634 | assert_equal(:title, tv.field) |
| 635 | | assert_equal([TVTerm.new("War And Peace", nil)], tv.terms) |
| | 635 | assert_equal([TVTerm.new("War And Peace", 1, nil)], tv.terms) |
| 636 | 636 | assert_equal([TVOffsets.new(0, 13)], tv.offsets) |
| 637 | 637 | end |