Changeset 553474a2f82a765bea2a3430c3d1b15ced7a15fc
- Timestamp:
- 04/21/08 20:59:24 (9 months ago)
- Author:
- dave <dave@…>
- Parents:
- 7746856afdda981effb01ee9e21e7454d5ad2fa7
- Children:
- 585583fabc2d8f758ad113f07b02b6e37e03ca63
- git-committer:
- dave <dave@06fd6eb0-0002-0410-a719-e5602cce40bc> / 2008-04-21T10:59:24Z+0000
- Message:
-
Added the intern method. All field names are now interned.
Whenever a field name is assigned to a struct it must be interned. This means that field names never need to be freed, and they can be compared with == instead of strcmp. This does not hold for public functions that take a field parameter: in that case the parameter must either be interned first or be compared using the standard string functions (e.g. strcmp).
I'm not sure I like this solution yet. The reason I've added it is that it means there is one fewer object whose memory allocation we need to keep track of. It also gives a slight speed and memory-usage improvement: field names only ever get allocated once, and since they are compared quite often it is more efficient to use == instead of strcmp. However, it feels like it could be quite easy to forget to intern a field somewhere, causing a hard-to-track-down error. In future I may add a Symbol object to force field names to be interned.
git-svn-id: svn+ssh://davebalmain.com/home/dave/repos/ferret/trunk@1027 06fd6eb0-0002-0410-a719-e5602cce40bc
- Location:
- c
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
r613a2b
|
r553474
|
|
| 45 | 45 | search.o similarity.o sort.o stopwords.o \ |
| 46 | 46 | store.o term_vectors.o field_index.o lang.o \ |
| 47 | | scanner.o scanner_mb.o |
| | 47 | scanner.o scanner_mb.o intern.o |
| 48 | 48 | |
| 49 | 49 | TEST_OBJS = \ |
| … |
… |
|
| 60 | 60 | test_analysis.o test_filter.o test_priorityqueue.o \ |
| 61 | 61 | test_sort.o test_ram_store.o test_file_deleter.o \ |
| 62 | | test_lang.o |
| | 62 | test_lang.o test_intern.o |
| 63 | 63 | |
| 64 | 64 | BZLIB_SRCS = \ |
-
|
r54419e
|
r553474
|
|
| 200 | 200 | GCOV_DIR]) |
| 201 | 201 | |
| 202 | | task :default => :test |
| | 202 | task :default => :testall |
| 203 | 203 | |
| 204 | 204 | def run_tests |
| … |
… |
|
| 369 | 369 | |
| 370 | 370 | rule '.c' => '.rl' do |t| |
| 371 | | sh "ragel -G2 #{t.prerequisites} -o #{t.name}" |
| | 371 | sh "ragel #{t.prerequisites} -o #{t.name}" |
| 372 | 372 | end |
| 373 | 373 | |
-
|
r442a23
|
r553474
|
|
| 182 | 182 | { |
| 183 | 183 | FrtTokenStream *current_ts; |
| 184 | | FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, char *field, char *text); |
| | 184 | FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, const char *field, char *text); |
| 185 | 185 | void (*destroy_i)(struct FrtAnalyzer *a); |
| 186 | 186 | int ref_cnt; |
| … |
… |
|
| 194 | 194 | void (*destroy)(FrtAnalyzer *a), |
| 195 | 195 | FrtTokenStream *(*get_ts)(FrtAnalyzer *a, |
| 196 | | char *field, |
| | 196 | const char *field, |
| 197 | 197 | char *text)); |
| 198 | 198 | extern void frt_a_standard_destroy(FrtAnalyzer *a); |
| … |
… |
|
| 226 | 226 | |
| 227 | 227 | extern FrtAnalyzer *frt_per_field_analyzer_new(FrtAnalyzer *a); |
| 228 | | extern void frt_pfa_add_field(FrtAnalyzer *self, char *field, FrtAnalyzer *analyzer); |
| | 228 | extern void frt_pfa_add_field(FrtAnalyzer *self, |
| | 229 | const char *field, |
| | 230 | FrtAnalyzer *analyzer); |
| 229 | 231 | |
| 230 | 232 | #endif |
-
|
r48290f
|
r553474
|
|
| 14 | 14 | typedef struct FrtDocField |
| 15 | 15 | { |
| 16 | | char *name; |
| | 16 | const char *name; |
| 17 | 17 | int size; |
| 18 | 18 | int capa; |
-
|
r48290f
|
r553474
|
|
| 126 | 126 | */ |
| 127 | 127 | extern FrtHash *frt_h_new(frt_hash_ft hash, |
| 128 | | frt_eq_ft eq, |
| 129 | | frt_free_ft free_key, |
| 130 | | frt_free_ft free_value); |
| | 128 | frt_eq_ft eq, |
| | 129 | frt_free_ft free_key, |
| | 130 | frt_free_ft free_value); |
| 131 | 131 | |
| 132 | 132 | /** |
| … |
… |
|
| 145 | 145 | */ |
| 146 | 146 | extern FrtHash *frt_h_new_str(frt_free_ft free_key, |
| 147 | | frt_free_ft free_value); |
| | 147 | frt_free_ft free_value); |
| 148 | 148 | |
| 149 | 149 | /** |
-
|
r6a5a9e
|
r553474
|
|
| 22 | 22 | FrtQParser *qp; |
| 23 | 23 | FrtHashSet *key; |
| 24 | | char *id_field; |
| 25 | | char *def_field; |
| | 24 | const char *id_field; |
| | 25 | const char *def_field; |
| 26 | 26 | /* for FrtIndexWriter */ |
| 27 | 27 | bool auto_flush : 1; |
-
|
r48290f
|
r553474
|
|
| 103 | 103 | typedef struct FrtFieldInfo |
| 104 | 104 | { |
| 105 | | char *name; |
| | 105 | const char *name; |
| 106 | 106 | float boost; |
| 107 | 107 | unsigned int bits; |
| … |
… |
|
| 111 | 111 | |
| 112 | 112 | extern FrtFieldInfo *frt_fi_new(const char *name, |
| 113 | | FrtStoreValue store, |
| 114 | | FrtIndexValue index, |
| 115 | | FrtTermVectorValue term_vector); |
| | 113 | FrtStoreValue store, |
| | 114 | FrtIndexValue index, |
| | 115 | FrtTermVectorValue term_vector); |
| 116 | 116 | extern char *frt_fi_to_s(FrtFieldInfo *fi); |
| 117 | 117 | extern void frt_fi_deref(FrtFieldInfo *fi); |
| … |
… |
|
| 550 | 550 | typedef struct FrtTermVector |
| 551 | 551 | { |
| 552 | | int field_num; |
| 553 | | char *field; |
| 554 | | int term_cnt; |
| 555 | | FrtTVTerm *terms; |
| 556 | | int offset_cnt; |
| 557 | | FrtOffset *offsets; |
| | 552 | int field_num; |
| | 553 | const char *field; |
| | 554 | int term_cnt; |
| | 555 | FrtTVTerm *terms; |
| | 556 | int offset_cnt; |
| | 557 | FrtOffset *offsets; |
| 558 | 558 | } FrtTermVector; |
| 559 | 559 | |
| … |
… |
|
| 633 | 633 | typedef struct FrtLazyDocField |
| 634 | 634 | { |
| 635 | | char *name; |
| | 635 | const char *name; |
| 636 | 636 | FrtLazyDocFieldData *data; |
| 637 | 637 | FrtLazyDoc *doc; |
| 638 | | int size; /* number of data elements */ |
| 639 | | int len; /* length of data elements concatenated */ |
| 640 | | bool is_compressed : 2; /* set to 2 after all data is loaded */ |
| | 638 | int size; /* number of data elements */ |
| | 639 | int len; /* length of data elements concatenated */ |
| | 640 | bool is_compressed : 2; /* set to 2 after all data is loaded */ |
| 641 | 641 | } FrtLazyDocField; |
| 642 | 642 | |
-
|
r442a23
|
r553474
|
|
| 581 | 581 | #define init frt_init |
| 582 | 582 | #define int2float frt_int2float |
| | 583 | #define intern frt_intern |
| | 584 | #define intern_and_free frt_intern_and_free |
| | 585 | #define intern_init frt_intern_init |
| 583 | 586 | #define ir_add_cache frt_ir_add_cache |
| 584 | 587 | #define ir_close frt_ir_close |
-
|
r48290f
|
r553474
|
|
| 89 | 89 | } FrtTopDocs; |
| 90 | 90 | |
| 91 | | extern FrtTopDocs *frt_td_new(int total_hits, int size, FrtHit **hits, float max_score); |
| | 91 | extern FrtTopDocs *frt_td_new(int total_hits, int size, FrtHit **hits, |
| | 92 | float max_score); |
| 92 | 93 | extern void frt_td_destroy(FrtTopDocs *td); |
| 93 | 94 | extern char *frt_td_to_s(FrtTopDocs *td); |
| … |
… |
|
| 101 | 102 | typedef struct FrtFilter |
| 102 | 103 | { |
| 103 | | char *name; |
| 104 | | FrtHash *cache; |
| 105 | | FrtBitVector *(*get_bv_i)(struct FrtFilter *self, FrtIndexReader *ir); |
| 106 | | char *(*to_s)(struct FrtFilter *self); |
| 107 | | unsigned long (*hash)(struct FrtFilter *self); |
| 108 | | int (*eq)(struct FrtFilter *self, struct FrtFilter *o); |
| 109 | | void (*destroy_i)(struct FrtFilter *self); |
| 110 | | int ref_cnt; |
| | 104 | const char *name; |
| | 105 | FrtHash *cache; |
| | 106 | FrtBitVector *(*get_bv_i)(struct FrtFilter *self, FrtIndexReader *ir); |
| | 107 | char *(*to_s)(struct FrtFilter *self); |
| | 108 | unsigned long (*hash)(struct FrtFilter *self); |
| | 109 | int (*eq)(struct FrtFilter *self, struct FrtFilter *o); |
| | 110 | void (*destroy_i)(struct FrtFilter *self); |
| | 111 | int ref_cnt; |
| 111 | 112 | } FrtFilter; |
| 112 | 113 | |
| … |
… |
|
| 246 | 247 | typedef struct FrtTermQuery |
| 247 | 248 | { |
| 248 | | FrtQuery super; |
| 249 | | char *field; |
| 250 | | char *term; |
| | 249 | FrtQuery super; |
| | 250 | const char *field; |
| | 251 | char *term; |
| 251 | 252 | } FrtTermQuery; |
| 252 | 253 | |
| … |
… |
|
| 313 | 314 | typedef struct FrtPhraseQuery |
| 314 | 315 | { |
| 315 | | FrtQuery super; |
| 316 | | int slop; |
| 317 | | char *field; |
| | 316 | FrtQuery super; |
| | 317 | int slop; |
| | 318 | const char *field; |
| 318 | 319 | FrtPhrasePosition *positions; |
| 319 | | int pos_cnt; |
| 320 | | int pos_capa; |
| | 320 | int pos_cnt; |
| | 321 | int pos_capa; |
| 321 | 322 | } FrtPhraseQuery; |
| 322 | 323 | |
| … |
… |
|
| 333 | 334 | typedef struct FrtMultiTermQuery |
| 334 | 335 | { |
| 335 | | FrtQuery super; |
| 336 | | char *field; |
| 337 | | FrtPriorityQueue *boosted_terms; |
| 338 | | float min_boost; |
| | 336 | FrtQuery super; |
| | 337 | const char *field; |
| | 338 | FrtPriorityQueue *boosted_terms; |
| | 339 | float min_boost; |
| 339 | 340 | } FrtMultiTermQuery; |
| 340 | 341 | |
| … |
… |
|
| 361 | 362 | { |
| 362 | 363 | FrtMTQSubQuery super; |
| 363 | | char *field; |
| 364 | | char *prefix; |
| | 364 | const char *field; |
| | 365 | char *prefix; |
| 365 | 366 | } FrtPrefixQuery; |
| 366 | 367 | |
| … |
… |
|
| 378 | 379 | { |
| 379 | 380 | FrtMTQSubQuery super; |
| 380 | | char *field; |
| 381 | | char *pattern; |
| | 381 | const char *field; |
| | 382 | char *pattern; |
| 382 | 383 | } FrtWildCardQuery; |
| 383 | 384 | |
| … |
… |
|
| 398 | 399 | { |
| 399 | 400 | FrtMTQSubQuery super; |
| 400 | | char *field; |
| | 401 | const char *field; |
| 401 | 402 | char *term; |
| 402 | 403 | const char *text; /* term text after prefix */ |
| … |
… |
|
| 492 | 493 | typedef struct FrtSpanQuery |
| 493 | 494 | { |
| 494 | | FrtQuery super; |
| 495 | | char *field; |
| | 495 | FrtQuery super; |
| | 496 | const char *field; |
| 496 | 497 | FrtSpanEnum *(*get_spans)(FrtQuery *self, FrtIndexReader *ir); |
| 497 | 498 | FrtHashSet *(*get_terms)(FrtQuery *self); |
| … |
… |
|
| 674 | 675 | typedef struct FrtSortField |
| 675 | 676 | { |
| 676 | | char *field; |
| 677 | | SortType type; |
| 678 | | bool reverse : 1; |
| 679 | 677 | const FrtFieldIndexClass *field_index_class; |
| | 678 | const char *field; |
| | 679 | SortType type; |
| | 680 | bool reverse : 1; |
| 680 | 681 | int (*compare)(void *index_ptr, FrtHit *hit1, FrtHit *hit2); |
| 681 | 682 | void (*get_val)(void *index_ptr, FrtHit *hit1, FrtComparable *comparable); |
| … |
… |
|
| 683 | 684 | |
| 684 | 685 | extern FrtSortField *frt_sort_field_new(const char *field, |
| 685 | | SortType type, |
| 686 | | bool reverse); |
| | 686 | SortType type, |
| | 687 | bool reverse); |
| 687 | 688 | extern FrtSortField *frt_sort_field_score_new(bool reverse); |
| 688 | 689 | extern FrtSortField *frt_sort_field_doc_new(bool reverse); |
-
|
r5167b9
|
r553474
|
|
| 15 | 15 | typedef struct FrtTerm |
| 16 | 16 | { |
| 17 | | char *field; |
| | 17 | const char *field; |
| 18 | 18 | char *text; |
| 19 | 19 | } FrtTerm; |
-
|
r378d29
|
r553474
|
|
| 213 | 213 | } |
| 214 | 214 | |
| 215 | | static TokenStream *a_standard_get_ts(Analyzer *a, char *field, char *text) |
| | 215 | static TokenStream *a_standard_get_ts(Analyzer *a, |
| | 216 | const char *field, |
| | 217 | char *text) |
| 216 | 218 | { |
| 217 | 219 | TokenStream *ts; |
| … |
… |
|
| 223 | 225 | Analyzer *analyzer_new(TokenStream *ts, |
| 224 | 226 | void (*destroy_i)(Analyzer *a), |
| 225 | | TokenStream *(*get_ts)(Analyzer *a, char *field, |
| | 227 | TokenStream *(*get_ts)(Analyzer *a, |
| | 228 | const char *field, |
| 226 | 229 | char *text)) |
| 227 | 230 | { |
| … |
… |
|
| 1549 | 1552 | } |
| 1550 | 1553 | |
| 1551 | | static TokenStream *pfa_get_ts(Analyzer *self, char *field, char *text) |
| | 1554 | static TokenStream *pfa_get_ts(Analyzer *self, |
| | 1555 | const char *field, char *text) |
| 1552 | 1556 | { |
| 1553 | 1557 | Analyzer *a = (Analyzer *)h_get(PFA(self)->dict, field); |
| … |
… |
|
| 1564 | 1568 | } |
| 1565 | 1569 | |
| 1566 | | void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer) |
| 1567 | | { |
| 1568 | | h_set(PFA(self)->dict, estrdup(field), analyzer); |
| | 1570 | void pfa_add_field(Analyzer *self, |
| | 1571 | const char *field, |
| | 1572 | Analyzer *analyzer) |
| | 1573 | { |
| | 1574 | h_set(PFA(self)->dict, field, analyzer); |
| 1569 | 1575 | } |
| 1570 | 1576 | |
| … |
… |
|
| 1574 | 1580 | |
| 1575 | 1581 | PFA(a)->default_a = default_a; |
| 1576 | | PFA(a)->dict = h_new_str(&free, &pfa_sub_a_destroy_i); |
| | 1582 | PFA(a)->dict = h_new_str(NULL, &pfa_sub_a_destroy_i); |
| 1577 | 1583 | |
| 1578 | 1584 | a->destroy_i = &pfa_destroy_i; |
-
|
r5a8e6f
|
r553474
|
|
| 1 | 1 | #include "document.h" |
| | 2 | #include "intern.h" |
| 2 | 3 | #include <string.h> |
| 3 | 4 | #include "internal.h" |
| … |
… |
|
| 12 | 13 | { |
| 13 | 14 | DocField *df = ALLOC(DocField); |
| 14 | | df->name = estrdup(name); |
| | 15 | df->name = intern(name); |
| 15 | 16 | df->size = 0; |
| 16 | 17 | df->capa = DF_INIT_CAPA; |
| … |
… |
|
| 50 | 51 | free(df->data); |
| 51 | 52 | free(df->lengths); |
| 52 | | free(df->name); |
| 53 | 53 | free(df); |
| 54 | 54 | } |
-
|
r5a8e6f
|
r553474
|
|
| 1 | 1 | #include "search.h" |
| | 2 | #include "intern.h" |
| 2 | 3 | #include <string.h> |
| 3 | 4 | #include "internal.h" |
| … |
… |
|
| 12 | 13 | { |
| 13 | 14 | h_destroy(filt->cache); |
| 14 | | free(filt->name); |
| 15 | 15 | free(filt); |
| 16 | 16 | } |
| … |
… |
|
| 59 | 59 | Filter *filt = (Filter *)emalloc(size); |
| 60 | 60 | filt->cache = co_hash_create(); |
| 61 | | filt->name = estrdup(name); |
| | 61 | filt->name = intern(name); |
| 62 | 62 | filt->to_s = &filt_to_s_i; |
| 63 | 63 | filt->hash = &filt_hash_default; |
-
|
r2088c0b
|
r553474
|
|
| 1 | 1 | #include "global.h" |
| | 2 | #include "intern.h" |
| 2 | 3 | #include <stdarg.h> |
| 3 | 4 | #include <stdio.h> |
| … |
… |
|
| 445 | 446 | SETSIG_IF_UNSET(SIGBUS , action); |
| 446 | 447 | SETSIG_IF_UNSET(SIGSEGV, action); |
| 447 | | } |
| | 448 | |
| | 449 | intern_init(); |
| | 450 | } |
-
|
r5a8e6f
|
r553474
|
|
| 47 | 47 | bool create) |
| 48 | 48 | { |
| 49 | | HashSet *all_fields = hs_new_str(&free); |
| | 49 | HashSet *all_fields = hs_new_str(NULL); |
| 50 | 50 | Index *self = ALLOC_AND_ZERO(Index); |
| 51 | 51 | self->config = default_config; |
| … |
… |
|
| 75 | 75 | /* options */ |
| 76 | 76 | self->key = NULL; |
| 77 | | self->id_field = estrdup(ID_STRING); |
| 78 | | self->def_field = estrdup(ID_STRING); |
| | 77 | self->id_field = ID_STRING; |
| | 78 | self->def_field = ID_STRING; |
| 79 | 79 | self->auto_flush = false; |
| 80 | 80 | self->check_latest = true; |
| … |
… |
|
| 99 | 99 | if (self->qp) qp_destroy(self->qp); |
| 100 | 100 | if (self->key) hs_destroy(self->key); |
| 101 | | free(self->id_field); |
| 102 | | free(self->def_field); |
| 103 | 101 | free(self); |
| 104 | 102 | } |
| … |
… |
|
| 282 | 280 | fis = self->ir->fis; |
| 283 | 281 | for (i = fis->size - 1; i >= 0; i--) { |
| 284 | | char *field = fis->fields[i]->name; |
| 285 | | hs_add(self->qp->all_fields, estrdup(field)); |
| | 282 | hs_add(self->qp->all_fields, (char *)fis->fields[i]->name); |
| 286 | 283 | } |
| 287 | 284 | return qp_parse(self->qp, qstr); |
-
|
r48290f
|
r553474
|
|
| 1 | 1 | #include "index.h" |
| | 2 | #include "intern.h" |
| 2 | 3 | #include "similarity.h" |
| 3 | 4 | #include "helper.h" |
| … |
… |
|
| 285 | 286 | FieldInfo *fi = ALLOC(FieldInfo); |
| 286 | 287 | fi_check_params(store, index, term_vector); |
| 287 | | fi->name = estrdup(name); |
| | 288 | fi->name = intern(name); |
| 288 | 289 | fi->boost = 1.0; |
| 289 | 290 | fi->bits = 0; |
| … |
… |
|
| 298 | 299 | { |
| 299 | 300 | if (0 == --(fi->ref_cnt)) { |
| 300 | | free(fi->name); |
| 301 | 301 | free(fi); |
| 302 | 302 | } |
| … |
… |
|
| 418 | 418 | fi = ALLOC_AND_ZERO(FieldInfo); |
| 419 | 419 | TRY |
| 420 | | fi->name = is_read_string_safe(is); |
| | 420 | fi->name = intern_and_free(is_read_string_safe(is)); |
| 421 | 421 | tmp.i = is_read_u32(is); |
| 422 | 422 | fi->boost = tmp.f; |
| 423 | 423 | fi->bits = is_read_vint(is); |
| 424 | 424 | XCATCHALL |
| 425 | | free(fi->name); |
| 426 | 425 | free(fi); |
| 427 | 426 | XENDTRY |