Changeset 0c11a534a1b97b7addf59c733a31bcbe1120fcc9

Show
Ignore:
Timestamp:
04/25/08 00:09:25 (9 months ago)
Author:
dave <dave@…>
Parents:
8eb44963185f8afa008b1abfe3f6fad0839b2efd
Children:
a04da26a0a26ed33041c9059b44ece39727fc2a1
git-committer:
dave <dave@06fd6eb0-0002-0410-a719-e5602cce40bc> / 2008-04-24T14:09:25Z+0000
Message:

Changed Ruby bindings to use Ruby symbols

The Ruby bindings now replace Ferret's Symbol implementation with their own,
which uses Ruby symbols. The main advantage of this is that we don't need to do
as much work to convert Ferret's data types into Ruby data types.

An added bonus of this change is that it highlighted all the places where the
current Ferret Symbol implementation was broken, so that implementation is much
more solid now.

git-svn-id: svn+ssh://davebalmain.com/home/dave/repos/ferret/trunk@1043 06fd6eb0-0002-0410-a719-e5602cce40bc

Files:
2 added
25 modified

Legend:

Unmodified
Added
Removed
  • c/include/index.h

    r950230 r0c11a5  
    648648struct FrtLazyDoc 
    649649{ 
    650     FrtHash *field_dict; 
     650    FrtHash *field_dictionary; 
    651651    int size; 
    652652    FrtLazyDocField **fields; 
     
    655655 
    656656extern void frt_lazy_doc_close(FrtLazyDoc *self); 
     657extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field); 
    657658 
    658659/**************************************************************************** 
  • c/include/internal.h

    r950230 r0c11a5  
    7474#define HS_MIN_SIZE                        FRT_HS_MIN_SIZE 
    7575#define I                                  FRT_I 
    76 #define IF                                 FRT_IF 
    7776#define INDEX_ERROR                        FRT_INDEX_ERROR 
    7877#define INDEX_INTERVAL                     FRT_INDEX_INTERVAL 
     
    645644#define lazy_df_get_data                        frt_lazy_df_get_data 
    646645#define lazy_doc_close                          frt_lazy_doc_close 
     646#define lazy_doc_get                            frt_lazy_doc_get 
    647647#define legacy_standard_tokenizer_new           frt_legacy_standard_tokenizer_new 
    648648#define letter_analyzer_new                     frt_letter_analyzer_new 
  • c/include/store.h

    r48290f r0c11a5  
    566566 
    567567/** 
     568 * Write a string with known length to the FrtOutStream. A string is an 
     569 * integer +length+ in VINT format (see frt_os_write_vint) followed by 
     570 * +length+ bytes. The string can then be read using frt_is_read_string. 
     571 * 
     572 * @param os FrtOutStream to write to 
     573 * @param str the string to write 
     574 * @param len the length of the string to write 
     575 * @raise FRT_IO_ERROR if there is an error writing to the file-system 
     576 */ 
     577extern FRT_INLINE void os_write_string_len(FrtOutStream *os, 
     578                                           const char *str, 
     579                                           int len); 
     580 
     581/** 
    568582 * Write a string to the FrtOutStream. A string is an integer +length+ in VINT 
    569583 * format (see frt_os_write_vint) followed by +length+ bytes. The string can then 
  • c/include/symbol.h

    r950230 r0c11a5  
    1 #ifndef _SYMBOL_H 
    2 #define _SYMBOL_H 
     1#ifndef FRT_SYMBOL_H 
     2#define FRT_SYMBOL_H 
    33 
    44typedef struct Frt__Symbol { 
     
    1313 
    1414#define FRT_I frt_intern 
    15 #define FRT_IF frt_intern_and_free 
    1615#define FRT_S(sym) ((const char *)sym) 
    1716 
  • c/src/index.c

    rad7fae r0c11a5  
    450450    for (i = 0; i < fis_size; i++) { 
    451451        fi = fis->fields[i]; 
    452         os_write_string(os, (char *)fi->name); 
     452        os_write_string(os, S(fi->name)); 
    453453        tmp.f = fi->boost; 
    454454        os_write_u32(os, tmp.i); 
     
    14701470{ 
    14711471    LazyDoc *self = ALLOC(LazyDoc); 
    1472     self->field_dict = h_new_str(NULL, (free_ft)&lazy_df_destroy); 
     1472    self->field_dictionary = h_new_ptr((free_ft)&lazy_df_destroy); 
    14731473    self->size = size; 
    14741474    self->fields = ALLOC_AND_ZERO_N(LazyDocField *, size); 
     
    14791479void lazy_doc_close(LazyDoc *self) 
    14801480{ 
    1481     h_destroy(self->field_dict); 
     1481    h_destroy(self->field_dictionary); 
    14821482    is_close(self->fields_in); 
    14831483    free(self->fields); 
     
    14881488{ 
    14891489    self->fields[i] = lazy_df; 
    1490     h_set(self->field_dict, lazy_df->name, lazy_df); 
     1490    h_set(self->field_dictionary, lazy_df->name, lazy_df); 
    14911491    lazy_df->doc = self; 
     1492} 
     1493 
     1494LazyDocField *frt_lazy_doc_get(LazyDoc *self, Symbol field) 
     1495{ 
     1496    return (LazyDocField *)h_get(self->field_dictionary, field); 
    14921497} 
    14931498 
     
    17161721Hash *fr_get_tv(FieldsReader *fr, int doc_num) 
    17171722{ 
    1718     Hash *term_vectors = h_new_str((free_ft)NULL, (free_ft)&tv_destroy); 
     1723    Hash *term_vectors = h_new_ptr((free_ft)&tv_destroy); 
    17191724    int i; 
    17201725    InStream *fdx_in = fr->fdx_in; 
  • c/src/q_multi_term.c

    r950230 r0c11a5  
    261261                expl_new(sim_tf(self->similarity, (float)freq) * tdew->boost, 
    262262                         "tf(term_freq(%s:%s)=%d)^%f", 
    263                          mtsc->field, tdew->term, freq, tdew->boost)); 
     263                         S(mtsc->field), tdew->term, freq, tdew->boost)); 
    264264 
    265265            total_score += sim_tf(self->similarity, (float)freq) * tdew->boost; 
     
    385385    char *query_str; 
    386386    MultiTermQuery *mtq = MTQ(self->query); 
    387     Symbol field = mtq->field; 
     387    const char *field = S(mtq->field); 
    388388    PriorityQueue *bt_pq = mtq->boosted_terms; 
    389389    int i; 
     
    391391    char *doc_freqs = NULL; 
    392392    size_t len = 0, pos = 0; 
    393     const int field_num = fis_get_field_num(ir->fis, field); 
     393    const int field_num = fis_get_field_num(ir->fis, mtq->field); 
    394394 
    395395    if (field_num < 0) { 
    396396        return expl_new(0.0, "field \"%s\" does not exist in the index", 
    397                         (char *)field); 
     397                        field); 
    398398    } 
    399399 
     
    416416    sprintf(doc_freqs + pos, "= %d", total_doc_freqs); 
    417417 
    418     idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", (char *)field, doc_freqs); 
    419     idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", (char *)field, doc_freqs); 
     418    idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs); 
     419    idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs); 
    420420    free(doc_freqs); 
    421421 
     
    455455        : (float)0.0; 
    456456    field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)", 
    457                                (char *)field, doc_num); 
     457                               field, doc_num); 
    458458 
    459459    expl_add_detail(field_expl, field_norm_expl); 
     
    503503 ***************************************************************************/ 
    504504 
    505 static char *multi_tq_to_s(Query *self, Symbol curr_field) 
     505static char *multi_tq_to_s(Query *self, Symbol default_field) 
    506506{ 
    507507    int i; 
     
    509509    BoostedTerm *bt; 
    510510    char *buffer, *bptr; 
    511     int flen = (int)sym_len(MTQ(self)->field); 
     511    const char *field = S(MTQ(self)->field); 
     512    int flen = (int)strlen(field); 
    512513    int tlen = 0; 
    513514 
     
    519520    bptr = buffer = ALLOC_N(char, tlen + flen + 35); 
    520521 
    521     if (curr_field != MTQ(self)->field) { 
    522         bptr += sprintf(bptr, "%s:", S(MTQ(self)->field)); 
     522    if (default_field != MTQ(self)->field) { 
     523        bptr += sprintf(bptr, "%s:", field); 
    523524    } 
    524525 
  • c/src/q_parser.c

    rad7fae r0c11a5  
    29072907    /* make sure all_fields contains the default fields */ 
    29082908    self->analyzer = analyzer; 
    2909     self->ts_cache = h_new_str(NULL, (free_ft)&ts_deref); 
     2909    self->ts_cache = h_new_ptr((free_ft)&ts_deref); 
    29102910    self->buf_index = 0; 
    29112911    self->dynbuf = NULL; 
  • c/src/q_parser.y

    rad7fae r0c11a5  
    11701170    /* make sure all_fields contains the default fields */ 
    11711171    self->analyzer = analyzer; 
    1172     self->ts_cache = h_new_str(NULL, (free_ft)&ts_deref); 
     1172    self->ts_cache = h_new_ptr((free_ft)&ts_deref); 
    11731173    self->buf_index = 0; 
    11741174    self->dynbuf = NULL; 
  • c/src/q_phrase.c

    rad7fae r0c11a5  
    533533    size_t len = 0, pos = 0; 
    534534    const int field_num = fis_get_field_num(ir->fis, phq->field); 
     535    const char *field = S(phq->field); 
    535536 
    536537    if (field_num < 0) { 
    537         return expl_new(0.0, "field \"%s\" does not exist in the index", phq->field); 
     538        return expl_new(0.0, "field \"%s\" does not exist in the index", field); 
    538539    } 
    539540 
     
    564565    doc_freqs[pos] = 0; 
    565566 
    566     idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", phq->field, doc_freqs); 
    567     idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", phq->field, doc_freqs); 
     567    idf_expl1 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs); 
     568    idf_expl2 = expl_new(self->idf, "idf(%s:<%s>)", field, doc_freqs); 
    568569    free(doc_freqs); 
    569570 
     
    599600        : (float)0.0; 
    600601    field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)", 
    601                                phq->field, doc_num); 
     602                               field, doc_num); 
    602603 
    603604    expl_add_detail(field_expl, field_norm_expl); 
     
    893894    const int pos_cnt = phq->pos_cnt; 
    894895    PhrasePosition *positions = phq->positions; 
     896    const char *field = S(phq->field); 
     897    int flen = strlen(field); 
    895898 
    896899    int i, j, buf_index = 0, pos, last_pos; 
     
    900903    if (phq->pos_cnt == 0) { 
    901904        if (default_field != phq->field) { 
    902             return strfmt("%s:\"\"", phq->field); 
     905            return strfmt("%s:\"\"", field); 
    903906        } 
    904907        else { 
     
    910913    qsort(positions, pos_cnt, sizeof(PhrasePosition), &phrase_pos_cmp); 
    911914 
    912     len = sym_len(phq->field) + 1; 
     915    len = flen + 1; 
    913916 
    914917    for (i = 0; i < pos_cnt; i++) { 
     
    926929 
    927930    if (default_field != phq->field) { 
    928         len = sym_len(phq->field); 
    929         memcpy(buffer, phq->field, len); 
    930         buffer[len] = ':'; 
    931         buf_index += len + 1; 
     931        memcpy(buffer, field, flen); 
     932        buffer[flen] = ':'; 
     933        buf_index += flen + 1; 
    932934    } 
    933935 
  • c/src/q_range.c

    r950230 r0c11a5  
    1919} Range; 
    2020 
    21 static char *range_to_s(Range *range, Symbol field, float boost) 
     21static char *range_to_s(Range *range, Symbol default_field, float boost) 
    2222{ 
    2323    char *buffer, *b; 
    2424    size_t flen, llen, ulen; 
    25  
    26     flen = sym_len(range->field); 
     25    const char *field = S(range->field); 
     26 
     27    flen = strlen(field); 
    2728    llen = range->lower_term ? strlen(range->lower_term) : 0; 
    2829    ulen = range->upper_term ? strlen(range->upper_term) : 0; 
     
    3031    b = buffer; 
    3132 
    32     if (field != range->field) { 
    33         memcpy(buffer, range->field, flen * sizeof(char)); 
     33    if (default_field != range->field) { 
     34        memcpy(buffer, field, flen * sizeof(char)); 
    3435        b += flen; 
    3536        *b = ':'; 
  • c/src/q_span.c

    rad7fae r0c11a5  
    16151615    } 
    16161616    else { 
    1617         p = strfmt("span_terms(%s:%s)", SpQ(self)->field, terms); 
     1617        p = strfmt("span_terms(%s:%s)", S(SpQ(self)->field), terms); 
    16181618    } 
    16191619    free(terms); 
  • c/src/q_term.c

    r950230 r0c11a5  
    106106    return expl_new(sim_tf(self->similarity, (float)tf), 
    107107                    "tf(term_freq(%s:%s)=%d)", 
    108                     TQ(query)->field, TQ(query)->term, tf); 
     108                    S(TQ(query)->field), TQ(query)->term, tf); 
    109109} 
    110110 
     
    266266static char *tq_to_s(Query *self, Symbol default_field) 
    267267{ 
    268     size_t flen = sym_len(TQ(self)->field); 
     268    const char *field = S(TQ(self)->field); 
     269    size_t flen = strlen(field); 
    269270    size_t tlen = strlen(TQ(self)->term); 
    270271    char *buffer = ALLOC_N(char, 34 + flen + tlen); 
    271272    char *b = buffer; 
    272273    if (default_field != TQ(self)->field) { 
    273         memcpy(b, TQ(self)->field, sizeof(char) * flen); 
     274        memcpy(b, field, sizeof(char) * flen); 
    274275        b[flen] = ':'; 
    275276        b += flen + 1; 
  • c/src/search.c

    r950230 r0c11a5  
    848848    LazyDocField *lazy_df = NULL; 
    849849    if (lazy_doc) { 
    850         lazy_df = (LazyDocField *)h_get(lazy_doc->field_dict, field); 
     850        lazy_df = lazy_doc_get(lazy_doc, field); 
    851851    } 
    852852    if (tv && lazy_df && tv->term_cnt > 0 && tv->terms[0].positions != NULL 
  • c/src/store.c

    r5a8e6f r0c11a5  
    599599} 
    600600 
     601INLINE void os_write_string_len(OutStream *os, const char *str, int len) 
     602{ 
     603    os_write_vint(os, len); 
     604    os_write_bytes(os, (uchar *)str, len); 
     605} 
    601606void os_write_string(OutStream *os, const char *str) 
    602607{ 
    603     int len = (int)strlen(str); 
    604     os_write_vint(os, len); 
    605  
    606     os_write_bytes(os, (uchar *)str, len); 
     608    os_write_string_len(os, str, (int)strlen(str)); 
    607609} 
    608610 
  • c/test/test_fields.c

    r950230 r0c11a5  
    545545    store_deref(store); 
    546546 
    547     lazy_df = (LazyDocField *)h_get(lazy_doc->field_dict, "stored"); 
     547    lazy_df = lazy_doc_get(lazy_doc, I("stored")); 
    548548    Apnull(lazy_doc->fields[0]->data[0].text); 
    549549    Asequal("this is a stored field", text=lazy_df_get_data(lazy_df, 0)); 
  • c/test/test_filter.c

    rad7fae r0c11a5  
    250250    float is_ok = 0.0; 
    251251    LazyDoc *lazy_doc = searcher_get_lazy_doc(sea, doc_num); 
    252     LazyDocField *lazy_df = (LazyDocField *)h_get(lazy_doc->field_dict, "num"); 
     252    LazyDocField *lazy_df = lazy_doc_get(lazy_doc, I("num")); 
    253253    char *num = lazy_df_get_data(lazy_df, 0); 
    254254    (void)score; 
     
    268268    float distance = 0.0; 
    269269    LazyDoc *lazy_doc = searcher_get_lazy_doc(sea, doc_num); 
    270     LazyDocField *lazy_df = (LazyDocField *)h_get(lazy_doc->field_dict, "num"); 
     270    LazyDocField *lazy_df = lazy_doc_get(lazy_doc, I("num")); 
    271271    char *num = lazy_df_get_data(lazy_df, 0); 
    272272    (void)score; 
  • c/test/test_index.c

    r950230 r0c11a5  
    17361736    tvs = ir->term_vectors(ir, 3); 
    17371737    Aiequal(3, tvs->size); 
    1738     tv = (TermVector *)h_get(tvs, "author"); 
     1738    tv = (TermVector *)h_get(tvs, I("author")); 
    17391739    if (Apnotnull(tv)) { 
    17401740        Asequal("author", tv->field); 
     
    17431743        Apnull(tv->offsets); 
    17441744    } 
    1745     tv = (TermVector *)h_get(tvs, "body"); 
     1745    tv = (TermVector *)h_get(tvs, I("body")); 
    17461746    if (Apnotnull(tv)) { 
    17471747        Asequal("body", tv->field); 
    17481748        Aiequal(4, tv->term_cnt); 
    17491749    } 
    1750     tv = (TermVector *)h_get(tvs, "title"); 
     1750    tv = (TermVector *)h_get(tvs, I("title")); 
    17511751    if (Apnotnull(tv)) { 
    17521752        Asequal("title", tv->field); 
     
    18211821 
    18221822    lz_doc = ir->get_lazy_doc(ir, 0); 
    1823     lz_df1 = (LazyDocField *)h_get(lz_doc->field_dict, changing_field); 
    1824     lz_df2 = (LazyDocField *)h_get(lz_doc->field_dict, compressed_field); 
     1823    lz_df1 = lazy_doc_get(lz_doc, changing_field); 
     1824    lz_df2 = lazy_doc_get(lz_doc, compressed_field); 
    18251825    Asequal(lazy_df_get_data(lz_df1, 0), lazy_df_get_data(lz_df2, 0)); 
    18261826    lazy_doc_close(lz_doc); 
    18271827 
    18281828    lz_doc = ir->get_lazy_doc(ir, 2); 
    1829     lz_df1 = (LazyDocField *)h_get(lz_doc->field_dict, tag); 
    1830     lz_df2 = (LazyDocField *)h_get(lz_doc->field_dict, compressed_field); 
     1829    lz_df1 = lazy_doc_get(lz_doc, tag); 
     1830    lz_df2 = lazy_doc_get(lz_doc, compressed_field); 
    18311831    for (i = 0; i < 4; i++) { 
    18321832        Asequal(lazy_df_get_data(lz_df1, i), lazy_df_get_data(lz_df2, i)); 
     
    18351835 
    18361836    lz_doc = ir->get_lazy_doc(ir, 2); 
    1837     lz_df1 = (LazyDocField *)h_get(lz_doc->field_dict, tag); 
    1838     lz_df2 = (LazyDocField *)h_get(lz_doc->field_dict, compressed_field); 
     1837    lz_df1 = lazy_doc_get(lz_doc, tag); 
     1838    lz_df2 = lazy_doc_get(lz_doc, compressed_field); 
    18391839    lazy_df_get_bytes(lz_df1, buf1, 5, 11); 
    18401840    lazy_df_get_bytes(lz_df2, buf2, 5, 11); 
  • ruby/.gitignore

    r0a55ed r0c11a5  
    138138ext/store.c 
    139139ext/store.h 
    140 ext/symbol.c 
    141 ext/symbol.h 
    142140ext/term_vectors.c 
    143141ext/win32.h 
  • ruby/Rakefile

    rcc429f r0c11a5  
    5555                     "../c/lib/libstemmer_c/include/libstemmer.[h]"] 
    5656  EXT_SRC.exclude('../c/**/ind.[ch]', 
     57                  '../c/**/symbol.[ch]', 
    5758                  '../c/include/threading.h', 
     59                  '../c/include/scanner.h', 
    5860                  '../c/include/internal.h', 
    5961                  '../c/src/lang.c', 
     
    8284    # prepend lib files to avoid conflicts 
    8385    file dest_fn => fn do |t| 
    84       safe_ln fn, dest_fn 
     86      ln fn, dest_fn 
    8587 
    8688      if fn =~ /stemmer/ 
  • ruby/ext/extconf.rb

    raeada4 r0c11a5  
    66elsif ENV['FERRET_DEV'] 
    77  require 'mkmf' 
    8   $CFLAGS += " -g -Wall -fno-stack-protector -fno-common -D_FILE_OFFSET_BITS=64 -D_XOPEN_SOURCE=500" 
     8  $CFLAGS = " -g -Wall -fno-stack-protector -fno-common -D_FILE_OFFSET_BITS=64 -D_XOPEN_SOURCE=500" 
     9  puts $CFLAGS 
    910  create_makefile("ferret_ext") 
    1011else 
  • ruby/ext/ferret.c

    r0a55ed r0c11a5  
    191191    switch (TYPE(rfield)) { 
    192192        case T_SYMBOL: 
    193             return I(rb_id2name(SYM2ID(rfield))); 
     193            return SYM2FSYM(rfield); 
    194194        case T_STRING: 
    195195            return I(rs2s(rfield)); 
     
    293293{ 
    294294    return rb_struct_new(cTerm, 
    295                          SYM2RSYM(field), 
     295                         FSYM2SYM(field), 
    296296                         rb_str_new2(text), 
    297297                         NULL);