Changeset 2ac444617a25eb599de0faec801ffa532425f554

Show
Ignore:
Timestamp:
04/28/08 14:54:33 (8 months ago)
Author:
David Balmain <dbalmain@…>
Parents:
88af924cc987bc50098df966cc16b84e4213a7fe
Children:
8d89448795e517f0f1afd9bb633dc677fde09712
git-committer:
David Balmain <dbalmain@gmail.com> / 2008-04-28T14:54:33Z+1000
Message:

Full test coverage for PhraseQuery and its variants

Full test coverage for:
* PhraseQuery
* MultiPhraseQuery
* PhraseQuery with slop
* MultiPhraseQuery with slop

Files:
6 modified

Legend:

Unmodified
Added
Removed
  • c/src/index.c

    ra23445 r2ac444  
    55805580                while (NULL != (tk = ts->next(ts))) { 
    55815581                    pos += tk->pos_inc; 
     5582                    /* if for some reason pos gets set to some number less 
     5583                     * than 0 the we'll start pos at 0 */ 
     5584                    if (pos < 0) { 
     5585                        pos = 0; 
     5586                    } 
    55825587                    dw_add_posting(mp, curr_plists, fld_plists, doc_num, 
    55835588                                   tk->text, tk->len, pos); 
  • c/src/q_phrase.c

    r7b6066 r2ac444  
    778778        if (tv_term) { 
    779779            TVPosEnum *tvpe = tvpe_new(tv_term->positions, tv_term->freq, 0); 
    780             if (tvpe_next(tvpe)) { 
    781                 pq_push(tvpe_pq, tvpe); 
    782                 total_positions += tv_term->freq; 
    783             } 
    784             else { 
    785                 free(tvpe); 
    786             } 
     780            /* got tv_term so tvpe_next should always return true once here */ 
     781            assert(tvpe_next(tvpe)); 
     782            pq_push(tvpe_pq, tvpe); 
     783            total_positions += tv_term->freq; 
    787784        } 
    788785    } 
  • c/src/search.c

    r7b6066 r2ac444  
    540540    } 
    541541    self->matches[self->size].start = start; 
    542     self->matches[self->size].end = end; 
    543     self->matches[self->size++].score = 1.0; 
     542    self->matches[self->size].end   = end; 
     543    self->matches[self->size].score = 1.0; 
     544    self->size++; 
    544545    return self; 
    545546} 
     
    15231524    Query *rewritten_query = self->rewrite(self, query); 
    15241525    /* terms get copied directly to df_map so no need to free here */ 
    1525     HashSet *terms = frt_hs_new((hash_ft)&frt_term_hash, 
    1526                                 (eq_ft)&frt_term_eq, 
    1527                                 (free_ft)NULL); 
     1526    HashSet *terms = hs_new((hash_ft)&term_hash, 
     1527                            (eq_ft)&term_eq, 
     1528                            (free_ft)NULL); 
    15281529    HashSetEntry *hse; 
    15291530 
     
    18011802    int i = msea_get_searcher_index(self, doc_num); 
    18021803    Searcher *s = msea->searchers[i]; 
    1803     return s->get_term_vector(s, doc_num - msea->starts[i], 
    1804                               field); 
     1804    return s->get_term_vector(s, doc_num - msea->starts[i], field); 
    18051805} 
    18061806 
  • c/test/test_search.c

    r7b6066 r2ac444  
    202202    IndexWriter *iw; 
    203203 
    204     FieldInfos *fis = fis_new(STORE_YES, INDEX_YES, TERM_VECTOR_YES); 
     204    FieldInfos *fis = fis_new(STORE_YES, 
     205                              INDEX_YES, 
     206                              TERM_VECTOR_WITH_POSITIONS_OFFSETS); 
    205207    index_create(store, fis); 
    206208    fis_deref(fis); 
     
    514516static void test_phrase_query(TestCase *tc, void *data) 
    515517{ 
     518    MatchVector *mv; 
    516519    Searcher *searcher = (Searcher *)data; 
     520    Explanation *explanation; 
    517521    Query *q; 
    518522    Query *phq = phq_new(field); 
     
    597601    phq_add_term(phq, "quick", 1); 
    598602    check_hits(tc, searcher, phq, "", -1); 
     603    explanation = searcher->explain(searcher, phq, 0); 
     604    Afequal(0.0, explanation->value); 
     605    expl_destroy(explanation); 
    599606    q_deref(phq); 
    600607 
    601     /* test single term case, query is rewritten to TermQuery */ 
     608    /* test single-term case, query is rewritten to TermQuery */ 
    602609    phq = phq_new(field); 
    603610    phq_add_term(phq, "word2", 1); 
     
    605612    q = searcher_rewrite(searcher, phq); 
    606613    Aiequal(q->type, TERM_QUERY); 
     614    q_deref(q); 
     615 
     616    /* test single-position/multi-term query is rewritten as MultiTermQuery */ 
     617    phq_append_multi_term(phq, "word3"); 
     618    check_hits(tc, searcher, phq, "1,2,3,4,6,8,11,14", -1); 
     619    q = searcher_rewrite(searcher, phq); 
     620    Aiequal(q->type, MULTI_TERM_QUERY); 
     621    q_deref(q); 
     622 
     623    /* check boost doesn't break anything */; 
     624    phq_add_term(phq, "one", 1); /* make sure it won't be rewritten */ 
     625    phq->boost = 10.0; 
     626    check_hits(tc, searcher, phq, "2,3", -1); 
    607627    q_deref(phq); 
    608     q_deref(q); 
     628 
     629    /* test get_matchv_i */ 
     630    phq = phq_new(field); 
     631    phq_add_term(phq, "quick", 0); 
     632    phq_add_term(phq, "brown", 1); 
     633    check_hits(tc, searcher, phq, "1", -1); 
     634    mv = searcher_get_match_vector(searcher, phq, 1, field); 
     635    if (Aiequal(2, mv->size)) { 
     636        Aiequal(3, mv->matches[0].start); 
     637        Aiequal(4, mv->matches[0].end); 
     638        Aiequal(7, mv->matches[1].start); 
     639        Aiequal(8, mv->matches[1].end); 
     640    } 
     641    matchv_destroy(mv); 
     642    phq_set_slop(phq, 4); 
     643    check_hits(tc, searcher, phq, "1,16,17", -1); 
     644    mv = searcher_get_match_vector(searcher, phq, 1, field); 
     645    if (Aiequal(2, mv->size)) { 
     646        Aiequal(3, mv->matches[0].start); 
     647        Aiequal(4, mv->matches[0].end); 
     648        Aiequal(7, mv->matches[1].start); 
     649        Aiequal(8, mv->matches[1].end); 
     650    } 
     651    matchv_destroy(mv); 
     652    mv = searcher_get_match_vector(searcher, phq, 16, field); 
     653    if (Aiequal(1, mv->size)) { 
     654        Aiequal(2, mv->matches[0].start); 
     655        Aiequal(5, mv->matches[0].end); 
     656    } 
     657    matchv_destroy(mv); 
     658    phq_add_term(phq, "chicken", 1); 
     659    check_hits(tc, searcher, phq, "", -1); 
     660    mv = searcher_get_match_vector(searcher, phq, 16, field); 
     661    Aiequal(0, mv->size); 
     662    matchv_destroy(mv); 
     663    q_deref(phq); 
    609664} 
    610665 
     
    666721    Searcher *searcher = (Searcher *)data; 
    667722    Query *phq, *q; 
     723    MatchVector *mv; 
    668724 
    669725    phq = phq_new(field); 
    670     phq_add_term(phq, "quick", 0); 
     726    /* ok to use append_multi_term to start */ 
     727    phq_append_multi_term(phq, "quick"); 
    671728    phq_append_multi_term(phq, "fast"); 
    672729    check_hits(tc, searcher, phq, "1, 8, 11, 14, 16, 17", -1); 
     
    731788    q_deref(q); 
    732789 
     790    /* test get_matchv_i */ 
     791    phq = phq_new(field); 
     792    phq_add_term(phq, "quick", 0); 
     793    phq_add_term(phq, "brown", 1); 
     794    phq_append_multi_term(phq, "dirty"); 
     795    phq_append_multi_term(phq, "red"); 
     796    check_hits(tc, searcher, phq, "1,11", -1); 
     797    mv = searcher_get_match_vector(searcher, phq, 1, field); 
     798    if (Aiequal(2, mv->size)) { 
     799        Aiequal(3, mv->matches[0].start); 
     800        Aiequal(4, mv->matches[0].end); 
     801        Aiequal(7, mv->matches[1].start); 
     802        Aiequal(8, mv->matches[1].end); 
     803    } 
     804    matchv_destroy(mv); 
     805    phq_set_slop(phq, 1); 
     806    check_hits(tc, searcher, phq, "1,11,17", -1); 
     807    mv = searcher_get_match_vector(searcher, phq, 1, field); 
     808    if (Aiequal(2, mv->size)) { 
     809        Aiequal(3, mv->matches[0].start); 
     810        Aiequal(4, mv->matches[0].end); 
     811        Aiequal(7, mv->matches[1].start); 
     812        Aiequal(8, mv->matches[1].end); 
     813    } 
     814    matchv_destroy(mv); 
     815    mv = searcher_get_match_vector(searcher, phq, 17, field); 
     816    if (Aiequal(1, mv->size)) { 
     817        Aiequal(5, mv->matches[0].start); 
     818        Aiequal(7, mv->matches[0].end); 
     819    } 
     820    matchv_destroy(mv); 
     821    phq_add_term(phq, "chicken", 1); 
     822    phq_append_multi_term(phq, "turtle"); 
     823    check_hits(tc, searcher, phq, "", -1); 
     824    mv = searcher_get_match_vector(searcher, phq, 17, field); 
     825    Aiequal(0, mv->size); 
     826    matchv_destroy(mv); 
     827    q_deref(phq); 
    733828} 
    734829 
     
    14061501    int i; 
    14071502    IndexWriter *iw; 
    1408     FieldInfos *fis = fis_new(STORE_YES, INDEX_YES, TERM_VECTOR_YES); 
     1503    FieldInfos *fis = fis_new(STORE_YES, 
     1504                              INDEX_YES, 
     1505                              TERM_VECTOR_WITH_POSITIONS_OFFSETS); 
    14091506    index_create(store, fis); 
    14101507    fis_deref(fis); 
  • ruby/ext/r_search.c

    r0c11a5 r2ac444  
    494494{ 
    495495    VALUE rterms = rb_ary_new(); 
    496     HashSet *terms = term_set_new(); 
     496    HashSet *terms = hs_new((hash_ft)&term_hash, 
     497                            (eq_ft)&term_eq, 
     498                            (free_ft)term_destroy); 
    497499    HashSetEntry *hse; 
    498500    GET_Q();