Changeset cafd8d281bdf982e2ac0b84d57dfbf9fed46603c
- Timestamp:
- 04/22/08 00:10:15 (9 months ago)
- Parents:
- 8c94483694f5774d39acb4072885476c45d9b571
- Children:
- c929863ac8839608b81f31d8c7877cb11792961e, 83fedc095fb017076709f408095674cd9fd05ecc, c93e31a2e50bb75c511e0b1553d7311ec0335cce
- git-committer:
- dave <dave@06fd6eb0-0002-0410-a719-e5602cce40bc> / 2008-04-21T14:10:15Z+0000
- Files:
-
- 4 modified
-
TODO (modified) (3 diffs)
-
c/TODO (modified) (1 diff)
-
c/src/q_parser.c (modified) (7 diffs)
-
c/src/q_parser.y (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
TODO
r488aac rcafd8d 20 20 useful for storing field names so that no objects need to strdup the 21 21 field-names but can just store the symbol representative instead. 22 + this has been done but it can be improved using actual Symbol structs 23 instead of plain char* 22 24 - Make threading optional at compile time 23 25 - to_json should limit output to prevent memory overflow on large indexes. … … 34 36 - Auto-loading of documents during search. ie actual documents get returned 35 37 instead of document numbers. 36 - update benchmark suite to use getrusage.u37 38 38 39 * Ruby bindings … … 106 107 + Working Query: field1:value1 AND NOT field2:value2 107 108 + Failing Query: field1:value1 AND ( NOT field2:value2 ) 108 109 * update benchmark suite to use getrusage -
c/TODO
r48290f rcafd8d 4 4 5 5 benchmarks 6 * string actions when length is known7 6 * standard tokenizer 8 * writevint -
c/src/q_parser.c
r553474 rcafd8d 99 99 100 100 /* Copy the first part of user declarations. */ 101 #line 1"src/q_parser.y"101 #line 3 "src/q_parser.y" 102 102 103 103 #include <string.h> … … 149 149 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED 150 150 typedef union YYSTYPE 151 #line 29"src/q_parser.y"151 #line 31 "src/q_parser.y" 152 152 { 153 153 Query *query; … … 169 169 170 170 /* Copy the second part of user declarations. */ 171 #line 3 7"src/q_parser.y"171 #line 39 "src/q_parser.y" 172 172 173 173 static int yylex(YYSTYPE *lvalp, QParser *qp); … … 549 549 static const yytype_uint8 yyrline[] = 550 550 { 551 0, 13 1, 131, 132, 134, 135, 136, 137, 139, 140,552 14 1, 143, 144, 146, 147, 148, 149, 150, 151, 152,553 15 4, 155, 156, 158, 160, 160, 162, 162, 162, 165,554 16 6, 168, 169, 170, 171, 173, 174, 175, 176, 177,555 1 79, 180, 181, 182, 183, 184, 185, 186, 187, 188,556 1 89, 190551 0, 133, 133, 134, 136, 137, 138, 139, 141, 142, 552 143, 145, 146, 148, 149, 150, 151, 152, 153, 154, 553 156, 157, 158, 160, 162, 162, 164, 164, 164, 167, 554 168, 170, 171, 172, 173, 175, 176, 177, 178, 179, 555 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 556 191, 192 557 557 }; 558 558 #endif … … 1213 1213 { 1214 1214 case 27: /* "bool_q" */ 1215 #line 12 6"src/q_parser.y"1215 #line 128 "src/q_parser.y" 1216 1216 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1217 1217 #line 1218 "src/q_parser.c" 1218 1218 break; 1219 1219 case 28: /* "bool_clss" */ 1220 #line 1 28"src/q_parser.y"1220 #line 130 "src/q_parser.y" 1221 1221 { if ((yyvaluep->bclss) && qp->destruct) bca_destroy((yyvaluep->bclss)); }; 1222 1222 #line 1223 "src/q_parser.c" 1223 1223 break; 1224 1224 case 29: /* "bool_cls" */ 1225 #line 12 7"src/q_parser.y"1225 #line 129 "src/q_parser.y" 1226 1226 { if ((yyvaluep->bcls) && qp->destruct) bc_deref((yyvaluep->bcls)); }; 1227 1227 #line 1228 "src/q_parser.c" 1228 1228 break; 1229 1229 case 30: /* "boosted_q" */ 1230 #line 12 6"src/q_parser.y"1230 
#line 128 "src/q_parser.y" 1231 1231 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1232 1232 #line 1233 "src/q_parser.c" 1233 1233 break; 1234 1234 case 31: /* "q" */ 1235 #line 12 6"src/q_parser.y"1235 #line 128 "src/q_parser.y" 1236 1236 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1237 1237 #line 1238 "src/q_parser.c" 1238 1238 break; 1239 1239 case 32: /* "term_q" */ 1240 #line 12 6"src/q_parser.y"1240 #line 128 "src/q_parser.y" 1241 1241 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1242 1242 #line 1243 "src/q_parser.c" 1243 1243 break; 1244 1244 case 33: /* "wild_q" */ 1245 #line 12 6"src/q_parser.y"1245 #line 128 "src/q_parser.y" 1246 1246 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1247 1247 #line 1248 "src/q_parser.c" 1248 1248 break; 1249 1249 case 34: /* "field_q" */ 1250 #line 12 6"src/q_parser.y"1250 #line 128 "src/q_parser.y" 1251 1251 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1252 1252 #line 1253 "src/q_parser.c" 1253 1253 break; 1254 1254 case 39: /* "phrase_q" */ 1255 #line 12 6"src/q_parser.y"1255 #line 128 "src/q_parser.y" 1256 1256 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1257 1257 #line 1258 "src/q_parser.c" 1258 1258 break; 1259 1259 case 40: /* "ph_words" */ 1260 #line 1 29"src/q_parser.y"1260 #line 131 "src/q_parser.y" 1261 1261 { if ((yyvaluep->phrase) && qp->destruct) ph_destroy((yyvaluep->phrase)); }; 1262 1262 #line 1263 "src/q_parser.c" 1263 1263 break; 1264 1264 case 41: /* "range_q" */ 1265 #line 12 6"src/q_parser.y"1265 #line 128 "src/q_parser.y" 1266 1266 { if ((yyvaluep->query) && qp->destruct) q_deref((yyvaluep->query)); }; 1267 1267 #line 1268 "src/q_parser.c" … … 1574 1574 { 1575 1575 case 2: 1576 #line 13 1"src/q_parser.y"1576 #line 133 "src/q_parser.y" 1577 1577 { qp->result = (yyval.query) = NULL; } 1578 1578 break; 1579 1579 1580 1580 case 3: 1581 #line 13 
2"src/q_parser.y"1581 #line 134 "src/q_parser.y" 1582 1582 { T qp->result = (yyval.query) = get_bool_q((yyvsp[(1) - (1)].bclss)); E } 1583 1583 break; 1584 1584 1585 1585 case 4: 1586 #line 13 4"src/q_parser.y"1586 #line 136 "src/q_parser.y" 1587 1587 { T (yyval.bclss) = first_cls((yyvsp[(1) - (1)].bcls)); E } 1588 1588 break; 1589 1589 1590 1590 case 5: 1591 #line 13 5"src/q_parser.y"1591 #line 137 "src/q_parser.y" 1592 1592 { T (yyval.bclss) = add_and_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E } 1593 1593 break; 1594 1594 1595 1595 case 6: 1596 #line 13 6"src/q_parser.y"1596 #line 138 "src/q_parser.y" 1597 1597 { T (yyval.bclss) = add_or_cls((yyvsp[(1) - (3)].bclss), (yyvsp[(3) - (3)].bcls)); E } 1598 1598 break; 1599 1599 1600 1600 case 7: 1601 #line 13 7"src/q_parser.y"1601 #line 139 "src/q_parser.y" 1602 1602 { T (yyval.bclss) = add_default_cls(qp, (yyvsp[(1) - (2)].bclss), (yyvsp[(2) - (2)].bcls)); E } 1603 1603 break; 1604 1604 1605 1605 case 8: 1606 #line 1 39"src/q_parser.y"1606 #line 141 "src/q_parser.y" 1607 1607 { T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST); E } 1608 1608 break; 1609 1609 1610 1610 case 9: 1611 #line 14 0"src/q_parser.y"1611 #line 142 "src/q_parser.y" 1612 1612 { T (yyval.bcls) = get_bool_cls((yyvsp[(2) - (2)].query), BC_MUST_NOT); E } 1613 1613 break; 1614 1614 1615 1615 case 10: 1616 #line 14 1"src/q_parser.y"1616 #line 143 "src/q_parser.y" 1617 1617 { T (yyval.bcls) = get_bool_cls((yyvsp[(1) - (1)].query), BC_SHOULD); E } 1618 1618 break; 1619 1619 1620 1620 case 12: 1621 #line 14 4"src/q_parser.y"1621 #line 146 "src/q_parser.y" 1622 1622 { T if ((yyvsp[(1) - (3)].query)) sscanf((yyvsp[(3) - (3)].str),"%f",&((yyvsp[(1) - (3)].query)->boost)); (yyval.query)=(yyvsp[(1) - (3)].query); E } 1623 1623 break; 1624 1624 1625 1625 case 14: 1626 #line 14 7"src/q_parser.y"1626 #line 149 "src/q_parser.y" 1627 1627 { T (yyval.query) = bq_new_max(true, qp->max_clauses); E } 1628 1628 break; 1629 1629 1630 1630 
case 15: 1631 #line 1 48"src/q_parser.y"1631 #line 150 "src/q_parser.y" 1632 1632 { T (yyval.query) = get_bool_q((yyvsp[(2) - (3)].bclss)); E } 1633 1633 break; 1634 1634 1635 1635 case 20: 1636 #line 15 4"src/q_parser.y"1636 #line 156 "src/q_parser.y" 1637 1637 { FLDS((yyval.query), get_term_q(qp, field, (yyvsp[(1) - (1)].str))); Y} 1638 1638 break; 1639 1639 1640 1640 case 21: 1641 #line 15 5"src/q_parser.y"1641 #line 157 "src/q_parser.y" 1642 1642 { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (3)].str), (yyvsp[(3) - (3)].str))); Y} 1643 1643 break; 1644 1644 1645 1645 case 22: 1646 #line 15 6"src/q_parser.y"1646 #line 158 "src/q_parser.y" 1647 1647 { FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[(1) - (2)].str), NULL)); Y} 1648 1648 break; 1649 1649 1650 1650 case 23: 1651 #line 1 58"src/q_parser.y"1651 #line 160 "src/q_parser.y" 1652 1652 { FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[(1) - (1)].str))); Y} 1653 1653 break; 1654 1654 1655 1655 case 24: 1656 #line 16 0"src/q_parser.y"1656 #line 162 "src/q_parser.y" 1657 1657 { qp->fields = qp->def_fields; } 1658 1658 break; 1659 1659 1660 1660 case 25: 1661 #line 16 1"src/q_parser.y"1661 #line 163 "src/q_parser.y" 1662 1662 { (yyval.query) = (yyvsp[(3) - (4)].query); } 1663 1663 break; 1664 1664 1665 1665 case 26: 1666 #line 16 2"src/q_parser.y"1666 #line 164 "src/q_parser.y" 1667 1667 { qp->fields = qp->all_fields; } 1668 1668 break; 1669 1669 1670 1670 case 27: 1671 #line 16 2"src/q_parser.y"1671 #line 164 "src/q_parser.y" 1672 1672 {qp->fields = qp->def_fields;} 1673 1673 break; 1674 1674 1675 1675 case 28: 1676 #line 16 3"src/q_parser.y"1676 #line 165 "src/q_parser.y" 1677 1677 { (yyval.query) = (yyvsp[(4) - (5)].query); } 1678 1678 break; 1679 1679 1680 1680 case 29: 1681 #line 16 5"src/q_parser.y"1681 #line 167 "src/q_parser.y" 1682 1682 { (yyval.hashset) = first_field(qp, (yyvsp[(1) - (1)].str)); } 1683 1683 break; 1684 1684 1685 1685 case 30: 1686 #line 16 6"src/q_parser.y"1686 #line 
168 "src/q_parser.y" 1687 1687 { (yyval.hashset) = add_field(qp, (yyvsp[(3) - (3)].str));} 1688 1688 break; 1689 1689 1690 1690 case 31: 1691 #line 1 68"src/q_parser.y"1691 #line 170 "src/q_parser.y" 1692 1692 { (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (3)].phrase), NULL); } 1693 1693 break; 1694 1694 1695 1695 case 32: 1696 #line 1 69"src/q_parser.y"1696 #line 171 "src/q_parser.y" 1697 1697 { (yyval.query) = get_phrase_q(qp, (yyvsp[(2) - (5)].phrase), (yyvsp[(5) - (5)].str)); } 1698 1698 break; 1699 1699 1700 1700 case 33: 1701 #line 17 0"src/q_parser.y"1701 #line 172 "src/q_parser.y" 1702 1702 { (yyval.query) = NULL; } 1703 1703 break; 1704 1704 1705 1705 case 34: 1706 #line 17 1"src/q_parser.y"1706 #line 173 "src/q_parser.y" 1707 1707 { (yyval.query) = NULL; (void)(yyvsp[(4) - (4)].str);} 1708 1708 break; 1709 1709 1710 1710 case 35: 1711 #line 17 3"src/q_parser.y"1711 #line 175 "src/q_parser.y" 1712 1712 { (yyval.phrase) = ph_first_word((yyvsp[(1) - (1)].str)); } 1713 1713 break; 1714 1714 1715 1715 case 36: 1716 #line 17 4"src/q_parser.y"1716 #line 176 "src/q_parser.y" 1717 1717 { (yyval.phrase) = ph_first_word(NULL); } 1718 1718 break; 1719 1719 1720 1720 case 37: 1721 #line 17 5"src/q_parser.y"1721 #line 177 "src/q_parser.y" 1722 1722 { (yyval.phrase) = ph_add_word((yyvsp[(1) - (2)].phrase), (yyvsp[(2) - (2)].str)); } 1723 1723 break; 1724 1724 1725 1725 case 38: 1726 #line 17 6"src/q_parser.y"1726 #line 178 "src/q_parser.y" 1727 1727 { (yyval.phrase) = ph_add_word((yyvsp[(1) - (3)].phrase), NULL); } 1728 1728 break; 1729 1729 1730 1730 case 39: 1731 #line 17 7"src/q_parser.y"1731 #line 179 "src/q_parser.y" 1732 1732 { (yyval.phrase) = ph_add_multi_word((yyvsp[(1) - (3)].phrase), (yyvsp[(3) - (3)].str)); } 1733 1733 break; 1734 1734 1735 1735 case 40: 1736 #line 1 79"src/q_parser.y"1736 #line 181 "src/q_parser.y" 1737 1737 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), true, true)); Y} 1738 1738 break; 1739 1739 
1740 1740 case 41: 1741 #line 18 0"src/q_parser.y"1741 #line 182 "src/q_parser.y" 1742 1742 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), true, false)); Y} 1743 1743 break; 1744 1744 1745 1745 case 42: 1746 #line 18 1"src/q_parser.y"1746 #line 183 "src/q_parser.y" 1747 1747 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), false, true)); Y} 1748 1748 break; 1749 1749 1750 1750 case 43: 1751 #line 18 2"src/q_parser.y"1751 #line 184 "src/q_parser.y" 1752 1752 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].str), false, false)); Y} 1753 1753 break; 1754 1754 1755 1755 case 44: 1756 #line 18 3"src/q_parser.y"1756 #line 185 "src/q_parser.y" 1757 1757 { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str), false, false)); Y} 1758 1758 break; 1759 1759 1760 1760 case 45: 1761 #line 18 4"src/q_parser.y"1761 #line 186 "src/q_parser.y" 1762 1762 { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (3)].str), false, true)); Y} 1763 1763 break; 1764 1764 1765 1765 case 46: 1766 #line 18 5"src/q_parser.y"1766 #line 187 "src/q_parser.y" 1767 1767 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str), NULL,true, false)); Y} 1768 1768 break; 1769 1769 1770 1770 case 47: 1771 #line 18 6"src/q_parser.y"1771 #line 188 "src/q_parser.y" 1772 1772 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (3)].str), NULL,false, false)); Y} 1773 1773 break; 1774 1774 1775 1775 case 48: 1776 #line 18 7"src/q_parser.y"1776 #line 189 "src/q_parser.y" 1777 1777 { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(2) - (2)].str), false, false)); Y} 1778 1778 break; 1779 1779 1780 1780 case 49: 1781 #line 1 88"src/q_parser.y"1781 #line 190 "src/q_parser.y" 1782 1782 { FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[(3) - (3)].str), false, true)); Y} 1783 1783 break; 1784 1784 1785 1785 case 50: 1786 #line 1 89"src/q_parser.y"1786 #line 191 
"src/q_parser.y" 1787 1787 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(3) - (3)].str), NULL,true, false)); Y} 1788 1788 break; 1789 1789 1790 1790 case 51: 1791 #line 19 0"src/q_parser.y"1791 #line 192 "src/q_parser.y" 1792 1792 { FLDS((yyval.query), get_r_q(qp, field, (yyvsp[(2) - (2)].str), NULL,false, false)); Y} 1793 1793 break; … … 2009 2009 2010 2010 2011 #line 19 2"src/q_parser.y"2011 #line 194 "src/q_parser.y" 2012 2012 2013 2013 -
c/src/q_parser.y
r553474 rcafd8d 1 /***************************************************************************** 2 * QueryParser 3 * =========== 4 * 5 * Synopsis 6 * -------- 7 * 8 * === qp_parse 9 * 10 * The main QueryParser method is +qp_parse+. It gets called with the query 11 * string. The first thing it does is to clean the query string if 12 * ((QueryParser *)self)->clean_str is set to true. The cleaning is done with 13 * the qp_clean_str. 14 * 15 * It then calls the yacc parser which will set self->result to the parsed 16 * query. If parsing fails in any way, self->result should be set to NULL, in 17 * which case qp_parse does one of two things, depending on the value of 18 * self->handle_parse_errors. If it is set to true, qp_parse attempts to do a 19 * very basic parsing of the query by ignoring all special characters and 20 * parsing the query as a plain boolean query. If it is set to false, qp_parse 21 * will raise a PARSE_ERROR. 22 * 23 * === The Lexer 24 * 25 * yylex is the lexing method called by the QueryParser. It breaks the query 26 * up into special characters ( "&:()[]{}!\"~^|<>=*?+-" ) and tokens (QWRD, 27 * WILD_STR, AND['AND', '&&'], OR['OR', '||'], REQ['REQ', '+'], NOT['NOT', 28 * '-', '~']). QWRD tokens are query word tokens which are made up of 29 * characters other than the special characters. They can also contain special 30 * characters when escaped with a backslash '\'. WILD_STR is the same as QWRD 31 * except that it may also contain '?' and '*' characters. 32 * 33 * === The Parser 34 * 35 * For a better understanding of how the query parser works, it is a good 36 * idea to study the Ferret Query Language (FQL) described below. Once you 37 * understand FQL the one tricky part that needs to be mentioned is how fields 38 * are handled. The QueryParser knows about two sets of fields, the default 39 * search fields and the set of all fields in the index. When no fields are 40 * specified then the default fields are used.
The '*:' field specifier will 41 * search all fields contained in the all_fields set. Otherwise all fields 42 * specified in the field descriptor separated by '|' will be searched. For 43 * example 'title|content:' will search the title and content fields. When 44 * fields are specified like this, the parser will push the fields onto a 45 * stack and all queries modified by the field specifier will be applied to 46 * the fields on top of the stack. This is where the FLDS macro comes into 47 * play. It takes the current query building function in the parser and calls 48 * it for all fields on top of the stack. 49 * 50 * Ferret Query Language (FQL) 51 * =========================== 52 * 53 * FIXME to be continued... 54 *****************************************************************************/ 1 55 %{ 2 56 #include <string.h> … … 878 932 } 879 933 934 /***************************************************************************** 935 * qp_clean_str method which basically scans the query string and ensures that 936 * all open and close parentheses '()' and quotes '"' are balanced. It does 937 * this by inserting or appending extra parentheses or quotes which is not 938 * necessarily going to be exactly what the user wanted but it will help 939 * prevent the parser from failing so it's the best we can do at this stage. 940 * It also checks 941 *****************************************************************************/ 880 942 char *qp_clean_str(char *str) 881 943 {
