query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- 0);
+ 0,
+ NULL);
PG_RETURN_TSQUERY(query);
}
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- P_TSQ_PLAIN);
+ P_TSQ_PLAIN,
+ NULL);
PG_RETURN_POINTER(query);
}
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- P_TSQ_PLAIN);
+ P_TSQ_PLAIN,
+ NULL);
PG_RETURN_TSQUERY(query);
}
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- P_TSQ_WEB);
+ P_TSQ_WEB,
+ NULL);
PG_RETURN_TSQUERY(query);
}
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/*
* get token from query string
*
- * *operator is filled in with OP_* when return values is PT_OPR,
- * but *weight could contain a distance value in case of phrase operator.
- * *strval, *lenval and *weight are filled in when return value is PT_VAL
+ * All arguments except "state" are output arguments.
*
+ * If return value is PT_OPR, then *operator is filled with an OP_* code
+ * and *weight will contain a distance value in case of phrase operator.
+ *
+ * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
+ * are filled.
+ *
+ * If PT_ERR is returned then a soft error has occurred. If state->escontext
+ * isn't already filled then this should be reported as a generic parse error.
*/
typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
int *lenval, char **strval,
/* state for value's parser */
TSVectorParseState valstate;
+
+ /* context object for soft errors - must match valstate's escontext */
+ Node *escontext;
};
/*
if (ptr == endptr)
return false;
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
- ereport(ERROR,
+ ereturn(pstate->escontext, false,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
MAXENTRYPOS)));
}
else if (t_iseq(state->buf, ':'))
{
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsquery: \"%s\"",
- state->buffer)));
+ /* generic syntax error message is fine */
+ return PT_ERR;
}
else if (!t_isspace(state->buf))
{
state->state = WAITOPERATOR;
return PT_VAL;
}
+ else if (SOFT_ERROR_OCCURRED(state->escontext))
+ {
+ /* gettoken_tsvector reported a soft error */
+ return PT_ERR;
+ }
else if (state->state == WAITFIRSTOPERAND)
{
return PT_END;
}
else
- ereport(ERROR,
+ ereturn(state->escontext, PT_ERR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no operand in tsquery: \"%s\"",
state->buffer)));
*operator = OP_PHRASE;
return PT_OPR;
}
+ else if (SOFT_ERROR_OCCURRED(state->escontext))
+ {
+ /* parse_phrase_operator reported a soft error */
+ return PT_ERR;
+ }
else if (t_iseq(state->buf, ')'))
{
state->buf++;
state->state = WAITOPERATOR;
return PT_VAL;
}
+ else if (SOFT_ERROR_OCCURRED(state->escontext))
+ {
+ /* gettoken_tsvector reported a soft error */
+ return PT_ERR;
+ }
else if (state->state == WAITFIRSTOPERAND)
{
return PT_END;
QueryOperand *tmp;
if (distance >= MAXSTRPOS)
- ereport(ERROR,
+ ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("value is too big in tsquery: \"%s\"",
state->buffer)));
if (lenval >= MAXSTRLEN)
- ereport(ERROR,
+ ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("operand is too long in tsquery: \"%s\"",
state->buffer)));
pg_crc32 valcrc;
if (lenval >= MAXSTRLEN)
- ereport(ERROR,
+ ereturn(state->escontext,,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long in tsquery: \"%s\"",
state->buffer)));
return;
case PT_ERR:
default:
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error in tsquery: \"%s\"",
- state->buffer)));
+ /* don't overwrite a soft error saved by gettoken function */
+ if (!SOFT_ERROR_OCCURRED(state->escontext))
+ errsave(state->escontext,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsquery: \"%s\"",
+ state->buffer)));
+ return;
}
+ /* detect soft error in pushval or recursion */
+ if (SOFT_ERROR_OCCURRED(state->escontext))
+ return;
}
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
/*
+ * Parse the tsquery stored in "buf".
+ *
* Each value (operand) in the query is passed to pushval. pushval can
* transform the simple value to an arbitrarily complex expression using
* pushValue and pushOperator. It must push a single value with pushValue,
*
* opaque is passed on to pushval as is, pushval can use it to store its
* private state.
+ *
+ * The pushval function can record soft errors via escontext.
+ * Callers must check SOFT_ERROR_OCCURRED to detect that.
+ *
+ * A bitmask of flags (see ts_utils.h) and an error context object
+ * can be provided as well. If a soft error occurs, NULL is returned.
*/
TSQuery
parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
- int flags)
+ int flags,
+ Node *escontext)
{
struct TSQueryParserStateData state;
int i;
int commonlen;
QueryItem *ptr;
ListCell *cell;
+ bool noisy;
bool needcleanup;
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
else
state.gettoken = gettoken_query_standard;
+ /* emit nuisance NOTICEs only if not doing soft errors */
+ noisy = !(escontext && IsA(escontext, ErrorSaveContext));
+
/* init state */
state.buffer = buf;
state.buf = buf;
state.count = 0;
state.state = WAITFIRSTOPERAND;
state.polstr = NIL;
+ state.escontext = escontext;
/* init value parser's state */
- state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
+ state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
/* init list of operand */
state.sumlen = 0;
close_tsvector_parser(state.valstate);
+ if (SOFT_ERROR_OCCURRED(escontext))
+ return NULL;
+
if (state.polstr == NIL)
{
- ereport(NOTICE,
- (errmsg("text-search query doesn't contain lexemes: \"%s\"",
- state.buffer)));
+ if (noisy)
+ ereport(NOTICE,
+ (errmsg("text-search query doesn't contain lexemes: \"%s\"",
+ state.buffer)));
query = (TSQuery) palloc(HDRSIZETQ);
SET_VARSIZE(query, HDRSIZETQ);
query->size = 0;
}
if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
- ereport(ERROR,
+ ereturn(escontext, NULL,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("tsquery is too large")));
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
* If there are QI_VALSTOP nodes, delete them and simplify the tree.
*/
if (needcleanup)
- query = cleanup_tsquery_stopwords(query);
+ query = cleanup_tsquery_stopwords(query, noisy);
return query;
}
tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
+ Node *escontext = fcinfo->context;
- PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
+ PG_RETURN_TSQUERY(parse_tsquery(in,
+ pushval_asis,
+ PointerGetDatum(NULL),
+ 0,
+ escontext));
}
/*
* Remove QI_VALSTOP (stopword) nodes from TSQuery.
*/
TSQuery
-cleanup_tsquery_stopwords(TSQuery in)
+cleanup_tsquery_stopwords(TSQuery in, bool noisy)
{
int32 len,
lenstr,
root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
if (root == NULL)
{
- ereport(NOTICE,
- (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
+ if (noisy)
+ ereport(NOTICE,
+ (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
out = palloc(HDRSIZETQ);
out->size = 0;
SET_VARSIZE(out, HDRSIZETQ);
#include "postgres.h"
#include "libpq/pqformat.h"
+#include "nodes/miscnodes.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
tsvectorin(PG_FUNCTION_ARGS)
{
char *buf = PG_GETARG_CSTRING(0);
+ Node *escontext = fcinfo->context;
TSVectorParseState state;
WordEntryIN *arr;
int totallen;
char *cur;
int buflen = 256; /* allocated size of tmpbuf */
- state = init_tsvector_parser(buf, 0);
+ state = init_tsvector_parser(buf, 0, escontext);
arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
{
if (toklen >= MAXSTRLEN)
- ereport(ERROR,
+ ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("word is too long (%ld bytes, max %ld bytes)",
(long) toklen,
(long) (MAXSTRLEN - 1))));
if (cur - tmpbuf > MAXSTRPOS)
- ereport(ERROR,
+ ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
(long) (cur - tmpbuf), (long) MAXSTRPOS)));
close_tsvector_parser(state);
+ /* Did gettoken_tsvector fail? */
+ if (SOFT_ERROR_OCCURRED(escontext))
+ PG_RETURN_NULL();
+
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
else
buflen = 0;
if (buflen > MAXSTRPOS)
- ereport(ERROR,
+ ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
stroff += arr[i].entry.len;
if (arr[i].entry.haspos)
{
+ /* This should be unreachable because of MAXNUMPOS restrictions */
if (arr[i].poslen > 0xFFFF)
elog(ERROR, "positions array too long");
/*
* Private state of tsvector parser. Note that tsquery also uses this code to
- * parse its input, hence the boolean flags. The two flags are both true or
- * both false in current usage, but we keep them separate for clarity.
+ * parse its input, hence the boolean flags. The oprisdelim and is_tsquery
+ * flags are both true or both false in current usage, but we keep them
+ * separate for clarity.
+ *
+ * If oprisdelim is set, the following characters are treated as delimiters
+ * (in addition to whitespace): ! | & ( )
+ *
* is_tsquery affects *only* the content of error messages.
+ *
+ * is_web can be true to further modify tsquery parsing.
+ *
+ * If escontext is an ErrorSaveContext node, then soft errors can be
+ * captured there rather than being thrown.
*/
struct TSVectorParseStateData
{
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
bool is_web; /* we're in websearch_to_tsquery() */
+ Node *escontext; /* for soft error reporting */
};
/*
- * Initializes parser for the input string. If oprisdelim is set, the
- * following characters are treated as delimiters in addition to whitespace:
- * ! | & ( )
+ * Initializes a parser state object for the given input string.
+ * A bitmask of flags (see ts_utils.h) and an error context object
+ * can be provided as well.
*/
TSVectorParseState
-init_tsvector_parser(char *input, int flags)
+init_tsvector_parser(char *input, int flags, Node *escontext)
{
TSVectorParseState state;
state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
state->is_web = (flags & P_TSV_IS_WEB) != 0;
+ state->escontext = escontext;
return state;
}
/*
* Reinitializes parser to parse 'input', instead of previous input.
+ *
+ * Note that bufstart (the string reported in errors) is not changed.
*/
void
reset_tsvector_parser(TSVectorParseState state, char *input)
#define WAITPOSDELIM 7
#define WAITCHARCMPLX 8
-#define PRSSYNTAXERROR prssyntaxerror(state)
+#define PRSSYNTAXERROR return prssyntaxerror(state)
-static void
+static bool
prssyntaxerror(TSVectorParseState state)
{
- ereport(ERROR,
+ errsave(state->escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
state->is_tsquery ?
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
+ /* In a soft-error situation, return false as a convenience for the caller */
+ return false;
}
/*
* Get next token from string being parsed. Returns true if successful,
- * false if end of input string is reached. On success, these output
- * parameters are filled in:
+ * false if end of input string is reached or a soft error occurs.
+ *
+ * On success, these output parameters are filled in:
*
* *strval pointer to token
* *lenval length of *strval
* *poslen number of elements in *pos_ptr
* *endptr scan resumption point
*
- * Pass NULL for unwanted output parameters.
+ * Pass NULL for any unwanted output parameters.
+ *
+ * If state->escontext is an ErrorSaveContext, then caller must check
+ * SOFT_ERROR_OCCURRED() to determine whether a "false" result means
+ * error or normal end-of-string.
*/
bool
gettoken_tsvector(TSVectorParseState state,
else if (statecode == WAITNEXTCHAR)
{
if (*(state->prsbuf) == '\0')
- ereport(ERROR,
+ ereturn(state->escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character: \"%s\"",
state->bufstart)));
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
/* we cannot get here in tsquery, so no need for 2 errmsgs */
if (WEP_GETPOS(pos[npos - 1]) == 0)
- ereport(ERROR,
+ ereturn(state->escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector: \"%s\"",
state->bufstart)));
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState;
+/* flag bits that can be passed to init_tsvector_parser: */
#define P_TSV_OPR_IS_DELIM (1 << 0)
#define P_TSV_IS_TSQUERY (1 << 1)
#define P_TSV_IS_WEB (1 << 2)
-extern TSVectorParseState init_tsvector_parser(char *input, int flags);
+extern TSVectorParseState init_tsvector_parser(char *input, int flags,
+ Node *escontext);
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state,
char **strval, int *lenval,
* QueryOperand struct */
bool prefix);
+/* flag bits that can be passed to parse_tsquery: */
#define P_TSQ_PLAIN (1 << 0)
#define P_TSQ_WEB (1 << 1)
extern TSQuery parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
- int flags);
+ int flags,
+ Node *escontext);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
* TSQuery Utilities
*/
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
-extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
+extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
typedef struct QTNode
{
ERROR: syntax error in tsvector: "'' '1' '2'"
LINE 1: SELECT $$'' '1' '2'$$::tsvector;
^
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('foo', 'tsvector');
+ pg_input_is_valid
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid($$''$$, 'tsvector');
+ pg_input_is_valid
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message($$''$$, 'tsvector');
+ pg_input_error_message
+--------------------------------
+ syntax error in tsvector: "''"
+(1 row)
+
--Base tsquery test
SELECT '1'::tsquery;
tsquery
!!'a' & !!'b'
(1 row)
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('foo', 'tsquery');
+ pg_input_is_valid
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('foo!', 'tsquery');
+ pg_input_is_valid
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('foo!', 'tsquery');
+ pg_input_error_message
+---------------------------------
+ syntax error in tsquery: "foo!"
+(1 row)
+
+SELECT pg_input_error_message('a <100000> b', 'tsquery');
+ pg_input_error_message
+---------------------------------------------------------------------------------------
+ distance in phrase operator must be an integer value between zero and 16384 inclusive
+(1 row)
+
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
true
SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('foo', 'tsvector');
+SELECT pg_input_is_valid($$''$$, 'tsvector');
+SELECT pg_input_error_message($$''$$, 'tsvector');
+
--Base tsquery test
SELECT '1'::tsquery;
SELECT '1 '::tsquery;
SELECT '!!a & b'::tsquery;
SELECT '!!a & !!b'::tsquery;
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('foo', 'tsquery');
+SELECT pg_input_is_valid('foo!', 'tsquery');
+SELECT pg_input_error_message('foo!', 'tsquery');
+SELECT pg_input_error_message('a <100000> b', 'tsquery');
+
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false";