- change the alignment requirement of lexemes in TSVector slightly.
Lexeme strings were always padded to 2-byte aligned length to make sure
that if there's position array (uint16[]) it has the right alignment.
The patch changes that so that the padding is not done when there's no
positions. That makes the storage of tsvectors without positions
slightly more compact.
- added some #include "miscadmin.h" lines I missed in the earlier when I
added calls to check_stack_depth().
- Reimplement the send/recv functions, and added a comment
above them describing the on-wire format. The CRC is now recalculated in
tsquery as well per previous discussion.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.3 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
gin_extract_tsvector(PG_FUNCTION_ARGS)
{
TSVector vector = PG_GETARG_TSVECTOR(0);
- uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
Datum *entries = NULL;
*nentries = 0;
gin_extract_query(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(0);
- uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
StrategyNumber strategy = PG_GETARG_UINT16(2);
Datum *entries = NULL;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.4 2007/09/07 15:35:10 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.5 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"
-#include "nodes/bitmapset.h"
struct TSQueryParserStateData
}
}
-/*
- * Fills in the left-fields previously left unfilled. The input
- * QueryItems must be in polish (prefix) notation.
- */
static void
-findoprnd(QueryItem *ptr, uint32 *pos)
+findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
{
/* since this function recurses, it could be driven to stack overflow. */
check_stack_depth();
+ if (*pos >= nnodes)
+ elog(ERROR, "malformed tsquery; operand not found");
+
if (ptr[*pos].type == QI_VAL ||
ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
* they haven't been cleaned
{
ptr[*pos].operator.left = 1;
(*pos)++;
- findoprnd(ptr, pos);
+ findoprnd_recurse(ptr, pos, nnodes);
}
else
{
Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
(*pos)++;
- findoprnd(ptr, pos);
+ findoprnd_recurse(ptr, pos, nnodes);
curitem->left = *pos - tmp;
- findoprnd(ptr, pos);
+ findoprnd_recurse(ptr, pos, nnodes);
}
}
}
+
+/*
+ * Fills in the left-fields previously left unfilled. The input
+ * QueryItems must be in polish (prefix) notation.
+ */
+static void
+findoprnd(QueryItem *ptr, int size)
+{
+ uint32 pos;
+
+ pos = 0;
+ findoprnd_recurse(ptr, &pos, size);
+
+ if (pos != size)
+ elog(ERROR, "malformed tsquery; extra nodes");
+}
+
+
/*
* Each value (operand) in the query is be passed to pushval. pushval can
* transform the simple value to an arbitrarily complex expression using
TSQuery query;
int commonlen;
QueryItem *ptr;
- uint32 pos = 0;
ListCell *cell;
/* init state */
pfree(state.op);
/* Set left operand pointers for every operator. */
- pos = 0;
- findoprnd(ptr, &pos);
+ findoprnd(ptr, query->size);
return query;
}
PG_RETURN_CSTRING(nrm.buf);
}
+/*
+ * Binary Input / Output functions. The binary format is as follows:
+ *
+ * uint32 number of operators/operands in the query
+ *
+ * Followed by the operators and operands, in prefix notation. For each
+ * operand:
+ *
+ * uint8 type, QI_VAL
+ * uint8 weight
+ * operand text in client encoding, null-terminated
+ *
+ * For each operator:
+ * uint8 type, QI_OPR
+ * uint8 operator, one of OP_AND, OP_OR, OP_NOT.
+ */
Datum
tsquerysend(PG_FUNCTION_ARGS)
{
pq_begintypsend(&buf);
- pq_sendint(&buf, query->size, sizeof(int32));
+ pq_sendint(&buf, query->size, sizeof(uint32));
for (i = 0; i < query->size; i++)
{
pq_sendint(&buf, item->type, sizeof(item->type));
switch(item->type)
{
case QI_VAL:
- pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
- pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
- pq_sendint(&buf, item->operand.length, sizeof(int16));
+ pq_sendint(&buf, item->operand.weight, sizeof(uint8));
+ pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
/* istrue flag is just for temporary use in tsrank.c/Cover,
* so we don't need to transfer that */
break;
case QI_OPR:
pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
- if (item->operator.oper != OP_NOT)
- pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
break;
default:
elog(ERROR, "unknown tsquery node type %d", item->type);
item++;
}
- item = GETQUERY(query);
- for (i = 0; i < query->size; i++)
- {
- if (item->type == QI_VAL)
- pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
- item++;
- }
-
PG_FREE_IF_COPY(query, 0);
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSQuery query;
int i,
- size,
len;
QueryItem *item;
- int datalen = 0;
+ int datalen;
char *ptr;
- Bitmapset *parentset = NULL;
+ uint32 size;
+ const char **operands;
size = pq_getmsgint(buf, sizeof(uint32));
- if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
+ if (size > (MaxAllocSize / sizeof(QueryItem)))
elog(ERROR, "invalid size of tsquery");
- len = HDRSIZETQ + sizeof(QueryItem) * size;
+ /* Allocate space to temporarily hold operand strings */
+ operands = palloc(size * sizeof(char *));
+ /* Allocate space for all the QueryItems. */
+ len = HDRSIZETQ + sizeof(QueryItem) * size;
query = (TSQuery) palloc0(len);
query->size = size;
item = GETQUERY(query);
+ datalen = 0;
for (i = 0; i < size; i++)
{
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
- switch(item->type)
+ if (item->type == QI_VAL)
{
- case QI_VAL:
- item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
- item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
- item->operand.length = pq_getmsgint(buf, sizeof(int16));
-
- /* Check that the weight bitmap is valid */
- if (item->operand.weight < 0 || item->operand.weight > 0xF)
- elog(ERROR, "invalid weight bitmap");
-
- /* XXX: We don't check that the CRC is valid. Actually, if we
- * bothered to calculate it to verify, there would be no need
- * to transfer it.
- */
-
- /*
- * Check that datalen doesn't grow too large. Without the
- * check, a malicious client could induce a buffer overflow
- * by sending a tsquery whose size exceeds 2GB. datalen
- * would overflow, we would allocate a too small buffer below,
- * and overflow the buffer. Because operand.length is a 20-bit
- * field, adding one such value to datalen must exceed
- * MaxAllocSize before wrapping over the 32-bit datalen field,
- * so this check will protect from it.
- */
- if (datalen > MAXSTRLEN)
- elog(ERROR, "invalid tsquery; total operand length exceeded");
-
- /* We can calculate distance from datalen, no need to send it
- * across the wire. If we did, we would have to check that
- * it's valid anyway.
- */
- item->operand.distance = datalen;
-
- datalen += item->operand.length + 1; /* \0 */
-
- break;
- case QI_OPR:
- item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
- if (item->operator.oper != OP_NOT &&
- item->operator.oper != OP_OR &&
- item->operator.oper != OP_AND)
- elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
-
- /*
- * Check that no previous operator node points to the right
- * operand. That would mean that the operand node
- * has two parents.
- */
- if (bms_is_member(i + 1, parentset))
- elog(ERROR, "malformed query tree");
-
- parentset = bms_add_member(parentset, i + 1);
-
- if(item->operator.oper != OP_NOT)
- {
- uint32 left = (uint32) pq_getmsgint(buf, sizeof(uint32));
-
- /*
- * Right operand is implicitly at "this+1". Don't allow
- * left to point to the right operand, or to self.
- */
- if (left <= 1 || i + left >= size)
- elog(ERROR, "invalid pointer to left operand");
-
- /*
- * Check that no previous operator node points to the left
- * operand.
- */
- if (bms_is_member(i + left, parentset))
- elog(ERROR, "malformed query tree");
-
- parentset = bms_add_member(parentset, i + left);
-
- item->operator.left = left;
- }
- else
- item->operator.left = 1; /* do not leave uninitialized fields */
-
- if (i == size - 1)
- elog(ERROR, "invalid pointer to right operand");
- break;
- default:
- elog(ERROR, "unknown tsquery node type %d", item->type);
+ size_t val_len; /* length after recoding to server encoding */
+ uint8 weight;
+ const char *val;
+ pg_crc32 valcrc;
+
+ weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
+ val = pq_getmsgstring(buf);
+ val_len = strlen(val);
+
+ /* Sanity checks */
+
+ if (weight > 0xF)
+ elog(ERROR, "invalid tsquery; invalid weight bitmap");
+
+ if (val_len > MAXSTRLEN)
+ elog(ERROR, "invalid tsquery; operand too long");
+
+ if (datalen > MAXSTRPOS)
+ elog(ERROR, "invalid tsquery; total operand length exceeded");
+
+ /* Looks valid. */
+
+ INIT_CRC32(valcrc);
+ COMP_CRC32(valcrc, val, val_len);
+ FIN_CRC32(valcrc);
+
+ item->operand.weight = weight;
+ item->operand.valcrc = (int32) valcrc;
+ item->operand.length = val_len;
+ item->operand.distance = datalen;
+
+ /*
+ * Operand strings are copied to the final struct after this loop;
+ * here we just collect them to an array
+ */
+ operands[i] = val;
+
+ datalen += val_len + 1; /* + 1 for the '\0' terminator */
+ }
+ else if (item->type == QI_OPR)
+ {
+ int8 oper;
+ oper = (int8) pq_getmsgint(buf, sizeof(int8));
+ if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
+ elog(ERROR, "invalid tsquery; unknown operator type %d", (int) oper);
+ if (i == size - 1)
+ elog(ERROR, "invalid pointer to right operand");
+
+ item->operator.oper = oper;
}
+ else
+ elog(ERROR, "unknown tsquery node type %d", item->type);
item++;
}
- /* Now check that each node, except the root, has a parent. We
- * already checked above that no node has more than one parent. */
- if (bms_num_members(parentset) != size - 1 && size != 0)
- elog(ERROR, "malformed query tree");
-
- bms_free( parentset );
-
+ /* Enlarge buffer to make room for the operand values. */
query = (TSQuery) repalloc(query, len + datalen);
-
item = GETQUERY(query);
ptr = GETOPERAND(query);
+
+ /*
+ * Fill in the left-pointers. Checks that the tree is well-formed
+ * as a side-effect.
+ */
+ findoprnd(item, size);
+
+ /* Copy operands to output struct */
for (i = 0; i < size; i++)
{
if (item->type == QI_VAL)
{
- memcpy(ptr,
- pq_getmsgbytes(buf, item->operand.length),
- item->operand.length);
- ptr += item->operand.length;
- *ptr++ = '\0';
+ memcpy(ptr, operands[i], item->operand.length + 1);
+ ptr += item->operand.length + 1;
}
item++;
}
+ pfree(operands);
+
Assert(ptr - GETOPERAND(query) == datalen);
SET_VARSIZE(query, len + datalen);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
+#include "miscadmin.h"
typedef struct NODE
{
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "executor/spi.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
+#include "miscadmin.h"
static int
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
+#include "miscadmin.h"
QTNode *
QT2QTN(QueryItem * in, char *operand)
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
+#include "miscadmin.h"
static float weights[] = {0.1, 0.2, 0.4, 1.0};
return res;
}
+/* A dummy WordEntryPos array to use when haspos is false */
static WordEntryPos POSNULL[] = {
- 0,
+ 1, /* Number of elements that follow */
0
};
}
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
memset(pos, 0, sizeof(uint16 *) * q->size);
- *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
for (i = 0; i < size; i++)
QueryOperand **item;
int size = q->size;
- *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
item = SortAndUniqItems(q, &size);
for (i = 0; i < size; i++)
DocRepresentation *doc;
char *operand;
- *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
operand = GETOPERAND(query);
reset_istrue_flag(query);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
}
static int
-compareentry(const void *a, const void *b, void *arg)
+compareentry(const void *va, const void *vb, void *arg)
{
char *BufferStr = (char *) arg;
+ WordEntryIN *a = (WordEntryIN *) va;
+ WordEntryIN *b = (WordEntryIN *) vb;
- if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
+ if (a->entry.len == b->entry.len)
{
- return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
- &BufferStr[((WordEntryIN *) b)->entry.pos],
- ((WordEntryIN *) a)->entry.len);
+ return strncmp(&BufferStr[a->entry.pos],
+ &BufferStr[b->entry.pos],
+ a->entry.len);
}
- return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
+ return (a->entry.len > b->entry.len) ? 1 : -1;
}
static int
a->poslen = uniquePos(a->pos, a->poslen);
*outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
}
+ else
+ *outbuflen = a->entry.len;
+
return l;
}
res = a;
{
if (res->entry.haspos)
{
+ *outbuflen += SHORTALIGN(res->entry.len);
res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += res->poslen * sizeof(WordEntryPos);
}
- *outbuflen += SHORTALIGN(res->entry.len);
+ else
+ *outbuflen += res->entry.len;
res++;
memcpy(res, ptr, sizeof(WordEntryIN));
}
}
ptr++;
}
+
+ /* add last item */
+
if (res->entry.haspos)
{
+ *outbuflen += SHORTALIGN(res->entry.len);
+
res->poslen = uniquePos(res->pos, res->poslen);
*outbuflen += res->poslen * sizeof(WordEntryPos);
}
- *outbuflen += SHORTALIGN(res->entry.len);
+ else
+ *outbuflen += res->entry.len;
return res + 1 - a;
}
PG_RETURN_CSTRING(outbuf);
}
+/*
+ * Binary Input / Output functions. The binary format is as follows:
+ *
+ * uint32 number of lexemes
+ *
+ * for each lexeme:
+ * lexeme text in client encoding, null-terminated
+ * uint16 number of positions
+ * for each position:
+ * uint16 WordEntryPos
+ */
+
Datum
tsvectorsend(PG_FUNCTION_ARGS)
{
pq_sendint(&buf, vec->size, sizeof(int32));
for (i = 0; i < vec->size; i++)
{
- /*
- * We are sure that sizeof(WordEntry) == sizeof(int32)
+ uint16 npos;
+
+ /* the strings in the TSVector array are not null-terminated, so
+ * we have to send the null-terminator separately
*/
- pq_sendint(&buf, *(int32 *) weptr, sizeof(int32));
+ pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
+ pq_sendbyte(&buf, '\0');
- pq_sendbytes(&buf, STRPTR(vec) + weptr->pos, weptr->len);
- if (weptr->haspos)
+ npos = POSDATALEN(vec, weptr);
+ pq_sendint(&buf, npos, sizeof(uint16));
+
+ if(npos > 0)
{
WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
- pq_sendint(&buf, POSDATALEN(vec, weptr), sizeof(WordEntryPos));
- for (j = 0; j < POSDATALEN(vec, weptr); j++)
+ for (j = 0; j < npos; j++)
pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
}
weptr++;
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSVector vec;
int i;
- uint32 size;
- WordEntry *weptr;
- int datalen = 0;
- Size len;
-
- size = pq_getmsgint(buf, sizeof(uint32));
- if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
+ int32 nentries;
+ int datalen; /* number of bytes used in the variable size area
+ * after fixed size TSVector header and WordEntries
+ */
+ Size hdrlen;
+ Size len; /* allocated size of vec */
+
+ nentries = pq_getmsgint(buf, sizeof(int32));
+ if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
elog(ERROR, "invalid size of tsvector");
- len = DATAHDRSIZE + sizeof(WordEntry) * size;
+ hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries;
- len = len * 2; /* times two to make room for lexemes */
+ len = hdrlen * 2; /* times two to make room for lexemes */
vec = (TSVector) palloc0(len);
- vec->size = size;
+ vec->size = nentries;
- weptr = ARRPTR(vec);
- for (i = 0; i < size; i++)
+ datalen = 0;
+ for (i = 0; i < nentries; i++)
{
- int32 tmp;
+ const char *lexeme;
+ uint16 npos;
+ size_t lex_len;
+
+ lexeme = pq_getmsgstring(buf);
+ npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
+
+ /* sanity checks */
+
+ lex_len = strlen(lexeme);
+ if (lex_len < 0 || lex_len > MAXSTRLEN)
+ elog(ERROR, "invalid tsvector; lexeme too long");
+
+ if (datalen > MAXSTRPOS)
+ elog(ERROR, "invalid tsvector; maximum total lexeme length exceeded");
- weptr = ARRPTR(vec) + i;
+ if (npos > MAXNUMPOS)
+ elog(ERROR, "unexpected number of positions");
/*
- * We are sure that sizeof(WordEntry) == sizeof(int32)
+ * Looks valid. Fill the WordEntry struct, and copy lexeme.
+ *
+ * But make sure the buffer is large enough first.
*/
- tmp = pq_getmsgint(buf, sizeof(int32));
- *weptr = *(WordEntry *) & tmp;
-
- while (CALCDATASIZE(size, datalen + SHORTALIGN(weptr->len)) >= len)
+ while (hdrlen + SHORTALIGN(datalen + lex_len) +
+ (npos + 1) * sizeof(WordEntryPos) >= len)
{
len *= 2;
vec = (TSVector) repalloc(vec, len);
- weptr = ARRPTR(vec) + i;
}
- memcpy(STRPTR(vec) + weptr->pos,
- pq_getmsgbytes(buf, weptr->len),
- weptr->len);
- datalen += SHORTALIGN(weptr->len);
+ vec->entries[i].haspos = (npos > 0) ? 1 : 0;
+ vec->entries[i].len = lex_len;
+ vec->entries[i].pos = datalen;
+
+ memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
+
+ datalen += lex_len;
- if (i > 0 && WordEntryCMP(weptr, weptr - 1, STRPTR(vec)) <= 0)
+ if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0)
elog(ERROR, "lexemes are unordered");
- if (weptr->haspos)
+ /* Receive positions */
+
+ if (npos > 0)
{
- uint16 j,
- npos;
+ uint16 j;
WordEntryPos *wepptr;
- npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
- if (npos > MAXNUMPOS)
- elog(ERROR, "unexpected number of positions");
-
- while (CALCDATASIZE(size, datalen + (npos + 1) * sizeof(WordEntryPos)) >= len)
+ /*
+ * Pad to 2-byte alignment if necessary. Though we used palloc0
+ * for the initial allocation, subsequent repalloc'd memory
+ * areas are not initialized to zero.
+ */
+ if (datalen != SHORTALIGN(datalen))
{
- len *= 2;
- vec = (TSVector) repalloc(vec, len);
- weptr = ARRPTR(vec) + i;
+ *(STRPTR(vec) + datalen) = '\0';
+ datalen = SHORTALIGN(datalen);
}
- memcpy(_POSDATAPTR(vec, weptr), &npos, sizeof(int16));
- wepptr = POSDATAPTR(vec, weptr);
+ memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
+
+ wepptr = POSDATAPTR(vec, &vec->entries[i]);
for (j = 0; j < npos; j++)
{
- wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(int16));
+ wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
elog(ERROR, "position information is unordered");
}
}
}
- SET_VARSIZE(vec, CALCDATASIZE(vec->size, datalen));
+ SET_VARSIZE(vec, hdrlen + datalen);
PG_RETURN_TSVECTOR(vec);
}
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
char *cur;
for (i = 0; i < in->size; i++)
- len += SHORTALIGN(arrin[i].len);
+ len += arrin[i].len;
len = CALCDATASIZE(in->size, len);
out = (TSVector) palloc0(len);
arrout[i].haspos = 0;
arrout[i].len = arrin[i].len;
arrout[i].pos = cur - STRPTR(out);
- cur += SHORTALIGN(arrout[i].len);
+ cur += arrout[i].len;
}
PG_FREE_IF_COPY(in, 0);
ptr->len = ptr1->len;
memcpy(cur, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data;
- cur += SHORTALIGN(ptr1->len);
if (ptr->haspos)
{
+ cur += SHORTALIGN(ptr1->len);
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
}
+ else
+ cur += ptr1->len;
+
ptr++;
ptr1++;
i1--;
ptr->len = ptr2->len;
memcpy(cur, data2 + ptr2->pos, ptr2->len);
ptr->pos = cur - data;
- cur += SHORTALIGN(ptr2->len);
if (ptr->haspos)
{
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+ cur += SHORTALIGN(ptr2->len);
+
if (addlen == 0)
ptr->haspos = 0;
else
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
+ else
+ cur += ptr2->len;
+
ptr++;
ptr2++;
i2--;
ptr->len = ptr1->len;
memcpy(cur, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data;
- cur += SHORTALIGN(ptr1->len);
if (ptr->haspos)
{
+ cur += SHORTALIGN(ptr1->len);
if (ptr1->haspos)
{
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
}
+ else
+ cur += ptr1->len;
+
ptr++;
ptr1++;
ptr2++;
ptr->len = ptr1->len;
memcpy(cur, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data;
- cur += SHORTALIGN(ptr1->len);
if (ptr->haspos)
{
+ cur += SHORTALIGN(ptr1->len);
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
}
+ else
+ cur += ptr1->len;
+
ptr++;
ptr1++;
i1--;
ptr->len = ptr2->len;
memcpy(cur, data2 + ptr2->pos, ptr2->len);
ptr->pos = cur - data;
- cur += SHORTALIGN(ptr2->len);
if (ptr->haspos)
{
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+ cur += SHORTALIGN(ptr2->len);
+
if (addlen == 0)
ptr->haspos = 0;
else
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
+ else
+ cur += ptr2->len;
+
ptr++;
ptr2++;
i2--;
static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
{
- WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
- uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
+ WordEntryPos *ptr = (WordEntryPos *) (chkval->values + SHORTALIGN(val->pos + val->len) + sizeof(uint16));
+ uint16 len = *((uint16 *) (chkval->values + SHORTALIGN(val->pos + val->len)));
while (len--)
{
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.3 2007/09/07 15:35:11 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.4 2007/09/07 16:03:40 teodor Exp $
*
*-------------------------------------------------------------------------
*/
* bytes from end of WordEntry array to start of
* corresponding lexeme.
* 4) Lexeme's storage:
- * SHORTALIGNED(lexeme) and position information if it exists
- * Position information: first int2 - is a number of positions and it
- * follows array of WordEntryPos
+ * lexeme (without null-terminator)
+ * if haspos is true:
+ * padding byte if necessary to make the number of positions 2-byte aligned
+ * uint16 number of positions that follow.
+ * uint16[] positions
+ *
+ * The positions must be sorted.
*/
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
- uint32 size;
- char data[1];
+ int32 size;
+ WordEntry entries[1]; /* var size */
+ /* lexemes follow */
} TSVectorData;
typedef TSVectorData *TSVector;
-#define DATAHDRSIZE (VARHDRSZ + sizeof(int4))
-#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
-#define ARRPTR(x) ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
-#define STRPTR(x) ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((TSVector)(x))->size ) )
-#define STRSIZE(x) ( ((TSVector)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((TSVector)(x))->size ) )
-#define _POSDATAPTR(x,e) (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
+#define DATAHDRSIZE (offsetof(TSVectorData, entries))
+#define CALCDATASIZE(x, lenstr) (DATAHDRSIZE + (x) * sizeof(WordEntry) + (lenstr) )
+#define ARRPTR(x) ( (x)->entries )
+
+/* returns a pointer to the beginning of lexemes */
+#define STRPTR(x) ( (char *) &(x)->entries[x->size] )
+
+#define _POSDATAPTR(x,e) (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
typedef struct
{
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
- int8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
+ uint8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
* if it =0 then any weight are allowed.
* Weights and bit map:
* A: 1<<3