Refactor from Heikki Linnakangas <heikki@enterprisedb.com>:

author Teodor Sigaev <teodor@sigaev.ru>

Tue, 11 Sep 2007 08:46:29 +0000 (08:46 +0000)

committer Teodor Sigaev <teodor@sigaev.ru>

Tue, 11 Sep 2007 08:46:29 +0000 (08:46 +0000)
author Teodor Sigaev <teodor@sigaev.ru>
Tue, 11 Sep 2007 08:46:29 +0000 (08:46 +0000)
committer Teodor Sigaev <teodor@sigaev.ru>
Tue, 11 Sep 2007 08:46:29 +0000 (08:46 +0000)
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c

index 974a1b7ae4e545e6023947d3cc7aa9319079a727..ba4a10313cbaace24fa85efd928d73ef0383845b 100644 (file)
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.3 2007/09/07 16:03:40 teodor Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -25,13 +25,12 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
     Datum      *entries = NULL;
  
-   *nentries = 0;
+   *nentries = vector->size;
     if (vector->size > 0)
     {
         int         i;
         WordEntry  *we = ARRPTR(vector);
  
-       *nentries = (uint32) vector->size;
         entries = (Datum *) palloc(sizeof(Datum) * vector->size);
  
         for (i = 0; i < vector->size; i++)
@@ -134,11 +133,19 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
  
     if (query->size > 0)
     {
-       int4        i,
+       int         i,
                     j = 0;
         QueryItem  *item;
         GinChkVal   gcv;
  
+       /*
+        * check-parameter array has one entry for each value (operand) in the
+        * query. We expand that array into mapped_check, so that there's one
+        * entry in mapped_check for every node in the query, including 
+        * operators, to allow quick lookups in checkcondition_gin. Only the 
+        * entries corresponding to operands are actually used.
+        */
+
         gcv.frst = item = GETQUERY(query);
         gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
  
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c

index 4fc51378b4bf5c70cbfc5e3e7d16195e7597f79c..985b917d0f022446828b4fa56d9120b495aecc9a 100644 (file)
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.4 2007/09/11 08:46:29 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -133,20 +133,27 @@ gtsvectorout(PG_FUNCTION_ARGS)
  }
  
  static int
-compareint(const void *a, const void *b)
+compareint(const void *va, const void *vb)
  {
-   if (*((int4 *) a) == *((int4 *) b))
+   int4 a = *((int4 *) va);
+   int4 b = *((int4 *) vb);
+
+   if (a == b)
         return 0;
-   return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
+   return (a > b) ? 1 : -1;
  }
  
+/*
+ * Removes duplicates from an array of int4. 'l' is
+ * the size of the input array. Returns the new size of the array.
+ */
  static int
  uniqueint(int4 *a, int4 l)
  {
     int4       *ptr,
                *res;
  
-   if (l == 1)
+   if (l <= 1)
         return l;
  
     ptr = res = a;
@@ -570,12 +577,15 @@ typedef struct
  } SPLITCOST;
  
  static int
-comparecost(const void *a, const void *b)
+comparecost(const void *va, const void *vb)
  {
-   if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
+   SPLITCOST *a = (SPLITCOST *) va;
+   SPLITCOST *b = (SPLITCOST *) vb;
+
+   if (a->cost == b->cost)
         return 0;
     else
-       return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
+       return (a->cost > b->cost) ? 1 : -1;
  }
  
  
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c

index 535a3541bf75cfced4baddb8e254bbcbf21e7321..453b67df431d53e58f332ebc925688b24789a3ca 100644 (file)
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.5 2007/09/11 08:46:29 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,22 +53,24 @@ cnt_length(TSVector t)
  {
     WordEntry  *ptr = ARRPTR(t),
                *end = (WordEntry *) STRPTR(t);
-   int         len = 0,
-               clen;
+   int         len = 0;
  
     while (ptr < end)
     {
-       if ((clen = POSDATALEN(t, ptr)) == 0)
+       int clen = POSDATALEN(t, ptr);
+
+       if (clen == 0)
             len += 1;
         else
             len += clen;
+
         ptr++;
     }
  
     return len;
  }
  
-static int4
+static int
  WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
  {
     if (ptr->len == item->length)
@@ -80,6 +82,10 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item
     return (ptr->len > item->length) ? 1 : -1;
  }
  
+/*
+ * Returns a pointer to a WordEntry corresponding to 'item' from tsvector 't'. 'q'
+ * is the TSQuery containing 'item'. Returns NULL if not found.
+ */
  static WordEntry *
  find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
  {
@@ -178,15 +184,15 @@ SortAndUniqItems(TSQuery q, int *size)
  }
  
  /* A dummy WordEntryPos array to use when haspos is false */
-static WordEntryPos POSNULL[] = {
+static WordEntryPosVector POSNULL = {
     1, /* Number of elements that follow */
-   0
+   { 0 }
  };
  
  static float
  calc_rank_and(float *w, TSVector t, TSQuery q)
  {
-   uint16    **pos;
+   WordEntryPosVector   **pos;
     int         i,
                 k,
                 l,
@@ -207,9 +213,8 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
         pfree(item);
         return calc_rank_or(w, t, q);
     }
-   pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
-   memset(pos, 0, sizeof(uint16 *) * q->size);
-   WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
+   pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+   WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1);
  
     for (i = 0; i < size; i++)
     {
@@ -218,25 +223,25 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
             continue;
  
         if (entry->haspos)
-           pos[i] = (uint16 *) _POSDATAPTR(t, entry);
+           pos[i] = _POSVECPTR(t, entry);
         else
-           pos[i] = (uint16 *) POSNULL;
+           pos[i] = &POSNULL;
  
  
-       dimt = *(uint16 *) (pos[i]);
-       post = (WordEntryPos *) (pos[i] + 1);
+       dimt = pos[i]->npos;
+       post = pos[i]->pos;
         for (k = 0; k < i; k++)
         {
             if (!pos[k])
                 continue;
-           lenct = *(uint16 *) (pos[k]);
-           ct = (WordEntryPos *) (pos[k] + 1);
+           lenct = pos[k]->npos;
+           ct = pos[k]->pos;
             for (l = 0; l < dimt; l++)
             {
                 for (p = 0; p < lenct; p++)
                 {
                     dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
-                   if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
+                   if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
                     {
                         float       curw;
  
@@ -285,8 +290,8 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
         }
         else
         {
-           dimt = *(uint16 *) POSNULL;
-           post = POSNULL + 1;
+           dimt = POSNULL.npos;
+           post = POSNULL.pos;
         }
  
         resj = 0.0;
@@ -456,17 +461,19 @@ typedef struct
  {
     QueryItem **item;
     int16       nitem;
-   bool        needfree;
     uint8       wclass;
     int32       pos;
  } DocRepresentation;
  
  static int
-compareDocR(const void *a, const void *b)
+compareDocR(const void *va, const void *vb)
  {
-   if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
+   DocRepresentation *a = (DocRepresentation *) va;
+   DocRepresentation *b = (DocRepresentation *) vb;
+
+   if (a->pos == b->pos)
         return 0;
-   return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
+   return (a->pos > b->pos) ? 1 : -1;
  }
  
  static bool
@@ -547,11 +554,11 @@ Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
  
     ptr = doc + lastpos;
  
-   /* find lower bound of cover from founded upper bound, move down */
+   /* find lower bound of cover from found upper bound, move down */
     while (ptr >= doc + ext->pos)
     {
         for (i = 0; i < ptr->nitem; i++)
-           if(ptr->item[i]->type  == QI_VAL) /* XXX */
+           if(ptr->item[i]->type  == QI_VAL)
                 ptr->item[i]->operand.istrue = 1;
         if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
         {
@@ -620,8 +627,8 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
         }
         else
         {
-           dimt = *(uint16 *) POSNULL;
-           post = POSNULL + 1;
+           dimt = POSNULL.npos;
+           post = POSNULL.pos;
         }
  
         while (cur + dimt >= len)
@@ -636,7 +643,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
             {
                 int         k;
  
-               doc[cur].needfree = false;
                 doc[cur].nitem = 0;
                 doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size);
  
@@ -658,7 +664,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
             }
             else
             {
-               doc[cur].needfree = false;
                 doc[cur].nitem = doc[cur - 1].nitem;
                 doc[cur].item = doc[cur - 1].item;
             }
@@ -764,9 +769,6 @@ calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
     if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
         Wdoc /= log((double) (txt->size + 1)) / log(2.0);
  
-   for (i = 0; i < doclen; i++)
-       if (doc[i].needfree)
-           pfree(doc[i].item);
     pfree(doc);
  
     return (float4) Wdoc;
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c

index 8e7593513ff70baf7f365b5a723b00874a4437ff..e150f9a267837f79d63385d4e559f0637fbd1e70 100644 (file)
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.5 2007/09/11 08:46:29 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -269,7 +269,7 @@ compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
  static int4
  add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos)
  {
-   uint16     *clen = (uint16 *) _POSDATAPTR(dest, destptr);
+   uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
     int         i;
     uint16      slen = POSDATALEN(src, srcptr),
                 startlen;
@@ -354,7 +354,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
             if (ptr->haspos)
             {
                 cur += SHORTALIGN(ptr1->len);
-               memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+               memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
                 cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
             }
             else
@@ -399,7 +399,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
                 cur += SHORTALIGN(ptr1->len);
                 if (ptr1->haspos)
                 {
-                   memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+                   memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
                     cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
                     if (ptr2->haspos)
                         cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
@@ -434,7 +434,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
         if (ptr->haspos)
         {
             cur += SHORTALIGN(ptr1->len);
-           memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+           memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
             cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
         }
         else
@@ -499,10 +499,17 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
   * check weight info
   */
  static bool
-checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
+checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
  {
-   WordEntryPos *ptr = (WordEntryPos *) (chkval->values + SHORTALIGN(val->pos + val->len) + sizeof(uint16));
-   uint16      len = *((uint16 *) (chkval->values + SHORTALIGN(val->pos + val->len)));
+   WordEntryPosVector *posvec;
+   WordEntryPos *ptr;
+   uint16      len;
+
+   posvec = (WordEntryPosVector *) 
+       (chkval->values + SHORTALIGN(val->pos + val->len));
+
+   len = posvec->npos;
+   ptr = posvec->pos;
  
     while (len--)
     {
@@ -674,7 +681,13 @@ ts_match_tq(PG_FUNCTION_ARGS)
  }
  
  /*
- * Statistics of tsvector
+ * ts_stat statistic function support
+ */
+
+
+/*
+ * Returns the number of positions in value 'wptr' within tsvector 'txt',
+ * that have a weight equal to one of the weights in 'weight' bitmask.
   */
  static int
  check_weight(TSVector txt, WordEntry * wptr, int8 weight)
@@ -824,6 +837,18 @@ formstat(tsstat * stat, TSVector txt, WordEntry ** entry, uint32 len)
     return newstat;
  }
  
+/*
+ * This is written like a custom aggregate function, because the
+ * original plan was to do just that. Unfortunately, an aggregate function
+ * can't return a set, so that plan was abandoned. If that limitation is
+ * lifted in the future, ts_stat could be a real aggregate function so that 
+ * you could use it like this:
+ *
+ *   SELECT ts_stat(vector_column) FROM vector_table;
+ *
+ *  where vector_column is a tsvector-type column in vector_table.
+ */
+
  static tsstat *
  ts_accum(tsstat * stat, Datum data)
  {
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h

index 0aa95e892cc00325624995039d6d8ebf72ee9f03..107fc4a71127fde7422deb21af99e3c2c9f46441 100644 (file)
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
   *
   * Copyright (c) 1998-2007, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.5 2007/09/11 08:46:29 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -43,6 +43,13 @@ typedef struct
  
  typedef uint16 WordEntryPos;
  
+typedef struct
+{
+   uint16 npos;
+   WordEntryPos pos[1]; /* var length */
+} WordEntryPosVector;
+
+
  #define WEP_GETWEIGHT(x)   ( (x) >> 14 )
  #define WEP_GETPOS(x)      ( (x) & 0x3fff )
  
@@ -88,9 +95,9 @@ typedef TSVectorData *TSVector;
  /* returns a pointer to the beginning of lexemes */
  #define STRPTR(x)  ( (char *) &(x)->entries[x->size] )
  
-#define _POSDATAPTR(x,e)   (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))
-#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
-#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
+#define _POSVECPTR(x, e)   ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
+#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
  
  /*
   * fmgr interface macros
author	Teodor Sigaev <teodor@sigaev.ru>
	Tue, 11 Sep 2007 08:46:29 +0000 (08:46 +0000)
committer	Teodor Sigaev <teodor@sigaev.ru>
	Tue, 11 Sep 2007 08:46:29 +0000 (08:46 +0000)
src/backend/utils/adt/tsginidx.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsgistidx.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsrank.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsvector_op.c		patch \| blob \| blame \| history
src/include/tsearch/ts_type.h		patch \| blob \| blame \| history