Full Text Search support for json and jsonb
authorAndrew Dunstan <andrew@dunslane.net>
Fri, 31 Mar 2017 18:26:03 +0000 (14:26 -0400)
committerAndrew Dunstan <andrew@dunslane.net>
Fri, 31 Mar 2017 18:26:03 +0000 (14:26 -0400)
The new functions are json and jsonb variants of ts_headline() and to_tsvector().

Dmitry Dolgov, edited and documented by me.

doc/src/sgml/func.sgml
src/backend/tsearch/to_tsany.c
src/backend/tsearch/wparser.c
src/include/catalog/pg_proc.h
src/include/tsearch/ts_type.h
src/test/regress/expected/json.out
src/test/regress/expected/jsonb.out
src/test/regress/sql/json.sql
src/test/regress/sql/jsonb.sql

index 076be587eacf17a5589441452e08c70906a565c1..6887eabd0e922bb2c55fdd427b95033513b8d5f5 100644 (file)
@@ -9564,6 +9564,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
         <entry><literal>to_tsvector('english', 'The Fat Rats')</literal></entry>
         <entry><literal>'fat':2 'rat':3</literal></entry>
        </row>
+       <row>
+        <entry>
+         <literal><function>to_tsvector(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">document</> <type>json(b)</type>)</function></literal>
+        </entry>
+        <entry><type>tsvector</type></entry>
+        <entry>reduce document text to <type>tsvector</></entry>
+        <entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
+        <entry><literal>'fat':2 'rat':3</literal></entry>
+       </row>
        <row>
         <entry>
          <indexterm>
@@ -9610,6 +9619,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
         <entry><literal>ts_headline('x y z', 'z'::tsquery)</literal></entry>
         <entry><literal>x y &lt;b&gt;z&lt;/b&gt;</literal></entry>
        </row>
+       <row>
+        <entry>
+         <literal><function>ts_headline(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>json(b)</>, <replaceable class="PARAMETER">query</replaceable> <type>tsquery</> <optional>, <replaceable class="PARAMETER">options</replaceable> <type>text</> </optional>)</function></literal>
+        </entry>
+        <entry><type>text</type></entry>
+        <entry>display a query match</entry>
+        <entry><literal>ts_headline('{"a":"x y z"}'::json, 'z'::tsquery)</literal></entry>
+        <entry><literal>{"a":"x y &lt;b&gt;z&lt;/b&gt;"}</literal></entry>
+       </row>
        <row>
         <entry>
          <indexterm>
index 398a781c037ee455216fbb4677f06e11038c2aef..93c08bcf85e64cae83af1f71b014e6854cac4ad3 100644 (file)
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
    int         qoperator;      /* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState  /* state handed to add_to_tsvector() through the json(b) string iterators */
+{
+   ParsedText  *prs;       /* parse buffer; add_to_tsvector() re-initializes it for every string value */
+   TSVector    result;     /* vector accumulated so far; stays NULL until some value yields lexemes */
+   Oid         cfgId;      /* text search configuration passed to parsetext() */
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
                                        PointerGetDatum(in)));
 }
 
+/*
+ * to_tsvector(regconfig, jsonb)
+ *
+ * Build a tsvector from the string values of a jsonb document, using the
+ * text search configuration given as the first argument.  Every string
+ * value is handed to add_to_tsvector(), which accumulates the result in the
+ * build state.
+ */
+Datum
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+   Oid                 cfgId = PG_GETARG_OID(0);
+   Jsonb              *jb = PG_GETARG_JSONB(1);
+   ParsedText         *parsed = (ParsedText *) palloc(sizeof(ParsedText));
+   TSVectorBuildState  state;
+
+   /* words == NULL means "no word array has been allocated yet" */
+   parsed->words = NULL;
+
+   state.prs = parsed;
+   state.cfgId = cfgId;
+   state.result = NULL;
+
+   iterate_jsonb_string_values(jb, &state,
+                               (JsonIterateStringValuesAction) add_to_tsvector);
+
+   PG_FREE_IF_COPY(jb, 1);
+
+   /* Usual case: at least one string value produced lexemes. */
+   if (state.result != NULL)
+       PG_RETURN_TSVECTOR(state.result);
+
+   /*
+    * Nothing was collected (for instance, the document contains no string
+    * values), so we need to return an empty vector.  Release the word array
+    * left behind by add_to_tsvector(), if there is one.
+    */
+   if (parsed->words != NULL)
+       pfree(parsed->words);
+
+   state.result = palloc(CALCDATASIZE(0, 0));
+   SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+   state.result->size = 0;
+
+   PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * to_tsvector(jsonb)
+ *
+ * Same as jsonb_to_tsvector_byid(), with the configuration taken from
+ * getTSCurrentConfig().
+ */
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+   Jsonb      *jb = PG_GETARG_JSONB(0);
+   Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+   Datum       result;
+
+   result = DirectFunctionCall2(jsonb_to_tsvector_byid,
+                                cfg,
+                                JsonbGetDatum(jb));
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsvector(regconfig, json)
+ *
+ * Build a tsvector from the string values of a json document, using the
+ * text search configuration given as the first argument.  Every string
+ * value is handed to add_to_tsvector(), which accumulates the result in the
+ * build state.
+ */
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+   Oid                 cfgId = PG_GETARG_OID(0);
+   text               *json = PG_GETARG_TEXT_P(1);
+   ParsedText         *parsed = (ParsedText *) palloc(sizeof(ParsedText));
+   TSVectorBuildState  state;
+
+   /* words == NULL means "no word array has been allocated yet" */
+   parsed->words = NULL;
+
+   state.prs = parsed;
+   state.cfgId = cfgId;
+   state.result = NULL;
+
+   iterate_json_string_values(json, &state,
+                              (JsonIterateStringValuesAction) add_to_tsvector);
+
+   PG_FREE_IF_COPY(json, 1);
+
+   /* Usual case: at least one string value produced lexemes. */
+   if (state.result != NULL)
+       PG_RETURN_TSVECTOR(state.result);
+
+   /*
+    * Nothing was collected (for instance, the document contains no string
+    * values), so we need to return an empty vector.  Release the word array
+    * left behind by add_to_tsvector(), if there is one.
+    */
+   if (parsed->words != NULL)
+       pfree(parsed->words);
+
+   state.result = palloc(CALCDATASIZE(0, 0));
+   SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+   state.result->size = 0;
+
+   PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * to_tsvector(json)
+ *
+ * Same as json_to_tsvector_byid(), with the configuration taken from
+ * getTSCurrentConfig().
+ */
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+   text       *json = PG_GETARG_TEXT_P(0);
+   Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+   Datum       result;
+
+   result = DirectFunctionCall2(json_to_tsvector_byid,
+                                cfg,
+                                PointerGetDatum(json));
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * Callback for the json(b) string-value iterators: parse one string value
+ * of a json(b) document and fold its lexemes into the tsvector being built.
+ *
+ * _state     - TSVectorBuildState holding the parse buffer, the text search
+ *              configuration OID and the vector accumulated so far
+ * elem_value - the string value; passed to parsetext() together with
+ *              elem_len
+ * elem_len   - length of elem_value
+ *
+ * When the value yields no lexemes, state->result is left untouched and
+ * state->prs->words is reset to NULL.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+   TSVectorBuildState *state = (TSVectorBuildState *) _state;
+   ParsedText  *prs = state->prs;
+   TSVector    item_vector;
+   int         i;
+
+   /* initial size of the word array, never less than 2 entries */
+   prs->lenwords = elem_len / 6;
+   if (prs->lenwords == 0)
+       prs->lenwords = 2;
+
+   prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+   prs->curwords = 0;
+   prs->pos = 0;
+
+   parsetext(state->cfgId, prs, elem_value, elem_len);
+
+   if (prs->curwords == 0)
+   {
+       /*
+        * Nothing usable in this value.  Release the word array right away:
+        * the next call allocates a fresh one and would otherwise overwrite
+        * the only pointer to it.  Resetting the pointer keeps the callers'
+        * "free prs->words if it is not NULL" cleanup safe.
+        */
+       pfree(prs->words);
+       prs->words = NULL;
+       return;
+   }
+
+   if (state->result != NULL)
+   {
+       /*
+        * Shift the positions of this part by TS_JUMP before concatenating,
+        * so that it is not directly adjacent to the previous part.
+        */
+       for (i = 0; i < prs->curwords; i++)
+           prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+       item_vector = make_tsvector(prs);
+
+       state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+                               TSVectorGetDatum(state->result),
+                               PointerGetDatum(item_vector));
+   }
+   else
+       state->result = make_tsvector(prs);
+}
+
 /*
  * to_tsquery
  */
index d8f2f65542bb66459b35c1bbbc78803d46542124..c19937d644a670ef617aeaca435ab09f089dfc40 100644 (file)
@@ -20,6 +20,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 #include "utils/varlena.h"
 
 
@@ -31,6 +32,19 @@ typedef struct
    LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* and ts_headline_jsonb_*, passed to headline_json_value() */
+typedef struct HeadlineJsonState
+{
+   HeadlineParsedText *prs;    /* parse buffer reused for every string value */
+   TSConfigCacheEntry *cfg;    /* configuration looked up from the regconfig argument */
+   TSParserCacheEntry *prsobj; /* parser of cfg; provides the prsheadline function */
+   TSQuery             query;  /* query passed to hlparsetext() and to the headline function */
+   List                *prsoptions;    /* deserialized options, or NIL when none were given */
+   bool                transformed;    /* set to true once headline_json_value() has run */
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
                                        PG_GETARG_DATUM(1),
                                        PG_GETARG_DATUM(2)));
 }
+
+/*
+ * ts_headline(regconfig, jsonb, tsquery [, text])
+ *
+ * Run headline_json_value() over every string value of the jsonb document
+ * via transform_jsonb_string_values() and return the transformed document.
+ * The options argument is optional: it may be missing altogether
+ * (three-argument call) or be a null pointer.
+ */
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+   Jsonb              *jb = PG_GETARG_JSONB(1);
+   TSQuery             query = PG_GETARG_TSQUERY(2);
+   text               *opt = NULL;
+   Jsonb              *out;
+   HeadlineParsedText  prs;
+   HeadlineJsonState  *state;
+
+   if (PG_NARGS() > 3 && PG_GETARG_POINTER(3))
+       opt = PG_GETARG_TEXT_P(3);
+
+   state = palloc0(sizeof(HeadlineJsonState));
+
+   /* zeroed parse buffer with room for 32 words to begin with */
+   memset(&prs, 0, sizeof(HeadlineParsedText));
+   prs.lenwords = 32;
+   prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+   state->prs = &prs;
+   state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+   state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+   state->query = query;
+   state->prsoptions = opt ? deserialize_deflist(PointerGetDatum(opt)) : NIL;
+
+   if (!OidIsValid(state->prsobj->headlineOid))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+          errmsg("text search parser does not support headline creation")));
+
+   out = transform_jsonb_string_values(jb, state,
+               (JsonTransformStringValuesAction) headline_json_value);
+
+   PG_FREE_IF_COPY(jb, 1);
+   PG_FREE_IF_COPY(query, 2);
+   if (opt)
+       PG_FREE_IF_COPY(opt, 3);
+
+   pfree(prs.words);
+
+   /* startsel/stopsel are released only if some value was transformed */
+   if (state->transformed)
+   {
+       pfree(prs.startsel);
+       pfree(prs.stopsel);
+   }
+
+   PG_RETURN_JSONB(out);
+}
+
+/*
+ * ts_headline(jsonb, tsquery): configuration from getTSCurrentConfig(),
+ * no options argument.
+ */
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+   Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+   Datum       result;
+
+   result = DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+                                cfg,
+                                PG_GETARG_DATUM(0),
+                                PG_GETARG_DATUM(1));
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * ts_headline(regconfig, jsonb, tsquery): explicit configuration, no
+ * options argument.
+ */
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+   Datum       result;
+
+   result = DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+                                PG_GETARG_DATUM(0),
+                                PG_GETARG_DATUM(1),
+                                PG_GETARG_DATUM(2));
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * ts_headline(jsonb, tsquery, text): configuration from
+ * getTSCurrentConfig(), options passed through.
+ */
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+   Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+   Datum       result;
+
+   result = DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+                                cfg,
+                                PG_GETARG_DATUM(0),
+                                PG_GETARG_DATUM(1),
+                                PG_GETARG_DATUM(2));
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * ts_headline(regconfig, json, tsquery [, text])
+ *
+ * Run headline_json_value() over every string value of the json document
+ * via transform_json_string_values() and return the transformed document.
+ * The options argument is optional: it may be missing altogether
+ * (three-argument call) or be a null pointer.
+ */
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+   text               *json = PG_GETARG_TEXT_P(1);
+   TSQuery             query = PG_GETARG_TSQUERY(2);
+   text               *opt = NULL;
+   text               *out;
+   HeadlineParsedText  prs;
+   HeadlineJsonState  *state;
+
+   if (PG_NARGS() > 3 && PG_GETARG_POINTER(3))
+       opt = PG_GETARG_TEXT_P(3);
+
+   state = palloc0(sizeof(HeadlineJsonState));
+
+   /* zeroed parse buffer with room for 32 words to begin with */
+   memset(&prs, 0, sizeof(HeadlineParsedText));
+   prs.lenwords = 32;
+   prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+   state->prs = &prs;
+   state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+   state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+   state->query = query;
+   state->prsoptions = opt ? deserialize_deflist(PointerGetDatum(opt)) : NIL;
+
+   if (!OidIsValid(state->prsobj->headlineOid))
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+          errmsg("text search parser does not support headline creation")));
+
+   out = transform_json_string_values(json, state,
+               (JsonTransformStringValuesAction) headline_json_value);
+
+   PG_FREE_IF_COPY(json, 1);
+   PG_FREE_IF_COPY(query, 2);
+   if (opt)
+       PG_FREE_IF_COPY(opt, 3);
+
+   pfree(prs.words);
+
+   /* startsel/stopsel are released only if some value was transformed */
+   if (state->transformed)
+   {
+       pfree(prs.startsel);
+       pfree(prs.stopsel);
+   }
+
+   PG_RETURN_TEXT_P(out);
+}
+
+/*
+ * ts_headline(json, tsquery): configuration from getTSCurrentConfig(),
+ * no options argument.
+ */
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+   Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+   Datum       result;
+
+   result = DirectFunctionCall3(ts_headline_json_byid_opt,
+                                cfg,
+                                PG_GETARG_DATUM(0),
+                                PG_GETARG_DATUM(1));
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * ts_headline(regconfig, json, tsquery): explicit configuration, no
+ * options argument.
+ */
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+   Datum       result;
+
+   result = DirectFunctionCall3(ts_headline_json_byid_opt,
+                                PG_GETARG_DATUM(0),
+                                PG_GETARG_DATUM(1),
+                                PG_GETARG_DATUM(2));
+   PG_RETURN_DATUM(result);
+}
+
+/*
+ * ts_headline(json, tsquery, text): configuration from
+ * getTSCurrentConfig(), options passed through.
+ */
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+   Datum       cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+   Datum       result;
+
+   result = DirectFunctionCall4(ts_headline_json_byid_opt,
+                                cfg,
+                                PG_GETARG_DATUM(0),
+                                PG_GETARG_DATUM(1),
+                                PG_GETARG_DATUM(2));
+   PG_RETURN_DATUM(result);
+}
+
+
+/*
+ * Callback for the json(b) string-value transformers: return the headline,
+ * in text form, generated for one string value of a json(b) document.
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+   HeadlineJsonState  *state = (HeadlineJsonState *) _state;
+   HeadlineParsedText *prs = state->prs;
+
+   /* reuse the shared parse buffer, starting again from word 0 */
+   prs->curwords = 0;
+   hlparsetext(state->cfg->cfgId, prs, state->query, elem_value, elem_len);
+
+   /* invoke the parser's headline function on the parsed words */
+   FunctionCall3(&(state->prsobj->prsheadline),
+                 PointerGetDatum(prs),
+                 PointerGetDatum(state->prsoptions),
+                 PointerGetDatum(state->query));
+
+   /* lets the caller know that startsel/stopsel have to be freed */
+   state->transformed = true;
+   return generateHeadline(prs);
+}
index 220ba7be60222e189c7df244047120b892976949..1132a6052e7224959650acd0a62cd9fff959998c 100644 (file)
@@ -4812,6 +4812,24 @@ DESCR("generate headline");
 DATA(insert OID = 3755 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
 DESCR("generate headline");
 
+DATA(insert OID = 4201 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4202 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4203 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+DATA(insert OID = 4204 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
+DESCR("generate headline from jsonb");
+
+DATA(insert OID = 4205 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4206 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4207 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+DATA(insert OID = 4208 (  ts_headline  PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ ));
+DESCR("generate headline from json");
+
 DATA(insert OID = 3745 (  to_tsvector      PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3746 (  to_tsquery       PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
@@ -4828,6 +4846,14 @@ DATA(insert OID = 3751 (  plainto_tsquery    PGNSP PGUID 12 100 0 0 0 f f f f t f s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 4209 (  to_tsvector      PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4210 (  to_tsvector      PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
+DATA(insert OID = 4211 (  to_tsvector      PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform jsonb to tsvector");
+DATA(insert OID = 4212 (  to_tsvector      PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
+DESCR("transform json to tsvector");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger          PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
index 155650c6f36389b60972f94033e024a6bdd0b467..873e2e18565fe4f9b7748344b9d4d444b7b1ca54 100644 (file)
@@ -86,6 +86,15 @@ typedef struct
 #define MAXNUMPOS  (256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
+/*
+ * If a TSVector is built from several parts that must be treated as
+ * separate, an artificial increment has to be added to the position of each
+ * lexeme of every subsequent part.  This prevents a tsquery from matching a
+ * phrase that consists of lexemes from two different parts.
+ * TS_JUMP defines the value of this increment.
+ */
+#define TS_JUMP 1
+
 /* This struct represents a complete tsvector datum */
 typedef struct
 {
index 1bb87689fbedb199d5c79e9690eb159a9dd929eb..47b2b6e6a589cb3ef148b56856483514b04b1fca 100644 (file)
@@ -1674,3 +1674,93 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
  {"a":{},"d":{}}
 (1 row)
 
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+ to_tsvector 
+-------------
+(1 row)
+
+select to_tsvector('{}'::json);
+ to_tsvector 
+-------------
+(1 row)
+
+select to_tsvector('[]'::json);
+ to_tsvector 
+-------------
+(1 row)
+
+select to_tsvector('null'::json);
+ to_tsvector 
+-------------
+(1 row)
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                               ts_headline                                               
+---------------------------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff","c1":"ccc1 ddd1"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+                                      ts_headline                                       
+----------------------------------------------------------------------------------------
+ {"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff"},"d":["ggg <b>hhh</b>","iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                       ts_headline                                        
+------------------------------------------------------------------------------------------
+ {"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
index 8ec4150bc285dad373b194d7539f34dd5aa2215a..e72a950599341603b036e0e9925496efa7c271f4 100644 (file)
@@ -3474,3 +3474,93 @@ HINT:  Try using the function jsonb_set to replace key value.
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
 ERROR:  cannot replace existing key
 HINT:  Try using the function jsonb_set to replace key value.
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+                                to_tsvector                                
+---------------------------------------------------------------------------
+ 'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
+(1 row)
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+                                to_tsvector                                 
+----------------------------------------------------------------------------
+ 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
+(1 row)
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+ to_tsvector 
+-------------
+(1 row)
+
+select to_tsvector('{}'::jsonb);
+ to_tsvector 
+-------------
+(1 row)
+
+select to_tsvector('[]'::jsonb);
+ to_tsvector 
+-------------
+(1 row)
+
+select to_tsvector('null'::jsonb);
+ to_tsvector 
+-------------
+(1 row)
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                                   ts_headline                                                    
+------------------------------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+                                          ts_headline                                          
+-----------------------------------------------------------------------------------------------
+ {"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
+(1 row)
+
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+                                            ts_headline                                            
+---------------------------------------------------------------------------------------------------
+ {"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
+(1 row)
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ null
+(1 row)
+
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ {}
+(1 row)
+
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
+ ts_headline 
+-------------
+ []
+(1 row)
+
index 5e61922fbf11f1429795d2bdd9b34f7425e4513a..1acf4decd68058d20c8d6f7bee767dc5a18a977e 100644 (file)
@@ -551,3 +551,29 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]');
 
 -- an empty object is not null and should not be stripped
 select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
+
+-- json to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
+
+-- json to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
+
+-- ts_vector corner cases
+select to_tsvector('""'::json);
+select to_tsvector('{}'::json);
+select to_tsvector('[]'::json);
+select to_tsvector('null'::json);
+
+-- ts_headline for json
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with json
+select ts_headline('null'::json, tsquery('aaa & bbb'));
+select ts_headline('{}'::json, tsquery('aaa & bbb'));
+select ts_headline('[]'::json, tsquery('aaa & bbb'));
index e2eaca0e2722744ec197a1c9da3ecb2c508b9a7c..c9fa1fc393b380f4890aab079acb5247675a56f4 100644 (file)
@@ -878,3 +878,29 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
 
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
 select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
+
+-- jsonb to tsvector
+select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with config
+select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
+
+-- jsonb to tsvector with stop words
+select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
+
+-- ts_vector corner cases
+select to_tsvector('""'::jsonb);
+select to_tsvector('{}'::jsonb);
+select to_tsvector('[]'::jsonb);
+select to_tsvector('null'::jsonb);
+
+-- ts_headline for jsonb
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
+select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
+
+-- corner cases for ts_headline with jsonb
+select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
+select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));