From 6c8f670323d22acb62104f7f8f1b1a078dadd255 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 20 Nov 2024 23:53:19 +0900 Subject: [PATCH] file_fdw: Add REJECT_LIMIT option to file_fdw. Commit 4ac2a9bece introduced the REJECT_LIMIT option for the COPY command. This commit extends the support for this option to file_fdw. Like the REJECT_LIMIT option for COPY, this option limits the maximum number of erroneous rows that can be skipped. If the number of data type conversion errors exceeds this limit, accessing the file_fdw foreign table will fail with an error, even when on_error = 'ignore' is specified. Since the CREATE/ALTER FOREIGN TABLE commands require foreign table options to be single-quoted, this commit updates defGetCopyRejectLimitOption() to also handle string values for them, in addition to int64 values for the COPY command option. Author: Atsushi Torikoshi Reviewed-by: Fujii Masao, Yugo Nagata, Kirill Reshke Discussion: https://postgr.es/m/bab68a9fc502b12693f0755b6f35f327@oss.nttdata.com --- contrib/file_fdw/data/agg.bad | 1 + contrib/file_fdw/expected/file_fdw.out | 18 ++++++++++++++++-- contrib/file_fdw/file_fdw.c | 8 ++++++++ contrib/file_fdw/sql/file_fdw.sql | 7 ++++++- doc/src/sgml/file-fdw.sgml | 12 ++++++++++++ src/backend/commands/copy.c | 16 +++++++++++++++- 6 files changed, 58 insertions(+), 4 deletions(-) diff --git a/contrib/file_fdw/data/agg.bad b/contrib/file_fdw/data/agg.bad index 3415b150072..04279ce55b5 100644 --- a/contrib/file_fdw/data/agg.bad +++ b/contrib/file_fdw/data/agg.bad @@ -2,3 +2,4 @@ 100;@99.097@ 0;@aaa@ 42;@324.78@ +1;@bbb@ diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out index 593fdc782e3..4f63c047ecf 100644 --- a/contrib/file_fdw/expected/file_fdw.out +++ b/contrib/file_fdw/expected/file_fdw.out @@ -90,6 +90,8 @@ ERROR: COPY delimiter cannot be newline or carriage return CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null ' '); -- ERROR ERROR: COPY null 
representation cannot use newline or carriage return +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); -- ERROR +ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR ERROR: either filename or program is required for file_fdw foreign tables \set filename :abs_srcdir '/data/agg.data' @@ -206,10 +208,10 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; SELECT * FROM agg_bad; -- ERROR ERROR: invalid input syntax for type real: "aaa" CONTEXT: COPY agg_bad, line 3, column b: "aaa" --- on_error and log_verbosity tests +-- on_error, log_verbosity and reject_limit tests ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore'); SELECT * FROM agg_bad; -NOTICE: 1 row was skipped due to data type incompatibility +NOTICE: 2 rows were skipped due to data type incompatibility a | b -----+-------- 100 | 99.097 @@ -224,6 +226,18 @@ SELECT * FROM agg_bad; 42 | 324.78 (2 rows) +ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR +SELECT * FROM agg_bad; +ERROR: skipped more than REJECT_LIMIT (1) rows due to data type incompatibility +CONTEXT: COPY agg_bad, line 5, column b: "bbb" +ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2'); +SELECT * FROM agg_bad; + a | b +-----+-------- + 100 | 99.097 + 42 | 324.78 +(2 rows) + ANALYZE agg_bad; -- misc query tests \t on diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 043204c3e7e..9e2896f32ae 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -77,6 +77,7 @@ static const struct FileFdwOption valid_options[] = { {"encoding", ForeignTableRelationId}, {"on_error", ForeignTableRelationId}, {"log_verbosity", ForeignTableRelationId}, + {"reject_limit", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, {"force_null", AttributeRelationId}, @@ -788,6 +789,13 @@ retry: */ ResetPerTupleExprContext(estate); + if (cstate->opts.reject_limit > 0 && + 
cstate->num_errors > cstate->opts.reject_limit) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("skipped more than REJECT_LIMIT (%lld) rows due to data type incompatibility", + (long long) cstate->opts.reject_limit))); + /* Repeat NextCopyFrom() until no soft error occurs */ goto retry; } diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql index edd77c5cd20..4805ca8c04f 100644 --- a/contrib/file_fdw/sql/file_fdw.sql +++ b/contrib/file_fdw/sql/file_fdw.sql @@ -77,6 +77,7 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null ' '); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR \set filename :abs_srcdir '/data/agg.data' @@ -150,11 +151,15 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; -- error context report tests SELECT * FROM agg_bad; -- ERROR --- on_error and log_verbosity tests +-- on_error, log_verbosity and reject_limit tests ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore'); SELECT * FROM agg_bad; ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent'); SELECT * FROM agg_bad; +ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR +SELECT * FROM agg_bad; +ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2'); +SELECT * FROM agg_bad; ANALYZE agg_bad; -- misc query tests diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index bb3579b0777..882d9a76d21 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -138,6 +138,18 @@ + + reject_limit + + + + Specifies the maximum number of errors tolerated while converting a column's + input value to its data type, the same as COPY's + REJECT_LIMIT option. 
+ + + + log_verbosity diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 3485ba8663f..2d98ecf3f4e 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -420,11 +420,25 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) /* * Extract REJECT_LIMIT value from a DefElem. + * + * REJECT_LIMIT can be specified in two ways: as an int64 for the COPY command + * option or as a single-quoted string for the foreign table option using + * file_fdw. Therefore this function needs to handle both formats. */ static int64 defGetCopyRejectLimitOption(DefElem *def) { - int64 reject_limit = defGetInt64(def); + int64 reject_limit; + + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + else if (nodeTag(def->arg) == T_String) + reject_limit = pg_strtoint64(strVal(def->arg)); + else + reject_limit = defGetInt64(def); if (reject_limit <= 0) ereport(ERROR, -- 2.30.2