Make all Unicode Perl scripts use strict, and rearrange logic for clarity.

author Heikki Linnakangas <heikki.linnakangas@iki.fi>

Wed, 30 Nov 2016 16:06:34 +0000 (18:06 +0200)

committer Heikki Linnakangas <heikki.linnakangas@iki.fi>

Wed, 30 Nov 2016 16:06:34 +0000 (18:06 +0200)
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 30 Nov 2016 16:06:34 +0000 (18:06 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 30 Nov 2016 16:06:34 +0000 (18:06 +0200)
diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl

index 6a1321bab84eed95dd6d2059fbd70b6d09779ded..bfd4511d724ca8ad595ae3f3e504625b53a451d8 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
@@ -24,8 +24,8 @@
  #       UCS-2 code in hex
  #       # and Unicode name (not used in this script)
  
-
-require "convutils.pm";
+use strict;
+require convutils;
  
  # Load BIG5.TXT
  my $all = &read_source("BIG5.TXT");
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl

index 8df23f8be65fae3ff3b90c32328b2dc49c592b32..6b65c11a65475653428c145e8df0ff3f1e4b84c0 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
@@ -13,24 +13,24 @@
  # where the "u" field is the Unicode code point in hex,
  # and the "b" field is the hex byte sequence for GB18030
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  # Read the input
  
-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";
  
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
  
  my @mapping;
  
-while (<FILE>)
+while (<$in>)
  {
     next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-   $u = $1;
-   $c = $2;
+   my ($u, $c) = ($1, $2);
     $c =~ s/ //g;
-   $ucs  = hex($u);
-   $code = hex($c);
+   my $ucs  = hex($u);
+   my $code = hex($c);
  
     # The GB-18030 character set, which we use as the source, contains
     # a lot of extra characters on top of the GB2312 character set that
@@ -71,6 +71,6 @@ while (<FILE>)
         direction => 'both'
     }
  }
-close(FILE);
+close($in);
  
  print_tables("EUC_CN", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl

index b4e140b657c993be291dfafa3ea66d13181341af..b1ad19a69d8929d4564064ab0aa3a234b93d1b48 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -7,27 +7,27 @@
  # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
  # "euc-jis-2004-std.txt" (http://x0213.org)
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  # first generate UTF-8 --> EUC_JIS_2004 table
  
-$in_file = "euc-jis-2004-std.txt";
+my $in_file = "euc-jis-2004-std.txt";
  
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
  
  my @all;
  
-while ($line = <FILE>)
+while (my $line = <$in>)
  {
     if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
     {
-       $c              = $1;
-       $u1             = $2;
-       $u2             = $3;
-       $rest           = "U+" . $u1 . "+" . $u2 . $4;
-       $code           = hex($c);
-       $ucs1           = hex($u1);
-       $ucs2           = hex($u2);
+       # combined characters
+       my ($c, $u1, $u2) = ($1, $2, $3);
+       my $rest = "U+" . $u1 . "+" . $u2 . $4;
+       my $code = hex($c);
+       my $ucs1 = hex($u1);
+       my $ucs2 = hex($u2);
  
         push @all, { direction => 'both',
                      ucs => $ucs1,
@@ -38,22 +38,16 @@ while ($line = <FILE>)
     }
     elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
     {
-       $c    = $1;
-       $u    = $2;
-       $rest = "U+" . $u . $3;
-   }
-   else
-   {
-       next;
-   }
-
-   $ucs  = hex($u);
-   $code = hex($c);
+       # non-combined characters
+       my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
+       my $ucs  = hex($u);
+       my $code = hex($c);
  
-   next if ($code < 0x80 && $ucs < 0x80);
+       next if ($code < 0x80 && $ucs < 0x80);
  
-   push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
+       push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
+   }
  }
-close(FILE);
+close($in);
  
  print_tables("EUC_JIS_2004", \@all, 1);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl

index 0e9dd292bff11c58df0000cd563cbc59c5b76229..1bfd3b850e0906b4f1041079438d37728eacaa2f 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
@@ -12,7 +12,7 @@
  # organization's ftp site.
  
  use strict;
-require "convutils.pm";
+require convutils;
  
  # Load JIS0212.TXT
  my $jis0212 = &read_source("JIS0212.TXT");
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl

index a917d067172a25c53fad45a840b2f1f792e68474..dffcdc40ded63aa9500564f27113e99397d67a34 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
@@ -16,7 +16,8 @@
  #       UCS-2 code in hex
  #       # and Unicode name (not used in this script)
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  # Load the source file.
  
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl

index aceef5433c28bd59590c164ec58019893957cbc1..cb81c4650d53fcb8a2f436f9411827ffdca4d616 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
@@ -17,7 +17,8 @@
  #       UCS-2 code in hex
  #       # and Unicode name (not used in this script)
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  my $mapping = &read_source("CNS11643.TXT");
  
diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl

index f58361024e4c67e375b25a11cf90728fb36cbdb8..7be4b7b0abebf3a4ff299f6138f26e61795f69bc 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -13,24 +13,24 @@
  # where the "u" field is the Unicode code point in hex,
  # and the "b" field is the hex byte sequence for GB18030
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  # Read the input
  
-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";
  
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
  
  my @mapping;
  
-while (<FILE>)
+while (<$in>)
  {
     next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-   $u = $1;
-   $c = $2;
+   my ($u, $c) = ($1, $2);
     $c =~ s/ //g;
-   $ucs  = hex($u);
-   $code = hex($c);
+   my $ucs  = hex($u);
+   my $code = hex($c);
     if ($code >= 0x80 && $ucs >= 0x0080)
     {
         push @mapping, {
@@ -40,6 +40,6 @@ while (<FILE>)
         }
     }
  }
-close(FILE);
+close($in);
  
  print_tables("GB18030", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl

index b98f9a7bf55b56521eafb89e510e533fb76bd709..b249b81096cdc79de19a1bcbd819b729a299dd20 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -15,7 +15,8 @@
  #       UCS-2 code in hex
  #       # and Unicode name (not used in this script)
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  # Load the source file.
  
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl

index 16a53ad1d9fa74d46f80f61de5c178db13efa775..6be56b5b526d640f80709e981ae276f3d3f2863b 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -7,27 +7,27 @@
  # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
  # "sjis-0213-2004-std.txt" (http://x0213.org)
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  # first generate UTF-8 --> SHIFT_JIS_2004 table
  
-$in_file = "sjis-0213-2004-std.txt";
+my $in_file = "sjis-0213-2004-std.txt";
  
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
  
  my @mapping;
  
-while ($line = <FILE>)
+while (my $line = <$in>)
  {
     if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
     {
-       $c              = $1;
-       $u1             = $2;
-       $u2             = $3;
-       $rest           = "U+" . $u1 . "+" . $u2 . $4;
-       $code           = hex($c);
-       $ucs1           = hex($u1);
-       $ucs2           = hex($u2);
+       # combined characters
+       my ($c, $u1, $u2) = ($1, $2, $3);
+       my $rest = "U+" . $u1 . "+" . $u2 . $4;
+       my $code = hex($c);
+       my $ucs1 = hex($u1);
+       my $ucs2 = hex($u2);
  
         push @mapping, {
             code => $code,
@@ -40,42 +40,37 @@ while ($line = <FILE>)
     }
     elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
     {
-       $c    = $1;
-       $u    = $2;
-       $rest = "U+" . $u . $3;
-   }
-   else
-   {
-       next;
-   }
+       # non-combined characters
+       my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
+       my $ucs  = hex($u);
+       my $code = hex($c);
+       my $direction;
  
-   $ucs  = hex($u);
-   $code = hex($c);
+       if ($code < 0x80 && $ucs < 0x80)
+       {
+           next;
+       }
+       elsif ($code < 0x80)
+       {
+           $direction = 'from_unicode';
+       }
+       elsif ($ucs < 0x80)
+       {
+           $direction = 'to_unicode';
+       }
+       else
+       {
+           $direction = 'both';
+       }
  
-   if ($code < 0x80 && $ucs < 0x80)
-   {
-       next;
-   }
-   elsif ($code < 0x80)
-   {
-       $direction = 'from_unicode';
-   }
-   elsif ($ucs < 0x80)
-   {
-       $direction = 'to_unicode';
-   }
-   else
-   {
-       $direction = 'both';
+       push @mapping, {
+           code => $code,
+           ucs => $ucs,
+           comment => $rest,
+           direction => $direction
+       };
     }
-
-   push @mapping, {
-       code => $code,
-       ucs => $ucs,
-       comment => $rest,
-       direction => $direction
-   };
  }
-close(FILE);
+close($in);
  
  print_tables("SHIFT_JIS_2004", \@mapping, 1);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl

index c8ff712af8fd279349da855752bb848ee6101a6e..17289fc5e3efd21a5ae562fa4626da05df68d722 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
@@ -11,7 +11,7 @@
  # ftp site.
  
  use strict;
-require "convutils.pm";
+require convutils;
  
  my $charset = read_source("CP932.TXT");
  
diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl

index b6bf3bd8f27aac6db57e678a6d088859b46e6164..667f6c177c77c189e05febf6671538cd8f493a01 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
@@ -13,24 +13,24 @@
  # where the "u" field is the Unicode code point in hex,
  # and the "b" field is the hex byte sequence for UHC
  
-require "convutils.pm";
+use strict;
+require convutils;
  
  # Read the input
  
-$in_file = "windows-949-2000.xml";
+my $in_file = "windows-949-2000.xml";
  
-open(FILE, $in_file) || die("cannot open $in_file");
+open(my $in, '<', $in_file) || die("cannot open $in_file");
  
  my @mapping;
  
-while (<FILE>)
+while (<$in>)
  {
     next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-   $u = $1;
-   $c = $2;
+   my ($u, $c) = ($1, $2);
     $c =~ s/ //g;
-   $ucs  = hex($u);
-   $code = hex($c);
+   my $ucs  = hex($u);
+   my $code = hex($c);
  
     next if ($code == 0x0080 || $code == 0x00FF);
  
@@ -43,7 +43,7 @@ while (<FILE>)
         }
     }
  }
-close(FILE);
+close($in);
  
  # One extra character that's not in the source file.
  push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };
diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl

index a3cf436eefd56708788d8ac18bb64fef67c85b9e..b3188f3709a9e36b8764d3bcbf321eb66b74b3a5 100755 (executable)
--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -15,9 +15,10 @@
  #       UCS-2 code in hex
  #       # and Unicode name (not used in this script)
  
-require "convutils.pm";
+use strict;
+require convutils;
  
-%filename = (
+my %filename = (
     'WIN866'     => 'CP866.TXT',
     'WIN874'     => 'CP874.TXT',
     'WIN1250'    => 'CP1250.TXT',
@@ -46,9 +47,10 @@ require "convutils.pm";
     'KOI8U'      => 'KOI8-U.TXT',
     'GBK'        => 'CP936.TXT');
  
-@charsets = keys(%filename);
-@charsets = @ARGV if scalar(@ARGV);
-foreach $charset (@charsets)
+# make maps for all encodings if not specified
+my @charsets = (scalar(@ARGV) > 0) ? @ARGV : keys(%filename);
+
+foreach my $charset (@charsets)
  {
     my $mapping = &read_source($filename{$charset});
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Wed, 30 Nov 2016 16:06:34 +0000 (18:06 +0200)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Wed, 30 Nov 2016 16:06:34 +0000 (18:06 +0200)
src/backend/utils/mb/Unicode/UCS_to_BIG5.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_GB18030.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_SJIS.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_UHC.pl		patch \| blob \| blame \| history
src/backend/utils/mb/Unicode/UCS_to_most.pl		patch \| blob \| blame \| history