From 7712181ef753fe77c8b8662d39939316449afe32 Mon Sep 17 00:00:00 2001 From: vbarakou1 Date: Wed, 7 Oct 2020 13:31:57 +0300 Subject: [PATCH 1/3] added support for Greek code page --- .../parser/encoding/codepage/CodePage.scala | 1 + .../encoding/codepage/CodePage875.scala | 57 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala index 564c97e9a..06d34cd81 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala @@ -61,6 +61,7 @@ object CodePage { case "common" => new CodePageCommon case "common_extended" => new CodePageCommonExt case "cp037" => new CodePage037 + case "cp875" => new CodePage875 case "cp037_extended" => new CodePage037Ext case codePage => throw new IllegalArgumentException(s"The code page '$codePage' is not one of the builtin EBCDIC code pages.") } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala new file mode 100644 index 000000000..385b8dcfa --- /dev/null +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala @@ -0,0 +1,57 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.cobrix.cobol.parser.encoding.codepage + +/** + * EBCDIC code page 875 contains all of the Greek characters. + * + */ +class CodePage875 extends CodePage { + + override def codePageShortName: String = "cp875" + + override protected def ebcdicToAsciiMapping: Array[Char] = { + /* This is the EBCDIC Code Page 875 to ASCII conversion table with non-printable characters mapping + from https://en.wikipedia.org/wiki/EBCDIC_037 */ + val ebcdic2ascii: Array[Char] = { + val clf = '\r' + val ccr = '\n' + val spc = ' ' + val qts = '\'' + val qtd = '\"' + val bsh = '\\' + Array[Char]( + spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, ccr, spc, spc, // 0 - 15 + spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 16 - 31 + spc, spc, spc, spc, spc, clf, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 32 - 47 + spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, spc, // 48 - 63 + ' ', 'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', '[', '.', '<', '(', '+', '!', // 64 - 79 + '&', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', ']', '$', '*', ')', ';', '^', // 80 - 95 + '-', '/', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω', 'Ϊ', 'Ϋ', '|', ',', '%', '_', '>', '?', // 96 - 111 + '¨', 'Ά', 'Έ', 'Ή', spc, 'Ί', 'Ό', 'Ύ', 'Ώ', '`', ':', '#', '@', qts, '=', qtd, // 112 - 127 + '΅', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'α', 'β', 'γ', 'δ', 'ε', 'ζ', // 128 - 143 + '°', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'η', 'θ', 'ι', 'κ', 'λ', 'μ', // 144 - 159 + '´', '~', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ν', 'ξ', 'ο', 'π', 'ρ', 'σ', // 160 - 175 + '£', 'ά', 'έ', 'ή', 'ϊ', 'ί', 'ό', 'ύ', 'ϋ', 'ώ', 'ς', 'τ', 'υ', 'φ', 'χ', 'ψ', // 176 - 191 + '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', '-', 'ω', 'ΐ', 'ΰ', '‘', '―', // 192 - 207 + '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', '±', '½', spc, '·', '’', '¦', // 208 - 223 + bsh, '₯', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '²', '§', 'ͺ', spc, '«', '¬', // 224 - 239 + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', '©', '€', spc, '»', spc) // 240 - 255 + } + ebcdic2ascii + } +} From c6803b01e094722a1cf7d6c8a61a47527a61b257 Mon Sep 17 00:00:00 2001 From: Yannis Date: Wed, 7 Oct 2020 14:32:27 +0300 Subject: [PATCH 2/3] Update CodePage875.scala Corrected the URL for the mapping of the codepage --- .../cobrix/cobol/parser/encoding/codepage/CodePage875.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala index 385b8dcfa..007e3fbfa 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala @@ -26,7 +26,7 @@ class CodePage875 extends CodePage { override protected def ebcdicToAsciiMapping: Array[Char] = { /* This is the EBCDIC Code Page 875 to ASCII conversion table with non-printable characters mapping - from https://en.wikipedia.org/wiki/EBCDIC_037 */ + from https://wutils.com/encodings/cp875 */ val ebcdic2ascii: Array[Char] = { val clf = '\r' val ccr = '\n' From ad05e02fe8c2de592d5fa73d0b7d3175b335dcb1 Mon Sep 17 00:00:00 2001 From: Yannis Date: Thu, 8 Oct 2020 10:38:22 +0300 Subject: [PATCH 3/3] Update cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Zejnilović --- .../absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala index 06d34cd81..e3e0c35d9 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala @@ -61,7 +61,7 @@ object CodePage { case "common" => new CodePageCommon case "common_extended" => new CodePageCommonExt case "cp037" => new CodePage037 - case "cp875" => new CodePage875 + case "cp875" => new CodePage875 case "cp037_extended" => new CodePage037Ext case codePage => throw new IllegalArgumentException(s"The code page '$codePage' is not one of the builtin EBCDIC code pages.") }