3871_2.patch

Sergey Ivanovskiy, 10/10/2019 03:01 PM

     ** 050 CA  20190905          The data file is not mapped to memory, but to a byte buffer.  This
     **                           allows very large dump files to be imported, without limitations from
     **                           the physical memory.
     ** 051 SBI 20191008          Added the source converter for one byte encodings, set the source
     **                           encoding.
     */
     /*
     ** This program is free software: you can redistribute it and/or modify
-...
     import java.io.*;
     import java.lang.InstantiationException;
     import java.lang.reflect.*;
     import java.nio.charset.*;
     import java.sql.*;
     import java.util.*;
     import java.util.logging.*;
     import com.goldencode.p2j.util.*;
     import com.goldencode.p2j.util.ErrorManager;
     import org.hibernate.*;
     import org.hibernate.cfg.*;
     import org.hibernate.dialect.*;
     import org.hibernate.service.*;
     import org.hibernate.type.*;
     import org.hibernate.type.Type;
     import com.goldencode.p2j.pattern.*;
     import com.goldencode.p2j.persist.*;
     import com.goldencode.p2j.persist.type.*;
-...
                    // Open a new session and begin a transaction.
                    session = openSession();
                    Transaction tx = session.beginTransaction();
                    Query charsetQuery = session.createSQLQuery(
                             "SELECT character_set_name FROM information_schema.character_sets;");
                    Object targetCharset = charsetQuery.list().get(0);
                    if (targetCharset instanceof String)
+                   {
                       stream.setConvertTarget((String) targetCharset);
+                   }
                    // Read up to batchSize records from input file.
                    for (int i = records.size(); i < batchSize && !eof && !recovery; i++, counter++)
+                   {
-...
           /** The date format as it was set when the table was dumped. */
           private String dateFormat = null;
           /** The import source charset converter */
           private CharsetConverter charsetConverter;
           /**
            * Constructor.
+           *
-...
                 // If any IO issue would occur, it should have happened in the super c'tor.
                 ErrorManager.recordOrThrowError(98, "Unable to open file:" + filename + ".");
+             }
              charsetConverter = createSourceCharsetConverter();
              boolean isUTF8 = isSourceCodePageUTF8();
              setUtf8Mode(isUTF8);
              // take into account the target conversion that depends on this mode
              setConvert((charsetConverter != null) || isUTF8);
+          }
           /**
-...
+          }
           /**
            * Selects the charset converter to be used by this stream.
+           *
            * The converter created for the import source is returned whenever it exists or when
            * conversion is switched off (in the latter case the result may be <code>null</code>).
            * Only when conversion is on and no source converter exists is the converter of the
            * parent class returned.
+           *
            * @return   The source charset converter or the parent's converter, as described above.
            */
           protected CharsetConverter getCharsetConverter()
+          {
              boolean useOwnConverter = (charsetConverter != null) || !convert;
              return useOwnConverter ? charsetConverter : super.getCharsetConverter();
+          }
           /**
            * Reads the PSC footer, storing the key/values pairs in private map pscHeader.
+           *
            * @return  The number of PSC records actually read. If negative, the footer could not
-...
              encoding    = getMetadata("cpstream");    // eg. ISO8859-15
              // cc = new CharsetConverter(encoding);
              setConvertSource(encoding);
              ldbname     = getMetadata("ldbname");     // eg. p2j_test
              timestamp   = getMetadata("timestamp");   // eg. 2013/06/07-09:57:02

     ** 019 ECF 20171026          Added write(byte[], int, int) method.
     ** 020 EVL 20180620          Adding pulse on close for empty frames.
     ** 021 CA  20190905          Allow byte buffer instead of memory buffers for read-only files.
     ** 022 SBI 20191008          Added getCharsetConverter(), createSourceCharsetConverter() and
     **                           isSourceCodePageUTF8().
     */
     /*
-...
     import java.io.*;
     import java.nio.*;
     import java.nio.channels.*;
     import java.nio.charset.*;
     /**
      * A stream class supporting input and output semantics for any file-like
-...
        /** Number of bytes in the write buffer (non-memory-mapped mode). */
        private int pending = 0;
        protected boolean utf8Mode;
        /**
         * Constructs an instance using a filename, this file or device will be
         * opened for reading or writing based on the given <code>write</code>
-...
+       }
        /**
         * Returns the charset converter held in the <code>cc</code> field.  The accessor is
         * <code>protected</code>, so a subclass may override it to supply a different converter.
+        *
         * @return   The charset converter stored in <code>cc</code>.
         */
        protected CharsetConverter getCharsetConverter()
+       {
           return cc;
+       }
        /**
         * Builds a converter for the source code page, but only for single-byte encodings.
+        *
         * A converter is created when the encoder of the <code>sourceCp</code> charset reports a
         * maximum of one byte per character.  For every other charset, and when the charset name
         * is rejected or the charset does not support encoding, <code>null</code> is returned.
+        *
         * @return   The source charset converter, or <code>null</code> if none applies.
         */
        protected CharsetConverter createSourceCharsetConverter()
+       {
           try
+          {
              if (Charset.forName(sourceCp).newEncoder().maxBytesPerChar() == 1)
+             {
                 return new CharsetConverter(sourceCp);
+             }
+          }
           catch (IllegalArgumentException | UnsupportedOperationException unusable)
+          {
              // no usable charset for the source code page: fall through, no converter exists
+          }
           return null;
+       }
        /**
         * Tests if the source code page is UTF-8.
+        *
         * A missing, illegal or unsupported code page name is reported as not being UTF-8 instead
         * of raising an exception, which matches how <code>createSourceCharsetConverter()</code>
         * treats such names.
+        *
         * @return   true if the source code page is UTF-8, otherwise false.
         */
        protected boolean isSourceCodePageUTF8()
+       {
           try
+          {
              return StandardCharsets.UTF_8.equals(Charset.forName(sourceCp));
+          }
           catch (IllegalArgumentException e)
+          {
              // null, illegal and unsupported charset names all end up here
              return false;
+          }
+       }
        /**
         * Sets the UTF-8 mode flag of this file stream.  In UTF-8 a single character can be
         * encoded as a sequence of up to 4 bytes.
+        *
         * @param    utf8Mode
         *           The flag indicating if this file stream is in UTF-8 mode.
         */
        protected void setUtf8Mode(boolean utf8Mode)
+       {
           this.utf8Mode = utf8Mode;
+       }
        /**
         * Write a byte to the buffer, flushing if the buffer is full.
+        *
         * @param    b
-...
+       {
           int ch = readWorker(false);
           if (convert && ch >= 0)
           if (convert && !utf8Mode && ch >= 0)
+          {
              ch = cc.toChar(ch);
              ch = getCharsetConverter().toChar(ch);
+          }
           return ch;
-...
+             {
                 mem.mark();
+             }
              ch = (mem.get() & 0x000000FF);
              byte b0 = mem.get();
              ch = (b0 & 0x000000FF);
              if (utf8Mode)
+             {
                 byte[] utf8Bytes = null;
                 int prefix = ch >> 4;
                 if ((prefix >> 1) == 0b00000110)
+                {
                    byte b1 = mem.get();
                    utf8Bytes = new byte[] { b0, b1};
+                }
                 else if (prefix == 0b00001110)
+                {
                    byte b1 = mem.get();
                    byte b2 = mem.get();
                    utf8Bytes = new byte[] { b0, b1, b2};
+                }
                 else if (prefix == 0b00001111)
+                {
                    byte b1 = mem.get();
                    byte b2 = mem.get();
                    byte b3 = mem.get();
                    utf8Bytes = new byte[] { b0, b1, b2, b3};
+                }
                 if (utf8Bytes != null)
+                {
                    ch = new String(utf8Bytes, StandardCharsets.UTF_8).codePointAt(0);
+                }
+             }
              if (peek)
+             {
                 mem.reset();

Project

General

Profile

FWD » Core Development » Database

3871_2.patch