Purpose 

The purpose of this document is to describe the native serialization of the various data types that Geode understands. 

Data Types

Geode supports all the Java primitive data types as well as Java arrays and collections. Custom Java objects can be serialized through the Geode PdxSerializable and DataSerializable interfaces. An application can also attach its own data serializer through the Geode DataSerializer and PdxSerializer interfaces. Objects implementing java.io.Serializable can be serialized with Geode as well.

In Geode, every supported data type is associated with a single-byte type Id. To serialize any data type, Geode first writes the typeId. For variable-sized objects, it then writes the length of the serialized object followed by the serialized bytes. For fixed-size data types, it just writes the serialized bytes in big-endian byte order.

For native serialization, strings are serialized using the Java Modified UTF-8 Format.

Data Type | Geode Region Key Types | Type Id | Value | Serialized Bytes | Description
Null 41 = 0x29null
typeId0x29
 
Boolean YES53 = 0x35true
typeId0x35
bytes0x01
 
Character YES54 = 0x36'a'
typeId0x36
bytes

0x00 0x61

 
Byte YES55 = 0x371
typeId0x37
bytes0x01
 
Short YES56 = 0x381000
typeId0x38
bytes0x03 0xE8
 
Integer YES57 = 0x391000
typeId0x39
bytes0x00 0x00 0x03 0xE8
 
Long YES58 = 0x3A1000
typeId0x3A
bytes0x00 0x00 0x00 0x00 0x00 0x00 0x03 0xE8
 
Float YES59 = 0x3B1000f
typeId0x3B
bytes

0x44 0x7A 0x00 0x00

 
Double YES60 = 0x3C1000d



typeId0x3C
bytes

0x40 0x8F 0x40 0x00 0x00 0x00 0x00 0x00

 
ASCII_STRINGYES87 = 0x57"hello"
typeId0x57
len0x00 0x05
bytes0x68 0x65 0x6C 0x6C 0x6F
This represents an ASCII string with a maximum length of 0xFFFF. See the "Write And Read String" section below for the code that serializes and deserializes the string.
UTF_STRING YES42 = 0x2A  This represents a UTF string whose encoded length is at most 0xFFFF bytes. See the "Write And Read String" section below for the code that serializes and deserializes the string.
HUGE_ASCII_STRING YES88 = 0x58  This represents an ASCII string with a length greater than 0xFFFF. See the "Write And Read String" section below for the code that serializes and deserializes the string.
HUGE_UTF_STRING YES89 = 0x59  This represents a UTF string whose encoded length is greater than 0xFFFF bytes. See the "Write And Read String" section below for the code that serializes and deserializes the string.
byte[] This we plan to support46 = 0x2Ebyte[] {1,2}
typeId0x2E
len0x02
bytes0x01 0x02
 
short[] 47 = 0x2Fshort[] {1,2}
typeId0x2F
len 0x02
bytes

0x00 0x01 0x00 0x02

 
int[] 48 = 0x30int[] {1,2}
typeId0x30
len 0x02
bytes

0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x02

 
long[] 49 = 0x31long[] {1}
typeId0x31
len 0x01
bytes

0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x01

 
float[] 50 = 0x32float[] {2.0f}
typeId0x32
len 0x01
bytes

0x40 0x00 0x00 0x00

 
double[] 51 = 0x33double[] {2.0d}
typeId0x33
len0x01
bytes

0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00

 
string[] 64 = 0x40 String[]{"hello", "world"}
typeId0x40
len 0x02

"hello"

bytes

0x57 - ASCII_STRING

0x00 0x05

0x68 0x65 0x6c 0x6c 0x6f

 

"world"

bytes

0x57 -  ASCII_STRING

0x00 0x05 - len

0x77 0x6f 0x72 0x6c 0x64


 
Map 67 = 0x43

Map s = new HashMap<>();

 s.put("hello", "world")

typeId0x43
len 0x01

"hello"

bytes

0x57 - ASCII_STRING

0x00 0x05

0x68 0x65 0x6c 0x6c 0x6f

 

"world"

bytes

0x57 -  ASCII_STRING

0x00 0x05 - len

0x77 0x6f 0x72 0x6c 0x64

 
Set 66 = 0x42

Set s = new HashSet();

 s.add("hello");

s.add("world");


typeId0x42
len 0x02

"hello"

bytes

0x57 - ASCII_STRING

0x00 0x05

0x68 0x65 0x6c 0x6c 0x6f

"world"

bytes

0x57 -  ASCII_STRING

0x00 0x05 - len

0x77 0x6f 0x72 0x6c 0x64

 
List 10 = 0x0a

List s = new LinkedList();
s.add("hello");
s.add("world");

typeId0x0a
len 0x02

"hello"

bytes

0x57 - ASCII_STRING

0x00 0x05

0x68 0x65 0x6c 0x6c 0x6f

"world"

bytes

0x57 -  ASCII_STRING

0x00 0x05 - len

0x77 0x6f 0x72 0x6c 0x64

 
ArrayList 65=0x41

List s = new ArrayList();
s.add("hello");
s.add("world");

typeId0x41
len 0x02

"hello"

bytes

0x57 - ASCII_STRING

0x00 0x05

0x68 0x65 0x6c 0x6c 0x6f

"world"

bytes

0x57 -  ASCII_STRING

0x00 0x05 - len

0x77 0x6f 0x72 0x6c 0x64

 
PDX_SERIALIZATION 93=0x5D  A Java object can implement the PdxSerializable interface to serialize its data in PDX format.
PDX_SERIALIZER    

The application can implement the PdxSerializer interface and install it with the Geode cache

to serialize data in PDX format.

DATA_SERIALIZATION     A Java object can implement the DataSerializable interface to serialize its data.
USER_SERIALIZATION     

The application can implement the DataSerializer interface and install it with the Geode cache

to serialize data.

JAVA_SERIALIZATION 44 = 0x2C    


Calculate Collection/Array Size

Geode encodes the size of a collection or array in the following way:

Write Array Length Function | Read Array Length Function
 public static final byte NULL_ARRAY = -1; // array is null
  /**
   * Marker byte: the array length follows as an unsigned short in the next 2 bytes.
   *
   * @since GemFire 5.7
   */
  private static final byte SHORT_ARRAY_LEN = -2; // array len encoded as unsigned short in next 2
                                                  // bytes
  /**
   * Marker byte: the array length follows as an int in the next 4 bytes.
   *
   * @since GemFire 5.7
   */
  public static final byte INT_ARRAY_LEN = -3; // array len encoded as int in next 4 bytes
  // Largest length that is written directly as a single byte: 0xFC (252). The three byte
  // values above it (0xFD, 0xFE, 0xFF) are the unsigned forms of INT_ARRAY_LEN,
  // SHORT_ARRAY_LEN and NULL_ARRAY respectively.
  private static final int MAX_BYTE_ARRAY_LEN = ((byte) -4) & 0xFF;
  /**
   * Writes an array or collection length to {@code out} using a variable-width encoding.
   *
   * <ul>
   * <li>{@code -1}: the single byte {@code NULL_ARRAY}</li>
   * <li>up to {@code MAX_BYTE_ARRAY_LEN}: the length itself as a single byte</li>
   * <li>up to {@code 0xFFFF}: the byte {@code SHORT_ARRAY_LEN} followed by a 2-byte short</li>
   * <li>anything larger: the byte {@code INT_ARRAY_LEN} followed by a 4-byte int</li>
   * </ul>
   *
   * @param len the length to encode, or {@code -1} for a null array
   * @param out the output the encoded length is written to
   * @throws IOException propagated from the underlying {@code DataOutput}
   */
  public static void writeArrayLength(int len, DataOutput out) throws IOException {
    if (len == -1) {
      out.writeByte(NULL_ARRAY);
      return;
    }
    if (len <= MAX_BYTE_ARRAY_LEN) {
      // Small enough that the length byte cannot be confused with a marker byte.
      out.writeByte(len);
      return;
    }
    if (len <= 0xFFFF) {
      out.writeByte(SHORT_ARRAY_LEN);
      out.writeShort(len);
      return;
    }
    out.writeByte(INT_ARRAY_LEN);
    out.writeInt(len);
  }
  /**
   * Reads an array or collection length that was encoded with a leading code byte.
   *
   * <p>
   * The code byte is either {@code NULL_ARRAY} (result {@code -1}), a marker announcing that a
   * wider length follows ({@code SHORT_ARRAY_LEN} or {@code INT_ARRAY_LEN}), or the length itself.
   *
   * @param in the input to read the encoded length from
   * @return the decoded length, or {@code -1} if the code byte was {@code NULL_ARRAY}
   * @throws IOException propagated from the underlying {@code DataInput}
   * @throws IllegalStateException if the code byte is above {@code MAX_BYTE_ARRAY_LEN} but is not
   *         one of the known markers
   */
  public static int readArrayLength(DataInput in) throws IOException {
    final byte code = in.readByte();
    if (code == NULL_ARRAY) {
      return -1;
    }
    // NOTE(review): ubyteToInt is defined elsewhere; presumably it widens the byte as unsigned.
    final int inlineLength = ubyteToInt(code);
    if (inlineLength <= MAX_BYTE_ARRAY_LEN) {
      return inlineLength;
    }
    if (code == SHORT_ARRAY_LEN) {
      return in.readUnsignedShort();
    }
    if (code == INT_ARRAY_LEN) {
      return in.readInt();
    }
    throw new IllegalStateException("unexpected array length code=" + code);
  }

 

Write And Read String

A String is serialized in the following way. The encoding that is chosen depends on whether the string is ASCII-only and on its length.

Write String | Read String
/**
 * Writes {@code value} to {@code out}, prefixed by a header byte that identifies the encoding.
 *
 * <p>
 * A {@code null} value is written as the single byte {@code DSCODE.NULL_STRING}. Otherwise the
 * characters are scanned once to compute the encoded length: characters in the range
 * {@code 0x0001..0x007F} count as one byte, characters above {@code 0x07FF} as three, and all
 * others as two. The encoding is then chosen as follows:
 * <ul>
 * <li>multi-byte characters present, encoded length above {@code 0xFFFF}:
 * {@code DSCODE.HUGE_STRING}, an int character count, then {@code writeChars}</li>
 * <li>multi-byte characters present otherwise: {@code DSCODE.STRING}, then {@code writeUTF}</li>
 * <li>single-byte characters only, length above {@code 0xFFFF}:
 * {@code DSCODE.HUGE_STRING_BYTES}, an int length, then {@code writeBytes}</li>
 * <li>single-byte characters only otherwise: {@code DSCODE.STRING_BYTES}, a short length, then
 * {@code writeBytes}</li>
 * </ul>
 *
 * @param value the string to write; may be {@code null}
 * @param out the output to write to
 * @throws IOException propagated from the underlying {@code DataOutput}
 */
public static void writeString(String value, DataOutput out) throws IOException {
    InternalDataSerializer.checkOut(out);
    final boolean traceOn = logger.isTraceEnabled(LogMarker.SERIALIZER);
    if (traceOn) {
      logger.trace(LogMarker.SERIALIZER, "Writing String \"{}\"", value);
    }

    if (value == null) {
      if (traceOn) {
        logger.trace(LogMarker.SERIALIZER, "Writing NULL_STRING");
      }
      out.writeByte(DSCODE.NULL_STRING);
      return;
    }

    // writeUTF is expensive, and most strings contain only single-byte characters, so the
    // characters are inspected directly and writeUTF is used only when a multi-byte character
    // is actually present. The whole string is always scanned (no early exit) because the full
    // encoded length is needed to choose between the regular and the huge encodings (bug 40932).
    final int charCount = value.length();
    int encodedLen = charCount;
    for (int idx = 0; idx < charCount; idx++) {
      final char ch = value.charAt(idx);
      if (ch > 0x07FF) {
        encodedLen += 2;
      } else if (ch < 0x0001 || ch > 0x007F) {
        encodedLen += 1;
      }
    }

    final boolean hasMultiByteChars = encodedLen > charCount;
    if (hasMultiByteChars && encodedLen > 0xFFFF) {
      if (traceOn) {
        logger.trace(LogMarker.SERIALIZER, "Writing utf HUGE_STRING of len={}", charCount);
      }
      out.writeByte(DSCODE.HUGE_STRING);
      out.writeInt(charCount);
      out.writeChars(value);
    } else if (hasMultiByteChars) {
      if (traceOn) {
        logger.trace(LogMarker.SERIALIZER, "Writing utf STRING of len={}", charCount);
      }
      out.writeByte(DSCODE.STRING);
      out.writeUTF(value);
    } else if (charCount > 0xFFFF) {
      if (traceOn) {
        logger.trace(LogMarker.SERIALIZER, "Writing HUGE_STRING_BYTES of len={}", charCount);
      }
      out.writeByte(DSCODE.HUGE_STRING_BYTES);
      out.writeInt(charCount);
      out.writeBytes(value);
    } else {
      if (traceOn) {
        logger.trace(LogMarker.SERIALIZER, "Writing STRING_BYTES of len={}", charCount);
      }
      out.writeByte(DSCODE.STRING_BYTES);
      out.writeShort(charCount);
      out.writeBytes(value);
    }
  }
  /**
   * Reads a string whose header byte has already been consumed by the caller.
   *
   * <p>
   * The header selects the decoding:
   * <ul>
   * <li>{@code DSCODE.STRING_BYTES}: unsigned-short length, then one byte per character</li>
   * <li>{@code DSCODE.STRING}: {@code DataInput.readUTF()}</li>
   * <li>{@code DSCODE.NULL_STRING}: no payload; the result is {@code null}</li>
   * <li>{@code DSCODE.HUGE_STRING_BYTES}: int length, then one byte per character</li>
   * <li>{@code DSCODE.HUGE_STRING}: int character count, then one 2-byte char per character</li>
   * </ul>
   *
   * @param in the input positioned just after the header byte
   * @param header the header byte that was read from the stream
   * @return the decoded string, or {@code null} for {@code DSCODE.NULL_STRING}
   * @throws IOException if reading fails or the header is not one of the string headers
   */
  public static String readString(DataInput in, byte header) throws IOException {
    if (header == DSCODE.STRING_BYTES) {
      final int byteCount = in.readUnsignedShort();
      if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
        logger.trace(LogMarker.SERIALIZER, "Reading STRING_BYTES of len={}", byteCount);
      }
      return readSingleByteChars(in, byteCount);
    }
    if (header == DSCODE.STRING) {
      if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
        logger.trace(LogMarker.SERIALIZER, "Reading utf STRING");
      }
      return in.readUTF();
    }
    if (header == DSCODE.NULL_STRING) {
      if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
        logger.trace(LogMarker.SERIALIZER, "Reading NULL_STRING");
      }
      return null;
    }
    if (header == DSCODE.HUGE_STRING_BYTES) {
      final int byteCount = in.readInt();
      if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
        logger.trace(LogMarker.SERIALIZER, "Reading HUGE_STRING_BYTES of len={}", byteCount);
      }
      return readSingleByteChars(in, byteCount);
    }
    if (header == DSCODE.HUGE_STRING) {
      final int charCount = in.readInt();
      if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
        logger.trace(LogMarker.SERIALIZER, "Reading HUGE_STRING of len={}", charCount);
      }
      final char[] chars = new char[charCount];
      int pos = 0;
      while (pos < charCount) {
        chars[pos++] = in.readChar();
      }
      return new String(chars);
    }
    String s = "Unknown String header " + header;
    throw new IOException(s);
  }

  /** Reads {@code byteCount} bytes and turns each one into a character with a zero high byte. */
  private static String readSingleByteChars(DataInput in, int byteCount) throws IOException {
    final byte[] raw = new byte[byteCount];
    in.readFully(raw, 0, byteCount);
    return new String(raw, 0); // intentionally using deprecated constructor
  }
  • No labels