Purpose
The purpose of this document is to describe the native serialization of the various data types that Geode understands.
Data Types
Geode supports all the Java primitive data types as well as Java arrays and collections. Custom Java objects can be serialized through the Geode PdxSerializable and DataSerializable interfaces. The application can attach its own data serializer through the Geode DataSerializer
and PdxSerializer
interfaces. Objects implementing java.io.Serializable
can be serialized with Geode as well.
In Geode, every supported data type is associated with a single-byte type Id. To serialize any data type, Geode first writes the typeId. For variable-sized objects, it then writes the length of the serialized object followed by the serialized bytes. For fixed-size data types, it just writes the serialized bytes in Big Endian byte order.
For native serialization, strings are serialized using the Java Modified UTF-8 Format.
Data Type | Geode Region Key Types | Type Id | Value | Serialized Bytes | Description |
---|
Null | | 41 = 0x29 | null | | |
Boolean | YES | 53 = 0x35 | true | | |
Character | YES | 54 = 0x36 | 'a' | | |
Byte | YES | 55 = 0x37 | 1 | | |
Short | YES | 56 = 0x38 | 1000 | | |
Integer | YES | 57 = 0x39 | 1000 | typeId | 0x39 |
---|
bytes | 0x00 0x00 0x03 0xE8 |
| |
Long | YES | 58 = 0x3A | 1000 | typeId | 0x3A |
---|
bytes | 0x00 0x00 0x00 0x00 0x00 0x00 0x03 0xE8 |
| |
Float | YES | 59 = 0x3B | 1000f | typeId | 0x3B |
---|
bytes | 0x44 0x7A 0x00 0x00 |
| |
Double | YES | 60 = 0x3C | 1000d |
typeId | 0x3C |
---|
bytes | 0x40 0x8F 0x40 0x00 0x00 0x00 0x00 0x00 |
| |
ASCII_STRING | YES | 87 = 0x57 | "hello" | typeId | 0x57 |
---|
len | 0x00 0x05 | bytes | 0x68 0x65 0x6C 0x6C 0x6F |
| This represents an ASCII string with a maximum length of 0xFFFF. See the code snippets in the "Write And Read String" section to serialize and deserialize the string. |
UTF_STRING | YES | 42 = 0x2A | | | This represents a UTF string with a maximum length of 0xFFFF. See the code snippets in the "Write And Read String" section to serialize and deserialize the string. |
HUGE_ASCII_STRING | YES | 88 = 0x58 | | | This represents an ASCII string with a length greater than 0xFFFF. See the code snippets in the "Write And Read String" section to serialize and deserialize the string. |
HUGE_UTF_STRING | YES | 89 = 0x59 | | | This represents a UTF string with a length greater than 0xFFFF. See the code snippets in the "Write And Read String" section to serialize and deserialize the string. |
byte[] | This we plan to support | 46 = 0x2E | byte[] {1,2} | typeId | 0x2E |
---|
len | 0x02 | bytes | 0x01 0x02 |
| |
short[] | | 47 = 0x2F | short[] {1,2} | typeId | 0x2F |
---|
len | 0x02 | bytes | 0x00 0x01 0x00 0x02 |
| |
int[] | | 48 = 0x30 | int[] {1,2} | typeId | 0x30 |
---|
len | 0x02 | bytes | 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x02 |
| |
long[] | | 49 = 0x31 | long[] {1} | typeId | 0x31 |
---|
len | 0x01 | bytes | 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x01 |
| |
float[] | | 50 = 0x32 | float[] {2.0f} | typeId | 0x32 |
---|
len | 0x01 | bytes | 0x40 0x00 0x00 0x00 |
| |
double[] | | 51 = 0x33 | double[] {2.0d} | typeId | 0x33 |
---|
len | 0x01 | bytes | 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
| |
string[] | | 64 = 0x40 | String[]{"hello", "world"} | typeId | 0x40 |
---|
len | 0x02 | "hello" bytes | 0x57 - ASCII_STRING 0x00 0x05 0x68 0x65 0x6c 0x6c 0x6f | "world" bytes | 0x57 - ASCII_STRING 0x00 0x05 - len 0x77 0x6f 0x72 0x6c 0x64
|
| |
Map | | 67 = 0x43 | Map s = new HashMap<>(); s.put("hello", "world") | typeId | 0x43 |
---|
len | 0x01 | "hello" bytes | 0x57 - ASCII_STRING 0x00 0x05 0x68 0x65 0x6c 0x6c 0x6f | "world" bytes | 0x57 - ASCII_STRING 0x00 0x05 - len 0x77 0x6f 0x72 0x6c 0x64 |
| |
Set | | 66 = 0x42 | Set s = new HashSet(); s.add("hello"); s.add("world");
| typeId | 0x42 |
---|
len | 0x02 | "hello" bytes | 0x57 - ASCII_STRING 0x00 0x05 0x68 0x65 0x6c 0x6c 0x6f | "world" bytes | 0x57 - ASCII_STRING 0x00 0x05 - len 0x77 0x6f 0x72 0x6c 0x64 |
| |
List | | 10 = 0x0a | List s = new LinkedList(); s.add("hello"); s.add("world"); | typeId | 0x0a |
---|
len | 0x02 | "hello" bytes | 0x57 - ASCII_STRING 0x00 0x05 0x68 0x65 0x6c 0x6c 0x6f | "world" bytes | 0x57 - ASCII_STRING 0x00 0x05 - len 0x77 0x6f 0x72 0x6c 0x64 |
| |
ArrayList | | 65=0x41 | List s = new ArrayList(); s.add("hello"); s.add("world"); | typeId | 0x41 |
---|
len | 0x02 | "hello" bytes | 0x57 - ASCII_STRING 0x00 0x05 0x68 0x65 0x6c 0x6c 0x6f | "world" bytes | 0x57 - ASCII_STRING 0x00 0x05 - len 0x77 0x6f 0x72 0x6c 0x64 |
| |
PDX_SERIALIZATION | | 93 = 0x5D | | | A Java object can implement the PdxSerializable interface to serialize its data in PDX format. |
PDX_SERIALIZER | | | | | The application can implement the PdxSerializer interface and then install it with the Geode cache to serialize data in PDX format. |
DATA_SERIALIZATION | | | | | A Java object can implement the DataSerializable interface to serialize its data. |
USER_SERIALIZATION | | | | | The application can implement the DataSerializer interface and then install it with the Geode cache to serialize data. |
JAVA_SERIALIZATION | | 44 = 0x2C | | | |
Calculate Collection/Array Size
Geode encodes the size of a collection or array in the following way:
Write Array Length function | Read Array Length Function |
---|
/** Length code written in place of a length when the array itself is null. */
public static final byte NULL_ARRAY = -1;
/**
 * Length code announcing that the array length is encoded as an unsigned short in the next 2
 * bytes.
 *
 * @since GemFire 5.7
 */
private static final byte SHORT_ARRAY_LEN = -2;
/**
 * Length code announcing that the array length is encoded as an int in the next 4 bytes.
 *
 * @since GemFire 5.7
 */
public static final byte INT_ARRAY_LEN = -3;
/**
 * Largest length that is written directly as a single byte: the byte value -4 interpreted as an
 * unsigned value, i.e. 252.
 */
private static final int MAX_BYTE_ARRAY_LEN = 0xFF & (byte) -4;
/**
 * Writes an array length to {@code out} using a variable-width encoding.
 *
 * <ul>
 * <li>{@code -1} is written as the single byte {@code NULL_ARRAY}.</li>
 * <li>Any other value up to {@code MAX_BYTE_ARRAY_LEN} is written as a single byte.</li>
 * <li>Values up to {@code 0xFFFF} are written as {@code SHORT_ARRAY_LEN} followed by a 2-byte
 * short.</li>
 * <li>Everything larger is written as {@code INT_ARRAY_LEN} followed by a 4-byte int.</li>
 * </ul>
 *
 * @param len the length to write; {@code -1} denotes a null array
 * @param out the stream to write to
 * @throws IOException if writing to {@code out} fails
 */
public static void writeArrayLength(int len, DataOutput out) throws IOException {
  if (len == -1) {
    out.writeByte(NULL_ARRAY);
    return;
  }
  if (len <= MAX_BYTE_ARRAY_LEN) {
    // Small enough to be its own length byte; no code prefix is needed.
    out.writeByte(len);
    return;
  }
  final boolean fitsInUnsignedShort = len <= 0xFFFF;
  if (fitsInUnsignedShort) {
    out.writeByte(SHORT_ARRAY_LEN);
    out.writeShort(len);
    return;
  }
  out.writeByte(INT_ARRAY_LEN);
  out.writeInt(len);
}
|
/**
 * Reads an array length from {@code in}: a single code byte, optionally followed by a 2-byte or
 * 4-byte length.
 *
 * @param in the stream to read from
 * @return the decoded length, or {@code -1} if the code byte is {@code NULL_ARRAY}
 * @throws IOException if reading from {@code in} fails
 * @throws IllegalStateException if the code byte is above {@code MAX_BYTE_ARRAY_LEN} but is
 *         neither {@code SHORT_ARRAY_LEN} nor {@code INT_ARRAY_LEN}
 */
public static int readArrayLength(DataInput in) throws IOException {
  final byte code = in.readByte();
  if (code == NULL_ARRAY) {
    return -1;
  }
  // NOTE(review): ubyteToInt presumably widens the byte to its unsigned 0..255 value - confirm.
  final int inlineLength = ubyteToInt(code);
  if (inlineLength <= MAX_BYTE_ARRAY_LEN) {
    // The code byte itself is the length.
    return inlineLength;
  }
  if (code == SHORT_ARRAY_LEN) {
    return in.readUnsignedShort();
  }
  if (code == INT_ARRAY_LEN) {
    return in.readInt();
  }
  throw new IllegalStateException("unexpected array length code=" + code);
}
|
Write And Read String
A String is serialized in the following way. The encoding distinguishes ASCII strings from non-ASCII strings, and strings whose length fits in 0xFFFF from longer ("huge") ones.
Write String | Read String |
---|
/**
 * Writes {@code value} to {@code out} as a one-byte header followed by the string data. The
 * header identifies which of the following forms was written:
 *
 * <ul>
 * <li>{@code DSCODE.NULL_STRING}: {@code value} is null; nothing follows the header.</li>
 * <li>{@code DSCODE.STRING_BYTES}: every char is in 0x0001-0x007F and the length is at most
 * 0xFFFF; a 2-byte length and then one byte per char follow.</li>
 * <li>{@code DSCODE.HUGE_STRING_BYTES}: as above but longer than 0xFFFF; a 4-byte length and
 * then one byte per char follow.</li>
 * <li>{@code DSCODE.STRING}: some char needs more than one byte and the encoded length is at
 * most 0xFFFF; the string is written with {@link DataOutput#writeUTF(String)}.</li>
 * <li>{@code DSCODE.HUGE_STRING}: some char needs more than one byte and the encoded length
 * exceeds 0xFFFF; a 4-byte char count and then the chars via
 * {@link DataOutput#writeChars(String)} follow.</li>
 * </ul>
 *
 * @param value the string to write; may be null
 * @param out the stream to write to
 * @throws IOException if writing to {@code out} fails
 */
public static void writeString(String value, DataOutput out) throws IOException {
  // NOTE(review): checkOut is defined elsewhere; presumably it validates the stream - confirm.
  InternalDataSerializer.checkOut(out);
  final boolean traceEnabled = logger.isTraceEnabled(LogMarker.SERIALIZER);
  if (traceEnabled) {
    logger.trace(LogMarker.SERIALIZER, "Writing String \"{}\"", value);
  }
  if (value == null) {
    if (traceEnabled) {
      logger.trace(LogMarker.SERIALIZER, "Writing NULL_STRING");
    }
    out.writeByte(DSCODE.NULL_STRING);
    return;
  }
  // writeUTF is expensive (it copies the chars, scans them to size the output, then encodes
  // them into a second buffer). Most strings are single-byte, so the string is scanned once
  // with charAt and writeUTF is used only when a multi-byte char is actually present.
  final int len = value.length();
  // Number of bytes the string needs when encoded; tracked for bug 40932.
  int utfLen = len;
  // The scan deliberately has no early exit on the first non-ASCII char: the complete encoded
  // length is required (bug 40932), and pure-ASCII strings never took the early exit anyway.
  for (int i = 0; i < len; i++) {
    final char c = value.charAt(i);
    if (c == 0x0000 || c > 0x007F) {
      // Not a single-byte char: chars above 0x07FF take three bytes, the rest take two.
      utfLen += (c > 0x07FF) ? 2 : 1;
    }
  }
  final boolean needsMultiByte = utfLen > len;
  if (needsMultiByte && utfLen > 0xFFFF) {
    if (traceEnabled) {
      logger.trace(LogMarker.SERIALIZER, "Writing utf HUGE_STRING of len={}", len);
    }
    out.writeByte(DSCODE.HUGE_STRING);
    out.writeInt(len);
    out.writeChars(value);
  } else if (needsMultiByte) {
    if (traceEnabled) {
      logger.trace(LogMarker.SERIALIZER, "Writing utf STRING of len={}", len);
    }
    out.writeByte(DSCODE.STRING);
    out.writeUTF(value);
  } else if (len > 0xFFFF) {
    if (traceEnabled) {
      logger.trace(LogMarker.SERIALIZER, "Writing HUGE_STRING_BYTES of len={}", len);
    }
    out.writeByte(DSCODE.HUGE_STRING_BYTES);
    out.writeInt(len);
    out.writeBytes(value);
  } else {
    if (traceEnabled) {
      logger.trace(LogMarker.SERIALIZER, "Writing STRING_BYTES of len={}", len);
    }
    out.writeByte(DSCODE.STRING_BYTES);
    out.writeShort(len);
    out.writeBytes(value);
  }
}
|
/**
 * Reads the body of a serialized string whose one-byte header has already been consumed.
 *
 * @param in the stream positioned just after the header byte
 * @param header the header byte that was read; selects how the remaining bytes are decoded
 * @return the decoded string, or {@code null} when {@code header} is
 *         {@code DSCODE.NULL_STRING}
 * @throws IOException if reading from {@code in} fails or {@code header} is not one of the
 *         string headers handled here
 */
public static String readString(DataInput in, byte header) throws IOException {
  if (header == DSCODE.STRING_BYTES) {
    // One byte per char, preceded by an unsigned 2-byte length.
    final int byteCount = in.readUnsignedShort();
    if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
      logger.trace(LogMarker.SERIALIZER, "Reading STRING_BYTES of len={}", byteCount);
    }
    final byte[] singleByteChars = new byte[byteCount];
    in.readFully(singleByteChars, 0, byteCount);
    // The deprecated (byte[], hibyte) constructor is used on purpose.
    return new String(singleByteChars, 0);
  }
  if (header == DSCODE.STRING) {
    if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
      logger.trace(LogMarker.SERIALIZER, "Reading utf STRING");
    }
    return in.readUTF();
  }
  if (header == DSCODE.NULL_STRING) {
    if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
      logger.trace(LogMarker.SERIALIZER, "Reading NULL_STRING");
    }
    return null;
  }
  if (header == DSCODE.HUGE_STRING_BYTES) {
    // One byte per char, preceded by a 4-byte length.
    final int byteCount = in.readInt();
    if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
      logger.trace(LogMarker.SERIALIZER, "Reading HUGE_STRING_BYTES of len={}", byteCount);
    }
    final byte[] singleByteChars = new byte[byteCount];
    in.readFully(singleByteChars, 0, byteCount);
    // The deprecated (byte[], hibyte) constructor is used on purpose.
    return new String(singleByteChars, 0);
  }
  if (header == DSCODE.HUGE_STRING) {
    // Two bytes per char, preceded by a 4-byte char count.
    final int charCount = in.readInt();
    if (logger.isTraceEnabled(LogMarker.SERIALIZER)) {
      logger.trace(LogMarker.SERIALIZER, "Reading HUGE_STRING of len={}", charCount);
    }
    final char[] chars = new char[charCount];
    int next = 0;
    while (next < charCount) {
      chars[next] = in.readChar();
      next++;
    }
    return new String(chars);
  }
  throw new IOException("Unknown String header " + header);
}
|