.NET - JSON compression by replacing key names

When serializing using the Newtonsoft.Json library, we can specify a custom ContractResolver to e.g. make the properties appear to have a different name. We can use this possibility to shorten the key names.

Important note: although there may be specific circumstances in which the method described below is suitable, there are many better ways to reduce JSON size.

Firstly, some test classes:

/// <summary>
/// Sample root type used to demonstrate the key-name compression.
/// The declaration order of the properties is the order in which they are serialized.
/// </summary>
class Company
{
  /// <summary>Name of the company.</summary>
  public string Name { get; set; }
  /// <summary>The person who owns the company.</summary>
  public Person Owner { get; set; }
  /// <summary>The people employed by the company.</summary>
  public List<Person> Employees { get; set; }
}

/// <summary>
/// Sample nested type; it shares the property name "Name" with <see cref="Company"/>.
/// </summary>
class Person
{
  /// <summary>First name.</summary>
  public string Name { get; set; }
  /// <summary>Family name.</summary>
  public string Surname { get; set; }
  // Marked with [JsonIgnore], so this property does not appear in the serialized JSON.
  [JsonIgnore]
  public string Details { get; set; }
}

And a test object we will serialize:

// Test data: one company with an owner and nine employees, all with very short values,
// so that the property names make up most of the serialized text.
var testObject = new Company
{
  Name = "Company1",
  Owner = new Person { Name = "a", Surname = "1" },
  Employees = new List<Person>
  {
    new Person { Name = "b", Surname = "2" },
    new Person { Name = "c", Surname = "3" },
    new Person { Name = "d", Surname = "4" },
    new Person { Name = "e", Surname = "5" },
    new Person { Name = "f", Surname = "6" },
    new Person { Name = "g", Surname = "7" },
    new Person { Name = "h", Surname = "8" },
    new Person { Name = "i", Surname = "9" },
    new Person { Name = "j", Surname = "10" }
  }
};

When serializing it using the default method:

// Baseline for comparison: plain Json.NET serialization that keeps the original property names.
JsonConvert.SerializeObject(testObject, Formatting.None);

We will get:

{"Name":"Company1","Owner":{"Name":"a","Surname":"1"},"Employees":[{"Name":"b","Surname":"2"},{"Name":"c","Surname":"3"},{"Name":"d","Surname":"4"},{"Name":"e","Surname":"5"},{"Name":"f","Surname":"6"},{"Name":"g","Surname":"7"},{"Name":"h","Surname":"8"},{"Name":"i","Surname":"9"},{"Name":"j","Surname":"10"}]}

When replacing the key names, we will get a shorter output:

{"A":"Company1","B":{"A":"a","D":"1"},"C":[{"A":"b","D":"2"},{"A":"c","D":"3"},{"A":"d","D":"4"},{"A":"e","D":"5"},{"A":"f","D":"6"},{"A":"g","D":"7"},{"A":"h","D":"8"},{"A":"i","D":"9"},{"A":"j","D":"10"}]};Name,Owner,Employees,Surname,Details

Of course, we have to actually store the original key names to be able to deserialize it again. The key names are listed at the end (after the semicolon). The first key listed here is always translated to A, the second to B, etc. - so the mapping is implicitly defined by the position in the list.

The implementation:

public static class CompressingSerialization
{
  /// <summary>
  /// Serializes <paramref name="o"/> to JSON in which every property name is replaced by a
  /// short generated key, and appends the list of original names after a ';' separator.
  /// </summary>
  /// <param name="o">The object to serialize.</param>
  /// <returns>
  /// The compressed JSON, followed by ';' and the comma-separated original property names.
  /// The position of a name in that list determines its short key.
  /// </returns>
  /// <exception cref="JsonSerializationException">
  /// A registered property name contains ',' or ';' and therefore cannot be stored in the
  /// trailing name list without corrupting it.
  /// </exception>
  public static string SerializeWithNamesCompression(object o)
  {
    JsonCompressingContext compressingContext = new JsonCompressingContext();
    compressingContext.InitEmpty();
    string s = JsonConvert.SerializeObject(o, new JsonSerializerSettings { ContractResolver = new CustomDataContractResolver(compressingContext) });

    IList<string> propertyNames = compressingContext.GetPropertyNames();

    // Deserialization finds the name list via the last ';' and splits it on ','.
    // A name containing either character would silently shift or truncate the mapping,
    // so fail loudly here instead of producing output that cannot be read back.
    char[] separators = { ',', ';' };
    foreach (string propertyName in propertyNames)
    {
      if (propertyName.IndexOfAny(separators) >= 0)
      {
        throw new JsonSerializationException("Property name contains ',' or ';' and cannot be stored in the name list: " + propertyName);
      }
    }

    return s + ";" + string.Join(",", propertyNames);
  }

  /// <summary>
  /// Restores an object from text produced by <c>SerializeWithNamesCompression</c>:
  /// compressed JSON, a ';' separator, and the comma-separated original property names.
  /// </summary>
  /// <typeparam name="T">The type to deserialize into.</typeparam>
  /// <param name="str">The compressed JSON followed by ';' and the name list.</param>
  /// <returns>The deserialized object.</returns>
  /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
  /// <exception cref="JsonSerializationException">
  /// <paramref name="str"/> does not contain the ';' separator.
  /// </exception>
  public static T DeserializeWithNamesCompression<T>(string str)
  {
    if (str == null)
    {
      throw new System.ArgumentNullException("str");
    }

    // The name list is everything after the LAST ';' - the JSON part itself may contain
    // semicolons inside string values.
    int dictionaryBegin = str.LastIndexOf(';');
    if (dictionaryBegin < 0)
    {
      throw new JsonSerializationException("Missing ';' separator between the JSON and the property name list.");
    }

    JsonCompressingContext compressingContext = new JsonCompressingContext();
    compressingContext.InitWithPropertyNames(new List<string>(str.Substring(dictionaryBegin + 1).Split(',')));

    string jsonPart = str.Substring(0, dictionaryBegin);
    return JsonConvert.DeserializeObject<T>(jsonPart, new JsonSerializerSettings { ContractResolver = new CustomDataContractResolver(compressingContext) });
  }

  /// <summary>
  /// Holds the ordered list of original property names. The position of a name in the list
  /// determines the short key that replaces it in the JSON.
  /// </summary>
  public class JsonCompressingContext
  {
    private List<string> PropertyNames;

    /// <summary>Starts with no registered names (used before serializing).</summary>
    public void InitEmpty()
    {
      PropertyNames = new List<string>();
    }

    /// <summary>Starts from an existing name list (used before deserializing).</summary>
    /// <param name="propertyNames">The names in mapping order; the list is used as-is.</param>
    public void InitWithPropertyNames(List<string> propertyNames)
    {
      PropertyNames = propertyNames;
    }

    /// <summary>
    /// Returns the short key for <paramref name="propertyName"/>, appending the name to the
    /// list first when it has not been seen yet.
    /// </summary>
    /// <param name="propertyName">The original property name.</param>
    /// <returns>The short key derived from the name's position in the list.</returns>
    public string RegisterPropertyName(string propertyName)
    {
      int position = PropertyNames.IndexOf(propertyName);

      if (position < 0)
      {
        // Unknown name: it takes the next free slot at the end of the list.
        PropertyNames.Add(propertyName);
        position = PropertyNames.Count - 1;
      }

      return IndexToShortString(position);
    }

    /// <summary>Returns a read-only view of the registered names in mapping order.</summary>
    public IList<string> GetPropertyNames()
    {
      return PropertyNames.AsReadOnly();
    }

    /// <summary>
    /// Encodes a list position in base 62 using 'A'-'Z', 'a'-'z', '0'-'9' as digits,
    /// least significant digit first (0 is "A", 61 is "9", 62 is "AB").
    /// </summary>
    private static string IndexToShortString(int index)
    {
      const string Digits = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

      var encoded = new StringBuilder();
      int remaining = index;

      // do/while so that position 0 still yields one digit.
      do
      {
        encoded.Append(Digits[remaining % Digits.Length]);
        remaining /= Digits.Length;
      }
      while (remaining != 0);

      return encoded.ToString();
    }
  }

  /// <summary>
  /// Contract resolver that renames every property to the short key supplied by a
  /// <see cref="JsonCompressingContext"/>.
  /// </summary>
  public class CustomDataContractResolver : DefaultContractResolver
  {
    private readonly JsonCompressingContext compressingContext;
    private readonly bool skipIgnoredProperties;

    /// <summary>Creates a resolver that registers property names in the given context.</summary>
    /// <param name="compressingContext">The context that maps original names to short keys.</param>
    /// <param name="skipIgnoredProperties">
    /// When true, properties that Json.NET marks as ignored (for example via [JsonIgnore])
    /// are left untouched and not registered, so they do not take up space in the name list.
    /// Defaults to false, which registers every property exactly as before.
    /// </param>
    public CustomDataContractResolver(JsonCompressingContext compressingContext, bool skipIgnoredProperties = false)
    {
      this.compressingContext = compressingContext;
      this.skipIgnoredProperties = skipIgnoredProperties;
    }

    /// <summary>
    /// Builds the property as the base resolver would, then replaces its JSON name with
    /// the short key registered for the original name.
    /// </summary>
    protected override JsonProperty CreateProperty(MemberInfo member, MemberSerialization memberSerialization)
    {
      var property = base.CreateProperty(member, memberSerialization);

      // CreateProperty is also called for members that are never written or read.
      // Registering them only makes the name list longer, so optionally leave them out.
      if (skipIgnoredProperties && property.Ignored)
      {
        return property;
      }

      property.PropertyName = compressingContext.RegisterPropertyName(property.PropertyName);
      return property;
    }
  }
}

And usage:

// With key-name compression:
var s = CompressingSerialization.SerializeWithNamesCompression(testObject);
var testObject = CompressingSerialization.DeserializeWithNamesCompression<Company>(s);

// instead of:
var s = JsonConvert.SerializeObject(testObject);
var testObject = JsonConvert.DeserializeObject<Company>(s);

This approach worked well on the test data presented at the beginning, mainly because:

  • the data contained an array with a considerable number of objects, so the same property names were repeated many times (e.g. to shorten 10 occurrences of the same property name, we only had to write the original long name once, in the mapping list at the end)
  • the values were quite short, so the key names took a considerable amount of space in the whole serialized JSON

Note that the mapping is global for the whole JSON, not only for each specific array. If the same property name appears in some other part of the JSON, the same mapping is used (the name will still appear only once in the mapping list at the end).

When using this implementation of CompressingSerialization, be sure to measure the performance to see whether it is suitable for your task. I did a quick, rough performance measurement and found that it is a lot slower than plain SerializeObject and DeserializeObject. This is mainly because a new CustomDataContractResolver is created for each call, so the type information in DefaultContractResolver (which is normally cached globally) has to be rebuilt every time.

Some considerations about how to improve the performance:

  • always re-use the same DefaultContractResolver somehow
  • cache the short property names (at least the most used ones, e.g. “A” to “Z”) so that a new StringBuilder and a new one-character string do not have to be created every time

One more problem: in the example above, it registered even the property name Details (it looks like CreateProperty is called for it), even though this property is marked with JsonIgnore and thus never actually used.

Additionally, here is an alternative CompressingSerialization that uses a custom JsonConverter instead of a custom ContractResolver. This implementation is more complicated and it is incomplete (e.g. it does not handle all token types). But it is a good example of another way to influence the behavior of the Newtonsoft.Json library - by overriding a different part of the processing.

public static class CompressingSerialization
{
  /// <summary>
  /// Serializes <paramref name="o"/> to JSON in which every property name is replaced by a
  /// short generated key, and appends the list of original names after a ';' separator.
  /// </summary>
  /// <param name="o">The object to serialize.</param>
  /// <returns>
  /// The compressed JSON, followed by ';' and the comma-separated original property names.
  /// The position of a name in that list determines its short key.
  /// </returns>
  /// <exception cref="JsonSerializationException">
  /// A registered property name contains ',' or ';' and therefore cannot be stored in the
  /// trailing name list without corrupting it.
  /// </exception>
  public static string SerializeWithNamesCompression(object o)
  {
    JsonCompressingContext compressingContext = new JsonCompressingContext();
    compressingContext.InitEmpty();
    string s = JsonConvert.SerializeObject(o, new CustomJsonConverter(compressingContext));

    IList<string> propertyNames = compressingContext.GetPropertyNames();

    // Deserialization finds the name list via the last ';' and splits it on ','.
    // A name containing either character would silently shift or truncate the mapping,
    // so fail loudly here instead of producing output that cannot be read back.
    char[] separators = { ',', ';' };
    foreach (string propertyName in propertyNames)
    {
      if (propertyName.IndexOfAny(separators) >= 0)
      {
        throw new JsonSerializationException("Property name contains ',' or ';' and cannot be stored in the name list: " + propertyName);
      }
    }

    return s + ";" + string.Join(",", propertyNames);
  }

  /// <summary>
  /// Restores an object from text produced by <c>SerializeWithNamesCompression</c>:
  /// compressed JSON, a ';' separator, and the comma-separated original property names.
  /// </summary>
  /// <typeparam name="T">The type to deserialize into.</typeparam>
  /// <param name="str">The compressed JSON followed by ';' and the name list.</param>
  /// <returns>The deserialized object.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
  /// <exception cref="JsonSerializationException">
  /// <paramref name="str"/> does not contain the ';' separator.
  /// </exception>
  public static T DeserializeWithNamesCompression<T>(string str)
  {
    if (str == null)
    {
      throw new ArgumentNullException("str");
    }

    // The name list is everything after the LAST ';' - the JSON part itself may contain
    // semicolons inside string values.
    int dictionaryBegin = str.LastIndexOf(';');
    if (dictionaryBegin < 0)
    {
      throw new JsonSerializationException("Missing ';' separator between the JSON and the property name list.");
    }

    JsonCompressingContext compressingContext = new JsonCompressingContext();
    compressingContext.InitWithPropertyNames(new List<string>(str.Substring(dictionaryBegin + 1).Split(',')));

    string jsonPart = str.Substring(0, dictionaryBegin);
    return JsonConvert.DeserializeObject<T>(jsonPart, new CustomJsonConverter(compressingContext));
  }

  /// <summary>
  /// Keeps the two-way mapping between original property names and their short keys.
  /// The position of a name in the list determines its key: position 0 is "A", 25 is "Z",
  /// 26 is "a", 52 is "0", and positions from 62 upwards use more characters, with the
  /// least significant base-62 digit first (62 is "AB").
  /// </summary>
  public class JsonCompressingContext
  {
    private const int Radix = 62;
    private const string Digits = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

    // Initialized inline so the context is usable even when no Init method was called.
    private List<string> PropertyNames = new List<string>();

    // Name -> position lookup. RegisterPropertyName runs once per written property,
    // so a linear search of the list for every call would be needlessly slow.
    private Dictionary<string, int> PropertyIndexes = new Dictionary<string, int>();

    /// <summary>Resets the context to contain no names (used before serializing).</summary>
    public void InitEmpty()
    {
      PropertyNames = new List<string>();
      PropertyIndexes = new Dictionary<string, int>();
    }

    /// <summary>Resets the context to a known name list (used before deserializing).</summary>
    /// <param name="propertyNames">
    /// The names in mapping order. The list is copied; when a name occurs more than once,
    /// its first position is the one used for registration lookups.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="propertyNames"/> is null.</exception>
    public void InitWithPropertyNames(List<string> propertyNames)
    {
      if (propertyNames == null)
      {
        throw new ArgumentNullException("propertyNames");
      }

      PropertyNames = new List<string>(propertyNames);
      PropertyIndexes = new Dictionary<string, int>(PropertyNames.Count);

      for (int i = 0; i < PropertyNames.Count; i++)
      {
        string name = PropertyNames[i];

        // First occurrence wins, matching what a search from the start of the list finds.
        if (name != null && !PropertyIndexes.ContainsKey(name))
        {
          PropertyIndexes.Add(name, i);
        }
      }
    }

    /// <summary>
    /// Returns the short key for <paramref name="propertyName"/>, appending the name to the
    /// list first when it has not been registered yet.
    /// </summary>
    /// <param name="propertyName">The original property name.</param>
    /// <returns>The short key derived from the name's position in the list.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="propertyName"/> is null.</exception>
    public string RegisterPropertyName(string propertyName)
    {
      if (propertyName == null)
      {
        throw new ArgumentNullException("propertyName");
      }

      int index;
      if (!PropertyIndexes.TryGetValue(propertyName, out index))
      {
        index = PropertyNames.Count;
        PropertyNames.Add(propertyName);
        PropertyIndexes.Add(propertyName, index);
      }

      return IndexToShortString(index);
    }

    /// <summary>Returns the original property name that a short key stands for.</summary>
    /// <param name="shortString">A short key such as "A" or "AB".</param>
    /// <returns>The name stored at the position the key decodes to.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="shortString"/> is null.</exception>
    /// <exception cref="FormatException">
    /// <paramref name="shortString"/> is empty or contains a character outside A-Z, a-z, 0-9.
    /// </exception>
    /// <exception cref="OverflowException">The key decodes to a value larger than an int.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The key decodes to a position beyond the end of the name list.
    /// </exception>
    public string GetPropertyName(string shortString)
    {
      if (shortString == null)
      {
        throw new ArgumentNullException("shortString");
      }

      int index = ShortStringToIndex(shortString);
      if (index >= PropertyNames.Count)
      {
        throw new ArgumentOutOfRangeException("shortString", shortString, "No property name is registered for this short name.");
      }

      return PropertyNames[index];
    }

    /// <summary>Returns a read-only view of the registered names in mapping order.</summary>
    public IList<string> GetPropertyNames()
    {
      return PropertyNames.AsReadOnly();
    }

    /// <summary>Encodes a list position as a base-62 key, least significant digit first.</summary>
    private static string IndexToShortString(int index)
    {
      StringBuilder sb = new StringBuilder();

      // do/while so that position 0 still produces one digit ("A").
      do
      {
        sb.Append(Digits[index % Radix]);
        index /= Radix;
      }
      while (index != 0);

      return sb.ToString();
    }

    /// <summary>Decodes a base-62 key produced by <see cref="IndexToShortString"/>.</summary>
    private static int ShortStringToIndex(string shortString)
    {
      if (shortString.Length == 0)
      {
        // An empty key would otherwise decode to position 0 and silently alias "A".
        throw new FormatException("Invalid short name: it must not be empty");
      }

      int index = 0;

      // The last character is the most significant digit, so walk from the end.
      for (int i = shortString.Length - 1; i >= 0; --i)
      {
        char c = shortString[i];
        int digit;

        if (c >= 'A' && c <= 'Z')
        {
          digit = c - 'A';
        }
        else if (c >= 'a' && c <= 'z')
        {
          digit = 26 + c - 'a';
        }
        else if (c >= '0' && c <= '9')
        {
          digit = 52 + c - '0';
        }
        else
        {
          throw new FormatException("Invalid character: " + c);
        }

        // Fail on overflow instead of wrapping around to an unrelated position.
        checked
        {
          index = index * Radix + digit;
        }
      }

      return index;
    }
  }

  /// <summary>
  /// Converter that writes every JSON property name as the short key supplied by a
  /// <see cref="JsonCompressingContext"/> and translates the keys back when reading.
  /// </summary>
  /// <remarks>
  /// Reading is deliberately simple and incomplete: objects are created through their
  /// parameterless constructor and filled via public properties, arrays must map to a
  /// generic type that implements <see cref="IList"/>, and only string, integer, float,
  /// boolean and null values are understood.
  /// </remarks>
  public class CustomJsonConverter : JsonConverter
  {
    private readonly JsonCompressingContext compressingContext;

    /// <summary>Creates a converter that uses the given context for the name mapping.</summary>
    /// <param name="compressingContext">The context that maps original names to short keys.</param>
    public CustomJsonConverter(JsonCompressingContext compressingContext)
    {
      this.compressingContext = compressingContext;
    }

    /// <summary>
    /// Writes <paramref name="value"/> as JSON with all property names replaced by short keys.
    /// </summary>
    /// <param name="writer">The writer that receives the JSON.</param>
    /// <param name="value">The object (or JToken) to write.</param>
    /// <param name="serializer">Not used; the value is converted with default settings.</param>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
      // Build the token tree once. Passing 'serializer' to FromObject would route the
      // conversion back into this converter, so the default settings are used instead.
      JToken root = value as JToken ?? JToken.FromObject(value);
      WriteToken(writer, root);
    }

    /// <summary>Writes one token, recursing into arrays and objects.</summary>
    private void WriteToken(JsonWriter writer, JToken token)
    {
      switch (token.Type)
      {
        case JTokenType.Array:
          writer.WriteStartArray();
          foreach (JToken item in token.Children())
          {
            WriteToken(writer, item);
          }
          writer.WriteEndArray();
          break;

        case JTokenType.Object:
          writer.WriteStartObject();
          foreach (JProperty property in ((JObject)token).Properties())
          {
            writer.WritePropertyName(compressingContext.RegisterPropertyName(property.Name));
            WriteToken(writer, property.Value);
          }
          writer.WriteEndObject();
          break;

        default:
          // Plain values (and anything else) are written unchanged.
          token.WriteTo(writer);
          break;
      }
    }

    /// <summary>
    /// Reads one value of type <paramref name="objectType"/>, translating short keys back
    /// to the original property names. On return the reader has been advanced past the
    /// value that was read.
    /// </summary>
    /// <param name="reader">The reader, positioned on the first token of the value.</param>
    /// <param name="objectType">The type to create.</param>
    /// <param name="existingValue">Not used to populate the result.</param>
    /// <param name="serializer">The calling serializer; passed through to nested reads.</param>
    /// <returns>The value that was read, or null for a JSON null.</returns>
    /// <exception cref="JsonSerializationException">
    /// The token type is not supported, or the JSON structure is not as expected.
    /// </exception>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
      switch (reader.TokenType)
      {
        case JsonToken.StartArray:
          return ReadArray(reader, objectType, existingValue, serializer);

        case JsonToken.StartObject:
          return ReadObject(reader, objectType, serializer);

        case JsonToken.String:
        {
          var result = (string)reader.Value;
          reader.Read();
          return result;
        }

        case JsonToken.Null:
          reader.Read();
          return null;

        case JsonToken.Integer:
        case JsonToken.Float:
        case JsonToken.Boolean:
        {
          // The reader's value type (e.g. long) rarely equals the property type (e.g. int),
          // so let JValue convert it to the requested type.
          object result = new JValue(reader.Value).ToObject(objectType);
          reader.Read();
          return result;
        }

        default:
          throw new JsonSerializationException("Unhandled token type: " + reader.TokenType);
      }
    }

    /// <summary>Reads a JSON array into a new instance of a generic list type.</summary>
    private object ReadArray(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
      Type itemType = objectType.GenericTypeArguments[0];
      var result = (IList)Activator.CreateInstance(objectType);
      reader.Read();

      // Each nested read leaves the reader on the token after the item it consumed.
      while (reader.TokenType != JsonToken.EndArray)
      {
        result.Add(ReadJson(reader, itemType, existingValue, serializer));
      }

      reader.Read();
      return result;
    }

    /// <summary>Reads a JSON object into a new instance of <paramref name="objectType"/>.</summary>
    private object ReadObject(JsonReader reader, Type objectType, JsonSerializer serializer)
    {
      var result = Activator.CreateInstance(objectType);
      reader.Read();

      while (reader.TokenType != JsonToken.EndObject)
      {
        if (reader.TokenType != JsonToken.PropertyName)
        {
          throw new JsonSerializationException("Invalid state");
        }

        string name = (string)reader.Value;

        // Keys may be longer than one character once more than 62 names are registered,
        // so only the empty key is rejected here; invalid characters are rejected by the context.
        if (name.Length == 0)
        {
          throw new JsonSerializationException("Invalid property name: " + name);
        }

        var prop = result.GetType().GetProperty(compressingContext.GetPropertyName(name));

        // Move from the property name to its value.
        reader.Read();

        if (prop == null)
        {
          // The target type has no such property: skip the whole value, then step past it.
          reader.Skip();
          reader.Read();
          continue;
        }

        var propValue = ReadJson(reader, prop.PropertyType, null, serializer);
        prop.SetValue(result, propValue);
      }

      reader.Read();
      return result;
    }

    /// <summary>This converter always takes part in reading.</summary>
    public override bool CanRead
    {
      get { return true; }
    }

    /// <summary>Claims every type, so the whole object graph goes through this converter.</summary>
    public override bool CanConvert(Type objectType)
    {
      return true;
    }
  }
}
Written on January 11, 2020