Share this page

Learn X in Y minutes

Where X=Protocol Buffers

Protocol buffers are Google's language-neutral, platform-neutral, extensible mechanism for serializing structured data – think XML, but smaller, faster, and simpler. You define how you want your data to be structured once, then you can use special generated source code to easily write and read your structured data to and from a variety of data streams and using a variety of languages. Protocol Buffers are Schema Of Messages. They are language agnostic. They can be converted to binary and converted back to message formats using the code generated by the protoc compiler for various languages.

/*
* Language Syntax
*/

/*
* Specifying Syntax Of Protocol Buffer Version
* Specifying Which Protocol Buffer Version To Use
* It can be usually proto3 or proto2
*/
syntax = "proto3";

/*
* Declaring Message In Protocol Buffer:
* As you can see, each field in the message definition has a unique number.
* These field numbers are used to identify your fields in the message binary format,
* and should not be changed once your message type is in use.
* Note that field numbers in the range 1 through 15 take one byte to encode, including the field number and the field's type (you can find out more about this in Protocol Buffer Encoding).
* Field numbers in the range 16 through 2047 take two bytes. So you should reserve the numbers 1 through 15 for very frequently occurring message elements.
* Remember to leave some room for frequently occurring elements that might be added in the future.
* The smallest field number you can specify is 1, and the largest is 2^29 - 1, or 536,870,911.
* You also cannot use the numbers 19000 through 19999 (FieldDescriptor::kFirstReservedNumber through FieldDescriptor::kLastReservedNumber),
* as they are reserved for the Protocol Buffers implementation - the protocol buffer compiler will complain if you use one of these reserved numbers in your .proto.
* Similarly, you cannot use any previously reserved field numbers.
*
*/

/*
Syntax For Declaring Message:
    message ${MessageName} {
        ${Scalar Value Type} ${FieldName1} = ${Tag Number1};
                .
                .
                .
        ${Scalar Value Type} ${FieldNameN} = ${Tag NumberN};
    }

Default Values Will be applied any case if the message doesn't contain a existing field defined
in the message definition
*/

message MessageTypes {
    /*
    * Scalar Value Types
    */
    string stringType = 1; // A string must always contain UTF-8 encoded or 7-bit ASCII text. Default value = ""

    // Number Types, Default Value = 0
    int32 int32Type = 2; // Uses Variable Length Encoding. Inefficient For Negative Numbers, Instead Use sint32.
    int64 int64Type = 3; // Uses Variable Length Encoding. Inefficient For Negative Numbers, Instead Use sint64.
    uint32 uInt32Type = 4; // Uses Variable Length Encoding
    uint64 uInt64Type = 5; // Uses Variable Length Encoding
    sint32 sInt32Type = 6; // Uses Variable Length Encoding. They are efficient in encoding for negative numbers.
                           // Use this instead of int32 for negative numbers
    sint64 sInt64Type = 7; // Uses Variable Length Encoding. They are efficient in encoding for negative numbers.
    // Use this instead of int64 for negative numbers.

    fixed32 fixed32Type = 8; // Always four bytes. More efficient than uint32 if values are often greater than 2^28.
    fixed64 fixed64Type = 9; // Always eight bytes. More efficient than uint64 if values are often greater than 2^56

    sfixed32 sfixed32Type = 10; // Always four bytes.
    sfixed64 sfixed64Type = 11; // Always Eight bytes.

    bool boolType = 12; // Boolean Type. Default Value = false

    bytes bytesType = 13; // May contain any arbitrary sequence of bytes. Default Value = Empty Bytes

    double doubleType = 14;
    float floatType = 15;

    enum Week {
        UNDEFINED = 0; // Tag 0 is always used as default in case of enum
        SUNDAY = 1;
        MONDAY = 2;
        TUESDAY = 3;
        WEDNESDAY = 4;
        THURSDAY = 5;
        FRIDAY = 6;
        SATURDAY = 7;
    }
    Week wkDayType = 16;

    /*
    * Defining Collection Of Scalar Value Type
    * Syntax: repeated ${ScalarType} ${name} = TagValue
    */
    repeated string listOfString = 17; // List[String]
}

/*
* Defining Defined Message Types In Other Message Definition
*/
message Person {
    string fname = 1;
    string sname = 2;
}

message City {
    Person p = 1;
}

/*
* Nested Message Definitions
*/

message NestedMessages {
    message FirstLevelNestedMessage {
        string firstString = 1;
        message SecondLevelNestedMessage {
            string secondString = 2;
        }
    }
    FirstLevelNestedMessage msg = 1;
    FirstLevelNestedMessage.SecondLevelNestedMessage msg2 = 2;
}

/*
* Importing Message From A File
*/

// one.proto
// message One {
//     string oneMsg = 1;
// }

// two.proto
//  import "myproject/one.proto"
//  message Two {
//       string twoMsg = 2;
//  }


/*
* Advanced Topics
*/

/*
* Handling Message Type Changes:
* Never Change/Use The TagNumber Of A Message Field Which Was Removed
* We should use reserved in case of message definition update.
* (https://developers.google.com/protocol-buffers/docs/proto3#updating)
*/

/*
* Reserved Fields
* It's used in case if we need to add/remove new fields into message.
* Using Reserved Backward and Forward Compatibility Of Messages can be achieved
*/


message ReservedMessage {
    reserved 0, 1, 2, 3 to 10; // Set Of Tag Numbers Which Can't be reused.
    reserved "firstMsg", "secondMsg", "thirdMsg"; // Set Of Labels Which Can't Be reused.
}

/*
* Any
* The Any message type lets you use messages as embedded types without having their .proto definition.
* An Any contains an arbitrary serialized message as bytes,
* along with a URL that acts as a globally unique identifier for and resolves to that message's type.
* For Any to work we need to import it as shown below.
*/
/*
    import "google/protobuf/any.proto";
    message AnySampleMessage {
        repeated google.protobuf.Any.details = 1;
    }

*/


/*
*  OneOf
* There are cases, wherein only one field at-most might be present as part of the message.
* Note: OneOf messages can't be repeated.
*/

message OneOfMessage {
    oneof msg {
        string fname = 1;
        string sname = 2;
    };
}

/*
* Maps
* Map fields cannot be repeated.
* Ordering Of A Map Is Not Guaranteed.
*/

message MessageWithMaps {
    map<string, string> mapOfMessages = 1;
}


/*
* Packages
* Used for preventing name clashes between protocol message types
* Syntax:
    package ${packageName};

    To Access the package;
    ${packageName}.${messageName} = ${tagNumber};
*/

/*
* Services
* Message Types Defined For Using In RPC system.
*  When protoc compiler generates for various languages it generates stub methods for the services.
*/

message SearchRequest {
    string queryString = 1;
}

message SearchResponse {
    string queryResponse = 1;
}
service SearchService {
    rpc Search (SearchRequest) returns (SearchResponse);
}

Generating Classes In Various Languages For Protocol Buffers

protoc --proto_path=IMPORT_PATH --cpp_out=DST_DIR --java_out=DST_DIR --python_out=DST_DIR --go_out=DST_DIR --ruby_out=DST_DIR --objc_out=DST_DIR --csharp_out=DST_DIR path/to/file.proto

References

Google Protocol Buffers


Got a suggestion? A correction, perhaps? Open an Issue on the GitHub Repo, or make a pull request yourself!

Originally contributed by Shankar Shastri, and updated by 2 contributors.