blob: f441c89c830c615f39eb039ac3c000dfb4583e1d [file] [log] [blame]
// Amalgamated source file
** This file contains shared definitions that are widely used across upb.
** This is a mixed C/C++ interface that offers a full API to both languages.
** See the top-level README for more information.
#ifndef UPB_H_
#define UPB_H_
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#ifdef __cplusplus
namespace upb {
class Allocator;
class Arena;
class Environment;
class ErrorSpace;
class Status;
template <int N> class InlinedArena;
template <int N> class InlinedEnvironment;
/* UPB_INLINE: inline if possible, emit standalone code if required. */
#ifdef __cplusplus
#define UPB_INLINE inline
#elif defined (__GNUC__)
#define UPB_INLINE static __inline__
#define UPB_INLINE static
/* Hints to the compiler about likely/unlikely branches. */
#define UPB_LIKELY(x) __builtin_expect((x),1)
/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
* doesn't provide these preprocessor symbols. */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Macros for function attributes on compilers that support them. */
#ifdef __GNUC__
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#define UPB_NORETURN __attribute__((__noreturn__))
#else /* !defined(__GNUC__) */
#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L
/* C99/C++11 versions. */
#include <stdio.h>
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined __GNUC__
/* A few hacky workarounds for functions not in C89.
* For internal use only!
* TODO(haberman): fix these by including our own implementations, or finding
* another workaround.
#define _upb_snprintf __builtin_snprintf
#define _upb_vsnprintf __builtin_vsnprintf
#define _upb_va_copy(a, b) __va_copy(a, b)
#error Need implementations of [v]snprintf and va_copy
#if ((defined(__cplusplus) && __cplusplus >= 201103L) || \
defined(__GXX_EXPERIMENTAL_CXX0X__)) && !defined(UPB_NO_CXX11)
#define UPB_CXX11
* Declare these in the "private" section of a C++ class to forbid copy/assign
* or all POD ops (construct, destruct, copy, assign) on that class. */
#ifdef UPB_CXX11
#include <type_traits>
#define UPB_DISALLOW_COPY_AND_ASSIGN(class_name) \
class_name(const class_name&) = delete; \
void operator=(const class_name&) = delete;
#define UPB_DISALLOW_POD_OPS(class_name, full_class_name) \
class_name() = delete; \
~class_name() = delete; \
#define UPB_ASSERT_STDLAYOUT(type) \
static_assert(std::is_standard_layout<type>::value, \
#type " must be standard layout");
#define UPB_FINAL final
#else /* !defined(UPB_CXX11) */
#define UPB_DISALLOW_COPY_AND_ASSIGN(class_name) \
class_name(const class_name&); \
void operator=(const class_name&);
#define UPB_DISALLOW_POD_OPS(class_name, full_class_name) \
class_name(); \
~class_name(); \
#define UPB_FINAL
* Macros for declaring C and C++ types both, including inheritance.
* The inheritance doesn't use real C++ inheritance, to stay compatible with C.
* These macros also provide upcasts:
* - in C: type-specific functions (i.e. upb_foo_upcast(foo))
* - in C++: upb::upcast(foo) along with implicit conversions
* Downcasts are not provided, but upb/def.h defines downcasts for upb::Def. */
/* Generates two inline upcast helpers for the C type "ty":
 *   base       *ty_upcast_mutable(ty *p)
 *   const base *ty_upcast(const ty *p)
 * Each one returns its argument converted to the base pointer type with a
 * plain C cast; no check is performed. */
#define UPB_C_UPCASTS(ty, base) \
UPB_INLINE base *ty ## _upcast_mutable(ty *p) { return (base*)p; } \
UPB_INLINE const base *ty ## _upcast(const ty *p) { return (const base*)p; }
/* Like UPB_C_UPCASTS(ty, base), and additionally generates
 *   base2       *ty_upcast2_mutable(ty *p)
 *   const base2 *ty_upcast2(const ty *p)
 * which cast the same pointer to a second base type "base2". */
#define UPB_C_UPCASTS2(ty, base, base2) \
UPB_C_UPCASTS(ty, base) \
UPB_INLINE base2 *ty ## _upcast2_mutable(ty *p) { return (base2*)p; } \
UPB_INLINE const base2 *ty ## _upcast2(const ty *p) { return (const base2*)p; }
#ifdef __cplusplus
#define UPB_BEGIN_EXTERN_C extern "C" {
#define UPB_END_EXTERN_C }
#define UPB_PRIVATE_FOR_CPP private:
#define UPB_DECLARE_TYPE(cppname, cname) typedef cppname cname;
#define UPB_DECLARE_DERIVED_TYPE(cppname, cppbase, cname, cbase) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS(cname, cbase) \
namespace upb { \
template <> \
class Pointer<cppname> : public PointerBase<cppname, cppbase> { \
public: \
explicit Pointer(cppname* ptr) \
: PointerBase<cppname, cppbase>(ptr) {} \
}; \
template <> \
class Pointer<const cppname> \
: public PointerBase<const cppname, const cppbase> { \
public: \
explicit Pointer(const cppname* ptr) \
: PointerBase<const cppname, const cppbase>(ptr) {} \
}; \
#define UPB_DECLARE_DERIVED_TYPE2(cppname, cppbase, cppbase2, cname, cbase, \
cbase2) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS2(cname, cbase, cbase2) \
namespace upb { \
template <> \
class Pointer<cppname> : public PointerBase2<cppname, cppbase, cppbase2> { \
public: \
explicit Pointer(cppname* ptr) \
: PointerBase2<cppname, cppbase, cppbase2>(ptr) {} \
}; \
template <> \
class Pointer<const cppname> \
: public PointerBase2<const cppname, const cppbase, const cppbase2> { \
public: \
explicit Pointer(const cppname* ptr) \
: PointerBase2<const cppname, const cppbase, const cppbase2>(ptr) {} \
}; \
#else /* !defined(__cplusplus) */
#define UPB_DECLARE_TYPE(cppname, cname) \
struct cname; \
typedef struct cname cname;
#define UPB_DECLARE_DERIVED_TYPE(cppname, cppbase, cname, cbase) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS(cname, cbase)
#define UPB_DECLARE_DERIVED_TYPE2(cppname, cppbase, cppbase2, \
cname, cbase, cbase2) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS2(cname, cbase, cbase2)
#endif /* defined(__cplusplus) */
/* UPB_MAX / UPB_MIN: yield the larger / smaller of two operands.
 * Both are plain macros, so the operand that is selected is evaluated twice;
 * do not pass expressions with side effects. When the operands compare equal
 * the second one (y) is the result. */
#define UPB_MAX(x, y) (((x) > (y)) ? (x) : (y))
#define UPB_MIN(x, y) (((x) < (y)) ? (x) : (y))
/* Marks a variable or expression as intentionally unused.
 * The argument is parenthesized so that the (void) cast applies to the whole
 * expression rather than only to its first operand. */
#define UPB_UNUSED(var) (void)(var)
/* UPB_ASSERT(): in release mode, we use the expression without letting it be
* evaluated. This prevents "unused variable" warnings. */
#ifdef NDEBUG
#define UPB_ASSERT(expr) do {} while (false && (expr))
#define UPB_ASSERT(expr) assert(expr)
/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only
* exist in debug mode. This turns into regular assert. */
#define UPB_ASSERT_DEBUGVAR(expr) assert(expr)
#ifdef __GNUC__
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#define UPB_UNREACHABLE() do { assert(0); } while(0)
/* Generic function type. */
typedef void upb_func();
/* C++ Casts ******************************************************************/
#ifdef __cplusplus
namespace upb {
template <class T> class Pointer;
/* Casts to a subclass. The caller must know that cast is correct; an
* incorrect cast will throw an assertion failure in debug mode.
* Example:
* upb::Def* def = GetDef();
* // Assert-fails if this was not actually a MessageDef.
* upb::MessageDef* md = upb::down_cast<upb::MessageDef>(def);
* Note that downcasts are only defined for some types (at the moment you can
* only downcast from a upb::Def to a specific Def type). */
template<class To, class From> To down_cast(From* f);
/* Casts to a subclass. If the class does not actually match the given To type,
* returns NULL.
* Example:
* upb::Def* def = GetDef();
* // md will be NULL if this was not actually a MessageDef.
* upb::MessageDef* md = upb::dyn_cast<upb::MessageDef>(def);
* Note that dynamic casts are only defined for some types (at the moment you
* can only downcast from a upb::Def to a specific Def type). */
template<class To, class From> To dyn_cast(From* f);
/* Casts to any base class, or the type itself (ie. can be a no-op).
* Example:
* upb::MessageDef* md = GetDef();
* // This will fail to compile if this wasn't actually a base class.
* upb::Def* def = upb::upcast(md);
template <class T> inline Pointer<T> upcast(T *f) { return Pointer<T>(f); }
/* Attempt upcast to specific base class.
* Example:
* upb::MessageDef* md = GetDef();
* upb::upcast_to<upb::Def>(md)->MethodOnDef();
template <class T, class F> inline T* upcast_to(F *f) {
return static_cast<T*>(upcast(f));
/* PointerBase<T>: implementation detail of upb::upcast().
* It is implicitly convertible to pointers to the Base class(es).
template <class T, class Base>
class PointerBase {
explicit PointerBase(T* ptr) : ptr_(ptr) {}
operator T*() { return ptr_; }
operator Base*() { return (Base*)ptr_; }
T* ptr_;
template <class T, class Base, class Base2>
class PointerBase2 : public PointerBase<T, Base> {
explicit PointerBase2(T* ptr) : PointerBase<T, Base>(ptr) {}
operator Base2*() { return Pointer<Base>(*this); }
/* A list of types as they are encoded on-the-wire. */
typedef enum {
} upb_wiretype_t;
/* upb::ErrorSpace ************************************************************/
/* A upb::ErrorSpace represents some domain of possible error values. This lets
* upb::Status attach specific error codes to operations, like POSIX/C errno,
* Win32 error codes, etc. Clients who want to know the very specific error
* code can check the error space and then know the type of the integer code.
* NOTE: upb::ErrorSpace is currently not used and should be considered
* experimental. It is important primarily in cases where upb is performing
* I/O, but upb doesn't currently have any components that do this. */
UPB_DECLARE_TYPE(upb::ErrorSpace, upb_errorspace)
#ifdef __cplusplus
class upb::ErrorSpace {
struct upb_errorspace {
const char *name;
/* upb::Status ****************************************************************/
/* upb::Status represents a success or failure status and error message.
* It owns no resources and allocates no memory, so it should work
* even in OOM situations. */
UPB_DECLARE_TYPE(upb::Status, upb_status)
/* The maximum length of an error message before it will get truncated. */
const char *upb_status_errmsg(const upb_status *status);
bool upb_ok(const upb_status *status);
upb_errorspace *upb_status_errspace(const upb_status *status);
int upb_status_errcode(const upb_status *status);
/* Any of the functions that write to a status object allow status to be NULL,
* to support use cases where the function's caller does not care about the
* status message. */
void upb_status_clear(upb_status *status);
void upb_status_seterrmsg(upb_status *status, const char *msg);
void upb_status_seterrf(upb_status *status, const char *fmt, ...);
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args);
void upb_status_copy(upb_status *to, const upb_status *from);
#ifdef __cplusplus
class upb::Status {
Status() { upb_status_clear(this); }
/* Returns true if there is no error. */
bool ok() const { return upb_ok(this); }
/* Optional error space and code, useful if the caller wants to
* programmatically check the specific kind of error. */
ErrorSpace* error_space() { return upb_status_errspace(this); }
int error_code() const { return upb_status_errcode(this); }
/* The returned string is invalidated by any other call into the status. */
const char *error_message() const { return upb_status_errmsg(this); }
/* The error message will be truncated if it is longer than
void SetErrorMessage(const char* msg) { upb_status_seterrmsg(this, msg); }
void SetFormattedErrorMessage(const char* fmt, ...) {
va_list args;
va_start(args, fmt);
upb_status_vseterrf(this, fmt, args);
/* Resets the status to a successful state with no message. */
void Clear() { upb_status_clear(this); }
void CopyFrom(const Status& other) { upb_status_copy(this, &other); }
struct upb_status {
bool ok_;
/* Specific status code defined by some error space (optional). */
int code_;
upb_errorspace *error_space_;
/* TODO(haberman): add file/line of error? */
/* Error message; NULL-terminated. */
#define UPB_STATUS_INIT {true, 0, NULL, {0}}
/** Built-in error spaces. ****************************************************/
/* Errors raised by upb that we want to be able to detect programmatically. */
typedef enum {
UPB_NOMEM /* Can't reuse ENOMEM because it is POSIX, not ISO C. */
} upb_errcode_t;
extern upb_errorspace upb_upberr;
void upb_upberr_setoom(upb_status *s);
/* Since errno is defined by standard C, we define an error space for it in
* core upb. Other error spaces should be defined in other, platform-specific
* modules. */
extern upb_errorspace upb_errnoerr;
/** upb::Allocator ************************************************************/
/* A upb::Allocator is a possibly-stateful allocator object.
* It could either be an arena allocator (which doesn't require individual
* free() calls) or a regular malloc() (which does). The client must therefore
* free memory unless it knows that the allocator is an arena allocator. */
UPB_DECLARE_TYPE(upb::Allocator, upb_alloc)
/* A malloc()/free() function.
* If "size" is 0 then the function acts like free(), otherwise it acts like
* realloc(). Only "oldsize" bytes from a previous allocation are preserved. */
typedef void *upb_alloc_func(upb_alloc *alloc, void *ptr, size_t oldsize,
size_t size);
#ifdef __cplusplus
class upb::Allocator UPB_FINAL {
Allocator() {}
struct upb_alloc {
#endif /* __cplusplus */
upb_alloc_func *func;
/* Requests a new allocation of "size" bytes from "alloc" by invoking its
 * callback with a NULL pointer and an old size of 0. Returns whatever the
 * callback returns. */
UPB_INLINE void *upb_malloc(upb_alloc *alloc, size_t size) {
return alloc->func(alloc, NULL, 0, size);
/* Resizes an allocation: forwards ptr, oldsize and size unchanged to the
 * allocator callback and returns its result. Per the upb_alloc_func contract,
 * only "oldsize" bytes of the previous allocation are preserved. */
UPB_INLINE void *upb_realloc(upb_alloc *alloc, void *ptr, size_t oldsize,
size_t size) {
return alloc->func(alloc, ptr, oldsize, size);
/* Releases "ptr" by invoking the allocator callback with a new size of 0
 * (the free() form of upb_alloc_func). The old size is passed as 0 and the
 * callback's return value is ignored. */
UPB_INLINE void upb_free(upb_alloc *alloc, void *ptr) {
alloc->func(alloc, ptr, 0, 0);
/* The global allocator used by upb. Uses the standard malloc()/free(). */
extern upb_alloc upb_alloc_global;
/* Functions that hard-code the global malloc.
* We still get benefit because we can put custom logic into our global
* allocator, like injecting out-of-memory faults in debug/testing builds. */
/* upb_malloc() bound to the global allocator upb_alloc_global. */
UPB_INLINE void *upb_gmalloc(size_t size) {
return upb_malloc(&upb_alloc_global, size);
/* upb_realloc() bound to the global allocator upb_alloc_global. */
UPB_INLINE void *upb_grealloc(void *ptr, size_t oldsize, size_t size) {
return upb_realloc(&upb_alloc_global, ptr, oldsize, size);
/* upb_free() bound to the global allocator upb_alloc_global. */
UPB_INLINE void upb_gfree(void *ptr) {
upb_free(&upb_alloc_global, ptr);
/* upb::Arena *****************************************************************/
/* upb::Arena is a specific allocator implementation that uses arena allocation.
* The user provides an allocator that will be used to allocate the underlying
* arena blocks. Arenas by nature do not require the individual allocations
* to be freed. However the Arena does allow users to register cleanup
* functions that will run when the arena is destroyed.
* A upb::Arena is *not* thread-safe.
* You could write a thread-safe arena allocator that satisfies the
* upb::Allocator interface, but it would not be as efficient for the
* single-threaded case. */
UPB_DECLARE_TYPE(upb::Arena, upb_arena)
typedef void upb_cleanup_func(void *ud);
#define UPB_ARENA_BLOCK_OVERHEAD (sizeof(size_t)*4)
void upb_arena_init(upb_arena *a);
void upb_arena_init2(upb_arena *a, void *mem, size_t n, upb_alloc *alloc);
void upb_arena_uninit(upb_arena *a);
bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud);
size_t upb_arena_bytesallocated(const upb_arena *a);
void upb_arena_setnextblocksize(upb_arena *a, size_t size);
void upb_arena_setmaxblocksize(upb_arena *a, size_t size);
/* Returns the arena viewed as a generic allocator. This is a plain pointer
 * cast; it is valid because the upb_alloc member is required to be the first
 * member of upb_arena. */
UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; }
#ifdef __cplusplus
class upb::Arena {
/* A simple arena with no initial memory block and the default allocator. */
Arena() { upb_arena_init(this); }
/* Constructs an arena with the given initial block which allocates blocks
* with the given allocator. The given allocator must outlive the Arena.
* If you pass NULL for the allocator it will default to the global allocator
* upb_alloc_global, and NULL/0 for the initial block will cause there to be
* no initial block. */
Arena(void *mem, size_t len, Allocator* a) {
upb_arena_init2(this, mem, len, a);
~Arena() { upb_arena_uninit(this); }
/* Sets the size of the next block the Arena will request (unless the
* requested allocation is larger). Each block will double in size until the
* max limit is reached. */
void SetNextBlockSize(size_t size) { upb_arena_setnextblocksize(this, size); }
/* Sets the maximum block size. No blocks larger than this will be requested
* from the underlying allocator unless individual arena allocations are
* larger. */
void SetMaxBlockSize(size_t size) { upb_arena_setmaxblocksize(this, size); }
/* Allows this arena to be used as a generic allocator.
* The arena does not need free() calls so when using Arena as an allocator
* it is safe to skip them. However they are no-ops so there is no harm in
* calling free() either. */
Allocator* allocator() { return upb_arena_alloc(this); }
/* Add a cleanup function to run when the arena is destroyed.
* Returns false on out-of-memory. */
bool AddCleanup(upb_cleanup_func* func, void* ud) {
return upb_arena_addcleanup(this, func, ud);
/* Total number of bytes that have been allocated. It is undefined what
* Realloc() does to this counter. */
size_t BytesAllocated() const {
return upb_arena_bytesallocated(this);
struct upb_arena {
#endif /* __cplusplus */
/* We implement the allocator interface.
* This must be the first member of upb_arena! */
upb_alloc alloc;
/* Allocator to allocate arena blocks. We are responsible for freeing these
* when we are destroyed. */
upb_alloc *block_alloc;
size_t bytes_allocated;
size_t next_block_size;
size_t max_block_size;
/* Linked list of blocks. Points to an arena_block, defined in env.c */
void *block_head;
/* Cleanup entries. Pointer to a cleanup_ent, defined in env.c */
void *cleanup_head;
/* For future expansion, since the size of this struct is exposed to users. */
void *future1;
void *future2;
/* upb::Environment ***********************************************************/
/* A upb::Environment provides a means for injecting malloc and an
* error-reporting callback into encoders/decoders. This allows them to be
* independent of nearly all assumptions about their actual environment.
* It is also a container for allocating the encoders/decoders themselves that
* insulates clients from knowing their actual size. This provides ABI
* compatibility even if the size of the objects change. And this allows the
* structure definitions to be in the .c files instead of the .h files, making
* the .h files smaller and more readable.
* We might want to consider renaming this to "Pipeline" if/when the concept of
* a pipeline element becomes more formalized. */
UPB_DECLARE_TYPE(upb::Environment, upb_env)
/* A function that receives an error report from an encoder or decoder. The
* callback can return true to request that the error should be recovered, but
* if the error is not recoverable this has no effect. */
typedef bool upb_error_func(void *ud, const upb_status *status);
void upb_env_init(upb_env *e);
void upb_env_init2(upb_env *e, void *mem, size_t n, upb_alloc *alloc);
void upb_env_uninit(upb_env *e);
void upb_env_initonly(upb_env *e);
/* Returns the environment's arena by casting the upb_env pointer to
 * upb_arena*. NOTE(review): presumably relies on the arena being the first
 * member of upb_env -- confirm against the struct definition. */
UPB_INLINE upb_arena *upb_env_arena(upb_env *e) { return (upb_arena*)e; }
bool upb_env_ok(const upb_env *e);
void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud);
/* Convenience wrappers around the methods of the contained arena. */
void upb_env_reporterrorsto(upb_env *e, upb_status *s);
bool upb_env_reporterror(upb_env *e, const upb_status *s);
void *upb_env_malloc(upb_env *e, size_t size);
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size);
void upb_env_free(upb_env *e, void *ptr);
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud);
size_t upb_env_bytesallocated(const upb_env *e);
#ifdef __cplusplus
class upb::Environment {
/* The given Arena must outlive this environment. */
Environment() { upb_env_initonly(this); }
Environment(void *mem, size_t len, Allocator *a) : arena_(mem, len, a) {
Arena* arena() { return upb_env_arena(this); }
/* Set a custom error reporting function. */
void SetErrorFunction(upb_error_func* func, void* ud) {
upb_env_seterrorfunc(this, func, ud);
/* Set the error reporting function to simply copy the status to the given
* status and abort. */
void ReportErrorsTo(Status* status) { upb_env_reporterrorsto(this, status); }
/* Returns true if all allocations and AddCleanup() calls have succeeded,
* and no errors were reported with ReportError() (except ones that recovered
* successfully). */
bool ok() const { return upb_env_ok(this); }
/* Reports an error to this environment's callback, returning true if
* the caller should try to recover. */
bool ReportError(const Status* status) {
return upb_env_reporterror(this, status);
struct upb_env {
#endif /* __cplusplus */
upb_arena arena_;
upb_error_func *error_func_;
void *error_ud_;
bool ok_;
/* upb::InlinedArena **********************************************************/
/* upb::InlinedEnvironment ****************************************************/
/* upb::InlinedArena and upb::InlinedEnvironment seed their arenas with a
* predefined amount of memory. No heap memory will be allocated until the
* initial block is exceeded.
* These types only exist in C++ */
#ifdef __cplusplus
template <int N> class upb::InlinedArena : public upb::Arena {
InlinedArena() : Arena(initial_block_, N, NULL) {}
explicit InlinedArena(Allocator* a) : Arena(initial_block_, N, a) {}
char initial_block_[N + UPB_ARENA_BLOCK_OVERHEAD];
template <int N> class upb::InlinedEnvironment : public upb::Environment {
InlinedEnvironment() : Environment(initial_block_, N, NULL) {}
explicit InlinedEnvironment(Allocator *a)
: Environment(initial_block_, N, a) {}
char initial_block_[N + UPB_ARENA_BLOCK_OVERHEAD];
#endif /* __cplusplus */
#endif /* UPB_H_ */
** upb_decode: parsing into a upb_msg using a upb_msglayout.
#ifndef UPB_DECODE_H_
#define UPB_DECODE_H_
** upb::Message is a representation for protobuf messages.
** However it differs from other common representations like
** google::protobuf::Message in one key way: it does not prescribe any
** ownership between messages and submessages, and it relies on the
** client to delete each message/submessage/array/map at the appropriate
** time.
** A client can access a upb::Message without knowing anything about
** ownership semantics, but to create or mutate a message a user needs
** to implement the memory management themselves.
** Currently all messages, arrays, and maps store a upb_alloc* internally.
** Mutating operations use this when they require dynamically-allocated
** memory. We could potentially eliminate this size overhead later by
** letting the user flip a bit on the factory that prevents this from
** being stored. The user would then need to use separate functions where
** the upb_alloc* is passed explicitly. However for handlers to populate
** such structures, they would need a place to store this upb_alloc* during
** parsing; upb_handlers don't currently have a good way to accommodate this.
** TODO: UTF-8 checking?
#ifndef UPB_MSG_H_
#define UPB_MSG_H_
** Defs are upb's internal representation of the constructs that can appear
** in a .proto file:
** - upb::MessageDef (upb_msgdef): describes a "message" construct.
** - upb::FieldDef (upb_fielddef): describes a message field.
** - upb::FileDef (upb_filedef): describes a .proto file and its defs.
** - upb::EnumDef (upb_enumdef): describes an enum.
** - upb::OneofDef (upb_oneofdef): describes a oneof.
** - upb::Def (upb_def): base class of all the others.
** TODO: definitions of services.
** Like upb_refcounted objects, defs are mutable only until frozen, and are
** only thread-safe once frozen.
** This is a mixed C/C++ interface that offers a full API to both languages.
** See the top-level README for more information.
#ifndef UPB_DEF_H_
#define UPB_DEF_H_
** upb::RefCounted (upb_refcounted)
** A refcounting scheme that supports circular refs. It accomplishes this by
** partitioning the set of objects into groups such that no cycle spans groups;
** we can then reference-count the group as a whole and ignore refs within the
** group. When objects are mutable, these groups are computed very
** conservatively; we group any objects that have ever had a link between them.
** When objects are frozen, we compute strongly-connected components which
** allows us to be precise and only group objects that are actually cyclic.
** This is a mixed C/C++ interface that offers a full API to both languages.
** See the top-level README for more information.
** upb_table
** This header is INTERNAL-ONLY! Its interfaces are not public or stable!
** This file defines very fast int->upb_value (inttable) and string->upb_value
** (strtable) hash tables.
** The table uses chained scatter with Brent's variation (inspired by the Lua
** implementation of hash tables). The hash function for strings is Austin
** Appleby's "MurmurHash."
** The inttable uses uintptr_t as its key, which guarantees it can be used to
** store pointers or integers of at least 32 bits (upb isn't really useful on
** systems where sizeof(void*) < 4).
** The table must be homogeneous (all values of the same type). In debug
** mode, we check this on insert and lookup.
#ifndef UPB_TABLE_H_
#define UPB_TABLE_H_
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
/* upb_value ******************************************************************/
/* A tagged union (stored untagged inside the table) so that we can check that
* clients calling table accessors are correctly typed without having to have
* an explosion of accessors. */
typedef enum {
} upb_ctype_t;
typedef struct {
uint64_t val;
#ifndef NDEBUG
/* In debug mode we carry the value type around also so we can check accesses
* to be sure the right member is being read. */
upb_ctype_t ctype;
} upb_value;
#ifdef NDEBUG
#define SET_TYPE(dest, val) UPB_UNUSED(val)
#define SET_TYPE(dest, val) dest = val
/* Like strdup(), which isn't always available since it's not ANSI C. */
char *upb_strdup(const char *s, upb_alloc *a);
/* Variant that works with a length-delimited rather than NULL-delimited string,
* as supported by strtable. */
char *upb_strdup2(const char *s, size_t len, upb_alloc *a);
/* upb_strdup() using the global allocator upb_alloc_global. */
UPB_INLINE char *upb_gstrdup(const char *s) {
return upb_strdup(s, &upb_alloc_global);
/* Stores the raw 64-bit payload "val" into *v and records its ctype through
 * SET_TYPE. Under NDEBUG, SET_TYPE only marks ctype as unused, so no type tag
 * is written. */
UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val,
upb_ctype_t ctype) {
v->val = val;
SET_TYPE(v->ctype, ctype);
/* Builds a upb_value from a raw 64-bit payload and a ctype by filling a local
 * with _upb_value_setval() and returning it by value. */
UPB_INLINE upb_value _upb_value_val(uint64_t val, upb_ctype_t ctype) {
upb_value ret;
_upb_value_setval(&ret, val, ctype);
return ret;
/* For each value ctype, define the following set of functions:
* // Get/set an int32 from a upb_value.
* int32_t upb_value_getint32(upb_value val);
* void upb_value_setint32(upb_value *val, int32_t cval);
* // Construct a new upb_value from an int32.
* upb_value upb_value_int32(int32_t val); */
#define FUNCS(name, membername, type_t, converter, proto_type) \
UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
val->val = (converter)cval; \
SET_TYPE(val->ctype, proto_type); \
} \
UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
upb_value ret; \
upb_value_set ## name(&ret, val); \
return ret; \
} \
UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
UPB_ASSERT_DEBUGVAR(val.ctype == proto_type); \
return (type_t)(converter)val.val; \
FUNCS(int32, int32, int32_t, int32_t, UPB_CTYPE_INT32)
FUNCS(int64, int64, int64_t, int64_t, UPB_CTYPE_INT64)
FUNCS(uint32, uint32, uint32_t, uint32_t, UPB_CTYPE_UINT32)
FUNCS(uint64, uint64, uint64_t, uint64_t, UPB_CTYPE_UINT64)
FUNCS(bool, _bool, bool, bool, UPB_CTYPE_BOOL)
FUNCS(cstr, cstr, char*, uintptr_t, UPB_CTYPE_CSTR)
FUNCS(ptr, ptr, void*, uintptr_t, UPB_CTYPE_PTR)
FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR)
FUNCS(fptr, fptr, upb_func*, uintptr_t, UPB_CTYPE_FPTR)
#undef FUNCS
/* Stores a float in *val by copying its object representation into the start
 * of the uint64_t payload. Only sizeof(float) bytes are written; the rest of
 * the payload is left as it was. */
UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) {
memcpy(&val->val, &cval, sizeof(cval));
/* Stores a double in *val by copying its object representation into the
 * uint64_t payload with memcpy (sizeof(double) bytes). */
UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) {
memcpy(&val->val, &cval, sizeof(cval));
/* Constructs a upb_value holding a float: fills a local via
 * upb_value_setfloat() and returns it by value. */
UPB_INLINE upb_value upb_value_float(float cval) {
upb_value ret;
upb_value_setfloat(&ret, cval);
return ret;
/* Constructs a upb_value holding a double: fills a local via
 * upb_value_setdouble() and returns it by value. */
UPB_INLINE upb_value upb_value_double(double cval) {
upb_value ret;
upb_value_setdouble(&ret, cval);
return ret;
#undef SET_TYPE
/* upb_tabkey *****************************************************************/
/* Either:
* 1. an actual integer key, or
* 2. a pointer to a string prefixed by its uint32_t length, owned by us.
* ...depending on whether this is a string table or an int table. We would
* make this a union of those two types, but C89 doesn't support statically
* initializing a non-first union member. */
typedef uintptr_t upb_tabkey;
#define UPB_TABKEY_NUM(n) n
/* The preprocessor isn't quite powerful enough to turn the compile-time string
* length into a byte-wise string representation, so code generation needs to
* help it along.
* "len1" is the low byte and len4 is the high byte. */
#define UPB_TABKEY_STR(len1, len2, len3, len4, strval) \
(uintptr_t)(len4 len3 len2 len1 strval)
#define UPB_TABKEY_STR(len1, len2, len3, len4, strval) \
(uintptr_t)(len1 len2 len3 len4 strval)
/* Decodes a string-table key. "key" is treated as a pointer to a buffer that
 * begins with a uint32_t length followed by the string bytes.
 * If "len" is non-NULL the length prefix is copied into *len.
 * Returns a pointer to the first byte after the length prefix. */
UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
char* mem = (char*)key;
/* The prefix is read with memcpy rather than through a uint32_t pointer. */
if (len) memcpy(len, mem, sizeof(*len));
return mem + sizeof(*len);
/* upb_tabval *****************************************************************/
#ifdef __cplusplus
/* Static initialization not supported.
* This separate definition is necessary because in C++, UINTPTR_MAX isn't
* reliably available. */
typedef struct {
uint64_t val;
} upb_tabval;
/* C -- supports static initialization, but to support static initialization of
* both integers and pointers for both 32 and 64 bit targets, it takes a little
* bit of doing. */
#if UINTPTR_MAX == 0xffffffffffffffffULL
#define UPB_PTR_IS_64BITS
#elif UINTPTR_MAX != 0xffffffff
#error Could not determine how many bits pointers are.
typedef union {
/* For static initialization.
* Unfortunately this ugliness is necessary -- it is the only way that we can,
* with -std=c89 -pedantic, statically initialize this to either a pointer or
* an integer on 32-bit platforms. */
struct {
#ifdef UPB_PTR_IS_64BITS
uintptr_t val;
uintptr_t val1;
uintptr_t val2;
} staticinit;
/* The normal accessor that we use for everything at runtime. */
uint64_t val;
} upb_tabval;
#ifdef UPB_PTR_IS_64BITS
#define UPB_TABVALUE_INT_INIT(v) {{v}}
/* 32-bit pointers */
#define UPB_TABVALUE_INT_INIT(v) {{0, v}}
#define UPB_TABVALUE_EMPTY_INIT {{-1, -1}}
#define UPB_TABVALUE_INT_INIT(v) {{v, 0}}
#define UPB_TABVALUE_EMPTY_INIT {{-1, -1}}
#undef UPB_PTR_IS_64BITS
#endif /* __cplusplus */
/* upb_table ******************************************************************/
typedef struct _upb_tabent {
upb_tabkey key;
upb_tabval val;
/* Internal chaining. This is const so we can create static initializers for
* tables. We cast away const sometimes, but *only* when the containing
* upb_table is known to be non-const. This requires a bit of care, but
* the subtlety is confined to table.c. */
const struct _upb_tabent *next;
} upb_tabent;
typedef struct {
size_t count; /* Number of entries in the hash part. */
size_t mask; /* Mask to turn hash value -> bucket. */
upb_ctype_t ctype; /* Type of all values. */
uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
/* Hash table entries.
* Making this const isn't entirely accurate; what we really want is for it to
* have the same const-ness as the table it's inside. But there's no way to
* declare that in C. So we have to make it const so that we can statically
* initialize const hash tables. Then we cast away const when we have to.
const upb_tabent *entries;
#ifndef NDEBUG
/* This table's allocator. We make the user pass it in to every relevant
* function and only use this to check it in debug mode. We do this solely
* to keep upb_table as small as possible. This might seem slightly paranoid
* but the plan is to use upb_table for all map fields and extension sets in
* a forthcoming message representation, so there could be a lot of these.
* If this turns out to be too annoying later, we can change it (since this
* is an internal-only header file). */
upb_alloc *alloc;
} upb_table;
/* Static initializer for a upb_table.  The initializer has one element per
 * struct member, so debug builds (which carry the extra "alloc" member) need
 * a sixth element. */
#ifdef NDEBUG
#  define UPB_TABLE_INIT(count, mask, ctype, size_lg2, entries) \
     {count, mask, ctype, size_lg2, entries}
#else
#  ifdef UPB_DEBUG_REFS
/* At the moment the only mutable tables we statically initialize are debug
 * ref tables. */
#    define UPB_TABLE_INIT(count, mask, ctype, size_lg2, entries) \
       {count, mask, ctype, size_lg2, entries, &upb_alloc_debugrefs}
#  else
#    define UPB_TABLE_INIT(count, mask, ctype, size_lg2, entries) \
       {count, mask, ctype, size_lg2, entries, NULL}
#  endif
#endif
/* A table keyed by strings; it simply wraps the shared upb_table. */
typedef struct {
  upb_table t;
} upb_strtable;

/* Static initializers for a upb_strtable.  The arguments are handed on
 * unchanged to UPB_TABLE_INIT; the EMPTY form describes a table with no
 * entries and no entry storage. */
#define UPB_STRTABLE_INIT(count, mask, ctype, size_lg2, entries) \
  {UPB_TABLE_INIT(count, mask, ctype, size_lg2, entries)}

#define UPB_EMPTY_STRTABLE_INIT(ctype) UPB_STRTABLE_INIT(0, 0, ctype, 0, NULL)
/* A table keyed by integers.  Keys below array_size live in a flat array;
 * everything else goes to the embedded hash table. */
typedef struct {
  upb_table t;              /* For entries that don't fit in the array part. */
  const upb_tabval *array;  /* Array part of the table.  Const for the same
                             * static-initialization reason as the hash
                             * entries. */
  size_t array_size;        /* Array part size. */
  size_t array_count;       /* Array part number of elements. */
} upb_inttable;

/* Static initializers for a upb_inttable: the first five arguments initialize
 * the hash part, the remaining three the array part. */
#define UPB_INTTABLE_INIT(count, mask, ctype, size_lg2, ent, a, asize, acount) \
  {UPB_TABLE_INIT(count, mask, ctype, size_lg2, ent), a, asize, acount}

#define UPB_EMPTY_INTTABLE_INIT(ctype) \
  UPB_INTTABLE_INIT(0, 0, ctype, 0, NULL, NULL, 0, 0)
/* Returns the number of slots in the hash part of |t|: 2^size_lg2, except
 * that a size_lg2 of 0 is reported as 0 slots. */
UPB_INLINE size_t upb_table_size(const upb_table *t) {
  if (t->size_lg2 == 0) return 0;
  /* Shift a size_t, not an int, so large exponents cannot overflow int. */
  return (size_t)1 << t->size_lg2;
}
/* Internal-only functions, in .h file only out of necessity. */
/* Returns true if |e| is an unused slot, which is encoded as a key of 0. */
UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
  return e->key == 0;
}
/* Used by some of the unit tests for generic hashing functionality. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed);
/* Maps an integer key to its table key representation (the identity). */
UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
  return key;
}
/* Hashes an integer key: the key itself, truncated to 32 bits. */
UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
  return (uint32_t)key;
}
/* Returns the slot of |t| that |hash| maps to, i.e. entries[hash & mask].
 * t->entries is not checked for NULL here. */
static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
  return t->entries + (hash & t->mask);
}
/* Returns true if an array-part slot holds a value.  An all-ones value,
 * (uint64_t)-1, marks an empty slot. */
UPB_INLINE bool upb_arrhas(upb_tabval key) {
  return key.val != (uint64_t)-1;
}
/* Initialize and uninitialize a table, respectively.  If memory allocation
 * fails, the init functions return false and the table is left
 * uninitialized.  |a| is the allocator to use. */
bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a);
void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
/* Same as upb_inttable_init2(), using the global allocator. */
UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
  return upb_inttable_init2(table, ctype, &upb_alloc_global);
}
/* Same as upb_strtable_init2(), using the global allocator. */
UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
  return upb_strtable_init2(table, ctype, &upb_alloc_global);
}
/* Same as upb_inttable_uninit2(), using the global allocator. */
UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {
  upb_inttable_uninit2(table, &upb_alloc_global);
}
/* Same as upb_strtable_uninit2(), using the global allocator. */
UPB_INLINE void upb_strtable_uninit(upb_strtable *table) {
  upb_strtable_uninit2(table, &upb_alloc_global);
}
/* Returns the number of values in the table. */
size_t upb_inttable_count(const upb_inttable *t);
/* Returns the number of values in the string table (the hash part's count). */
UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
  return t->t.count;
}
/* NOTE(review): the "pack" routines are undocumented in this header and their
 * behavior cannot be established from the declarations alone.  The names
 * suggest that *_packedsize() reports through *size the space a packed copy
 * of the table needs, and that *_pack() writes such a copy into the buffer
 * |p| of |size| bytes while tracking the write position in *ofs -- confirm
 * against the implementation before relying on this. */
void upb_inttable_packedsize(const upb_inttable *t, size_t *size);
void upb_strtable_packedsize(const upb_strtable *t, size_t *size);
upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs,
size_t size);
upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs,
size_t size);
/* Inserts the given key into the hashtable with the given value. The key must
* not already exist in the hash table. For string tables, the key must be
* NULL-terminated, and the table will make an internal copy of the key.
* Inttables must not insert a value of UINTPTR_MAX.
* If a table resize was required but memory allocation failed, false is
* returned and the table is unchanged. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
upb_alloc *a);
bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len,
upb_value val, upb_alloc *a);
/* Same as upb_inttable_insert2(), using the global allocator. */
UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key,
                                    upb_value val) {
  return upb_inttable_insert2(t, key, val, &upb_alloc_global);
}
/* Same as upb_strtable_insert3(), using the global allocator.  The key is
 * given as a pointer plus explicit length. */
UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key,
                                     size_t len, upb_value val) {
  return upb_strtable_insert3(t, key, len, val, &upb_alloc_global);
}
/* For NULL-terminated strings. */
/* Inserts under a NULL-terminated key; the length is taken with strlen(). */
UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
                                    upb_value val) {
  return upb_strtable_insert2(t, key, strlen(key), val);
}
/* Looks up key in this table, returning "true" if the key was found.
* If v is non-NULL, copies the value for this key into *v. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
upb_value *v);
/* For NULL-terminated strings. */
/* Looks up a NULL-terminated key; the length is taken with strlen() and the
 * work is done by upb_strtable_lookup2(). */
UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
                                    upb_value *v) {
  return upb_strtable_lookup2(t, key, strlen(key), v);
}
/* Removes an item from the table. Returns true if the remove was successful,
* and stores the removed item in *val if non-NULL. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
upb_value *val, upb_alloc *alloc);
/* Same as upb_strtable_remove3(), using the global allocator. */
UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key,
                                     size_t len, upb_value *val) {
  return upb_strtable_remove3(t, key, len, val, &upb_alloc_global);
}
/* For NULL-terminated strings. */
/* Removes a NULL-terminated key; the length is taken with strlen() and the
 * work is done by upb_strtable_remove2(). */
UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
                                    upb_value *v) {
  return upb_strtable_remove2(t, key, strlen(key), v);
}
/* Updates an existing entry in an inttable. If the entry does not exist,
* returns false and does nothing. Unlike insert/remove, this does not
* invalidate iterators. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
/* Handy routines for treating an inttable like a stack. May not be mixed with
* other insert/remove calls. */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a);
upb_value upb_inttable_pop(upb_inttable *t);
/* Same as upb_inttable_push2(), using the global allocator. */
UPB_INLINE bool upb_inttable_push(upb_inttable *t, upb_value val) {
  return upb_inttable_push2(t, val, &upb_alloc_global);
}
/* Convenience routines for inttables with pointer keys. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
upb_alloc *a);
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
bool upb_inttable_lookupptr(
const upb_inttable *t, const void *key, upb_value *val);
/* Same as upb_inttable_insertptr2(), using the global allocator. */
UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key,
                                       upb_value val) {
  return upb_inttable_insertptr2(t, key, val, &upb_alloc_global);
}
/* Optimizes the table for the current set of entries, for both memory use and
* lookup time. Client should call this after all entries have been inserted;
* inserting more entries is legal, but will likely require a table resize. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a);
/* Same as upb_inttable_compact2(), using the global allocator. */
UPB_INLINE void upb_inttable_compact(upb_inttable *t) {
  upb_inttable_compact2(t, &upb_alloc_global);
}
/* A special-case inlinable version of the lookup routine for 32-bit
* integers. */
/* Looks up |key| and, if found, stores its value in *v and returns true.
 * Unlike the general lookup routine, |v| must be non-NULL: it is written
 * unconditionally. */
UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
                                      upb_value *v) {
  const upb_tabent *e;

  *v = upb_value_int32(0);  /* Silence compiler warnings. */

  if (key < t->array_size) {
    /* Array part: the slot holds either a value or the "empty" marker. */
    upb_tabval arrval = t->array[key];
    if (!upb_arrhas(arrval)) return false;
    _upb_value_setval(v, arrval.val, t->t.ctype);
    return true;
  }

  /* Hash part: walk the chain that starts at the key's bucket. */
  if (t->t.entries == NULL) return false;
  for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
    if ((uint32_t)e->key == key) {
      _upb_value_setval(v, e->val.val, t->t.ctype);
      return true;
    }
    if (e->next == NULL) return false;
  }
}
/* Exposed for testing only. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a);
/* Iterators ******************************************************************/
/* Iterators for int and string tables. We are subject to some kind of unusual
* design constraints:
* For high-level languages:
* - we must be able to guarantee that we don't crash or corrupt memory even if
* the program accesses an invalidated iterator.
* For C++11 range-based for:
* - iterators must be copyable
* - iterators must be comparable
* - it must be possible to construct an "end" value.
* Iteration order is undefined.
* Modifying the table invalidates iterators. upb_{str,int}table_done() is
* guaranteed to work even on an invalidated iterator, as long as the table it
* is iterating over has not been freed. Calling next() or accessing data from
* an invalidated iterator yields unspecified elements from the table, but it is
* guaranteed not to crash and to return real table elements (except when done()
* is true). */
/* upb_strtable_iter **********************************************************/
/* upb_strtable_iter i;
* upb_strtable_begin(&i, t);
* for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
* const char *key = upb_strtable_iter_key(&i);
* const upb_value val = upb_strtable_iter_value(&i);
* // ...
* }
typedef struct {
const upb_strtable *t;
size_t index;
} upb_strtable_iter;
/* Starts an iteration over |t|, using |i| as the iterator. */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
/* Advances |i| to the next entry. */
void upb_strtable_next(upb_strtable_iter *i);
/* Returns true once the iteration is finished; loop while this is false. */
bool upb_strtable_done(const upb_strtable_iter *i);
/* Accessors for the entry |i| currently refers to.
 * NOTE(review): keylength presumably reports the length of the current key --
 * confirm against the implementation. */
const char *upb_strtable_iter_key(const upb_strtable_iter *i);
size_t upb_strtable_iter_keylength(const upb_strtable_iter *i);
upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
/* NOTE(review): presumably turns |i| into an "end" iterator value and compares
 * two iterators, respectively, as C++ range-based for requires -- confirm. */
void upb_strtable_iter_setdone(upb_strtable_iter *i);
bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
const upb_strtable_iter *i2);
/* upb_inttable_iter **********************************************************/
/* upb_inttable_iter i;
* upb_inttable_begin(&i, t);
* for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
* uintptr_t key = upb_inttable_iter_key(&i);
* upb_value val = upb_inttable_iter_value(&i);
* // ...
* }
typedef struct {
const upb_inttable *t;
size_t index;
bool array_part;
} upb_inttable_iter;
/* Starts an iteration over |t|, using |i| as the iterator. */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
/* Advances |i| to the next entry. */
void upb_inttable_next(upb_inttable_iter *i);
/* Returns true once the iteration is finished; loop while this is false. */
bool upb_inttable_done(const upb_inttable_iter *i);
/* Accessors for the entry |i| currently refers to. */
uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i);
upb_value upb_inttable_iter_value(const upb_inttable_iter *i);
/* NOTE(review): presumably turns |i| into an "end" iterator value and compares
 * two iterators, respectively, as C++ range-based for requires -- confirm. */
void upb_inttable_iter_setdone(upb_inttable_iter *i);
bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
const upb_inttable_iter *i2);
#ifdef __cplusplus
} /* extern "C" */
#endif /* UPB_TABLE_H_ */
/* Reference tracking will check ref()/unref() operations to make sure the
 * ref ownership is correct.  Where possible it will also make tools like
 * Valgrind attribute ref leaks to the code that took the leaked ref, not
 * the code that originally created the object.
 *
 * Enabling this requires the application to define upb_lock()/upb_unlock()
 * functions that acquire/release a global mutex (or #define UPB_THREAD_UNSAFE).
 * For this reason we don't enable it by default, even in debug builds.
 */
/* #define UPB_DEBUG_REFS */
/* Forward declarations of the C++ refcounting types. */
#ifdef __cplusplus
namespace upb {
class RefCounted;
template <class T> class reffed_ptr;
}  /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::RefCounted, upb_refcounted)
struct upb_refcounted_vtbl;
/* The refcounted base object.  C++ sees it as class upb::RefCounted and C as
 * struct upb_refcounted; both share the data members below, which is what
 * lets the C++ wrappers pass "this" straight to the upb_refcounted_*() C
 * functions.
 * NOTE(review): the access specifiers, the #else/#endif that joins the two
 * headers, and the UPB_DEBUG_REFS guard were reconstructed from how the rest
 * of this header uses the type -- confirm against upstream. */
#ifdef __cplusplus

class upb::RefCounted {
 public:
  /* Returns true if the given object is frozen. */
  bool IsFrozen() const;

  /* Increases the ref count, the new ref is owned by "owner" which must not
   * already own a ref (and should not itself be a refcounted object if the ref
   * could possibly be circular; see below).
   * Thread-safe iff "this" is frozen. */
  void Ref(const void *owner) const;

  /* Release a ref that was acquired from upb_refcounted_ref() and collects any
   * objects it can. */
  void Unref(const void *owner) const;

  /* Moves an existing ref from "from" to "to", without changing the overall
   * ref count.  DonateRef(foo, NULL, owner) is the same as Ref(foo, owner),
   * but "to" may not be NULL. */
  void DonateRef(const void *from, const void *to) const;

  /* Verifies that a ref to the given object is currently held by the given
   * owner.  Only effective in UPB_DEBUG_REFS builds. */
  void CheckRef(const void *owner) const;

 private:
  UPB_DISALLOW_POD_OPS(RefCounted, upb::RefCounted)
#else
struct upb_refcounted {
#endif
  /* TODO(haberman): move the actual structure definition out of this header.
   * The only reason they are here is because inline functions need to see the
   * definition of upb_handlers, which needs to see this definition.  But we
   * can change the upb_handlers inline functions to deal in raw offsets
   * instead. */

  /* A single reference count shared by all objects in the group. */
  uint32_t *group;

  /* A singly-linked list of all objects in the group. */
  upb_refcounted *next;

  /* Table of function pointers for this type. */
  const struct upb_refcounted_vtbl *vtbl;

  /* Maintained only when mutable, this tracks the number of refs (but not
   * ref2's) to this object.  *group should be the sum of all individual_count
   * in the group. */
  uint32_t individual_count;

  bool is_frozen;

#ifdef UPB_DEBUG_REFS
  upb_inttable *refs;   /* Maps owner -> trackedref for incoming refs. */
  upb_inttable *ref2s;  /* Set of targets for outgoing ref2s. */
#endif
};
/* Static initializer for a refcounted object that is already frozen and
 * shares the global static_refcount.  The "refs"/"ref2s" tables are only part
 * of the initializer when reference tracking (UPB_DEBUG_REFS) is enabled. */
#ifdef UPB_DEBUG_REFS
extern upb_alloc upb_alloc_debugrefs;
#define UPB_REFCOUNT_INIT(vtbl, refs, ref2s) \
    {&static_refcount, NULL, vtbl, 0, true, refs, ref2s}
#else
#define UPB_REFCOUNT_INIT(vtbl, refs, ref2s) \
    {&static_refcount, NULL, vtbl, 0, true}
#endif
/* It is better to use tracked refs when possible, for the extra debugging
* capability. But if this is not possible (because you don't have easy access
* to a stable pointer value that is associated with the ref), you can pass
* UPB_UNTRACKED_REF instead. */
extern const void *UPB_UNTRACKED_REF;
/* Native C API. */
/* These are the functions the upb::RefCounted member functions IsFrozen(),
 * Ref(), Unref(), DonateRef() and CheckRef() forward to, passing the object
 * itself as |r|. */
bool upb_refcounted_isfrozen(const upb_refcounted *r);
void upb_refcounted_ref(const upb_refcounted *r, const void *owner);
void upb_refcounted_unref(const upb_refcounted *r, const void *owner);
void upb_refcounted_donateref(
const upb_refcounted *r, const void *from, const void *to);
void upb_refcounted_checkref(const upb_refcounted *r, const void *owner);
/* Defines the C refcounting helpers <type>_isfrozen(), _ref(), _unref(),
 * _donateref() and _checkref() for a type that upcastfunc() converts to a
 * upb_refcounted. */
#define UPB_REFCOUNTED_CMETHODS(type, upcastfunc) \
  UPB_INLINE bool type ## _isfrozen(const type *v) { \
    return upb_refcounted_isfrozen(upcastfunc(v)); \
  } \
  UPB_INLINE void type ## _ref(const type *v, const void *owner) { \
    upb_refcounted_ref(upcastfunc(v), owner); \
  } \
  UPB_INLINE void type ## _unref(const type *v, const void *owner) { \
    upb_refcounted_unref(upcastfunc(v), owner); \
  } \
  UPB_INLINE void type ## _donateref(const type *v, const void *from, const void *to) { \
    upb_refcounted_donateref(upcastfunc(v), from, to); \
  } \
  UPB_INLINE void type ## _checkref(const type *v, const void *owner) { \
    upb_refcounted_checkref(upcastfunc(v), owner); \
  }

/* The same five operations as C++ member functions, for use inside a class
 * body; each forwards to the upb::RefCounted base.
 * NOTE(review): the #define line for this macro was missing from the header,
 * leaving these member functions inside the C macro above.  The macro name is
 * taken from upstream upb -- confirm. */
#define UPB_REFCOUNTED_CPPMETHODS \
  bool IsFrozen() const { \
    return upb::upcast_to<const upb::RefCounted>(this)->IsFrozen(); \
  } \
  void Ref(const void *owner) const { \
    return upb::upcast_to<const upb::RefCounted>(this)->Ref(owner); \
  } \
  void Unref(const void *owner) const { \
    return upb::upcast_to<const upb::RefCounted>(this)->Unref(owner); \
  } \
  void DonateRef(const void *from, const void *to) const { \
    return upb::upcast_to<const upb::RefCounted>(this)->DonateRef(from, to); \
  } \
  void CheckRef(const void *owner) const { \
    return upb::upcast_to<const upb::RefCounted>(this)->CheckRef(owner); \
  }
/* Internal-to-upb Interface **************************************************/
typedef void upb_refcounted_visit(const upb_refcounted *r,
const upb_refcounted *subobj,
void *closure);
/* Per-type function table of a refcounted object. */
struct upb_refcounted_vtbl {
  /* Must visit all subobjects that are currently ref'd via upb_refcounted_ref2.
   * Must be longjmp()-safe. */
  void (*visit)(const upb_refcounted *r, upb_refcounted_visit *visit, void *c);

  /* Must free the object and release all references to other objects. */
  void (*free)(upb_refcounted *r);
};
/* Initializes the refcounted with a single ref for the given owner. Returns
* false if memory could not be allocated. */
bool upb_refcounted_init(upb_refcounted *r,
const struct upb_refcounted_vtbl *vtbl,
const void *owner);
/* Adds a ref from one refcounted object to another ("from" must not already
* own a ref). These refs may be circular; cycles will be collected correctly
* (if conservatively). These refs do not need to be freed in from's free()
* function. */
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from);
/* Removes a ref that was acquired from upb_refcounted_ref2(), and collects any
* object it can. This is only necessary when "from" no longer points to "r",
* and not from from's "free" function. */
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from);
/* Casting shorthands for upb_refcounted_ref2()/upb_refcounted_unref2().  The
 * arguments are parenthesized so that the cast applies to the whole argument
 * expression, not just to its first operand. */
#define upb_ref2(r, from) \
  upb_refcounted_ref2((const upb_refcounted*)(r), (upb_refcounted*)(from))
#define upb_unref2(r, from) \
  upb_refcounted_unref2((const upb_refcounted*)(r), (upb_refcounted*)(from))
/* Freezes all mutable object reachable by ref2() refs from the given roots.
* This will split refcounting groups into precise SCC groups, so that
* refcounting of frozen objects can be more aggressive. If memory allocation
* fails, or if more than 2**31 mutable objects are reachable from "roots", or
* if the maximum depth of the graph exceeds "maxdepth", false is returned and
* the objects are unchanged.
* After this operation succeeds, the objects are frozen/const, and may not be
* used through non-const pointers. In particular, they may not be passed as
* the second parameter of upb_refcounted_{ref,unref}2(). On the upside, all
* operations on frozen refcounteds are threadsafe, and objects will be freed
* at the precise moment that they become unreachable.
* Caller must own refs on each object in the "roots" list. */
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
int maxdepth);
/* Shared by all compiled-in refcounted objects. */
extern uint32_t static_refcount;
#ifdef __cplusplus
/* C++ Wrappers. */
namespace upb {
inline bool RefCounted::IsFrozen() const {
return upb_refcounted_isfrozen(this);
inline void RefCounted::Ref(const void *owner) const {
upb_refcounted_ref(this, owner);
inline void RefCounted::Unref(const void *owner) const {
upb_refcounted_unref(this, owner);
inline void RefCounted::DonateRef(const void *from, const void *to) const {
upb_refcounted_donateref(this, from, to);
inline void RefCounted::CheckRef(const void *owner) const {
upb_refcounted_checkref(this, owner);
} /* namespace upb */
/* upb::reffed_ptr ************************************************************/
#ifdef __cplusplus
#include <algorithm> /* For std::swap(). */
/* Provides RAII semantics for upb refcounted objects. Each reffed_ptr owns a
* ref on whatever object it points to (if any). */
/* Smart pointer that holds one ref on the object it points to, with the
 * reffed_ptr object itself registered as the ref's owner. */
template <class T> class upb::reffed_ptr {
 public:
  reffed_ptr() : ptr_(NULL) {}

  /* If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor.
   * Adopting requires a non-NULL val. */
  template <class U>
  reffed_ptr(U* val, const void* ref_donor = NULL)
      : ptr_(upb::upcast(val)) {
    if (ref_donor) {
      ptr_->DonateRef(ref_donor, this);
    } else if (ptr_) {
      ptr_->Ref(this);
    }
  }

  template <class U>
  reffed_ptr(const reffed_ptr<U>& other)
      : ptr_(upb::upcast(other.get())) {
    if (ptr_) ptr_->Ref(this);
  }

  reffed_ptr(const reffed_ptr& other)
      : ptr_(upb::upcast(other.get())) {
    if (ptr_) ptr_->Ref(this);
  }

  ~reffed_ptr() { if (ptr_) ptr_->Unref(this); }

  /* Assignment takes a new ref on other's object and drops the ref held so
   * far.  reset() refs the new object before releasing the old one, so
   * self-assignment is safe. */
  template <class U>
  reffed_ptr& operator=(const reffed_ptr<U>& other) {
    reset(other.get());
    return *this;
  }

  reffed_ptr& operator=(const reffed_ptr& other) {
    reset(other.get());
    return *this;
  }

  /* TODO(haberman): add C++11 move construction/assignment for greater
   * efficiency. */

  void swap(reffed_ptr& other) {
    /* Both sides already own a ref on the same object (or both are NULL);
     * donating back and forth would hand a ref to an owner that has one. */
    if (ptr_ == other.ptr_) {
      return;
    }

    if (ptr_) ptr_->DonateRef(this, &other);
    if (other.ptr_) other.ptr_->DonateRef(&other, this);
    std::swap(ptr_, other.ptr_);
  }

  T& operator*() const {
    return *ptr_;
  }

  T* operator->() const {
    return ptr_;
  }

  T* get() const { return ptr_; }

  /* If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor. */
  template <class U>
  void reset(U* ptr = NULL, const void* ref_donor = NULL) {
    reffed_ptr(ptr, ref_donor).swap(*this);
  }

  template <class U>
  reffed_ptr<U> down_cast() {
    return reffed_ptr<U>(upb::down_cast<U*>(get()));
  }

  template <class U>
  reffed_ptr<U> dyn_cast() {
    return reffed_ptr<U>(upb::dyn_cast<U*>(get()));
  }

  /* Plain release() is unsafe; if we were the only owner, it would leak the
   * object.  Instead we provide this: the ref is handed to new_owner and this
   * pointer becomes NULL.  Releasing an empty pointer returns NULL. */
  T* ReleaseTo(const void* new_owner) {
    T* ret = NULL;
    if (ptr_) ptr_->DonateRef(this, new_owner);
    std::swap(ret, ptr_);
    return ret;
  }

 private:
  T* ptr_;
};
#endif /* __cplusplus */
#endif /* UPB_REFCOUNT_H_ */
#ifdef __cplusplus
#include <cstring>
#include <string>
#include <vector>

/* Forward declarations of the C++ def types. */
namespace upb {
class Def;
class EnumDef;
class FieldDef;
class FileDef;
class MessageDef;
class OneofDef;
class SymbolTable;
}  /* namespace upb */
#endif
/* Declare the def types that derive directly from the refcounted base; the
 * last two arguments name the C type and its C base type. */
UPB_DECLARE_DERIVED_TYPE(upb::Def, upb::RefCounted, upb_def, upb_refcounted)
UPB_DECLARE_DERIVED_TYPE(upb::OneofDef, upb::RefCounted, upb_oneofdef,
                         upb_refcounted)
UPB_DECLARE_DERIVED_TYPE(upb::FileDef, upb::RefCounted, upb_filedef,
                         upb_refcounted)
UPB_DECLARE_TYPE(upb::SymbolTable, upb_symtab)
/* The maximum message depth that the type graph can have. This is a resource
* limit for the C stack since we sometimes need to recursively traverse the
* graph. Cycles are ok; the traversal will stop when it detects a cycle, but
* we must hit the cycle before the maximum depth is reached.
* If having a single static limit is too inflexible, we can add another variant
* of Def::Freeze that allows specifying this as a parameter. */
/* upb::Def: base class for top-level defs ***********************************/
/* All the different kind of defs that can be defined at the top-level and put
* in a SymbolTable or appear in a FileDef::defs() list. This excludes some
* defs (like oneofs and files). It only includes fields because they can be
* defined as extensions. */
/* NOTE(review): UPB_DEF_MSG, UPB_DEF_FIELD and UPB_DEF_ENUM were missing from
 * this enum although the def cast helpers in this header compare against
 * them.  Their order (and so their numeric values) follows upstream upb --
 * confirm that it matches the compiled library. */
typedef enum {
  UPB_DEF_MSG,
  UPB_DEF_FIELD,
  UPB_DEF_ENUM,
  UPB_DEF_SERVICE,  /* Not yet implemented. */
  UPB_DEF_ANY = -1  /* Wildcard for upb_symtab_get*() */
} upb_deftype_t;
#ifdef __cplusplus
/* The base class of all defs. Its base is upb::RefCounted (use upb::upcast()
* to convert). */
class upb::Def {
 public:
  typedef upb_deftype_t Type;

  /* upb::RefCounted methods like Ref()/Unref().
   * NOTE(review): no refcounting members are declared at this point in the
   * header as it stands -- confirm against upstream whether a method macro
   * belongs here. */

  Type def_type() const;

  /* "fullname" is the def's fully-qualified name. */
  const char *full_name() const;

  /* The final part of a def's name (eg. Message). */
  const char *name() const;

  /* The def must be mutable.  Caller retains ownership of fullname.  Defs are
   * not required to have a name; if a def has no name when it is frozen, it
   * will remain an anonymous def.  On failure, returns false and details in "s"
   * if non-NULL. */
  bool set_full_name(const char* fullname, upb::Status* s);
  bool set_full_name(const std::string &fullname, upb::Status* s);

  /* The file in which this def appears.  It is not necessary to add a def to a
   * file (and consequently the accessor may return NULL).  Set this by calling
   * file->Add(def). */
  FileDef* file() const;

  /* Freezes the given defs; this validates all constraints and marks the defs
   * as frozen (read-only).  "defs" may not contain any fielddefs, but fields
   * of any msgdefs will be frozen.
   *
   * Symbolic references to sub-types and enum defaults must have already been
   * resolved.  Any mutable defs reachable from any of "defs" must also be in
   * the list; more formally, "defs" must be a transitive closure of mutable
   * defs.
   *
   * After this operation succeeds, the finalized defs must only be accessed
   * through a const pointer! */
  static bool Freeze(Def* const* defs, size_t n, Status* status);
  static bool Freeze(const std::vector<Def*>& defs, Status* status);

 private:
  /* NOTE(review): restored by analogy with upb::RefCounted -- confirm. */
  UPB_DISALLOW_POD_OPS(Def, upb::Def)
};
#endif /* __cplusplus */
/* Include upb_refcounted methods like upb_def_ref()/upb_def_unref(). */
UPB_REFCOUNTED_CMETHODS(upb_def, upb_def_upcast)
/* C API for defs.
 * NOTE(review): these look like the C counterparts of upb::Def's def_type(),
 * full_name(), name(), file(), set_full_name() and Freeze(); the forwarding
 * code is not visible from this part of the header -- confirm. */
upb_deftype_t upb_def_type(const upb_def *d);
const char *upb_def_fullname(const upb_def *d);
const char *upb_def_name(const upb_def *d);
const upb_filedef *upb_def_file(const upb_def *d);
bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s);
bool upb_def_freeze(upb_def *const *defs, size_t n, upb_status *s);
/* Temporary API: for internal use only. */
bool _upb_def_validate(upb_def *const*defs, size_t n, upb_status *s);
/* upb::Def casts *************************************************************/
/* In C++, specializes upb::down_cast<> and upb::dyn_cast<> from Def to the
 * given def type, in terms of the C cast helpers upb_downcast_<cname>() and
 * upb_dyncast_<cname>() and their _mutable variants.  In C it expands to
 * nothing. */
#ifdef __cplusplus
#define UPB_CPP_CASTS(cname, cpptype) \
  namespace upb { \
  template <> \
  inline cpptype *down_cast<cpptype *, Def>(Def * def) { \
    return upb_downcast_##cname##_mutable(def); \
  } \
  template <> \
  inline cpptype *dyn_cast<cpptype *, Def>(Def * def) { \
    return upb_dyncast_##cname##_mutable(def); \
  } \
  template <> \
  inline const cpptype *down_cast<const cpptype *, const Def>( \
      const Def *def) { \
    return upb_downcast_##cname(def); \
  } \
  template <> \
  inline const cpptype *dyn_cast<const cpptype *, const Def>(const Def *def) { \
    return upb_dyncast_##cname(def); \
  } \
  template <> \
  inline const cpptype *down_cast<const cpptype *, Def>(Def * def) { \
    return upb_downcast_##cname(def); \
  } \
  template <> \
  inline const cpptype *dyn_cast<const cpptype *, Def>(Def * def) { \
    return upb_dyncast_##cname(def); \
  } \
  }  /* namespace upb */
#else
#define UPB_CPP_CASTS(cname, cpptype)
#endif  /* __cplusplus */
/* Dynamic casts, for determining if a def is of a particular type at runtime.
* Downcasts, for when some wants to assert that a def is of a particular type.
* These are only checked if we are building debug. */
/* Defines the four C cast helpers for one def type:
 *   upb_dyncast_<lower>()           returns NULL when the def's type differs.
 *   upb_downcast_<lower>()          asserts that the type matches.
 *   ..._mutable variants            take and return non-const pointers.
 * It then emits the matching C++ cast specializations. */
#define UPB_DEF_CASTS(lower, upper, cpptype) \
  UPB_INLINE const upb_##lower *upb_dyncast_##lower(const upb_def *def) { \
    if (upb_def_type(def) != UPB_DEF_##upper) return NULL; \
    return (const upb_##lower *)def; \
  } \
  UPB_INLINE const upb_##lower *upb_downcast_##lower(const upb_def *def) { \
    UPB_ASSERT(upb_def_type(def) == UPB_DEF_##upper); \
    return (const upb_##lower *)def; \
  } \
  UPB_INLINE upb_##lower *upb_dyncast_##lower##_mutable(upb_def *def) { \
    return (upb_##lower *)upb_dyncast_##lower(def); \
  } \
  UPB_INLINE upb_##lower *upb_downcast_##lower##_mutable(upb_def *def) { \
    return (upb_##lower *)upb_downcast_##lower(def); \
  } \
  UPB_CPP_CASTS(lower, cpptype)
/* Defines a def class through UPB_DEFINE_CLASS2 (with upb::Def and
 * upb::RefCounted passed as its base types) and emits its cast helpers. */
#define UPB_DEFINE_DEF(cppname, lower, upper, cppmethods, members) \
UPB_DEFINE_CLASS2(cppname, upb::Def, upb::RefCounted, cppmethods, \
members) \
UPB_DEF_CASTS(lower, upper, cppname)
/* Declares a def type through UPB_DECLARE_DERIVED_TYPE2, pairing the C++ name
 * with the C type upb_<lower>, and emits its cast helpers. */
#define UPB_DECLARE_DEF_TYPE(cppname, lower, upper) \
UPB_DECLARE_DERIVED_TYPE2(cppname, upb::Def, upb::RefCounted, \
upb_ ## lower, upb_def, upb_refcounted) \
UPB_DEF_CASTS(lower, upper, cppname)
/* The three def types declared this way: fields, messages and enums. */
UPB_DECLARE_DEF_TYPE(upb::FieldDef, fielddef, FIELD)
UPB_DECLARE_DEF_TYPE(upb::MessageDef, msgdef, MSG)
UPB_DECLARE_DEF_TYPE(upb::EnumDef, enumdef, ENUM)
/* upb::FieldDef **************************************************************/
/* The types a field can have. Note that this list is not identical to the
* types defined in descriptor.proto, which gives INT32 and SINT32 separate
* types (we distinguish the two with the "integer encoding" enum below). */
/* NOTE(review): this list looks incomplete.  Three of the four size groups
 * below have no enumerators at all, the explicit values jump from 5 to 10,
 * and this header mentions UPB_TYPE_MESSAGE, which is not declared here.
 * Confirm the full enumerator list against upstream. */
typedef enum {
/* Types stored in 1 byte. */
/* Types stored in 4 bytes. */
UPB_TYPE_ENUM = 5, /* Enum values are int32. */
/* Types stored as pointers (probably 4 or 8 bytes). */
/* Types stored as 8 bytes. */
UPB_TYPE_INT64 = 10,
} upb_fieldtype_t;
/* The repeated-ness of each field; this matches descriptor.proto. */
/* NOTE(review): no enumerators are declared, although this header refers to
 * UPB_LABEL_OPTIONAL as the default label; an enum without enumerators is
 * also not valid C.  Confirm the enumerator list against upstream. */
typedef enum {
} upb_label_t;
/* How integers should be encoded in serializations that offer multiple
* integer encoding methods. */
/* NOTE(review): only UPB_INTFMT_ZIGZAG is declared, although this header names
 * UPB_INTFMT_VARIABLE as the default format.  Confirm the enumerator list
 * against upstream. */
typedef enum {
UPB_INTFMT_ZIGZAG = 3 /* Only for signed types (INT32/INT64). */
} upb_intfmt_t;
/* Descriptor types, as defined in descriptor.proto. */
/* NOTE(review): no enumerators are declared; an enum without enumerators is
 * not valid C.  Confirm the enumerator list against upstream. */
typedef enum {
} upb_descriptortype_t;
/* NOTE(review): undocumented and without enumerators; an enum without
 * enumerators is not valid C.  Confirm the enumerator list against
 * upstream. */
typedef enum {
} upb_syntax_t;
/* Maximum field number allowed for FieldDefs. This is an inherent limit of the
* protobuf wire format. */
#define UPB_MAX_FIELDNUMBER ((1 << 29) - 1)
#ifdef __cplusplus
/* A upb_fielddef describes a single field in a message. It is most often
* found as a part of a upb_msgdef, but can also stand alone to represent
* an extension.
* Its base class is upb::Def (use upb::upcast() to convert). */
class upb::FieldDef {
typedef upb_fieldtype_t Type;
typedef upb_label_t Label;
typedef upb_intfmt_t IntegerFormat;
typedef upb_descriptortype_t DescriptorType;
/* These return true if the given value is a valid member of the enumeration. */
static bool CheckType(int32_t val);
static bool CheckLabel(int32_t val);
static bool CheckDescriptorType(int32_t val);
static bool CheckIntegerFormat(int32_t val);
/* These convert to the given enumeration; they require that the value is
* valid. */
static Type ConvertType(int32_t val);
static Label ConvertLabel(int32_t val);
static DescriptorType ConvertDescriptorType(int32_t val);
static IntegerFormat ConvertIntegerFormat(int32_t val);
/* Returns NULL if memory allocation failed. */
static reffed_ptr<FieldDef> New();
/* upb::RefCounted methods like Ref()/Unref(). */
/* Functionality from upb::Def. */
const char* full_name() const;
bool type_is_set() const; /* set_[descriptor_]type() has been called? */
Type type() const; /* Requires that type_is_set() == true. */
Label label() const; /* Defaults to UPB_LABEL_OPTIONAL. */
const char* name() const; /* NULL if uninitialized. */
uint32_t number() const; /* Returns 0 if uninitialized. */
bool is_extension() const;
/* Copies the JSON name for this field into the given buffer.  Returns the
 * actual size of the JSON name, including the NULL terminator.  If the
 * return value is 0, the JSON name is unset.  If the return value is
 * greater than len, the JSON name was truncated.  The buffer is always
 * NULL-terminated if len > 0.
 *
 * The JSON name always defaults to a camelCased version of the regular
 * name.  However if the regular name is unset, the JSON name will be unset
 * also.
 */
size_t GetJsonName(char* buf, size_t len) const;
/* Convenience version of the above function which copies the JSON name
* into the given string, returning false if the name is not set. */
template <class T>
bool GetJsonName(T* str) {
  /* The first call only asks for the required size; the second fills the
   * string's storage.  The size reported by the buffer overload counts the
   * NULL terminator, so the resized string does too. */
  str->resize(GetJsonName(NULL, 0));
  GetJsonName(&(*str)[0], str->size());
  return str->size() > 0;
}
/* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false,
* indicates whether this field should have lazy parsing handlers that yield
* the unparsed string for the submessage.
* TODO(haberman): I think we want to move this into a FieldOptions container
* when we add support for custom options (the FieldOptions struct will
* contain both regular FieldOptions like "lazy" *and* custom options). */
bool lazy() const;
/* For non-string, non-submessage fields, this indicates whether binary
* protobufs are encoded in packed or non-packed format.
* TODO(haberman): see note above about putting options like this into a
* FieldOptions container. */
bool packed() const;
/* An integer that can be used as an index into an array of fields for
* whatever message this field belongs to. Guaranteed to be less than
* f->containing_type()->field_count(). May only be accessed once the def has
* been finalized. */
uint32_t index() const;
/* The MessageDef to which this field belongs.
* If this field has been added to a MessageDef, that message can be retrieved
* directly (this is always the case for frozen FieldDefs).
* If the field has not yet been added to a MessageDef, you can set the name
* of the containing type symbolically instead. This is mostly useful for
* extensions, where the extension is declared separately from the message. */
const MessageDef* containing_type() const;
const char* containing_type_name();
/* The OneofDef to which this field belongs, or NULL if this field is not part
* of a oneof. */
const OneofDef* containing_oneof() const;
/* The field's type according to the enum in descriptor.proto. This is not
* the same as UPB_TYPE_*, because it distinguishes between (for example)
* INT32 and SINT32, whereas our "type" enum does not. The value returned by
* descriptor_type() is a function of type(), integer_format(), and
* is_tag_delimited(). Likewise set_descriptor_type() sets all three
* appropriately. */
DescriptorType descriptor_type() const;
/* Convenient field type tests. */
bool IsSubMessage() const;
bool IsString() const;
bool IsSequence() const;
bool IsPrimitive() const;
bool IsMap() const;
/* Returns whether this field explicitly represents presence.
* For proto2 messages: returns true for any scalar (non-repeated) field.
* For proto3 messages: returns true for scalar submessage or oneof fields. */
bool HasPresence() const;
/* How integers are encoded. Only meaningful for integer types.
* Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes.
* See set_integer_format(). */
IntegerFormat integer_format() const;
/* Whether a submessage field is tag-delimited or not (if false, then
* length-delimited). May only be set when type() == UPB_TYPE_MESSAGE.
* See set_tag_delimited(). */
bool is_tag_delimited() const;
/* Returns the non-string default value for this fielddef, which may either
* be something the client set explicitly or the "default default" (0 for
* numbers, empty for strings). The field's type indicates the type of the
* returned value, except for enum fields that are still mutable.
* Requires that the accessor being called matches the field's current
* type. */
int64_t default_int64() const;
int32_t default_int32() const;
uint64_t default_uint64() const;
uint32_t default_uint32() const;
bool default_bool() const;
float default_float() const;
double default_double() const;
/* Returns the string default. The resulting string is always NUL-terminated.
* If "len" is non-NULL, the length will be stored in *len. */
const char *default_string(size_t* len) const;
/* For frozen UPB_TYPE_ENUM fields, enum defaults can always be read as either
* string or int32, and both of these methods will always return true.
* For mutable UPB_TYPE_ENUM fields, the story is a bit more complicated.
* Enum defaults are unusual. They can be specified either as string or int32,
* but to be valid the enum must have that value as a member. And if no
* default is specified, the "default default" comes from the EnumDef.
* We allow reading the default as either an int32 or a string, but only if
* we have a meaningful value to report. We have a meaningful value if it was
* set explicitly, or if we could get the "default default" from the EnumDef.
* Also if you explicitly set the name and we find the number in the EnumDef.
* NOTE(review): the sentence above is unfinished in the original comment;
* presumably it means the int32 form is then also considered available --
* confirm against the implementation. */
bool EnumHasStringDefault() const;
bool EnumHasInt32Default() const;
/* Submessage and enum fields must reference a "subdef", which is the
* upb::MessageDef or upb::EnumDef that defines their type. Note that when
* the FieldDef is mutable it may not have a subdef *yet*, but this function
* still returns true to indicate that the field's type requires a subdef. */
bool HasSubDef() const;
/* Returns the enum or submessage def for this field, if any. The field's
* type must match (i.e. you may only call enum_subdef() for fields where
* type() == UPB_TYPE_ENUM). Returns NULL if the subdef has not been set or
* is currently set symbolically. */
const EnumDef* enum_subdef() const;
const MessageDef* message_subdef() const;
/* Returns the generic subdef for this field. Requires that HasSubDef() is
* true (i.e. only works for UPB_TYPE_ENUM and UPB_TYPE_MESSAGE fields). */
const Def* subdef() const;
/* Returns the name of the subdef. If the subdef is currently unresolved
* (i.e. set symbolically), returns the symbolic name. If it has been
* resolved to a specific subdef, returns the name from that subdef. */
const char* subdef_name() const;
/* Setters (non-const methods), only valid for mutable FieldDefs! ***********/
/* Sets the full name. The bool-returning setters in this section take a
* Status* alongside their bool result. */
bool set_full_name(const char* fullname, upb::Status* s);
bool set_full_name(const std::string& fullname, upb::Status* s);
/* Sets the name of the containing type symbolically.
* This may only be called if containing_type() == NULL (i.e. the field has
* not been added to a message yet). */
bool set_containing_type_name(const char *name, Status* status);
bool set_containing_type_name(const std::string& name, Status* status);
/* Defaults to false. When we freeze, we ensure that this can only be true
* for length-delimited message fields. Prior to freezing this can be true or
* false with no restrictions. */
void set_lazy(bool lazy);
/* Defaults to true. Sets whether this field is encoded in packed format. */
void set_packed(bool packed);
/* "type" or "descriptor_type" MUST be set explicitly before the fielddef is
* finalized. These setters require that the enum value is valid; if the
* value did not come directly from an enum constant, the caller should
* validate it first with the functions above (CheckFieldType(), etc). */
void set_type(Type type);
void set_label(Label label);
void set_descriptor_type(DescriptorType type);
void set_is_extension(bool is_extension);
/* "number" and "name" must be set before the FieldDef is added to a
* MessageDef, and may not be set after that.
* "name" is the same as full_name()/set_full_name(), but since fielddefs
* most often use simple, non-qualified names, we provide this accessor
* also. Generally only extensions will want to think of this name as
* fully-qualified. */
bool set_number(uint32_t number, upb::Status* s);
bool set_name(const char* name, upb::Status* s);
bool set_name(const std::string& name, upb::Status* s);
/* Sets the JSON name to the given string. */
/* TODO(haberman): implement. Right now only default json_name (camelCase)
* is supported. */
bool set_json_name(const char* json_name, upb::Status* s);
bool set_json_name(const std::string& name, upb::Status* s);
/* Clears the JSON name. This will make it revert to its default, which is
* a camelCased version of the regular field name. */
void clear_json_name();
/* Sets how integers are encoded; see integer_format(). */
void set_integer_format(IntegerFormat format);
/* Sets whether a submessage field is tag-delimited; see is_tag_delimited().
* NOTE(review): this overload returns bool and takes a Status*, whereas the
* C function upb_fielddef_settagdelim() declared later in this file returns
* void and takes no status -- confirm the two are meant to correspond. */
bool set_tag_delimited(bool tag_delimited, upb::Status* s);
/* Sets the default value for the field. The call must exactly match the type
* of the field. Enum fields may use either the int32 setter or the string
* setter to set the default numerically or symbolically, respectively, but
* symbolic defaults must be resolved before finalizing (see
* ResolveEnumDefault()).
* Changing the type of a field will reset its default. */
void set_default_int64(int64_t val);
void set_default_int32(int32_t val);
void set_default_uint64(uint64_t val);
void set_default_uint32(uint32_t val);
void set_default_bool(bool val);
void set_default_float(float val);
void set_default_double(double val);
/* String defaults: as a (pointer, length) pair, as a std::string, or as a
* C string.
* NOTE(review): set_default_cstr() returns void even though it takes a
* Status*, unlike the two set_default_string() overloads which return bool;
* callers presumably have to inspect *s to detect failure -- confirm. */
bool set_default_string(const void *str, size_t len, Status *s);
bool set_default_string(const std::string &str, Status *s);
void set_default_cstr(const char *str, Status *s);
/* Before a fielddef is frozen, its subdef may be set either directly (with a
* upb::Def*) or symbolically (by name). Symbolic refs must be resolved before
* the containing msgdef can be frozen (see upb_resolve() above). upb always
* guarantees that any def reachable from a live def will also be kept alive.
* Both methods require that HasSubDef() is true (so the type must be set
* prior to calling these methods). Returns false if this is not the case, or
* if the given subdef is not of the correct type. The subdef is reset if the
* field's type is changed. The subdef can be set to NULL to clear it. */
bool set_subdef(const Def* subdef, Status* s);
bool set_enum_subdef(const EnumDef* subdef, Status* s);
bool set_message_subdef(const MessageDef* subdef, Status* s);
bool set_subdef_name(const char* name, Status* s);
bool set_subdef_name(const std::string &name, Status* s);
UPB_DISALLOW_POD_OPS(FieldDef, upb::FieldDef)
# endif /* defined(__cplusplus) */
/* Native C API. */
/* Creates a new fielddef.
* NOTE(review): "owner" is presumably the owner of the initial ref, and the
* result is presumably NULL on allocation failure (as documented for
* upb_msgdef_new()) -- confirm against the implementation. */
upb_fielddef *upb_fielddef_new(const void *owner);
/* Include upb_refcounted methods like upb_fielddef_ref(). */
UPB_REFCOUNTED_CMETHODS(upb_fielddef, upb_fielddef_upcast2)
/* Methods from upb_def. */
const char *upb_fielddef_fullname(const upb_fielddef *f);
bool upb_fielddef_setfullname(upb_fielddef *f, const char *fullname,
upb_status *s);
/* Read accessors. By name these appear to correspond to the upb::FieldDef
* getters declared in the C++ section above (e.g. upb_fielddef_packed() and
* FieldDef::packed()); the C++ wrappers themselves are not visible in this
* part of the file, so see the C++ declarations for the documented
* semantics. */
bool upb_fielddef_typeisset(const upb_fielddef *f);
upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f);
upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f);
upb_label_t upb_fielddef_label(const upb_fielddef *f);
uint32_t upb_fielddef_number(const upb_fielddef *f);
const char *upb_fielddef_name(const upb_fielddef *f);
bool upb_fielddef_isextension(const upb_fielddef *f);
bool upb_fielddef_lazy(const upb_fielddef *f);
bool upb_fielddef_packed(const upb_fielddef *f);
/* NOTE(review): presumably writes the JSON name into "buf" (capacity "len")
* and returns a length; the exact truncation/return convention is not
* visible here -- confirm before use. */
size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len);
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f);
const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f);
/* Non-const variant: takes and returns non-const pointers. */
upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f);
/* Note: takes a non-const fielddef, unlike the other getters. */
const char *upb_fielddef_containingtypename(upb_fielddef *f);
upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f);
uint32_t upb_fielddef_index(const upb_fielddef *f);
bool upb_fielddef_istagdelim(const upb_fielddef *f);
/* Field type tests. */
bool upb_fielddef_issubmsg(const upb_fielddef *f);
bool upb_fielddef_isstring(const upb_fielddef *f);
bool upb_fielddef_isseq(const upb_fielddef *f);
bool upb_fielddef_isprimitive(const upb_fielddef *f);
bool upb_fielddef_ismap(const upb_fielddef *f);
bool upb_fielddef_haspresence(const upb_fielddef *f);
/* Default value getters, one per value type. */
int64_t upb_fielddef_defaultint64(const upb_fielddef *f);
int32_t upb_fielddef_defaultint32(const upb_fielddef *f);
uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f);
uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f);
bool upb_fielddef_defaultbool(const upb_fielddef *f);
float upb_fielddef_defaultfloat(const upb_fielddef *f);
double upb_fielddef_defaultdouble(const upb_fielddef *f);
const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len);
bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f);
bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f);
/* Subdef getters. */
bool upb_fielddef_hassubdef(const upb_fielddef *f);
const upb_def *upb_fielddef_subdef(const upb_fielddef *f);
const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f);
const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f);
const char *upb_fielddef_subdefname(const upb_fielddef *f);
/* Setters. All of these take a non-const upb_fielddef. By name they appear
* to correspond to the upb::FieldDef setters declared in the C++ section
* above; see those declarations for the documented preconditions. Functions
* that take a upb_status* also return bool. */
void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type);
/* NOTE(review): takes a plain int rather than upb_descriptortype_t (the type
* returned by upb_fielddef_descriptortype()); presumably the value should be
* validated with upb_fielddef_checkdescriptortype() first -- confirm. */
void upb_fielddef_setdescriptortype(upb_fielddef *f, int type);
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label);
bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s);
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s);
bool upb_fielddef_setjsonname(upb_fielddef *f, const char *name, upb_status *s);
/* NOTE(review): returns bool, whereas the C++ FieldDef::clear_json_name()
* declared above returns void; the meaning of the result is not visible
* here. */
bool upb_fielddef_clearjsonname(upb_fielddef *f);
bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
upb_status *s);
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension);
void upb_fielddef_setlazy(upb_fielddef *f, bool lazy);
void upb_fielddef_setpacked(upb_fielddef *f, bool packed);
void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt);
/* NOTE(review): returns void and takes no status, whereas the C++
* FieldDef::set_tag_delimited() declared above returns bool and takes a
* Status* -- confirm the two are meant to correspond. */
void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim);
/* Default value setters, one per value type. */
void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t val);
void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t val);
void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t val);
void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t val);
void upb_fielddef_setdefaultbool(upb_fielddef *f, bool val);
void upb_fielddef_setdefaultfloat(upb_fielddef *f, float val);
void upb_fielddef_setdefaultdouble(upb_fielddef *f, double val);
bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
upb_status *s);
/* Returns void even though it takes a status; any failure is presumably
* reported only through *s -- confirm. */
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
upb_status *s);
/* Subdef setters: by def pointer (generic, message, enum) or by name. */
bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
upb_status *s);
bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
upb_status *s);
bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
upb_status *s);
bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
upb_status *s);
/* Validation helpers taking a raw int32_t. Presumably these report whether
* the value is a valid member of the corresponding enum, so that it can be
* checked before being passed to the setters above -- confirm. */
bool upb_fielddef_checklabel(int32_t label);
bool upb_fielddef_checktype(int32_t type);
bool upb_fielddef_checkdescriptortype(int32_t type);
bool upb_fielddef_checkintfmt(int32_t fmt);
/* upb::MessageDef ************************************************************/
/* Iterator types for the fields and oneofs of a msgdef. Field iteration is
* an alias for the inttable iterator and oneof iteration an alias for the
* strtable iterator; they are used with the upb_msg_field_* and
* upb_msg_oneof_* iteration functions declared later in this file. */
typedef upb_inttable_iter upb_msg_field_iter;
typedef upb_strtable_iter upb_msg_oneof_iter;
/* Well-known field tag numbers for map-entry messages.
* NOTE(review): no definitions follow this comment in this part of the file;
* the constants it describes are not visible here. */
#ifdef __cplusplus
/* Structure that describes a single .proto message type.
* Its base class is upb::Def (use upb::upcast() to convert).
* A MessageDef owns a set of fields (FieldDef) and oneofs (OneofDef) that can
* be looked up by number or name and iterated over; see the members below. */
class upb::MessageDef {
/* Returns NULL if memory allocation failed. */
static reffed_ptr<MessageDef> New();
/* upb::RefCounted methods like Ref()/Unref(). */
/* Functionality from upb::Def. */
const char* full_name() const;
const char* name() const;
bool set_full_name(const char* fullname, Status* s);
bool set_full_name(const std::string& fullname, Status* s);
/* Call to freeze this MessageDef.
* WARNING: this will fail if this message has any unfrozen submessages!
* Messages with cycles must be frozen as a batch using upb::Def::Freeze(). */
bool Freeze(Status* s);
/* The number of fields that belong to the MessageDef. */
int field_count() const;
/* The number of oneofs that belong to the MessageDef. */
int oneof_count() const;
/* Adds a field (upb_fielddef object) to a msgdef. Requires that the msgdef
* and the fielddefs are mutable. The fielddef's name and number must be
* set, and the message may not already contain any field with this name or
* number, and this fielddef may not be part of another message. In error
* cases false is returned and the msgdef is unchanged.
* If the given field is part of a oneof, this call succeeds if and only if
* that oneof is already part of this msgdef. (Note that adding a oneof to a
* msgdef automatically adds all of its fields to the msgdef at the time that
* the oneof is added, so it is usually more idiomatic to add the oneof's
* fields first then add the oneof to the msgdef. This case is supported for
* convenience.)
* If |f| is already part of this MessageDef, this method performs no action
* and returns true (success). Thus, this method is idempotent. */
bool AddField(FieldDef* f, Status* s);
bool AddField(const reffed_ptr<FieldDef>& f, Status* s);
/* Adds a oneof (upb_oneofdef object) to a msgdef. Requires that the msgdef,
* oneof, and any fielddefs are mutable, that the fielddefs contained in the
* oneof do not have any name or number conflicts with existing fields in the
* msgdef, and that the oneof's name is unique among all oneofs in the msgdef.
* If the oneof is added successfully, all of its fields will be added
* directly to the msgdef as well. In error cases, false is returned and the
* msgdef is unchanged. */
bool AddOneof(OneofDef* o, Status* s);
bool AddOneof(const reffed_ptr<OneofDef>& o, Status* s);
/* The syntax value of this message (a upb_syntax_t). */
upb_syntax_t syntax() const;
/* Returns false if we don't support this syntax value. */
bool set_syntax(upb_syntax_t syntax);
/* Set this to false to indicate that primitive fields should not have
* explicit presence information associated with them. This will affect all
* fields added to this message. Defaults to true. */
void SetPrimitivesHavePresence(bool have_presence);
/* Field lookup by number or by (name, len). These return NULL if the field
* is not found. Each lookup has a non-const and a const variant. */
FieldDef* FindFieldByNumber(uint32_t number);
FieldDef* FindFieldByName(const char *name, size_t len);
const FieldDef* FindFieldByNumber(uint32_t number) const;
const FieldDef* FindFieldByName(const char* name, size_t len) const;
/* Convenience overloads for NUL-terminated names: they forward to the
* (name, len) overloads using strlen(name) as the length. */
FieldDef* FindFieldByName(const char *name) {
return FindFieldByName(name, strlen(name));
const FieldDef* FindFieldByName(const char *name) const {
return FindFieldByName(name, strlen(name));
/* Convenience overloads for any string-like type T that provides c_str()
* and size() (e.g. std::string); they forward to the (name, len)
* overloads. */
template <class T>
FieldDef* FindFieldByName(const T& str) {
return FindFieldByName(str.c_str(), str.size());
template <class T>
const FieldDef* FindFieldByName(const T& str) const {
return FindFieldByName(str.c_str(), str.size());
/* Oneof lookup by (name, len), with the same set of convenience overloads
* as FindFieldByName() above. Presumably these also return NULL when the
* oneof is not found -- confirm against the implementation. */
OneofDef* FindOneofByName(const char* name, size_t len);
const OneofDef* FindOneofByName(const char* name, size_t len) const;
OneofDef* FindOneofByName(const char* name) {
return FindOneofByName(name, strlen(name));
const OneofDef* FindOneofByName(const char* name) const {
return FindOneofByName(name, strlen(name));
template<class T>
OneofDef* FindOneofByName(const T& str) {
return FindOneofByName(str.c_str(), str.size());
template<class T>
const OneofDef* FindOneofByName(const T& str) const {
return FindOneofByName(str.c_str(), str.size());
/* Is this message a map entry? */
void setmapentry(bool map_entry);
bool mapentry() const;
/* Iteration over fields. The order is undefined.
* field_iterator yields FieldDef* and is constructed from a non-const
* MessageDef; const_field_iterator yields const FieldDef* and is constructed
* from a const MessageDef. Both wrap a upb_msg_field_iter. The constructor
* taking a MessageDef presumably positions the iterator at the first field,
* while the static end() factory returns the past-the-end iterator.
* Note that operator++() returns void, so only the statement form "++it"
* (as used by range-based for) is supported. */
class field_iterator
: public std::iterator<std::forward_iterator_tag, FieldDef*> {
explicit field_iterator(MessageDef* md);
static field_iterator end(MessageDef* md);
void operator++();
FieldDef* operator*() const;
bool operator!=(const field_iterator& other) const;
bool operator==(const field_iterator& other) const;
upb_msg_field_iter iter_;
/* Const counterpart of field_iterator. */
class const_field_iterator
: public std::iterator<std::forward_iterator_tag, const FieldDef*> {
explicit const_field_iterator(const MessageDef* md);
static const_field_iterator end(const MessageDef* md);
void operator++();
const FieldDef* operator*() const;
bool operator!=(const const_field_iterator& other) const;
bool operator==(const const_field_iterator& other) const;
upb_msg_field_iter iter_;
/* Iteration over oneofs. The order is undefined. */
class oneof_iterator
: public std::iterator<std::forward_iterator_tag, FieldDef*> {
explicit oneof_iterator(MessageDef* md);
static oneof_iterator end(MessageDef* md);
void operator++();
OneofDef* operator*() const;
bool operator!=(const oneof_iterator& other) const;
bool operator==(const oneof_iterator& other) const;
upb_msg_oneof_iter iter_;
class const_oneof_iterator
: public std::iterator<std::forward_iterator_tag, const FieldDef*> {
explicit const_oneof_iterator(const MessageDef* md);
static const_oneof_iterator end(const MessageDef* md);
void operator++();
const OneofDef* operator*() const;
bool operator!=(const const_oneof_iterator& other) const;
bool operator==(const const_oneof_iterator& other) const;
upb_msg_oneof_iter iter_;
/* Small range adapters returned by fields() and oneofs(). Each one stores a
* pointer to the MessageDef and exposes begin()/end() by forwarding to the
* message's field_begin()/field_end() or oneof_begin()/oneof_end(). The
* Const* variants hold a const MessageDef and return the const iterators. */
class FieldAccessor {
explicit FieldAccessor(MessageDef* msg) : msg_(msg) {}
field_iterator begin() { return msg_->field_begin(); }
field_iterator end() { return msg_->field_end(); }
MessageDef* msg_;
/* Const field range: iterates const FieldDef* over a const MessageDef. */
class ConstFieldAccessor {
explicit ConstFieldAccessor(const MessageDef* msg) : msg_(msg) {}
const_field_iterator begin() { return msg_->field_begin(); }
const_field_iterator end() { return msg_->field_end(); }
const MessageDef* msg_;
/* Oneof range: iterates OneofDef* over a non-const MessageDef. */
class OneofAccessor {
explicit OneofAccessor(MessageDef* msg) : msg_(msg) {}
oneof_iterator begin() { return msg_->oneof_begin(); }
oneof_iterator end() { return msg_->oneof_end(); }
MessageDef* msg_;
/* Const oneof range: iterates const OneofDef* over a const MessageDef. */
class ConstOneofAccessor {
explicit ConstOneofAccessor(const MessageDef* msg) : msg_(msg) {}
const_oneof_iterator begin() { return msg_->oneof_begin(); }
const_oneof_iterator end() { return msg_->oneof_end(); }
const MessageDef* msg_;
/* Begin/end iterators over this message's fields and oneofs. The non-const
* overloads return the mutable iterators, the const overloads return the
* const iterators. */
field_iterator field_begin();
field_iterator field_end();
const_field_iterator field_begin() const;
const_field_iterator field_end() const;
oneof_iterator oneof_begin();
oneof_iterator oneof_end();
const_oneof_iterator oneof_begin() const;
const_oneof_iterator oneof_end() const;
/* Return accessor objects wrapping this message; each accessor exposes
* begin()/end(), so the result is presumably meant to be used directly in a
* range-based for loop. */
FieldAccessor fields() { return FieldAccessor(this); }
ConstFieldAccessor fields() const { return ConstFieldAccessor(this); }
OneofAccessor oneofs() { return OneofAccessor(this); }
ConstOneofAccessor oneofs() const { return ConstOneofAccessor(this); }
UPB_DISALLOW_POD_OPS(MessageDef, upb::MessageDef)
#endif /* __cplusplus */
/* Creates a new msgdef. Returns NULL if memory allocation failed. */
upb_msgdef *upb_msgdef_new(const void *owner);
/* Include upb_refcounted methods like upb_msgdef_ref(). */
UPB_REFCOUNTED_CMETHODS(upb_msgdef, upb_msgdef_upcast2)
/* Basic msgdef accessors and mutators. By name these appear to correspond to
* the upb::MessageDef methods declared in the C++ section above (Freeze(),
* full_name(), name(), syntax(), AddField(), AddOneof(), set_full_name(),
* setmapentry(), mapentry(), set_syntax()); see those declarations for the
* documented semantics.
* upb_msgdef_numfields() and upb_msgdef_numoneofs() are declared once each,
* further down next to the field and oneof lookup functions, so they are not
* repeated here. */
bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status);
const char *upb_msgdef_fullname(const upb_msgdef *m);
const char *upb_msgdef_name(const upb_msgdef *m);
upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m);
/* NOTE(review): "ref_donor" is presumably an existing owner of a ref on the
* def being added, which is donated to the msgdef -- confirm against the
* implementation. */
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
upb_status *s);
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
upb_status *s);
bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, upb_status *s);
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
bool upb_msgdef_mapentry(const upb_msgdef *m);
bool upb_msgdef_setsyntax(upb_msgdef *m, upb_syntax_t syntax);
/* Field lookup in a couple of different variations:
* - itof = int to field (lookup by number)
* - ntof = name to field (name given as pointer + length)
* - ntofz = name to field, null-terminated string. */
const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i);
const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
size_t len);
/* The number of fields in the msgdef. */
int upb_msgdef_numfields(const upb_msgdef *m);
/* Null-terminated convenience wrapper: calls upb_msgdef_ntof() with
* strlen(name) as the length. */
UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m,
const char *name) {
return upb_msgdef_ntof(m, name, strlen(name));
/* Mutable variants: call the const lookup and cast the const away from the
* result, for callers that hold a non-const msgdef. */
UPB_INLINE upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) {
return (upb_fielddef*)upb_msgdef_itof(m, i);
UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m,
const char *name, size_t len) {
return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
/* Oneof lookup:
* - ntoo = name to oneof (name given as pointer + length)
* - ntooz = name to oneof, null-terminated string. */
const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
size_t len);
/* The number of oneofs in the msgdef. */
int upb_msgdef_numoneofs(const upb_msgdef *m);
/* Null-terminated convenience wrapper: calls upb_msgdef_ntoo() with
* strlen(name) as the length. */
UPB_INLINE const upb_oneofdef *upb_msgdef_ntooz(const upb_msgdef *m,
const char *name) {
return upb_msgdef_ntoo(m, name, strlen(name));
/* Mutable variant: calls the const lookup and casts the const away from the
* result, for callers that hold a non-const msgdef. */
UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m,
const char *name, size_t len) {
return (upb_oneofdef *)upb_msgdef_ntoo(m, name, len);
/* Lookup of either a field or a oneof by name (pointer + length). Returns
* whether either was found. If the return is true, then the found def will
* be stored through the corresponding out-parameter (*f or *o), and the
* other out-parameter is set to NULL. */
bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
const upb_fielddef **f, const upb_oneofdef **o);
/* Null-terminated convenience wrapper: calls upb_msgdef_lookupname() with
* strlen(name) as the length. */
UPB_INLINE bool upb_msgdef_lookupnamez(const upb_msgdef *m, const char *name,
const upb_fielddef **f,
const upb_oneofdef **o) {
return upb_msgdef_lookupname(m, name, strlen(name), f, o);
/* Iteration over fields and oneofs. For example:
* upb_msg_field_iter i;
* for(upb_msg_field_begin(&i, m);
* !upb_msg_field_done(&i);
* upb_msg_field_next(&i)) {
* upb_fielddef *f = upb_msg_iter_field(&i);
* // ...
* }
* For C we don't have separate iterators for const and non-const.
* It is the caller's responsibility to cast the upb_fielddef* to
* const if the upb_msgdef* is const. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m);
void upb_msg_field_next(upb_msg_field_iter *iter);
bool upb_msg_field_done(const upb_msg_field_iter *iter);
upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter);
/* Presumably puts the iterator into the "done" state, so that
* upb_msg_field_done() returns true -- confirm against the implementation. */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter);
/* Similar to above, we also support iterating through the oneofs in a
* msgdef. The same pattern applies, using upb_msg_oneof_iter and
* upb_msg_iter_oneof() to fetch the current upb_oneofdef. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m);
void upb_msg_oneof_next(upb_msg_oneof_iter *iter);
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter);
upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter);
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter);
/* upb::EnumDef ***************************************************************/
typedef upb_strtable_iter upb_enum_iter;
#ifdef __cplusplus
/* Class that represents an enum. Its base class is upb::Def (convert with
* upb::upcast()). */
class upb::EnumDef {
/* Returns NULL if memory allocation failed. */
static reffed_ptr<EnumDef> New();
/* upb::RefCounted methods like Ref()/Unref(). */
/* Functionality from upb::Def. */
const char* full_name() const;
const char* name() const;
bool set_full_name(const char* fullname, Status* s);
bool set_full_name(const std::string& fullname, Status* s);
/* Call to freeze this EnumDef. */
bool Freeze(Status* s);
/* The value that is used as the default when no field default is specified.
* If not set explicitly, the first value that was added will be used.
* The default value must be a member of the enum.
* Requires that value_count() > 0. */
int32_t default_value() const;
/* Sets the default value. If this value is not valid, returns false and an
* error message in status. */
bool set_default_value(int32_t val, Status* status);
/* Returns the number of values currently defined in the enum. Note that
* multiple names can refer to the same number, so this may be greater than
* the total number of unique numbers. */
int value_count() const;
/* Adds a single name/number pair to the enum. Fails if this name has
* already been used by another value. */
bool AddValue(const char* name, int32_t num, Status* status);
bool AddValue(const std::string& name, int32_t num, Status* status);
/* Lookups from name to integer, returning true if found. */
bool FindValueByName(const char* name, int32_t* num) const;
/* Finds the name corresponding to the given number, or NULL if none was
* found. If more than one name corresponds to this number, returns the
* first one that was added. */
const char* FindValueByNumber(int32_t num) const;
/* Iteration over name/value pairs. The order is undefined.
* Adding an enum val invalidates any iterators.
* TODO: make compatible with range-for, with elements as pairs? */
class Iterator {
explicit Iterator(const EnumDef*);