When rendering a model (.obj or .gltf), the vertices contain information on which triangles to draw, what their normals are, and how textures should be applied. For unpacking an .obj there were several resources available for Odin, but while cgltf is an “official” vendor library packaged with Odin, there weren’t many examples showing how to use it. The Odin types were helpful in deciphering what kind of data it contains.
Reading data using cgltf
When rendering a static model you first need to find the vertices.
// Parse the glTF/GLB file. This only reads the JSON structure.
model, result := cgltf.parse_file({}, "./my-model.glb")
assert(result == .success)
// Important: model will not have any actual vertex data if you only
// parse the file — the binary buffers must be loaded as well.
buffers_result := cgltf.load_buffers({}, model, "./my-model.glb")
assert(buffers_result == .success)
Hierarchy of nodes
The model contains scenes, which in turn contain the nodes (`model.scenes`, `scene.nodes`).
Nodes determine the hierarchy of meshes.
The torso is a parent of the arms, arms are the parent of a hand, etc.
// Create this struct for every node.
// Flattened representation of a cgltf node: parent/child relationships
// are stored as indices into a node_infos array instead of pointers.
NodeInfo :: struct {
	index: int, // this node's slot in node_infos (indices start at 1)
	children: [dynamic]int, // indices of child nodes
	parent: int, // index of the parent node; 0 means "no parent"
	translation: Vec3, // local translation relative to the parent
	rotation: quaternion128, // local rotation relative to the parent
	scale: Vec3, // local scale relative to the parent
}
// Start with index 1 to have 0 mean "no parent"
node_i := 1
// Map to link node name to index
node_indices := make(map[cstring]int)
for scene in data.scenes {
	// Work list for an iterative tree traversal of all the nodes.
	nodes := make([dynamic]^cgltf.node)
	// Seed it with the top-level nodes of the scene.
	for node in scene.nodes {
		append(&nodes, node)
	}
	// Explicit index loop so that nodes appended during iteration are
	// also processed; parents are always visited before their children.
	for read_i := 0; read_i < len(nodes); read_i += 1 {
		n := nodes[read_i]
		// Fall back to identity for translation, rotation, scale.
		translation := n.has_translation ? n.translation : {0, 0, 0}
		scale := n.has_scale ? n.scale : {1, 1, 1}
		rotation :=
			n.has_rotation ? transmute(quaternion128)n.rotation : linalg.QUATERNIONF32_IDENTITY
		parent_index: int = 0
		if n.parent != nil {
			// Look up the parent's index by name. The parent was visited
			// before this node, so the entry exists. (The previous version
			// bound an `ok` result it never used — a compile error in Odin.)
			i := node_indices[n.parent.name]
			// Add this node as a child on the parent.
			parent_node := &node_infos[i]
			append(&parent_node.children, node_i)
			// Set parent index for this node.
			parent_index = parent_node.index
		}
		// Update global node name -> index map.
		node_indices[n.name] = node_i
		node_infos[node_i] = {
			index = node_i,
			parent = parent_index,
			translation = translation,
			rotation = rotation,
			scale = scale,
			children = make([dynamic]int),
		}
		node_i += 1
		// Push children onto the work list to process later.
		for child in n.children {
			append(&nodes, child)
		}
	}
}
Mesh per node
In model.meshes
there are primitives (triangles) that contain actual information about vertices.
The name of a mesh corresponds with the name of the nodes in the hierarchy.
To extract all the vertex information, cgltf provides accessors.
// We convert data from the model to:
// One interleaved vertex as appended to the `vertices` list.
Vertex :: struct {
	pos: Vec3, // position in the node's local space
	color: Vec4, // vertex color; defaults to white when not extracted
	uv: Vec2, // texture coordinates
	normal: Vec3, // surface normal
	// The index in the node hierarchy; used on the GPU to select the
	// node's transform matrix from the storage buffer
	node_index: u32
}
// Extract vertices (and optionally indices) from every primitive of the mesh.
for prim in mesh.primitives {
	pos_accessor: ^cgltf.accessor
	norm_accessor: ^cgltf.accessor
	uv_accessor: ^cgltf.accessor
	// Get accessors for the attributes you are interested in.
	for attr in prim.attributes {
		#partial switch attr.type {
		case cgltf.attribute_type.position:
			pos_accessor = attr.data
		case cgltf.attribute_type.normal:
			norm_accessor = attr.data
		case cgltf.attribute_type.texcoord:
			uv_accessor = attr.data
		}
	}
	vertex_count := pos_accessor.count
	// Loop over [0, vertex_count): the previous `<=` read one element past
	// the end of the accessor. Also renamed the loop variable, which used
	// to shadow the surrounding `mesh`.
	for vi: uint = 0; vi < vertex_count; vi += 1 {
		v: Vertex = {
			// default data, we don't extract color here
			color = {1, 1, 1, 1},
			node_index = u32(node_info.index),
		}
		// write 3 floats from the buffer through a pointer into the Vertex
		ok := cgltf.accessor_read_float(pos_accessor, vi, &v.pos[0], 3)
		assert(ok)
		if norm_accessor != nil {
			ok = cgltf.accessor_read_float(norm_accessor, vi, &v.normal[0], 3)
			assert(ok)
		}
		if uv_accessor != nil {
			ok = cgltf.accessor_read_float(uv_accessor, vi, &v.uv[0], 2)
			assert(ok)
		}
		append(&vertices, v)
	}
	if prim.indices != nil {
		for i: uint = 0; i < prim.indices.count; i += 1 {
			// indices work slightly differently: the accessor returns the
			// integer directly instead of writing through a pointer
			index := cgltf.accessor_read_index(prim.indices, i)
			// NOTE(review): u16 overflows for models with more than 65535
			// vertices — widen the index type if larger meshes are needed.
			append(&indices, u16(index + vertex_buffer_len))
		}
	}
}
Positioning nodes in model space
Once you get here you have a hierarchy of nodes, and a list of vertices which records which node each vertex belongs to. You need the hierarchy because when you render these vertices, you will notice that every node has its own local space. That means every arm, leg and torso will be rendered at 0,0, on top of each other. To fix this you need to multiply every node’s transform with its parent’s transform to position it in model space. You upload that list of matrices to the GPU, where the vertex attribute node_index selects the matrix that should be applied to the vertex.
node_indices: map[cstring]int
node_infos: []NodeInfo
// We want to upload one matrix per node: the node's local transform
// multiplied with all of its parents' transforms.
storage := make([dynamic]Mat4, len(node_indices) + 1)
for _key, i in node_indices {
	// For every node in the hierarchy
	node_info := node_infos[i]
	tf := linalg.MATRIX4F32_IDENTITY
	parent := node_info.parent
	// temp list of all parent matrices
	t := make([dynamic]Mat4, context.temp_allocator)
	// 0 means "no parent", so walk up through all ancestors
	for parent != 0 {
		parent_node := node_infos[parent]
		// Use a local here: assigning to `tf` inside this loop made the
		// #reverse multiply below apply the root transform twice.
		parent_tf := linalg.matrix4_from_trs_f32(
			parent_node.translation,
			parent_node.rotation,
			parent_node.scale,
		)
		append(&t, parent_tf)
		parent = parent_node.parent
	}
	// multiply the hierarchy of matrices, root first
	#reverse for parent_tf in t {
		tf = parent_tf * tf
	}
	// Last, apply this node's own local transform. (Previously this
	// overwrote the accumulated `tf` and multiplied with an undefined
	// `anim_tf` left over from the animated version of this code.)
	node_tf := linalg.matrix4_from_trs_f32(
		node_info.translation,
		node_info.rotation,
		node_info.scale,
	)
	storage[i] = tf * node_tf
}
// Create a storage buffer of these matrices
storage_buffer = sg.make_buffer(
	{type = .STORAGEBUFFER, data = sg_range(storage[:])},
)
g.bind.storage_buffers[1] = storage_buffer
In the shader you can multiply the vertex with the correct transform:
// Storage buffer holding one model-space transform per node,
// uploaded from the CPU side.
layout(binding = 1) readonly buffer node_offsets_buffer {
	NodeOffset node_offsets[];
};
// Vertex attribute: index of the node this vertex belongs to
in uint node_index;
// In the main vertex fn:
// mvp * node transform * position of model in world
gl_Position = mvp * node_offsets[node_index].offset * vec4(pos.xyz, 1);
Animation
Now there is a model displayed (most likely in a static T-pose, as that seems to be the standard). Some models also include animation data. The simplest type of animation, described here, is animation of the transforms of every node. There is also something called skinning, which involves vertices being influenced by multiple bones. When/if I get to implementing that I’ll update this note.
First step is to get the data from the model in this struct:
Every model can have multiple animations
// One animation clip extracted from the model.
MeshAnimationData :: struct {
	// Total length of the clip in seconds: the highest keyframe time
	// found in any channel.
	duration: f32,
	// One entry per node index; empty channels mean that node is not
	// animated by this clip.
	channels: []MeshAnimationChannelData,
}
// All animated properties targeting one node. Each property has a list
// of keyframe values plus a parallel list of keyframe times.
MeshAnimationChannelData :: struct {
	times_translations: []f32, // keyframe times for `translations`
	translations: []Vec3,
	times_rotations: []f32, // keyframe times for `rotations`
	rotations: []quaternion128,
	times_scales: []f32, // keyframe times for `scales`
	scales: []Vec3,
}
A channel is the change of one property of a node over time. For example, the left leg’s rotation might change from 0 to 90 degrees over the time span 0 to 1 second. All channels combined make the model’s nodes move over time.
// Returns the last timestamp of a keyframe time list, or 0 when empty.
// (The previous duration scan tested `last_index > 0` instead of `>= 0`,
// which silently ignored channels with exactly one keyframe.)
channel_end_time :: proc(times: []f32) -> f32 {
	if len(times) == 0 {
		return 0
	}
	return times[len(times) - 1]
}

for anim in data.animations {
	// One channel-data slot per node; index 0 is the "no parent" sentinel.
	channels := make(
		[dynamic]MeshAnimationChannelData,
		len(node_indices) + 1,
	)
	for &channel in anim.channels {
		// Keyframe timestamps, one per sample in this channel.
		times: [dynamic]f32 = make(
			[dynamic]f32,
			channel.sampler.input.count,
		)
		node_index := node_indices[channel.target_node.name]
		// Every channel that targets a node goes into the same channel data structure
		node_channel := &channels[node_index]
		#partial switch channel.target_path {
		case cgltf.animation_path_type.translation:
			translations := make([dynamic]Vec3)
			// sampling uses the same accessor structure as node transform data
			sample_it(&channel, &times, &translations, 3)
			node_channel.translations = translations[:]
			node_channel.times_translations = times[:]
		case cgltf.animation_path_type.rotation:
			// Rotations arrive as 4 floats and are reinterpreted as quaternions.
			rotations := make([dynamic]Vec4)
			sample_it(&channel, &times, &rotations, 4)
			rotations_q := make([dynamic]quaternion128)
			for r in rotations {
				append(&rotations_q, transmute(quaternion128)r)
			}
			node_channel.rotations = rotations_q[:]
			node_channel.times_rotations = times[:]
		case cgltf.animation_path_type.scale:
			scales := make([dynamic]Vec3)
			sample_it(&channel, &times, &scales, 3)
			node_channel.scales = scales[:]
			node_channel.times_scales = times[:]
		}
	}
	// The animation's duration is the highest timestamp in any channel.
	// (Renamed from `max`, which shadowed Odin's builtin max proc.)
	duration: f32 = 0
	for c in channels {
		duration = max(duration, channel_end_time(c.times_rotations))
		duration = max(duration, channel_end_time(c.times_translations))
		duration = max(duration, channel_end_time(c.times_scales))
	}
	animation := MeshAnimationData {
		channels = channels[:],
		duration = duration,
	}
}
Second step is a minor change to the transform matrices that we upload per node. Animation is stored as a list of property values and a list of corresponding times. If we imagine a one-dimensional change, it would be a list of heights [0, 10, 15, 16], where each height has a corresponding time component: [0.1s, 0.4s, 0.6s, 1.0s].
We define the current time of the animation we want to play, say 0.5. In this example we would interpolate between 10, 15 (0.4 and 0.6 second keyframes.) Interpolate by the factor of where you are between the two keyframes in time (0.4 -> 0.6, 0.5 is right in the middle so the factor is 0.5.) Animations include the base position of the property they are transforming, so you don’t need the base node translation if the animation is animating the translation. So we use the node TRS as the default and override it with the animated version when it exists. This prevents double applying of translations, rotations and scales.
// Finds the pair of keyframes bracketing time `t` in `times` and the
// linear-interpolation factor between them. Factor stays 0 when both
// keyframes share a timestamp (e.g. a single-keyframe channel), which
// also avoids a division by zero.
keyframe_lerp :: proc(times: []f32, t: f32) -> (i1, i2: int, factor: f32) {
	i1, i2 = find_keyframe_indices(times, t)
	time0 := times[i1]
	time1 := times[i2]
	if time1 > time0 {
		factor = (t - time0) / (time1 - time0)
	}
	return
}

// Builds a node's local transform matrix at animation time `t`.
// Properties without keyframes fall back to the node's static TRS
// defaults, so translations, rotations and scales are never applied
// twice (animated channels replace the base transform, not add to it).
interpolate_animation :: proc(
	anim_data: MeshAnimationChannelData,
	t: f32,
	default_translation: Vec3,
	default_rotation: quaternion128,
	default_scale: Vec3,
) -> Mat4 {
	translation := default_translation
	rotation := default_rotation
	scale := default_scale
	if len(anim_data.translations) > 0 {
		i1, i2, factor := keyframe_lerp(anim_data.times_translations, t)
		translation = linalg.lerp(
			anim_data.translations[i1],
			anim_data.translations[i2],
			factor,
		)
	}
	if len(anim_data.scales) > 0 {
		i1, i2, factor := keyframe_lerp(anim_data.times_scales, t)
		scale = linalg.lerp(
			anim_data.scales[i1],
			anim_data.scales[i2],
			factor,
		)
	}
	if len(anim_data.rotations) > 0 {
		i1, i2, factor := keyframe_lerp(anim_data.times_rotations, t)
		// slerp keeps the interpolation on the unit-quaternion sphere
		rotation = linalg.quaternion_slerp_f32(
			anim_data.rotations[i1],
			anim_data.rotations[i2],
			factor,
		)
	}
	return linalg.matrix4_from_trs(translation, rotation, scale)
}
The end result is mostly the same, but uses animation transforms when they exist.
node_indices: map[cstring]int
node_infos: []NodeInfo
storage := make([dynamic]Mat4, len(node_indices) + 1)
// Every node samples the same clip, so look it up once.
anim := mesh_anim_data[entity.mesh][entity.animation]
for _, node_index in node_indices {
	info := node_infos[node_index]
	combined := linalg.MATRIX4F32_IDENTITY
	// Collect the animated transform of every ancestor, child-to-root.
	ancestor_tfs := make([dynamic]Mat4, context.temp_allocator)
	p := info.parent
	for p != 0 {
		ancestor := node_infos[p]
		// Interpolate the ancestor's channels instead of using its
		// static node transform.
		ancestor_tf := interpolate_animation(
			anim.channels[p],
			entity.animation_t,
			ancestor.translation,
			ancestor.rotation,
			ancestor.scale,
		)
		append(&ancestor_tfs, ancestor_tf)
		p = ancestor.parent
	}
	// Multiply root-first so parents are applied before children.
	#reverse for ancestor_tf in ancestor_tfs {
		combined = ancestor_tf * combined
	}
	// The node's own animated local transform is applied last.
	local_tf := interpolate_animation(
		anim.channels[info.index],
		entity.animation_t,
		info.translation,
		info.rotation,
		info.scale,
	)
	storage[node_index] = combined * local_tf
}
storage_buffer = sg.make_buffer(
	{type = .STORAGEBUFFER, data = sg_range(storage[:])},
)
g.bind.storage_buffers[1] = storage_buffer
// Advance the clip and wrap back to the start when it finishes.
entity.animation_t += dt
if entity.animation_t > anim.duration {
	entity.animation_t = 0
}