#version 450

// Fragment stage: colours each fragment by sampling the diffuse texture at the
// interpolated texture coordinate.

layout(location=0) in vec2 v_tex_coords;
// Declared but not read by main(); only the debug line below refers to it.
layout(location=1) in vec3 v_color;

layout(location=0) out vec4 f_color;

// The texture and the sampler are bound separately (set 0, bindings 0 and 1)
// and combined into a sampler2D at the sampling site.
layout(set = 0, binding = 0) uniform texture2D t_diffuse;
layout(set = 0, binding = 1) uniform sampler s_diffuse;

void main() {
    f_color = texture(sampler2D(t_diffuse, s_diffuse), v_tex_coords);
    // Debug alternative: f_color = vec4(v_color, 1);
}

use winit::{
event_loop::{EventLoop, ControlFlow},
window::{Window, WindowBuilder},
use cgmath::prelude::*;
/// A single mesh vertex as stored in the vertex buffer.
///
/// `#[repr(C)]` pins the field order so that `position` starts at byte 0 and
/// `tex_coords` follows directly after it, which is what the attribute offsets
/// used for this type assume.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
struct Vertex {
    /// Position (x, y, z).
    position: [f32; 3],
    /// Texture coordinate (u, v).
    tex_coords: [f32; 2],
}
impl Vertex {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a> {
use std::mem;
wgpu::VertexBufferDescriptor {
stride: mem::size_of::<Vertex>() as wgpu::BufferAddress,
step_mode: wgpu::InputStepMode::Vertex,
attributes: &[
wgpu::VertexAttributeDescriptor {
offset: 0,
shader_location: 0,
format: wgpu::VertexFormat::Float3,
wgpu::VertexAttributeDescriptor {
offset: mem::size_of::<[f32; 3]>() as wgpu::BufferAddress,
shader_location: 1,
format: wgpu::VertexFormat::Float2,
const VERTICES: &[Vertex] = &[
Vertex { position: [-0.0868241, -0.49240386, 0.0], tex_coords: [1.0 - 0.4131759, 1.0 - 0.00759614], }, // A
Vertex { position: [-0.49513406, -0.06958647, 0.0], tex_coords: [1.0 - 0.0048659444, 1.0 - 0.43041354], }, // B
Vertex { position: [-0.21918549, 0.44939706, 0.0], tex_coords: [1.0 - 0.28081453, 1.0 - 0.949397057], }, // C
Vertex { position: [0.35966998, 0.3473291, 0.0], tex_coords: [1.0 - 0.85967, 1.0 - 0.84732911], }, // D
Vertex { position: [0.44147372, -0.2347359, 0.0], tex_coords: [1.0 - 0.9414737, 1.0 - 0.2652641], }, // E
/// Indices into `VERTICES`, three per triangle.
const INDICES: &[u16] = &[
    0, 1, 4,
    1, 2, 4,
    2, 3, 4,
];
#[cfg_attr(rustfmt, rustfmt_skip)]
pub const OPENGL_TO_WGPU_MATRIX: cgmath::Matrix4<f32> = cgmath::Matrix4::new(
1.0, 0.0, 0.0, 0.0,
0.0, -1.0, 0.0, 0.0,
0.0, 0.0, 0.5, 0.0,
0.0, 0.0, 0.5, 1.0,
/// Number of instances along each side of the instance grid.
const NUM_INSTANCES_PER_ROW: u32 = 10;

/// Half the row count on the x and z axes, zero on y.
const INSTANCE_DISPLACEMENT: cgmath::Vector3<f32> = cgmath::Vector3::new(
    NUM_INSTANCES_PER_ROW as f32 * 0.5,
    0.0,
    NUM_INSTANCES_PER_ROW as f32 * 0.5,
);
struct Camera {
eye: cgmath::Point3<f32>,
target: cgmath::Point3<f32>,
up: cgmath::Vector3<f32>,
aspect: f32,
fovy: f32,
znear: f32,
zfar: f32,
impl Camera {
fn build_view_projection_matrix(&self) -> cgmath::Matrix4<f32> {
let view = cgmath::Matrix4::look_at(self.eye,, self.up);
let proj = cgmath::perspective(cgmath::Deg(self.fovy), self.aspect, self.znear, self.zfar);
return proj * view;
#[derive(Copy, Clone)]
struct Uniforms {
view_proj: cgmath::Matrix4<f32>,
impl Uniforms {
fn new() -> Self {
Self {
view_proj: cgmath::Matrix4::identity(),
fn update_view_proj(&mut self, camera: &Camera) {
self.view_proj = OPENGL_TO_WGPU_MATRIX * camera.build_view_projection_matrix();
/// Keyboard state used to move the camera.
struct CameraController {
    /// Distance the camera eye moves per update for each held direction.
    speed: f32,
    /// `true` while the "up" key is held.
    is_up_pressed: bool,
    /// `true` while the "down" key is held.
    is_down_pressed: bool,
    /// `true` while a "forward" key is held.
    is_forward_pressed: bool,
    /// `true` while a "backward" key is held.
    is_backward_pressed: bool,
    /// `true` while a "left" key is held.
    is_left_pressed: bool,
    /// `true` while a "right" key is held.
    is_right_pressed: bool,
}
impl CameraController {
fn new(speed: f32) -> Self {
Self {
is_up_pressed: false,
is_down_pressed: false,
is_forward_pressed: false,
is_backward_pressed: false,
is_left_pressed: false,
is_right_pressed: false,
fn process_events(&mut self, event: &WindowEvent) -> bool {
match event {
WindowEvent::KeyboardInput {
input: KeyboardInput {
virtual_keycode: Some(keycode),
} => {
let is_pressed = *state == ElementState::Pressed;
match keycode {
VirtualKeyCode::Space => {
self.is_up_pressed = is_pressed;
VirtualKeyCode::LShift => {
self.is_down_pressed = is_pressed;
VirtualKeyCode::W | VirtualKeyCode::Up => {
self.is_forward_pressed = is_pressed;
VirtualKeyCode::A | VirtualKeyCode::Left => {
self.is_left_pressed = is_pressed;
VirtualKeyCode::S | VirtualKeyCode::Down => {
self.is_backward_pressed = is_pressed;
VirtualKeyCode::D | VirtualKeyCode::Right => {
self.is_right_pressed = is_pressed;
_ => false,
_ => false,
fn update_camera(&self, camera: &mut Camera) {
let forward = ( - camera.eye).normalize();
if self.is_forward_pressed {
camera.eye += forward * self.speed;
if self.is_backward_pressed {
camera.eye -= forward * self.speed;
let right = forward.cross(camera.up);
if self.is_right_pressed {
camera.eye += right * self.speed;
if self.is_left_pressed {
camera.eye -= right * self.speed;
struct Instance {
position: cgmath::Vector3<f32>,
rotation: cgmath::Quaternion<f32>,
impl Instance {
fn to_matrix(&self) -> cgmath::Matrix4<f32> {
cgmath::Matrix4::from_translation(self.position) * cgmath::Matrix4::from(self.rotation)
struct State {
surface: wgpu::Surface,
device: wgpu::Device,
queue: wgpu::Queue,
sc_desc: wgpu::SwapChainDescriptor,
swap_chain: wgpu::SwapChain,
render_pipeline: wgpu::RenderPipeline,
vertex_buffer: wgpu::Buffer,
index_buffer: wgpu::Buffer,
num_indices: u32,
diffuse_texture: wgpu::Texture,
diffuse_texture_view: wgpu::TextureView,
diffuse_sampler: wgpu::Sampler,
diffuse_bind_group: wgpu::BindGroup,
camera: Camera,
camera_controller: CameraController,
uniforms: Uniforms,
uniform_buffer: wgpu::Buffer,
uniform_bind_group: wgpu::BindGroup,
hidpi_factor: f64,
size: winit::dpi::LogicalSize,
instances: Vec<Instance>,
instance_buffer: wgpu::Buffer,
impl State {
/// Builds the GPU state for `window`: surface, device and queue, swap chain,
/// diffuse texture, camera uniforms, instance storage buffer, bind groups,
/// shaders, render pipeline and mesh buffers.
///
/// NOTE(review): several statements in this listing look incomplete (struct
/// literals that are never closed, `create_buffer_mapped` chains with no
/// finishing call, no visible copy or submit call). The comments below only
/// describe what is visible — confirm against the full file.
fn new(window: &Window) -> Self {
// The window size is kept in logical units; the HiDPI factor converts it to
// physical pixels for the swap chain.
let hidpi_factor = window.hidpi_factor();
let size = window.inner_size();
let physical_size = size.to_physical(hidpi_factor);
let surface = wgpu::Surface::create(window);
// NOTE(review): the adapter options are not visible here.
let adapter = wgpu::Adapter::request(&wgpu::RequestAdapterOptions {
let (device, mut queue) = adapter.request_device(&wgpu::DeviceDescriptor {
extensions: wgpu::Extensions {
anisotropic_filtering: false,
limits: Default::default(),
// Swap chain sized to the window's physical pixel dimensions.
let sc_desc = wgpu::SwapChainDescriptor {
usage: wgpu::TextureUsage::OUTPUT_ATTACHMENT,
format: wgpu::TextureFormat::Bgra8UnormSrgb,
width: physical_size.width.round() as u32,
height: physical_size.height.round() as u32,
present_mode: wgpu::PresentMode::Vsync,
let swap_chain = device.create_swap_chain(&surface, &sc_desc);
// Diffuse texture: the PNG is embedded at compile time and decoded at startup.
let diffuse_bytes = include_bytes!("happy-tree.png");
let diffuse_image = image::load_from_memory(diffuse_bytes).unwrap();
let diffuse_rgba = diffuse_image.as_rgba8().unwrap();
use image::GenericImageView;
let dimensions = diffuse_image.dimensions();
let size3d = wgpu::Extent3d {
width: dimensions.0,
height: dimensions.1,
depth: 1,
let diffuse_texture = device.create_texture(&wgpu::TextureDescriptor {
size: size3d,
array_layer_count: 1,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8UnormSrgb,
// SAMPLED so shaders can read it, COPY_DST so it can be filled by a copy.
usage: wgpu::TextureUsage::SAMPLED | wgpu::TextureUsage::COPY_DST,
// Staging buffer sized to the RGBA byte count.
// NOTE(review): the call that fills this buffer is not visible.
let diffuse_buffer = device
.create_buffer_mapped(diffuse_rgba.len(), wgpu::BufferUsage::COPY_SRC)
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
// Source and destination views, presumably for a buffer-to-texture copy — the
// enclosing call is not visible. TODO confirm.
wgpu::BufferCopyView {
buffer: &diffuse_buffer,
offset: 0,
// Four bytes per texel times the image width.
row_pitch: 4 * dimensions.0,
image_height: dimensions.1,
wgpu::TextureCopyView {
texture: &diffuse_texture,
mip_level: 0,
array_layer: 0,
origin: wgpu::Origin3d::ZERO,
let diffuse_texture_view = diffuse_texture.create_default_view();
let diffuse_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: -100.0,
lod_max_clamp: 100.0,
compare_function: wgpu::CompareFunction::Always,
// Layout for the diffuse bind group: a 2D sampled texture at binding 0 and a
// sampler at binding 1, both visible to the fragment stage.
let texture_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
wgpu::BindGroupLayoutBinding {
binding: 0,
visibility: wgpu::ShaderStage::FRAGMENT,
ty: wgpu::BindingType::SampledTexture {
multisampled: false,
dimension: wgpu::TextureViewDimension::D2,
wgpu::BindGroupLayoutBinding {
binding: 1,
visibility: wgpu::ShaderStage::FRAGMENT,
ty: wgpu::BindingType::Sampler,
let diffuse_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &texture_bind_group_layout,
bindings: &[
wgpu::Binding {
binding: 0,
resource: wgpu::BindingResource::TextureView(&diffuse_texture_view),
wgpu::Binding {
binding: 1,
resource: wgpu::BindingResource::Sampler(&diffuse_sampler),
// Camera starts at (0, 5, -10) looking at the origin, with the aspect ratio
// taken from the swap chain size.
let camera = Camera {
eye: (0.0, 5.0, -10.0).into(),
target: (0.0, 0.0, 0.0).into(),
up: cgmath::Vector3::unit_y(),
aspect: sc_desc.width as f32 / sc_desc.height as f32,
fovy: 45.0,
znear: 0.1,
zfar: 100.0,
let camera_controller = CameraController::new(0.2);
let mut uniforms = Uniforms::new();
// Uniform buffer with room for one element; COPY_DST so it can be
// overwritten by a buffer copy later.
// NOTE(review): the call that fills this buffer is not visible.
let uniform_buffer = device
.create_buffer_mapped(1, wgpu::BufferUsage::UNIFORM | wgpu::BufferUsage::COPY_DST)
// One instance per cell of a NUM_INSTANCES_PER_ROW x NUM_INSTANCES_PER_ROW
// grid in the x/z plane, shifted by INSTANCE_DISPLACEMENT.
let instances = (0..NUM_INSTANCES_PER_ROW).flat_map(|z| {
(0..NUM_INSTANCES_PER_ROW).map(move |x| {
let position = cgmath::Vector3 { x: x as f32, y: 0.0, z: z as f32 } - INSTANCE_DISPLACEMENT;
let rotation = if position.is_zero() {
// This is needed so an object at (0, 0, 0) won't get scaled to zero,
// as quaternions can affect scale if they're not created correctly.
cgmath::Quaternion::from_axis_angle(cgmath::Vector3::unit_z(), cgmath::Deg(0.0))
} else {
// Rotate 45 degrees about the direction from the origin to the instance.
cgmath::Quaternion::from_axis_angle(position.clone().normalize(), cgmath::Deg(45.0))
Instance {
position, rotation,
// Flatten the instances into model matrices for the storage buffer.
let instance_data = instances.iter().map(Instance::to_matrix).collect::<Vec<_>>();
let instance_buffer_size = instance_data.len() * std::mem::size_of::<cgmath::Matrix4<f32>>();
// NOTE(review): the call that fills this buffer is not visible.
let instance_buffer = device
.create_buffer_mapped(instance_data.len(), wgpu::BufferUsage::STORAGE_READ)
// Layout for the second bind group: the uniform buffer at binding 0 and a
// read-only storage buffer at binding 1, both visible to the vertex stage.
let uniform_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
wgpu::BindGroupLayoutBinding {
binding: 0,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::UniformBuffer {
dynamic: false,
wgpu::BindGroupLayoutBinding {
binding: 1,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::StorageBuffer {
dynamic: false,
readonly: true,
let uniform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &uniform_bind_group_layout,
bindings: &[
wgpu::Binding {
binding: 0,
resource: wgpu::BindingResource::Buffer {
buffer: &uniform_buffer,
range: 0..std::mem::size_of_val(&uniforms) as wgpu::BufferAddress,
wgpu::Binding {
binding: 1,
resource: wgpu::BindingResource::Buffer {
buffer: &instance_buffer,
range: 0..instance_buffer_size as wgpu::BufferAddress,
// Shaders: GLSL sources are embedded at compile time and compiled to SPIR-V
// at startup; each step panics on failure.
let vs_src = include_str!("storage.vert");
let fs_src = include_str!("shader.frag");
let vs_spirv = glsl_to_spirv::compile(vs_src, glsl_to_spirv::ShaderType::Vertex).unwrap();
let fs_spirv = glsl_to_spirv::compile(fs_src, glsl_to_spirv::ShaderType::Fragment).unwrap();
let vs_data = wgpu::read_spirv(vs_spirv).unwrap();
let fs_data = wgpu::read_spirv(fs_spirv).unwrap();
let vs_module = device.create_shader_module(&vs_data);
let fs_module = device.create_shader_module(&fs_data);
// Set 0 is the texture layout and set 1 the uniform/instance layout.
let render_pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
bind_group_layouts: &[&texture_bind_group_layout, &uniform_bind_group_layout],
let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
layout: &render_pipeline_layout,
vertex_stage: wgpu::ProgrammableStageDescriptor {
module: &vs_module,
entry_point: "main",
fragment_stage: Some(wgpu::ProgrammableStageDescriptor {
module: &fs_module,
entry_point: "main",
// Counter-clockwise triangles are front faces; back faces are culled.
rasterization_state: Some(wgpu::RasterizationStateDescriptor {
front_face: wgpu::FrontFace::Ccw,
cull_mode: wgpu::CullMode::Back,
depth_bias: 0,
depth_bias_slope_scale: 0.0,
depth_bias_clamp: 0.0,
primitive_topology: wgpu::PrimitiveTopology::TriangleList,
color_states: &[
wgpu::ColorStateDescriptor {
format: sc_desc.format,
color_blend: wgpu::BlendDescriptor::REPLACE,
alpha_blend: wgpu::BlendDescriptor::REPLACE,
write_mask: wgpu::ColorWrite::ALL,
depth_stencil_state: None,
index_format: wgpu::IndexFormat::Uint16,
// NOTE(review): the vertex buffer descriptor entries are not visible.
vertex_buffers: &[
sample_count: 1,
sample_mask: !0,
alpha_to_coverage_enabled: false,
// Mesh buffers sized from VERTICES and INDICES.
// NOTE(review): the calls that fill these buffers are not visible.
let vertex_buffer = device
.create_buffer_mapped(VERTICES.len(), wgpu::BufferUsage::VERTEX)
let index_buffer = device
.create_buffer_mapped(INDICES.len(), wgpu::BufferUsage::INDEX)
let num_indices = INDICES.len() as u32;
// NOTE(review): the field list of the returned value is not visible.
Self {
fn update_hidpi_and_resize(&mut self, new_hidpi_factor: f64) {
self.hidpi_factor = new_hidpi_factor;
fn resize(&mut self, new_size: winit::dpi::LogicalSize) {
let physical_size = new_size.to_physical(self.hidpi_factor);
self.size = new_size;
self.sc_desc.width = physical_size.width.round() as u32;
self.sc_desc.height = physical_size.height.round() as u32;
self.swap_chain = self.device.create_swap_chain(&self.surface, &self.sc_desc); = self.sc_desc.width as f32 / self.sc_desc.height as f32;
fn input(&mut self, event: &WindowEvent) -> bool {
/// Per-frame update: creates a temporary `COPY_SRC` staging buffer and records
/// a copy of `size_of::<Uniforms>()` bytes from it into `uniform_buffer`.
///
/// NOTE(review): the statements that fill the staging buffer and submit
/// `encoder` are not visible in this listing — confirm against the full file.
fn update(&mut self) {
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
// Staging buffer with room for one element.
let staging_buffer = self.device
.create_buffer_mapped(1, wgpu::BufferUsage::COPY_SRC)
// Both offsets are 0, so the copy starts at the beginning of each buffer.
encoder.copy_buffer_to_buffer(&staging_buffer, 0, &self.uniform_buffer, 0, std::mem::size_of::<Uniforms>() as wgpu::BufferAddress);
/// Records the commands for one frame: a render pass that clears the next
/// swap-chain image, binds both bind groups and the mesh buffers, and issues
/// one indexed draw covering every instance.
///
/// NOTE(review): no `set_pipeline` call and no queue submission are visible in
/// this listing — confirm against the full file.
fn render(&mut self) {
let frame = self.swap_chain.get_next_texture();
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
color_attachments: &[
wgpu::RenderPassColorAttachmentDescriptor {
attachment: &frame.view,
resolve_target: None,
// Clear to the colour below on load and keep the rendered result.
load_op: wgpu::LoadOp::Clear,
store_op: wgpu::StoreOp::Store,
clear_color: wgpu::Color {
r: 0.1,
g: 0.2,
b: 0.3,
a: 1.0,
depth_stencil_attachment: None,
// Set 0 is the diffuse texture group, set 1 the uniform/instance group.
render_pass.set_bind_group(0, &self.diffuse_bind_group, &[]);
render_pass.set_bind_group(1, &self.uniform_bind_group, &[]);
render_pass.set_vertex_buffers(0, &[(&self.vertex_buffer, 0)]);
render_pass.set_index_buffer(&self.index_buffer, 0);
// All indices, base vertex 0, one instance per entry in `instances`.
render_pass.draw_indexed(0..self.num_indices, 0, 0..self.instances.len() as u32);
/// Entry point: creates the event loop, the window and the `State`, then
/// handles events in the closure below.
///
/// NOTE(review): the window builder chain, the event-loop `run` call, the
/// window-id comparison and the bodies of several arms are incomplete in this
/// listing — confirm against the full file.
fn main() {
let event_loop = EventLoop::new();
let window = WindowBuilder::new()
let mut state = State::new(&window); |event, _, control_flow| {
match event {
Event::WindowEvent {
ref event,
// Give `state` the first chance to handle the event; if it does, just keep
// waiting for the next one.
} if window_id == => if state.input(event) {
*control_flow = ControlFlow::Wait;
} else {
match event {
WindowEvent::CloseRequested => *control_flow = ControlFlow::Exit,
WindowEvent::KeyboardInput {
} => {
match input {
// Pressing Escape exits the application.
KeyboardInput {
state: ElementState::Pressed,
virtual_keycode: Some(VirtualKeyCode::Escape),
} => *control_flow = ControlFlow::Exit,
_ => *control_flow = ControlFlow::Wait,
// NOTE(review): only the control-flow assignment is visible in the resize and
// HiDPI arms; presumably they also call into `state` — confirm.
WindowEvent::Resized(logical_size) => {
*control_flow = ControlFlow::Wait;
WindowEvent::HiDpiFactorChanged(new_hidpi_factor) => {
*control_flow = ControlFlow::Wait;
_ => *control_flow = ControlFlow::Wait,
// NOTE(review): presumably where the frame is updated and rendered — only the
// control-flow assignment is visible. Confirm.
Event::EventsCleared => {
*control_flow = ControlFlow::Wait;
_ => *control_flow = ControlFlow::Wait,

#version 450

// Vertex stage: transforms each vertex by its instance's model matrix and the
// camera's view-projection matrix, and passes the texture coordinate through.

layout(location=0) in vec3 a_position;
layout(location=1) in vec2 a_tex_coords;

layout(location=0) out vec2 v_tex_coords;

layout(set=1, binding=0)
uniform Uniforms {
    mat4 u_view_proj;
};

// One model matrix per instance, indexed by gl_InstanceIndex.
layout(set=1, binding=1)
buffer Instances {
    mat4 s_models[];
};

void main() {
    v_tex_coords = a_tex_coords;
    gl_Position = u_view_proj * s_models[gl_InstanceIndex] * vec4(a_position, 1.0);
}

use winit::{
event_loop::{EventLoop, ControlFlow},
window::{Window, WindowBuilder},
use cgmath::prelude::*;
/// A single mesh vertex as stored in the vertex buffer.
///
/// `#[repr(C)]` pins the field order so that `position` starts at byte 0 and
/// `tex_coords` follows directly after it, which is what the attribute offsets
/// used for this type assume.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
struct Vertex {
    /// Position (x, y, z).
    position: [f32; 3],
    /// Texture coordinate (u, v).
    tex_coords: [f32; 2],
}
impl Vertex {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a> {
use std::mem;
wgpu::VertexBufferDescriptor {
stride: mem::size_of::<Vertex>() as wgpu::BufferAddress,
step_mode: wgpu::InputStepMode::Vertex,
attributes: &[
wgpu::VertexAttributeDescriptor {
offset: 0,
shader_location: 0,
format: wgpu::VertexFormat::Float3,
wgpu::VertexAttributeDescriptor {
offset: mem::size_of::<[f32; 3]>() as wgpu::BufferAddress,
shader_location: 1,
format: wgpu::VertexFormat::Float2,
const VERTICES: &[Vertex] = &[
Vertex { position: [-0.0868241, -0.49240386, 0.0], tex_coords: [1.0 - 0.4131759, 1.0 - 0.00759614], }, // A
Vertex { position: [-0.49513406, -0.06958647, 0.0], tex_coords: [1.0 - 0.0048659444, 1.0 - 0.43041354], }, // B
Vertex { position: [-0.21918549, 0.44939706, 0.0], tex_coords: [1.0 - 0.28081453, 1.0 - 0.949397057], }, // C
Vertex { position: [0.35966998, 0.3473291, 0.0], tex_coords: [1.0 - 0.85967, 1.0 - 0.84732911], }, // D
Vertex { position: [0.44147372, -0.2347359, 0.0], tex_coords: [1.0 - 0.9414737, 1.0 - 0.2652641], }, // E
/// Indices into `VERTICES`, three per triangle.
const INDICES: &[u16] = &[
    0, 1, 4,
    1, 2, 4,
    2, 3, 4,
];
#[cfg_attr(rustfmt, rustfmt_skip)]
pub const OPENGL_TO_WGPU_MATRIX: cgmath::Matrix4<f32> = cgmath::Matrix4::new(
1.0, 0.0, 0.0, 0.0,
0.0, -1.0, 0.0, 0.0,
0.0, 0.0, 0.5, 0.0,
0.0, 0.0, 0.5, 1.0,
/// Number of instances along each side of the instance grid.
const NUM_INSTANCES_PER_ROW: u32 = 10;

/// Half the row count on the x and z axes, zero on y.
const INSTANCE_DISPLACEMENT: cgmath::Vector3<f32> = cgmath::Vector3::new(
    NUM_INSTANCES_PER_ROW as f32 * 0.5,
    0.0,
    NUM_INSTANCES_PER_ROW as f32 * 0.5,
);
struct Camera {
eye: cgmath::Point3<f32>,
target: cgmath::Point3<f32>,
up: cgmath::Vector3<f32>,
aspect: f32,
fovy: f32,
znear: f32,
zfar: f32,
impl Camera {
fn build_view_projection_matrix(&self) -> cgmath::Matrix4<f32> {
let view = cgmath::Matrix4::look_at(self.eye,, self.up);
let proj = cgmath::perspective(cgmath::Deg(self.fovy), self.aspect, self.znear, self.zfar);
return proj * view;
#[derive(Copy, Clone)]
struct Uniforms {
view_proj: cgmath::Matrix4<f32>,
impl Uniforms {
fn new() -> Self {
Self {
view_proj: cgmath::Matrix4::identity(),
fn update_view_proj(&mut self, camera: &Camera) {
self.view_proj = OPENGL_TO_WGPU_MATRIX * camera.build_view_projection_matrix();
/// Keyboard state used to move the camera.
struct CameraController {
    /// Distance the camera eye moves per update for each held direction.
    speed: f32,
    /// `true` while the "up" key is held.
    is_up_pressed: bool,
    /// `true` while the "down" key is held.
    is_down_pressed: bool,
    /// `true` while a "forward" key is held.
    is_forward_pressed: bool,
    /// `true` while a "backward" key is held.
    is_backward_pressed: bool,
    /// `true` while a "left" key is held.
    is_left_pressed: bool,
    /// `true` while a "right" key is held.
    is_right_pressed: bool,
}
impl CameraController {
fn new(speed: f32) -> Self {
Self {
is_up_pressed: false,
is_down_pressed: false,
is_forward_pressed: false,
is_backward_pressed: false,
is_left_pressed: false,
is_right_pressed: false,
fn process_events(&mut self, event: &WindowEvent) -> bool {
match event {
WindowEvent::KeyboardInput {
input: KeyboardInput {
virtual_keycode: Some(keycode),
} => {
let is_pressed = *state == ElementState::Pressed;
match keycode {
VirtualKeyCode::Space => {
self.is_up_pressed = is_pressed;
VirtualKeyCode::LShift => {
self.is_down_pressed = is_pressed;
VirtualKeyCode::W | VirtualKeyCode::Up => {
self.is_forward_pressed = is_pressed;
VirtualKeyCode::A | VirtualKeyCode::Left => {
self.is_left_pressed = is_pressed;
VirtualKeyCode::S | VirtualKeyCode::Down => {
self.is_backward_pressed = is_pressed;
VirtualKeyCode::D | VirtualKeyCode::Right => {
self.is_right_pressed = is_pressed;
_ => false,
_ => false,
fn update_camera(&self, camera: &mut Camera) {
let forward = ( - camera.eye).normalize();
if self.is_forward_pressed {
camera.eye += forward * self.speed;
if self.is_backward_pressed {
camera.eye -= forward * self.speed;
let right = forward.cross(camera.up);
if self.is_right_pressed {
camera.eye += right * self.speed;
if self.is_left_pressed {
camera.eye -= right * self.speed;
struct Instance {
position: cgmath::Vector3<f32>,
rotation: cgmath::Quaternion<f32>,
impl Instance {
fn to_matrix(&self) -> cgmath::Matrix4<f32> {
cgmath::Matrix4::from_translation(self.position) * cgmath::Matrix4::from(self.rotation)
struct State {
surface: wgpu::Surface,
device: wgpu::Device,
queue: wgpu::Queue,
sc_desc: wgpu::SwapChainDescriptor,
swap_chain: wgpu::SwapChain,
render_pipeline: wgpu::RenderPipeline,
vertex_buffer: wgpu::Buffer,
index_buffer: wgpu::Buffer,
num_indices: u32,
diffuse_texture: wgpu::Texture,
diffuse_texture_view: wgpu::TextureView,
diffuse_sampler: wgpu::Sampler,
diffuse_bind_group: wgpu::BindGroup,
camera: Camera,
camera_controller: CameraController,
uniforms: Uniforms,
uniform_buffer: wgpu::Buffer,
uniform_bind_group: wgpu::BindGroup,
hidpi_factor: f64,
size: winit::dpi::LogicalSize,
instances: Vec<Instance>,
instance_texture: wgpu::Texture,
instance_texture_view: wgpu::TextureView,
impl State {
/// Builds the GPU state for `window`: surface, device and queue, swap chain,
/// diffuse texture, camera uniforms, a 1D texture holding the instance
/// matrices, bind groups, shaders, render pipeline and mesh buffers.
///
/// NOTE(review): several statements in this listing look incomplete (struct
/// literals that are never closed, `create_buffer_mapped` chains with no
/// finishing call, no visible copy or submit call). The comments below only
/// describe what is visible — confirm against the full file.
fn new(window: &Window) -> Self {
// The window size is kept in logical units; the HiDPI factor converts it to
// physical pixels for the swap chain.
let hidpi_factor = window.hidpi_factor();
let size = window.inner_size();
let physical_size = size.to_physical(hidpi_factor);
let surface = wgpu::Surface::create(window);
// NOTE(review): the adapter options are not visible here.
let adapter = wgpu::Adapter::request(&wgpu::RequestAdapterOptions {
let (device, mut queue) = adapter.request_device(&wgpu::DeviceDescriptor {
extensions: wgpu::Extensions {
anisotropic_filtering: false,
limits: Default::default(),
// Swap chain sized to the window's physical pixel dimensions.
let sc_desc = wgpu::SwapChainDescriptor {
usage: wgpu::TextureUsage::OUTPUT_ATTACHMENT,
format: wgpu::TextureFormat::Bgra8UnormSrgb,
width: physical_size.width.round() as u32,
height: physical_size.height.round() as u32,
present_mode: wgpu::PresentMode::Vsync,
let swap_chain = device.create_swap_chain(&surface, &sc_desc);
// Diffuse texture: the PNG is embedded at compile time and decoded at startup.
let diffuse_bytes = include_bytes!("happy-tree.png");
let diffuse_image = image::load_from_memory(diffuse_bytes).unwrap();
let diffuse_rgba = diffuse_image.as_rgba8().unwrap();
use image::GenericImageView;
let dimensions = diffuse_image.dimensions();
let size3d = wgpu::Extent3d {
width: dimensions.0,
height: dimensions.1,
depth: 1,
let diffuse_texture = device.create_texture(&wgpu::TextureDescriptor {
size: size3d,
array_layer_count: 1,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8UnormSrgb,
// SAMPLED so shaders can read it, COPY_DST so it can be filled by a copy.
usage: wgpu::TextureUsage::SAMPLED | wgpu::TextureUsage::COPY_DST,
// Staging buffer sized to the RGBA byte count.
// NOTE(review): the call that fills this buffer is not visible.
let diffuse_buffer = device
.create_buffer_mapped(diffuse_rgba.len(), wgpu::BufferUsage::COPY_SRC)
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
// Source and destination views, presumably for a buffer-to-texture copy — the
// enclosing call is not visible. TODO confirm.
wgpu::BufferCopyView {
buffer: &diffuse_buffer,
offset: 0,
// Four bytes per texel times the image width.
row_pitch: 4 * dimensions.0,
image_height: dimensions.1,
wgpu::TextureCopyView {
texture: &diffuse_texture,
mip_level: 0,
array_layer: 0,
origin: wgpu::Origin3d::ZERO,
let diffuse_texture_view = diffuse_texture.create_default_view();
let diffuse_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: -100.0,
lod_max_clamp: 100.0,
compare_function: wgpu::CompareFunction::Always,
// Layout for the diffuse bind group: a 2D sampled texture at binding 0 and a
// sampler at binding 1, both visible to the fragment stage.
let texture_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
wgpu::BindGroupLayoutBinding {
binding: 0,
visibility: wgpu::ShaderStage::FRAGMENT,
ty: wgpu::BindingType::SampledTexture {
multisampled: false,
dimension: wgpu::TextureViewDimension::D2,
wgpu::BindGroupLayoutBinding {
binding: 1,
visibility: wgpu::ShaderStage::FRAGMENT,
ty: wgpu::BindingType::Sampler,
let diffuse_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &texture_bind_group_layout,
bindings: &[
wgpu::Binding {
binding: 0,
resource: wgpu::BindingResource::TextureView(&diffuse_texture_view),
wgpu::Binding {
binding: 1,
resource: wgpu::BindingResource::Sampler(&diffuse_sampler),
// Camera starts at (0, 5, -10) looking at the origin, with the aspect ratio
// taken from the swap chain size.
let camera = Camera {
eye: (0.0, 5.0, -10.0).into(),
target: (0.0, 0.0, 0.0).into(),
up: cgmath::Vector3::unit_y(),
aspect: sc_desc.width as f32 / sc_desc.height as f32,
fovy: 45.0,
znear: 0.1,
zfar: 100.0,
let camera_controller = CameraController::new(0.2);
let mut uniforms = Uniforms::new();
// Uniform buffer with room for one element; COPY_DST so it can be
// overwritten by a buffer copy later.
// NOTE(review): the call that fills this buffer is not visible.
let uniform_buffer = device
.create_buffer_mapped(1, wgpu::BufferUsage::UNIFORM | wgpu::BufferUsage::COPY_DST)
// One instance per cell of a NUM_INSTANCES_PER_ROW x NUM_INSTANCES_PER_ROW
// grid in the x/z plane, shifted by INSTANCE_DISPLACEMENT.
let instances = (0..NUM_INSTANCES_PER_ROW).flat_map(|z| {
(0..NUM_INSTANCES_PER_ROW).map(move |x| {
let position = cgmath::Vector3 { x: x as f32, y: 0.0, z: z as f32 } - INSTANCE_DISPLACEMENT;
let rotation = if position.is_zero() {
// This is needed so an object at (0, 0, 0) won't get scaled to zero,
// as quaternions can affect scale if they're not created correctly.
cgmath::Quaternion::from_axis_angle(cgmath::Vector3::unit_z(), cgmath::Deg(0.0))
} else {
// Rotate 45 degrees about the direction from the origin to the instance.
cgmath::Quaternion::from_axis_angle(position.clone().normalize(), cgmath::Deg(45.0))
Instance {
position, rotation,
// Flatten the instances into model matrices and stage them for a copy into
// the instance texture.
let instance_data = instances.iter().map(Instance::to_matrix).collect::<Vec<_>>();
// NOTE(review): `instance_buffer_size` is not used in the visible remainder
// of this function.
let instance_buffer_size = instance_data.len() * std::mem::size_of::<cgmath::Matrix4<f32>>();
// NOTE(review): the call that fills this buffer is not visible.
let instance_buffer = device
.create_buffer_mapped(instance_data.len(), wgpu::BufferUsage::COPY_SRC)
// Four RGBA32F texels per matrix, so the texture is four times as wide as the
// instance count.
let instance_extent = wgpu::Extent3d {
width: instance_data.len() as u32 * 4,
height: 1,
depth: 1,
let instance_texture = device.create_texture(&wgpu::TextureDescriptor {
size: instance_extent,
array_layer_count: 1,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D1,
format: wgpu::TextureFormat::Rgba32Float,
usage: wgpu::TextureUsage::SAMPLED | wgpu::TextureUsage::COPY_DST,
let instance_texture_view = instance_texture.create_default_view();
// Nearest filtering everywhere for the instance texture's sampler.
let instance_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Nearest,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: -100.0,
lod_max_clamp: 100.0,
compare_function: wgpu::CompareFunction::Always,
// Source and destination views, presumably for a buffer-to-texture copy — the
// enclosing call is not visible. TODO confirm.
// NOTE(review): `row_pitch` is 16 bytes (one RGBA32F texel) while the texture
// is `instance_data.len() * 4` texels wide and 1 texel high, and
// `image_height` is set to that width — confirm this is the intended layout.
wgpu::BufferCopyView {
buffer: &instance_buffer,
offset: 0,
row_pitch: std::mem::size_of::<f32>() as u32 * 4,
image_height: instance_data.len() as u32 * 4,
wgpu::TextureCopyView {
texture: &instance_texture,
mip_level: 0,
array_layer: 0,
origin: wgpu::Origin3d::ZERO,
// Layout for the second bind group: the uniform buffer at binding 0, the 1D
// instance texture at binding 1 and its sampler at binding 2, all visible to
// the vertex stage.
let uniform_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
wgpu::BindGroupLayoutBinding {
binding: 0,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::UniformBuffer {
dynamic: false,
wgpu::BindGroupLayoutBinding {
binding: 1,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::SampledTexture {
multisampled: false,
dimension: wgpu::TextureViewDimension::D1,
wgpu::BindGroupLayoutBinding {
binding: 2,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::Sampler,
let uniform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &uniform_bind_group_layout,
bindings: &[
wgpu::Binding {
binding: 0,
resource: wgpu::BindingResource::Buffer {
buffer: &uniform_buffer,
range: 0..std::mem::size_of_val(&uniforms) as wgpu::BufferAddress,
wgpu::Binding {
binding: 1,
resource: wgpu::BindingResource::TextureView(&instance_texture_view),
wgpu::Binding {
binding: 2,
resource: wgpu::BindingResource::Sampler(&instance_sampler),
// Shaders: GLSL sources are embedded at compile time and compiled to SPIR-V
// at startup; each step panics on failure.
let vs_src = include_str!("texture.vert");
let fs_src = include_str!("shader.frag");
let vs_spirv = glsl_to_spirv::compile(vs_src, glsl_to_spirv::ShaderType::Vertex).unwrap();
let fs_spirv = glsl_to_spirv::compile(fs_src, glsl_to_spirv::ShaderType::Fragment).unwrap();
let vs_data = wgpu::read_spirv(vs_spirv).unwrap();
let fs_data = wgpu::read_spirv(fs_spirv).unwrap();
let vs_module = device.create_shader_module(&vs_data);
let fs_module = device.create_shader_module(&fs_data);
// Set 0 is the texture layout and set 1 the uniform/instance layout.
let render_pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
bind_group_layouts: &[&texture_bind_group_layout, &uniform_bind_group_layout],
let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
layout: &render_pipeline_layout,
vertex_stage: wgpu::ProgrammableStageDescriptor {
module: &vs_module,
entry_point: "main",
fragment_stage: Some(wgpu::ProgrammableStageDescriptor {
module: &fs_module,
entry_point: "main",
// Counter-clockwise triangles are front faces; back faces are culled.
rasterization_state: Some(wgpu::RasterizationStateDescriptor {
front_face: wgpu::FrontFace::Ccw,
cull_mode: wgpu::CullMode::Back,
depth_bias: 0,
depth_bias_slope_scale: 0.0,
depth_bias_clamp: 0.0,
primitive_topology: wgpu::PrimitiveTopology::TriangleList,
color_states: &[
wgpu::ColorStateDescriptor {
format: sc_desc.format,
color_blend: wgpu::BlendDescriptor::REPLACE,
alpha_blend: wgpu::BlendDescriptor::REPLACE,
write_mask: wgpu::ColorWrite::ALL,
depth_stencil_state: None,
index_format: wgpu::IndexFormat::Uint16,
// NOTE(review): the vertex buffer descriptor entries are not visible.
vertex_buffers: &[
sample_count: 1,
sample_mask: !0,
alpha_to_coverage_enabled: false,
// Mesh buffers sized from VERTICES and INDICES.
// NOTE(review): the calls that fill these buffers are not visible.
let vertex_buffer = device
.create_buffer_mapped(VERTICES.len(), wgpu::BufferUsage::VERTEX)
let index_buffer = device
.create_buffer_mapped(INDICES.len(), wgpu::BufferUsage::INDEX)
let num_indices = INDICES.len() as u32;
// NOTE(review): the field list of the returned value is not visible.
Self {
fn update_hidpi_and_resize(&mut self, new_hidpi_factor: f64) {
self.hidpi_factor = new_hidpi_factor;
fn resize(&mut self, new_size: winit::dpi::LogicalSize) {
let physical_size = new_size.to_physical(self.hidpi_factor);
self.size = new_size;
self.sc_desc.width = physical_size.width.round() as u32;
self.sc_desc.height = physical_size.height.round() as u32;
self.swap_chain = self.device.create_swap_chain(&self.surface, &self.sc_desc); = self.sc_desc.width as f32 / self.sc_desc.height as f32;
fn input(&mut self, event: &WindowEvent) -> bool {
/// Per-frame update: creates a temporary `COPY_SRC` staging buffer and records
/// a copy of `size_of::<Uniforms>()` bytes from it into `uniform_buffer`.
///
/// NOTE(review): the statements that fill the staging buffer and submit
/// `encoder` are not visible in this listing — confirm against the full file.
fn update(&mut self) {
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
// Staging buffer with room for one element.
let staging_buffer = self.device
.create_buffer_mapped(1, wgpu::BufferUsage::COPY_SRC)
// Both offsets are 0, so the copy starts at the beginning of each buffer.
encoder.copy_buffer_to_buffer(&staging_buffer, 0, &self.uniform_buffer, 0, std::mem::size_of::<Uniforms>() as wgpu::BufferAddress);
/// Records the commands for one frame: a render pass that clears the next
/// swap-chain image, binds both bind groups and the mesh buffers, and issues
/// one indexed draw covering every instance.
///
/// NOTE(review): no `set_pipeline` call and no queue submission are visible in
/// this listing — confirm against the full file.
fn render(&mut self) {
let frame = self.swap_chain.get_next_texture();
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
color_attachments: &[
wgpu::RenderPassColorAttachmentDescriptor {
attachment: &frame.view,
resolve_target: None,
// Clear to the colour below on load and keep the rendered result.
load_op: wgpu::LoadOp::Clear,
store_op: wgpu::StoreOp::Store,
clear_color: wgpu::Color {
r: 0.1,
g: 0.2,
b: 0.3,
a: 1.0,
depth_stencil_attachment: None,
// Set 0 is the diffuse texture group, set 1 the uniform/instance group.
render_pass.set_bind_group(0, &self.diffuse_bind_group, &[]);
render_pass.set_bind_group(1, &self.uniform_bind_group, &[]);
render_pass.set_vertex_buffers(0, &[(&self.vertex_buffer, 0)]);
render_pass.set_index_buffer(&self.index_buffer, 0);
// All indices, base vertex 0, one instance per entry in `instances`.
render_pass.draw_indexed(0..self.num_indices, 0, 0..self.instances.len() as u32);
/// Entry point: creates the event loop, the window and the `State`, then
/// handles events in the closure below.
///
/// NOTE(review): the window builder chain, the event-loop `run` call, the
/// window-id comparison and the bodies of several arms are incomplete in this
/// listing — confirm against the full file.
fn main() {
let event_loop = EventLoop::new();
let window = WindowBuilder::new()
let mut state = State::new(&window); |event, _, control_flow| {
match event {
Event::WindowEvent {
ref event,
// Give `state` the first chance to handle the event; if it does, just keep
// waiting for the next one.
} if window_id == => if state.input(event) {
*control_flow = ControlFlow::Wait;
} else {
match event {
WindowEvent::CloseRequested => *control_flow = ControlFlow::Exit,
WindowEvent::KeyboardInput {
} => {
match input {
// Pressing Escape exits the application.
KeyboardInput {
state: ElementState::Pressed,
virtual_keycode: Some(VirtualKeyCode::Escape),
} => *control_flow = ControlFlow::Exit,
_ => *control_flow = ControlFlow::Wait,
// NOTE(review): only the control-flow assignment is visible in the resize and
// HiDPI arms; presumably they also call into `state` — confirm.
WindowEvent::Resized(logical_size) => {
*control_flow = ControlFlow::Wait;
WindowEvent::HiDpiFactorChanged(new_hidpi_factor) => {
*control_flow = ControlFlow::Wait;
_ => *control_flow = ControlFlow::Wait,
// NOTE(review): presumably where the frame is updated and rendered — only the
// control-flow assignment is visible. Confirm.
Event::EventsCleared => {
*control_flow = ControlFlow::Wait;
_ => *control_flow = ControlFlow::Wait,

#version 450

// Vertex stage: reads each instance's model matrix from a 1D texture,
// transforms the vertex by it and by the camera's view-projection matrix, and
// passes the texture coordinate through.

layout(location=0) in vec3 a_position;
layout(location=1) in vec2 a_tex_coords;

layout(location=0) out vec2 v_tex_coords;

layout(set=1, binding=0)
uniform Uniforms {
    mat4 u_view_proj;
};

layout(set = 1, binding = 1) uniform texture1D t_model;
layout(set = 1, binding = 2) uniform sampler s_model;

// Rebuilds matrix number `index` from four consecutive texels starting at
// `index * 4`; each texel supplies one column.
mat4 get_matrix(int index) {
    return mat4(
        texelFetch(sampler1D(t_model, s_model), index * 4, 0),
        texelFetch(sampler1D(t_model, s_model), index * 4 + 1, 0),
        texelFetch(sampler1D(t_model, s_model), index * 4 + 2, 0),
        texelFetch(sampler1D(t_model, s_model), index * 4 + 3, 0)
    );
}

void main() {
    v_tex_coords = a_tex_coords;
    mat4 transform = get_matrix(gl_InstanceIndex);
    gl_Position = u_view_proj * transform * vec4(a_position, 1.0);
}

use winit::{
event_loop::{EventLoop, ControlFlow},
window::{Window, WindowBuilder},
use cgmath::prelude::*;
trait VBDesc {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a>;
/// A single mesh vertex as stored in the vertex buffer.
///
/// `#[repr(C)]` pins the field order so that `position` starts at byte 0 and
/// `tex_coords` follows directly after it, which is what the attribute offsets
/// used for this type assume.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
struct Vertex {
    /// Position (x, y, z).
    position: [f32; 3],
    /// Texture coordinate (u, v).
    tex_coords: [f32; 2],
}
impl VBDesc for Vertex {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a> {
use std::mem;
wgpu::VertexBufferDescriptor {
stride: mem::size_of::<Vertex>() as wgpu::BufferAddress,
step_mode: wgpu::InputStepMode::Vertex,
attributes: &[
wgpu::VertexAttributeDescriptor {
offset: 0,
shader_location: 0,
format: wgpu::VertexFormat::Float3,
wgpu::VertexAttributeDescriptor {
offset: mem::size_of::<[f32; 3]>() as wgpu::BufferAddress,
shader_location: 1,
format: wgpu::VertexFormat::Float2,
const VERTICES: &[Vertex] = &[
Vertex { position: [-0.0868241, -0.49240386, 0.0], tex_coords: [1.0 - 0.4131759, 1.0 - 0.00759614], }, // A
Vertex { position: [-0.49513406, -0.06958647, 0.0], tex_coords: [1.0 - 0.0048659444, 1.0 - 0.43041354], }, // B
Vertex { position: [-0.21918549, 0.44939706, 0.0], tex_coords: [1.0 - 0.28081453, 1.0 - 0.949397057], }, // C
Vertex { position: [0.35966998, 0.3473291, 0.0], tex_coords: [1.0 - 0.85967, 1.0 - 0.84732911], }, // D
Vertex { position: [0.44147372, -0.2347359, 0.0], tex_coords: [1.0 - 0.9414737, 1.0 - 0.2652641], }, // E
const INDICES: &[u16] = &[
0, 1, 4,
1, 2, 4,
2, 3, 4,
#[cfg_attr(rustfmt, rustfmt_skip)]
pub const OPENGL_TO_WGPU_MATRIX: cgmath::Matrix4<f32> = cgmath::Matrix4::new(
1.0, 0.0, 0.0, 0.0,
0.0, -1.0, 0.0, 0.0,
0.0, 0.0, 0.5, 0.0,
0.0, 0.0, 0.5, 1.0,
const NUM_INSTANCES_PER_ROW: u32 = 10;
const INSTANCE_DISPLACEMENT: cgmath::Vector3<f32> = cgmath::Vector3::new(NUM_INSTANCES_PER_ROW as f32 * 0.5, 0.0, NUM_INSTANCES_PER_ROW as f32 * 0.5);
/// Parameters from which the view and perspective projection matrices are built.
struct Camera {
// Position of the viewer.
eye: cgmath::Point3<f32>,
// Point the camera is aimed at.
target: cgmath::Point3<f32>,
// Up direction handed to the look-at matrix.
up: cgmath::Vector3<f32>,
// Aspect ratio passed to `cgmath::perspective`.
aspect: f32,
// Vertical field of view; wrapped in `cgmath::Deg` when used, so it is in degrees.
fovy: f32,
// Near clipping distance passed to `cgmath::perspective`.
znear: f32,
// Far clipping distance passed to `cgmath::perspective`.
zfar: f32,
impl Camera {
fn build_view_projection_matrix(&self) -> cgmath::Matrix4<f32> {
let view = cgmath::Matrix4::look_at(self.eye,, self.up);
let proj = cgmath::perspective(cgmath::Deg(self.fovy), self.aspect, self.znear, self.zfar);
return proj * view;
/// CPU-side copy of the data uploaded to the uniform buffer.
#[derive(Copy, Clone)]
struct Uniforms {
// Combined view-projection matrix (`u_view_proj` in the vertex shader).
view_proj: cgmath::Matrix4<f32>,
impl Uniforms {
/// Creates uniforms whose view-projection matrix is the identity.
fn new() -> Self {
Self {
view_proj: cgmath::Matrix4::identity(),
/// Recomputes `view_proj` from `camera`, pre-multiplied by
/// `OPENGL_TO_WGPU_MATRIX`.
fn update_view_proj(&mut self, camera: &Camera) {
self.view_proj = OPENGL_TO_WGPU_MATRIX * camera.build_view_projection_matrix();
/// Tracks which movement keys are currently held and how far the camera moves
/// per update.
struct CameraController {
// Distance the camera eye is moved per `update_camera` call for each held direction.
speed: f32,
// One flag per direction; set from keyboard events in `process_events`.
is_up_pressed: bool,
is_down_pressed: bool,
is_forward_pressed: bool,
is_backward_pressed: bool,
is_left_pressed: bool,
is_right_pressed: bool,
impl CameraController {
/// Creates a controller with every direction flag cleared.
// NOTE(review): no `speed` initializer is visible in this excerpt — confirm the
// field is populated from the `speed` argument.
fn new(speed: f32) -> Self {
Self {
is_up_pressed: false,
is_down_pressed: false,
is_forward_pressed: false,
is_backward_pressed: false,
is_left_pressed: false,
is_right_pressed: false,
/// Updates the direction flags from a keyboard `WindowEvent`.
///
/// Key mapping: Space -> up, LShift -> down, W/Up -> forward, A/Left -> left,
/// S/Down -> backward, D/Right -> right. Unrecognized keys and non-keyboard
/// events produce `false`.
// NOTE(review): the binding of `state` and the value produced by the recognized
// key arms are not visible in this excerpt — confirm against the full source.
fn process_events(&mut self, event: &WindowEvent) -> bool {
match event {
WindowEvent::KeyboardInput {
input: KeyboardInput {
virtual_keycode: Some(keycode),
} => {
// A flag is set while the key is pressed and cleared on any other state.
let is_pressed = *state == ElementState::Pressed;
match keycode {
VirtualKeyCode::Space => {
self.is_up_pressed = is_pressed;
VirtualKeyCode::LShift => {
self.is_down_pressed = is_pressed;
VirtualKeyCode::W | VirtualKeyCode::Up => {
self.is_forward_pressed = is_pressed;
VirtualKeyCode::A | VirtualKeyCode::Left => {
self.is_left_pressed = is_pressed;
VirtualKeyCode::S | VirtualKeyCode::Down => {
self.is_backward_pressed = is_pressed;
VirtualKeyCode::D | VirtualKeyCode::Right => {
self.is_right_pressed = is_pressed;
// Any other key is not handled here.
_ => false,
// Any other window event is not handled here.
_ => false,
fn update_camera(&self, camera: &mut Camera) {
let forward = ( - camera.eye).normalize();
if self.is_forward_pressed {
camera.eye += forward * self.speed;
if self.is_backward_pressed {
camera.eye -= forward * self.speed;
let right = forward.cross(camera.up);
if self.is_right_pressed {
camera.eye += right * self.speed;
if self.is_left_pressed {
camera.eye -= right * self.speed;
/// Per-instance data in the form written to the instance buffer.
#[derive(Debug, Copy, Clone)]
struct InstanceRaw {
// Model matrix for one instance (translation * rotation, see `Instance::to_raw`).
model: cgmath::Matrix4<f32>,
// Size of one `f32` in bytes, used to compute attribute offsets below.
const FLOAT_SIZE: wgpu::BufferAddress = std::mem::size_of::<f32>() as wgpu::BufferAddress;
impl VBDesc for InstanceRaw {
/// Describes the per-instance buffer layout: four consecutive `Float4`
/// attributes at shader locations 2 through 5.
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a> {
wgpu::VertexBufferDescriptor {
// One buffer element is exactly one `InstanceRaw`.
stride: std::mem::size_of::<InstanceRaw>() as wgpu::BufferAddress,
// Advance to the next element once per instance rather than once per vertex.
step_mode: wgpu::InputStepMode::Instance,
attributes: &[
// First group of four floats, at byte 0.
wgpu::VertexAttributeDescriptor {
offset: 0,
format: wgpu::VertexFormat::Float4,
shader_location: 2,
// Second group, four floats further in.
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4,
format: wgpu::VertexFormat::Float4,
shader_location: 3,
// Third group, eight floats in.
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4 * 2,
format: wgpu::VertexFormat::Float4,
shader_location: 4,
// Fourth group, twelve floats in.
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4 * 3,
format: wgpu::VertexFormat::Float4,
shader_location: 5,
/// One placed copy of the model, stored as a translation and a rotation.
struct Instance {
// Translation applied to the instance.
position: cgmath::Vector3<f32>,
// Rotation applied to the instance.
rotation: cgmath::Quaternion<f32>,
impl Instance {
/// Converts this instance into its buffer representation.
///
/// The model matrix is `translation * rotation`, so a point is rotated first
/// and then translated.
fn to_raw(&self) -> InstanceRaw {
let model = cgmath::Matrix4::from_translation(self.position)
* cgmath::Matrix4::from(self.rotation);
InstanceRaw { model }
/// Everything the application keeps alive between frames: GPU handles,
/// pipeline objects, buffers, camera state and window metrics.
struct State {
// Core wgpu objects and the swap chain with the descriptor it was created from.
surface: wgpu::Surface,
device: wgpu::Device,
queue: wgpu::Queue,
sc_desc: wgpu::SwapChainDescriptor,
swap_chain: wgpu::SwapChain,
render_pipeline: wgpu::RenderPipeline,
// Mesh geometry; `num_indices` is the index count used by `draw_indexed`.
vertex_buffer: wgpu::Buffer,
index_buffer: wgpu::Buffer,
num_indices: u32,
// Diffuse texture, its view and sampler, and the bind group set at index 0 in `render`.
diffuse_texture: wgpu::Texture,
diffuse_texture_view: wgpu::TextureView,
diffuse_sampler: wgpu::Sampler,
diffuse_bind_group: wgpu::BindGroup,
// Camera, its input controller, and the uniform data/buffer/bind group
// (bind group set at index 1 in `render`).
camera: Camera,
camera_controller: CameraController,
uniforms: Uniforms,
uniform_buffer: wgpu::Buffer,
uniform_bind_group: wgpu::BindGroup,
// Window metrics: logical size plus the factor used to convert it to physical pixels.
hidpi_factor: f64,
size: winit::dpi::LogicalSize,
// Instances to draw and the buffer bound as the second vertex buffer in `render`.
instances: Vec<Instance>,
instance_buffer: wgpu::Buffer,
impl State {
fn new(window: &Window) -> Self {
let hidpi_factor = window.hidpi_factor();
let size = window.inner_size();
let physical_size = size.to_physical(hidpi_factor);
let surface = wgpu::Surface::create(window);
let adapter = wgpu::Adapter::request(&wgpu::RequestAdapterOptions {
let (device, mut queue) = adapter.request_device(&wgpu::DeviceDescriptor {
extensions: wgpu::Extensions {
anisotropic_filtering: false,
limits: Default::default(),
let sc_desc = wgpu::SwapChainDescriptor {
usage: wgpu::TextureUsage::OUTPUT_ATTACHMENT,
format: wgpu::TextureFormat::Bgra8UnormSrgb,
width: physical_size.width.round() as u32,
height: physical_size.height.round() as u32,
present_mode: wgpu::PresentMode::Vsync,
let swap_chain = device.create_swap_chain(&surface, &sc_desc);
let diffuse_bytes = include_bytes!("happy-tree.png");
let diffuse_image = image::load_from_memory(diffuse_bytes).unwrap();
let diffuse_rgba = diffuse_image.as_rgba8().unwrap();
use image::GenericImageView;
let dimensions = diffuse_image.dimensions();
let size3d = wgpu::Extent3d {
width: dimensions.0,
height: dimensions.1,
depth: 1,
let diffuse_texture = device.create_texture(&wgpu::TextureDescriptor {
size: size3d,
array_layer_count: 1,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8UnormSrgb,
usage: wgpu::TextureUsage::SAMPLED | wgpu::TextureUsage::COPY_DST,
let diffuse_buffer = device
.create_buffer_mapped(diffuse_rgba.len(), wgpu::BufferUsage::COPY_SRC)
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
wgpu::BufferCopyView {
buffer: &diffuse_buffer,
offset: 0,
row_pitch: 4 * dimensions.0,
image_height: dimensions.1,
wgpu::TextureCopyView {
texture: &diffuse_texture,
mip_level: 0,
array_layer: 0,
origin: wgpu::Origin3d::ZERO,
let diffuse_texture_view = diffuse_texture.create_default_view();
let diffuse_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Linear,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: -100.0,
lod_max_clamp: 100.0,
compare_function: wgpu::CompareFunction::Always,
let texture_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
wgpu::BindGroupLayoutBinding {
binding: 0,
visibility: wgpu::ShaderStage::FRAGMENT,
ty: wgpu::BindingType::SampledTexture {
multisampled: false,
dimension: wgpu::TextureViewDimension::D2,
wgpu::BindGroupLayoutBinding {
binding: 1,
visibility: wgpu::ShaderStage::FRAGMENT,
ty: wgpu::BindingType::Sampler,
let diffuse_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &texture_bind_group_layout,
bindings: &[
wgpu::Binding {
binding: 0,
resource: wgpu::BindingResource::TextureView(&diffuse_texture_view),
wgpu::Binding {
binding: 1,
resource: wgpu::BindingResource::Sampler(&diffuse_sampler),
let camera = Camera {
eye: (0.0, 5.0, -10.0).into(),
target: (0.0, 0.0, 0.0).into(),
up: cgmath::Vector3::unit_y(),
aspect: sc_desc.width as f32 / sc_desc.height as f32,
fovy: 45.0,
znear: 0.1,
zfar: 100.0,
let camera_controller = CameraController::new(0.2);
let mut uniforms = Uniforms::new();
let uniform_buffer = device
.create_buffer_mapped(1, wgpu::BufferUsage::UNIFORM | wgpu::BufferUsage::COPY_DST)
let instances = (0..NUM_INSTANCES_PER_ROW).flat_map(|z| {
(0..NUM_INSTANCES_PER_ROW).map(move |x| {
let position = cgmath::Vector3 { x: x as f32, y: 0.0, z: z as f32 } - INSTANCE_DISPLACEMENT;
let rotation = if position.is_zero() {
// this is needed so an object at (0, 0, 0) won't get scaled to zero
// as Quaternions can effect scale if they're not create correctly
cgmath::Quaternion::from_axis_angle(cgmath::Vector3::unit_z(), cgmath::Deg(0.0))
} else {
cgmath::Quaternion::from_axis_angle(position.clone().normalize(), cgmath::Deg(45.0))
Instance {
position, rotation,
let instance_data = instances.iter().map(Instance::to_raw).collect::<Vec<_>>();
let instance_buffer = device
.create_buffer_mapped(instance_data.len(), wgpu::BufferUsage::VERTEX)
let uniform_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
wgpu::BindGroupLayoutBinding {
binding: 0,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::UniformBuffer {
dynamic: false,
let uniform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &uniform_bind_group_layout,
bindings: &[
wgpu::Binding {
binding: 0,
resource: wgpu::BindingResource::Buffer {
buffer: &uniform_buffer,
range: 0..std::mem::size_of_val(&uniforms) as wgpu::BufferAddress,
let vs_src = include_str!("vertex.vert");
let fs_src = include_str!("shader.frag");
let vs_spirv = glsl_to_spirv::compile(vs_src, glsl_to_spirv::ShaderType::Vertex).unwrap();
let fs_spirv = glsl_to_spirv::compile(fs_src, glsl_to_spirv::ShaderType::Fragment).unwrap();
let vs_data = wgpu::read_spirv(vs_spirv).unwrap();
let fs_data = wgpu::read_spirv(fs_spirv).unwrap();
let vs_module = device.create_shader_module(&vs_data);
let fs_module = device.create_shader_module(&fs_data);
let render_pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
bind_group_layouts: &[&texture_bind_group_layout, &uniform_bind_group_layout],
let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
layout: &render_pipeline_layout,
vertex_stage: wgpu::ProgrammableStageDescriptor {
module: &vs_module,
entry_point: "main",
fragment_stage: Some(wgpu::ProgrammableStageDescriptor {
module: &fs_module,
entry_point: "main",
rasterization_state: Some(wgpu::RasterizationStateDescriptor {
front_face: wgpu::FrontFace::Ccw,
cull_mode: wgpu::CullMode::Back,
depth_bias: 0,
depth_bias_slope_scale: 0.0,
depth_bias_clamp: 0.0,
primitive_topology: wgpu::PrimitiveTopology::TriangleList,
color_states: &[
wgpu::ColorStateDescriptor {
format: sc_desc.format,
color_blend: wgpu::BlendDescriptor::REPLACE,
alpha_blend: wgpu::BlendDescriptor::REPLACE,
write_mask: wgpu::ColorWrite::ALL,
depth_stencil_state: None,
index_format: wgpu::IndexFormat::Uint16,
vertex_buffers: &[
Vertex::desc(), InstanceRaw::desc(),
sample_count: 1,
sample_mask: !0,
alpha_to_coverage_enabled: false,
let vertex_buffer = device
.create_buffer_mapped(VERTICES.len(), wgpu::BufferUsage::VERTEX)
let index_buffer = device
.create_buffer_mapped(INDICES.len(), wgpu::BufferUsage::INDEX)
let num_indices = INDICES.len() as u32;
Self {
fn update_hidpi_and_resize(&mut self, new_hidpi_factor: f64) {
self.hidpi_factor = new_hidpi_factor;
fn resize(&mut self, new_size: winit::dpi::LogicalSize) {
let physical_size = new_size.to_physical(self.hidpi_factor);
self.size = new_size;
self.sc_desc.width = physical_size.width.round() as u32;
self.sc_desc.height = physical_size.height.round() as u32;
self.swap_chain = self.device.create_swap_chain(&self.surface, &self.sc_desc); = self.sc_desc.width as f32 / self.sc_desc.height as f32;
fn input(&mut self, event: &WindowEvent) -> bool {
/// Records a copy of fresh uniform data into `uniform_buffer`.
///
/// A one-element `COPY_SRC` staging buffer is created and then copied into the
/// uniform buffer; the copy covers `size_of::<Uniforms>()` bytes from offset 0.
// NOTE(review): filling the staging buffer and submitting the encoder are not
// visible in this excerpt — confirm against the full source.
fn update(&mut self) {
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
let staging_buffer = self.device
.create_buffer_mapped(1, wgpu::BufferUsage::COPY_SRC)
encoder.copy_buffer_to_buffer(&staging_buffer, 0, &self.uniform_buffer, 0, std::mem::size_of::<Uniforms>() as wgpu::BufferAddress);
/// Records one frame: clears the swap chain image and draws every instance.
// NOTE(review): selecting the render pipeline and submitting the encoder are not
// visible in this excerpt — confirm against the full source.
fn render(&mut self) {
// Image to render into for this frame.
let frame = self.swap_chain.get_next_texture();
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
color_attachments: &[
wgpu::RenderPassColorAttachmentDescriptor {
attachment: &frame.view,
resolve_target: None,
// Clear to the colour below at the start of the pass and keep the result.
load_op: wgpu::LoadOp::Clear,
store_op: wgpu::StoreOp::Store,
clear_color: wgpu::Color {
r: 0.1,
g: 0.2,
b: 0.3,
a: 1.0,
// No depth buffer is attached to this pass.
depth_stencil_attachment: None,
// Set 0: diffuse texture + sampler; set 1: uniforms.
render_pass.set_bind_group(0, &self.diffuse_bind_group, &[]);
render_pass.set_bind_group(1, &self.uniform_bind_group, &[]);
// Slot 0 holds per-vertex data, slot 1 holds per-instance data.
render_pass.set_vertex_buffers(0, &[(&self.vertex_buffer, 0), (&self.instance_buffer, 0)]);
render_pass.set_index_buffer(&self.index_buffer, 0);
// Draw the whole index range once per element of `self.instances`.
render_pass.draw_indexed(0..self.num_indices, 0, 0..self.instances.len() as u32);
fn main() {
let event_loop = EventLoop::new();
let window = WindowBuilder::new()
let mut state = State::new(&window); |event, _, control_flow| {
match event {
Event::WindowEvent {
ref event,
} if window_id == => if state.input(event) {
*control_flow = ControlFlow::Wait;
} else {
match event {
WindowEvent::CloseRequested => *control_flow = ControlFlow::Exit,
WindowEvent::KeyboardInput {
} => {
match input {
KeyboardInput {
state: ElementState::Pressed,
virtual_keycode: Some(VirtualKeyCode::Escape),
} => *control_flow = ControlFlow::Exit,
_ => *control_flow = ControlFlow::Wait,
WindowEvent::Resized(logical_size) => {
*control_flow = ControlFlow::Wait;
WindowEvent::HiDpiFactorChanged(new_hidpi_factor) => {
*control_flow = ControlFlow::Wait;
_ => *control_flow = ControlFlow::Wait,
Event::EventsCleared => {
*control_flow = ControlFlow::Wait;
_ => *control_flow = ControlFlow::Wait,

#version 450
layout(location=0) in vec3 a_position;
layout(location=1) in vec2 a_tex_coords;
layout(location=2) in mat4 a_model;
layout(location=0) out vec2 v_tex_coords;
layout(set=1, binding=0)
uniform Uniforms {
mat4 u_view_proj;
void main() {
v_tex_coords = a_tex_coords;
gl_Position = u_view_proj * a_model * vec4(a_position, 1.0);

use cgmath::prelude::*;
trait VBDesc {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a>;
#[derive(Copy, Clone, Debug)]
struct Vertex {
tex_coords: [f32; 2],
impl Vertex {
impl VBDesc for Vertex {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a> {
use std::mem;
wgpu::VertexBufferDescriptor {
const NUM_INSTANCES_PER_ROW: u32 = 10;
const INSTANCE_DISPLACEMENT: cgmath::Vector3<f32> = cgmath::Vector3::new(NUM_INSTANCES_PER_ROW as f32 * 0.5, 0.0, NUM_INSTANCES_PER_ROW as f32 * 0.5);
@ -83,14 +86,12 @@ impl Camera {
#[derive(Copy, Clone)]
struct Uniforms {
view_proj: cgmath::Matrix4<f32>,
model: [cgmath::Matrix4<f32>; NUM_INSTANCES as usize],
impl Uniforms {
fn new() -> Self {
Self {
view_proj: cgmath::Matrix4::identity(),
model: [cgmath::Matrix4::identity(); NUM_INSTANCES as usize],
#[derive(Debug, Copy, Clone)]
struct InstanceRaw {
model: cgmath::Matrix4<f32>,
const FLOAT_SIZE: wgpu::BufferAddress = std::mem::size_of::<f32>() as wgpu::BufferAddress;
impl VBDesc for InstanceRaw {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a> {
wgpu::VertexBufferDescriptor {
stride: std::mem::size_of::<InstanceRaw>() as wgpu::BufferAddress,
step_mode: wgpu::InputStepMode::Instance,
attributes: &[
wgpu::VertexAttributeDescriptor {
offset: 0,
format: wgpu::VertexFormat::Float4,
shader_location: 2,
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4,
format: wgpu::VertexFormat::Float4,
shader_location: 3,
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4 * 2,
format: wgpu::VertexFormat::Float4,
shader_location: 4,
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4 * 3,
format: wgpu::VertexFormat::Float4,
shader_location: 5,
struct Instance {
position: cgmath::Vector3<f32>,
rotation: cgmath::Quaternion<f32>,
impl Instance {
fn to_matrix(&self) -> cgmath::Matrix4<f32> {
cgmath::Matrix4::from_translation(self.position) * cgmath::Matrix4::from(self.rotation)
fn to_raw(&self) -> InstanceRaw {
let model = cgmath::Matrix4::from_translation(self.position)
* cgmath::Matrix4::from(self.rotation);
InstanceRaw { model }
size: winit::dpi::LogicalSize,
instances: Vec<Instance>,
instance_buffer: wgpu::Buffer,
@ -366,6 +408,29 @@ impl State {
.create_buffer_mapped(1, wgpu::BufferUsage::UNIFORM | wgpu::BufferUsage::COPY_DST)
let instances = (0..NUM_INSTANCES_PER_ROW).flat_map(|z| {
(0..NUM_INSTANCES_PER_ROW).map(move |x| {
let position = cgmath::Vector3 { x: x as f32, y: 0.0, z: z as f32 } - INSTANCE_DISPLACEMENT;
let rotation = if position.is_zero() {
// this is needed so an object at (0, 0, 0) won't get scaled to zero
// as Quaternions can effect scale if they're not create correctly
cgmath::Quaternion::from_axis_angle(cgmath::Vector3::unit_z(), cgmath::Deg(0.0))
} else {
cgmath::Quaternion::from_axis_angle(position.clone().normalize(), cgmath::Deg(45.0))
Instance {
position, rotation,
let instance_data = instances.iter().map(Instance::to_raw).collect::<Vec<_>>();
let instance_buffer = device
.create_buffer_mapped(instance_data.len(), wgpu::BufferUsage::VERTEX)
let uniform_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
wgpu::BindGroupLayoutBinding {
ty: wgpu::BindingType::UniformBuffer {
dynamic: false,
buffer: &uniform_buffer,
range: 0..std::mem::size_of_val(&uniforms) as wgpu::BufferAddress,
depth_stencil_state: None,
index_format: wgpu::IndexFormat::Uint16,
vertex_buffers: &[
Vertex::desc(), InstanceRaw::desc(),
sample_count: 1,
sample_mask: !0,
let num_indices = INDICES.len() as u32;
let instances = (0..NUM_INSTANCES_PER_ROW).flat_map(|z| {
(0..NUM_INSTANCES_PER_ROW).map(move |x| {
let position = cgmath::Vector3 { x: x as f32, y: 0.0, z: z as f32 } - INSTANCE_DISPLACEMENT;
let rotation = if position.is_zero() {
// this is needed so an object at (0, 0, 0) won't get scaled to zero
// as Quaternions can effect scale if they're not create correctly
cgmath::Quaternion::from_axis_angle(cgmath::Vector3::unit_z(), cgmath::Deg(0.0))
} else {
cgmath::Quaternion::from_axis_angle(position.clone().normalize(), cgmath::Deg(45.0))
Instance {
position, rotation,
Self {
for (i, instance) in self.instances.iter().enumerate() {
self.uniforms.model[i] = instance.to_matrix();
let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
todo: 0,
@ -560,9 +604,9 @@ impl State {
render_pass.set_bind_group(0, &self.diffuse_bind_group, &[]);
render_pass.set_bind_group(1, &self.uniform_bind_group, &[]);
render_pass.set_vertex_buffers(0, &[(&self.vertex_buffer, 0)]);
render_pass.set_vertex_buffers(0, &[(&self.vertex_buffer, 0), (&self.instance_buffer, 0)]);
render_pass.set_index_buffer(&self.index_buffer, 0);
render_pass.draw_indexed(0..self.num_indices, 0, 0..NUM_INSTANCES);
render_pass.draw_indexed(0..self.num_indices, 0, 0..self.instances.len() as u32);

#version 450
layout(location=0) in vec2 v_tex_coords;
layout(location=1) in vec3 v_color;
layout(location=0) out vec4 f_color;
layout(set = 0, binding = 0) uniform texture2D t_diffuse;
@ -8,4 +10,5 @@ layout(set = 0, binding = 1) uniform sampler s_diffuse;
void main() {
f_color = texture(sampler2D(t_diffuse, s_diffuse), v_tex_coords);
// f_color = vec4(v_color, 1);

layout(location=0) in vec3 a_position;
layout(location=1) in vec2 a_tex_coords;
layout(location=2) in mat4 a_model;
layout(location=0) out vec2 v_tex_coords;
layout(set=1, binding=0)
uniform Uniforms {
mat4 u_view_proj;
mat4 u_model[100];
void main() {
v_tex_coords = a_tex_coords;
gl_Position = u_view_proj * u_model[gl_InstanceIndex] * vec4(a_position, 1.0);
gl_Position = u_view_proj * a_model * vec4(a_position, 1.0);

@ -243,10 +243,427 @@ Running the program now won't change anything visually from our last example, bu
This technique has its drawbacks.
1. We can't use a `Vec` like we've mentioned before
2. We're limited in the number of instances we can process at a time requiring use to cap it at some abitrary number, or render things in "batches"
2. We're limited in the number of instances we can process at a time, requiring us to cap it at some arbitrary number or render things in "batches". If we want to increase the number of instances, we have to recompile our code.
## Another better way - storage buffers
A storage buffer gives us the flexibility that arrays did not. We don't have to specify its size in the shader, and we can even use a `Vec` to create it!
We create a storage buffer in a similar way as any other buffer.
let instance_data = instances.iter().map(Instance::to_matrix).collect::<Vec<_>>();
// we'll need the size for later
let instance_buffer_size = instance_data.len() * std::mem::size_of::<cgmath::Matrix4<f32>>();
let instance_buffer = device
.create_buffer_mapped(instance_data.len(), wgpu::BufferUsage::STORAGE_READ)
To get this buffer into the shader, we'll need to attach it to a bind group. We'll use `uniform_bind_group` just to keep things simple.
let uniform_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
// ...
wgpu::BindGroupLayoutBinding {
binding: 1,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::StorageBuffer {
dynamic: false,
readonly: true,
let uniform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &uniform_bind_group_layout,
bindings: &[
// ...
wgpu::Binding {
binding: 1,
resource: wgpu::BindingResource::Buffer {
buffer: &instance_buffer,
range: 0..instance_buffer_size as wgpu::BufferAddress,
*Note you'll probably need to shift your `instance_buffer` creation above the `uniform_bind_group` creation.*
You don't need to change the draw call at all from the previous example, but we'll need to change the vertex shader.
#version 450
layout(location=0) in vec3 a_position;
layout(location=1) in vec2 a_tex_coords;
layout(location=0) out vec2 v_tex_coords;
layout(set=1, binding=0)
uniform Uniforms {
mat4 u_view_proj;
layout(set=1, binding=1)
buffer Instances {
mat4 s_models[];
void main() {
v_tex_coords = a_tex_coords;
gl_Position = u_view_proj * s_models[gl_InstanceIndex] * vec4(a_position, 1.0);
You can see that we got rid of the `u_model` field from the `Uniforms` block and created a new `Instances` block located at `set=1, binding=1`, corresponding with our bind group layout. Another thing to notice is that we use the `buffer` keyword for the block instead of `uniform`. The details of the `buffer` keyword can be found on [the OpenGL wiki](https://www.khronos.org/opengl/wiki/Shader_Storage_Buffer_Object).
This method is nice because it allows us to store more data overall, as storage buffers can theoretically store as much data as the GPU can handle, whereas uniform buffers are capped. This does mean that storage buffers are slower than uniform buffers, as they are stored like other buffers such as textures and therefore aren't as close in memory, but that usually won't matter much if you're dealing with large amounts of data.
Another benefit to storage buffers is that they can be written to by the shader, unlike uniform buffers. If we want to mutate a large amount of data with a compute shader, we'd use a writeable storage buffer for our output (and potentially input as well).
## Another better way - instance buffers
When we created the `VertexBufferDescriptor` for our model, it required a `step_mode` field. We specified
When we created the `VertexBufferDescriptor` for our model, it required a `step_mode` field. We used `InputStepMode::Vertex`; this time we'll create a `VertexBufferDescriptor` for our `instance_buffer`.
We'll take the code from the previous example and then create a trait called `VBDesc`, and implement it for `Vertex` (replacing the old `impl`) and for a newly created `InstanceRaw` struct. *Note: we could just `impl VBDesc for cgmath::Matrix4<f32>` instead, but instances could have more data in the future, so it's better to create a new struct.*
Here's our new trait.
trait VBDesc {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a>;
To change `Vertex` to use this, we just have to swap `impl Vertex`, for `impl VBDesc for Vertex`.
Now we create `InstanceRaw`. It's pretty simple.
#[derive(Debug, Copy, Clone)]
struct InstanceRaw {
model: cgmath::Matrix4<f32>,
We'll also want to change `Instance::to_matrix()` to `Instance::to_raw()`.
impl Instance {
fn to_raw(&self) -> InstanceRaw {
let model = cgmath::Matrix4::from_translation(self.position)
* cgmath::Matrix4::from(self.rotation)
InstanceRaw { model }
Make sure to change any references to `to_matrix` to `to_raw` as well. We'll also want to change our `BufferUsage` to `VERTEX`.
let instance_data = instances.iter().map(Instance::to_raw).collect::<Vec<_>>();
let instance_buffer = device
.create_buffer_mapped(instance_data.len(), wgpu::BufferUsage::VERTEX)
With that done we can implement `VBDesc` for `InstanceRaw`.
const FLOAT_SIZE: wgpu::BufferAddress = std::mem::size_of::<f32>() as wgpu::BufferAddress;
impl VBDesc for InstanceRaw {
fn desc<'a>() -> wgpu::VertexBufferDescriptor<'a> {
wgpu::VertexBufferDescriptor {
stride: std::mem::size_of::<InstanceRaw>() as wgpu::BufferAddress,
step_mode: wgpu::InputStepMode::Instance, // 1.
attributes: &[
wgpu::VertexAttributeDescriptor {
offset: 0,
format: wgpu::VertexFormat::Float4, // 2.
shader_location: 2, // 3.
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4,
format: wgpu::VertexFormat::Float4,
shader_location: 3,
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4 * 2,
format: wgpu::VertexFormat::Float4,
shader_location: 4,
wgpu::VertexAttributeDescriptor {
offset: FLOAT_SIZE * 4 * 3,
format: wgpu::VertexFormat::Float4,
shader_location: 5,
Let's unpack this a bit.
1. This line turns what would be a vertex buffer into an instance buffer. If we didn't specify this, the shader would step through the elements of this buffer once per vertex instead of once per instance.
2. Vertex attributes have a limited size: `Float4` or the equivalent. This means that our instance buffer will take up multiple attribute slots. 4 in our case.
3. Since we're using 2 slots for our `Vertex` struct, we need to start the `shader_location` at 2.
Now we need to add our new `VertexBufferDescriptor` to our `render_pipeline`.
let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
// ...
vertex_buffers: &[
Vertex::desc(), InstanceRaw::desc(),
// ...
*You'll probably want to remove the `BindGroupLayoutBinding` and `Binding` from `uniform_bind_group_layout` and `uniform_bind_group` respectively, as we won't be accessing our buffer from there.*
The last thing we'll need to do from Rust is use our `instance_buffer` in the `render()` method.
render_pass.set_vertex_buffers(0, &[(&self.vertex_buffer, 0), (&self.instance_buffer, 0)]);
Now we get to the shader. We don't have to change much, we just make our shader reference our `instance_buffer` through the attributes rather than a uniform/buffer block.
#version 450
layout(location=0) in vec3 a_position;
layout(location=1) in vec2 a_tex_coords;
layout(location=2) in mat4 a_model; // NEW!
layout(location=0) out vec2 v_tex_coords;
layout(set=1, binding=0)
uniform Uniforms {
mat4 u_view_proj;
void main() {
v_tex_coords = a_tex_coords;
gl_Position = u_view_proj * a_model * vec4(a_position, 1.0); // UPDATED!
That's all you need to get an instance buffer working! There's a bit of overhead to get things working, and there are a few quirks, but it gets the job done.
## A different way - textures
This seems like a really backwards way to do instancing. Storing non image data in a texture seems really bizarre even though it's a perfectly valid thing to do. After all, a texture is just an array of bytes, and that could theoretically be anything. In our case, we're going to cram our matrix data into that array of bytes.
If you're following along, it'd be best to start from the storage buffer example. We're going to modify it to take our `instance_buffer`, and copy it into a 1D `instance_texture`. First we need to create the texture.
let instance_extent = wgpu::Extent3d {
width: instance_data.len() as u32 * 4,
height: 1,
depth: 1,
let instance_texture = device.create_texture(&wgpu::TextureDescriptor {
size: instance_extent,
array_layer_count: 1,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D1,
format: wgpu::TextureFormat::Rgba32Float,
usage: wgpu::TextureUsage::SAMPLED | wgpu::TextureUsage::COPY_DST,
All of this is fairly normal texture creation stuff, save two things:
1. We specify the height of the texture as 1. While you could theoretically use a height greater than 1, keeping the texture 1D simplifies things a bit. This also means that we need to use `TextureDimension::D1` for our `dimension`.
2. We're using `TextureFormat::Rgba32Float` for the texture format. Since our matrices are 32-bit floats, this makes sense. We could use lower-memory formats such as `Rgba16Float`, or even `Rgba8UnormSrgb`, but we lose precision when we do that. We might not need that precision for basic rendering, but applications that need to model reality definitely do.
With that said, let's create a texture view and sampler for our `instance_texture`.
let instance_texture_view = instance_texture.create_default_view();
let instance_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Nearest,
min_filter: wgpu::FilterMode::Nearest,
minmap_filter: wgpu::FilterMode::Nearest,
lod_min_clamp: -100.0,
lod_max_clamp: 100.0,
compare_function: wgpu::CompareFunction::Always,
Then we need to copy the `instance_buffer` to our `instance_texture`. *You may need to move the `queue.submit(&[encoder.finish()]);` line to appease the borrow checker.*
wgpu::BufferCopyView {
buffer: &instance_buffer,
offset: 0,
row_pitch: std::mem::size_of::<f32>() as u32 * 4,
image_height: instance_data.len() * 4,
wgpu::TextureCopyView {
texture: &instance_texture,
mip_level: 0,
array_layer: 0,
origin: wgpu::Origin3d::ZERO,
Now we need to add our texture and sampler to a bind group. Like with the storage buffer example, we'll use `uniform_bind_group` and its corresponding layout.
let uniform_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
bindings: &[
// ...
wgpu::BindGroupLayoutBinding {
binding: 1,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::SampledTexture {
multisampled: false,
dimension: wgpu::TextureViewDimension::D1,
wgpu::BindGroupLayoutBinding {
binding: 2,
visibility: wgpu::ShaderStage::VERTEX,
ty: wgpu::BindingType::Sampler,
let uniform_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &uniform_bind_group_layout,
bindings: &[
// ...
wgpu::Binding {
binding: 1,
resource: wgpu::BindingResource::TextureView(&instance_texture_view),
wgpu::Binding {
binding: 2,
resource: wgpu::BindingResource::Sampler(&instance_sampler),
With all that done we can now move onto the vertex shader. Let's start with the new uniforms. *Don't forget to delete the old `buffer` block.*
// we use a texture1D instead of texture2d because our texture is 1D
layout(set = 1, binding = 1) uniform texture1D t_model;
layout(set = 1, binding = 2) uniform sampler s_model;
The next part is a little more intensive, as there's no built-in way to process our texture data as matrix data. We'll have to write a function to do that.
mat4 get_matrix(int index) {
return mat4(
texelFetch(sampler1D(t_model, s_model), index * 4, 0),
texelFetch(sampler1D(t_model, s_model), index * 4 + 1, 0),
texelFetch(sampler1D(t_model, s_model), index * 4 + 2, 0),
texelFetch(sampler1D(t_model, s_model), index * 4 + 3, 0)
This function takes in the index of the instance of the model we are rendering, and pulls out the 4 pixels from the image corresponding to the 4 sets of floats that make up that instance's matrix. It then packs them into a `mat4` and returns that.
Now we need to change our `main()` function to use `get_matrix()`.
// Vertex shader entry point: forwards the texture coordinates and positions
// the vertex using the per-instance matrix.
void main() {
v_tex_coords = a_tex_coords;
// Look up this instance's matrix using the built-in instance index.
mat4 transform = get_matrix(gl_InstanceIndex);
// Apply the instance transform first, then the view-projection matrix.
gl_Position = u_view_proj * transform * vec4(a_position, 1.0);
<!-- ## Another better way (storage buffers) -->
That's a lot more work than the other methods, but it's still good to know that you can use textures to store things other than color. This technique does come in handy when other solutions are not available, or not as performant. It's good to be aware of the possibilities!
For fun, here's what our matrix data looks like when converted into a texture (scaled up 10 times)!
![lots of colorful squares](./instance_texture_scaled.png)
## Recap
<table style="width:100%">
<td>Naive Approach</td>
<td><ul><li>Super simple</li></ul></td>
<td><ul><li>Super slow with lots of instances</li></ul></td>
<td>Uniform Buffer</td>
<li>Quicker than other techniques</li>
<li>Requires using fixed size array</li>
<li>Limited size</li>
<li>Requires a bind group</li>
<td>Storage Buffer</td>
<li>Larger size</li>
<li>Allows modifying data</li>
<li>We can use <code>Vec</code></li>
<li>Slower than uniform buffers</li>
<li>Requires a bind group</li>
<td>Instance Buffer</td>
<li>Larger size</li>
<li>Doesn't need <code>gl_InstanceIndex</code></li>
<li>Requires <code>VertexBufferDescriptor</code></li>
<li>Requires passing in the vertex buffer to the render pass</li>
<li>Vertex attributes are limited in size (4 floats)</li>
<li>Universally supported</li>
<li>Faster than naive approach</li>
<li>Requires decoding data manually</li>
<li>Limited by pixel format</li>
<li>Requires copying data to the texture via a buffer</li>
<li>Requires a bind group</li>
## About the depth issues...
You may have noticed that some of the back pentagons are rendering in front of the ones in the front. This is a draw order issue. We could solve this by sorting the instances from back to front, but that would only work from certain camera angles. A more flexible approach would be to use a *depth buffer*. We'll talk about those [next time](/todo).

