01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
crc32(data, len)
>> 0x00FF: ACK
schedule(task, interval)
lock.acquire()
>> SYNC COMPLETE
release(ptr)
0x00 0x00 0x00 0x01
watchdog.reset()
>> LINK ESTABLISHED
fn poll(&mut self) -> Poll
waker.wake_by_ref()
cx.waker().clone()
01101001 01101110
fn init() -> Result<()>
for x in 0..buf.len()
load(addr, 0xFF)
sys.run(0x4A, flags)
if val > 0 { dispatch() }
>> 0x00: READY
loop { poll(); yield; }
stream.flush()
0xDEAD :: 0xBEEF
bind(sock, &addr, len)
pub fn connect(host: &str)
match state {
State::Init => boot(),
State::Run => tick(),
_ => halt(),
}
reg[0x3] = 0b11001010
clk.tick()
assert!(val != null)
>> SIGNAL RECEIVED
buf[i] ^= key[i % klen]
let n = read(fd, buf, 64)
while !done { step(); }
push(stack, frame)
0x7F :: OK
type Handler = fn(Ctx)
emit(Event::Data, payload)
select! { rx => handle(rx) }
spawn(async move { run() })
>> 0x01: PROCESSING
map.insert(k, v)
drain().collect::<Vec<_>>()
let _ = tx.send(msg)
timeout(Duration::ms(100))
>> CHECKSUM PASS
fn encode(src: &[u8]) -> Vec
pipe.write_all(&frame)
INDIGO-NX// DEV LOG
← JOURNAL
PUBLIC6 min read

From Plots to GPU — Real-Time Physics on a Graphics Card

simulationgputaichiphysicspythonclaude-codedzhanibekov

The GPU was right there. Doing nothing.


The Starting Point

Yesterday I published 92 simulations exploring whether the Dzhanibekov effect could act as a propulsive mechanism. Quaternion dynamics, Euler's equations, RK4 integration. Proper 3D rigid body physics. The result was definitive — no net thrust — and the journey taught me more about rotational dynamics than any textbook could.

But the output was matplotlib. Static plots. Twelve subplots on a dark background, numbers converging to zero. You could see the physics in the data, but you couldn't feel it. The wing nut flipped in the numbers. I wanted to watch it flip in space.

I also had a graphics card sitting in my machine doing approximately nothing.


What's Out There

I went looking for open-source tools that could run physics simulations on a GPU. The landscape breaks down like this:

The heavyweights — NVIDIA PhysX 5 (open-sourced 2023, BSD licence), Project Chrono (engineering-grade multi-physics), Bullet/PyBullet. These are serious engines. They're also C++ build chains, CMake configurations, and days of setup before you see a triangle.

The compute layers — CuPy (NumPy on CUDA), Numba (JIT to GPU kernels), JAX (Google's autograd compiler). These give you raw GPU power but no rendering. You'd still need a separate visualisation stack.

The sweet spot — Taichi. A Python framework that JIT-compiles to CUDA kernels with a built-in real-time 3D renderer. Write your physics as decorated Python functions, Taichi compiles them to GPU code, and the GGUI system renders the results with zero-copy from GPU memory.

Install: pip install taichi. That's it.


The Build

The existing simulation was numpy — arrays, loops, quaternion maths. Translating it to Taichi meant rewriting the physics as GPU kernels using @ti.kernel and @ti.func decorators. The maths is identical. The execution model is completely different.

Here's the core of the quaternion rotation, running on CUDA:

@ti.func
def qmul(a: tm.vec4, b: tm.vec4) -> tm.vec4:
    return tm.vec4(
        a[0]*b[0] - a[1]*b[1] - a[2]*b[2] - a[3]*b[3],
        a[0]*b[1] + a[1]*b[0] + a[2]*b[3] - a[3]*b[2],
        a[0]*b[2] - a[1]*b[3] + a[2]*b[0] + a[3]*b[1],
        a[0]*b[3] + a[1]*b[2] - a[2]*b[1] + a[3]*b[0],
    )

@ti.func
def qrot(q: tm.vec4, v: tm.vec3) -> tm.vec3:
    p = tm.vec4(0.0, v[0], v[1], v[2])
    qc = tm.vec4(q[0], -q[1], -q[2], -q[3])
    r = qmul(qmul(q, p), qc)
    return tm.vec3(r[1], r[2], r[3])

That's not pseudocode. That's the actual GPU kernel. Taichi compiles it to CUDA at runtime. The syntax is Python. The execution is parallel on the graphics card.

The RK4 integrator, Euler's equations, mesh transforms, trail updates — all GPU kernels. Every frame, 50 physics substeps run on the GPU, the mesh vertices get rotated through the current quaternion, and the GGUI renders the result. No data copies between CPU and GPU for the hot path.

@ti.kernel
def step_physics():
    for _ in range(SUBSTEPS):
        q = quat[None]
        w = omega[None]
        # ... RK4 integration of Euler's equations ...
        omega[None] = w + (h / 6.0) * (dw1 + 2*dw2 + 2*dw3 + dw4)
        quat[None] = qnorm(q + (h / 6.0) * (dq1 + 2*dq2 + 2*dq3 + dq4))

What You See

Real-time GPU simulation of a T-handle undergoing the Dzhanibekov effect — the body flips periodically while angular momentum remains conserved

A T-handle — cyan shaft, magenta crossbar, yellow asymmetric nut — spinning on its intermediate axis. The Dzhanibekov instability builds. The body flips. The angular momentum (orange trail) stays constant while the body tumbles around it. The shaft tip traces out the flip pattern in cyan.

A grid floor for spatial reference. Three coloured axis indicators rotating with the body. A HUD showing simulation time, flip count, angular velocity, and kinetic energy (which is conserved — that's how you know the integrator is working).

Orbit the camera with the mouse. Zoom with scroll. Pause with space. Record video with C — it captures every frame and pipes them through ffmpeg to H.264. Screenshots with G.

The whole thing is about 300 lines of Python. No shaders. No vertex buffers. No OpenGL state machine. No build system.


What This Means

A few years ago, getting a custom rigid body simulation running on a GPU with real-time 3D rendering would have been a project. CMake, CUDA toolkit, OpenGL or Vulkan boilerplate, shader compilation, buffer management — a week of infrastructure before you wrote a single line of physics.

Now it's an afternoon. pip install taichi, write your maths, and it runs on CUDA with built-in rendering. The gap between "I have an idea about rotational dynamics" and "I'm watching it happen in real time on my GPU" collapsed to hours.

This matters because visualisation isn't decoration. Watching the Dzhanibekov flip in real time — seeing the angular momentum vector hold steady while the body tumbles around it — builds physical intuition in a way that twelve matplotlib subplots never could. The plots proved there was no net thrust. The visualiser shows you why. You can see the symmetry. The body rotates through every orientation, and the force averages out because the rotation is continuous, not impulsive.


What I Learned

  1. Taichi's compilation model is the breakthrough. Python syntax, CUDA execution. The JIT compilation adds a few seconds at startup, then everything runs at GPU speed. No separate build step, no C++ interop layer, no pain.

  2. GGUI is underrated. Taichi's built-in renderer does meshes, particles, lines, and text overlays with zero-copy from GPU fields. It's not a game engine, but for scientific visualisation it's exactly right — you see your data, not someone else's abstraction of it.

  3. RK4 on GPU is trivial. The same integration scheme that ran in numpy runs identically in a Taichi kernel. The maths doesn't change. The decorator does.

  4. Conservation laws are your debugger. Kinetic energy is conserved in the torque-free case. If the HUD shows KE drifting, the integrator has a bug. It didn't drift. The physics is right.

  5. Your GPU is probably idle. Unless you're gaming or training a model, that graphics card is sitting there doing nothing. Taichi makes it trivially accessible for compute. There's no reason not to use it.


What's Next

This is the first post in a series. The Dzhanibekov visualiser proves the pipeline: Python physics to GPU kernel to real-time 3D. Now the question is what to throw at it.

Fluid simulation — SPH or MPM, thousands of particles interacting in real time. Taichi has built-in examples for both, and the GPU makes it feasible at interactive framerates.

Particle systems — force fields, electromagnetic simulations, gravitational N-body. Millions of particles is where GPU parallelism really shines.

Interactive physics sandboxes — tweak parameters with sliders, throw objects, change initial conditions. The kind of thing that turns a simulation into a tool for thinking.

The GPU is awake now. Might as well use it.


One pip install. 200 lines of Python. A wing nut flipping in real time on a graphics card. Built with Claude Code. Published at indigo-nx.com.

// FEEDBACK

COMMENTS

Anonymous comments need approval before they appear. Log in for instant posting.
← BACK TO JOURNAL