Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Wuffs (Wrangling Untrusted File Formats Safely) is a memory-safe programming language and standard library for decoding/encoding untrusted file formats. Wuffs source (`.wuffs` files) transpiles to C99 code. The generated C is distributed as a single-file library (`release/c/wuffs-*.c`). Safety guarantees (no buffer overflows, no integer overflows, no null dereferences) are enforced at compile time with zero runtime overhead.

## Build Commands

```bash
# Install Go-based toolchain (wuffs, wuffsfmt)
go install ./cmd/wuffs*

# Regenerate C code after editing .wuffs files
wuffs gen std/... # all modules
wuffs gen std/gif # single module

# Run tests
wuffs test # all tests
wuffs test std/gif # single module
wuffs test -mimic # compare against reference C libraries (giflib, libpng, etc.)

# Run benchmarks
wuffs bench std/gif # single module
wuffs bench -mimic # compare performance vs reference libs

# Run Go unit tests (for toolchain code in lang/)
go test ./...

# Build example programs
./build-example.sh example/zcat # single example
./build-example.sh # all examples

# Build fuzz harnesses
./build-fuzz.sh

# Full CI check (run before submitting a PR)
./build-all.sh
```

## Architecture

**Toolchain (`lang/`)** — Go code implementing the Wuffs-to-C compiler:
- `lang/parse` — parser producing AST
- `lang/check` — type checker, bounds checker, proof/assertion verifier
- `lang/generate` — C code generation orchestration
- `lang/ast` — AST node definitions
- `lang/builtin` — built-in type and function signatures
- `lang/token` — tokenizer
- `lang/wuffsroot` — repository root discovery

**Standard Library (`std/`)** — Wuffs source for codecs: image formats (gif, png, jpeg, bmp, webp, qoi, targa, wbmp, vp8, etc2, thumbhash), compression (deflate, gzip, zlib, bzip2, lzma, lzip, lzw, xz), checksums/hashes (crc32, crc64, adler32, sha256, xxhash32/64), data formats (json, cbor, netpbm, nie).

**Generated Output (`release/c/`)** — Pre-generated single-file C libraries checked into the repo. Users `#include` these directly; define `WUFFS_IMPLEMENTATION` to compile the implementation, not just headers.

**Internal C Templates (`internal/cgen/`)** — Base C code and auxiliary C++ helpers that get incorporated into generated output.

**Tests (`test/c/`)** — C test files per codec in `test/c/std/`. Mimic tests in `test/c/mimiclib/` compare against third-party C libraries. Test data in `test/data/`.

**CLI Tools (`cmd/`)** — `wuffs` (gen/test/bench/genlib), `wuffs-c`, `wuffsfmt` (auto-formatter), `ractool`, `dumbindent`.

**Supporting Go Libraries (`lib/`)** — Go wrappers and utilities used by tools and examples.

## Key Language Concepts

- **Hermetic**: No I/O, no memory allocation, no syscalls. Callers provide all buffers.
- **Coroutines**: Methods marked `?` can suspend on `$short read`/`$short write`; callers refill buffers and resume.
- **Refinement types**: e.g. `base.u32[..= 255]` constrains value ranges, verified at compile time via interval arithmetic.
- **Facts and assertions**: Compile-time proof system; `assert` statements with named axioms for bounds safety.
- **Effects**: `!` marks impure methods, `?` marks coroutines.
- **Syntax differences from C**: `and`/`or`/`not` for logical ops, `<>` for not-equals, `~mod+`/`~sat+` for modular/saturating arithmetic, no operator precedence (explicit parens required).

## Code Style

- Wuffs source: auto-formatted with `wuffsfmt`
- C/C++ code: Chromium style (`.clang-format` in repo root)
- License: Apache-2.0 OR MIT (dual-licensed)
93 changes: 93 additions & 0 deletions fuzz/c/std/webp_fuzzer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2024 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// SPDX-License-Identifier: Apache-2.0 OR MIT

// ----------------

// Silence the nested slash-star warning for the next comment's command line.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcomment"

/*
This fuzzer (the fuzz function) is typically run indirectly, by a framework
such as https://github.com/google/oss-fuzz calling LLVMFuzzerTestOneInput.

When working on the fuzz implementation, or as a coherence check, defining
WUFFS_CONFIG__FUZZLIB_MAIN will let you manually run fuzz over a set of files:

gcc -DWUFFS_CONFIG__FUZZLIB_MAIN webp_fuzzer.c
./a.out ../../../test/data/*.webp
rm -f ./a.out

It should print "PASS", amongst other information, and exit(0).
*/

#pragma clang diagnostic pop

// Wuffs ships as a "single file C library" or "header file library" as per
// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
//
// To use that single file as a "foo.c"-like implementation, instead of a
// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
// compiling it.
#define WUFFS_IMPLEMENTATION

#if defined(WUFFS_CONFIG__FUZZLIB_MAIN)
// Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when
// combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs'
// functions have static storage.
//
// This can help the compiler ignore or discard unused code, which can produce
// faster compiles and smaller binaries. Other motivations are discussed in the
// "ALLOW STATIC IMPLEMENTATION" section of
// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt
#define WUFFS_CONFIG__STATIC_FUNCTIONS
#endif // defined(WUFFS_CONFIG__FUZZLIB_MAIN)

// Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of
// release/c/etc.c choose which parts of Wuffs to build. That file contains the
// entire Wuffs standard library, implementing a variety of codecs and file
// formats. Without this macro definition, an optimizing compiler or linker may
// very well discard Wuffs code for unused codecs, but listing the Wuffs
// modules we use makes that process explicit. Preprocessing means that such
// code simply isn't compiled.
#define WUFFS_CONFIG__MODULES
#define WUFFS_CONFIG__MODULE__BASE
#define WUFFS_CONFIG__MODULE__VP8
#define WUFFS_CONFIG__MODULE__WEBP

// If building this program in an environment that doesn't easily accommodate
// relative includes, you can use the script/inline-c-relative-includes.go
// program to generate a stand-alone C file.
#include "../../../release/c/wuffs-unsupported-snapshot.c"
#include "../fuzzlib/fuzzlib.c"
#include "../fuzzlib/fuzzlib_image_decoder.c"

// fuzz runs the shared image-decoder fuzz driver (fuzz_image_decoder) against
// a freshly initialized WebP decoder reading from src.
//
// The lowest bit of hash selects whether the decoder is initialized with
// WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED. That bit is then
// rotated away before the remaining hash bits are handed to the driver.
//
// It returns "out of memory" if the decoder cannot be allocated, the
// initialization status message if initialization fails, or otherwise
// whatever fuzz_image_decoder returns.
const char*  //
fuzz(wuffs_base__io_buffer* src, uint64_t hash) {
  // The WebP decoder struct is too large for the stack, so it lives on the
  // heap for the duration of this call.
  wuffs_webp__decoder* decoder =
      (wuffs_webp__decoder*)calloc(1, sizeof(wuffs_webp__decoder));
  if (decoder == NULL) {
    return "out of memory";
  }

  // Consume one bit of hash to pick the initialization options.
  uint32_t options = 0;
  if (hash & 1) {
    options = WUFFS_INITIALIZE__LEAVE_INTERNAL_BUFFERS_UNINITIALIZED;
  }
  hash = wuffs_base__u64__rotate_right(hash, 1);

  const char* result;
  wuffs_base__status init_status = wuffs_webp__decoder__initialize(
      decoder, sizeof *decoder, WUFFS_VERSION, options);
  if (wuffs_base__status__is_ok(&init_status)) {
    result = fuzz_image_decoder(
        src, hash,
        wuffs_webp__decoder__upcast_as__wuffs_base__image_decoder(decoder));
  } else {
    result = wuffs_base__status__message(&init_status);
  }

  // Single release point for the decoder, whichever path was taken above.
  free(decoder);
  return result;
}
2 changes: 2 additions & 0 deletions internal/cgen/base/all-impl.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ const uint32_t wuffs_private_impl__pixel_format__bits_per_channel[16] = {

// ¡ INSERT base/pixconv-submodule-x86-avx2.c.

// ¡ INSERT base/pixconv-submodule-arm-neon.c.

#endif // !defined(WUFFS_CONFIG__MODULES) ||
// defined(WUFFS_CONFIG__MODULE__BASE) ||
// defined(WUFFS_CONFIG__MODULE__BASE__PIXCONV)
Expand Down
72 changes: 72 additions & 0 deletions internal/cgen/base/fundamental-public.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,31 @@ wuffs_base__cpu_arch__have_x86_sse42(void) {
#define WUFFS_BASE__GENERATED_C_CODE
#endif

// WUFFS_BASE__GENERATED_C_CODE_NOINLINE is WUFFS_BASE__GENERATED_C_CODE with
// an additional request that the compiler does not inline the annotated
// function. It is used for cold helper functions (e.g. byte loading) that
// should not be inlined into their callers, so that the callers remain small
// enough for the compiler to inline them at their call sites.
//
// GCC and Clang get __attribute__((noinline)), MSVC gets __declspec(noinline)
// and any other compiler gets plain WUFFS_BASE__GENERATED_C_CODE, with no
// inlining control at all.
#if defined(__GNUC__) || defined(__clang__)
#define WUFFS_BASE__GENERATED_C_CODE_NOINLINE \
WUFFS_BASE__GENERATED_C_CODE __attribute__((noinline))
#elif defined(_MSC_VER)
#define WUFFS_BASE__GENERATED_C_CODE_NOINLINE \
WUFFS_BASE__GENERATED_C_CODE __declspec(noinline)
#else
#define WUFFS_BASE__GENERATED_C_CODE_NOINLINE WUFFS_BASE__GENERATED_C_CODE
#endif

// WUFFS_BASE__GENERATED_C_CODE_ALWAYS_INLINE is
// WUFFS_BASE__GENERATED_C_CODE with an additional always_inline request. It is
// used for hot helper functions that should always be inlined into their
// callers (e.g. coefficient decoding in boolean decoders).
//
// Only GCC and Clang get the "inline __attribute__((always_inline))" suffix.
// Every other compiler falls back to plain WUFFS_BASE__GENERATED_C_CODE, so
// inlining is then left entirely to that compiler's own heuristics.
//
// NOTE(review): unlike WUFFS_BASE__GENERATED_C_CODE_NOINLINE, there is no
// _MSC_VER branch here. Confirm whether that is intentional.
#if defined(__GNUC__) || defined(__clang__)
#define WUFFS_BASE__GENERATED_C_CODE_ALWAYS_INLINE \
WUFFS_BASE__GENERATED_C_CODE inline __attribute__((always_inline))
#else
#define WUFFS_BASE__GENERATED_C_CODE_ALWAYS_INLINE WUFFS_BASE__GENERATED_C_CODE
#endif

// --------

// Options (bitwise or'ed together) for wuffs_foo__bar__initialize functions.
Expand Down Expand Up @@ -1077,6 +1102,38 @@ wuffs_base__count_leading_zeroes_u64(uint64_t u) {

#endif // (defined(__GNUC__) || defined(__clang__)) && (__SIZEOF_LONG__ == 8)

// wuffs_base__count_leading_zeroes_u32 returns the number of consecutive zero
// bits at the most significant end of u. The result is in the range 0 ..= 32,
// with 32 returned only when u is zero.
static inline uint32_t  //
wuffs_base__count_leading_zeroes_u32(uint32_t u) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__SIZEOF_INT__) && \
    (__SIZEOF_INT__ == 4)
  // __builtin_clz takes an unsigned int, so it is only a correct 32-bit count
  // when unsigned int is exactly 32 bits wide. Its result is also undefined
  // for a zero argument, hence the explicit check.
  return u ? ((uint32_t)(__builtin_clz(u))) : 32u;
#else
  // Portable fallback: a binary search over the bit position. Each step tests
  // whether the top half of the remaining window is empty and, if so, adds
  // that half's width to the count and shifts the window up.
  if (u == 0) {
    return 32;
  }
  uint32_t n = 0;
  if ((u >> 16) == 0) {
    n |= 16;
    u <<= 16;
  }
  if ((u >> 24) == 0) {
    n |= 8;
    u <<= 8;
  }
  if ((u >> 28) == 0) {
    n |= 4;
    u <<= 4;
  }
  if ((u >> 30) == 0) {
    n |= 2;
    u <<= 2;
  }
  if ((u >> 31) == 0) {
    n |= 1;
  }
  return n;
#endif
}

// --------

// Normally, the wuffs_base__peek_etc and wuffs_base__poke_etc implementations
Expand Down Expand Up @@ -1111,6 +1168,10 @@ wuffs_base__peek_u16be__no_bounds_check(const uint8_t* p) {
uint16_t x;
memcpy(&x, p, 2);
return _byteswap_ushort(x);
#elif defined(__GNUC__) || defined(__clang__)
uint16_t x;
memcpy(&x, p, 2);
return __builtin_bswap16(x);
#else
return (uint16_t)(((uint16_t)(p[0]) << 8) | ((uint16_t)(p[1]) << 0));
#endif
Expand Down Expand Up @@ -1145,6 +1206,13 @@ wuffs_base__peek_u32be__no_bounds_check(const uint8_t* p) {
uint32_t x;
memcpy(&x, p, 4);
return _byteswap_ulong(x);
#elif defined(__GNUC__) || defined(__clang__)
// Use memcpy + bswap to guarantee a single 32-bit load. The byte-shift
// pattern below is semantically equivalent, but compilers may fail to merge
// the four byte loads in large functions.
uint32_t x;
memcpy(&x, p, 4);
return __builtin_bswap32(x);
#else
return ((uint32_t)(p[0]) << 24) | ((uint32_t)(p[1]) << 16) |
((uint32_t)(p[2]) << 8) | ((uint32_t)(p[3]) << 0);
Expand Down Expand Up @@ -1213,6 +1281,10 @@ wuffs_base__peek_u64be__no_bounds_check(const uint8_t* p) {
uint64_t x;
memcpy(&x, p, 8);
return _byteswap_uint64(x);
#elif defined(__GNUC__) || defined(__clang__)
uint64_t x;
memcpy(&x, p, 8);
return __builtin_bswap64(x);
#else
return ((uint64_t)(p[0]) << 56) | ((uint64_t)(p[1]) << 48) |
((uint64_t)(p[2]) << 40) | ((uint64_t)(p[3]) << 32) |
Expand Down
1 change: 1 addition & 0 deletions internal/cgen/base/image-private.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ wuffs_base__pixel_swizzler__swizzle_ycck(
uint8_t v3,
bool is_rgb_or_cmyk,
bool triangle_filter_for_2to1,
bool src_is_bt601,
wuffs_base__slice_u8 scratch_buffer_2k);

// ---------------- Images (Utility)
Expand Down
57 changes: 57 additions & 0 deletions internal/cgen/base/image-public.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,63 @@ wuffs_base__color_ycc__as__color_u32_abgr(uint8_t yy, uint8_t cb, uint8_t cr) {
((0x00FF0000 & rr32) >> 16);
}

// wuffs_base__color_ycc_bt601__as__color_u32 maps a BT.601 studio-range YCbCr
// triple (as used by VP8, H.264, etc.) to a 0xAARRGGBB color. The alpha bits
// of the result are always 0xFF.
//
// It evaluates, in fixed-point arithmetic, the studio-range formula from
// ITU-R BT.601 / RFC 6386 section 13:
//   R = 1.164*(Y-16)                   + 1.596*(Cr-128)
//   G = 1.164*(Y-16) - 0.391*(Cb-128)  - 0.813*(Cr-128)
//   B = 1.164*(Y-16) + 2.018*(Cb-128)
//
// The constants and rounding match libwebp's VP8YUVToR/G/B for bit-exact
// results.
static inline wuffs_base__color_u32_argb_premul  //
wuffs_base__color_ycc_bt601__as__color_u32(uint8_t yy,
                                           uint8_t cb,
                                           uint8_t cr) {
  const int32_t cb32 = (int32_t)cb;
  const int32_t cr32 = (int32_t)cr;
  const int32_t luma = (19077 * (int32_t)yy) >> 8;

  // Each channel is computed with 6 extra fractional bits.
  int32_t red = luma + ((26149 * cr32) >> 8) - 14234;
  int32_t green = luma - ((6419 * cb32) >> 8) - ((13320 * cr32) >> 8) + 8708;
  int32_t blue = luma + ((33050 * cb32) >> 8) - 17685;

  // Saturate to [0, 16320], where 16320 is (255 << 6), so that the final
  // right shift by 6 lands in [0, 255].
  if (red < 0) {
    red = 0;
  } else if (red > 16320) {
    red = 16320;
  }
  if (green < 0) {
    green = 0;
  } else if (green > 16320) {
    green = 16320;
  }
  if (blue < 0) {
    blue = 0;
  } else if (blue > 16320) {
    blue = 16320;
  }

  return 0xFF000000u |                       //
         (((uint32_t)red >> 6) << 16) |      //
         (((uint32_t)green >> 6) << 8) |     //
         ((uint32_t)blue >> 6);
}

// wuffs_base__color_ycc_bt601__as__color_u32_abgr performs the same BT.601
// studio-range YCbCr conversion as wuffs_base__color_ycc_bt601__as__color_u32
// but packs the uint32_t result in 0xAABBGGRR order, not 0xAARRGGBB. The alpha
// bits are always 0xFF.
static inline uint32_t  //
wuffs_base__color_ycc_bt601__as__color_u32_abgr(uint8_t yy,
                                                uint8_t cb,
                                                uint8_t cr) {
  const int32_t cb32 = (int32_t)cb;
  const int32_t cr32 = (int32_t)cr;
  const int32_t luma = (19077 * (int32_t)yy) >> 8;

  // Each channel is computed with 6 extra fractional bits.
  int32_t red = luma + ((26149 * cr32) >> 8) - 14234;
  int32_t green = luma - ((6419 * cb32) >> 8) - ((13320 * cr32) >> 8) + 8708;
  int32_t blue = luma + ((33050 * cb32) >> 8) - 17685;

  // Saturate to [0, 16320], where 16320 is (255 << 6), so that the final
  // right shift by 6 lands in [0, 255].
  if (red < 0) {
    red = 0;
  } else if (red > 16320) {
    red = 16320;
  }
  if (green < 0) {
    green = 0;
  } else if (green > 16320) {
    green = 16320;
  }
  if (blue < 0) {
    blue = 0;
  } else if (blue > 16320) {
    blue = 16320;
  }

  return 0xFF000000u |                       //
         (((uint32_t)blue >> 6) << 16) |     //
         (((uint32_t)green >> 6) << 8) |     //
         ((uint32_t)red >> 6);
}

// --------

typedef uint8_t wuffs_base__pixel_blend;
Expand Down
Loading