Support existing colmap setups

Hi. It would be incredibly useful to be able to use existing COLMAP setups (for example, ones created with VGGT or VGGSfM) in this project.

I used ChatGPT to create a Python script that does this for me and creates the `transforms.json`, in case that is useful for you. My results haven't been successful, though: the resulting GLB mesh is just a big blob.

<img width="1505" height="907" alt="Image" src="https://github.com/user-attachments/assets/e76f624e-0e2d-44af-acfe-febb14b707b8" />

```
#!/usr/bin/env python3
import os
import sys
import json
import math
import struct
from pathlib import Path

try:
    from PIL import Image
except ImportError:
    print("Please install Pillow: pip install pillow")
    sys.exit(1)


# -----------------------------
# COLMAP binary readers
# -----------------------------
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Read exactly ``num_bytes`` from ``fid`` and unpack them with ``struct``.

    Args:
        fid: Binary file object to read from.
        num_bytes: Number of bytes to consume.
        format_char_sequence: ``struct`` format characters (without the
            byte-order prefix) describing those bytes.
        endian_character: ``struct`` byte-order prefix; little-endian by
            default.

    Returns:
        The tuple produced by ``struct.unpack``.

    Raises:
        EOFError: If fewer than ``num_bytes`` bytes could be read.
    """
    chunk = fid.read(num_bytes)
    if len(chunk) == num_bytes:
        layout = endian_character + format_char_sequence
        return struct.unpack(layout, chunk)
    raise EOFError("Unexpected end of file while reading COLMAP binary.")


# One row per camera model handled by this script:
# (numeric model id stored in cameras.bin, model name, number of parameters).
_CAMERA_MODELS = (
    (0, "SIMPLE_PINHOLE", 3),
    (1, "PINHOLE", 4),
    (2, "SIMPLE_RADIAL", 4),
    (3, "RADIAL", 5),
    (4, "OPENCV", 8),
    (5, "OPENCV_FISHEYE", 8),
    (6, "FULL_OPENCV", 12),
    (7, "FOV", 5),
    (8, "SIMPLE_RADIAL_FISHEYE", 4),
    (9, "RADIAL_FISHEYE", 5),
    (10, "THIN_PRISM_FISHEYE", 12),
)

# Numeric model id -> model name.
CAMERA_MODEL_IDS = {model_id: name for model_id, name, _ in _CAMERA_MODELS}

# Model name -> number of float64 parameters stored for that model.
CAMERA_MODEL_NUM_PARAMS = {name: count for _, name, count in _CAMERA_MODELS}


def read_cameras_binary(path):
    """Parse a COLMAP ``cameras.bin`` file.

    Args:
        path: Path of the binary cameras file.

    Returns:
        A dict keyed by camera id. Each value is a dict with the keys
        ``id``, ``model`` (model name), ``width``, ``height`` and ``params``
        (tuple of floats whose length depends on the model).

    Raises:
        EOFError: If the file ends in the middle of a record.
        KeyError: If a record uses a model id missing from
            ``CAMERA_MODEL_IDS``.
    """
    cameras = {}
    with open(path, "rb") as fid:
        (num_cameras,) = read_next_bytes(fid, 8, "Q")
        for _ in range(num_cameras):
            # Fixed-size header: camera id, model id, width, height.
            camera_id, model_id, width, height = read_next_bytes(fid, 24, "iiQQ")
            model_name = CAMERA_MODEL_IDS[model_id]
            num_params = CAMERA_MODEL_NUM_PARAMS[model_name]
            # The parameter list is a run of float64 values.
            params = read_next_bytes(fid, 8 * num_params, "d" * num_params)
            cameras[camera_id] = dict(
                id=camera_id,
                model=model_name,
                width=width,
                height=height,
                params=params,
            )
    return cameras


def read_images_binary(path):
    """Parse a COLMAP ``images.bin`` file.

    Only the pose, camera reference and file name of each image are kept;
    the 2D point observations that follow each record are skipped.

    Args:
        path: Path of the binary images file.

    Returns:
        A dict keyed by image id. Each value is a dict with the keys ``id``,
        ``qvec`` (4-tuple), ``tvec`` (3-tuple), ``camera_id`` and ``name``.

    Raises:
        EOFError: If the file ends in the middle of a record header or in
            the middle of an image name.
    """
    images = {}
    with open(path, "rb") as fid:
        num_images = read_next_bytes(fid, 8, "Q")[0]
        print("ni: " + str(num_images))
        for _ in range(num_images):
            binary_image_props = read_next_bytes(fid, 64, "idddddddi")
            image_id = binary_image_props[0]
            qvec = binary_image_props[1:5]
            tvec = binary_image_props[5:8]
            camera_id = binary_image_props[8]

            # The image name is a NUL-terminated byte string.
            name_chars = []
            while True:
                char = fid.read(1)
                if not char:
                    # read() returns b"" at end of file; without this check
                    # a truncated file would loop forever.
                    raise EOFError(
                        "Unexpected end of file while reading COLMAP binary."
                    )
                if char == b"\x00":
                    break
                name_chars.append(char)
            name = b"".join(name_chars).decode("utf-8")

            num_points2D = read_next_bytes(fid, 8, "Q")[0]
            fid.read(num_points2D * 24)  # skip x, y, point3D_id

            images[image_id] = {
                "id": image_id,
                "qvec": qvec,
                "tvec": tvec,
                "camera_id": camera_id,
                "name": name,
            }
    return images


# -----------------------------
# Math helpers
# -----------------------------
def qvec_to_rotmat(qvec):
    """Convert a quaternion ``(w, x, y, z)`` into a 3x3 rotation matrix.

    The quaternion is used as given (it is not normalised here).

    Args:
        qvec: Sequence of four numbers, scalar part first.

    Returns:
        The matrix as a list of three rows, each a list of three numbers.
    """
    w, x, y, z = qvec

    # Doubled products that appear in the standard quaternion-to-matrix form.
    xx2 = 2 * x * x
    yy2 = 2 * y * y
    zz2 = 2 * z * z
    xy2 = 2 * x * y
    zx2 = 2 * z * x
    yz2 = 2 * y * z
    wx2 = 2 * w * x
    wy2 = 2 * w * y
    wz2 = 2 * w * z

    first_row = [1 - yy2 - zz2, xy2 - wz2, zx2 + wy2]
    second_row = [xy2 + wz2, 1 - xx2 - zz2, yz2 - wx2]
    third_row = [zx2 - wy2, yz2 + wx2, 1 - xx2 - yy2]
    return [first_row, second_row, third_row]


def mat3_transpose(m):
    """Return the transpose of a 3x3 matrix given as nested sequences.

    The input is not modified; the result is a new list of three lists.
    """
    return [[m[row][col] for row in range(3)] for col in range(3)]


def mat3_vec_mul(m, v):
    """Multiply a 3x3 matrix by a 3-vector.

    Args:
        m: 3x3 matrix as nested sequences (row-major).
        v: Sequence of three numbers.

    Returns:
        The product ``m @ v`` as a list of three numbers.
    """
    vx, vy, vz = v[0], v[1], v[2]
    return [
        row[0] * vx + row[1] * vy + row[2] * vz
        for row in (m[0], m[1], m[2])
    ]


def negate(v):
    """Return a new list holding the element-wise negation of ``v``."""
    flipped = []
    for component in v:
        flipped.append(-component)
    return flipped


def colmap_image_to_c2w_opengl(qvec, tvec):
    """Build a 4x4 camera-to-world matrix (OpenGL camera axes) from a COLMAP pose.

    COLMAP stores world-to-camera extrinsics:
        X_cam = R * X_world + t

    Inverting gives the camera-to-world pose:
        R_c2w = R^T
        C = -R^T * t

    COLMAP camera axes follow the OpenCV convention
    (x right, y down, z forward); the output uses the OpenGL convention
    (x right, y up, z backward). The change of convention negates the
    second and third columns of the camera-to-world rotation; the camera
    centre ``C`` is unaffected.

    Args:
        qvec: World-to-camera rotation as a quaternion ``(w, x, y, z)``.
        tvec: World-to-camera translation ``(tx, ty, tz)``.

    Returns:
        A 4x4 matrix as a list of four rows of floats.
    """
    world_to_cam = qvec_to_rotmat(qvec)
    cam_to_world = mat3_transpose(world_to_cam)
    centre = mat3_vec_mul(cam_to_world, negate(tvec))

    # Keep the x axis column, negate the y and z axis columns
    # (OpenCV -> OpenGL), and append the camera centre as the last column.
    pose = [
        [
            float(rot_row[0]),
            float(rot_row[1] * -1),
            float(rot_row[2] * -1),
            float(centre_component),
        ]
        for rot_row, centre_component in zip(cam_to_world, centre)
    ]
    pose.append([0.0, 0.0, 0.0, 1.0])
    return pose


# -----------------------------
# Camera intrinsics helpers
# -----------------------------
def get_intrinsics(camera):
    """Extract ``(fx, fy, cx, cy)`` from a camera record.

    Distortion and other extra parameters of the model are ignored. Models
    with a single focal length report it as both ``fx`` and ``fy``.

    Args:
        camera: Dict with a ``"model"`` name and a ``"params"`` sequence.

    Returns:
        A tuple of four floats ``(fx, fy, cx, cy)``.

    Raises:
        ValueError: If the model name is not supported, or if the parameter
            count does not match the model's layout.
    """
    model = camera["model"]
    params = camera["params"]

    # Branches are grouped by parameter layout rather than by model name.
    if model == "SIMPLE_PINHOLE":
        fx, cx, cy = params
        fy = fx
    elif model in ("SIMPLE_RADIAL", "SIMPLE_RADIAL_FISHEYE"):
        fx, cx, cy, _ = params
        fy = fx
    elif model in ("RADIAL", "RADIAL_FISHEYE"):
        fx, cx, cy, _, _ = params
        fy = fx
    elif model == "PINHOLE":
        fx, fy, cx, cy = params
    elif model == "FOV":
        fx, fy, cx, cy, _ = params
    elif model in ("OPENCV", "OPENCV_FISHEYE"):
        fx, fy, cx, cy, _, _, _, _ = params
    elif model in ("FULL_OPENCV", "THIN_PRISM_FISHEYE"):
        # Only the leading four values are needed; the rest are not inspected.
        fx, fy, cx, cy = params[:4]
    else:
        raise ValueError(f"Unsupported camera model: {model}")

    return float(fx), float(fy), float(cx), float(cy)


def image_size(path, fallback_width=None, fallback_height=None):
    """Return ``(width, height)`` of the image at ``path``.

    The size is read by opening the file with Pillow. If that fails for any
    reason and both fallback values were supplied, the fallback pair is
    returned instead; otherwise the original exception propagates.

    Args:
        path: Location of the image file.
        fallback_width: Width to report when the image cannot be opened.
        fallback_height: Height to report when the image cannot be opened.

    Returns:
        A ``(width, height)`` tuple.
    """
    try:
        with Image.open(path) as img:
            dims = (img.width, img.height)
    except Exception:
        fallback_available = not (fallback_width is None or fallback_height is None)
        if not fallback_available:
            raise
        dims = (fallback_width, fallback_height)
    return dims


def parse_view_index(filename, fallback):
    """Derive a view index from an image file name.

    The file name's stem (name without directory and final suffix) is parsed
    as an integer. When the stem is not an integer, ``fallback`` is returned
    unchanged.
    """
    try:
        index = int(Path(filename).stem)
    except ValueError:
        return fallback
    else:
        return index


# -----------------------------
# Main conversion
# -----------------------------
def convert(parent_folder, output_json):
    """Convert a COLMAP reconstruction into a ``transforms.json``-style file.

    Expected layout::

        parent_folder/
          images/
          sparse/
            cameras.bin
            images.bin

    One frame is written per image in ``images.bin``, with a camera-to-world
    matrix in OpenGL camera axes, the field of view in radians, and the
    principal point in pixels of the image on disk.

    COLMAP intrinsics are expressed in pixels of the resolution recorded in
    ``cameras.bin``. When the image on disk has a different resolution, the
    intrinsics are rescaled to it before the field of view and principal
    point are derived, so the values always describe the file referenced by
    ``file_path``. When both resolutions match, the rescale is a no-op.

    Args:
        parent_folder: Directory containing ``images/`` and ``sparse/``.
        output_json: Path of the JSON file to write.

    Raises:
        FileNotFoundError: If the images directory or either binary file is
            missing.
        KeyError: If an image references a camera id that is not present in
            ``cameras.bin``.
    """
    parent = Path(parent_folder)
    images_dir = parent / "images"
    sparse_dir = parent / "sparse"

    cameras_bin = sparse_dir / "cameras.bin"
    images_bin = sparse_dir / "images.bin"

    if not images_dir.is_dir():
        raise FileNotFoundError(f"Missing images directory: {images_dir}")
    if not cameras_bin.is_file():
        raise FileNotFoundError(f"Missing file: {cameras_bin}")
    if not images_bin.is_file():
        raise FileNotFoundError(
            f"Missing file: {images_bin}\n"
            "cameras.bin alone is not enough to build frame transform matrices."
        )

    cameras = read_cameras_binary(cameras_bin)
    images = read_images_binary(images_bin)

    frames = []
    sorted_items = sorted(images.values(), key=lambda rec: rec["name"])

    for i, image_rec in enumerate(sorted_items):
        img_name = image_rec["name"]
        camera_id = image_rec["camera_id"]
        try:
            camera = cameras[camera_id]
        except KeyError:
            raise KeyError(
                f"Image {img_name!r} references camera id {camera_id}, "
                f"which is not present in {cameras_bin}"
            ) from None

        fx, fy, cx, cy = get_intrinsics(camera)
        width, height = image_size(
            images_dir / img_name,
            fallback_width=camera["width"],
            fallback_height=camera["height"],
        )

        # The intrinsics are calibrated for the camera's recorded resolution.
        # Rescale them to the resolution of the image on disk so that focal
        # length, principal point and image size are mutually consistent.
        calib_width = camera["width"]
        calib_height = camera["height"]
        if calib_width > 0 and calib_height > 0:
            scale_x = width / calib_width
            scale_y = height / calib_height
            fx, cx = fx * scale_x, cx * scale_x
            fy, cy = fy * scale_y, cy * scale_y

        fov_x = 2.0 * math.atan(width / (2.0 * fx))
        fov_y = 2.0 * math.atan(height / (2.0 * fy))

        transform_matrix = colmap_image_to_c2w_opengl(
            image_rec["qvec"], image_rec["tvec"]
        )

        frames.append(
            {
                "view_index": parse_view_index(img_name, i),
                "file_path": f"images/{img_name}",
                "width": int(width),
                "height": int(height),
                "transform_matrix": transform_matrix,
                "camera_fov": [float(fov_x), float(fov_y)],
                "camera_principal_point": [float(cx), float(cy)],
            }
        )

    frames.sort(key=lambda frame: frame["view_index"])

    output = {
        "object_uid": "Camera_01",
        "illumination_index": 0,
        "illumination": {
            "type": "environment_illumination",
            "z_rotation": 0.0,
            "img_name": ""
        },
        "coordinate_system": "ogl",
        "frames": frames,
    }

    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

    print(f"Wrote {output_json}")


# Command-line entry point: expects the parent folder and the output JSON path.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        usage_lines = [
            "Usage:",
            "  python colmap_to_json.py /path/to/parent_folder output.json",
            "",
            "Expected structure:",
            "  parent_folder/",
            "    images/",
            "    sparse/",
            "      cameras.bin",
            "      images.bin",
        ]
        print("\n".join(usage_lines))
        sys.exit(1)

    source_folder, target_json = cli_args
    convert(source_folder, target_json)
```



Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Support existing colmap setups #6

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Support existing colmap setups #6

Description

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions