From 1534e80c3300b41ca15755d8ff74c0ce5f2a64bd Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Tue, 31 Mar 2026 11:39:31 -0700 Subject: [PATCH 01/14] make bytecode generic for 32 and 64 bit word size --- source/pip/qsharp/_adaptive_pass.py | 25 ++- source/pip/qsharp/_simulation.py | 8 +- source/pip/src/qir_simulation.rs | 128 ++++++------- .../pip/src/qir_simulation/gpu_full_state.rs | 7 +- source/pip/tests/test_adaptive_pass.py | 4 +- source/simulators/src/bytecode.rs | 177 ++++++++---------- .../src/gpu_full_state_simulator.rs | 2 +- .../gpu_full_state_simulator/gpu_context.rs | 24 ++- .../gpu_full_state_simulator/shader_types.rs | 57 ++++++ 9 files changed, 236 insertions(+), 196 deletions(-) diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index 81c7c4b6f6..d7f11fdeaa 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -12,11 +12,18 @@ from __future__ import annotations from dataclasses import dataclass, astuple +from enum import Enum import pyqir import struct from typing import Any, Dict, List, Optional, Tuple, TypeAlias, cast from ._adaptive_bytecode import * + +class Bytecode(Enum): + Bit32 = 1 + Bit64 = 2 + + # --------------------------------------------------------------------------- # Gate name → OpID mapping (must match shader_types.rs OpID enum) # --------------------------------------------------------------------------- @@ -201,8 +208,8 @@ def __post_init__(self): class FloatOperand: - def __init__(self, val: float = 0.0) -> None: - self.val: int = encode_float_as_bits(val) + def __init__(self, val: float, bytecode_kind: Bytecode) -> None: + self.val: int = encode_float_as_bits(val, bytecode_kind) @dataclass @@ -255,14 +262,17 @@ def unwrap_operands( return (dst, src0, src1, aux0, aux1, aux2, aux3) -def encode_float_as_bits(val: float) -> int: - return struct.unpack(" int: + if bytecode_kind == Bytecode.Bit32: + return struct.unpack(" IntOperand | FloatOperand | Re if 
isinstance(value, pyqir.FloatConstant): val = value.value - return FloatOperand(val) + return FloatOperand(val, self._bytecode_kind) # Forward reference (e.g. phi incoming from a later block). # Pre-allocate a register; the defining instruction will reuse it @@ -752,7 +763,7 @@ def _emit_quantum_call(self, call: pyqir.Call) -> None: angle = self._resolve_angle_operand(call.args[0]) else: qubit_arg_offset = 0 - angle = FloatOperand() + angle = FloatOperand(0.0, self._bytecode_kind) qubit_arg_offset = 1 if gate_name in ROTATION_GATES else 0 q1, q2, q3 = self._resolve_qubit_operands(call.args[qubit_arg_offset:]) qop_idx = self._emit_quantum_op(op_id, q1.val, q2.val, q3.val, angle.val) diff --git a/source/pip/qsharp/_simulation.py b/source/pip/qsharp/_simulation.py index 084b7ca625..f237e46278 100644 --- a/source/pip/qsharp/_simulation.py +++ b/source/pip/qsharp/_simulation.py @@ -25,7 +25,7 @@ ) from ._qsharp import QirInputData, Result from typing import TYPE_CHECKING -from ._adaptive_pass import AdaptiveProfilePass, OP_RECORD_OUTPUT +from ._adaptive_pass import AdaptiveProfilePass, Bytecode, OP_RECORD_OUTPUT if TYPE_CHECKING: # This is in the pyi file only from ._native import GpuShotResults @@ -551,7 +551,7 @@ def run_qir_gpu( # Ccx is not support in the GPU simulator, decompose it DecomposeCcxPass().run(mod) if is_adaptive(mod): - program = AdaptiveProfilePass().run(mod, noise) + program = AdaptiveProfilePass(Bytecode.Bit32).run(mod, noise) results = run_adaptive_parallel_shots(program.as_dict(), shots, noise, seed) # Extract recorded output result indices from the bytecode. 
@@ -646,7 +646,9 @@ def set_program(self, input: Union[QirInputData, str, bytes]): noise_intrinsics = None if self.tables is not None: noise_intrinsics = {name: table_id for table_id, name, _ in self.tables} - program = AdaptiveProfilePass().run(mod, noise_intrinsics=noise_intrinsics) + program = AdaptiveProfilePass(Bytecode.Bit32).run( + mod, noise_intrinsics=noise_intrinsics + ) self.gpu_context.set_adaptive_program(program.as_dict()) # Extract recorded output result indices from the bytecode. diff --git a/source/pip/src/qir_simulation.rs b/source/pip/src/qir_simulation.rs index 6e41c2da97..db416cdd41 100644 --- a/source/pip/src/qir_simulation.rs +++ b/source/pip/src/qir_simulation.rs @@ -7,7 +7,7 @@ pub(crate) mod gpu_full_state; use crate::qir_simulation::correlated_noise::parse_noise_table; -use num_traits::Float; +use num_traits::{Float, Unsigned}; use pyo3::{ Bound, FromPyObject, Py, PyRef, PyResult, Python, exceptions::{PyAttributeError, PyKeyError, PyTypeError, PyValueError}, @@ -752,89 +752,69 @@ fn from_intrinsics_table_ref( .collect() } -fn pydict_to_adaptive_program(program: &Bound<'_, PyDict>) -> PyResult { - use bytecode::{AdaptiveProgram, Block, Function, Instruction, PhiNodeEntry, SwitchCase}; +fn extract_key(dict: &Bound<'_, PyDict>, key: &'static str) -> PyResult +where + T: for<'a, 'py> FromPyObject<'a, 'py, Error = pyo3::PyErr>, +{ use pyo3::types::PyDictMethods; + dict.get_item(key)? + .ok_or_else(move || PyKeyError::new_err(key))? 
+ .extract() +} + +fn adaptive_program_from_pydict( + dict: &Bound<'_, PyDict>, +) -> PyResult> +where + Word: Unsigned + Default + for<'a, 'py> FromPyObject<'a, 'py, Error = pyo3::PyErr>, +{ + use bytecode::{AdaptiveProgram, Block, Function, Instruction, Op, PhiNodeEntry, SwitchCase}; + + type BlockTuple = (W, W, W); + type InsTuple = (W, W, W, W, W, W, W, W); + type OpTuple = (W, W, W, W, f64); + type FunTuple = (W, W, W); + type PhiTuple = (W, W); + type SwitchTuple = (W, W); + + let num_qubits: u32 = extract_key(dict, "num_qubits")?; + let num_results: u32 = extract_key(dict, "num_results")?; + let num_registers: u32 = extract_key(dict, "num_registers")?; + let entry_block: Word = extract_key(dict, "entry_block")?; + + let instructions = extract_key::>>(dict, "instructions")? + .into_iter() + .map(Instruction::from_tuple) + .collect(); - // Extract scalar fields - let num_qubits: u32 = program - .get_item("num_qubits")? - .ok_or_else(|| PyKeyError::new_err("num_qubits"))? - .extract()?; - let num_results: u32 = program - .get_item("num_results")? - .ok_or_else(|| PyKeyError::new_err("num_results"))? - .extract()?; - let num_registers: u32 = program - .get_item("num_registers")? - .ok_or_else(|| PyKeyError::new_err("num_registers"))? - .extract()?; - let entry_block: u32 = program - .get_item("entry_block")? - .ok_or_else(|| PyKeyError::new_err("entry_block"))? - .extract()?; - - // Extract array fields - let blocks: Vec<(u32, u32, u32)> = program - .get_item("blocks")? - .ok_or_else(|| PyKeyError::new_err("blocks"))? - .extract()?; - #[allow(clippy::type_complexity)] - let instructions: Vec<(u32, u32, u32, u32, u32, u32, u32, u32)> = program - .get_item("instructions")? - .ok_or_else(|| PyKeyError::new_err("instructions"))? - .extract()?; - let quantum_ops_raw: Vec<(u32, u32, u32, u32, f64)> = program - .get_item("quantum_ops")? - .ok_or_else(|| PyKeyError::new_err("quantum_ops"))? 
- .extract()?; - let functions: Vec<(u32, u32, u32)> = program - .get_item("functions")? - .ok_or_else(|| PyKeyError::new_err("functions"))? - .extract()?; - let phi_entries: Vec<(u32, u32)> = program - .get_item("phi_entries")? - .ok_or_else(|| PyKeyError::new_err("phi_entries"))? - .extract()?; - let switch_cases: Vec<(u32, u32)> = program - .get_item("switch_cases")? - .ok_or_else(|| PyKeyError::new_err("switch_cases"))? - .extract()?; - let mut call_args: Vec = program - .get_item("call_args")? - .ok_or_else(|| PyKeyError::new_err("call_args"))? - .extract()?; - - // Build quantum Op pool using existing gate constructors - let quantum_ops = bytecode::build_op_pool(&quantum_ops_raw); - - // Convert instructions to Instruction structs - let bytecode: Vec = instructions - .iter() - .map(|t| Instruction::from_tuple(*t)) + let quantum_ops = extract_key::>>(dict, "quantum_ops")? + .into_iter() + .map(Op::from_tuple) .collect(); - // Convert block table: strip block_id and pred_count, keep (instr_offset, instr_count) - let mut block_table: Vec = blocks - .iter() - .map(|&(_block_id, instr_offset, instr_count)| (instr_offset, instr_count)) + let mut block_table = extract_key::>>(dict, "blocks")? + .into_iter() + .map(|(_block_id, instr_offset, instr_count)| (instr_offset, instr_count)) .map(Block::from_tuple) .collect(); - // Convert function table - let mut function_table: Vec = - functions.iter().map(|&t| Function::from_tuple(t)).collect(); + let mut function_table = extract_key::>>(dict, "functions")? + .into_iter() + .map(Function::from_tuple) + .collect(); - // Convert phi entries and switch cases - let mut phi_entries: Vec = phi_entries - .iter() - .map(|&t| PhiNodeEntry::from_tuple(t)) + let mut phi_entries = extract_key::>>(dict, "phi_entries")? + .into_iter() + .map(PhiNodeEntry::from_tuple) .collect(); - let mut switch_cases: Vec = switch_cases - .iter() - .map(|&t| SwitchCase::from_tuple(t)) + + let mut switch_cases = extract_key::>>(dict, "switch_cases")? 
+ .into_iter() + .map(SwitchCase::from_tuple) .collect(); + let mut call_args = extract_key::>(dict, "call_args")?; + // WebGPU requires that arrays have at least one element, // so, we push a dummy element on each of these arrays if they are empty. push_default_if_empty(&mut block_table); @@ -844,7 +824,7 @@ fn pydict_to_adaptive_program(program: &Bound<'_, PyDict>) -> PyResult( ) -> PyResult> { let noise = noise_config.map(|noise_config| unbind_noise_config(py, noise_config)); let rng_seed = seed.unwrap_or(0xfeed_face); - let program = pydict_to_adaptive_program(input)?; + let program = adaptive_program_from_pydict(input)?; let result_count: usize = program.num_results as usize; let sim_results = qdk_simulators::run_adaptive_shots_sync(program, &noise, shots, rng_seed, 0) .map_err(PyRuntimeError::new_err)?; diff --git a/source/pip/tests/test_adaptive_pass.py b/source/pip/tests/test_adaptive_pass.py index 8e1d0b89fa..b8df0dae4f 100644 --- a/source/pip/tests/test_adaptive_pass.py +++ b/source/pip/tests/test_adaptive_pass.py @@ -12,7 +12,7 @@ import pyqir import pytest -from qsharp._adaptive_pass import AdaptiveProfilePass, AdaptiveProgram +from qsharp._adaptive_pass import AdaptiveProfilePass, AdaptiveProgram, Bytecode from qsharp._adaptive_bytecode import * @@ -24,7 +24,7 @@ def _run_pass(ir: str, name: str = "test.ll") -> AdaptiveProgram: """Parse an LLVM IR string and run through AdaptiveProfilePass.""" mod = pyqir.Module.from_ir(pyqir.Context(), ir, name) - return AdaptiveProfilePass().run(mod) + return AdaptiveProfilePass(Bytecode.Bit32).run(mod) def _primary(opcode_word: int) -> int: diff --git a/source/simulators/src/bytecode.rs b/source/simulators/src/bytecode.rs index 8c2e4e066a..6fb246e857 100644 --- a/source/simulators/src/bytecode.rs +++ b/source/simulators/src/bytecode.rs @@ -6,12 +6,18 @@ //! Values must stay in sync with the Python `_adaptive_opcodes.py` module. 
use bytemuck::{Pod, Zeroable}; +use num_traits::Unsigned; -use crate::shader_types::{Op, ops}; +// We need these for uploading data to the GPU. +unsafe impl Pod for Instruction {} +unsafe impl Pod for Block {} +unsafe impl Pod for Function {} +unsafe impl Pod for PhiNodeEntry {} +unsafe impl Pod for SwitchCase {} /// Stores a parsed adaptive program. #[derive(Debug)] -pub struct AdaptiveProgram { +pub struct AdaptiveProgram { /// Number of qubits used by the program. pub num_qubits: u32, /// Number of result registers used by the program. @@ -19,21 +25,21 @@ pub struct AdaptiveProgram { /// Number of virtual registers used by the program. pub num_registers: u32, /// Entry block ID for the program. - pub entry_block: u32, + pub entry_block: Word, /// Bytecode instructions. - pub instructions: Vec, + pub instructions: Vec>, /// Block table: indexed by block ID. - pub block_table: Vec, + pub block_table: Vec>, /// Function table. - pub function_table: Vec, + pub function_table: Vec>, /// Phi side table: `[predecessor_block_id, value_register]` entries. - pub phi_entries: Vec, + pub phi_entries: Vec>, /// Switch side table: `[match_value, target_block]` entries. - pub switch_cases: Vec, + pub switch_cases: Vec>, /// Call argument register indices. - pub call_args: Vec, + pub call_args: Vec, /// Quantum op pool (full `Op` structs with expanded unitaries). - pub quantum_ops: Vec, + pub quantum_ops: Vec>, } // --------------------------------------------------------------------------- @@ -48,28 +54,29 @@ pub struct AdaptiveProgram { /// - `src0`, `src1`: source registers or immediates /// - `aux0`–`aux3`: auxiliary fields (gate index, block ids, side-table offsets, etc.) 
#[repr(C)] -#[derive(Copy, Clone, Debug, Default, Pod, Zeroable)] -pub struct Instruction { - pub opcode: u32, - pub dst: u32, - pub src0: u32, - pub src1: u32, - pub aux0: u32, - pub aux1: u32, - pub aux2: u32, - pub aux3: u32, +#[derive(Copy, Clone, Debug, Default, Zeroable)] +pub struct Instruction { + pub opcode: Word, + pub dst: Word, + pub src0: Word, + pub src1: Word, + pub aux0: Word, + pub aux1: Word, + pub aux2: Word, + pub aux3: Word, } -const _: () = assert!(std::mem::size_of::() == 32); +const _: () = assert!(std::mem::size_of::>() == 32); +const _: () = assert!(std::mem::size_of::>() == 64); // --------------------------------------------------------------------------- // Helper functions // --------------------------------------------------------------------------- -impl Instruction { +impl Instruction { /// Create an [`Instruction`] from an 8-tuple (matching Python emission format). #[must_use] - pub const fn from_tuple(t: (u32, u32, u32, u32, u32, u32, u32, u32)) -> Self { + pub fn from_tuple(t: (Word, Word, Word, Word, Word, Word, Word, Word)) -> Self { Self { opcode: t.0, dst: t.1, @@ -81,7 +88,9 @@ impl Instruction { aux3: t.7, } } +} +impl Instruction { /// Extract the primary opcode (bits [7:0]). #[must_use] pub const fn primary_opcode(&self) -> u8 { @@ -102,23 +111,23 @@ impl Instruction { /// Check whether a specific flag bit is set. #[must_use] - pub const fn has_flag(&self, flag: u32) -> bool { + pub const fn has_flag(&self, flag: u64) -> bool { self.opcode & flag != 0 } } /// A basic block descriptor. #[repr(C)] -#[derive(Copy, Clone, Debug, Default, Pod, Zeroable)] -pub struct Block { - pub instr_offset: u32, - pub instr_count: u32, +#[derive(Copy, Clone, Debug, Default, Zeroable)] +pub struct Block { + pub instr_offset: Word, + pub instr_count: Word, } -impl Block { +impl Block { /// Create a [`Block`] from an 2-tuple (matching Python emission format). 
#[must_use] - pub const fn from_tuple(t: (u32, u32)) -> Self { + pub fn from_tuple(t: (Word, Word)) -> Self { Self { instr_offset: t.0, instr_count: t.1, @@ -133,39 +142,39 @@ impl Block { /// The `reserved` field pads the struct to 16 bytes so it matches /// the GPU shader layout (`vec4`). #[repr(C)] -#[derive(Copy, Clone, Debug, Default, Pod, Zeroable)] -pub struct Function { - pub entry_block_id: u32, - pub param_count: u32, - pub param_base_reg: u32, - pub reserved: u32, +#[derive(Copy, Clone, Debug, Default, Zeroable)] +pub struct Function { + pub entry_block_id: Word, + pub param_count: Word, + pub param_base_reg: Word, + pub reserved: Word, } -impl Function { +impl Function { /// Create a [`Function`] from a 3-tuple (matching Python emission format). #[must_use] - pub const fn from_tuple(t: (u32, u32, u32)) -> Self { + pub fn from_tuple(t: (Word, Word, Word)) -> Self { Self { entry_block_id: t.0, param_count: t.1, param_base_reg: t.2, - reserved: 0, + reserved: Word::default(), } } } /// A component of a phi node. #[repr(C)] -#[derive(Copy, Clone, Debug, Default, Pod, Zeroable)] -pub struct PhiNodeEntry { - block_id: u32, - val_reg: u32, +#[derive(Copy, Clone, Debug, Default, Zeroable)] +pub struct PhiNodeEntry { + block_id: Word, + val_reg: Word, } -impl PhiNodeEntry { +impl PhiNodeEntry { /// Create a [`PhiNodeEntry`] from an 2-tuple (matching Python emission format). #[must_use] - pub const fn from_tuple(t: (u32, u32)) -> Self { + pub fn from_tuple(t: (Word, Word)) -> Self { Self { block_id: t.0, val_reg: t.1, @@ -175,16 +184,16 @@ impl PhiNodeEntry { /// A switch case. #[repr(C)] -#[derive(Copy, Clone, Debug, Default, Pod, Zeroable)] -pub struct SwitchCase { - case_val: u32, - target_block: u32, +#[derive(Copy, Clone, Debug, Default, Zeroable)] +pub struct SwitchCase { + case_val: Word, + target_block: Word, } -impl SwitchCase { +impl SwitchCase { /// Create a [`SwitchCase`] from an 2-tuple (matching Python emission format). 
#[must_use] - pub const fn from_tuple(t: (u32, u32)) -> Self { + pub fn from_tuple(t: (Word, Word)) -> Self { Self { case_val: t.0, target_block: t.1, @@ -192,50 +201,24 @@ impl SwitchCase { } } -/// Build a pool of [`Op`] structs from compact `(op_id, q1, q2, q3, angle)` tuples. -/// -/// Maps each `OpID` integer to the corresponding `Op::new_*` constructor, expanding -/// the unitary matrix for use on the GPU. -#[must_use] -pub fn build_op_pool(compact_ops: &[(u32, u32, u32, u32, f64)]) -> Vec { - compact_ops - .iter() - .map(|&(op_id, q1, q2, _q3, angle)| { - #[allow(clippy::cast_possible_truncation)] - let angle_f32 = angle as f32; - match op_id { - ops::ID => Op::new_id_gate(q1), - ops::RESETZ => Op::new_resetz_gate(q1), - ops::X => Op::new_x_gate(q1), - ops::Y => Op::new_y_gate(q1), - ops::Z => Op::new_z_gate(q1), - ops::H => Op::new_h_gate(q1), - ops::S => Op::new_s_gate(q1), - ops::S_ADJ => Op::new_s_adj_gate(q1), - ops::T => Op::new_t_gate(q1), - ops::T_ADJ => Op::new_t_adj_gate(q1), - ops::SX => Op::new_sx_gate(q1), - ops::SX_ADJ => Op::new_sx_adj_gate(q1), - ops::RX => Op::new_rx_gate(angle_f32, q1), - ops::RY => Op::new_ry_gate(angle_f32, q1), - ops::RZ => Op::new_rz_gate(angle_f32, q1), - ops::CX => Op::new_cx_gate(q1, q2), - ops::CY => Op::new_cy_gate(q1, q2), - ops::CZ => Op::new_cz_gate(q1, q2), - ops::RXX => Op::new_rxx_gate(angle_f32, q1, q2), - ops::RYY => Op::new_ryy_gate(angle_f32, q1, q2), - ops::RZZ => Op::new_rzz_gate(angle_f32, q1, q2), - ops::SWAP => Op::new_swap_gate(q1, q2), - ops::MZ => Op::new_mz_gate(q1, q2), - ops::MRESETZ => Op::new_mresetz_gate(q1, q2), - ops::MOVE => Op::new_move_gate(q1), - ops::CORRELATED_NOISE => { - // For adaptive path: q1 = noise_table_idx, q2 = qubit_count. - // Qubit IDs are resolved at runtime from instruction aux fields. 
- Op::new_2q_gate(ops::CORRELATED_NOISE, q1, q2) - } - _ => panic!("Unknown op_id in adaptive quantum op pool: {op_id}"), - } - }) - .collect() +#[derive(Debug)] +pub struct Op { + pub op_id: Word, + pub q1: Word, + pub q2: Word, + pub q3: Word, + pub angle: f64, +} + +impl Op { + #[must_use] + pub fn from_tuple(t: (Word, Word, Word, Word, f64)) -> Self { + Self { + op_id: t.0, + q1: t.1, + q2: t.2, + q3: t.3, + angle: t.4, + } + } } diff --git a/source/simulators/src/gpu_full_state_simulator.rs b/source/simulators/src/gpu_full_state_simulator.rs index 0bebb02d40..503b7cd434 100644 --- a/source/simulators/src/gpu_full_state_simulator.rs +++ b/source/simulators/src/gpu_full_state_simulator.rs @@ -44,7 +44,7 @@ pub fn run_shots_sync( } pub fn run_adaptive_shots_sync( - program: AdaptiveProgram, + program: AdaptiveProgram, noise: &Option>, shot_count: i32, rng_seed: u32, diff --git a/source/simulators/src/gpu_full_state_simulator/gpu_context.rs b/source/simulators/src/gpu_full_state_simulator/gpu_context.rs index c0d8902a9d..df8ae7191e 100644 --- a/source/simulators/src/gpu_full_state_simulator/gpu_context.rs +++ b/source/simulators/src/gpu_full_state_simulator/gpu_context.rs @@ -12,9 +12,10 @@ use crate::gpu_resources::GpuResources; use crate::noise_config::NoiseConfig; use crate::noise_mapping::get_noise_ops; use crate::shader_types::{ - DiagnosticsData, InterpreterState, MAX_BUFFER_SIZE, MAX_QUBIT_COUNT, MAX_QUBITS_PER_WORKGROUP, - MAX_REGISTERS, MAX_SHOT_ENTRIES, MAX_SHOTS_PER_BATCH, MIN_QUBIT_COUNT, MIN_REGISTERS, Op, - SIZEOF_SHOTDATA, THREADS_PER_WORKGROUP, Uniforms, WorkgroupCollationBuffer, ops, + self, DiagnosticsData, InterpreterState, MAX_BUFFER_SIZE, MAX_QUBIT_COUNT, + MAX_QUBITS_PER_WORKGROUP, MAX_REGISTERS, MAX_SHOT_ENTRIES, MAX_SHOTS_PER_BATCH, + MIN_QUBIT_COUNT, MIN_REGISTERS, Op, SIZEOF_SHOTDATA, THREADS_PER_WORKGROUP, Uniforms, + WorkgroupCollationBuffer, ops, }; // On Windows, running larger circuits/shots can hit TDR issues if too many ops are 
dispatched in one go. @@ -33,7 +34,7 @@ pub struct GpuContext { run_params: RunParams, // Adaptive program data (set via set_adaptive_program) - adaptive_program: Option, + adaptive_program: Option>, // Indicates if items impacting the Ops have changed and need to be re-uploaded / recompiled program_is_dirty: bool, @@ -539,8 +540,10 @@ impl GpuContext { } } - pub fn set_adaptive_program(&mut self, program: AdaptiveProgram) -> Result<(), String> { + pub fn set_adaptive_program(&mut self, program: AdaptiveProgram) -> Result<(), String> { self.program.clear(); + self.program + .extend_from_slice(&shader_types::build_op_pool(&program.quantum_ops)); let num_qubits = u32_to_i32(program.num_qubits); // Always allocate a minumum number of qubits to ensure good data alignment, GPU thread usage, etc. @@ -625,8 +628,11 @@ impl GpuContext { .copy_from_slice(&program_bytes); self.resources.upload_batch_data(&batch_data)?; - self.resources - .upload_ops_data(cast_slice(&program.quantum_ops))?; + if let Some(program) = &self.program_with_noise { + self.resources.upload_ops_data(cast_slice(program))?; + } else { + self.resources.upload_ops_data(cast_slice(&self.program))?; + } Ok(()) } @@ -665,7 +671,7 @@ impl GpuContext { .adaptive_program .as_mut() .ok_or("No adaptive program has been set")?; - let (noisy_ops, index_map) = add_noise_to_adaptive_ops(&program.quantum_ops, noise); + let (noisy_ops, index_map) = add_noise_to_adaptive_ops(&self.program, noise); // Patch bytecode instructions that reference quantum op indices. // OP_QUANTUM_GATE (0x10), OP_MEASURE (0x11), OP_RESET (0x12) // all store the op pool index in `aux0`. 
@@ -675,7 +681,7 @@ impl GpuContext { instr.aux0 = index_map[instr.aux0 as usize]; } } - program.quantum_ops = noisy_ops; + self.program_with_noise = Some(noisy_ops); } // Upload the combined batch_data buffer (noise + program) to binding 7 self.upload_batch_data()?; diff --git a/source/simulators/src/gpu_full_state_simulator/shader_types.rs b/source/simulators/src/gpu_full_state_simulator/shader_types.rs index c63d64c1dd..5a45c7c0ed 100644 --- a/source/simulators/src/gpu_full_state_simulator/shader_types.rs +++ b/source/simulators/src/gpu_full_state_simulator/shader_types.rs @@ -3,6 +3,7 @@ use std::f32::consts::FRAC_1_SQRT_2; +use crate::bytecode; use bytemuck::{Pod, Zeroable}; // ********** Constants used by the GPU shader code and structures ********* @@ -1094,3 +1095,59 @@ pub struct InterpreterState { // Total struct size = 64 u32 = 256 bytes (which is aligned to 128 bytes) // safety check to make sure Op is the correct size with padding at compile time const _: () = assert!(std::mem::size_of::() == 256); + +/// Build a pool of [`Op`] structs from compact `(op_id, q1, q2, q3, angle)` tuples. +/// +/// Maps each `OpID` integer to the corresponding `Op::new_*` constructor, expanding +/// the unitary matrix for use on the GPU. 
+#[must_use] +pub fn build_op_pool(compact_ops: &[bytecode::Op]) -> Vec { + compact_ops + .iter() + .map( + |&bytecode::Op { + op_id, + q1, + q2, + q3: _, + angle, + }| { + #[allow(clippy::cast_possible_truncation)] + let angle_f32 = angle as f32; + match op_id { + ops::ID => Op::new_id_gate(q1), + ops::RESETZ => Op::new_resetz_gate(q1), + ops::X => Op::new_x_gate(q1), + ops::Y => Op::new_y_gate(q1), + ops::Z => Op::new_z_gate(q1), + ops::H => Op::new_h_gate(q1), + ops::S => Op::new_s_gate(q1), + ops::S_ADJ => Op::new_s_adj_gate(q1), + ops::T => Op::new_t_gate(q1), + ops::T_ADJ => Op::new_t_adj_gate(q1), + ops::SX => Op::new_sx_gate(q1), + ops::SX_ADJ => Op::new_sx_adj_gate(q1), + ops::RX => Op::new_rx_gate(angle_f32, q1), + ops::RY => Op::new_ry_gate(angle_f32, q1), + ops::RZ => Op::new_rz_gate(angle_f32, q1), + ops::CX => Op::new_cx_gate(q1, q2), + ops::CY => Op::new_cy_gate(q1, q2), + ops::CZ => Op::new_cz_gate(q1, q2), + ops::RXX => Op::new_rxx_gate(angle_f32, q1, q2), + ops::RYY => Op::new_ryy_gate(angle_f32, q1, q2), + ops::RZZ => Op::new_rzz_gate(angle_f32, q1, q2), + ops::SWAP => Op::new_swap_gate(q1, q2), + ops::MZ => Op::new_mz_gate(q1, q2), + ops::MRESETZ => Op::new_mresetz_gate(q1, q2), + ops::MOVE => Op::new_move_gate(q1), + ops::CORRELATED_NOISE => { + // For adaptive path: q1 = noise_table_idx, q2 = qubit_count. + // Qubit IDs are resolved at runtime from instruction aux fields. 
+ Op::new_2q_gate(ops::CORRELATED_NOISE, q1, q2) + } + _ => panic!("Unknown op_id in adaptive quantum op pool: {op_id}"), + } + }, + ) + .collect() +} From 18ed36d75651560f1922fdd45e52946182b65d46 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Wed, 1 Apr 2026 15:21:05 -0700 Subject: [PATCH 02/14] [wip] add cpu bytecode interpreter --- source/pip/qsharp/_adaptive_pass.py | 12 +- source/pip/qsharp/_native.pyi | 36 + source/pip/qsharp/_simulation.py | 125 +- source/pip/src/interpreter.rs | 4 +- source/pip/src/qir_simulation.rs | 2 +- .../pip/src/qir_simulation/cpu_simulators.rs | 160 +- .../pip/tests/test_adaptive_cpu_bytecode.py | 1580 +++++++++++++++++ source/pip/tests/test_adaptive_cpu_noise.py | 410 +++++ .../tests/test_adaptive_cpu_quantum_ops.py | 373 ++++ source/pip/tests/test_adaptive_gpu_noise.py | 2 +- source/pip/tests/test_clifford_simulator.py | 12 +- source/simulators/src/bytecode.rs | 10 +- source/simulators/src/bytecode/runtime.rs | 690 +++++++ 13 files changed, 3345 insertions(+), 71 deletions(-) create mode 100644 source/pip/tests/test_adaptive_cpu_bytecode.py create mode 100644 source/pip/tests/test_adaptive_cpu_noise.py create mode 100644 source/pip/tests/test_adaptive_cpu_quantum_ops.py create mode 100644 source/simulators/src/bytecode/runtime.rs diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index d7f11fdeaa..1aca506831 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -200,11 +200,14 @@ class SwitchCase: @dataclass class IntOperand: val: int = 0 + bits: int = 32 def __post_init__(self): - # Mask to u32 range so negative Python ints become their - # two's-complement u32 representation (e.g. -7 → 0xFFFFFFF9). - self.val = self.val & 0xFFFFFFFF + # Mask to the appropriate word-width so negative Python ints become + # their two's-complement representation + # (e.g. -7 → 0xFFFFFFF9 for 32-bit, 0xFFFFFFFFFFFFFFF9 for 64-bit). 
+ mask = (1 << self.bits) - 1 + self.val = self.val & mask class FloatOperand: @@ -294,6 +297,7 @@ def __init__(self, bytecode_kind: Bytecode): self._func_to_id: Dict[str, int] = {} # function name → function ID self._current_func_is_entry: bool = True self._noise_intrinsics: Optional[Dict[str, int]] = None + self._int_bits = 32 if bytecode_kind == Bytecode.Bit32 else 64 def run( self, @@ -436,7 +440,7 @@ def _resolve_operand(self, value: pyqir.Value) -> IntOperand | FloatOperand | Re if isinstance(value, pyqir.IntConstant): val = value.value - return IntOperand(val) + return IntOperand(val, self._int_bits) if isinstance(value, pyqir.FloatConstant): val = value.value diff --git a/source/pip/qsharp/_native.pyi b/source/pip/qsharp/_native.pyi index 1463ac5e6b..2d059e83ad 100644 --- a/source/pip/qsharp/_native.pyi +++ b/source/pip/qsharp/_native.pyi @@ -1005,6 +1005,42 @@ def run_cpu_full_state( """ ... +def run_cpu_adaptive( + input: dict, + shots: int, + noise: Optional[NoiseConfig] = None, + seed: Optional[int] = None, +) -> List[str]: + """ + Run an adaptive profile QIR program on a CPU full-state simulator. + + The input is an `AdaptiveProgram` converted to a dict using the + .as_dict() method. Uses 64-bit bytecode for full LLVM i64 semantics. + + Returns a list of result strings. Each result string is composed + of '0's, '1's, and 'L's, representing if each measurement result + was a Zero, One, or Loss respectively. + """ + ... + +def run_clifford_adaptive( + input: dict, + shots: int, + noise: Optional[NoiseConfig] = None, + seed: Optional[int] = None, +) -> List[str]: + """ + Run an adaptive profile QIR program on a Clifford stabilizer simulator. + + The input is an `AdaptiveProgram` converted to a dict using the + .as_dict() method. Uses 64-bit bytecode for full LLVM i64 semantics. + + Returns a list of result strings. Each result string is composed + of '0's, '1's, and 'L's, representing if each measurement result + was a Zero, One, or Loss respectively. 
+ """ + ... + def try_create_gpu_adapter() -> str: """ Checks if a compatible GPU adapter is available on the system. diff --git a/source/pip/qsharp/_simulation.py b/source/pip/qsharp/_simulation.py index f237e46278..7a3aae40af 100644 --- a/source/pip/qsharp/_simulation.py +++ b/source/pip/qsharp/_simulation.py @@ -9,8 +9,10 @@ QirInstructionId, QirInstruction, run_clifford, + run_clifford_adaptive, run_parallel_shots, run_adaptive_parallel_shots, + run_cpu_adaptive, run_cpu_full_state, NoiseConfig, GpuContext, @@ -25,7 +27,12 @@ ) from ._qsharp import QirInputData, Result from typing import TYPE_CHECKING -from ._adaptive_pass import AdaptiveProfilePass, Bytecode, OP_RECORD_OUTPUT +from ._adaptive_pass import ( + AdaptiveProfilePass, + AdaptiveProgram, + Bytecode, + OP_RECORD_OUTPUT, +) if TYPE_CHECKING: # This is in the pyi file only from ._native import GpuShotResults @@ -485,6 +492,43 @@ def is_adaptive(mod: pyqir.Module) -> bool: return func_attrs["qir_profiles"].string_value == "adaptive_profile" +def str_to_result(result: str): + match result: + case "0": + return Result.Zero + case "1": + return Result.One + case "L": + return Result.Loss + case _: + raise ValueError(f"Invalid result {result}") + + +def run_adaptive( + rust_run_adaptive_fn: Callable, + program: AdaptiveProgram, + shots: int, + noise: Optional[NoiseConfig], + seed: int, +): + """ + Runs an adaptive program given a rust simulator. Adds output recording logic. + """ + results = rust_run_adaptive_fn(program.as_dict(), shots, noise, seed) + # Extract recorded output result indices from the bytecode. + # OP_RECORD_OUTPUT with aux1=0 is result_record_output where + # src0 is the result index in the results buffer. 
+ recorded_result_indices = [] + for ins in program.instructions: + if (ins.opcode & 0xFF) == OP_RECORD_OUTPUT and ins.aux1 == 0: + recorded_result_indices.append(ins.src0) + # Filter shot_results to only include recorded output indices + filtered = [] + for s in results: + filtered.append([str_to_result(s[i]) for i in recorded_result_indices]) + return filtered + + def run_qir_clifford( input: Union[QirInputData, str, bytes], shots: Optional[int] = 1, @@ -492,19 +536,23 @@ def run_qir_clifford( seed: Optional[int] = None, ) -> List: (mod, shots, noise, seed) = preprocess_simulation_input(input, shots, noise, seed) - if noise is None: - (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) + if is_adaptive(mod): + program = AdaptiveProfilePass(Bytecode.Bit64).run(mod, noise) + return run_adaptive(run_clifford_adaptive, program, shots, noise, seed) else: - (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) - recorder = OutputRecordingPass() - recorder.run(mod) - - return list( - map( - recorder.process_output, - run_clifford(gates, num_qubits, num_results, shots, noise, seed), + if noise is None: + (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) + else: + (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) + recorder = OutputRecordingPass() + recorder.run(mod) + + return list( + map( + recorder.process_output, + run_clifford(gates, num_qubits, num_results, shots, noise, seed), + ) ) - ) def run_qir_cpu( @@ -514,31 +562,23 @@ def run_qir_cpu( seed: Optional[int] = None, ) -> List: (mod, shots, noise, seed) = preprocess_simulation_input(input, shots, noise, seed) - if noise is None: - (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) + if is_adaptive(mod): + program = AdaptiveProfilePass(Bytecode.Bit64).run(mod, noise) + return run_adaptive(run_cpu_adaptive, program, shots, noise, seed) else: - (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) - recorder = 
OutputRecordingPass() - recorder.run(mod) - - return list( - map( - recorder.process_output, - run_cpu_full_state(gates, num_qubits, num_results, shots, noise, seed), - ) - ) - + if noise is None: + (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) + else: + (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) + recorder = OutputRecordingPass() + recorder.run(mod) -def str_to_result(result: str): - match result: - case "0": - return Result.Zero - case "1": - return Result.One - case "L": - return Result.Loss - case _: - raise ValueError(f"Invalid result {result}") + return list( + map( + recorder.process_output, + run_cpu_full_state(gates, num_qubits, num_results, shots, noise, seed), + ) + ) def run_qir_gpu( @@ -552,20 +592,7 @@ def run_qir_gpu( DecomposeCcxPass().run(mod) if is_adaptive(mod): program = AdaptiveProfilePass(Bytecode.Bit32).run(mod, noise) - results = run_adaptive_parallel_shots(program.as_dict(), shots, noise, seed) - - # Extract recorded output result indices from the bytecode. - # OP_RECORD_OUTPUT with aux1=0 is result_record_output where - # src0 is the result index in the results buffer. 
- recorded_result_indices = [] - for ins in program.instructions: - if (ins.opcode & 0xFF) == OP_RECORD_OUTPUT and ins.aux1 == 0: - recorded_result_indices.append(ins.src0) - # Filter shot_results to only include recorded output indices - filtered = [] - for s in results: - filtered.append([str_to_result(s[i]) for i in recorded_result_indices]) - return filtered + return run_adaptive(run_adaptive_parallel_shots, program, shots, noise, seed) else: if noise is None: (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) diff --git a/source/pip/src/interpreter.rs b/source/pip/src/interpreter.rs index ea631ebd23..e488bf3b88 100644 --- a/source/pip/src/interpreter.rs +++ b/source/pip/src/interpreter.rs @@ -24,7 +24,7 @@ use crate::{ noisy_simulator::register_noisy_simulator_submodule, qir_simulation::{ IdleNoiseParams, NoiseConfig, NoiseTable, QirInstruction, QirInstructionId, - cpu_simulators::{run_clifford, run_cpu_full_state}, + cpu_simulators::{run_clifford, run_clifford_adaptive, run_cpu_adaptive, run_cpu_full_state}, gpu_full_state::{ GpuContext, run_adaptive_parallel_shots, run_parallel_shots, try_create_gpu_adapter, }, @@ -134,6 +134,8 @@ fn _native<'a>(py: Python<'a>, m: &Bound<'a, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(run_clifford, m)?)?; m.add_function(wrap_pyfunction!(try_create_gpu_adapter, m)?)?; m.add_function(wrap_pyfunction!(run_cpu_full_state, m)?)?; + m.add_function(wrap_pyfunction!(run_cpu_adaptive, m)?)?; + m.add_function(wrap_pyfunction!(run_clifford_adaptive, m)?)?; m.add_function(wrap_pyfunction!(run_parallel_shots, m)?)?; m.add_function(wrap_pyfunction!(run_adaptive_parallel_shots, m)?)?; m.add("QSharpError", py.get_type::())?; diff --git a/source/pip/src/qir_simulation.rs b/source/pip/src/qir_simulation.rs index db416cdd41..27f360c92c 100644 --- a/source/pip/src/qir_simulation.rs +++ b/source/pip/src/qir_simulation.rs @@ -762,7 +762,7 @@ where .extract() } -fn adaptive_program_from_pydict( +pub(crate) fn 
adaptive_program_from_pydict( dict: &Bound<'_, PyDict>, ) -> PyResult> where diff --git a/source/pip/src/qir_simulation/cpu_simulators.rs b/source/pip/src/qir_simulation/cpu_simulators.rs index 45d594ae34..dbcc663ebf 100644 --- a/source/pip/src/qir_simulation/cpu_simulators.rs +++ b/source/pip/src/qir_simulation/cpu_simulators.rs @@ -4,11 +4,15 @@ #[cfg(test)] mod tests; -use crate::qir_simulation::{NoiseConfig, QirInstruction, QirInstructionId, unbind_noise_config}; +use crate::qir_simulation::{ + NoiseConfig, QirInstruction, QirInstructionId, adaptive_program_from_pydict, + unbind_noise_config, +}; use pyo3::{IntoPyObjectExt, exceptions::PyValueError, prelude::*, types::PyList}; -use pyo3::{PyResult, pyfunction}; +use pyo3::{PyResult, pyfunction, types::PyDict}; use qdk_simulators::{ MeasurementResult, Simulator, + bytecode::{self, runtime::run_shot as adaptive_run_shot}, cpu_full_state_simulator::{NoiselessSimulator, NoisySimulator}, noise_config::{self, CumulativeNoiseConfig}, stabilizer_simulator::StabilizerSimulator, @@ -267,3 +271,155 @@ fn run_shot(instructions: &[QirInstruction], sim: &mut S) { } } } + +// --------------------------------------------------------------------------- +// Adaptive Profile CPU simulation +// --------------------------------------------------------------------------- + +#[pyfunction] +#[allow(clippy::too_many_arguments)] +pub fn run_cpu_adaptive<'py>( + py: Python<'py>, + input: &Bound<'py, PyDict>, + shots: u32, + noise_config: Option<&Bound<'py, NoiseConfig>>, + seed: Option, +) -> PyResult> { + use qdk_simulators::cpu_full_state_simulator::noise::Fault; + + let program: bytecode::AdaptiveProgram = adaptive_program_from_pydict(input)?; + + let noise: noise_config::NoiseConfig = if let Some(nc) = noise_config { + unbind_noise_config(py, nc) + } else { + noise_config::NoiseConfig::NOISELESS + }; + + let output = if noise_config.is_some() { + let make_simulator = + |num_qubits, num_results, seed, noise: Arc>| { + 
NoisySimulator::new(num_qubits, num_results, seed, noise) + }; + run_adaptive(&program, shots, seed, noise, make_simulator) + } else { + let make_simulator = + |num_qubits, num_results, seed, _noise: Arc>| { + NoiselessSimulator::new(num_qubits, num_results, seed, ()) + }; + run_adaptive(&program, shots, seed, noise, make_simulator) + }; + + let mut array = Vec::with_capacity(shots as usize); + for val in output { + array.push( + val.into_py_any(py).map_err(|e| { + PyValueError::new_err(format!("failed to create Python string: {e}")) + })?, + ); + } + + PyList::new(py, array) + .map_err(|e| PyValueError::new_err(format!("failed to create Python list: {e}")))? + .into_py_any(py) +} + +#[pyfunction] +#[allow(clippy::too_many_arguments)] +pub fn run_clifford_adaptive<'py>( + py: Python<'py>, + input: &Bound<'py, PyDict>, + shots: u32, + noise_config: Option<&Bound<'py, NoiseConfig>>, + seed: Option, +) -> PyResult> { + use qdk_simulators::stabilizer_simulator::noise::Fault; + + let program: bytecode::AdaptiveProgram = adaptive_program_from_pydict(input)?; + + let noise: noise_config::NoiseConfig = if let Some(nc) = noise_config { + unbind_noise_config(py, nc) + } else { + noise_config::NoiseConfig::NOISELESS + }; + + let make_simulator = + |num_qubits, num_results, seed, noise: Arc>| { + StabilizerSimulator::new(num_qubits, num_results, seed, noise) + }; + let output = run_adaptive(&program, shots, seed, noise, make_simulator); + + let mut array = Vec::with_capacity(shots as usize); + for val in output { + array.push( + val.into_py_any(py).map_err(|e| { + PyValueError::new_err(format!("failed to create Python string: {e}")) + })?, + ); + } + + PyList::new(py, array) + .map_err(|e| PyValueError::new_err(format!("failed to create Python list: {e}")))? 
+ .into_py_any(py) +} + +fn run_adaptive( + program: &bytecode::AdaptiveProgram, + shots: u32, + seed: Option, + mut noise: noise_config::NoiseConfig, + make_simulator: SimulatorBuilder, +) -> Vec +where + SimulatorBuilder: Fn(usize, usize, u32, Arc) -> S + Send + Sync, + Noise: From> + Send + Sync, + S: Simulator, +{ + if !noise.rz.is_noiseless() { + if noise.s.is_noiseless() { + noise.s = noise.rz.clone(); + } + if noise.z.is_noiseless() { + noise.z = noise.rz.clone(); + } + if noise.s_adj.is_noiseless() { + noise.s_adj = noise.rz.clone(); + } + } + + let noise: Noise = noise.into(); + let noise = Arc::new(noise); + + let num_qubits = program.num_qubits as usize; + let num_results = program.num_results as usize; + + let mut rng = if let Some(seed) = seed { + StdRng::seed_from_u64(seed.into()) + } else { + StdRng::from_entropy() + }; + + let output = (0..shots) + .map(|_| rng.r#gen()) + .collect::>() + .par_iter() + .map(|shot_seed| { + let mut simulator = make_simulator(num_qubits, num_results, *shot_seed, noise.clone()); + adaptive_run_shot(program, &mut simulator); + simulator.take_measurements() + }) + .collect::>(); + + let mut values = Vec::with_capacity(shots as usize); + for shot_result in output { + let mut buffer = String::with_capacity(shot_result.len()); + for measurement in shot_result { + match measurement { + MeasurementResult::Zero => write!(&mut buffer, "0").expect("write should succeed"), + MeasurementResult::One => write!(&mut buffer, "1").expect("write should succeed"), + MeasurementResult::Loss => write!(&mut buffer, "L").expect("write should succeed"), + } + } + values.push(buffer); + } + values +} diff --git a/source/pip/tests/test_adaptive_cpu_bytecode.py b/source/pip/tests/test_adaptive_cpu_bytecode.py new file mode 100644 index 0000000000..6de5a36da3 --- /dev/null +++ b/source/pip/tests/test_adaptive_cpu_bytecode.py @@ -0,0 +1,1580 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +"""Per-opcode tests for the adaptive CPU bytecode interpreter. + +Each test targets one (or a small family of) bytecode instruction(s) by +supplying hand-written Adaptive Profile QIR that exercises the instruction +and encodes the expected result into a measurement outcome. + +Tests are ordered to match the opcode definitions in ``_adaptive_opcodes.py`` +so that coverage can be verified by reading both files side by side. + +This is a CPU counterpart to ``test_adaptive_gpu_bytecode.py``. +""" + +from collections import Counter +import pytest +import qsharp.openqasm + +from qsharp._simulation import run_qir, Result + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +# Deterministic programs need a single shot but we run multiple shots +# to verify that multiple shots yield the same result. +SHOTS = 100 + +SIM_TYPES = ["cpu", "clifford"] + + +def map_result_list_to_str(results): + results_str = "" + for r in results: + match r: + case Result.Zero: + results_str += "0" + case Result.One: + results_str += "1" + case Result.Loss: + results_str += "L" + return results_str + + +def _run(qir: str, shots: int = SHOTS, seed: int = 42, sim_type: str = "cpu"): + """Run *qir* on the given simulator and return shot results as a list of strings.""" + results = run_qir(qir, shots, seed=seed, type=sim_type) + return [map_result_list_to_str(r) for r in results] + + +def check_result( + qir_fragment: str, + expected: str, + *, + extra_decls: str = "", + num_qubits: int = 1, + num_results: int = 1, + record=None, + sim_type: str = "cpu", +): + """Assert every shot produces *expected*.""" + qir = format_qir( + qir_fragment, + extra_decls=extra_decls, + num_qubits=num_qubits, + num_results=num_results, + record=record, + ) + results = _run(qir, SHOTS, sim_type=sim_type) + counts = Counter(results) + assert counts == { + expected: SHOTS + }, f"Expected all {SHOTS} 
shots to be '{expected}', got {counts}" + + +def check_arith_result(qir_fragment: str, expected: str, sim_type: str = "cpu"): + body = build_arith_body(qir_fragment) + check_result(body, expected, sim_type=sim_type) + + +_DECLS = """\ +declare void @__quantum__qis__x__body(%Qubit*) +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare void @__quantum__qis__mz__body(%Qubit*, %Result*) #1 +declare void @__quantum__qis__reset__body(%Qubit*) +declare void @__quantum__qis__cnot__body(%Qubit*, %Qubit*) +declare void @__quantum__qis__z__body(%Qubit*) +declare void @__quantum__qis__s__body(%Qubit*) +declare void @__quantum__qis__t__body(%Qubit*) +declare void @__quantum__qis__cz__body(%Qubit*, %Qubit*) +declare void @__quantum__qis__rz__body(double, %Qubit*) +declare i1 @__quantum__qis__read_result__body(%Result*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) +declare void @__quantum__rt__initialize(i8*) +""" + + +def format_qir( + body: str, + *, + extra_decls: str = "", + num_qubits: int = 1, + num_results: int = 1, + record=None, +): + if record is None: + record = range(num_results) + output_recording = ( + f" call void @__quantum__rt__tuple_record_output(i64 {len(record)}, i8* null)" + ) + for result_id in record: + output_recording += f"\n call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 {result_id} to %Result*), i8* null)" + + return f"""\ +%Result = type opaque +%Qubit = type opaque + +define i64 @ENTRYPOINT__main() #0 {{ +{body} +{output_recording} + ret i64 0 +}} + +{_DECLS} +{extra_decls} +attributes #0 = {{ "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="{num_qubits}" "required_num_results"="{num_results}" }} +attributes #1 = {{ "irreversible" }} +""" + + +def build_arith_body( + arith_fragment: str, +): + """Builds the body for a QIR module that does classical work and + 
then conditionally applies X to qubit 0 before measuring into result 0. + + *arith_fragment* should produce ``%flag`` (i1) which, when true, causes X. + The measurement of qubit 0 into result 0 is the observable. + """ + return f"""\ +entry: +{arith_fragment} + br i1 %flag, label %then, label %end +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %end +end: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +# ######################################################################### +# Control Flow +# ######################################################################### + + +# ========================================================================= +# OP_NOP — no-op +# ========================================================================= + +NOP_SMOKE_QIR = """ +entry: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_nop_smoke(sim_type): + """Minimal program: just measure |0⟩ → always 0.""" + check_result(NOP_SMOKE_QIR, "0", sim_type=sim_type) + + +# ========================================================================= +# OP_RET — return / program termination +# ========================================================================= + +RET_QIR = """ +entry: + ret i64 0 + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_ret(sim_type): + check_result(RET_QIR, "0", sim_type=sim_type) + + +# ========================================================================= +# OP_JUMP — unconditional jump +# ========================================================================= + +JUMP_QIR = """ +entry: 
+ br label %target + ret i64 0 ; early return - unreachable +target: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_jump(sim_type): + """Unconditional jump lands at target block, X applied → measure 1.""" + check_result(JUMP_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_BRANCH — conditional branch +# ========================================================================= + +BRANCH_TRUE_QIR = """ +entry: + %c = icmp eq i64 1, 1 + br i1 %c, label %yes, label %no + ret i64 0 ; early return - unreachable +yes: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +no: + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +BRANCH_FALSE_QIR = """ +entry: + %c = icmp eq i64 1, 2 + br i1 %c, label %yes, label %no + ret i64 0 ; early return - unreachable +yes: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +no: + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_branch_true(sim_type): + check_result(BRANCH_TRUE_QIR, "1", sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_branch_false(sim_type): + check_result(BRANCH_FALSE_QIR, "0", sim_type=sim_type) + + +# ========================================================================= +# OP_SWITCH — switch dispatch +# ========================================================================= + +SWITCH_CASE1_QIR = """ +entry: + %val = add i64 0, 1 + switch i64 %val, label 
%default [ + i64 0, label %case0 + i64 1, label %case1 + i64 2, label %case2 + ] +case0: + br label %measure +case1: + ; This is the expected path for val==1 + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +case2: + br label %measure +default: + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +SWITCH_DEFAULT_QIR = """ +entry: + %val = add i64 0, 99 + switch i64 %val, label %default [ + i64 0, label %case0 + i64 1, label %case1 + ] +case0: + br label %measure +case1: + br label %measure +default: + ; val=99 takes default path → X applied + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_switch_case(sim_type): + check_result(SWITCH_CASE1_QIR, "1", sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_switch_default(sim_type): + check_result(SWITCH_DEFAULT_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_CALL / OP_CALL_RETURN — function calls +# ========================================================================= + +CALL_QIR = """ +entry: + call void @apply_x(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +CALL_QIR_FN = """ +define void @apply_x(%Qubit* %q) { +entry: + call void @__quantum__qis__x__body(%Qubit* %q) + ret void +} +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_call_and_return(sim_type): + """Call a helper function that applies X, then measure.""" + check_result(CALL_QIR, "1", extra_decls=CALL_QIR_FN, sim_type=sim_type) + + +# 
######################################################################### +# Quantum +# ######################################################################### + + +# ========================================================================= +# OP_QUANTUM_GATE — single and two-qubit gates +# ========================================================================= + +GATE_X_QIR = """ +entry: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +GATE_CNOT_QIR = """ +entry: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__cnot__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_gate_x(sim_type): + check_result(GATE_X_QIR, "1", sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_gate_cnot(sim_type): + check_result(GATE_CNOT_QIR, "1", num_qubits=2, sim_type=sim_type) + + +# ========================================================================= +# OP_MEASURE — measurement (also see OP_READ_RESULT below) +# ========================================================================= + +MZ_THEN_RESET_QIR = """ +entry: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + ; After mz, qubit should still be |1⟩ + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__qis__reset__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + ; After reset, qubit should be |0⟩ + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 0 to 
%Qubit*), %Result* inttoptr (i64 2 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_mz_then_reset(sim_type): + "X → MZ → MZ → reset should give 110." + check_result(MZ_THEN_RESET_QIR, "110", num_results=3, sim_type=sim_type) + + +# ========================================================================= +# OP_RESET — qubit reset +# ========================================================================= + +RESET_QIR = """ +entry: + ; Put qubit 0 in |1⟩ + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + ; Reset it back to |0⟩ + call void @__quantum__qis__reset__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + ; Measure — should be 0 + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_reset(sim_type): + """X → reset → measure should give 0.""" + check_result(RESET_QIR, "0", sim_type=sim_type) + + +# ========================================================================= +# OP_READ_RESULT + OP_MEASURE — read measurement results +# ========================================================================= + +READ_RESULT_QIR = """ +entry: + ; Prepare |1⟩ on qubit 0 via X + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + ; Measure qubit 0 → should always be 1 + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + ; Read back the result + %r = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + ; If result was 1, apply X again so qubit is back in |1⟩ for second measurement + br i1 %r, label %then, label %end + +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %end + +end: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) +""" + + 
+@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_read_result(sim_type): + """X → MResetZ → read_result → if 1: X again → MResetZ. + First result is always 1, read_result sees it, applies X, second result is also 1. + """ + check_result(READ_RESULT_QIR, "11", num_results=2, sim_type=sim_type) + + +# ========================================================================= +# OP_RECORD_OUTPUT — output recording +# ========================================================================= + +RECORD_OUTPUT_QIR = """ +entry: + ; q0 = |1⟩, q1 = |0⟩ + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_record_output_ordering(sim_type): + """Two results recorded: result0=1, result1=0 → '10'.""" + check_result(RECORD_OUTPUT_QIR, "10", num_qubits=2, num_results=2, sim_type=sim_type) + + +# ######################################################################### +# Integer Arithmetic +# ######################################################################### + +INT_ARITH_PARAMS = [ + # Int + ("add", 3, 4, 7), + ("sub", 10, 3, 7), + ("sub", 3, 10, -7), + ("mul", 6, 7, 42), + ("udiv", 42, 7, 6), + ("sdiv", -42, 7, -6), + ("urem", 10, 3, 1), + ("srem", -10, 3, -1), + # Bitwise + ("and", 255, 15, 15), + ("or", 240, 15, 255), + ("xor", 255, 15, 240), + ("shl", 1, 3, 8), + ("lshr", 32, 2, 8), + ("ashr", -16, 2, -4), +] + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + INT_ARITH_PARAMS, +) +def test_int_arith_imm_imm(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %a = {bin_op} i64 {lhs}, {rhs} + %flag = icmp eq i64 %a, {expected}""", + "1", + sim_type=sim_type, + ) + + 
+@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + INT_ARITH_PARAMS, +) +def test_int_arith_imm_reg(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %rhs = add i64 {rhs}, 0 + %a = {bin_op} i64 {lhs}, %rhs + %flag = icmp eq i64 %a, {expected}""", + "1", + sim_type=sim_type, + ) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + INT_ARITH_PARAMS, +) +def test_int_arith_reg_imm(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %lhs = add i64 {lhs}, 0 + %a = {bin_op} i64 %lhs, {rhs} + %flag = icmp eq i64 %a, {expected}""", + "1", + sim_type=sim_type, + ) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + INT_ARITH_PARAMS, +) +def test_int_arith_reg_reg(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %lhs = add i64 {lhs}, 0 + %rhs = add i64 {rhs}, 0 + %a = {bin_op} i64 %lhs, %rhs + %flag = icmp eq i64 %a, {expected}""", + "1", + sim_type=sim_type, + ) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + INT_ARITH_PARAMS, +) +def test_int_arith_negative_test(sim_type, bin_op, lhs, rhs, expected): + """Checks that the tests fail if the result is different from the expected value.""" + expected = 12345 + check_arith_result( + f""" + %a = {bin_op} i64 {lhs}, {rhs} + %flag = icmp eq i64 %a, {expected}""", + "0", + sim_type=sim_type, + ) + + +# ######################################################################### +# Comparison (OP_ICMP, OP_FCMP) +# ######################################################################### + + +# ========================================================================= +# OP_ICMP — integer comparison (all condition codes) +# ========================================================================= + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) 
+@pytest.mark.parametrize( + "pred,lhs,rhs,expected", + [ + ("eq", 2, 2, "1"), + ("eq", 2, 3, "0"), + ("ne", 2, 3, "1"), + ("ne", 2, 2, "0"), + ("slt", 2, 3, "1"), + ("slt", 2, 2, "0"), + ("sle", 2, 2, "1"), + ("sle", 3, 2, "0"), + ("sgt", 3, 2, "1"), + ("sgt", 2, 3, "0"), + ("sge", 3, 3, "1"), + ("sge", 2, 3, "0"), + ("ult", 2, 3, "1"), + ("ult", 3, 2, "0"), + ("ule", 3, 3, "1"), + ("ule", 3, 2, "0"), + ("ugt", 3, 2, "1"), + ("ugt", 2, 3, "0"), + ("uge", 3, 3, "1"), + ("uge", 2, 3, "0"), + ], +) +def test_icmp(sim_type, pred, lhs, rhs, expected): + check_arith_result( + f"%flag = icmp {pred} i64 {lhs}, {rhs}", + expected, + sim_type=sim_type, + ) + + +# ========================================================================= +# OP_ICMP — signed vs unsigned edge case (negative as unsigned) +# ========================================================================= + +ICMP_SIGNED_VS_UNSIGNED_QIR = """ + ; -1 in two's complement is 0xFFFFFFFFFFFFFFFF, which is the max u64 + ; signed: -1 < 0 → true + %neg1 = sub i64 0, 1 + %flag = icmp slt i64 %neg1, 0 +""" + +ICMP_UNSIGNED_WRAP_QIR = """ + ; unsigned: -1 wraps to max u64, so -1 > 0 → true (unsigned) + %neg1 = sub i64 0, 1 + %flag = icmp ugt i64 %neg1, 0 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_icmp_signed_negative(sim_type): + check_arith_result(ICMP_SIGNED_VS_UNSIGNED_QIR, "1", sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_icmp_unsigned_wrap(sim_type): + check_arith_result(ICMP_UNSIGNED_WRAP_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_FCMP — float comparison +# ========================================================================= + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "pred,lhs,rhs,expected", + [ + ("oeq", "3.0", "3.0", "1"), + ("oeq", "3.0", "4.0", "0"), + ("one", "3.0", "4.0", "1"), + ("one", "3.0", "3.0", "0"), + ("olt", "2.0", 
"3.0", "1"), + ("olt", "3.0", "2.0", "0"), + ("ole", "3.0", "3.0", "1"), + ("ole", "4.0", "3.0", "0"), + ("ogt", "4.0", "3.0", "1"), + ("ogt", "3.0", "4.0", "0"), + ("oge", "3.0", "3.0", "1"), + ("oge", "2.0", "3.0", "0"), + ], +) +def test_fcmp(sim_type, pred, lhs, rhs, expected): + check_arith_result( + f"%flag = fcmp {pred} double {lhs}, {rhs}", + expected, + sim_type=sim_type, + ) + + +# ######################################################################### +# Float Arithmetic (OP_FADD → OP_FDIV) +# ######################################################################### + +FLOAT_ARITH_PARAMS = [ + ("fadd", 1.5, 2.5, 4.0), + ("fsub", 10.0, 3.0, 7.0), + ("fsub", 3.0, 10.0, -7.0), + ("fmul", 6.0, 7.0, 42.0), + ("fdiv", 8.0, 2.0, 4.0), +] + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + FLOAT_ARITH_PARAMS, +) +def test_float_arith_imm_imm(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %a = {bin_op} double {lhs}, {rhs} + %flag = fcmp oeq double %a, {expected}""", + "1", + sim_type=sim_type, + ) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + FLOAT_ARITH_PARAMS, +) +def test_float_arith_imm_reg(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %rhs = fadd double {rhs}, 0.0 + %a = {bin_op} double {lhs}, %rhs + %flag = fcmp oeq double %a, {expected}""", + "1", + sim_type=sim_type, + ) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + FLOAT_ARITH_PARAMS, +) +def test_float_arith_reg_imm(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %lhs = fadd double {lhs}, 0.0 + %a = {bin_op} double %lhs, {rhs} + %flag = fcmp oeq double %a, {expected}""", + "1", + sim_type=sim_type, + ) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + FLOAT_ARITH_PARAMS, +) +def 
test_float_arith_reg_reg(sim_type, bin_op, lhs, rhs, expected): + check_arith_result( + f""" + %lhs = fadd double {lhs}, 0.0 + %rhs = fadd double {rhs}, 0.0 + %a = {bin_op} double %lhs, %rhs + %flag = fcmp oeq double %a, {expected}""", + "1", + sim_type=sim_type, + ) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +@pytest.mark.parametrize( + "bin_op,lhs,rhs,expected", + FLOAT_ARITH_PARAMS, +) +def test_float_arith_negative_test(sim_type, bin_op, lhs, rhs, expected): + """Checks that the tests fail if the result is different from the expected value.""" + expected = 12345.0 + check_arith_result( + f""" + %a = {bin_op} double {lhs}, {rhs} + %flag = fcmp oeq double %a, {expected}""", + "0", + sim_type=sim_type, + ) + + +# ######################################################################### +# Type Conversion (OP_ZEXT → OP_SITOFP) +# ######################################################################### + + +# ========================================================================= +# OP_ZEXT — zero extension +# ========================================================================= + +ZEXT_QIR = """ + ; zext i1 true to i64 → 1, check 1 == 1 → true + %z = zext i1 true to i64 + %flag = icmp eq i64 %z, 1 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_zext(sim_type): + check_arith_result(ZEXT_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_SEXT — sign extension +# ========================================================================= + +SEXT_QIR = """ + ; sext i1 true to i64 → -1 (all ones), check -1 < 0 → true + %s = sext i1 true to i64 + %flag = icmp slt i64 %s, 0 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_sext(sim_type): + check_arith_result(SEXT_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_TRUNC — truncation +# 
========================================================================= + +TRUNC_QIR = """ + ; trunc i64 257 to i32 → 257 (fits), check 257 == 257 → true + %t = trunc i64 257 to i32 + %z = zext i32 %t to i64 + %flag = icmp eq i64 %z, 257 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_trunc(sim_type): + check_arith_result(TRUNC_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_FPEXT / OP_FPTRUNC — float extension/truncation +# ========================================================================= + +FPEXT_QIR = """ + ; fpext float 3.0 to double, then check == 3 + %f32 = fadd float 1.0, 2.0 + %f64 = fpext float %f32 to double + %i = fptosi double %f64 to i64 + %flag = icmp eq i64 %i, 3 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_fpext(sim_type): + check_arith_result(FPEXT_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_INTTOPTR / OP_MOV — dynamic qubit addressing +# ========================================================================= + +INTTOPTR_QIR = """ +entry: + ; Compute qubit ID 0 from arithmetic + %q_id = sub i64 1, 1 + %q = inttoptr i64 %q_id to %Qubit* + call void @__quantum__qis__x__body(%Qubit* %q) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_inttoptr_dynamic_qubit(sim_type): + check_result(INTTOPTR_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_FPTOSI — float to signed int +# ========================================================================= + +FPTOSI_QIR = """ + ; fptosi -3.7 → -3 (truncation toward zero), check -3 < 0 → true + %neg = fsub double 0.0, 3.7 + %i = fptosi double %neg to i64 + %flag = icmp slt i64 %i, 0 +""" + + 
+@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_fptosi(sim_type): + check_arith_result(FPTOSI_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_SITOFP — signed int to float +# ========================================================================= + +SITOFP_QIR = """ + ; sitofp -5 → -5.0, then -5.0 < 0.0 → true + %neg5 = sub i64 0, 5 + %f = sitofp i64 %neg5 to double + %zero = sitofp i64 0 to double + %flag = fcmp olt double %f, %zero +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_sitofp(sim_type): + check_arith_result(SITOFP_QIR, "1", sim_type=sim_type) + + +# ######################################################################### +# SSA / Data Movement (OP_PHI → OP_CONST) +# ######################################################################### + + +# ========================================================================= +# OP_PHI — phi node +# ========================================================================= + +PHI_LOOP_QIR = """ +entry: + br label %loop + +loop: + %i = phi i64 [ 0, %entry ], [ %next, %loop ] + %next = add i64 %i, 1 + %cond = icmp slt i64 %next, 5 + br i1 %cond, label %loop, label %done + +done: + ; %next should be 5 here + %flag = icmp eq i64 %next, 5 + br i1 %flag, label %apply_x, label %measure + +apply_x: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure + +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_phi_loop_counter(sim_type): + check_result(PHI_LOOP_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_SELECT +# ========================================================================= + +SELECT_TRUE_QIR = """ + ; select i1 true, i64 1, i64 0 → 1, then icmp eq 1, 1 → true + 
%s = select i1 true, i64 1, i64 0 + %flag = icmp eq i64 %s, 1 +""" + +SELECT_FALSE_QIR = """ + ; select i1 false, i64 1, i64 0 → 0, then icmp eq 0, 0 → true + %s = select i1 false, i64 1, i64 0 + %flag = icmp eq i64 %s, 0 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_select_true(sim_type): + check_arith_result(SELECT_TRUE_QIR, "1", sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_select_false(sim_type): + check_arith_result(SELECT_FALSE_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_CONST — constant materialization +# ========================================================================= + +CONST_QIR = """ + ; Use a specific constant 12345, check add identity + %a = add i64 12345, 0 + %flag = icmp eq i64 %a, 12345 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_const(sim_type): + check_arith_result(CONST_QIR, "1", sim_type=sim_type) + + +# ######################################################################### +# Boolean (i1) variants of bitwise ops +# ######################################################################### + + +# ========================================================================= +# OP_AND with i1 (boolean AND) — used in classical boolean logic +# ========================================================================= + +AND_I1_QIR = """ +entry: + ; Prepare both qubits in |1⟩ deterministically + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + %r0 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + %r1 = call i1 
@__quantum__qis__read_result__body(%Result* inttoptr (i64 1 to %Result*)) + %both = and i1 %r0, %r1 + ; both should be true (1 AND 1 = 1), apply X → measure 1 + br i1 %both, label %then, label %measure + +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure + +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_and_i1_boolean(sim_type): + """Deterministic boolean AND: both qubits |1⟩ → and i1 true, true → X → 1.""" + check_result(AND_I1_QIR, "1", num_qubits=2, num_results=3, record=[2], sim_type=sim_type) + + +# ========================================================================= +# OP_OR with i1 (boolean OR) +# ========================================================================= + +OR_I1_QIR = """ +entry: + ; q0 = |1⟩, q1 = |0⟩ + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + %r0 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + %r1 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 1 to %Result*)) + %either = or i1 %r0, %r1 + ; true OR false = true → X → measure 1 + br i1 %either, label %then, label %measure +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_or_i1_boolean(sim_type): + """Deterministic boolean OR: q0=1, q1=0 → or i1 true, false → true → X → 1.""" + check_result(OR_I1_QIR, "1", num_qubits=2, 
num_results=3, record=[2], sim_type=sim_type) + + +# ========================================================================= +# OP_XOR with i1 (boolean XOR / NOT) +# ========================================================================= + +XOR_NOT_QIR = """ +entry: + ; q0 = |0⟩ → measure 0 + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + %r0 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + ; XOR with true is NOT: false XOR true = true + %not_r0 = xor i1 %r0, true + br i1 %not_r0, label %then, label %measure + +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure + +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_xor_i1_not(sim_type): + """XOR i1 used as NOT: measure 0 → XOR true → true → X → 1.""" + check_result(XOR_NOT_QIR, "1", num_qubits=1, num_results=2, record=[1], sim_type=sim_type) + + +# ######################################################################### +# Compound / Integration Tests +# ######################################################################### + + +# ========================================================================= +# Chained arithmetic — complex expression +# ========================================================================= + +CHAINED_ARITH_QIR = """ + ; (3 + 4) * 2 - 1 = 13, check 13 == 13 → true + %a = add i64 3, 4 + %b = mul i64 %a, 2 + %c = sub i64 %b, 1 + %flag = icmp eq i64 %c, 13 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_chained_arithmetic(sim_type): + check_arith_result(CHAINED_ARITH_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_PHI with multiple predecessors (diamond CFG) +# 
========================================================================= + +PHI_DIAMOND_QIR = """ +entry: + %c = icmp eq i64 1, 1 + br i1 %c, label %left, label %right +left: + br label %merge +right: + br label %merge +merge: + ; From left: 42, from right: 0. Since condition is true, we go left → 42. + %v = phi i64 [ 42, %left ], [ 0, %right ] + %flag = icmp eq i64 %v, 42 + br i1 %flag, label %apply_x, label %measure +apply_x: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_phi_diamond(sim_type): + """Diamond CFG with phi: true branch → phi resolves to 42 → X → 1.""" + check_result(PHI_DIAMOND_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_SELECT with computed condition +# ========================================================================= + +SELECT_COMPUTED_QIR = """ + ; 5 > 3 is true → select returns 10, check 10 == 10 → true + %cmp = icmp sgt i64 5, 3 + %s = select i1 %cmp, i64 10, i64 20 + %flag = icmp eq i64 %s, 10 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_select_computed(sim_type): + check_arith_result(SELECT_COMPUTED_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# Nested loop — OP_PHI + OP_BRANCH + OP_ADD + OP_ICMP combined +# ========================================================================= + +NESTED_LOOP_SUM_QIR = """ +entry: + br label %loop +loop: + %i = phi i64 [ 1, %entry ], [ %next_i, %loop ] + %sum = phi i64 [ 0, %entry ], [ %next_sum, %loop ] + %next_sum = add i64 %sum, %i + %next_i = add i64 %i, 1 + %cond = icmp sle i64 %next_i, 5 + br i1 %cond, label %loop, label %done +done: + ; %next_sum should be 15 + %flag = icmp eq i64 %next_sum, 
15 + br i1 %flag, label %apply_x, label %measure +apply_x: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_nested_loop_sum(sim_type): + """Sum 1..5 using phi loop, check total == 15.""" + check_result(NESTED_LOOP_SUM_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_QUANTUM_GATE — dynamic qubit addressing in a loop (GHZ-like) +# ========================================================================= + +DYNAMIC_QUBIT_LOOP_QIR = """ +entry: + ; Create |+⟩ on q0 + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %loop +loop: + %i = phi i64 [ 1, %entry ], [ %next_i, %loop ] + %qi = inttoptr i64 %i to %Qubit* + call void @__quantum__qis__cnot__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Qubit* %qi) + %next_i = add i64 %i, 1 + %cond = icmp sle i64 %next_i, 2 + br i1 %cond, label %loop, label %measure +measure: + ; Measure all 3 qubits — GHZ state means all agree + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_dynamic_qubit_loop(sim_type): + """3-qubit GHZ via dynamic qubit loop — only '000' and '111' should appear.""" + qir = format_qir(DYNAMIC_QUBIT_LOOP_QIR, num_qubits=3, num_results=3) + results = _run(qir, shots=5000, seed=42, sim_type=sim_type) + counts = Counter(results) + assert set(counts.keys()) <= {"000", "111"}, f"Unexpected GHZ outcomes: {counts}" + 
assert counts.get("000", 0) > 1500 + assert counts.get("111", 0) > 1500 + + +# ========================================================================= +# OP_SHL + OP_OR combined — bit packing +# ========================================================================= + +BIT_PACK_QIR = """ + ; Pack bits: (1 << 2) | 1 = 5, check 5 == 5 → true + %shifted = shl i64 1, 2 + %packed = or i64 %shifted, 1 + %flag = icmp eq i64 %packed, 5 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_bit_packing(sim_type): + check_arith_result(BIT_PACK_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# Combined test: all shift and bitwise ops in sequence +# ========================================================================= + +SHIFT_BITWISE_CHAIN_QIR = """ + ; Start with 0b1010 = 10 + ; SHL by 1 → 0b10100 = 20 + ; OR with 0b00011 = 3 → 0b10111 = 23 + ; AND with 0b11110 = 30 → 0b10110 = 22 + ; XOR with 0b00010 = 2 → 0b10100 = 20 + ; LSHR by 2 → 0b00101 = 5 + %step1 = shl i64 10, 1 + %step2 = or i64 %step1, 3 + %step3 = and i64 %step2, 30 + %step4 = xor i64 %step3, 2 + %step5 = lshr i64 %step4, 2 + %flag = icmp eq i64 %step5, 5 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_shift_bitwise_chain(sim_type): + check_arith_result(SHIFT_BITWISE_CHAIN_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_SWITCH with computed value from arithmetic +# ========================================================================= + +SWITCH_ARITH_QIR = """ +entry: + ; Compute 2 * 3 - 4 = 2 + %a = mul i64 2, 3 + %val = sub i64 %a, 4 + switch i64 %val, label %default [ + i64 0, label %case0 + i64 1, label %case1 + i64 2, label %case2 + i64 3, label %case3 + ] +case0: + br label %measure +case1: + br label %measure +case2: + ; Expected path + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure 
+case3: + br label %measure +default: + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_switch_from_arithmetic(sim_type): + """Switch on computed value 2*3-4=2 → case2 → X → 1.""" + check_result(SWITCH_ARITH_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# Float: sitofp → fadd → fptosi round-trip +# ========================================================================= + +FLOAT_ROUNDTRIP_QIR = """ + ; sitofp 7 → 7.0, fadd 7.0 + 3.0 → 10.0, fptosi → 10, check == 10 + %f = sitofp i64 7 to double + %three = fadd double 0.0, 3.0 + %sum = fadd double %f, %three + %i = fptosi double %sum to i64 + %flag = icmp eq i64 %i, 10 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_float_roundtrip(sim_type): + check_arith_result(FLOAT_ROUNDTRIP_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_CALL with return value +# ========================================================================= + +CALL_WITH_RETVAL_QIR = """ +entry: + %result = call i64 @add_numbers(i64 3, i64 4) + %flag = icmp eq i64 %result, 7 + br i1 %flag, label %then, label %measure +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +CALL_WITH_RETVAL_QIR_FN = """ +define i64 @add_numbers(i64 %a, i64 %b) { +entry: + %sum = add i64 %a, %b + ret i64 %sum +} +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_call_with_return_value(sim_type): + """Call a function returning i64, use result in comparison.""" + check_result(CALL_WITH_RETVAL_QIR, "1", extra_decls=CALL_WITH_RETVAL_QIR_FN, sim_type=sim_type) + + 
+# ========================================================================= +# OP_MUL + OP_UDIV + OP_UREM combined +# ========================================================================= + +MUL_DIV_REM_QIR = """ + ; 17 / 5 = 3 (udiv), 17 % 5 = 2 (urem), 3 * 5 + 2 = 17 + %q = udiv i64 17, 5 + %r = urem i64 17, 5 + %product = mul i64 %q, 5 + %reconstructed = add i64 %product, %r + %flag = icmp eq i64 %reconstructed, 17 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_mul_div_rem_identity(sim_type): + """Division identity: (a/b)*b + (a%b) == a.""" + check_arith_result(MUL_DIV_REM_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# OP_MEASURE with mid-circuit branch (measure-and-correct pattern) +# ========================================================================= + +MEASURE_BRANCH_QIR = """ +entry: + ; Deterministically put qubit in |1⟩ + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + ; Measure (should be 1) and reset to |0⟩ + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + %r = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + ; Since r=1, branch to 'correct' which applies X to restore |1⟩ + br i1 %r, label %correct, label %measure + +correct: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %measure + +measure: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_measure_and_branch(sim_type): + """Deterministic measure-and-correct: X→MResetZ→read_result→X→MResetZ → always 1.""" + check_result(MEASURE_BRANCH_QIR, "1", num_results=2, record=[1], sim_type=sim_type) + + +# ========================================================================= +# OP_ADD with register-register (no 
immediates) +# ========================================================================= + +ADD_REG_REG_QIR = """ + ; Use computed values in registers, not just immediates + %a = add i64 2, 1 + %b = add i64 3, 1 + %c = add i64 %a, %b + ; 3 + 4 = 7 + %flag = icmp eq i64 %c, 7 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_add_register_register(sim_type): + check_arith_result(ADD_REG_REG_QIR, "1", sim_type=sim_type) + + +# ######################################################################### +# Regression tests — exercising specific edge-cases that previously failed +# ######################################################################### + + +# ========================================================================= +# SREM with negative dividend +# ========================================================================= + +SREM_NEG_DIVIDEND_QIR = """ + ; -7 % 2 = -1, verify result < 0 + %neg7 = sub i64 0, 7 + %a = srem i64 %neg7, 2 + %flag = icmp slt i64 %a, 0 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_srem_negative_dividend(sim_type): + """srem must preserve the sign of the dividend.""" + check_arith_result(SREM_NEG_DIVIDEND_QIR, "1", sim_type=sim_type) + + +SREM_NEG_BOTH_QIR = """ + ; -10 % -3 = -1 (sign follows dividend) + %neg10 = sub i64 0, 10 + %neg3 = sub i64 0, 3 + %a = srem i64 %neg10, %neg3 + %neg1 = sub i64 0, 1 + %flag = icmp eq i64 %a, %neg1 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_srem_negative_both(sim_type): + """srem with both operands negative.""" + check_arith_result(SREM_NEG_BOTH_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# SEXT from i1 (sign-extension must convert 1 → -1) +# ========================================================================= + +SEXT_I1_FALSE_QIR = """ + ; sext i1 false to i64 → 0, check 0 == 0 → true + %s = sext i1 false to i64 + %flag = icmp eq i64 %s, 0 +""" + + 
+@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_sext_i1_false(sim_type): + """sext of false (i1 0) must be 0.""" + check_arith_result(SEXT_I1_FALSE_QIR, "1", sim_type=sim_type) + + +SEXT_I1_RUNTIME_QIR = """ + ; compute i1 true at runtime, sext → -1, check < 0 + %one = add i64 1, 0 + %b = icmp eq i64 %one, 1 + %s = sext i1 %b to i64 + %flag = icmp slt i64 %s, 0 +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_sext_i1_runtime(sim_type): + """sext of a runtime i1 true value must also sign-extend to -1.""" + check_arith_result(SEXT_I1_RUNTIME_QIR, "1", sim_type=sim_type) + + +# ========================================================================= +# Call to IR-defined function with inttoptr constant argument +# ========================================================================= + +CALL_INTTOPTR_ARG_QIR = """ +entry: + call void @apply_h_then_z_then_h(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +CALL_INTTOPTR_ARG_QIR_FN = """ +define void @apply_h_then_z_then_h(%Qubit* %q) { +entry: + call void @__quantum__qis__h__body(%Qubit* %q) + call void @__quantum__qis__z__body(%Qubit* %q) + call void @__quantum__qis__h__body(%Qubit* %q) + ret void +} +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_call_inttoptr_arg(sim_type): + """Call a helper with an inttoptr constant expression argument.""" + check_result(CALL_INTTOPTR_ARG_QIR, "1", extra_decls=CALL_INTTOPTR_ARG_QIR_FN, sim_type=sim_type) + + +# ========================================================================= +# SITOFP with negative value (signed int → float) +# ========================================================================= + +SITOFP_NEG_QIR = """ + ; sitofp -3 → -3.0, then -3.0 < 0.0 → true + %neg3 = sub i64 0, 3 + %f = sitofp i64 %neg3 to double + %zero = sitofp i64 0 to double + %flag = fcmp olt double %f, %zero +""" + + 
+@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_sitofp_negative(sim_type): + """sitofp must correctly convert a negative integer.""" + check_arith_result(SITOFP_NEG_QIR, "1", sim_type=sim_type) + + +# ######################################################################### +# Dynamic register file sizing (programs exceeding 128 registers) +# ######################################################################### + + +def _run_openqasm(qasm_src: str, shots: int = SHOTS, seed: int = 42, sim_type: str = "cpu"): + """Compile OpenQASM source via the adaptive pass and run on the given simulator.""" + qir = qsharp.openqasm.compile( + qasm_src, + output_semantics=qsharp.openqasm.OutputSemantics.OpenQasm, + target_profile=qsharp.TargetProfile.Adaptive_RIF, + ) + results = run_qir(qir, shots, seed=seed, type=sim_type) + return [map_result_list_to_str(r) for r in results] + + +# ========================================================================= +# Complex RUS loop — requires >128 registers after loop unrolling +# ========================================================================= + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_complex_rus_exceeds_128_registers(sim_type): + """A complex repeat-until-success pattern with 50 iterations. + + The Q# compiler fully unrolls the loop for the Adaptive_RIF profile, + producing ~301 registers — well above the old fixed limit of 128. + This validates that dynamic register file sizing works correctly. 
+ """ + qasm_src = """\ +OPENQASM 3.0; +include "stdgates.inc"; +qubit[4] q; +bit c; +int total = 0; +int i = 0; +while (i < 50) { + h q[0]; + cx q[0], q[1]; + c = measure q[0]; + if (c) { + x q[1]; + reset q[0]; + total = total + 1; + } + h q[2]; + cx q[2], q[3]; + c = measure q[2]; + if (c) { + x q[3]; + reset q[2]; + total = total + 1; + } + i = i + 1; +} +bit[4] result = measure q; +""" + results = _run_openqasm(qasm_src, shots=100, sim_type=sim_type) + assert all( + len(r) >= 4 and all(c in "01" for c in r) for r in results + ), f"Unexpected result format: {results[:5]}" diff --git a/source/pip/tests/test_adaptive_cpu_noise.py b/source/pip/tests/test_adaptive_cpu_noise.py new file mode 100644 index 0000000000..4aec68ccf2 --- /dev/null +++ b/source/pip/tests/test_adaptive_cpu_noise.py @@ -0,0 +1,410 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Noise tests for the adaptive CPU bytecode interpreter. + +Each test targets noise injection by supplying hand-written Adaptive Profile +QIR that exercises noise channels and encodes the expected result into a +measurement outcome. + +This is a CPU counterpart to ``test_adaptive_gpu_noise.py``. 
+""" + +from collections import Counter +from typing import Optional, List +import pytest +import qsharp.openqasm + +from qsharp._simulation import run_qir, NoiseConfig, Result + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +SHOTS = 100 + +SIM_TYPES = ["cpu", "clifford"] + + +def map_result_list_to_str(results: List[Result]): + results_str = "" + for r in results: + match r: + case Result.Zero: + results_str += "0" + case Result.One: + results_str += "1" + case Result.Loss: + results_str += "L" + return results_str + + +def get_histogram( + qir_fragment: str, + *, + extra_decls: str = "", + num_qubits: int = 1, + num_results: int = 1, + noise: Optional[NoiseConfig] = None, + record: Optional[List[int]] = None, + shots=SHOTS, + sim_type: str = "cpu", +): + qir = format_qir( + qir_fragment, + extra_decls=extra_decls, + num_qubits=num_qubits, + num_results=num_results, + record=record, + ) + results = map( + map_result_list_to_str, run_qir(qir, shots, noise, seed=42, type=sim_type) + ) + return Counter(results) + + +def check_result( + qir_fragment: str, + expected: str, + *, + extra_decls: str = "", + num_qubits: int = 1, + num_results: int = 1, + noise: Optional[NoiseConfig] = None, + record: Optional[List[int]] = None, + sim_type: str = "cpu", +): + """Assert every shot produces *expected*.""" + counts = get_histogram( + qir_fragment, + extra_decls=extra_decls, + num_qubits=num_qubits, + num_results=num_results, + noise=noise, + record=record, + sim_type=sim_type, + ) + + assert counts == { + expected: SHOTS + }, f"Expected all {SHOTS} shots to be '{expected}', got {counts}" + + +_DECLS = """\ +declare void @__quantum__qis__x__body(%Qubit*) +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare void @__quantum__qis__mz__body(%Qubit*, %Result*) #1 +declare void 
@__quantum__qis__reset__body(%Qubit*) +declare void @__quantum__qis__cx__body(%Qubit*, %Qubit*) +declare void @__quantum__qis__z__body(%Qubit*) +declare void @__quantum__qis__s__body(%Qubit*) +declare void @__quantum__qis__t__body(%Qubit*) +declare void @__quantum__qis__cz__body(%Qubit*, %Qubit*) +declare void @__quantum__qis__rz__body(double, %Qubit*) +declare i1 @__quantum__qis__read_result__body(%Result*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) +declare void @__quantum__rt__initialize(i8*) +""" + + +def format_qir( + body: str, + *, + extra_decls: str = "", + num_qubits: int = 1, + num_results: int = 1, + record=None, +): + if record is None: + record = range(num_results) + output_recording = ( + f" call void @__quantum__rt__tuple_record_output(i64 {len(record)}, i8* null)" + ) + for result_id in record: + output_recording += f"\n call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 {result_id} to %Result*), i8* null)" + + return f"""\ +%Result = type opaque +%Qubit = type opaque + +define i64 @ENTRYPOINT__main() #0 {{ +{body} +{output_recording} + ret i64 0 +}} + +{_DECLS} +{extra_decls} +attributes #0 = {{ "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="{num_qubits}" "required_num_results"="{num_results}" }} +attributes #1 = {{ "irreversible" }} +""" + + +# The purpose of this test is to inject noise in an identity gate, and assert its behavior. +# Since QIS does not specify an identity gate, we use CNOT and inject noise in the target qubit. 
+I_QIR = """ +entry: + call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +H_I_H_QIR = """ +entry: + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__cx__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_no_noise_on_i_yields_0(sim_type): + check_result(I_QIR, "0", num_qubits=2, sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_x_noise_on_i_yields_1(sim_type): + noise = NoiseConfig() + noise.cx.ix = 1.0 + check_result(I_QIR, "1", num_qubits=2, noise=noise, sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_y_noise_on_i_yields_1(sim_type): + noise = NoiseConfig() + noise.cx.iy = 1.0 + check_result(I_QIR, "1", num_qubits=2, noise=noise, sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_z_noise_on_i_yields_0(sim_type): + noise = NoiseConfig() + noise.cx.iz = 1.0 + check_result(I_QIR, "0", num_qubits=2, noise=noise, sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_x_noise_on_h_i_h_yields_0(sim_type): + noise = NoiseConfig() + noise.cx.ix = 1.0 + check_result(H_I_H_QIR, "0", num_qubits=2, noise=noise, sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_y_noise_on_h_i_h_yields_1(sim_type): + noise = NoiseConfig() + noise.cx.iy = 1.0 + check_result(H_I_H_QIR, "1", num_qubits=2, noise=noise, sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def 
test_z_noise_on_h_i_h_yields_1(sim_type): + noise = NoiseConfig() + noise.cx.iz = 1.0 + check_result(H_I_H_QIR, "1", num_qubits=2, noise=noise, sim_type=sim_type) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_probabilistic_x_noise(sim_type): + noise = NoiseConfig() + noise.cx.ix = 0.5 + counts = get_histogram(I_QIR, shots=1000, num_qubits=2, noise=noise, sim_type=sim_type) + + assert counts["0"] > 400, f"Expected ~500 '0' results, got {counts['0']}" + assert counts["1"] > 400, f"Expected ~500 '1' results, got {counts['1']}" + + +QASM_WITH_CORRELATED_NOISE = """ +OPENQASM 3.0; +include "stdgates.inc"; + +@qdk.qir.noise_intrinsic +gate test_noise_intrinsic q0, q1, q2 {} + +qubit[3] qs; +x qs[1]; +test_noise_intrinsic qs[0], qs[1], qs[2]; +bit[3] res = measure qs; +""" + +QIR_WITH_CORRELATED_NOISE = qsharp.openqasm.compile( + QASM_WITH_CORRELATED_NOISE, + output_semantics=qsharp.openqasm.OutputSemantics.OpenQasm, + target_profile=qsharp.TargetProfile.Adaptive_RIF, +) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_noise_intrinsics_noiseless(sim_type): + output = run_qir(QIR_WITH_CORRELATED_NOISE, shots=1, noise=None, type=sim_type) + assert output == [[Result.Zero, Result.One, Result.Zero]] + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_noise_intrinsics_noisy(sim_type): + noise = NoiseConfig() + table = noise.intrinsic("test_noise_intrinsic", 3) + table.yyy = 1.0 + output = run_qir(QIR_WITH_CORRELATED_NOISE, shots=1, noise=noise, type=sim_type) + assert output == [[Result.One, Result.Zero, Result.One]] + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_noise_intrinsics_load_csv_dir(sim_type): + noise = NoiseConfig() + noise.load_csv_dir("./csv_dir_test") + output = run_qir(QIR_WITH_CORRELATED_NOISE, shots=1, noise=noise, type=sim_type) + assert output == [[Result.One, Result.Zero, Result.One]] + + +NOISE_INTRINSICS_WITH_REGISTERS_QIR = r""" +%Result = type opaque +%Qubit = type opaque + +@0 = internal 
constant [4 x i8] c"0_a\00" +@1 = internal constant [6 x i8] c"1_a0r\00" +@2 = internal constant [6 x i8] c"2_a1r\00" +@3 = internal constant [6 x i8] c"3_a2r\00" + +define i64 @ENTRYPOINT__main() #0 { +block_0: + %q1 = inttoptr i64 0 to %Qubit* + %q2 = inttoptr i64 1 to %Qubit* + %q3 = inttoptr i64 2 to %Qubit* + call void @__quantum__rt__initialize(i8* null) + call void @__quantum__qis__x__body(%Qubit* %q2) + call void @test_noise_intrinsic(%Qubit* %q1, %Qubit* %q2, %Qubit* %q3) + call void @__quantum__qis__m__body(%Qubit* %q1, %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__m__body(%Qubit* %q2, %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__qis__m__body(%Qubit* %q3, %Result* inttoptr (i64 2 to %Result*)) + call void @__quantum__rt__array_record_output(i64 3, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @0, i64 0, i64 0)) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 0 to %Result*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @1, i64 0, i64 0)) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 1 to %Result*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @2, i64 0, i64 0)) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @3, i64 0, i64 0)) + ret i64 0 +} + +declare void @__quantum__rt__initialize(i8*) +declare void @__quantum__qis__x__body(%Qubit*) +declare void @test_noise_intrinsic(%Qubit*, %Qubit*, %Qubit*) #2 +declare void @__quantum__qis__m__body(%Qubit*, %Result*) #1 +declare void @__quantum__rt__array_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "output_labeling_schema" "qir_profiles"="adaptive_profile" "required_num_qubits"="3" "required_num_results"="3" } +attributes #1 = { "irreversible" } +attributes #2 = { "qdk_noise" } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5} + +!0 = !{i32 1, 
!"qir_major_version", i32 1} +!1 = !{i32 7, !"qir_minor_version", i32 0} +!2 = !{i32 1, !"dynamic_qubit_management", i1 false} +!3 = !{i32 1, !"dynamic_result_management", i1 false} +!4 = !{i32 5, !"int_computations", !{!"i64"}} +!5 = !{i32 5, !"float_computations", !{!"double"}} +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_noise_intrinsics_with_registers_noisy(sim_type): + noise = NoiseConfig() + table = noise.intrinsic("test_noise_intrinsic", 3) + table.yyy = 1.0 + output = run_qir( + NOISE_INTRINSICS_WITH_REGISTERS_QIR, shots=1, noise=noise, type=sim_type + ) + assert output == [[Result.One, Result.Zero, Result.One]] + + +# --- Tests for varied qubit counts (1, 2, 5) --- + +QASM_NOISE_1Q = """ +OPENQASM 3.0; +include "stdgates.inc"; + +@qdk.qir.noise_intrinsic +gate noise_1q q0 {} + +qubit q; +noise_1q q; +bit res = measure q; +""" + +QIR_NOISE_1Q = qsharp.openqasm.compile( + QASM_NOISE_1Q, + output_semantics=qsharp.openqasm.OutputSemantics.OpenQasm, + target_profile=qsharp.TargetProfile.Adaptive_RIF, +) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_noise_intrinsic_1q_x_flip(sim_type): + noise = NoiseConfig() + table = noise.intrinsic("noise_1q", 1) + table.x = 1.0 + output = run_qir(QIR_NOISE_1Q, shots=1, noise=noise, type=sim_type) + assert output == [[Result.One]] + + +QASM_NOISE_2Q = """ +OPENQASM 3.0; +include "stdgates.inc"; + +@qdk.qir.noise_intrinsic +gate noise_2q q0, q1 {} + +qubit[2] qs; +x qs[0]; +noise_2q qs[0], qs[1]; +bit[2] res = measure qs; +""" + +QIR_NOISE_2Q = qsharp.openqasm.compile( + QASM_NOISE_2Q, + output_semantics=qsharp.openqasm.OutputSemantics.OpenQasm, + target_profile=qsharp.TargetProfile.Adaptive_RIF, +) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_noise_intrinsic_2q_xx_flip(sim_type): + noise = NoiseConfig() + table = noise.intrinsic("noise_2q", 2) + table.xx = 1.0 + # qs[0] was |1>, qs[1] was |0> -> XX flips both -> qs[0]=|0>, qs[1]=|1> + output = run_qir(QIR_NOISE_2Q, 
shots=1, noise=noise, type=sim_type) + assert output == [[Result.Zero, Result.One]] + + +QASM_NOISE_5Q = """ +OPENQASM 3.0; +include "stdgates.inc"; + +@qdk.qir.noise_intrinsic +gate noise_5q q0, q1, q2, q3, q4 {} + +qubit[5] qs; +x qs[1]; +x qs[3]; +noise_5q qs[0], qs[1], qs[2], qs[3], qs[4]; +bit[5] res = measure qs; +""" + +QIR_NOISE_5Q = qsharp.openqasm.compile( + QASM_NOISE_5Q, + output_semantics=qsharp.openqasm.OutputSemantics.OpenQasm, + target_profile=qsharp.TargetProfile.Adaptive_RIF, +) + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_noise_intrinsic_5q_xxxxx_flip(sim_type): + noise = NoiseConfig() + table = noise.intrinsic("noise_5q", 5) + table.xxxxx = 1.0 + # Initial: |01010> -> XXXXX flips all -> |10101> + output = run_qir(QIR_NOISE_5Q, shots=1, noise=noise, type=sim_type) + assert output == [[Result.One, Result.Zero, Result.One, Result.Zero, Result.One]] diff --git a/source/pip/tests/test_adaptive_cpu_quantum_ops.py b/source/pip/tests/test_adaptive_cpu_quantum_ops.py new file mode 100644 index 0000000000..db34a07c5d --- /dev/null +++ b/source/pip/tests/test_adaptive_cpu_quantum_ops.py @@ -0,0 +1,373 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""End-to-end tests for the adaptive CPU bytecode interpreter pipeline. + +Tests run Adaptive Profile QIR through the full pipeline: +Python AdaptiveProfilePass → Rust receiver → CPU interpreter → results. + +This is a CPU counterpart to ``test_adaptive_gpu_quantum_ops.py``. + +For smaller tests covering the full Adaptive Profile instruction set, +see ``test_adaptive_cpu_bytecode.py``. 
+""" + +from collections import Counter + +import pytest + +from qsharp._simulation import run_qir, Result + +SIM_TYPES = ["cpu", "clifford"] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def map_result_list_to_str(results): + results_str = "" + for r in results: + match r: + case Result.Zero: + results_str += "0" + case Result.One: + results_str += "1" + case Result.Loss: + results_str += "L" + return results_str + + +def _run(qir: str, shots: int, seed: int = 42, sim_type: str = "cpu"): + """Run *qir* on the given simulator and return shot results as a list of strings.""" + results = run_qir(qir, shots, seed=seed, type=sim_type) + return [map_result_list_to_str(r) for r in results] + + +# --------------------------------------------------------------------------- +# QIR source +# --------------------------------------------------------------------------- + +# Example 1: Measure-and-correct (H → MResetZ → read_result → branch → X) +MEASURE_AND_CORRECT_QIR = """\ +%Result = type opaque +%Qubit = type opaque + +define void @ENTRYPOINT__main() #0 { +entry: + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + %r = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + br i1 %r, label %then, label %end + +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %end + +end: + call void @__quantum__rt__tuple_record_output(i64 1, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 0 to %Result*), i8* null) + ret void +} + +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare i1 @__quantum__qis__read_result__body(%Result*) +declare void 
@__quantum__qis__x__body(%Qubit*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="1" "required_num_results"="1" } +""" + +# Example 3: Conditionally terminating loop +CONDITIONAL_LOOP_QIR = """\ +%Result = type opaque +%Qubit = type opaque + +define void @ENTRYPOINT__main() #0 { +entry: + br label %loop + +loop: + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + %r = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + br i1 %r, label %done, label %loop + +done: + call void @__quantum__rt__tuple_record_output(i64 1, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 0 to %Result*), i8* null) + ret void +} + +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare i1 @__quantum__qis__read_result__body(%Result*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="1" "required_num_results"="1" } +""" + +# Example 2: Loop with phi node — GHZ state preparation +LOOP_WITH_PHI_QIR = """\ +%Result = type opaque +%Qubit = type opaque + +define void @ENTRYPOINT__main() #0 { +entry: + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %loop + +loop: + %i = phi i64 [ 1, %entry ], [ %next_i, %loop ] + %qi = inttoptr i64 %i to %Qubit* + call void @__quantum__qis__cnot__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Qubit* %qi) + %next_i = add i64 %i, 1 + %cond = icmp sle i64 %next_i, 4 + br i1 %cond, label %loop, label %measure + +measure: + call void 
@__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 3 to %Qubit*), %Result* inttoptr (i64 3 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 4 to %Qubit*), %Result* inttoptr (i64 4 to %Result*)) + call void @__quantum__rt__tuple_record_output(i64 5, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 0 to %Result*), i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 1 to %Result*), i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 3 to %Result*), i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 4 to %Result*), i8* null) + ret void +} + +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__cnot__body(%Qubit*, %Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="5" "required_num_results"="5" } +""" + +# Example 4: Classical boolean computation +BOOLEAN_COMPUTATION_QIR = """\ +%Result = type opaque +%Qubit = type opaque + +define void @ENTRYPOINT__main() #0 { +entry: + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr 
(i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + %r0 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + %r1 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 1 to %Result*)) + %both = and i1 %r0, %r1 + br i1 %both, label %then, label %else + +then: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + br label %end + +else: + br label %end + +end: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) + call void @__quantum__rt__tuple_record_output(i64 1, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* null) + ret void +} + +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__x__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare i1 @__quantum__qis__read_result__body(%Result*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="2" "required_num_results"="3" } +""" + +# Example 5: Teleport chain +TELEPORT_CHAIN_QIR = """\ +%Result = type opaque +%Qubit = type opaque + +@0 = internal constant [5 x i8] c"0_t0\\00" +@1 = internal constant [5 x i8] c"0_t1\\00" + +define void @TeleportChain() #0 { +entry: + call void @__quantum__rt__initialize(i8* null) + br label %body +body: + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__cnot__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 2 to %Qubit*)) + call void @__quantum__qis__cnot__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Qubit* inttoptr (i64 4 to %Qubit*)) + call void 
@__quantum__qis__cnot__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Qubit* inttoptr (i64 2 to %Qubit*)) + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__reset__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + %0 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 0 to %Result*)) + br i1 %0, label %then__1, label %continue__1 +then__1: + call void @__quantum__qis__z__body(%Qubit* inttoptr (i64 4 to %Qubit*)) + br label %continue__1 +continue__1: + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__qis__reset__body(%Qubit* inttoptr (i64 2 to %Qubit*)) + %1 = call i1 @__quantum__qis__read_result__body(%Result* inttoptr (i64 1 to %Result*)) + br i1 %1, label %then__2, label %continue__2 +then__2: + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 4 to %Qubit*)) + br label %continue__2 +continue__2: + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) + call void @__quantum__qis__reset__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 4 to %Qubit*), %Result* inttoptr (i64 3 to %Result*)) + call void @__quantum__qis__reset__body(%Qubit* inttoptr (i64 4 to %Qubit*)) + br label %exit +exit: + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @0, i32 0, i32 0)) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 3 to %Result*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @1, i32 0, i32 0)) + ret void +} + +declare void @__quantum__qis__cnot__body(%Qubit*, %Qubit*) +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__x__body(%Qubit*) +declare void 
@__quantum__qis__z__body(%Qubit*) +declare void @__quantum__qis__reset__body(%Qubit*) +declare void @__quantum__qis__mz__body(%Qubit*, %Result*) #1 +declare void @__quantum__rt__initialize(i8*) +declare i1 @__quantum__qis__read_result__body(%Result*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="5" "required_num_results"="4" } +attributes #1 = { "irreversible" } +""" + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_measure_and_correct_histogram(sim_type): + """Example 1: H → MResetZ → read_result → conditional X. + + Run 10000 shots and verify ~50/50 split of "0" and "1" outcomes. + """ + results = _run(MEASURE_AND_CORRECT_QIR, shots=10000, seed=42, sim_type=sim_type) + assert len(results) == 10000 + + counts = Counter(results) + count_0 = counts.get("0", 0) + count_1 = counts.get("1", 0) + + assert count_0 > 4000, f"Expected ~5000 '0' results, got {count_0}" + assert count_1 > 4000, f"Expected ~5000 '1' results, got {count_1}" + assert count_0 + count_1 == 10000, "All shots should produce a result" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_conditional_loop_all_results_are_one(sim_type): + """Example 3: The loop exits only when measurement yields 1. + + Every shot's recorded result should be "1". 
+ """ + shots = 5000 + results = _run(CONDITIONAL_LOOP_QIR, shots=shots, seed=99, sim_type=sim_type) + assert len(results) == shots + + counts = Counter(results) + assert ( + counts.get("1", 0) == shots + ), f"Expected all {shots} shots to produce '1', got counts: {counts}" + + +# --------------------------------------------------------------------------- +# Tests — Example 2: Loop with phi (GHZ state) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_loop_with_phi_ghz_histogram(sim_type): + """Example 2: H → loop CNOT(q0, q_i) for i=1..4 → measure all. + + Creates (|00000⟩ + |11111⟩)/√2. All 5 measurements must agree. + """ + results = _run(LOOP_WITH_PHI_QIR, shots=10000, seed=42, sim_type=sim_type) + assert len(results) == 10000 + + counts = Counter(results) + assert set(counts.keys()) <= { + "00000", + "11111", + }, f"Unexpected outcomes in GHZ state: {counts}" + + count_00000 = counts.get("00000", 0) + count_11111 = counts.get("11111", 0) + + assert count_00000 > 4000, f"Expected ~5000 '00000' results, got {count_00000}" + assert count_11111 > 4000, f"Expected ~5000 '11111' results, got {count_11111}" + assert count_00000 + count_11111 == 10000, "All shots should produce a result" + + +# --------------------------------------------------------------------------- +# Tests — Example 4: Boolean computation (AND gate) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_boolean_computation_histogram(sim_type): + """Example 4: H(q0), H(q1) → MResetZ both → AND results → conditional X. + + r2=1 only when both r0=1 AND r1=1 (~25% of shots). 
+ """ + results = _run(BOOLEAN_COMPUTATION_QIR, shots=10000, seed=42, sim_type=sim_type) + assert len(results) == 10000 + + counts = Counter(results) + count_0 = counts.get("0", 0) + count_1 = counts.get("1", 0) + + assert 1500 < count_1 < 3500, f"Expected ~2500 '1' results (~25%), got {count_1}" + assert 6500 < count_0 < 8500, f"Expected ~7500 '0' results (~75%), got {count_0}" + assert count_0 + count_1 == 10000, "All shots should produce a result" + + +# --------------------------------------------------------------------------- +# Tests — Example 5: Teleport chain +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_teleport_chain_histogram(sim_type): + """Example 5: Teleport chain with 2 Bell pairs and measure-and-correct. + + Final measurements of q0 and q4 should be correlated: + both "0" or both "1", near 50/50. + """ + results = _run(TELEPORT_CHAIN_QIR, shots=10000, seed=42, sim_type=sim_type) + assert len(results) == 10000 + + counts = Counter(results) + assert set(counts.keys()) <= { + "00", + "11", + }, f"Unexpected outcomes in teleport chain: {counts}" + + count_00 = counts.get("00", 0) + count_11 = counts.get("11", 0) + + assert count_00 > 4000, f"Expected ~5000 '00' results, got {count_00}" + assert count_11 > 4000, f"Expected ~5000 '11' results, got {count_11}" + assert count_00 + count_11 == 10000, "All shots should produce a result" diff --git a/source/pip/tests/test_adaptive_gpu_noise.py b/source/pip/tests/test_adaptive_gpu_noise.py index e44d55294c..819f773b30 100644 --- a/source/pip/tests/test_adaptive_gpu_noise.py +++ b/source/pip/tests/test_adaptive_gpu_noise.py @@ -228,7 +228,7 @@ def test_z_noise_on_h_i_h_yields_1(): def test_probabilistic_x_noise(): noise = NoiseConfig() noise.cx.ix = 0.5 - counts = get_histogram(I_QIR, shots=1000, noise=noise) + counts = get_histogram(I_QIR, shots=1000, num_qubits=2, noise=noise) assert counts["0"] > 400, f"Expected 
~500 '0' results, got {counts['0']}" assert counts["1"] > 400, f"Expected ~500 '1' results, got {counts['1']}" diff --git a/source/pip/tests/test_clifford_simulator.py b/source/pip/tests/test_clifford_simulator.py index 2c47fc0c8e..76acfd934e 100644 --- a/source/pip/tests/test_clifford_simulator.py +++ b/source/pip/tests/test_clifford_simulator.py @@ -101,7 +101,7 @@ def test_s_adj_noise_inherits_from_rz(): assert output == [Result.One] -def test_program_with_branching_fails(): +def test_program_with_branching_succeeds(): qsharp.init(target_profile=TargetProfile.Adaptive_RI) qsharp.eval( """ @@ -116,14 +116,8 @@ def test_program_with_branching_fails(): """ ) ir = qsharp.compile("Main()") - try: - run_qir_clifford(str(ir), 1, NoiseConfig()) - assert False, "Expected ValueError for branching control flow" - except ValueError as e: - assert ( - "simulation of programs with branching control flow is not supported" - in str(e) - ) + results = run_qir_clifford(str(ir), 1, NoiseConfig()) + assert len(results) == 1 def test_program_with_unconditional_branching_succeeds(): diff --git a/source/simulators/src/bytecode.rs b/source/simulators/src/bytecode.rs index 6fb246e857..afc77f5685 100644 --- a/source/simulators/src/bytecode.rs +++ b/source/simulators/src/bytecode.rs @@ -5,6 +5,8 @@ //! //! Values must stay in sync with the Python `_adaptive_opcodes.py` module. 
+pub mod runtime; + use bytemuck::{Pod, Zeroable}; use num_traits::Unsigned; @@ -167,8 +169,8 @@ impl Function { #[repr(C)] #[derive(Copy, Clone, Debug, Default, Zeroable)] pub struct PhiNodeEntry { - block_id: Word, - val_reg: Word, + pub block_id: Word, + pub val_reg: Word, } impl PhiNodeEntry { @@ -186,8 +188,8 @@ impl PhiNodeEntry { #[repr(C)] #[derive(Copy, Clone, Debug, Default, Zeroable)] pub struct SwitchCase { - case_val: Word, - target_block: Word, + pub case_val: Word, + pub target_block: Word, } impl SwitchCase { diff --git a/source/simulators/src/bytecode/runtime.rs b/source/simulators/src/bytecode/runtime.rs new file mode 100644 index 0000000000..db0d0e6894 --- /dev/null +++ b/source/simulators/src/bytecode/runtime.rs @@ -0,0 +1,690 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! CPU bytecode interpreter for the Adaptive Profile QIR. + +// The interpreter intentionally uses u64 registers and must cast between u64, i64, +// usize, and u32 pervasively. These casts are correct by construction (values come +// from a well-formed bytecode program). Suppressing the pedantic clippy lints here +// keeps the opcode dispatch readable. +#![allow( + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::float_cmp, + clippy::match_same_arms, + clippy::single_match_else, + clippy::too_many_lines +)] + +use crate::{ + MeasurementResult, Simulator, + bytecode::{AdaptiveProgram, Instruction}, +}; + +// --------------------------------------------------------------------------- +// Opcode constants — must stay in sync with the Python `_adaptive_bytecode.py` +// and the WGSL `simulator_adaptive.wgsl` shader. 
+// --------------------------------------------------------------------------- + +// Flags (pre-shifted to bit 16+) +const FLAG_SRC0_IMM: u64 = 1 << 16; +const FLAG_SRC1_IMM: u64 = 1 << 17; +const FLAG_DST_IMM: u64 = 1 << 18; +const FLAG_AUX0_IMM: u64 = 1 << 19; +const FLAG_AUX1_IMM: u64 = 1 << 20; +const FLAG_AUX2_IMM: u64 = 1 << 21; +const FLAG_AUX3_IMM: u64 = 1 << 22; + +// Control flow +const OP_NOP: u8 = 0x00; +const OP_RET: u8 = 0x02; +const OP_JUMP: u8 = 0x04; +const OP_BRANCH: u8 = 0x05; +const OP_SWITCH: u8 = 0x06; +const OP_CALL: u8 = 0x07; +const OP_CALL_RETURN: u8 = 0x08; + +// Quantum +const OP_QUANTUM_GATE: u8 = 0x10; +const OP_MEASURE: u8 = 0x11; +const OP_RESET: u8 = 0x12; +const OP_READ_RESULT: u8 = 0x13; +const OP_RECORD_OUTPUT: u8 = 0x14; + +// Integer arithmetic +const OP_ADD: u8 = 0x20; +const OP_SUB: u8 = 0x21; +const OP_MUL: u8 = 0x22; +const OP_UDIV: u8 = 0x23; +const OP_SDIV: u8 = 0x24; +const OP_UREM: u8 = 0x25; +const OP_SREM: u8 = 0x26; + +// Bitwise / shift +const OP_AND: u8 = 0x28; +const OP_OR: u8 = 0x29; +const OP_XOR: u8 = 0x2A; +const OP_SHL: u8 = 0x2B; +const OP_LSHR: u8 = 0x2C; +const OP_ASHR: u8 = 0x2D; + +// Comparison +const OP_ICMP: u8 = 0x30; +const OP_FCMP: u8 = 0x31; + +// Float arithmetic +const OP_FADD: u8 = 0x38; +const OP_FSUB: u8 = 0x39; +const OP_FMUL: u8 = 0x3A; +const OP_FDIV: u8 = 0x3B; + +// Type conversion +const OP_ZEXT: u8 = 0x40; +const OP_SEXT: u8 = 0x41; +const OP_TRUNC: u8 = 0x42; +const OP_FPEXT: u8 = 0x43; +const OP_FPTRUNC: u8 = 0x44; +const OP_INTTOPTR: u8 = 0x45; +const OP_FPTOSI: u8 = 0x46; +const OP_SITOFP: u8 = 0x47; + +// SSA / data movement +const OP_PHI: u8 = 0x50; +const OP_SELECT: u8 = 0x51; +const OP_MOV: u8 = 0x52; +const OP_CONST: u8 = 0x53; + +// ICmp condition codes (sub-opcode) +const ICMP_EQ: u8 = 0; +const ICMP_NE: u8 = 1; +const ICMP_SLT: u8 = 2; +const ICMP_SLE: u8 = 3; +const ICMP_SGT: u8 = 4; +const ICMP_SGE: u8 = 5; +const ICMP_ULT: u8 = 6; +const ICMP_ULE: u8 = 7; +const 
ICMP_UGT: u8 = 8; +const ICMP_UGE: u8 = 9; + +// FCmp condition codes (sub-opcode) +const FCMP_OEQ: u8 = 1; +const FCMP_OGT: u8 = 2; +const FCMP_OGE: u8 = 3; +const FCMP_OLT: u8 = 4; +const FCMP_OLE: u8 = 5; +const FCMP_ONE: u8 = 6; + +// Quantum op IDs — must match `shader_types.rs` `OpID` and `GATE_MAP` in `_adaptive_pass.py`. +const OPID_RESETZ: u64 = 1; +const OPID_X: u64 = 2; +const OPID_Y: u64 = 3; +const OPID_Z: u64 = 4; +const OPID_H: u64 = 5; +const OPID_S: u64 = 6; +const OPID_S_ADJ: u64 = 7; +const OPID_T: u64 = 8; +const OPID_T_ADJ: u64 = 9; +const OPID_SX: u64 = 10; +const OPID_SX_ADJ: u64 = 11; +const OPID_RX: u64 = 12; +const OPID_RY: u64 = 13; +const OPID_RZ: u64 = 14; +const OPID_CX: u64 = 15; +const OPID_CZ: u64 = 16; +const OPID_RXX: u64 = 17; +const OPID_RYY: u64 = 18; +const OPID_RZZ: u64 = 19; +const OPID_MZ: u64 = 21; +const OPID_MRESETZ: u64 = 22; +const OPID_SWAP: u64 = 24; +const OPID_MOVE: u64 = 28; +const OPID_CY: u64 = 29; +const OPID_CORRELATED_NOISE: u64 = 131; + +// Sentinel +const VOID_RETURN: u64 = 0xFFFF_FFFF; + +// --------------------------------------------------------------------------- +// Runtime state +// --------------------------------------------------------------------------- + +struct CallStackFrame { + block_id: u64, + return_pc: u64, + return_reg: u64, +} + +struct Runtime { + pc: u64, + current_block_id: u64, + previous_block_id: u64, + exit_code: u64, + registers: Vec, + call_stack: Vec, +} + +impl Runtime { + fn new(num_registers: u32, entry_block: u64, entry_pc: u64) -> Self { + Self { + pc: entry_pc, + current_block_id: entry_block, + previous_block_id: 0, + exit_code: 0, + registers: vec![0; num_registers as usize], + call_stack: Vec::with_capacity(128), + } + } + + fn read_reg(&self, reg: u64) -> u64 { + self.registers[reg as usize] + } + + fn write_reg(&mut self, reg: u64, val: u64) { + self.registers[reg as usize] = val; + } + + fn resolve_u64(&self, operand: u64, flags: u64, operand_idx: u64) -> u64 { + let 
imm_flag = match operand_idx { + 0 => FLAG_SRC0_IMM, + 1 => FLAG_SRC1_IMM, + 2 => FLAG_DST_IMM, + 3 => FLAG_AUX0_IMM, + 4 => FLAG_AUX1_IMM, + 5 => FLAG_AUX2_IMM, + 6 => FLAG_AUX3_IMM, + _ => panic!("invalid operand index {operand_idx}"), + }; + if flags & imm_flag != 0 { + operand + } else { + self.read_reg(operand) + } + } + + fn resolve_i64(&self, operand: u64, flags: u64, operand_idx: u64) -> i64 { + self.resolve_u64(operand, flags, operand_idx) as i64 + } + + fn resolve_f64(&self, operand: u64, flags: u64, operand_idx: u64) -> f64 { + f64::from_bits(self.resolve_u64(operand, flags, operand_idx)) + } + + fn write_f64(&mut self, reg: u64, val: f64) { + self.write_reg(reg, val.to_bits()); + } +} + +// --------------------------------------------------------------------------- +// Quantum op dispatch +// --------------------------------------------------------------------------- + +fn dispatch_quantum_gate( + program: &AdaptiveProgram, + sim: &mut S, + instr: &Instruction, + rt: &Runtime, +) { + let op_idx = instr.aux0 as usize; + let op = &program.quantum_ops[op_idx]; + let op_id = op.op_id; + + match op_id { + OPID_CORRELATED_NOISE => { + let qubit_count = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; + let arg_offset = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; + let table_id = op.q1 as u32; + let targets: Vec = (0..qubit_count) + .map(|i| rt.read_reg(program.call_args[arg_offset + i]) as usize) + .collect(); + sim.correlated_noise_intrinsic(table_id, &targets); + } + _ => { + let q1 = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; + let q2 = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; + let angle = op.angle; + match op_id { + OPID_X => sim.x(q1), + OPID_Y => sim.y(q1), + OPID_Z => sim.z(q1), + OPID_H => sim.h(q1), + OPID_S => sim.s(q1), + OPID_S_ADJ => sim.s_adj(q1), + OPID_T => sim.t(q1), + OPID_T_ADJ => sim.t_adj(q1), + OPID_SX => sim.sx(q1), + OPID_SX_ADJ => sim.sx_adj(q1), + OPID_RX => sim.rx(angle, q1), + OPID_RY => 
sim.ry(angle, q1), + OPID_RZ => sim.rz(angle, q1), + OPID_CX => sim.cx(q1, q2), + OPID_CY => sim.cy(q1, q2), + OPID_CZ => sim.cz(q1, q2), + OPID_RXX => sim.rxx(angle, q1, q2), + OPID_RYY => sim.ryy(angle, q1, q2), + OPID_RZZ => sim.rzz(angle, q1, q2), + OPID_SWAP => sim.swap(q1, q2), + OPID_MOVE => sim.mov(q1), + _ => panic!("unsupported quantum gate op_id={op_id}"), + } + } + } +} + +fn dispatch_measure( + program: &AdaptiveProgram, + sim: &mut S, + instr: &Instruction, + rt: &Runtime, +) { + let op_idx = instr.aux0 as usize; + let op = &program.quantum_ops[op_idx]; + let qubit = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; + let result_id = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; + + match op.op_id { + OPID_MZ => sim.mz(qubit, result_id), + OPID_MRESETZ => sim.mresetz(qubit, result_id), + _ => panic!("unsupported measure op_id={}", op.op_id), + } +} + +fn dispatch_reset( + program: &AdaptiveProgram, + sim: &mut S, + instr: &Instruction, + rt: &Runtime, +) { + let op_idx = instr.aux0 as usize; + let op = &program.quantum_ops[op_idx]; + let qubit = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; + + match op.op_id { + OPID_RESETZ => sim.resetz(qubit), + _ => panic!("unsupported reset op_id={}", op.op_id), + } +} + +// --------------------------------------------------------------------------- +// Main interpreter entry point +// --------------------------------------------------------------------------- + +pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { + const MAX_STEPS: u64 = 10_000_000; + + let entry_pc = program.block_table[program.entry_block as usize].instr_offset; + let mut rt = Runtime::new(program.num_registers, program.entry_block, entry_pc); + + for _ in 0..MAX_STEPS { + let instr = program.instructions[rt.pc as usize]; + let op = instr.primary_opcode(); + let subcode = instr.sub_opcode(); + let flags = instr.opcode; + + match op { + OP_NOP => rt.pc += 1, + + OP_RET => { + rt.exit_code = rt.resolve_u64(instr.dst, 
flags, 2); + break; + } + + OP_JUMP => { + rt.previous_block_id = rt.current_block_id; + rt.current_block_id = instr.dst; + rt.pc = block_pc(program, rt.current_block_id); + } + + OP_BRANCH => { + let cond = rt.resolve_u64(instr.src0, flags, 0) != 0; + let next_block = if cond { instr.aux0 } else { instr.aux1 }; + rt.previous_block_id = rt.current_block_id; + rt.current_block_id = next_block; + rt.pc = block_pc(program, rt.current_block_id); + } + + OP_SWITCH => { + let val = rt.resolve_u64(instr.src0, flags, 0); + let default_block = instr.aux0; + let case_offset = instr.aux1 as usize; + let case_count = instr.aux2 as usize; + let mut target_block = default_block; + for i in 0..case_count { + let entry = program.switch_cases[case_offset + i]; + if entry.case_val == val { + target_block = entry.target_block; + break; + } + } + rt.previous_block_id = rt.current_block_id; + rt.current_block_id = target_block; + rt.pc = block_pc(program, rt.current_block_id); + } + + OP_CALL => { + let func_id = instr.aux0 as usize; + let arg_count = instr.aux1 as usize; + let arg_offset = instr.aux2 as usize; + let func = program.function_table[func_id]; + + rt.call_stack.push(CallStackFrame { + block_id: rt.current_block_id, + return_pc: rt.pc + 1, + return_reg: instr.dst, + }); + + let param_base = func.param_base_reg; + for i in 0..arg_count { + let arg_reg = program.call_args[arg_offset + i]; + let val = rt.read_reg(arg_reg); + rt.write_reg(param_base + i as u64, val); + } + + rt.current_block_id = func.entry_block_id; + rt.pc = block_pc(program, rt.current_block_id); + } + + OP_CALL_RETURN => { + let frame = rt.call_stack.pop().expect("call stack underflow"); + let return_block = frame.block_id; + let return_pc = frame.return_pc; + let return_reg = frame.return_reg; + + rt.current_block_id = return_block; + rt.pc = return_pc; + if return_reg != VOID_RETURN { + let ret_val = rt.resolve_u64(instr.src0, flags, 0); + rt.write_reg(return_reg, ret_val); + } + } + + // ----- Quantum 
operations ----- + OP_QUANTUM_GATE => { + dispatch_quantum_gate(program, sim, &instr, &rt); + rt.pc += 1; + } + + OP_MEASURE => { + dispatch_measure(program, sim, &instr, &rt); + rt.pc += 1; + } + + OP_RESET => { + dispatch_reset(program, sim, &instr, &rt); + rt.pc += 1; + } + + OP_READ_RESULT => { + let result_id = rt.resolve_u64(instr.src0, flags, 0) as usize; + let measurements = sim.measurements(); + let val = if result_id < measurements.len() { + match measurements[result_id] { + MeasurementResult::One => 1u64, + _ => 0u64, + } + } else { + 0u64 + }; + rt.write_reg(instr.dst, val); + rt.pc += 1; + } + + OP_RECORD_OUTPUT => { + // No-op on CPU — results are read from the simulator directly. + rt.pc += 1; + } + + // ----- Integer arithmetic ----- + OP_ADD => { + let a = rt.resolve_i64(instr.src0, flags, 0); + let b = rt.resolve_i64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a.wrapping_add(b) as u64); + rt.pc += 1; + } + + OP_SUB => { + let a = rt.resolve_i64(instr.src0, flags, 0); + let b = rt.resolve_i64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a.wrapping_sub(b) as u64); + rt.pc += 1; + } + + OP_MUL => { + let a = rt.resolve_i64(instr.src0, flags, 0); + let b = rt.resolve_i64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a.wrapping_mul(b) as u64); + rt.pc += 1; + } + + OP_UDIV => { + let a = rt.resolve_u64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a / b); + rt.pc += 1; + } + + OP_SDIV => { + let a = rt.resolve_i64(instr.src0, flags, 0); + let b = rt.resolve_i64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a.wrapping_div(b) as u64); + rt.pc += 1; + } + + OP_UREM => { + let a = rt.resolve_u64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a % b); + rt.pc += 1; + } + + OP_SREM => { + let a = rt.resolve_i64(instr.src0, flags, 0); + let b = rt.resolve_i64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a.wrapping_rem(b) as u64); + rt.pc += 
1; + } + + // ----- Bitwise / shift ----- + OP_AND => { + let a = rt.resolve_u64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a & b); + rt.pc += 1; + } + + OP_OR => { + let a = rt.resolve_u64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a | b); + rt.pc += 1; + } + + OP_XOR => { + let a = rt.resolve_u64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1); + rt.write_reg(instr.dst, a ^ b); + rt.pc += 1; + } + + OP_SHL => { + let a = rt.resolve_u64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1) as u32; + rt.write_reg(instr.dst, a.wrapping_shl(b)); + rt.pc += 1; + } + + OP_LSHR => { + let a = rt.resolve_u64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1) as u32; + rt.write_reg(instr.dst, a.wrapping_shr(b)); + rt.pc += 1; + } + + OP_ASHR => { + let a = rt.resolve_i64(instr.src0, flags, 0); + let b = rt.resolve_u64(instr.src1, flags, 1) as u32; + rt.write_reg(instr.dst, a.wrapping_shr(b) as u64); + rt.pc += 1; + } + + // ----- Integer comparison ----- + OP_ICMP => { + let a = rt.resolve_i64(instr.src0, flags, 0); + let b = rt.resolve_i64(instr.src1, flags, 1); + let result = match subcode { + ICMP_EQ => a == b, + ICMP_NE => a != b, + ICMP_SLT => a < b, + ICMP_SLE => a <= b, + ICMP_SGT => a > b, + ICMP_SGE => a >= b, + ICMP_ULT => (a as u64) < (b as u64), + ICMP_ULE => (a as u64) <= (b as u64), + ICMP_UGT => (a as u64) > (b as u64), + ICMP_UGE => (a as u64) >= (b as u64), + _ => panic!("unsupported icmp condition code {subcode}"), + }; + rt.write_reg(instr.dst, u64::from(result)); + rt.pc += 1; + } + + // ----- Float comparison ----- + OP_FCMP => { + let a = rt.resolve_f64(instr.src0, flags, 0); + let b = rt.resolve_f64(instr.src1, flags, 1); + let result = match subcode { + FCMP_OEQ => a == b, + FCMP_ONE => a != b, + FCMP_OLT => a < b, + FCMP_OLE => a <= b, + FCMP_OGT => a > b, + FCMP_OGE => a >= b, + _ => 
panic!("unsupported fcmp condition code {subcode}"), + }; + rt.write_reg(instr.dst, u64::from(result)); + rt.pc += 1; + } + + // ----- Float arithmetic ----- + OP_FADD => { + let a = rt.resolve_f64(instr.src0, flags, 0); + let b = rt.resolve_f64(instr.src1, flags, 1); + rt.write_f64(instr.dst, a + b); + rt.pc += 1; + } + + OP_FSUB => { + let a = rt.resolve_f64(instr.src0, flags, 0); + let b = rt.resolve_f64(instr.src1, flags, 1); + rt.write_f64(instr.dst, a - b); + rt.pc += 1; + } + + OP_FMUL => { + let a = rt.resolve_f64(instr.src0, flags, 0); + let b = rt.resolve_f64(instr.src1, flags, 1); + rt.write_f64(instr.dst, a * b); + rt.pc += 1; + } + + OP_FDIV => { + let a = rt.resolve_f64(instr.src0, flags, 0); + let b = rt.resolve_f64(instr.src1, flags, 1); + rt.write_f64(instr.dst, a / b); + rt.pc += 1; + } + + // ----- Type conversion ----- + OP_ZEXT => { + let val = rt.resolve_u64(instr.src0, flags, 0); + rt.write_reg(instr.dst, val); + rt.pc += 1; + } + + OP_SEXT => { + let val = rt.resolve_i64(instr.src0, flags, 0); + let src_bits = instr.aux0 as u32; + let result = if src_bits > 0 && src_bits < 64 { + let shift = 64 - src_bits; + (val.wrapping_shl(shift)).wrapping_shr(shift) + } else { + val + }; + rt.write_reg(instr.dst, result as u64); + rt.pc += 1; + } + + OP_TRUNC => { + let val = rt.resolve_u64(instr.src0, flags, 0); + rt.write_reg(instr.dst, val); + rt.pc += 1; + } + + OP_FPEXT | OP_FPTRUNC => { + let val = rt.resolve_f64(instr.src0, flags, 0); + rt.write_f64(instr.dst, val); + rt.pc += 1; + } + + OP_INTTOPTR => { + let val = rt.resolve_u64(instr.src0, flags, 0); + rt.write_reg(instr.dst, val); + rt.pc += 1; + } + + OP_FPTOSI => { + let val = rt.resolve_f64(instr.src0, flags, 0); + rt.write_reg(instr.dst, val as i64 as u64); + rt.pc += 1; + } + + OP_SITOFP => { + let val = rt.resolve_i64(instr.src0, flags, 0); + rt.write_f64(instr.dst, val as f64); + rt.pc += 1; + } + + // ----- PHI node ----- + OP_PHI => { + let offset = instr.aux0 as usize; + let count = 
instr.aux1 as usize; + for i in 0..count { + let entry = program.phi_entries[offset + i]; + if entry.block_id == rt.previous_block_id { + let val = rt.read_reg(entry.val_reg); + rt.write_reg(instr.dst, val); + break; + } + } + rt.pc += 1; + } + + // ----- Data movement ----- + OP_SELECT => { + let cond = rt.resolve_u64(instr.src0, flags, 0) != 0; + let true_val = rt.resolve_u64(instr.aux0, flags, 3); + let false_val = rt.resolve_u64(instr.aux1, flags, 4); + rt.write_reg(instr.dst, if cond { true_val } else { false_val }); + rt.pc += 1; + } + + OP_MOV => { + let val = rt.resolve_u64(instr.src0, flags, 0); + rt.write_reg(instr.dst, val); + rt.pc += 1; + } + + OP_CONST => { + rt.write_reg(instr.dst, instr.src0); + rt.pc += 1; + } + + _ => panic!("unsupported opcode 0x{op:02X} at pc={}", rt.pc), + } + } +} + +fn block_pc(program: &AdaptiveProgram, block_id: u64) -> u64 { + program.block_table[block_id as usize].instr_offset +} From 88cd287988497caca42c6f2a66cc47ad2b10f792 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Wed, 1 Apr 2026 15:28:47 -0700 Subject: [PATCH 03/14] cargo fmt --- source/pip/src/interpreter.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/pip/src/interpreter.rs b/source/pip/src/interpreter.rs index e488bf3b88..be4cae3c77 100644 --- a/source/pip/src/interpreter.rs +++ b/source/pip/src/interpreter.rs @@ -24,7 +24,9 @@ use crate::{ noisy_simulator::register_noisy_simulator_submodule, qir_simulation::{ IdleNoiseParams, NoiseConfig, NoiseTable, QirInstruction, QirInstructionId, - cpu_simulators::{run_clifford, run_clifford_adaptive, run_cpu_adaptive, run_cpu_full_state}, + cpu_simulators::{ + run_clifford, run_clifford_adaptive, run_cpu_adaptive, run_cpu_full_state, + }, gpu_full_state::{ GpuContext, run_adaptive_parallel_shots, run_parallel_shots, try_create_gpu_adapter, }, From b2634d3ce1abb186aeec7c8f4c6fad773ac0480a Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Thu, 2 Apr 2026 11:01:51 -0700 Subject: [PATCH 
04/14] better python signatures --- source/pip/qsharp/_adaptive_pass.py | 41 +++++++------- .../pip/tests/test_adaptive_cpu_bytecode.py | 54 ++++++++++++++----- source/pip/tests/test_adaptive_cpu_noise.py | 12 +++-- .../tests/test_adaptive_cpu_quantum_ops.py | 10 ++-- 4 files changed, 76 insertions(+), 41 deletions(-) diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index 1aca506831..143dc50bde 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -20,8 +20,8 @@ class Bytecode(Enum): - Bit32 = 1 - Bit64 = 2 + Bit32 = 32 + Bit64 = 64 # --------------------------------------------------------------------------- @@ -199,8 +199,8 @@ class SwitchCase: @dataclass class IntOperand: - val: int = 0 - bits: int = 32 + val: int + bits: int def __post_init__(self): # Mask to the appropriate word-width so negative Python ints become @@ -289,6 +289,7 @@ def __init__(self, bytecode_kind: Bytecode): # Internal tracking. self._bytecode_kind = bytecode_kind + self._int_bits = bytecode_kind.value self._next_reg: int = 0 self._next_block: int = 0 self._next_qop: int = 0 @@ -297,7 +298,6 @@ def __init__(self, bytecode_kind: Bytecode): self._func_to_id: Dict[str, int] = {} # function name → function ID self._current_func_is_entry: bool = True self._noise_intrinsics: Optional[Dict[str, int]] = None - self._int_bits = 32 if bytecode_kind == Bytecode.Bit32 else 64 def run( self, @@ -457,10 +457,10 @@ def _resolve_operand(self, value: pyqir.Value) -> IntOperand | FloatOperand | Re # Try extracting as a qubit/result pointer constant. 
qid = pyqir.qubit_id(value) if qid is not None: - return IntOperand(qid) + return IntOperand(qid, self._int_bits) rid = pyqir.result_id(value) if rid is not None: - return IntOperand(rid) + return IntOperand(rid, self._int_bits) # Null pointer if value.is_null: reg = self._alloc_reg(value, REG_TYPE_PTR) @@ -717,7 +717,11 @@ def _emit_call(self, call: pyqir.Call) -> None: def _resolve_qubit_operands( self, args: List[pyqir.Value] ) -> Tuple[IntOperand | Reg, IntOperand | Reg, IntOperand | Reg]: - qs: List[IntOperand | Reg] = [IntOperand(), IntOperand(), IntOperand()] + qs: List[IntOperand | Reg] = [ + IntOperand(0, self._int_bits), + IntOperand(0, self._int_bits), + IntOperand(0, self._int_bits), + ] for i, arg in enumerate(args): qs[i] = self._resolve_qubit_operand(arg) return (qs[0], qs[1], qs[2]) @@ -813,8 +817,8 @@ def _emit_noise_intrinsic_call(self, call: pyqir.Call) -> None: self._emit( OP_QUANTUM_GATE, aux0=qop_idx, - aux1=IntOperand(qubit_count), - aux2=IntOperand(arg_offset), + aux1=IntOperand(qubit_count, self._int_bits), + aux2=IntOperand(arg_offset, self._int_bits), ) elif self._noise_intrinsics is not None: raise ValueError(f"Missing noise intrinsic: {callee_name}") @@ -877,19 +881,14 @@ def _emit_switch(self, switch_instr: pyqir.Switch) -> None: reference when ``mod.functions`` has already been iterated (two-pass compilation). ``operands`` is not affected by this behavior. """ - # operands layout: [cond, default_block, case_val0, case_block0, ...] 
- ops = switch_instr.operands - cond_reg = self._resolve_operand(ops[0]) - default_block = self._block_to_id[ops[1]] + cond_reg = self._resolve_operand(switch_instr.operands[0]) + default_block = self._block_to_id[switch_instr.default] case_offset = len(self.switch_cases) - num_case_pairs = (len(ops) - 2) // 2 - for i in range(num_case_pairs): - case_val = ops[2 + 2 * i] - case_block = ops[2 + 2 * i + 1] - target_block = self._block_to_id[case_block] + for case_val, block in switch_instr.cases: + target_block = self._block_to_id[block] switch_case = SwitchCase(case_val.value, target_block) self.switch_cases.append(switch_case) - case_count = num_case_pairs + case_count = len(switch_instr.cases) self._emit( OP_SWITCH, src0=cond_reg, @@ -914,7 +913,7 @@ def _emit_ret(self, instr: Any) -> None: self._emit(OP_RET, dst=ret_reg) else: # Void return — use immediate 0 as exit code. - self._emit(OP_RET, dst=IntOperand(0)) + self._emit(OP_RET, dst=IntOperand(0, self._int_bits)) # ------------------------------------------------------------------ # Comparison emitters diff --git a/source/pip/tests/test_adaptive_cpu_bytecode.py b/source/pip/tests/test_adaptive_cpu_bytecode.py index 6de5a36da3..29656d8b26 100644 --- a/source/pip/tests/test_adaptive_cpu_bytecode.py +++ b/source/pip/tests/test_adaptive_cpu_bytecode.py @@ -15,9 +15,10 @@ from collections import Counter import pytest +from qsharp._simulation import run_qir, Result import qsharp.openqasm +from typing import Literal -from qsharp._simulation import run_qir, Result # --------------------------------------------------------------------------- # Helpers @@ -26,7 +27,6 @@ # Deterministic programs need a single shot but we run multiple shots # to verify that multiple shots yield the same result. 
SHOTS = 100 - SIM_TYPES = ["cpu", "clifford"] @@ -43,7 +43,12 @@ def map_result_list_to_str(results): return results_str -def _run(qir: str, shots: int = SHOTS, seed: int = 42, sim_type: str = "cpu"): +def _run( + qir: str, + shots: int = SHOTS, + seed: int = 42, + sim_type: Literal["clifford", "cpu"] = "cpu", +): """Run *qir* on the given simulator and return shot results as a list of strings.""" results = run_qir(qir, shots, seed=seed, type=sim_type) return [map_result_list_to_str(r) for r in results] @@ -57,7 +62,7 @@ def check_result( num_qubits: int = 1, num_results: int = 1, record=None, - sim_type: str = "cpu", + sim_type: Literal["clifford", "cpu"] = "cpu", ): """Assert every shot produces *expected*.""" qir = format_qir( @@ -74,7 +79,9 @@ def check_result( }, f"Expected all {SHOTS} shots to be '{expected}', got {counts}" -def check_arith_result(qir_fragment: str, expected: str, sim_type: str = "cpu"): +def check_arith_result( + qir_fragment: str, expected: str, sim_type: Literal["clifford", "cpu"] = "cpu" +): body = build_arith_body(qir_fragment) check_result(body, expected, sim_type=sim_type) @@ -458,7 +465,9 @@ def test_read_result(sim_type): @pytest.mark.parametrize("sim_type", SIM_TYPES) def test_record_output_ordering(sim_type): """Two results recorded: result0=1, result1=0 → '10'.""" - check_result(RECORD_OUTPUT_QIR, "10", num_qubits=2, num_results=2, sim_type=sim_type) + check_result( + RECORD_OUTPUT_QIR, "10", num_qubits=2, num_results=2, sim_type=sim_type + ) # ######################################################################### @@ -1004,7 +1013,9 @@ def test_const(sim_type): @pytest.mark.parametrize("sim_type", SIM_TYPES) def test_and_i1_boolean(sim_type): """Deterministic boolean AND: both qubits |1⟩ → and i1 true, true → X → 1.""" - check_result(AND_I1_QIR, "1", num_qubits=2, num_results=3, record=[2], sim_type=sim_type) + check_result( + AND_I1_QIR, "1", num_qubits=2, num_results=3, record=[2], sim_type=sim_type + ) # 
========================================================================= @@ -1033,7 +1044,9 @@ def test_and_i1_boolean(sim_type): @pytest.mark.parametrize("sim_type", SIM_TYPES) def test_or_i1_boolean(sim_type): """Deterministic boolean OR: q0=1, q1=0 → or i1 true, false → true → X → 1.""" - check_result(OR_I1_QIR, "1", num_qubits=2, num_results=3, record=[2], sim_type=sim_type) + check_result( + OR_I1_QIR, "1", num_qubits=2, num_results=3, record=[2], sim_type=sim_type + ) # ========================================================================= @@ -1061,7 +1074,9 @@ def test_or_i1_boolean(sim_type): @pytest.mark.parametrize("sim_type", SIM_TYPES) def test_xor_i1_not(sim_type): """XOR i1 used as NOT: measure 0 → XOR true → true → X → 1.""" - check_result(XOR_NOT_QIR, "1", num_qubits=1, num_results=2, record=[1], sim_type=sim_type) + check_result( + XOR_NOT_QIR, "1", num_qubits=1, num_results=2, record=[1], sim_type=sim_type + ) # ######################################################################### @@ -1329,7 +1344,12 @@ def test_float_roundtrip(sim_type): @pytest.mark.parametrize("sim_type", SIM_TYPES) def test_call_with_return_value(sim_type): """Call a function returning i64, use result in comparison.""" - check_result(CALL_WITH_RETVAL_QIR, "1", extra_decls=CALL_WITH_RETVAL_QIR_FN, sim_type=sim_type) + check_result( + CALL_WITH_RETVAL_QIR, + "1", + extra_decls=CALL_WITH_RETVAL_QIR_FN, + sim_type=sim_type, + ) # ========================================================================= @@ -1495,7 +1515,12 @@ def test_sext_i1_runtime(sim_type): @pytest.mark.parametrize("sim_type", SIM_TYPES) def test_call_inttoptr_arg(sim_type): """Call a helper with an inttoptr constant expression argument.""" - check_result(CALL_INTTOPTR_ARG_QIR, "1", extra_decls=CALL_INTTOPTR_ARG_QIR_FN, sim_type=sim_type) + check_result( + CALL_INTTOPTR_ARG_QIR, + "1", + extra_decls=CALL_INTTOPTR_ARG_QIR_FN, + sim_type=sim_type, + ) # 
========================================================================= @@ -1522,7 +1547,12 @@ def test_sitofp_negative(sim_type): # ######################################################################### -def _run_openqasm(qasm_src: str, shots: int = SHOTS, seed: int = 42, sim_type: str = "cpu"): +def _run_openqasm( + qasm_src: str, + shots: int = SHOTS, + seed: int = 42, + sim_type: Literal["clifford", "cpu"] = "cpu", +): """Compile OpenQASM source via the adaptive pass and run on the given simulator.""" qir = qsharp.openqasm.compile( qasm_src, diff --git a/source/pip/tests/test_adaptive_cpu_noise.py b/source/pip/tests/test_adaptive_cpu_noise.py index 4aec68ccf2..df829528e2 100644 --- a/source/pip/tests/test_adaptive_cpu_noise.py +++ b/source/pip/tests/test_adaptive_cpu_noise.py @@ -13,16 +13,16 @@ from collections import Counter from typing import Optional, List import pytest +from qsharp._simulation import run_qir, NoiseConfig, Result import qsharp.openqasm +from typing import Literal -from qsharp._simulation import run_qir, NoiseConfig, Result # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- SHOTS = 100 - SIM_TYPES = ["cpu", "clifford"] @@ -48,7 +48,7 @@ def get_histogram( noise: Optional[NoiseConfig] = None, record: Optional[List[int]] = None, shots=SHOTS, - sim_type: str = "cpu", + sim_type: Literal["clifford", "cpu"] = "cpu", ): qir = format_qir( qir_fragment, @@ -72,7 +72,7 @@ def check_result( num_results: int = 1, noise: Optional[NoiseConfig] = None, record: Optional[List[int]] = None, - sim_type: str = "cpu", + sim_type: Literal["clifford", "cpu"] = "cpu", ): """Assert every shot produces *expected*.""" counts = get_histogram( @@ -210,7 +210,9 @@ def test_z_noise_on_h_i_h_yields_1(sim_type): def test_probabilistic_x_noise(sim_type): noise = NoiseConfig() noise.cx.ix = 0.5 - counts = get_histogram(I_QIR, shots=1000, num_qubits=2, 
noise=noise, sim_type=sim_type) + counts = get_histogram( + I_QIR, shots=1000, num_qubits=2, noise=noise, sim_type=sim_type + ) assert counts["0"] > 400, f"Expected ~500 '0' results, got {counts['0']}" assert counts["1"] > 400, f"Expected ~500 '1' results, got {counts['1']}" diff --git a/source/pip/tests/test_adaptive_cpu_quantum_ops.py b/source/pip/tests/test_adaptive_cpu_quantum_ops.py index db34a07c5d..1296b510d2 100644 --- a/source/pip/tests/test_adaptive_cpu_quantum_ops.py +++ b/source/pip/tests/test_adaptive_cpu_quantum_ops.py @@ -13,10 +13,9 @@ """ from collections import Counter - import pytest - from qsharp._simulation import run_qir, Result +from typing import Literal SIM_TYPES = ["cpu", "clifford"] @@ -39,7 +38,12 @@ def map_result_list_to_str(results): return results_str -def _run(qir: str, shots: int, seed: int = 42, sim_type: str = "cpu"): +def _run( + qir: str, + shots: int, + seed: int = 42, + sim_type: Literal["clifford", "cpu"] = "cpu", +): """Run *qir* on the given simulator and return shot results as a list of strings.""" results = run_qir(qir, shots, seed=seed, type=sim_type) return [map_result_list_to_str(r) for r in results] From 37bb014bb7a18e2910bd8ebb492bcb14395803eb Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Thu, 2 Apr 2026 11:03:54 -0700 Subject: [PATCH 05/14] add back operands laoyout comment in _emit_switch --- source/pip/qsharp/_adaptive_pass.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index 143dc50bde..214764d8c9 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -881,6 +881,7 @@ def _emit_switch(self, switch_instr: pyqir.Switch) -> None: reference when ``mod.functions`` has already been iterated (two-pass compilation). ``operands`` is not affected by this behavior. """ + # operands layout: [cond, default_block, case_val0, case_block0, ...] 
cond_reg = self._resolve_operand(switch_instr.operands[0]) default_block = self._block_to_id[switch_instr.default] case_offset = len(self.switch_cases) From c5cc017e7dc04362b3e913f452390d08ef3ed956 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Thu, 2 Apr 2026 11:10:10 -0700 Subject: [PATCH 06/14] remove `s`, `s_adj`, and `z` noise overriding --- .../pip/src/qir_simulation/cpu_simulators.rs | 28 ++----------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/source/pip/src/qir_simulation/cpu_simulators.rs b/source/pip/src/qir_simulation/cpu_simulators.rs index dbcc663ebf..de5662bd97 100644 --- a/source/pip/src/qir_simulation/cpu_simulators.rs +++ b/source/pip/src/qir_simulation/cpu_simulators.rs @@ -155,7 +155,7 @@ fn run( num_results: u32, shots: u32, seed: Option, - mut noise: noise_config::NoiseConfig, + noise: noise_config::NoiseConfig, make_simulator: SimulatorBuilder, ) -> Vec where @@ -164,18 +164,6 @@ where Noise: From> + Send + Sync, S: Simulator, { - if !noise.rz.is_noiseless() { - if noise.s.is_noiseless() { - noise.s = noise.rz.clone(); - } - if noise.z.is_noiseless() { - noise.z = noise.rz.clone(); - } - if noise.s_adj.is_noiseless() { - noise.s_adj = noise.rz.clone(); - } - } - let noise: Noise = noise.into(); let noise = Arc::new(noise); @@ -366,7 +354,7 @@ fn run_adaptive( program: &bytecode::AdaptiveProgram, shots: u32, seed: Option, - mut noise: noise_config::NoiseConfig, + noise: noise_config::NoiseConfig, make_simulator: SimulatorBuilder, ) -> Vec where @@ -374,18 +362,6 @@ where Noise: From> + Send + Sync, S: Simulator, { - if !noise.rz.is_noiseless() { - if noise.s.is_noiseless() { - noise.s = noise.rz.clone(); - } - if noise.z.is_noiseless() { - noise.z = noise.rz.clone(); - } - if noise.s_adj.is_noiseless() { - noise.s_adj = noise.rz.clone(); - } - } - let noise: Noise = noise.into(); let noise = Arc::new(noise); From 20007aa4673d909b38d2544d2e83bb6f3b22ab3f Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Thu, 2 Apr 
2026 11:25:00 -0700 Subject: [PATCH 07/14] make `VOID_RETURN` sentil value depend on bytecode word width --- source/pip/qsharp/_adaptive_bytecode.py | 3 --- source/pip/qsharp/_adaptive_pass.py | 9 ++++++++- source/simulators/src/bytecode/runtime.rs | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/source/pip/qsharp/_adaptive_bytecode.py b/source/pip/qsharp/_adaptive_bytecode.py index 876a0a196f..b86b8e2cda 100644 --- a/source/pip/qsharp/_adaptive_bytecode.py +++ b/source/pip/qsharp/_adaptive_bytecode.py @@ -127,6 +127,3 @@ REG_TYPE_F32 = 3 REG_TYPE_F64 = 4 REG_TYPE_PTR = 5 - -# ── Sentinel values ────────────────────────────────────────────────────────── -VOID_RETURN = 0xFFFFFFFF # Function does not have a return value. diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index 214764d8c9..504c3f5808 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -272,6 +272,13 @@ def encode_float_as_bits(val: float, bytecode_kind: Bytecode) -> int: return struct.unpack(" None: self.call_args.append(reg.val) # Allocate return register if function has non-void return type if call.type.is_void: - return_reg = VOID_RETURN # no return + return_reg = void_return(self._bytecode_kind) # no return else: return_reg = self._alloc_reg(call, REG_TYPE_I32) self._emit( diff --git a/source/simulators/src/bytecode/runtime.rs b/source/simulators/src/bytecode/runtime.rs index db0d0e6894..ef66368d36 100644 --- a/source/simulators/src/bytecode/runtime.rs +++ b/source/simulators/src/bytecode/runtime.rs @@ -144,7 +144,7 @@ const OPID_CY: u64 = 29; const OPID_CORRELATED_NOISE: u64 = 131; // Sentinel -const VOID_RETURN: u64 = 0xFFFF_FFFF; +const VOID_RETURN: u64 = 0xFFFF_FFFF_FFFF_FFFF; // --------------------------------------------------------------------------- // Runtime state From 11eac429e0ef757bd61fb0e769548a2fb7d398b2 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Thu, 2 Apr 2026 11:52:43 -0700 
Subject: [PATCH 08/14] cleanup runtime.rs --- .../pip/tests/test_adaptive_cpu_bytecode.py | 2 +- .../pip/tests/test_adaptive_gpu_bytecode.py | 2 +- source/simulators/src/bytecode/runtime.rs | 120 +++++++++--------- 3 files changed, 60 insertions(+), 64 deletions(-) diff --git a/source/pip/tests/test_adaptive_cpu_bytecode.py b/source/pip/tests/test_adaptive_cpu_bytecode.py index 29656d8b26..9118b27291 100644 --- a/source/pip/tests/test_adaptive_cpu_bytecode.py +++ b/source/pip/tests/test_adaptive_cpu_bytecode.py @@ -800,7 +800,7 @@ def test_zext(sim_type): SEXT_QIR = """ ; sext i1 true to i64 → -1 (all ones), check -1 < 0 → true %s = sext i1 true to i64 - %flag = icmp slt i64 %s, 0 + %flag = icmp eq i64 %s, -1 """ diff --git a/source/pip/tests/test_adaptive_gpu_bytecode.py b/source/pip/tests/test_adaptive_gpu_bytecode.py index 8cd2dcb93a..e7e1335e66 100644 --- a/source/pip/tests/test_adaptive_gpu_bytecode.py +++ b/source/pip/tests/test_adaptive_gpu_bytecode.py @@ -796,7 +796,7 @@ def test_zext(): SEXT_QIR = """ ; sext i1 true to i64 → -1 (all ones), check -1 < 0 → true %s = sext i1 true to i64 - %flag = icmp slt i64 %s, 0 + %flag = icmp eq i64 %s, -1 """ diff --git a/source/simulators/src/bytecode/runtime.rs b/source/simulators/src/bytecode/runtime.rs index ef66368d36..02a9ff420c 100644 --- a/source/simulators/src/bytecode/runtime.rs +++ b/source/simulators/src/bytecode/runtime.rs @@ -11,11 +11,7 @@ clippy::cast_possible_truncation, clippy::cast_possible_wrap, clippy::cast_precision_loss, - clippy::cast_sign_loss, - clippy::float_cmp, - clippy::match_same_arms, - clippy::single_match_else, - clippy::too_many_lines + clippy::cast_sign_loss )] use crate::{ @@ -230,44 +226,41 @@ fn dispatch_quantum_gate( let op = &program.quantum_ops[op_idx]; let op_id = op.op_id; - match op_id { - OPID_CORRELATED_NOISE => { - let qubit_count = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; - let arg_offset = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; - let table_id = 
op.q1 as u32; - let targets: Vec = (0..qubit_count) - .map(|i| rt.read_reg(program.call_args[arg_offset + i]) as usize) - .collect(); - sim.correlated_noise_intrinsic(table_id, &targets); - } - _ => { - let q1 = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; - let q2 = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; - let angle = op.angle; - match op_id { - OPID_X => sim.x(q1), - OPID_Y => sim.y(q1), - OPID_Z => sim.z(q1), - OPID_H => sim.h(q1), - OPID_S => sim.s(q1), - OPID_S_ADJ => sim.s_adj(q1), - OPID_T => sim.t(q1), - OPID_T_ADJ => sim.t_adj(q1), - OPID_SX => sim.sx(q1), - OPID_SX_ADJ => sim.sx_adj(q1), - OPID_RX => sim.rx(angle, q1), - OPID_RY => sim.ry(angle, q1), - OPID_RZ => sim.rz(angle, q1), - OPID_CX => sim.cx(q1, q2), - OPID_CY => sim.cy(q1, q2), - OPID_CZ => sim.cz(q1, q2), - OPID_RXX => sim.rxx(angle, q1, q2), - OPID_RYY => sim.ryy(angle, q1, q2), - OPID_RZZ => sim.rzz(angle, q1, q2), - OPID_SWAP => sim.swap(q1, q2), - OPID_MOVE => sim.mov(q1), - _ => panic!("unsupported quantum gate op_id={op_id}"), - } + if op_id == OPID_CORRELATED_NOISE { + let qubit_count = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; + let arg_offset = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; + let table_id = op.q1 as u32; + let targets: Vec = (0..qubit_count) + .map(|i| rt.read_reg(program.call_args[arg_offset + i]) as usize) + .collect(); + sim.correlated_noise_intrinsic(table_id, &targets); + } else { + let q1 = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; + let q2 = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; + let angle = op.angle; + match op_id { + OPID_X => sim.x(q1), + OPID_Y => sim.y(q1), + OPID_Z => sim.z(q1), + OPID_H => sim.h(q1), + OPID_S => sim.s(q1), + OPID_S_ADJ => sim.s_adj(q1), + OPID_T => sim.t(q1), + OPID_T_ADJ => sim.t_adj(q1), + OPID_SX => sim.sx(q1), + OPID_SX_ADJ => sim.sx_adj(q1), + OPID_RX => sim.rx(angle, q1), + OPID_RY => sim.ry(angle, q1), + OPID_RZ => sim.rz(angle, q1), + OPID_CX => sim.cx(q1, 
q2), + OPID_CY => sim.cy(q1, q2), + OPID_CZ => sim.cz(q1, q2), + OPID_RXX => sim.rxx(angle, q1, q2), + OPID_RYY => sim.ryy(angle, q1, q2), + OPID_RZZ => sim.rzz(angle, q1, q2), + OPID_SWAP => sim.swap(q1, q2), + OPID_MOVE => sim.mov(q1), + _ => panic!("unsupported quantum gate op_id={op_id}"), } } } @@ -310,6 +303,7 @@ fn dispatch_reset( // Main interpreter entry point // --------------------------------------------------------------------------- +#[allow(clippy::too_many_lines)] pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { const MAX_STEPS: u64 = 10_000_000; @@ -553,6 +547,7 @@ pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { OP_FCMP => { let a = rt.resolve_f64(instr.src0, flags, 0); let b = rt.resolve_f64(instr.src1, flags, 1); + #[allow(clippy::float_cmp)] let result = match subcode { FCMP_OEQ => a == b, FCMP_ONE => a != b, @@ -596,13 +591,25 @@ pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { } // ----- Type conversion ----- - OP_ZEXT => { + OP_ZEXT | OP_TRUNC | OP_INTTOPTR => { let val = rt.resolve_u64(instr.src0, flags, 0); rt.write_reg(instr.dst, val); rt.pc += 1; } OP_SEXT => { + // Sign-extend a narrower integer (src_bits wide) to a full i64. + // + // Uses the shift-left-then-arithmetic-shift-right trick: + // 1. Shift left by (64 - src_bits) to move the narrow sign bit + // into bit 63 (the i64 sign position). + // 2. Arithmetic shift right by the same amount to replicate the + // sign bit across all upper bits. 
+ // + // Example: sext i1 true (value 1, src_bits=1) + // shift = 63 + // 1 << 63 = 0x8000..0 (sign bit set) + // >> 63 = 0xFFFF..F (-1 as i64) let val = rt.resolve_i64(instr.src0, flags, 0); let src_bits = instr.aux0 as u32; let result = if src_bits > 0 && src_bits < 64 { @@ -615,24 +622,12 @@ pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { rt.pc += 1; } - OP_TRUNC => { - let val = rt.resolve_u64(instr.src0, flags, 0); - rt.write_reg(instr.dst, val); - rt.pc += 1; - } - OP_FPEXT | OP_FPTRUNC => { let val = rt.resolve_f64(instr.src0, flags, 0); rt.write_f64(instr.dst, val); rt.pc += 1; } - OP_INTTOPTR => { - let val = rt.resolve_u64(instr.src0, flags, 0); - rt.write_reg(instr.dst, val); - rt.pc += 1; - } - OP_FPTOSI => { let val = rt.resolve_f64(instr.src0, flags, 0); rt.write_reg(instr.dst, val as i64 as u64); @@ -661,6 +656,13 @@ pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { } // ----- Data movement ----- + #[allow(clippy::match_same_arms)] + OP_MOV => { + let val = rt.resolve_u64(instr.src0, flags, 0); + rt.write_reg(instr.dst, val); + rt.pc += 1; + } + OP_SELECT => { let cond = rt.resolve_u64(instr.src0, flags, 0) != 0; let true_val = rt.resolve_u64(instr.aux0, flags, 3); @@ -669,12 +671,6 @@ pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { rt.pc += 1; } - OP_MOV => { - let val = rt.resolve_u64(instr.src0, flags, 0); - rt.write_reg(instr.dst, val); - rt.pc += 1; - } - OP_CONST => { rt.write_reg(instr.dst, instr.src0); rt.pc += 1; From 78134d3ef65511d92bf4a795dcf70f6a06508044 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Thu, 2 Apr 2026 15:39:38 -0700 Subject: [PATCH 09/14] move `z`, `s`, `s_adj` inherit-noise-from-rz logic to device specific code --- source/pip/qsharp/_device/_atom/__init__.py | 19 ++++++++++ source/pip/qsharp/_native.pyi | 5 +++ source/pip/src/qir_simulation.rs | 10 ++++-- .../devices/test_atom_e2e.py | 35 +++++++++++++++++++ source/pip/tests/test_clifford_simulator.py | 32 ----------------- 
source/pip/tests/test_noisy_config.py | 9 +++-- 6 files changed, 73 insertions(+), 37 deletions(-) diff --git a/source/pip/qsharp/_device/_atom/__init__.py b/source/pip/qsharp/_device/_atom/__init__.py index 6a8b0ae80c..2fb6d16bd4 100644 --- a/source/pip/qsharp/_device/_atom/__init__.py +++ b/source/pip/qsharp/_device/_atom/__init__.py @@ -258,6 +258,25 @@ def simulate( if noise is None: noise = NoiseConfig() + # Override s, s_adj, and z noise if they are unset + # and rz noise is set. + if noise and not noise.rz.is_noiseless(): + if noise.s.is_noiseless(): + noise.s.x = noise.rz.x + noise.s.y = noise.rz.y + noise.s.z = noise.rz.z + noise.s.loss = noise.rz.loss + if noise.s_adj.is_noiseless(): + noise.s_adj.x = noise.rz.x + noise.s_adj.y = noise.rz.y + noise.s_adj.z = noise.rz.z + noise.s_adj.loss = noise.rz.loss + if noise.z.is_noiseless(): + noise.z.x = noise.rz.x + noise.z.y = noise.rz.y + noise.z.z = noise.rz.z + noise.z.loss = noise.rz.loss + compiled = self.compile(qir) module = Module.from_ir(Context(), str(compiled)) ValidateNoConditionalBranches().run(module) diff --git a/source/pip/qsharp/_native.pyi b/source/pip/qsharp/_native.pyi index 2d059e83ad..0dc20ae804 100644 --- a/source/pip/qsharp/_native.pyi +++ b/source/pip/qsharp/_native.pyi @@ -896,6 +896,11 @@ class NoiseTable: The phase flip noise to use in simulation. """ + def is_noiseless(self) -> bool: + """ + Returns `true` if there is no noise set. 
+ """ + class NoiseIntrinsicsTable: def __contains__(self, name: str) -> bool: """ diff --git a/source/pip/src/qir_simulation.rs b/source/pip/src/qir_simulation.rs index 27f360c92c..28be031f63 100644 --- a/source/pip/src/qir_simulation.rs +++ b/source/pip/src/qir_simulation.rs @@ -418,9 +418,9 @@ impl NoiseTable { if let Some(p) = self.pauli_noise.get(&key) { return Ok(*p); } - Err(PyAttributeError::new_err(format!( - "'NoiseTable' object has no attribute '{pauli}'", - ))) + // If pauli string is valid but is not in the noise table + // it means it has not been set. Just return 0 in this case. + Ok(0.0) } /// Set the probability of noise for an element on the [`NoiseTable`] @@ -610,6 +610,10 @@ or one argument of type 'list[tuple[str, float]]', but found {py_args:?}" pub fn set_phaseflip(&mut self, value: Probability) -> PyResult<()> { self.set_pauli_noise_elt("Z", value) } + + pub fn is_noiseless(&self) -> PyResult { + Ok(self.pauli_noise.is_empty() && self.loss == 0.0) + } } impl From for qdk_simulators::noise_config::NoiseTable { diff --git a/source/pip/tests-integration/devices/test_atom_e2e.py b/source/pip/tests-integration/devices/test_atom_e2e.py index 753f93d784..258f17873a 100644 --- a/source/pip/tests-integration/devices/test_atom_e2e.py +++ b/source/pip/tests-integration/devices/test_atom_e2e.py @@ -158,3 +158,38 @@ def test_device_simulate_with_loss() -> None: assert result == [[qsharp.Result.Loss, qsharp.Result.Loss]] assert result2 == [[qsharp.Result.Loss, qsharp.Result.Loss]] + + +def test_s_noise_inherits_from_rz(): + qsharp.init(target_profile=qsharp.TargetProfile.Base) + qsharp.eval("operation Main() : Result { use q = Qubit(); S(q); MResetZ(q) }") + ir = qsharp.compile("Main()") + noise = NoiseConfig() + noise.rz.x = 1.0 + device = NeutralAtomDevice() + output = device.simulate(ir, 1, noise) + assert output == [qsharp.Result.One] + + +def test_z_noise_inherits_from_rz(): + qsharp.init(target_profile=qsharp.TargetProfile.Base) + 
qsharp.eval("operation Main() : Result { use q = Qubit(); Z(q); MResetZ(q) }") + ir = qsharp.compile("Main()") + noise = NoiseConfig() + noise.rz.x = 1.0 + device = NeutralAtomDevice() + output = device.simulate(ir, 1, noise) + assert output == [qsharp.Result.One] + + +def test_s_adj_noise_inherits_from_rz(): + qsharp.init(target_profile=qsharp.TargetProfile.Base) + qsharp.eval( + "operation Main() : Result { use q = Qubit(); Adjoint S(q); MResetZ(q) }" + ) + ir = qsharp.compile("Main()") + noise = NoiseConfig() + noise.rz.x = 1.0 + device = NeutralAtomDevice() + output = device.simulate(ir, 1, noise) + assert output == [qsharp.Result.One] diff --git a/source/pip/tests/test_clifford_simulator.py b/source/pip/tests/test_clifford_simulator.py index 76acfd934e..40a1ddaffa 100644 --- a/source/pip/tests/test_clifford_simulator.py +++ b/source/pip/tests/test_clifford_simulator.py @@ -69,38 +69,6 @@ def test_million(): print(output) -def test_s_noise_inherits_from_rz(): - qsharp.init(target_profile=TargetProfile.Base) - qsharp.eval("operation Main() : Result { use q = Qubit(); S(q); MResetZ(q) }") - ir = qsharp.compile("Main()") - noise = NoiseConfig() - noise.rz.x = 1.0 - output = run_qir_clifford(str(ir), 1, noise) - assert output == [Result.One] - - -def test_z_noise_inherits_from_rz(): - qsharp.init(target_profile=TargetProfile.Base) - qsharp.eval("operation Main() : Result { use q = Qubit(); Z(q); MResetZ(q) }") - ir = qsharp.compile("Main()") - noise = NoiseConfig() - noise.rz.x = 1.0 - output = run_qir_clifford(str(ir), 1, noise) - assert output == [Result.One] - - -def test_s_adj_noise_inherits_from_rz(): - qsharp.init(target_profile=TargetProfile.Base) - qsharp.eval( - "operation Main() : Result { use q = Qubit(); Adjoint S(q); MResetZ(q) }" - ) - ir = qsharp.compile("Main()") - noise = NoiseConfig() - noise.rz.x = 1.0 - output = run_qir_clifford(str(ir), 1, noise) - assert output == [Result.One] - - def test_program_with_branching_succeeds(): 
qsharp.init(target_profile=TargetProfile.Adaptive_RI) qsharp.eval( diff --git a/source/pip/tests/test_noisy_config.py b/source/pip/tests/test_noisy_config.py index 7bb46eadea..1042b21f31 100644 --- a/source/pip/tests/test_noisy_config.py +++ b/source/pip/tests/test_noisy_config.py @@ -5,6 +5,11 @@ import pytest +def test_accessing_unset_valid_pauli(): + noise = NoiseConfig() + assert noise.h.x == 0 + + def test_setting_1q_noise(): noise = NoiseConfig() noise.h.set_pauli_noise("X", 0.01) @@ -96,10 +101,10 @@ def test_setting_non_valid_pauli_through_attr_errors(): noise.h.w = 0.01 -def test_accessing_non_set_pauli_attr_errors(): +def test_accessing_invalid_pauli_attr_errors(): noise = NoiseConfig() with pytest.raises(AttributeError): - noise.h.x + noise.h.w def test_accessing_non_valid_pauli_attr_errors(): From 4c32c7275f64ca0f53bdb5fa2199e980e04ecffc Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Tue, 7 Apr 2026 13:05:33 -0700 Subject: [PATCH 10/14] fix dynamic angles --- source/pip/qsharp/_adaptive_pass.py | 5 +- source/pip/qsharp/_native.pyi | 2 +- source/pip/qsharp/_simulation.py | 67 +++---- source/pip/src/qir_simulation.rs | 2 +- .../pip/src/qir_simulation/gpu_full_state.rs | 2 +- .../tests/test_adaptive_cpu_quantum_ops.py | 60 ++++++ .../tests/test_adaptive_gpu_quantum_ops.py | 174 ++++++++++-------- source/simulators/src/bytecode.rs | 4 +- source/simulators/src/bytecode/runtime.rs | 2 +- .../src/gpu_full_state_simulator/common.wgsl | 2 + .../gpu_full_state_simulator/shader_types.rs | 2 +- .../simulator_adaptive.wgsl | 72 ++++++++ 12 files changed, 265 insertions(+), 129 deletions(-) diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index 504c3f5808..eac4e14f33 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -167,7 +167,7 @@ class QuantumOp: q1: int q2: int q3: int - angle: float + angle: int @dataclass @@ -423,7 +423,7 @@ def _emit_quantum_op( q1: int = 0, q2: int = 0, q3: 
int = 0, - angle: float = 0.0, + angle: int = 0, ) -> int: idx = self._next_qop self._next_qop += 1 @@ -784,6 +784,7 @@ def _emit_quantum_call(self, call: pyqir.Call) -> None: qop_idx = self._emit_quantum_op(op_id, q1.val, q2.val, q3.val, angle.val) self._emit( OP_QUANTUM_GATE, + src0=angle, aux0=qop_idx, aux1=q1, aux2=q2, diff --git a/source/pip/qsharp/_native.pyi b/source/pip/qsharp/_native.pyi index 0dc20ae804..e52158aabd 100644 --- a/source/pip/qsharp/_native.pyi +++ b/source/pip/qsharp/_native.pyi @@ -1064,9 +1064,9 @@ def try_create_gpu_adapter() -> str: def run_parallel_shots( input: List[QirInstruction], - shots: int, qubit_count: int, result_count: int, + shots: int, noise: Optional[NoiseConfig], seed: Optional[int], ) -> List[str]: diff --git a/source/pip/qsharp/_simulation.py b/source/pip/qsharp/_simulation.py index 7a3aae40af..b4df87f577 100644 --- a/source/pip/qsharp/_simulation.py +++ b/source/pip/qsharp/_simulation.py @@ -504,6 +504,30 @@ def str_to_result(result: str): raise ValueError(f"Invalid result {result}") +def run_base( + rust_run_base_fn: Callable, + mod: pyqir.Module, + shots: int, + noise: Optional[NoiseConfig], + seed: int, +): + """ + Runs a base profile program given a rust simulator. Adds output recording logic. + """ + if noise is None: + (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) + else: + (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) + recorder = OutputRecordingPass() + recorder.run(mod) + return list( + map( + recorder.process_output, + rust_run_base_fn(gates, num_qubits, num_results, shots, noise, seed), + ) + ) + + def run_adaptive( rust_run_adaptive_fn: Callable, program: AdaptiveProgram, @@ -512,7 +536,7 @@ def run_adaptive( seed: int, ): """ - Runs an adaptive program given a rust simulator. Adds output recording logic. + Runs an adaptive profile program given a rust simulator. Adds output recording logic. 
""" results = rust_run_adaptive_fn(program.as_dict(), shots, noise, seed) # Extract recorded output result indices from the bytecode. @@ -540,19 +564,7 @@ def run_qir_clifford( program = AdaptiveProfilePass(Bytecode.Bit64).run(mod, noise) return run_adaptive(run_clifford_adaptive, program, shots, noise, seed) else: - if noise is None: - (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) - else: - (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) - recorder = OutputRecordingPass() - recorder.run(mod) - - return list( - map( - recorder.process_output, - run_clifford(gates, num_qubits, num_results, shots, noise, seed), - ) - ) + return run_base(run_clifford, mod, shots, noise, seed) def run_qir_cpu( @@ -566,19 +578,7 @@ def run_qir_cpu( program = AdaptiveProfilePass(Bytecode.Bit64).run(mod, noise) return run_adaptive(run_cpu_adaptive, program, shots, noise, seed) else: - if noise is None: - (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) - else: - (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) - recorder = OutputRecordingPass() - recorder.run(mod) - - return list( - map( - recorder.process_output, - run_cpu_full_state(gates, num_qubits, num_results, shots, noise, seed), - ) - ) + return run_base(run_cpu_full_state, mod, shots, noise, seed) def run_qir_gpu( @@ -594,18 +594,7 @@ def run_qir_gpu( program = AdaptiveProfilePass(Bytecode.Bit32).run(mod, noise) return run_adaptive(run_adaptive_parallel_shots, program, shots, noise, seed) else: - if noise is None: - (gates, num_qubits, num_results) = AggregateGatesPass().run(mod) - else: - (gates, num_qubits, num_results) = CorrelatedNoisePass(noise).run(mod) - recorder = OutputRecordingPass() - recorder.run(mod) - return list( - map( - recorder.process_output, - run_parallel_shots(gates, shots, num_qubits, num_results, noise, seed), - ) - ) + return run_base(run_parallel_shots, mod, shots, noise, seed) def prepare_qir_with_correlated_noise( diff 
--git a/source/pip/src/qir_simulation.rs b/source/pip/src/qir_simulation.rs index 28be031f63..555d97798d 100644 --- a/source/pip/src/qir_simulation.rs +++ b/source/pip/src/qir_simulation.rs @@ -776,7 +776,7 @@ where type BlockTuple = (W, W, W); type InsTuple = (W, W, W, W, W, W, W, W); - type OpTuple = (W, W, W, W, f64); + type OpTuple = (W, W, W, W, W); type FunTuple = (W, W, W); type PhiTuple = (W, W); type SwitchTuple = (W, W); diff --git a/source/pip/src/qir_simulation/gpu_full_state.rs b/source/pip/src/qir_simulation/gpu_full_state.rs index fc30a822ca..e0aa4ac7be 100644 --- a/source/pip/src/qir_simulation/gpu_full_state.rs +++ b/source/pip/src/qir_simulation/gpu_full_state.rs @@ -38,9 +38,9 @@ pub fn try_create_gpu_adapter() -> PyResult { pub fn run_parallel_shots<'py>( py: Python<'py>, input: &Bound<'py, PyList>, - shots: i32, qubit_count: i32, result_count: i32, + shots: i32, noise_config: Option<&Bound<'py, NoiseConfig>>, seed: Option, ) -> PyResult> { diff --git a/source/pip/tests/test_adaptive_cpu_quantum_ops.py b/source/pip/tests/test_adaptive_cpu_quantum_ops.py index 1296b510d2..dcc4f7323e 100644 --- a/source/pip/tests/test_adaptive_cpu_quantum_ops.py +++ b/source/pip/tests/test_adaptive_cpu_quantum_ops.py @@ -375,3 +375,63 @@ def test_teleport_chain_histogram(sim_type): assert count_00 > 4000, f"Expected ~5000 '00' results, got {count_00}" assert count_11 > 4000, f"Expected ~5000 '11' results, got {count_11}" assert count_00 + count_11 == 10000, "All shots should produce a result" + + +DYNAMIC_ROTATION_ANGLE_QIR = r""" +%Result = type opaque +%Qubit = type opaque + +@0 = internal constant [4 x i8] c"0_r\00" + +define i64 @ENTRYPOINT__main() #0 { +block_0: + call void @__quantum__rt__initialize(i8* null) + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + %var_1 = call i1 @__quantum__rt__read_result(%Result* 
inttoptr (i64 0 to %Result*)) + %var_2 = icmp eq i1 %var_1, false + br i1 %var_2, label %block_1, label %block_2 +block_1: + br label %block_3 +block_2: + br label %block_3 +block_3: + %var_3 = phi double [0.5, %block_1], [1.0, %block_2] + call void @__quantum__qis__rx__body(double %var_3, %Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 1 to %Result*), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @0, i64 0, i64 0)) + ret i64 0 +} + +declare void @__quantum__rt__initialize(i8*) +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) #1 +declare i1 @__quantum__rt__read_result(%Result*) +declare void @__quantum__qis__rx__body(double, %Qubit*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "output_labeling_schema" "qir_profiles"="adaptive_profile" "required_num_qubits"="2" "required_num_results"="2" } +attributes #1 = { "irreversible" } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5} + +!0 = !{i32 1, !"qir_major_version", i32 1} +!1 = !{i32 7, !"qir_minor_version", i32 0} +!2 = !{i32 1, !"dynamic_qubit_management", i1 false} +!3 = !{i32 1, !"dynamic_result_management", i1 false} +!4 = !{i32 5, !"int_computations", !{!"i64"}} +!5 = !{i32 5, !"float_computations", !{!"double"}} +""" + + +def test_dynamic_rotation_angle(): + results = _run(DYNAMIC_ROTATION_ANGLE_QIR, shots=10_000, seed=42, sim_type="cpu") + assert len(results) == 10_000 + + counts = Counter(results) + count_0 = counts.get("0", 0) + count_1 = counts.get("1", 0) + + assert count_1 > 1400, f"Expected ~15% '1' results, got {count_1}" + assert count_0 > 8400, f"Expected ~85% '0' results, got {count_0}" + assert count_0 + count_1 == 10_000, "All shots should produce a result" diff --git 
a/source/pip/tests/test_adaptive_gpu_quantum_ops.py b/source/pip/tests/test_adaptive_gpu_quantum_ops.py index 13b0bc0a1e..5befd7a4c8 100644 --- a/source/pip/tests/test_adaptive_gpu_quantum_ops.py +++ b/source/pip/tests/test_adaptive_gpu_quantum_ops.py @@ -37,6 +37,18 @@ from qsharp._simulation import GpuSimulator +# Acquiring the GPU resources takes time, so we acquire them once and use them +# for all the tests. This is fine since pytest runs tests sequentially. +sim = GpuSimulator() + + +def run_shots(qir: str, shots: int = 10_000, seed: int = 42): + """Run *qir* on the GPU and return the shot_results list.""" + global sim + sim.set_program(qir) + return sim.run_shots(shots, seed=seed) + + # --------------------------------------------------------------------------- # QIR source # --------------------------------------------------------------------------- @@ -124,12 +136,12 @@ def test_measure_and_correct_histogram(): Run 10000 shots and verify ~50/50 split of "0" and "1" outcomes. The measurement result records whether H collapsed to |1⟩ (then X corrects). 
""" - sim = GpuSimulator() - sim.set_program(MEASURE_AND_CORRECT_QIR) - results = sim.run_shots(10000, seed=42) - + results = run_shots(MEASURE_AND_CORRECT_QIR) shot_results = results["shot_results"] assert len(shot_results) == 10000 + assert all( + code == 0 for code in results["shot_result_codes"] + ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" counts = Counter(shot_results) # Each shot produces a single-bit result string: "0" or "1" @@ -142,19 +154,6 @@ def test_measure_and_correct_histogram(): assert count_0 + count_1 == 10000, "All shots should produce a result" -@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) -def test_measure_and_correct_no_errors(): - """Example 1: All shots should complete without GPU errors.""" - sim = GpuSimulator() - sim.set_program(MEASURE_AND_CORRECT_QIR) - results = sim.run_shots(1000, seed=123) - - shot_result_codes = results["shot_result_codes"] - assert all( - code == 0 for code in shot_result_codes - ), f"Some shots had non-zero error codes: {[c for c in shot_result_codes if c != 0]}" - - @pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) def test_conditional_loop_all_results_are_one(): """Example 3: The loop exits only when measurement yields 1. @@ -163,12 +162,12 @@ def test_conditional_loop_all_results_are_one(): until that outcome. 
""" shots = 5000 - sim = GpuSimulator() - sim.set_program(CONDITIONAL_LOOP_QIR) - results = sim.run_shots(shots, seed=99) - + results = run_shots(CONDITIONAL_LOOP_QIR, shots=shots) shot_results = results["shot_results"] assert len(shot_results) == shots + assert all( + code == 0 for code in results["shot_result_codes"] + ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" counts = Counter(shot_results) # Every shot should exit with result "1" @@ -177,19 +176,6 @@ def test_conditional_loop_all_results_are_one(): ), f"Expected all {shots} shots to produce '1', got counts: {counts}" -@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) -def test_conditional_loop_no_errors(): - """Example 3: All shots should complete without GPU errors.""" - sim = GpuSimulator() - sim.set_program(CONDITIONAL_LOOP_QIR) - results = sim.run_shots(1000, seed=456) - - shot_result_codes = results["shot_result_codes"] - assert all( - code == 0 for code in shot_result_codes - ), f"Some shots had non-zero error codes: {[c for c in shot_result_codes if c != 0]}" - - # Example 2: Loop with phi node — GHZ state preparation # Applies H to qubit 0, then loops from i=1 to 4, # applying CNOT(q0, q_i) in each iteration using a phi node @@ -295,12 +281,12 @@ def test_loop_with_phi_ghz_histogram(): Creates (|00000⟩ + |11111⟩)/√2. All 5 measurements must agree. Run 10000 shots and verify only "00000" and "11111" appear near 50/50. 
""" - sim = GpuSimulator() - sim.set_program(LOOP_WITH_PHI_QIR) - results = sim.run_shots(10000, seed=42) - + results = run_shots(LOOP_WITH_PHI_QIR) shot_results = results["shot_results"] assert len(shot_results) == 10000 + assert all( + code == 0 for code in results["shot_result_codes"] + ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" counts = Counter(shot_results) # Only "00000" and "11111" should appear @@ -317,19 +303,6 @@ def test_loop_with_phi_ghz_histogram(): assert count_00000 + count_11111 == 10000, "All shots should produce a result" -@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) -def test_loop_with_phi_no_errors(): - """Example 2: All shots should complete without GPU errors.""" - sim = GpuSimulator() - sim.set_program(LOOP_WITH_PHI_QIR) - results = sim.run_shots(1000, seed=123) - - shot_result_codes = results["shot_result_codes"] - assert all( - code == 0 for code in shot_result_codes - ), f"Some shots had non-zero error codes: {[c for c in shot_result_codes if c != 0]}" - - # --------------------------------------------------------------------------- # Tests — Example 4: Boolean computation (AND gate) # --------------------------------------------------------------------------- @@ -342,12 +315,12 @@ def test_boolean_computation_histogram(): r2=1 only when both r0=1 AND r1=1 (~25% of shots). Run 10000 shots and verify ~25% "1" and ~75% "0". 
""" - sim = GpuSimulator() - sim.set_program(BOOLEAN_COMPUTATION_QIR) - results = sim.run_shots(10000, seed=42) - + results = run_shots(BOOLEAN_COMPUTATION_QIR) shot_results = results["shot_results"] assert len(shot_results) == 10000 + assert all( + code == 0 for code in results["shot_result_codes"] + ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" counts = Counter(shot_results) count_0 = counts.get("0", 0) @@ -358,19 +331,6 @@ def test_boolean_computation_histogram(): assert count_0 + count_1 == 10000, "All shots should produce a result" -@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) -def test_boolean_computation_no_errors(): - """Example 4: All shots should complete without GPU errors.""" - sim = GpuSimulator() - sim.set_program(BOOLEAN_COMPUTATION_QIR) - results = sim.run_shots(1000, seed=456) - - shot_result_codes = results["shot_result_codes"] - assert all( - code == 0 for code in shot_result_codes - ), f"Some shots had non-zero error codes: {[c for c in shot_result_codes if c != 0]}" - - # --------------------------------------------------------------------------- # QIR fixture — Example 5: Teleport chain # --------------------------------------------------------------------------- @@ -456,12 +416,12 @@ def test_teleport_chain_histogram(): Final measurements of q0 and q4 (results 2 and 3, labeled "0_t0" and "0_t1") should be correlated: both "0" or both "1", near 50/50. 
""" - sim = GpuSimulator() - sim.set_program(TELEPORT_CHAIN_QIR) - results = sim.run_shots(10000, seed=42) - + results = run_shots(TELEPORT_CHAIN_QIR) shot_results = results["shot_results"] assert len(shot_results) == 10000 + assert all( + code == 0 for code in results["shot_result_codes"] + ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" counts = Counter(shot_results) # Only "00" and "11" should appear (results 4 and 5 are correlated) @@ -478,14 +438,66 @@ def test_teleport_chain_histogram(): assert count_00 + count_11 == 10000, "All shots should produce a result" -@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) -def test_teleport_chain_no_errors(): - """Example 5: All shots should complete without GPU errors.""" - sim = GpuSimulator() - sim.set_program(TELEPORT_CHAIN_QIR) - results = sim.run_shots(1000, seed=789) +DYNAMIC_ROTATION_ANGLE_QIR = r""" +%Result = type opaque +%Qubit = type opaque + +@0 = internal constant [4 x i8] c"0_r\00" + +define i64 @ENTRYPOINT__main() #0 { +block_0: + call void @__quantum__rt__initialize(i8* null) + call void @__quantum__qis__h__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + %var_1 = call i1 @__quantum__rt__read_result(%Result* inttoptr (i64 0 to %Result*)) + %var_2 = icmp eq i1 %var_1, false + br i1 %var_2, label %block_1, label %block_2 +block_1: + br label %block_3 +block_2: + br label %block_3 +block_3: + %var_3 = phi double [0.5, %block_1], [1.0, %block_2] + call void @__quantum__qis__rx__body(double %var_3, %Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 1 to %Result*), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @0, i64 0, i64 0)) + ret i64 0 +} + +declare void 
@__quantum__rt__initialize(i8*) +declare void @__quantum__qis__h__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) #1 +declare i1 @__quantum__rt__read_result(%Result*) +declare void @__quantum__qis__rx__body(double, %Qubit*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "output_labeling_schema" "qir_profiles"="adaptive_profile" "required_num_qubits"="2" "required_num_results"="2" } +attributes #1 = { "irreversible" } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5} + +!0 = !{i32 1, !"qir_major_version", i32 1} +!1 = !{i32 7, !"qir_minor_version", i32 0} +!2 = !{i32 1, !"dynamic_qubit_management", i1 false} +!3 = !{i32 1, !"dynamic_result_management", i1 false} +!4 = !{i32 5, !"int_computations", !{!"i64"}} +!5 = !{i32 5, !"float_computations", !{!"double"}} +""" + - shot_result_codes = results["shot_result_codes"] +@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) +def test_dynamic_rotation_angle(): + results = run_shots(DYNAMIC_ROTATION_ANGLE_QIR) + shot_results = results["shot_results"] + assert len(shot_results) == 10_000 assert all( - code == 0 for code in shot_result_codes - ), f"Some shots had non-zero error codes: {[c for c in shot_result_codes if c != 0]}" + code == 0 for code in results["shot_result_codes"] + ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" + + counts = Counter(shot_results) + count_0 = counts.get("0", 0) + count_1 = counts.get("1", 0) + + assert count_1 > 1400, f"Expected ~15% '1' results, got {count_1}" + assert count_0 > 8400, f"Expected ~85% '0' results, got {count_0}" + assert count_0 + count_1 == 10_000, "All shots should produce a result" diff --git a/source/simulators/src/bytecode.rs b/source/simulators/src/bytecode.rs index afc77f5685..c477b8abc3 100644 --- a/source/simulators/src/bytecode.rs +++ b/source/simulators/src/bytecode.rs @@ -209,12 +209,12 @@ pub struct Op { pub q1: Word, pub q2: 
Word, pub q3: Word, - pub angle: f64, + pub angle: Word, } impl Op { #[must_use] - pub fn from_tuple(t: (Word, Word, Word, Word, f64)) -> Self { + pub fn from_tuple(t: (Word, Word, Word, Word, Word)) -> Self { Self { op_id: t.0, q1: t.1, diff --git a/source/simulators/src/bytecode/runtime.rs b/source/simulators/src/bytecode/runtime.rs index 02a9ff420c..dfbe161a84 100644 --- a/source/simulators/src/bytecode/runtime.rs +++ b/source/simulators/src/bytecode/runtime.rs @@ -235,9 +235,9 @@ fn dispatch_quantum_gate( .collect(); sim.correlated_noise_intrinsic(table_id, &targets); } else { + let angle = rt.resolve_f64(instr.src0, instr.opcode, 0); let q1 = rt.resolve_u64(instr.aux1, instr.opcode, 4) as usize; let q2 = rt.resolve_u64(instr.aux2, instr.opcode, 5) as usize; - let angle = op.angle; match op_id { OPID_X => sim.x(q1), OPID_Y => sim.y(q1), diff --git a/source/simulators/src/gpu_full_state_simulator/common.wgsl b/source/simulators/src/gpu_full_state_simulator/common.wgsl index 769c7ee2ca..907dae91d8 100644 --- a/source/simulators/src/gpu_full_state_simulator/common.wgsl +++ b/source/simulators/src/gpu_full_state_simulator/common.wgsl @@ -34,6 +34,8 @@ const OPID_S = 6u; const OPID_SAdj = 7u; const OPID_T = 8u; const OPID_TAdj = 9u; +const OPID_RX = 12u; +const OPID_RY = 13u; const OPID_RZ = 14u; const OPID_CX = 15u; const OPID_CZ = 16u; diff --git a/source/simulators/src/gpu_full_state_simulator/shader_types.rs b/source/simulators/src/gpu_full_state_simulator/shader_types.rs index 5a45c7c0ed..6c1e66c468 100644 --- a/source/simulators/src/gpu_full_state_simulator/shader_types.rs +++ b/source/simulators/src/gpu_full_state_simulator/shader_types.rs @@ -1113,7 +1113,7 @@ pub fn build_op_pool(compact_ops: &[bytecode::Op]) -> Vec { angle, }| { #[allow(clippy::cast_possible_truncation)] - let angle_f32 = angle as f32; + let angle_f32 = f32::from_bits(angle); match op_id { ops::ID => Op::new_id_gate(q1), ops::RESETZ => Op::new_resetz_gate(q1), diff --git 
a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl index 47b9af7b50..56f455558e 100644 --- a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl +++ b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl @@ -484,6 +484,15 @@ fn resolve_q2(shot_idx: u32) -> u32 { return read_reg(shot_idx, instr.aux2); } +// Resolves the rotation angle for the current quantum instruction. +// The angle is stored in the instruction's src0 field (register or immediate). +fn resolve_gate_angle(shot_idx: u32) -> f32 { + let state = shots[shot_idx].interp; + let instr = fetch_instr(state.pc - 1); + let flags = get_flags(instr.opcode); + return resolve_f32(shot_idx, instr.src0, flags, 0u); +} + fn get_measure_qubit(shot_idx: u32, op_idx: u32) -> u32 { return resolve_q1(shot_idx); } @@ -1343,6 +1352,69 @@ fn prepare_op(@builtin(global_invocation_id) globalId: vec3) { shot.unitary = op.unitary; + // For rotation gates, recompute the unitary from the (possibly dynamic) angle + // stored in the instruction's src0 field. The op pool unitary was built at upload + // time and may not reflect a runtime-computed angle. 
+ if op_type == 0u { + if op.id == OPID_RX || op.id == OPID_RY || op.id == OPID_RZ { + let angle = resolve_gate_angle(shot_idx); + let half = angle * 0.5; + let c = cos(half); + let s = sin(half); + if op.id == OPID_RX { + // [[cos(θ/2), -i·sin(θ/2)], [-i·sin(θ/2), cos(θ/2)]] + shot.unitary[0] = vec2f(c, 0.0); + shot.unitary[1] = vec2f(0.0, -s); + shot.unitary[4] = vec2f(0.0, -s); + shot.unitary[5] = vec2f(c, 0.0); + } else if op.id == OPID_RY { + // [[cos(θ/2), -sin(θ/2)], [sin(θ/2), cos(θ/2)]] + shot.unitary[0] = vec2f(c, 0.0); + shot.unitary[1] = vec2f(-s, 0.0); + shot.unitary[4] = vec2f(s, 0.0); + shot.unitary[5] = vec2f(c, 0.0); + } else { + // RZ: [[1, 0], [0, e^(iθ)]] + shot.unitary[0] = vec2f(1.0, 0.0); + shot.unitary[1] = vec2f(0.0, 0.0); + shot.unitary[4] = vec2f(0.0, 0.0); + shot.unitary[5] = vec2f(cos(angle), sin(angle)); + } + } else if op.id == OPID_RXX || op.id == OPID_RYY || op.id == OPID_RZZ { + let angle = resolve_gate_angle(shot_idx); + let half = angle * 0.5; + let c = cos(half); + let s = sin(half); + if op.id == OPID_RXX { + // exp(-i·θ/2·X⊗X) + shot.unitary[0] = vec2f(c, 0.0); // 00,00 + shot.unitary[3] = vec2f(0.0, -s); // 00,11 + shot.unitary[5] = vec2f(c, 0.0); // 01,01 + shot.unitary[6] = vec2f(0.0, -s); // 01,10 + shot.unitary[9] = vec2f(0.0, -s); // 10,01 + shot.unitary[10] = vec2f(c, 0.0); // 10,10 + shot.unitary[12] = vec2f(0.0, -s); // 11,00 + shot.unitary[15] = vec2f(c, 0.0); // 11,11 + } else if op.id == OPID_RYY { + // exp(-i·θ/2·Y⊗Y) + shot.unitary[0] = vec2f(c, 0.0); // 00,00 + shot.unitary[3] = vec2f(0.0, s); // 00,11 (+i·sin) + shot.unitary[5] = vec2f(c, 0.0); // 01,01 + shot.unitary[6] = vec2f(0.0, -s); // 01,10 + shot.unitary[9] = vec2f(0.0, -s); // 10,01 + shot.unitary[10] = vec2f(c, 0.0); // 10,10 + shot.unitary[12] = vec2f(0.0, s); // 11,00 (+i·sin) + shot.unitary[15] = vec2f(c, 0.0); // 11,11 + } else { + // RZZ: diag(1, e^(iθ), e^(iθ), 1) + shot.unitary[0] = vec2f(1.0, 0.0); + shot.unitary[5] = vec2f(cos(angle), 
sin(angle)); + shot.unitary[10] = vec2f(cos(angle), sin(angle)); + shot.unitary[15] = vec2f(1.0, 0.0); + } + } + } + switch op_type { case 0u { // Gate shot.op_idx = op_idx; From 590d8b8546eb750e5259ef3c4092d0aa5da53a72 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Tue, 7 Apr 2026 14:20:15 -0700 Subject: [PATCH 11/14] better comments --- .../simulator_adaptive.wgsl | 138 ++++++++++-------- 1 file changed, 75 insertions(+), 63 deletions(-) diff --git a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl index 56f455558e..277134c8a7 100644 --- a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl +++ b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl @@ -507,6 +507,18 @@ fn read_measurement_result(shot_idx: u32, result_id: u32) -> bool { return atomicLoad(&results[shot_idx * RESULT_COUNT + result_id]) == 1u; } +// Return true if the id corresponds to a rotation gate. +fn is_rotation_gate(id: u32) -> bool { + return (12 <= id && id <= 14) || (17 <= id && id <= 19); +} + +// Return true if the angle for the current rotation gate is dynamic. +fn is_dynamic_angle(shot_idx: u32) -> bool { + let state = shots[shot_idx].interp; + let instr = fetch_instr(state.pc - 1); + return (instr.opcode | FLAG_SRC0_IMM) != 0; +} + // For every qubit, each 'execute' kernel thread will update its own workgroup storage location for accumulating probabilities // The final probabilities will be reduced and written back to the shot state after the parallel execution completes. struct QubitProbabilityPerThread { @@ -1352,71 +1364,71 @@ fn prepare_op(@builtin(global_invocation_id) globalId: vec3) { shot.unitary = op.unitary; - // For rotation gates, recompute the unitary from the (possibly dynamic) angle - // stored in the instruction's src0 field. The op pool unitary was built at upload - // time and may not reflect a runtime-computed angle. 
- if op_type == 0u { - if op.id == OPID_RX || op.id == OPID_RY || op.id == OPID_RZ { - let angle = resolve_gate_angle(shot_idx); - let half = angle * 0.5; - let c = cos(half); - let s = sin(half); - if op.id == OPID_RX { - // [[cos(θ/2), -i·sin(θ/2)], [-i·sin(θ/2), cos(θ/2)]] - shot.unitary[0] = vec2f(c, 0.0); - shot.unitary[1] = vec2f(0.0, -s); - shot.unitary[4] = vec2f(0.0, -s); - shot.unitary[5] = vec2f(c, 0.0); - } else if op.id == OPID_RY { - // [[cos(θ/2), -sin(θ/2)], [sin(θ/2), cos(θ/2)]] - shot.unitary[0] = vec2f(c, 0.0); - shot.unitary[1] = vec2f(-s, 0.0); - shot.unitary[4] = vec2f(s, 0.0); - shot.unitary[5] = vec2f(c, 0.0); - } else { - // RZ: [[1, 0], [0, e^(iθ)]] - shot.unitary[0] = vec2f(1.0, 0.0); - shot.unitary[1] = vec2f(0.0, 0.0); - shot.unitary[4] = vec2f(0.0, 0.0); - shot.unitary[5] = vec2f(cos(angle), sin(angle)); - } - } else if op.id == OPID_RXX || op.id == OPID_RYY || op.id == OPID_RZZ { - let angle = resolve_gate_angle(shot_idx); - let half = angle * 0.5; - let c = cos(half); - let s = sin(half); - if op.id == OPID_RXX { - // exp(-i·θ/2·X⊗X) - shot.unitary[0] = vec2f(c, 0.0); // 00,00 - shot.unitary[3] = vec2f(0.0, -s); // 00,11 - shot.unitary[5] = vec2f(c, 0.0); // 01,01 - shot.unitary[6] = vec2f(0.0, -s); // 01,10 - shot.unitary[9] = vec2f(0.0, -s); // 10,01 - shot.unitary[10] = vec2f(c, 0.0); // 10,10 - shot.unitary[12] = vec2f(0.0, -s); // 11,00 - shot.unitary[15] = vec2f(c, 0.0); // 11,11 - } else if op.id == OPID_RYY { - // exp(-i·θ/2·Y⊗Y) - shot.unitary[0] = vec2f(c, 0.0); // 00,00 - shot.unitary[3] = vec2f(0.0, s); // 00,11 (+i·sin) - shot.unitary[5] = vec2f(c, 0.0); // 01,01 - shot.unitary[6] = vec2f(0.0, -s); // 01,10 - shot.unitary[9] = vec2f(0.0, -s); // 10,01 - shot.unitary[10] = vec2f(c, 0.0); // 10,10 - shot.unitary[12] = vec2f(0.0, s); // 11,00 (+i·sin) - shot.unitary[15] = vec2f(c, 0.0); // 11,11 - } else { - // RZZ: diag(1, e^(iθ), e^(iθ), 1) - shot.unitary[0] = vec2f(1.0, 0.0); - shot.unitary[5] = vec2f(cos(angle), 
sin(angle)); - shot.unitary[10] = vec2f(cos(angle), sin(angle)); - shot.unitary[15] = vec2f(1.0, 0.0); - } - } - } - switch op_type { case 0u { // Gate + // For rotation gates, recompute the unitary from the dynamic angle stored + // in the instruction's src0 field if needed. The op pool unitary was built + // at upload time and may not reflect a runtime-computed angle. + if is_rotation_gate(op.id) && is_dynamic_angle(shot_idx) { + if op.id == OPID_RX || op.id == OPID_RY || op.id == OPID_RZ { + let angle = resolve_gate_angle(shot_idx); + let half = angle * 0.5; + let c = cos(half); + let s = sin(half); + if op.id == OPID_RX { + // [[cos(θ/2), -i·sin(θ/2)], [-i·sin(θ/2), cos(θ/2)]] + shot.unitary[0] = vec2f(c, 0.0); + shot.unitary[1] = vec2f(0.0, -s); + shot.unitary[4] = vec2f(0.0, -s); + shot.unitary[5] = vec2f(c, 0.0); + } else if op.id == OPID_RY { + // [[cos(θ/2), -sin(θ/2)], [sin(θ/2), cos(θ/2)]] + shot.unitary[0] = vec2f(c, 0.0); + shot.unitary[1] = vec2f(-s, 0.0); + shot.unitary[4] = vec2f(s, 0.0); + shot.unitary[5] = vec2f(c, 0.0); + } else { + // RZ: [[1, 0], [0, e^(iθ)]] + shot.unitary[0] = vec2f(1.0, 0.0); + shot.unitary[1] = vec2f(0.0, 0.0); + shot.unitary[4] = vec2f(0.0, 0.0); + shot.unitary[5] = vec2f(cos(angle), sin(angle)); + } + } else if op.id == OPID_RXX || op.id == OPID_RYY || op.id == OPID_RZZ { + let angle = resolve_gate_angle(shot_idx); + let half = angle * 0.5; + let c = cos(half); + let s = sin(half); + if op.id == OPID_RXX { + // exp(-i·θ/2·X⊗X) + shot.unitary[0] = vec2f(c, 0.0); + shot.unitary[3] = vec2f(0.0, -s); + shot.unitary[5] = vec2f(c, 0.0); + shot.unitary[6] = vec2f(0.0, -s); + shot.unitary[9] = vec2f(0.0, -s); + shot.unitary[10] = vec2f(c, 0.0); + shot.unitary[12] = vec2f(0.0, -s); + shot.unitary[15] = vec2f(c, 0.0); + } else if op.id == OPID_RYY { + // exp(-i·θ/2·Y⊗Y) + shot.unitary[0] = vec2f(c, 0.0); + shot.unitary[3] = vec2f(0.0, s); + shot.unitary[5] = vec2f(c, 0.0); + shot.unitary[6] = vec2f(0.0, -s); + shot.unitary[9] = 
vec2f(0.0, -s); + shot.unitary[10] = vec2f(c, 0.0); + shot.unitary[12] = vec2f(0.0, s); + shot.unitary[15] = vec2f(c, 0.0); + } else { + // RZZ: diag(1, e^(iθ), e^(iθ), 1) + shot.unitary[0] = vec2f(1.0, 0.0); + shot.unitary[5] = vec2f(cos(angle), sin(angle)); + shot.unitary[10] = vec2f(cos(angle), sin(angle)); + shot.unitary[15] = vec2f(1.0, 0.0); + } + } + } + shot.op_idx = op_idx; shot.op_type = op.id; From 9ef68fc37be4fd048d36415c8dc21db2244ba700 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Mon, 27 Apr 2026 11:25:28 -0700 Subject: [PATCH 12/14] address PR feedback --- source/pip/qsharp/_adaptive_bytecode.py | 1 + source/pip/qsharp/_adaptive_pass.py | 15 +++- source/pip/qsharp/_device/_atom/__init__.py | 13 +++- .../pip/tests/test_adaptive_cpu_bytecode.py | 56 ++++++++++++++- .../pip/tests/test_adaptive_gpu_bytecode.py | 69 ++++++++++++++++++- source/simulators/src/bytecode/runtime.rs | 17 +++++ .../simulator_adaptive.wgsl | 13 ++++ 7 files changed, 179 insertions(+), 5 deletions(-) diff --git a/source/pip/qsharp/_adaptive_bytecode.py b/source/pip/qsharp/_adaptive_bytecode.py index b86b8e2cda..aa244fc59c 100644 --- a/source/pip/qsharp/_adaptive_bytecode.py +++ b/source/pip/qsharp/_adaptive_bytecode.py @@ -44,6 +44,7 @@ OP_RESET = 0x12 OP_READ_RESULT = 0x13 OP_RECORD_OUTPUT = 0x14 +OP_READ_LOSS = 0x15 # ── Integer Arithmetic ─────────────────────────────────────────────────────── OP_ADD = 0x20 diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index 4860f28c17..e8ad692dc3 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -167,6 +167,13 @@ class QuantumOp: q1: int q2: int q3: int + # ``angle`` is stored as the raw bit pattern of an IEEE-754 float + # (encoded via ``encode_float_as_bits``) so it can be packed into the + # same integer-typed FFI table as the qubit indices. The Rust side + # reinterprets these bits as f32/f64 depending on the bytecode width. 
+ # + # This also follows the same pattern in which floats are encoded as ints + # in the ``Instruction`` class. angle: int @@ -703,9 +710,15 @@ def _emit_call(self, call: pyqir.Call) -> None: | "__quantum__rt__begin_parallel" | "__quantum__rt__end_parallel" | "__quantum__qis__barrier__body" - | "__quantum__rt__read_loss" ): pass # No-op + case "__quantum__rt__read_loss": + # Allocate a bool register and emit OP_READ_LOSS so the runtime + # can ask the simulator whether the given result was produced + # by measuring a lost qubit. Programs may branch on this value. + dst = self._alloc_reg(call, REG_TYPE_BOOL) + result_reg = self._resolve_result_operand(call.args[0]) + self._emit(OP_READ_LOSS, dst=dst, src0=result_reg) case _ if callee in self._func_to_id: self._emit_ir_function_call(call) case _ if "qdk_noise" in call.callee.attributes.func: diff --git a/source/pip/qsharp/_device/_atom/__init__.py b/source/pip/qsharp/_device/_atom/__init__.py index 74bb1a80b2..f58a7ab77f 100644 --- a/source/pip/qsharp/_device/_atom/__init__.py +++ b/source/pip/qsharp/_device/_atom/__init__.py @@ -258,9 +258,18 @@ def simulate( if noise is None: noise = NoiseConfig() - # Override s, s_adj, and z noise if they are unset - # and rz noise is set. + # Override t, t_adj, s, s_adj, and z noise if they are unset and rz noise is set. 
if noise and not noise.rz.is_noiseless(): + if noise.t.is_noiseless(): + noise.t.x = noise.rz.x + noise.t.y = noise.rz.y + noise.t.z = noise.rz.z + noise.t.loss = noise.rz.loss + if noise.t_adj.is_noiseless(): + noise.t_adj.x = noise.rz.x + noise.t_adj.y = noise.rz.y + noise.t_adj.z = noise.rz.z + noise.t_adj.loss = noise.rz.loss if noise.s.is_noiseless(): noise.s.x = noise.rz.x noise.s.y = noise.rz.y diff --git a/source/pip/tests/test_adaptive_cpu_bytecode.py b/source/pip/tests/test_adaptive_cpu_bytecode.py index 9118b27291..89f8229bf5 100644 --- a/source/pip/tests/test_adaptive_cpu_bytecode.py +++ b/source/pip/tests/test_adaptive_cpu_bytecode.py @@ -15,7 +15,7 @@ from collections import Counter import pytest -from qsharp._simulation import run_qir, Result +from qsharp._simulation import run_qir, NoiseConfig, Result import qsharp.openqasm from typing import Literal @@ -470,6 +470,60 @@ def test_record_output_ordering(sim_type): ) +# ========================================================================= +# OP_READ_LOSS — read whether a measurement observed qubit loss +# ========================================================================= + +READ_LOSS_QIR = """ +entry: + ; Apply s to qubit 0 purely for its noise side effect. With + ; ``noise.s.loss = 1.0`` the simulator faults qubit 0 as lost on every + ; shot, so the next mz on qubit 0 records ``MeasurementResult::Loss`` + ; into result 0. Qubit 1 is left untouched (no noise on x), so the + ; conditional X below cleanly flips it to |1⟩. + call void @__quantum__qis__s__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + ; Read the loss bit for result 0 — should be 1 because the qubit was lost. + %lost = call i1 @__quantum__rt__read_loss(%Result* inttoptr (i64 0 to %Result*)) + br i1 %lost, label %then, label %end + +then: + ; Witness: if read_loss reported true, flip qubit 1 to |1⟩. 
+ call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + br label %end + +end: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) +""" + +READ_LOSS_DECLS = """ +declare i1 @__quantum__rt__read_loss(%Result*) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_read_loss(sim_type): + """rz (with 100% loss) → mz → read_loss → branch on loss → mz witness. + + Record both results: result 0 should always be ``Loss`` ('L'), and + result 1 should always be ``One`` ('1') because ``read_loss`` saw the + loss and the conditional X was applied to qubit 1. + """ + qir = format_qir( + READ_LOSS_QIR, + extra_decls=READ_LOSS_DECLS, + num_qubits=2, + num_results=2, + ) + noise = NoiseConfig() + noise.s.loss = 1.0 + results = run_qir(qir, SHOTS, noise, seed=42, type=sim_type) + counts = Counter(map_result_list_to_str(r) for r in results) + assert counts == { + "L1": SHOTS + }, f"Expected all {SHOTS} shots to be 'L1', got {counts}" + + # ######################################################################### # Integer Arithmetic # ######################################################################### diff --git a/source/pip/tests/test_adaptive_gpu_bytecode.py b/source/pip/tests/test_adaptive_gpu_bytecode.py index e7e1335e66..29c9184bac 100644 --- a/source/pip/tests/test_adaptive_gpu_bytecode.py +++ b/source/pip/tests/test_adaptive_gpu_bytecode.py @@ -35,7 +35,7 @@ except OSError as e: SKIP_REASON = str(e) -from qsharp._simulation import GpuSimulator +from qsharp._simulation import GpuSimulator, NoiseConfig, Result, run_qir # --------------------------------------------------------------------------- # Helpers @@ -57,6 +57,19 @@ def _run(qir: str, shots: int = SHOTS, seed: int = 42): return sim.run_shots(shots, seed=seed) +def map_result_list_to_str(results): + s = "" + for r in results: + match r: + case Result.Zero: + s += "0" + case Result.One: + s += "1" + case Result.Loss: 
+ s += "L" + return s + + def check_result( qir_fragment: str, expected: str, @@ -476,6 +489,60 @@ def test_record_output_ordering(): check_result(RECORD_OUTPUT_QIR, "10", num_qubits=2, num_results=2) +# ========================================================================= +# OP_READ_LOSS — read whether a measurement observed qubit loss +# ========================================================================= + +READ_LOSS_QIR = """ +entry: + ; Apply s to qubit 0 purely for its noise side effect. With + ; ``noise.s.loss = 1.0`` the simulator faults qubit 0 as lost on every + ; shot, so the next mz on qubit 0 records ``MeasurementResult::Loss`` + ; into result 0. Qubit 1 is left untouched (no noise on x), so the + ; conditional X below cleanly flips it to |1⟩. + call void @__quantum__qis__s__body(%Qubit* inttoptr (i64 0 to %Qubit*)) + call void @__quantum__qis__mz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + ; Read the loss bit for result 0 — should be 1 because the qubit was lost. + %lost = call i1 @__quantum__rt__read_loss(%Result* inttoptr (i64 0 to %Result*)) + br i1 %lost, label %then, label %end + +then: + ; Witness: if read_loss reported true, flip qubit 1 to |1⟩. + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + br label %end + +end: + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) +""" + +READ_LOSS_DECLS = """\ +declare i1 @__quantum__rt__read_loss(%Result*) +""" + + +@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) +def test_read_loss(): + """s (with 100% loss) → mz → read_loss → branch on loss → mz witness. + + Record both results: result 0 should always be ``Loss`` ('L'), and + result 1 should always be ``One`` ('1') because ``read_loss`` saw the + loss and the conditional X was applied to qubit 1. 
+ """ + qir = format_qir( + READ_LOSS_QIR, + extra_decls=READ_LOSS_DECLS, + num_qubits=2, + num_results=2, + ) + noise = NoiseConfig() + noise.s.loss = 1.0 + results = run_qir(qir, SHOTS, noise, seed=42, type="gpu") + counts = Counter(map_result_list_to_str(r) for r in results) + assert counts == { + "L1": SHOTS + }, f"Expected all {SHOTS} shots to be 'L1', got {counts}" + + # ######################################################################### # Integer Arithmetic # ######################################################################### diff --git a/source/simulators/src/bytecode/runtime.rs b/source/simulators/src/bytecode/runtime.rs index dfbe161a84..ebe136b725 100644 --- a/source/simulators/src/bytecode/runtime.rs +++ b/source/simulators/src/bytecode/runtime.rs @@ -48,6 +48,7 @@ const OP_MEASURE: u8 = 0x11; const OP_RESET: u8 = 0x12; const OP_READ_RESULT: u8 = 0x13; const OP_RECORD_OUTPUT: u8 = 0x14; +const OP_READ_LOSS: u8 = 0x15; // Integer arithmetic const OP_ADD: u8 = 0x20; @@ -424,6 +425,22 @@ pub fn run_shot(program: &AdaptiveProgram, sim: &mut S) { rt.pc += 1; } + OP_READ_LOSS => { + // Reports whether the measurement that produced this result + // observed a lost qubit. The simulator records ``Loss`` in + // its measurement buffer when the qubit was lost prior to + // the measurement; here we simply project that to a 1/0 bool + // for the program to branch on. + let result_id = rt.resolve_u64(instr.src0, flags, 0) as usize; + let measurements = sim.measurements(); + let val = u64::from( + result_id < measurements.len() + && matches!(measurements[result_id], MeasurementResult::Loss), + ); + rt.write_reg(instr.dst, val); + rt.pc += 1; + } + OP_RECORD_OUTPUT => { // No-op on CPU — results are read from the simulator directly. 
rt.pc += 1; diff --git a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl index 277134c8a7..8dea1dc259 100644 --- a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl +++ b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl @@ -322,6 +322,7 @@ const OP_MEASURE: u32 = 0x11; const OP_RESET: u32 = 0x12; const OP_READ_RESULT: u32 = 0x13; const OP_RECORD_OUTPUT: u32 = 0x14; +const OP_READ_LOSS: u32 = 0x15; // -- Integer Arithmetic ------------------------------------------------------- const OP_ADD: u32 = 0x20; @@ -946,6 +947,18 @@ fn interpret_classical(@builtin(global_invocation_id) gid: vec3) { pc++; } + // READ_LOSS: Reports whether the measurement that produced a + // result observed a lost qubit. The per-shot ``results`` buffer + // encodes loss as the value 2u (0u = Zero, 1u = One, 2u = Loss), + // so we compare against 2u and write 1u when the result was a loss, + // else 0u. + case OP_READ_LOSS { + let result_id = instr.src0; + let val = atomicLoad(&results[shot_idx * RESULT_COUNT + result_id]); + write_reg(shot_idx, instr.dst, select(0u, 1u, val == 2u)); + pc++; + } + // ------------------------------------------------------------- // INTEGER ARITHMETIC // ------------------------------------------------------------- From 2bf618d3d2f6d33f5274559456c301c68880effb Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Mon, 27 Apr 2026 18:43:01 -0700 Subject: [PATCH 13/14] Fix multiple bugs uncovered by recently enable integration tests Adds quantum_move instruction to bytecode, fix output recording for adaptive profile, and fix a few bugs in the adaptive GPU shader. 
--- source/pip/qsharp/_adaptive_pass.py | 25 ++++ source/pip/qsharp/_simulation.py | 58 +++++----- .../pip/tests/test_adaptive_cpu_bytecode.py | 108 +++++++++++++++++- source/pip/tests/test_adaptive_cpu_noise.py | 2 +- .../tests/test_adaptive_cpu_quantum_ops.py | 19 +-- .../pip/tests/test_adaptive_gpu_bytecode.py | 105 +++++++++++++++-- source/pip/tests/test_adaptive_gpu_noise.py | 4 +- .../tests/test_adaptive_gpu_quantum_ops.py | 32 ++++-- .../gpu_full_state_simulator/gpu_context.rs | 3 +- .../simulator_adaptive.wgsl | 19 ++- 10 files changed, 307 insertions(+), 68 deletions(-) diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index e8ad692dc3..b184e9e28d 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -55,6 +55,7 @@ class Bytecode(Enum): "mz": 21, "mresetz": 22, "swap": 24, + "move": 28, } # Gates that take a result ID as a second argument @@ -66,6 +67,12 @@ class Bytecode(Enum): # Rotation gates that take an angle parameter as first argument ROTATION_GATES = {"rx", "ry", "rz", "rxx", "ryy", "rzz"} +# Single-qubit gates whose QIR signature carries device-specific extra +# arguments after the qubit pointer (e.g. ``move(qubit, i64, i64)``). The +# extra args are scheduling metadata for hardware backends and are not +# qubit IDs, so we resolve only ``args[0]`` and ignore the rest. +MOVE_GATES = {"move"} + # --------------------------------------------------------------------------- # ICmp / FCmp predicate mappings # --------------------------------------------------------------------------- @@ -783,6 +790,24 @@ def _emit_quantum_call(self, call: pyqir.Call) -> None: aux1=q, ) return + if gate_name in MOVE_GATES: + # ``move(qubit, i64, i64)``: only the first arg is a qubit; the + # remaining args are device-specific scheduling metadata that + # the simulator ignores. 
Emit a single-qubit OP_QUANTUM_GATE so + # the runtime invokes ``Simulator::mov`` (which applies the + # configured ``noise.mov`` faults to that qubit). + q1, q2, q3 = self._resolve_qubit_operands([call.args[0]]) + angle = FloatOperand(0.0, self._bytecode_kind) + qop_idx = self._emit_quantum_op(op_id, q1.val, q2.val, q3.val, angle.val) + self._emit( + OP_QUANTUM_GATE, + src0=angle, + aux0=qop_idx, + aux1=q1, + aux2=q2, + aux3=q3, + ) + return if gate_name in ROTATION_GATES: qubit_arg_offset = 1 angle = self._resolve_angle_operand(call.args[0]) diff --git a/source/pip/qsharp/_simulation.py b/source/pip/qsharp/_simulation.py index d3ee5a5b8c..b20a2ef54b 100644 --- a/source/pip/qsharp/_simulation.py +++ b/source/pip/qsharp/_simulation.py @@ -530,6 +530,7 @@ def run_base( def run_adaptive( rust_run_adaptive_fn: Callable, + mod: pyqir.Module, program: AdaptiveProgram, shots: int, noise: Optional[NoiseConfig], @@ -539,18 +540,9 @@ def run_adaptive( Runs an adaptive profile program given a rust simulator. Adds output recording logic. """ results = rust_run_adaptive_fn(program.as_dict(), shots, noise, seed) - # Extract recorded output result indices from the bytecode. - # OP_RECORD_OUTPUT with aux1=0 is result_record_output where - # src0 is the result index in the results buffer. 
- recorded_result_indices = [] - for ins in program.instructions: - if (ins.opcode & 0xFF) == OP_RECORD_OUTPUT and ins.aux1 == 0: - recorded_result_indices.append(ins.src0) - # Filter shot_results to only include recorded output indices - filtered = [] - for s in results: - filtered.append([str_to_result(s[i]) for i in recorded_result_indices]) - return filtered + recorder = OutputRecordingPass() + recorder.run(mod) + return list(map(recorder.process_output, results)) def run_qir_clifford( @@ -562,7 +554,7 @@ def run_qir_clifford( (mod, shots, noise, seed) = preprocess_simulation_input(input, shots, noise, seed) if is_adaptive(mod): program = AdaptiveProfilePass(Bytecode.Bit64).run(mod, noise) - return run_adaptive(run_clifford_adaptive, program, shots, noise, seed) + return run_adaptive(run_clifford_adaptive, mod, program, shots, noise, seed) else: return run_base(run_clifford, mod, shots, noise, seed) @@ -577,7 +569,7 @@ def run_qir_cpu( DecomposeCcxPass().run(mod) if is_adaptive(mod): program = AdaptiveProfilePass(Bytecode.Bit64).run(mod, noise) - return run_adaptive(run_cpu_adaptive, program, shots, noise, seed) + return run_adaptive(run_cpu_adaptive, mod, program, shots, noise, seed) else: return run_base(run_cpu_full_state, mod, shots, noise, seed) @@ -593,7 +585,9 @@ def run_qir_gpu( DecomposeCcxPass().run(mod) if is_adaptive(mod): program = AdaptiveProfilePass(Bytecode.Bit32).run(mod, noise) - return run_adaptive(run_adaptive_parallel_shots, program, shots, noise, seed) + return run_adaptive( + run_adaptive_parallel_shots, mod, program, shots, noise, seed + ) else: return run_base(run_parallel_shots, mod, shots, noise, seed) @@ -625,7 +619,7 @@ class GpuSimulator: def __init__(self): self.gpu_context = GpuContext() self._is_adaptive = False - self._recorded_result_indices = [] + self._recorder = None self.tables = None def load_noise_tables( @@ -667,15 +661,11 @@ def set_program(self, input: Union[QirInputData, str, bytes]): mod, 
noise_intrinsics=noise_intrinsics ) self.gpu_context.set_adaptive_program(program.as_dict()) - - # Extract recorded output result indices from the bytecode. - # OP_RECORD_OUTPUT with aux1=0 is result_record_output where - # src0 is the result index in the results buffer. - self._recorded_result_indices = [] - for instr in program.instructions: - if instr.opcode & 0xFF == OP_RECORD_OUTPUT and instr.aux1 == 0: - self._recorded_result_indices.append(instr.src0) + # This is used later for output recording + self._recorder = OutputRecordingPass() + self._recorder.run(mod) else: + self._is_adaptive = False (self.gates, self.required_num_qubits, self.required_num_results) = ( prepare_qir_with_correlated_noise( input, self.tables if not self.tables is None else [] @@ -693,13 +683,19 @@ def run_shots(self, shots: int, seed: Optional[int] = None) -> "GpuShotResults": seed = seed if seed is not None else random.randint(0, 2**32 - 1) if self._is_adaptive: results = self.gpu_context.run_adaptive_shots(shots, seed=seed) - # Filter shot_results to only include recorded output indices - if self._recorded_result_indices: - indices = self._recorded_result_indices - filtered = [] - for s in results["shot_results"]: - filtered.append("".join(s[i] for i in indices)) - results["shot_results"] = filtered + for i, (shot_ret_code, shot_result) in enumerate( + zip(results["shot_result_codes"], results["shot_results"]) + ): + if shot_ret_code == 0: + # If the ret_code was zero, we do an output recording pass + # on the output. + results["shot_results"][i] = self._recorder.process_output( + shot_result + ) + else: + # If the shot finished with a ret_code other than zero, + # we set the result to `None`. 
+ results["shot_results"][i] = None return results return self.gpu_context.run_shots(shots, seed=seed) diff --git a/source/pip/tests/test_adaptive_cpu_bytecode.py b/source/pip/tests/test_adaptive_cpu_bytecode.py index 89f8229bf5..632bc69564 100644 --- a/source/pip/tests/test_adaptive_cpu_bytecode.py +++ b/source/pip/tests/test_adaptive_cpu_bytecode.py @@ -32,8 +32,11 @@ def map_result_list_to_str(results): results_str = "" - for r in results: - match r: + if isinstance(results, (list, tuple)): + for r in results: + results_str += map_result_list_to_str(r) + else: + match results: case Result.Zero: results_str += "0" case Result.One: @@ -186,15 +189,12 @@ def test_nop_smoke(sim_type): RET_QIR = """ entry: - ret i64 0 - call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) - call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) """ @pytest.mark.parametrize("sim_type", SIM_TYPES) def test_ret(sim_type): - check_result(RET_QIR, "0", sim_type=sim_type) + check_result(RET_QIR, "", sim_type=sim_type, num_qubits=0, num_results=0) # ========================================================================= @@ -524,6 +524,46 @@ def test_read_loss(sim_type): }, f"Expected all {SHOTS} shots to be 'L1', got {counts}" +# ========================================================================= +# move (OpID 28) — qubit move with associated noise +# ========================================================================= + +MOVE_QIR = """ +entry: + ; ``move`` is a no-op on the simulator state, but the simulator applies + ; the configured ``noise.mov`` faults to the moved qubit. With + ; ``noise.mov.x = 1.0`` every move flips the qubit, so q0 ends in |1⟩. 
+ call void @__quantum__qis__move__body(%Qubit* inttoptr (i64 0 to %Qubit*), i64 0, i64 0) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +MOVE_DECLS = """\ +declare void @__quantum__qis__move__body(%Qubit*, i64, i64) +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_move_applies_noise(sim_type): + """move (with 100% X noise) → mz → always 1.""" + qir = format_qir(MOVE_QIR, extra_decls=MOVE_DECLS, num_qubits=1, num_results=1) + noise = NoiseConfig() + noise.mov.x = 1.0 + results = run_qir(qir, SHOTS, noise, seed=42, type=sim_type) + counts = Counter(map_result_list_to_str(r) for r in results) + assert counts == {"1": SHOTS}, f"Expected all {SHOTS} shots to be '1', got {counts}" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_move_noiseless_is_noop(sim_type): + """move without noise is a pure no-op → q0 stays in |0⟩ → measure 0.""" + check_result( + MOVE_QIR, + "0", + extra_decls=MOVE_DECLS, + sim_type=sim_type, + ) + + # ######################################################################### # Integer Arithmetic # ######################################################################### @@ -1313,6 +1353,62 @@ def test_shift_bitwise_chain(sim_type): check_arith_result(SHIFT_BITWISE_CHAIN_QIR, "1", sim_type=sim_type) +# ######################################################################### +# Structured Output Recording +# ######################################################################### + + +NESTED_OUTPUT_QIR = """\ +%Result = type opaque +%Qubit = type opaque + +define i64 @ENTRYPOINT__main() #0 { + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 3 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), 
%Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 3 to %Qubit*), %Result* inttoptr (i64 3 to %Result*)) + call void @__quantum__rt__tuple_record_output(i64 2, i8* null) + call void @__quantum__rt__array_record_output(i64 2, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 0 to %Result*), i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 1 to %Result*), i8* null) + call void @__quantum__rt__array_record_output(i64 2, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 3 to %Result*), i8* null) + ret i64 0 +} + +declare void @__quantum__qis__x__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__array_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="4" "required_num_results"="4" } +""" + + +@pytest.mark.parametrize("sim_type", SIM_TYPES) +def test_nested_output_structure(sim_type): + """Verify that adaptive results preserve nested tuple/array structure. + + The QIR records output as a tuple of two arrays: ([r0, r1], [r2, r3]). + Before the fix, run_adaptive flattened this into [r0, r1, r2, r3]. 
+ """ + results = run_qir(NESTED_OUTPUT_QIR, shots=10, seed=42, type=sim_type) + for shot in results: + assert isinstance(shot, tuple), f"Expected tuple, got {type(shot)}: {shot}" + assert len(shot) == 2, f"Expected 2-element tuple, got {len(shot)}: {shot}" + assert isinstance( + shot[0], list + ), f"Expected list, got {type(shot[0])}: {shot[0]}" + assert isinstance( + shot[1], list + ), f"Expected list, got {type(shot[1])}: {shot[1]}" + assert shot == ([Result.Zero, Result.One], [Result.Zero, Result.One]) + + # ========================================================================= # OP_SWITCH with computed value from arithmetic # ========================================================================= diff --git a/source/pip/tests/test_adaptive_cpu_noise.py b/source/pip/tests/test_adaptive_cpu_noise.py index df829528e2..303c56e2ab 100644 --- a/source/pip/tests/test_adaptive_cpu_noise.py +++ b/source/pip/tests/test_adaptive_cpu_noise.py @@ -348,7 +348,7 @@ def test_noise_intrinsic_1q_x_flip(sim_type): table = noise.intrinsic("noise_1q", 1) table.x = 1.0 output = run_qir(QIR_NOISE_1Q, shots=1, noise=noise, type=sim_type) - assert output == [[Result.One]] + assert output == [Result.One] QASM_NOISE_2Q = """ diff --git a/source/pip/tests/test_adaptive_cpu_quantum_ops.py b/source/pip/tests/test_adaptive_cpu_quantum_ops.py index dcc4f7323e..0a119f7a92 100644 --- a/source/pip/tests/test_adaptive_cpu_quantum_ops.py +++ b/source/pip/tests/test_adaptive_cpu_quantum_ops.py @@ -26,16 +26,19 @@ def map_result_list_to_str(results): - results_str = "" - for r in results: - match r: + s = "" + if isinstance(results, (list, tuple)): + for r in results: + s += map_result_list_to_str(r) + else: + match results: case Result.Zero: - results_str += "0" + s += "0" case Result.One: - results_str += "1" + s += "1" case Result.Loss: - results_str += "L" - return results_str + s += "L" + return s def _run( @@ -238,6 +241,7 @@ def _run( call void @__quantum__qis__reset__body(%Qubit* 
inttoptr (i64 4 to %Qubit*)) br label %exit exit: + call void @__quantum__rt__tuple_record_output(i64 2, i8* null) call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @0, i32 0, i32 0)) call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 3 to %Result*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @1, i32 0, i32 0)) ret void @@ -252,6 +256,7 @@ def _run( declare void @__quantum__rt__initialize(i8*) declare i1 @__quantum__qis__read_result__body(%Result*) declare void @__quantum__rt__result_record_output(%Result*, i8*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="5" "required_num_results"="4" } attributes #1 = { "irreversible" } diff --git a/source/pip/tests/test_adaptive_gpu_bytecode.py b/source/pip/tests/test_adaptive_gpu_bytecode.py index 29c9184bac..195aec5262 100644 --- a/source/pip/tests/test_adaptive_gpu_bytecode.py +++ b/source/pip/tests/test_adaptive_gpu_bytecode.py @@ -59,8 +59,11 @@ def _run(qir: str, shots: int = SHOTS, seed: int = 42): def map_result_list_to_str(results): s = "" - for r in results: - match r: + if isinstance(results, (list, tuple)): + for r in results: + s += map_result_list_to_str(r) + else: + match results: case Result.Zero: s += "0" case Result.One: @@ -88,7 +91,7 @@ def check_result( record=record, ) results = _run(qir, SHOTS)["shot_results"] - counts = Counter(results) + counts = Counter(map_result_list_to_str(r) for r in results) assert counts == { expected: SHOTS }, f"Expected all {SHOTS} shots to be '{expected}', got {counts}" @@ -202,8 +205,6 @@ def test_nop_smoke(): # Every test already exercises RET implicitly. This tests an explicit early ret. 
RET_QIR = """ entry: - ret i64 0 - call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 0 to %Qubit*)) call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) """ @@ -543,6 +544,41 @@ def test_read_loss(): }, f"Expected all {SHOTS} shots to be 'L1', got {counts}" +# ========================================================================= +# move (OpID 28) — qubit move with associated noise +# ========================================================================= + +MOVE_QIR = """ +entry: + ; ``move`` is a no-op on the simulator state, but the simulator applies + ; the configured ``noise.mov`` faults to the moved qubit. With + ; ``noise.mov.x = 1.0`` every move flips the qubit, so q0 ends in |1⟩. + call void @__quantum__qis__move__body(%Qubit* inttoptr (i64 0 to %Qubit*), i64 0, i64 0) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) +""" + +MOVE_DECLS = """\ +declare void @__quantum__qis__move__body(%Qubit*, i64, i64) +""" + + +@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) +def test_move_applies_noise(): + """move (with 100% X noise) → mz → always 1.""" + qir = format_qir(MOVE_QIR, extra_decls=MOVE_DECLS, num_qubits=1, num_results=1) + noise = NoiseConfig() + noise.mov.x = 1.0 + results = run_qir(qir, SHOTS, noise, seed=42, type="gpu") + counts = Counter(map_result_list_to_str(r) for r in results) + assert counts == {"1": SHOTS}, f"Expected all {SHOTS} shots to be '1', got {counts}" + + +@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) +def test_move_noiseless_is_noop(): + """move without noise is a pure no-op → q0 stays in |0⟩ → measure 0.""" + check_result(MOVE_QIR, "0", extra_decls=MOVE_DECLS) + + # ######################################################################### # Integer Arithmetic # ######################################################################### @@ -1276,7 +1312,7 @@ def 
test_dynamic_qubit_loop(): """3-qubit GHZ via dynamic qubit loop — only '000' and '111' should appear.""" qir = format_qir(DYNAMIC_QUBIT_LOOP_QIR, num_qubits=3, num_results=3) results = _run(qir, shots=5000, seed=42)["shot_results"] - counts = Counter(results) + counts = Counter(map_result_list_to_str(r) for r in results) assert set(counts.keys()) <= {"000", "111"}, f"Unexpected GHZ outcomes: {counts}" assert counts.get("000", 0) > 1500 assert counts.get("111", 0) > 1500 @@ -1362,6 +1398,61 @@ def test_switch_from_arithmetic(): check_result(SWITCH_ARITH_QIR, "1") +# ######################################################################### +# Structured Output Recording +# ######################################################################### + +NESTED_OUTPUT_QIR = """\ +%Result = type opaque +%Qubit = type opaque + +define i64 @ENTRYPOINT__main() #0 { + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 1 to %Qubit*)) + call void @__quantum__qis__x__body(%Qubit* inttoptr (i64 3 to %Qubit*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 0 to %Qubit*), %Result* inttoptr (i64 0 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 1 to %Qubit*), %Result* inttoptr (i64 1 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 2 to %Qubit*), %Result* inttoptr (i64 2 to %Result*)) + call void @__quantum__qis__mresetz__body(%Qubit* inttoptr (i64 3 to %Qubit*), %Result* inttoptr (i64 3 to %Result*)) + call void @__quantum__rt__tuple_record_output(i64 2, i8* null) + call void @__quantum__rt__array_record_output(i64 2, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 0 to %Result*), i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 1 to %Result*), i8* null) + call void @__quantum__rt__array_record_output(i64 2, i8* null) + call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* null) + call void 
@__quantum__rt__result_record_output(%Result* inttoptr (i64 3 to %Result*), i8* null) + ret i64 0 +} + +declare void @__quantum__qis__x__body(%Qubit*) +declare void @__quantum__qis__mresetz__body(%Qubit*, %Result*) +declare void @__quantum__rt__tuple_record_output(i64, i8*) +declare void @__quantum__rt__array_record_output(i64, i8*) +declare void @__quantum__rt__result_record_output(%Result*, i8*) + +attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="4" "required_num_results"="4" } +""" + + +@pytest.mark.skipif(not GPU_AVAILABLE, reason=SKIP_REASON) +def test_nested_output_structure(): + """Verify that adaptive results preserve nested tuple/array structure. + + The QIR records output as a tuple of two arrays: ([r0, r1], [r2, r3]). + Before the fix, run_adaptive flattened this into [r0, r1, r2, r3]. + """ + results = _run(NESTED_OUTPUT_QIR, shots=10) + for shot in results["shot_results"]: + assert isinstance(shot, tuple), f"Expected tuple, got {type(shot)}: {shot}" + assert len(shot) == 2, f"Expected 2-element tuple, got {len(shot)}: {shot}" + assert isinstance( + shot[0], list + ), f"Expected list, got {type(shot[0])}: {shot[0]}" + assert isinstance( + shot[1], list + ), f"Expected list, got {type(shot[1])}: {shot[1]}" + assert shot == ([Result.Zero, Result.One], [Result.Zero, Result.One]) + + # ========================================================================= # Float: sitofp → fadd → fptosi round-trip # ========================================================================= @@ -1722,7 +1813,7 @@ def test_complex_rus_exceeds_128_registers(): bit[4] result = measure q; """ results = _run_openqasm(qasm_src, shots=100) - shot_results = results["shot_results"] + shot_results = [map_result_list_to_str(r) for r in results["shot_results"]] # Results include the mid-circuit measurement bit plus 4 final qubits assert all( len(r) >= 4 and all(c in "01" for c in r) for r in shot_results diff --git 
a/source/pip/tests/test_adaptive_gpu_noise.py b/source/pip/tests/test_adaptive_gpu_noise.py index 819f773b30..d3795db810 100644 --- a/source/pip/tests/test_adaptive_gpu_noise.py +++ b/source/pip/tests/test_adaptive_gpu_noise.py @@ -283,7 +283,7 @@ def test_noise_intrinsics_gpu_sim_class(): sim.load_noise_tables("./csv_dir_test") sim.set_program(QIR_WITH_CORRELATED_NOISE) output = sim.run_shots(shots=1)["shot_results"] - assert output == ["101"] + assert output == [[Result.One, Result.Zero, Result.One]] NOISE_INTRINSICS_WITH_REGISTERS_QIR = r""" @@ -373,7 +373,7 @@ def test_noise_intrinsic_1q_x_flip(): table = noise.intrinsic("noise_1q", 1) table.x = 1.0 output = run_qir(QIR_NOISE_1Q, shots=1, noise=noise, type="gpu") - assert output == [[Result.One]] + assert output == [Result.One] QASM_NOISE_2Q = """ diff --git a/source/pip/tests/test_adaptive_gpu_quantum_ops.py b/source/pip/tests/test_adaptive_gpu_quantum_ops.py index 5befd7a4c8..01613fdc3a 100644 --- a/source/pip/tests/test_adaptive_gpu_quantum_ops.py +++ b/source/pip/tests/test_adaptive_gpu_quantum_ops.py @@ -34,7 +34,23 @@ except OSError as e: SKIP_REASON = str(e) -from qsharp._simulation import GpuSimulator +from qsharp._simulation import GpuSimulator, Result + + +def map_result_list_to_str(results): + s = "" + if isinstance(results, (list, tuple)): + for r in results: + s += map_result_list_to_str(r) + else: + match results: + case Result.Zero: + s += "0" + case Result.One: + s += "1" + case Result.Loss: + s += "L" + return s # Acquiring the GPU resources takes time, so we acquire them once and use them @@ -143,7 +159,7 @@ def test_measure_and_correct_histogram(): code == 0 for code in results["shot_result_codes"] ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" - counts = Counter(shot_results) + counts = Counter(map_result_list_to_str(r) for r in shot_results) # Each shot produces a single-bit result string: "0" or "1" count_0 = counts.get("0", 0) count_1 = 
counts.get("1", 0) @@ -169,7 +185,7 @@ def test_conditional_loop_all_results_are_one(): code == 0 for code in results["shot_result_codes"] ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" - counts = Counter(shot_results) + counts = Counter(map_result_list_to_str(r) for r in shot_results) # Every shot should exit with result "1" assert ( counts.get("1", 0) == shots @@ -288,7 +304,7 @@ def test_loop_with_phi_ghz_histogram(): code == 0 for code in results["shot_result_codes"] ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" - counts = Counter(shot_results) + counts = Counter(map_result_list_to_str(r) for r in shot_results) # Only "00000" and "11111" should appear assert set(counts.keys()) <= { "00000", @@ -322,7 +338,7 @@ def test_boolean_computation_histogram(): code == 0 for code in results["shot_result_codes"] ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" - counts = Counter(shot_results) + counts = Counter(map_result_list_to_str(r) for r in shot_results) count_0 = counts.get("0", 0) count_1 = counts.get("1", 0) @@ -382,6 +398,7 @@ def test_boolean_computation_histogram(): call void @__quantum__qis__reset__body(%Qubit* inttoptr (i64 4 to %Qubit*)) br label %exit exit: + call void @__quantum__rt__tuple_record_output(i64 2, i8* null) call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 2 to %Result*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @0, i32 0, i32 0)) call void @__quantum__rt__result_record_output(%Result* inttoptr (i64 3 to %Result*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @1, i32 0, i32 0)) ret void @@ -396,6 +413,7 @@ def test_boolean_computation_histogram(): declare void @__quantum__rt__initialize(i8*) declare i1 @__quantum__qis__read_result__body(%Result*) declare void @__quantum__rt__result_record_output(%Result*, i8*) +declare void @__quantum__rt__tuple_record_output(i64, 
i8*) attributes #0 = { "entry_point" "qir_profiles"="adaptive_profile" "required_num_qubits"="5" "required_num_results"="4" } attributes #1 = { "irreversible" } @@ -423,7 +441,7 @@ def test_teleport_chain_histogram(): code == 0 for code in results["shot_result_codes"] ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" - counts = Counter(shot_results) + counts = Counter(map_result_list_to_str(r) for r in shot_results) # Only "00" and "11" should appear (results 4 and 5 are correlated) assert set(counts.keys()) <= { "00", @@ -494,7 +512,7 @@ def test_dynamic_rotation_angle(): code == 0 for code in results["shot_result_codes"] ), f"Some shots had non-zero error codes: {[c for c in results['shot_result_codes'] if c != 0]}" - counts = Counter(shot_results) + counts = Counter(map_result_list_to_str(r) for r in shot_results) count_0 = counts.get("0", 0) count_1 = counts.get("1", 0) diff --git a/source/simulators/src/gpu_full_state_simulator/gpu_context.rs b/source/simulators/src/gpu_full_state_simulator/gpu_context.rs index 125841fc31..6ac3d8f310 100644 --- a/source/simulators/src/gpu_full_state_simulator/gpu_context.rs +++ b/source/simulators/src/gpu_full_state_simulator/gpu_context.rs @@ -770,7 +770,8 @@ impl GpuContext { self.resources.reset_diagnostics_header()?; // Initialize state vectors and shot data via the init kernel. - // The init kernel zeros and configures the base ShotData fields per shot. + // The init kernel also zeros the results buffer per shot to prevent + // stale exit codes from prior runs leaking via atomicCompareExchangeWeak. 
{ let kernels = self.resources.get_kernels()?; let mut encoder = self.resources.get_encoder("Adaptive Init Encoder")?; diff --git a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl index 8dea1dc259..f9c620b6fa 100644 --- a/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl +++ b/source/simulators/src/gpu_full_state_simulator/simulator_adaptive.wgsl @@ -572,6 +572,13 @@ fn initialize( // Set the |0...0> amplitude to 1.0 from the first workgroup & thread for the shot stateVector[params.shot_state_vector_start] = vec2f(1.0, 0.0); reset_all(params.shot_idx); + + // Zero the results buffer for this shot so stale exit codes from + // prior runs do not leak via atomicCompareExchangeWeak in OP_RET. + let results_base = u32(params.shot_idx) * RESULT_COUNT; + for (var r = 0u; r < RESULT_COUNT; r++) { + atomicStore(&results[results_base + r], 0u); + } } } @@ -805,7 +812,7 @@ fn interpret_classical(@builtin(global_invocation_id) gid: vec3) { let arg_offset = instr.aux2; let func = batch_data.program.function_table[func_id]; // Push return info onto the call stack - let sp = state.call_sp; + let sp = shots[shot_idx].interp.call_sp; // Guard: prevent call stack overflow (max 8 frames) if sp >= 8u { shots[shot_idx].interp.exit_code = ERR_CALL_STACK_OVERFLOW; @@ -839,7 +846,7 @@ fn interpret_classical(@builtin(global_invocation_id) gid: vec3) { // return register (not 0xFFFFFFFF), copies the return value into // that register. 
case OP_CALL_RETURN { - if state.call_sp == 0u { + if shots[shot_idx].interp.call_sp == 0u { shots[shot_idx].interp.exit_code = ERR_CALL_STACK_UNDERFLOW; let err_idx = (shot_idx + 1) * RESULT_COUNT - 1; atomicCompareExchangeWeak(&results[err_idx], 0u, ERR_CALL_STACK_UNDERFLOW); @@ -849,11 +856,11 @@ fn interpret_classical(@builtin(global_invocation_id) gid: vec3) { break; } - let sp = state.call_sp - 1; + let sp = shots[shot_idx].interp.call_sp - 1; shots[shot_idx].interp.call_sp = sp; - block_id = state.call_stack_frames[sp].block_id; // go back to the callers block - pc = state.call_stack_frames[sp].return_pc; // restore pc - let return_reg = state.call_stack_frames[sp].return_reg; + block_id = shots[shot_idx].interp.call_stack_frames[sp].block_id; + pc = shots[shot_idx].interp.call_stack_frames[sp].return_pc; + let return_reg = shots[shot_idx].interp.call_stack_frames[sp].return_reg; if return_reg != VOID_RETURN { write_reg(shot_idx, return_reg, read_reg(shot_idx, instr.src0)); } From db84962c9c7cacd7c33638eb73c6555a078e85d1 Mon Sep 17 00:00:00 2001 From: Oscar Puente Date: Mon, 27 Apr 2026 18:54:18 -0700 Subject: [PATCH 14/14] add int overflow check --- source/pip/qsharp/_adaptive_pass.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/pip/qsharp/_adaptive_pass.py b/source/pip/qsharp/_adaptive_pass.py index b184e9e28d..f5c731cd76 100644 --- a/source/pip/qsharp/_adaptive_pass.py +++ b/source/pip/qsharp/_adaptive_pass.py @@ -221,6 +221,9 @@ def __post_init__(self): # their two's-complement representation # (e.g. -7 → 0xFFFFFFF9 for 32-bit, 0xFFFFFFFFFFFFFFF9 for 64-bit). mask = (1 << self.bits) - 1 + min_val = -(1 << (self.bits - 1)) + if self.val < min_val or self.val > mask: + raise ValueError(f"Value {self.val} does not fit in {self.bits} bits") self.val = self.val & mask