diff --git a/source/compiler/qsc/src/interpret/circuit_tests.rs b/source/compiler/qsc/src/interpret/circuit_tests.rs
index 7e9cb65fa2..bdd53bfd59 100644
--- a/source/compiler/qsc/src/interpret/circuit_tests.rs
+++ b/source/compiler/qsc/src/interpret/circuit_tests.rs
@@ -1791,3 +1791,232 @@ mod debugger_stepping {
         .assert_eq(&circs);
     }
 }
+
+// Without parallel, released qubits have their IDs recycled on subsequent allocations.
+// The inner block releases q1/q2, so q3/q4 reuse the same wires (q_0, q_1).
+#[test]
+fn parallel_baseline_qubit_ids_recycled_without_parallel() {
+    let circ = circuit_without_groups(
+        r"
+            namespace Test {
+                @EntryPoint()
+                operation Main() : Unit {
+                    { use q1 = Qubit(); H(q1); use q2 = Qubit(); H(q2); }
+                    use q3 = Qubit();
+                    H(q3);
+                    use q4 = Qubit();
+                    H(q4);
+                }
+            }
+        ",
+        CircuitEntryPoint::EntryPoint,
+    );
+
+    expect![[r#"
+        q_0@test.qs:4:22, test.qs:5:20 ─ H@test.qs:4:40 ─── H@test.qs:6:20 ──
+        q_1@test.qs:4:47, test.qs:7:20 ─ H@test.qs:4:65 ─── H@test.qs:8:20 ──
+    "#]]
+    .assert_eq(&circ);
+}
+
+// Inside a parallel expression all releases are deferred, so q3/q4 get fresh wires
+// instead of reusing q_0/q_1. This mirrors the baseline test with `parallel` added.
+#[test]
+fn parallel_defers_qubit_release() {
+    let circ = circuit_without_groups(
+        r"
+            namespace Test {
+                @EntryPoint()
+                operation Main() : Unit {
+                    parallel {
+                        { use q1 = Qubit(); H(q1); use q2 = Qubit(); H(q2); }
+                        use q3 = Qubit();
+                        H(q3);
+                        use q4 = Qubit();
+                        H(q4);
+                    }
+                }
+            }
+        ",
+        CircuitEntryPoint::EntryPoint,
+    );
+
+    expect![[r#"
+        q_0@test.qs:5:26 ─ H@test.qs:5:44 ──
+        q_1@test.qs:5:51 ─ H@test.qs:5:69 ──
+        q_2@test.qs:6:24 ─ H@test.qs:7:24 ──
+        q_3@test.qs:8:24 ─ H@test.qs:9:24 ──
+    "#]]
+    .assert_eq(&circ);
+}
+
+// After a parallel block ends its deferred releases become available, so a second
+// parallel block reuses the same qubit wires.
+#[test]
+fn parallel_releases_available_after_block_ends() {
+    let circ = circuit_without_groups(
+        r"
+            namespace Test {
+                @EntryPoint()
+                operation Main() : Unit {
+                    parallel {
+                        use q = Qubit();
+                        H(q);
+                    }
+                    parallel {
+                        use q = Qubit();
+                        X(q);
+                    }
+                }
+            }
+        ",
+        CircuitEntryPoint::EntryPoint,
+    );
+
+    expect![[r#"
+        q_0@test.qs:5:24, test.qs:9:24 ─ H@test.qs:6:24 ─── X@test.qs:10:24 ─
+    "#]]
+    .assert_eq(&circ);
+}
+
+// In nested parallel expressions, inner block qubits flow to the outer layer on removal
+// so the outer block allocates fresh wires even after the inner block ends.
+#[test]
+fn parallel_nested_defers_inner_releases_to_outer() {
+    let circ = circuit_without_groups(
+        r"
+            namespace Test {
+                @EntryPoint()
+                operation Main() : Unit {
+                    parallel {
+                        use outer = Qubit();
+                        H(outer);
+                        parallel {
+                            use inner1 = Qubit();
+                            H(inner1);
+                            use inner2 = Qubit();
+                            H(inner2);
+                        }
+                        use outer2 = Qubit();
+                        H(outer2);
+                    }
+                }
+            }
+        ",
+        CircuitEntryPoint::EntryPoint,
+    );
+
+    expect![[r#"
+        q_0@test.qs:5:24   ─ H@test.qs:6:24 ──
+        q_1@test.qs:8:28   ─ H@test.qs:9:28 ──
+        q_2@test.qs:10:28  ─ H@test.qs:11:28 ─
+        q_3@test.qs:13:24  ─ H@test.qs:14:24 ─
+    "#]]
+    .assert_eq(&circ);
+}
+
+// parallel within N: once N qubits are deferred the pool replenishes, so later
+// allocations reuse existing wires rather than creating new ones.
+#[test]
+fn parallel_within_reuses_wires_after_limit() {
+    let circ = circuit_without_groups(
+        r"
+            namespace Test {
+                @EntryPoint()
+                operation Main() : Unit {
+                    parallel within 2 {
+                        { use q1 = Qubit(); H(q1); }
+                        { use q2 = Qubit(); H(q2); }
+                        { use q3 = Qubit(); H(q3); }
+                        { use q4 = Qubit(); H(q4); }
+                    }
+                }
+            }
+        ",
+        CircuitEntryPoint::EntryPoint,
+    );
+
+    expect![[r#"
+        q_0@test.qs:5:26 ─ H@test.qs:5:44 ─── H@test.qs:7:44 ──
+        q_1@test.qs:6:26 ─ H@test.qs:6:44 ─── H@test.qs:8:44 ──
+    "#]]
+    .assert_eq(&circ);
+}
+
+// Outer `parallel within 6` with inner `parallel within 2`. The inner limit reuses
+// wires within each iteration. Once the outer deferred count reaches 6 (iteration 3),
+// the outer layer replenishes and reuses its wires too.
+#[test]
+fn parallel_within_nested_defers_through_outer_limit() {
+    let circ = circuit_without_groups(
+        r"
+            namespace Test {
+                @EntryPoint()
+                operation Main() : Unit {
+                    parallel within 6 {
+                        for _ in 0..2 {
+                            { use q0 = Qubit(); H(q0); }
+                            parallel within 2 {
+                                { use q1 = Qubit(); H(q1); }
+                                { use q2 = Qubit(); H(q2); }
+                                { use q3 = Qubit(); H(q3); }
+                                { use q4 = Qubit(); H(q4); }
+                            }
+                        }
+                    }
+                }
+            }
+        ",
+        CircuitEntryPoint::EntryPoint,
+    );
+
+    expect![[r#"
+        q_0@test.qs:6:30 ─ H@test.qs:6:48 ─── H@test.qs:6:48 ────────────────────────────────────────
+        q_1@test.qs:8:34 ─ H@test.qs:8:52 ─── H@test.qs:10:52 ── H@test.qs:8:52 ─── H@test.qs:10:52 ─
+        q_2@test.qs:9:34 ─ H@test.qs:9:52 ─── H@test.qs:11:52 ── H@test.qs:9:52 ─── H@test.qs:11:52 ─
+        q_3@test.qs:6:30 ─ H@test.qs:6:48 ───────────────────────────────────────────────────────────
+        q_4@test.qs:8:34 ─ H@test.qs:8:52 ─── H@test.qs:10:52 ───────────────────────────────────────
+        q_5@test.qs:9:34 ─ H@test.qs:9:52 ─── H@test.qs:11:52 ───────────────────────────────────────
+    "#]].assert_eq(&circ);
+}
+
+// Same structure but the outer parallel has no limit. The inner `parallel within 2`
+// still reuses within each iteration, but the outer unlimited layer never replenishes,
+// so iteration 3 allocates fresh wires (q_6/q_7/q_8) instead of reusing q_0/q_1/q_2.
+#[test]
+fn parallel_nested_unlimited_outer_defers_all() {
+    let circ = circuit_without_groups(
+        r"
+            namespace Test {
+                @EntryPoint()
+                operation Main() : Unit {
+                    parallel {
+                        for _ in 0..2 {
+                            { use q0 = Qubit(); H(q0); }
+                            parallel within 2 {
+                                { use q1 = Qubit(); H(q1); }
+                                { use q2 = Qubit(); H(q2); }
+                                { use q3 = Qubit(); H(q3); }
+                                { use q4 = Qubit(); H(q4); }
+                            }
+                        }
+                    }
+                }
+            }
+        ",
+        CircuitEntryPoint::EntryPoint,
+    );
+
+    expect![[r#"
+        q_0@test.qs:6:30 ─ H@test.qs:6:48 ─────────────────────
+        q_1@test.qs:8:34 ─ H@test.qs:8:52 ─── H@test.qs:10:52 ─
+        q_2@test.qs:9:34 ─ H@test.qs:9:52 ─── H@test.qs:11:52 ─
+        q_3@test.qs:6:30 ─ H@test.qs:6:48 ─────────────────────
+        q_4@test.qs:8:34 ─ H@test.qs:8:52 ─── H@test.qs:10:52 ─
+        q_5@test.qs:9:34 ─ H@test.qs:9:52 ─── H@test.qs:11:52 ─
+        q_6@test.qs:6:30 ─ H@test.qs:6:48 ─────────────────────
+        q_7@test.qs:8:34 ─ H@test.qs:8:52 ─── H@test.qs:10:52 ─
+        q_8@test.qs:9:34 ─ H@test.qs:9:52 ─── H@test.qs:11:52 ─
+    "#]]
+    .assert_eq(&circ);
+}
diff --git a/source/compiler/qsc_ast/src/ast.rs b/source/compiler/qsc_ast/src/ast.rs
index 1e2b420ffe..e280a1a37b 100644
--- a/source/compiler/qsc_ast/src/ast.rs
+++ b/source/compiler/qsc_ast/src/ast.rs
@@ -917,6 +917,10 @@ pub enum ExprKind {
     Lambda(CallableKind, Box<Pat>, Box<Expr>),
     /// A literal.
     Lit(Box<Lit>),
+    /// A parallel expression: `parallel a`
+    Parallel(Box<Expr>),
+    /// A parallel-limited expression: `parallel within n a`
+    ParallelLimited(Box<Expr>, Box<Expr>),
     /// Parentheses: `(a)`.
     Paren(Box<Expr>),
     /// A path: `a` or `a.b`.
@@ -964,6 +968,10 @@ impl Display for ExprKind {
             ExprKind::Interpolate(components) => display_interpolate(indent, components)?,
             ExprKind::Lambda(kind, param, expr) => display_lambda(indent, *kind, param, expr)?,
             ExprKind::Lit(lit) => write!(indent, "Lit: {lit}")?,
+            ExprKind::Parallel(e) => write!(indent, "Parallel: {e}")?,
+            ExprKind::ParallelLimited(limit, body) => {
+                write!(indent, "ParallelLimited: {limit} {body}")?;
+            }
             ExprKind::Paren(e) => write!(indent, "Paren: {e}")?,
             ExprKind::Path(p) => write!(indent, "Path: {p}")?,
             ExprKind::Range(start, step, end) => {
diff --git a/source/compiler/qsc_ast/src/mut_visit.rs b/source/compiler/qsc_ast/src/mut_visit.rs
index ce80b9ca87..f2c11a5450 100644
--- a/source/compiler/qsc_ast/src/mut_visit.rs
+++ b/source/compiler/qsc_ast/src/mut_visit.rs
@@ -362,9 +362,16 @@ pub fn walk_expr(vis: &mut impl MutVisitor, expr: &mut Expr) {
             vis.visit_pat(pat);
             vis.visit_expr(expr);
         }
-        ExprKind::Paren(expr) | ExprKind::Return(expr) | ExprKind::UnOp(_, expr) => {
+        ExprKind::Parallel(expr)
+        | ExprKind::Paren(expr)
+        | ExprKind::Return(expr)
+        | ExprKind::UnOp(_, expr) => {
             vis.visit_expr(expr);
         }
+        ExprKind::ParallelLimited(limit, body) => {
+            vis.visit_expr(limit);
+            vis.visit_expr(body);
+        }
         ExprKind::Path(path) => vis.visit_path_kind(path),
         ExprKind::Range(start, step, end) => {
             for s in start.iter_mut() {
diff --git a/source/compiler/qsc_ast/src/visit.rs b/source/compiler/qsc_ast/src/visit.rs
index f123b320a0..9e2149f000 100644
--- a/source/compiler/qsc_ast/src/visit.rs
+++ b/source/compiler/qsc_ast/src/visit.rs
@@ -334,9 +334,16 @@ pub fn walk_expr<'a>(vis: &mut impl Visitor<'a>, expr: &'a Expr) {
             vis.visit_pat(pat);
             vis.visit_expr(expr);
         }
-        ExprKind::Paren(expr) | ExprKind::Return(expr) | ExprKind::UnOp(_, expr) => {
+        ExprKind::Parallel(expr)
+        | ExprKind::Paren(expr)
+        | ExprKind::Return(expr)
+        | ExprKind::UnOp(_, expr) => {
             vis.visit_expr(expr);
         }
+        ExprKind::ParallelLimited(limit, body) => {
+            vis.visit_expr(limit);
+            vis.visit_expr(body);
+        }
         ExprKind::Path(path) => vis.visit_path_kind(path),
         ExprKind::Range(start, step, end) => {
             if let Some(s) = start.as_ref() {
diff --git a/source/compiler/qsc_codegen/src/qsharp.rs b/source/compiler/qsc_codegen/src/qsharp.rs
index 71eee20519..3abe2c0efd 100644
--- a/source/compiler/qsc_codegen/src/qsharp.rs
+++ b/source/compiler/qsc_codegen/src/qsharp.rs
@@ -540,6 +540,16 @@ impl<W: Write> Visitor<'_> for QSharpGen<W> {
                 }
                 self.visit_expr(expr);
             }
+            ExprKind::Parallel(expr) => {
+                self.write("parallel ");
+                self.visit_expr(expr);
+            }
+            ExprKind::ParallelLimited(limit, body) => {
+                self.write("parallel within ");
+                self.visit_expr(limit);
+                self.write(" ");
+                self.visit_expr(body);
+            }
             ExprKind::Paren(expr) => {
                 self.write("(");
                 self.visit_expr(expr);
diff --git a/source/compiler/qsc_eval/src/lib.rs b/source/compiler/qsc_eval/src/lib.rs
index d16ab88a08..4f2a1ccd20 100644
--- a/source/compiler/qsc_eval/src/lib.rs
+++ b/source/compiler/qsc_eval/src/lib.rs
@@ -48,6 +48,8 @@ use qsc_lowerer::map_fir_package_to_hir;
 use rand::{SeedableRng, rngs::StdRng};
 use rustc_hash::{FxHashMap, FxHashSet};
 use std::array;
+use std::collections::VecDeque;
+use std::mem::take;
 use std::{
     cell::RefCell,
     fmt::{self, Display, Formatter},
@@ -562,10 +564,6 @@ impl Env {
     pub fn track_qubit(&mut self, qubit: Rc<Qubit>) {
         self.qubits.insert(qubit);
     }
-
-    pub fn release_qubit(&mut self, qubit: &Rc<Qubit>) {
-        self.qubits.remove(qubit);
-    }
 }
 
 #[derive(Default)]
@@ -602,6 +600,7 @@ pub struct State {
     error_behavior: ErrorBehavior,
     last_error: Option<(Error, Vec<Frame>)>,
     exec_graph_config: ExecGraphConfig,
+    delayed_release_qubits: DelayedQubitReleaseStack,
 }
 
 impl State {
@@ -634,6 +633,7 @@ impl State {
             error_behavior,
             last_error: None,
             exec_graph_config,
+            delayed_release_qubits: DelayedQubitReleaseStack::default(),
         }
     }
 
@@ -781,17 +781,7 @@ impl State {
                     self.idx += 1;
                     match self.eval_expr(env, sim, globals, out, *expr) {
                         Ok(()) => continue,
-                        Err(e) => {
-                            if self.error_behavior == ErrorBehavior::StopOnError {
-                                let error_str = e.to_string();
-                                self.set_last_error(e, self.capture_stack());
-                                // Clear the execution graph stack to indicate that execution has failed.
-                                // This will prevent further execution steps.
-                                self.exec_graph_stack.clear();
-                                return Ok(StepResult::Fail(error_str));
-                            }
-                            return Err((e, self.capture_stack()));
-                        }
+                        Err(e) => return self.handle_error(e),
                     }
                 }
                 Some(ExecGraphNode::Jump(idx)) => {
@@ -831,6 +821,63 @@ impl State {
                     env.leave_scope();
                     continue;
                 }
+                Some(ExecGraphNode::ParStart(has_limit)) => {
+                    let limit = if *has_limit {
+                        let limit_val = self.take_val_register().unwrap_int();
+                        if limit_val < 0 {
+                            let package = map_fir_package_to_hir(self.package);
+                            return self.handle_error(Error::InvalidNegativeInt(
+                                limit_val,
+                                PackageSpan {
+                                    package,
+                                    span: self.current_span,
+                                },
+                            ));
+                        }
+                        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
+                        Some(limit_val as usize)
+                    } else {
+                        None
+                    };
+                    self.delayed_release_qubits.add_layer(limit);
+                    self.idx += 1;
+                    continue;
+                }
+                Some(ExecGraphNode::ParEnd) => {
+                    // After finishing all parallel sections, we should check for any delayed qubit releases that need to be performed.
+                    let call_stack = self.capture_stack_if_trace_enabled(sim);
+                    for qubit in self.delayed_release_qubits.remove_layer() {
+                        env.qubits.remove(&qubit);
+                        let is_borrowed = self.dirty_qubits.remove(&qubit.0);
+                        match (
+                            sim.qubit_release(qubit.0, &call_stack).map_err(|e| {
+                                let package_span = PackageSpan {
+                                    package: map_fir_package_to_hir(self.package),
+                                    span: self.current_span,
+                                };
+
+                                Error::SimulationError(e, package_span)
+                            }),
+                            is_borrowed,
+                        ) {
+                            (Ok(true), _) | (Ok(_), true) => {}
+                            (Ok(false), false) => {
+                                let package_span = PackageSpan {
+                                    package: map_fir_package_to_hir(self.package),
+                                    span: self.current_span,
+                                };
+
+                                return self.handle_error(Error::ReleasedQubitNotZero(
+                                    qubit.0,
+                                    package_span,
+                                ));
+                            }
+                            (Err(e), _) => return self.handle_error(e),
+                        }
+                    }
+                    self.idx += 1;
+                    continue;
+                }
                 Some(ExecGraphNode::Debug(dbg_node)) => match dbg_node {
                     ExecGraphDebugNode::PushScope => {
                         self.push_scope(env);
@@ -907,6 +954,19 @@ impl State {
         Ok(StepResult::Return(self.get_result()))
     }
 
+    /// Records or returns `error` according to the configured error behavior.
+    fn handle_error(&mut self, error: Error) -> Result<StepResult, (Error, Vec<Frame>)> {
+        if self.error_behavior != ErrorBehavior::StopOnError {
+            return Err((error, self.capture_stack()));
+        }
+        let message = error.to_string();
+        let frames = self.capture_stack();
+        self.set_last_error(error, frames);
+        // Clear the execution graph stack so that no further execution steps can run.
+        self.exec_graph_stack.clear();
+        Ok(StepResult::Fail(message))
+    }
+
     fn check_for_break(
         &self,
         breakpoints: &[StmtId],
@@ -965,7 +1025,7 @@ impl State {
         self.val_register.take().unwrap_or_else(Value::unit)
     }
 
-    #[allow(clippy::similar_names)]
+    #[allow(clippy::similar_names, clippy::too_many_lines)]
     fn eval_expr<B: Backend>(
         &mut self,
         env: &mut Env,
@@ -1070,6 +1130,9 @@ impl State {
             ExprKind::While(..) => {
                 panic!("while expr should be handled by control flow")
             }
+            ExprKind::Parallel(..) => {
+                panic!("parallel expr should be handled by control flow")
+            }
         }
 
         Ok(())
@@ -1335,28 +1398,43 @@ impl State {
         let name = &callee.name.name;
         let val = match name.as_ref() {
             "__quantum__rt__qubit_allocate" | "__quantum__rt__qubit_borrow" => {
-                let q = sim
-                    .qubit_allocate(&call_stack)
-                    .map_err(|e| Error::SimulationError(e, callee_span))?;
-                let q = Rc::new(Qubit(q));
-                env.track_qubit(Rc::clone(&q));
-                if let Some(counter) = &mut self.qubit_counter {
-                    counter.allocated(q.0);
-                }
-                if name.as_ref() == "__quantum__rt__qubit_borrow" {
-                    self.dirty_qubits.insert(q.0);
+                if let Some(q) = self.delayed_release_qubits.allocate_delayed_qubit() {
+                    Value::Qubit(
+                        env.qubits
+                            .get(&q)
+                            .expect("qubit should be tracked")
+                            .clone()
+                            .into(),
+                    )
+                } else {
+                    let q = sim
+                        .qubit_allocate(&call_stack)
+                        .map_err(|e| Error::SimulationError(e, callee_span))?;
+                    let q = Rc::new(Qubit(q));
+                    env.track_qubit(Rc::clone(&q));
+                    if let Some(counter) = &mut self.qubit_counter {
+                        counter.allocated(q.0);
+                    }
+                    if name.as_ref() == "__quantum__rt__qubit_borrow" {
+                        self.dirty_qubits.insert(q.0);
+                    }
+                    Value::Qubit(q.into())
                 }
-                Value::Qubit(q.into())
             }
             "__quantum__rt__qubit_release" => {
                 let qubit = arg
                     .unwrap_qubit()
                     .try_deref()
                     .ok_or(Error::QubitDoubleRelease(arg_span))?;
-                env.release_qubit(&qubit);
-                let is_zero = sim
-                    .qubit_release(qubit.0, &call_stack)
-                    .map_err(|e| Error::SimulationError(e, callee_span))?;
+                let is_zero = if self.delayed_release_qubits.delay_release_qubit(*qubit) {
+                    // If the qubit is delayed for release, we don't check if it's zero yet.
+                    // The actual release will be handled later when the parallel section ends.
+                    true
+                } else {
+                    env.qubits.remove(&qubit);
+                    sim.qubit_release(qubit.0, &call_stack)
+                        .map_err(|e| Error::SimulationError(e, callee_span))?
+                };
                 let is_borrowed = self.dirty_qubits.remove(&qubit.0);
                 if is_zero || is_borrowed {
                     Value::unit()
@@ -1866,6 +1944,149 @@ impl State {
     }
 }
 
+#[derive(Debug, Default)]
+struct DelayedQubitReleaseLayer {
+    released_qubits: Vec<Qubit>,
+    available_qubits: VecDeque<Qubit>,
+    used_qubits: FxHashSet<Qubit>,
+    limit: Option<usize>,
+    allocated: usize,
+}
+
+#[derive(Debug, Default)]
+pub struct DelayedQubitReleaseStack {
+    layers: Vec<DelayedQubitReleaseLayer>,
+}
+
+impl DelayedQubitReleaseStack {
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.layers.is_empty()
+    }
+
+    /// Add a new layer for tracking delayed qubit releases to the stack,
+    /// optionally with the specified limit on the number of qubits that are allocated
+    /// fresh before reuse begins.
+    /// To help accommodate nested delayed release and reuse, the new layer will inherit any
+    /// available qubits from the previous layer.
+    pub fn add_layer(&mut self, limit: Option<usize>) {
+        let new_layer = if let Some(DelayedQubitReleaseLayer {
+            available_qubits, ..
+        }) = self.layers.last_mut()
+        {
+            DelayedQubitReleaseLayer {
+                available_qubits: take(available_qubits),
+                limit,
+                ..Default::default()
+            }
+        } else {
+            DelayedQubitReleaseLayer {
+                limit,
+                ..Default::default()
+            }
+        };
+        self.layers.push(new_layer);
+    }
+
+    /// Remove the top layer from the stack and return any qubits that should be released.
+    /// If there is a parent layer, release of the qubits in this layer will be deferred
+    /// to the parent, and only unused available qubits will be returned to the parent
+    /// layer's available qubits.
+    pub fn remove_layer(&mut self) -> Vec<Qubit> {
+        if let Some(DelayedQubitReleaseLayer {
+            released_qubits,
+            available_qubits,
+            used_qubits,
+            ..
+        }) = self.layers.pop()
+        {
+            if let Some(DelayedQubitReleaseLayer {
+                released_qubits: parent_released_qubits,
+                available_qubits: parent_available_qubits,
+                ..
+            }) = self.layers.last_mut()
+            {
+                // Available qubits that were never used in the current layer must have come from the parent's available qubits,
+                // so we need to return those to the parent layer's available qubits.
+                let mut available_qubits: Vec<Qubit> = available_qubits.into();
+                let mut new_parent_available_qubits: Vec<Qubit> = available_qubits
+                    .extract_if(.., |q| !used_qubits.contains(q))
+                    .collect();
+                new_parent_available_qubits.sort_unstable();
+                *parent_available_qubits = new_parent_available_qubits.into();
+
+                // The remaining qubits that were released or available but used in the current layer
+                // should be added to the parent layer's delayed release qubits.
+                parent_released_qubits.extend(released_qubits);
+                parent_released_qubits.extend(available_qubits);
+                parent_released_qubits.sort_unstable();
+
+                // Since the parent layer is now responsible for releasing the qubits, we don't return any qubits to be released.
+                Vec::new()
+            } else {
+                // This is the last layer, so return all qubits managed by the layer to be released.
+                released_qubits
+                    .into_iter()
+                    .chain(available_qubits)
+                    .collect()
+            }
+        } else {
+            Vec::new()
+        }
+    }
+
+    /// Add a qubit to the list of qubits that should be released when the current layer is removed.
+    /// Returns false when no layer is configured so the caller can release the qubit immediately.
+    /// The live count saturates at zero, as the qubit may have been allocated outside this layer.
+    pub fn delay_release_qubit(&mut self, qubit: Qubit) -> bool {
+        if let Some(DelayedQubitReleaseLayer {
+            released_qubits,
+            used_qubits,
+            allocated,
+            ..
+        }) = self.layers.last_mut()
+        {
+            *allocated = allocated.saturating_sub(1);
+            released_qubits.push(qubit);
+            used_qubits.insert(qubit);
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Allocate a qubit from the current delayed release layer, if available.
+    /// If the layer has a limit and the number of allocated and deferred release qubits
+    /// exceeds the limit, the available qubits will be replenished from the released qubits.
+    /// If there is no configured delayed release layer or no available qubits, the
+    /// function returns None so the caller can perform a fresh qubit allocation.
+    pub fn allocate_delayed_qubit(&mut self) -> Option<Qubit> {
+        if let Some(DelayedQubitReleaseLayer {
+            available_qubits,
+            released_qubits,
+            used_qubits,
+            limit,
+            allocated,
+        }) = self.layers.last_mut()
+        {
+            *allocated += 1;
+            if let Some(limit) = limit
+                && released_qubits.len() + *allocated > *limit
+            {
+                let mut qubits = take(released_qubits);
+                qubits.extend(take(available_qubits));
+                qubits.sort_unstable();
+                *available_qubits = qubits.into();
+            }
+            if let Some(qubit) = available_qubits.pop_front() {
+                used_qubits.insert(qubit);
+                return Some(qubit);
+            }
+        }
+        None
+    }
+}
+
 pub fn are_ctls_unique(ctls: &[Value], tup: &Value) -> bool {
     let mut qubits = FxHashSet::default();
     for ctl in ctls.iter().flat_map(Value::qubits) {
diff --git a/source/compiler/qsc_eval/src/tests.rs b/source/compiler/qsc_eval/src/tests.rs
index 0634e8ca46..d2824bdb19 100644
--- a/source/compiler/qsc_eval/src/tests.rs
+++ b/source/compiler/qsc_eval/src/tests.rs
@@ -105,6 +105,62 @@ fn check_expr(file: &str, expr: &str, expect: &Expect) {
     }
 }
 
+fn check_output(file: &str, expr: &str, expect: &Expect) {
+    let mut fir_lowerer = qsc_lowerer::Lowerer::new();
+    let mut core = compile::core();
+    run_core_passes(&mut core);
+    let fir_store = fir::PackageStore::new();
+    let core_fir = fir_lowerer.lower_package(&core.package, &fir_store);
+    let mut store = PackageStore::new(core);
+
+    let mut std = compile::std(&store, TargetCapabilityFlags::all());
+    assert!(std.errors.is_empty());
+    assert!(run_default_passes(store.core(), &mut std, PackageType::Lib).is_empty());
+    let std_fir = fir_lowerer.lower_package(&std.package, &fir_store);
+    let std_id = store.insert(std);
+
+    let sources = SourceMap::new([("test".into(), file.into())], Some(expr.into()));
+    let mut unit = compile(
+        &store,
+        &[(std_id, None)],
+        sources,
+        TargetCapabilityFlags::all(),
+        LanguageFeatures::default(),
+    );
+    assert!(unit.errors.is_empty(), "{:?}", unit.errors);
+    let pass_errors = run_default_passes(store.core(), &mut unit, PackageType::Lib);
+    assert!(pass_errors.is_empty(), "{pass_errors:?}");
+    let unit_fir = fir_lowerer.lower_package(&unit.package, &fir_store);
+    let entry = unit_fir.entry_exec_graph.clone();
+    let id = store.insert(unit);
+
+    let mut fir_store = fir::PackageStore::new();
+    fir_store.insert(
+        map_hir_package_to_fir(qsc_hir::hir::PackageId::CORE),
+        core_fir,
+    );
+    fir_store.insert(map_hir_package_to_fir(std_id), std_fir);
+    fir_store.insert(map_hir_package_to_fir(id), unit_fir);
+
+    let mut out = Vec::new();
+    match eval_graph(
+        entry,
+        &mut SparseSim::new(),
+        &fir_store,
+        ExecGraphConfig::NoDebug,
+        map_hir_package_to_fir(id),
+        &mut Env::default(),
+        &mut GenericReceiver::new(&mut out),
+    ) {
+        Ok(_) => expect.assert_eq(
+            std::str::from_utf8(&out)
+                .expect("output should be valid UTF-8")
+                .trim_end(),
+        ),
+        Err((err, _)) => panic!("unexpected error: {err:?}"),
+    }
+}
+
 fn check_partial_eval_stmt(
     file: &str,
     expr: &str,
@@ -4227,3 +4283,175 @@ fn partial_eval_stmt_function_calls_from_library() {
         &expect!["3"],
     );
 }
+
+// Without parallel, released qubits have their IDs recycled on subsequent allocations.
+#[test]
+fn parallel_baseline_qubit_ids_recycled_without_parallel() {
+    check_output(
+        "",
+        indoc! {r#"{
+            // q1 and q2 are allocated and released inside the inner block
+            { use q1 = Qubit(); Message($"{q1}"); use q2 = Qubit(); Message($"{q2}"); }
+            // q1 and q2 are now released; next allocations reuse the same IDs
+            use q3 = Qubit();
+            Message($"{q3}");
+            use q4 = Qubit();
+            Message($"{q4}");
+        }"#},
+        &expect!["Qubit0\nQubit1\nQubit0\nQubit1"],
+    );
+}
+
+// Inside a parallel expression qubits are allocated fresh even after a sibling is released,
+// because all releases are deferred until the parallel block ends.
+// This mirrors the baseline test but with `parallel` wrapping the outermost block.
+#[test]
+fn parallel_defers_qubit_release() {
+    check_output(
+        "",
+        indoc! {r#"parallel {
+            // q1 and q2 are allocated and released inside the inner block
+            { use q1 = Qubit(); Message($"{q1}"); use q2 = Qubit(); Message($"{q2}"); }
+            // inside parallel their release is deferred, so q3 and q4 get fresh ids
+            use q3 = Qubit();
+            Message($"{q3}");
+            use q4 = Qubit();
+            Message($"{q4}");
+        }"#},
+        &expect!["Qubit0\nQubit1\nQubit2\nQubit3"],
+    );
+}
+
+// After the first parallel block ends, its deferred releases become available, so a
+// second parallel block can reuse those qubit IDs.
+#[test]
+fn parallel_releases_available_after_block_ends() {
+    check_output(
+        "",
+        indoc! {r#"{
+            parallel {
+                use q = Qubit();
+                Message($"first:{q}");
+            }
+            parallel {
+                use q = Qubit();
+                Message($"second:{q}");
+            }
+        }"#},
+        &expect!["first:Qubit0\nsecond:Qubit0"],
+    );
+}
+
+// In nested parallel expressions the inner block's released qubits are not made available
+// to the outer block; their release is deferred until the outer parallel finishes.
+#[test]
+fn parallel_nested_defers_inner_releases_to_outer() {
+    check_output(
+        "",
+        indoc! {r#"parallel {
+            use outer = Qubit();
+            Message($"outer:{outer}");
+            parallel {
+                use inner1 = Qubit();
+                Message($"inner1:{inner1}");
+                use inner2 = Qubit();
+                Message($"inner2:{inner2}");
+            }
+            // inner qubits are now deferred in the outer layer, so a fresh id is allocated
+            use outer2 = Qubit();
+            Message($"outer2:{outer2}");
+        }"#},
+        &expect!["outer:Qubit0\ninner1:Qubit1\ninner2:Qubit2\nouter2:Qubit3"],
+    );
+}
+
+// parallel within N defers qubit release but once N qubits have been deferred the pool
+// is replenished and IDs are reused.
+#[test]
+fn parallel_within_reuses_ids_after_limit() {
+    check_output(
+        "",
+        indoc! {r#"parallel within 2 {
+            // Each nested block releases its qubit before the next allocation.
+            { use q1 = Qubit(); Message($"{q1}"); }
+            { use q2 = Qubit(); Message($"{q2}"); }
+            // 2 qubits have now been deferred; limit reached so q3 and q4 reuse ids
+            { use q3 = Qubit(); Message($"{q3}"); }
+            { use q4 = Qubit(); Message($"{q4}"); }
+        }"#},
+        &expect!["Qubit0\nQubit1\nQubit0\nQubit1"],
+    );
+}
+
+// Nested parallel within: the outer layer has a limit of 6 and the inner has a limit of 2.
+// Each of the 3 iterations allocates a qubit outside the inner parallel (tracked by the
+// outer layer) plus 4 qubits inside the inner parallel (2 fresh, 2 reused via inner limit).
+// After 2 iterations the outer layer has accumulated 6+ deferred qubits, so iteration 3
+// triggers outer replenishment and reuses IDs from the outer pool (Qubit0, 1, 2 reappear).
+#[test]
+fn parallel_within_nested_defers_through_outer_limit() {
+    check_output(
+        "",
+        indoc! {r#"parallel within 6 { for _ in 0..2 {
+            { use q0 = Qubit(); Message($"{q0}"); }
+            parallel within 2 {
+                { use q1 = Qubit(); Message($"{q1}"); }
+                { use q2 = Qubit(); Message($"{q2}"); }
+                { use q3 = Qubit(); Message($"{q3}"); }
+                { use q4 = Qubit(); Message($"{q4}"); }
+            }
+        } }"#},
+        &expect![[r#"
+            Qubit0
+            Qubit1
+            Qubit2
+            Qubit1
+            Qubit2
+            Qubit3
+            Qubit4
+            Qubit5
+            Qubit4
+            Qubit5
+            Qubit0
+            Qubit1
+            Qubit2
+            Qubit1
+            Qubit2"#]],
+    );
+}
+
+// Same as above but the outer parallel has no limit. The inner parallel within 2 still
+// reuses within each iteration, but the outer unlimited layer never triggers
+// replenishment so every iteration allocates fresh IDs at the outer level (Qubit6, 7, 8
+// in iteration 3 instead of reusing Qubit0, 1, 2).
+#[test]
+fn parallel_nested_unlimited_outer_defers_all() {
+    check_output(
+        "",
+        indoc! {r#"parallel { for _ in 0..2 {
+            { use q0 = Qubit(); Message($"{q0}"); }
+            parallel within 2 {
+                { use q1 = Qubit(); Message($"{q1}"); }
+                { use q2 = Qubit(); Message($"{q2}"); }
+                { use q3 = Qubit(); Message($"{q3}"); }
+                { use q4 = Qubit(); Message($"{q4}"); }
+            }
+        } }"#},
+        &expect![[r#"
+            Qubit0
+            Qubit1
+            Qubit2
+            Qubit1
+            Qubit2
+            Qubit3
+            Qubit4
+            Qubit5
+            Qubit4
+            Qubit5
+            Qubit6
+            Qubit7
+            Qubit8
+            Qubit7
+            Qubit8"#]],
+    );
+}
diff --git a/source/compiler/qsc_eval/src/val.rs b/source/compiler/qsc_eval/src/val.rs
index b1c392d2a8..35c509849d 100644
--- a/source/compiler/qsc_eval/src/val.rs
+++ b/source/compiler/qsc_eval/src/val.rs
@@ -152,7 +152,7 @@ impl QubitRef {
     }
 }
 
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
 pub struct Qubit(pub usize);
 
 #[derive(Clone, Copy, Debug, PartialEq)]
diff --git a/source/compiler/qsc_fir/src/fir.rs b/source/compiler/qsc_fir/src/fir.rs
index b73482c987..a7b6da9e92 100644
--- a/source/compiler/qsc_fir/src/fir.rs
+++ b/source/compiler/qsc_fir/src/fir.rs
@@ -1036,6 +1036,10 @@ pub enum ExecGraphNode {
     Unit,
     /// The end of the control flow graph.
     Ret,
+    /// The start of a parallel region, with a Boolean indicating whether it has a limit.
+    ParStart(bool),
+    /// The end of a parallel region.
+    ParEnd,
     /// A node only to be executed in debug mode.
     Debug(ExecGraphDebugNode),
 }
@@ -1208,6 +1212,8 @@ pub enum ExprKind {
     If(ExprId, ExprId, Option<ExprId>),
     /// An index accessor: `a[b]`.
     Index(ExprId, ExprId),
+    /// A parallel expression: `parallel a` or `parallel within n a`.
+    Parallel(Option<ExprId>, ExprId),
     /// A literal.
     Lit(Lit),
     /// A range: `start..step..end`, `start..end`, `start...`, `...end`, or `...`.
@@ -1255,6 +1261,13 @@ impl Display for ExprKind {
             ExprKind::Hole => write!(indent, "Hole")?,
             ExprKind::If(cond, body, els) => display_if(indent, *cond, *body, *els)?,
             ExprKind::Index(array, index) => display_index(indent, *array, *index)?,
+            ExprKind::Parallel(limit, e) => {
+                if let Some(limit) = limit {
+                    write!(indent, "Parallel({limit}): {e}")?;
+                } else {
+                    write!(indent, "Parallel: {e}")?;
+                }
+            }
             ExprKind::Lit(lit) => write!(indent, "Lit: {lit}")?,
             ExprKind::Range(start, step, end) => display_range(indent, *start, *step, *end)?,
             ExprKind::Return(e) => write!(indent, "Return: {e}")?,
diff --git a/source/compiler/qsc_fir/src/mut_visit.rs b/source/compiler/qsc_fir/src/mut_visit.rs
index 7cd88f5a4e..ca1e6b1658 100644
--- a/source/compiler/qsc_fir/src/mut_visit.rs
+++ b/source/compiler/qsc_fir/src/mut_visit.rs
@@ -171,6 +171,12 @@ pub fn walk_expr<'a>(vis: &mut impl MutVisitor<'a>, expr: ExprId) {
             vis.visit_expr(*array);
             vis.visit_expr(*index);
         }
+        ExprKind::Parallel(limit, expr) => {
+            if let Some(limit) = limit {
+                vis.visit_expr(*limit);
+            }
+            vis.visit_expr(*expr);
+        }
         ExprKind::Return(expr) | ExprKind::UnOp(_, expr) => {
             vis.visit_expr(*expr);
         }
diff --git a/source/compiler/qsc_fir/src/visit.rs b/source/compiler/qsc_fir/src/visit.rs
index b0461a3edf..94a8c7071d 100644
--- a/source/compiler/qsc_fir/src/visit.rs
+++ b/source/compiler/qsc_fir/src/visit.rs
@@ -172,6 +172,12 @@ pub fn walk_expr<'a>(vis: &mut impl Visitor<'a>, expr: ExprId) {
             vis.visit_expr(*array);
             vis.visit_expr(*index);
         }
+        ExprKind::Parallel(limit, expr) => {
+            if let Some(limit) = limit {
+                vis.visit_expr(*limit);
+            }
+            vis.visit_expr(*expr);
+        }
         ExprKind::Return(expr) | ExprKind::UnOp(_, expr) => {
             vis.visit_expr(*expr);
         }
diff --git a/source/compiler/qsc_frontend/src/lower.rs b/source/compiler/qsc_frontend/src/lower.rs
index aaccb920d8..de616af5f4 100644
--- a/source/compiler/qsc_frontend/src/lower.rs
+++ b/source/compiler/qsc_frontend/src/lower.rs
@@ -797,6 +797,13 @@ impl With<'_> {
                 self.lower_lambda(lambda, expr.span)
             }
             ast::ExprKind::Lit(lit) => self.lower_lit(lit),
+            ast::ExprKind::Parallel(expr) => {
+                hir::ExprKind::Parallel(None, Box::new(self.lower_expr(expr)))
+            }
+            ast::ExprKind::ParallelLimited(limit, body) => hir::ExprKind::Parallel(
+                Some(Box::new(self.lower_expr(limit))),
+                Box::new(self.lower_expr(body)),
+            ),
             ast::ExprKind::Paren(_) => unreachable!("parentheses should be removed earlier"),
             ast::ExprKind::Path(PathKind::Ok(path)) => {
                 let args = self
diff --git a/source/compiler/qsc_frontend/src/typeck/rules.rs b/source/compiler/qsc_frontend/src/typeck/rules.rs
index 677d0cf0f2..3e18091b78 100644
--- a/source/compiler/qsc_frontend/src/typeck/rules.rs
+++ b/source/compiler/qsc_frontend/src/typeck/rules.rs
@@ -455,7 +455,14 @@ impl<'a> Context<'a> {
                 Lit::Result(_) => converge(Ty::Prim(Prim::Result)),
                 Lit::String(_) => converge(Ty::Prim(Prim::String)),
             },
-            ExprKind::Paren(expr) => self.infer_expr(expr),
+            ExprKind::Paren(expr) | ExprKind::Parallel(expr) => self.infer_expr(expr),
+            ExprKind::ParallelLimited(limit, body) => {
+                let limit_span = limit.span;
+                let limit = self.infer_expr(limit);
+                self.inferrer.eq(limit_span, Ty::Prim(Prim::Int), limit.ty);
+                let body = self.infer_expr(body);
+                body.diverge_if(limit.diverges)
+            }
             ExprKind::Path(path) => self.infer_path_kind(expr, path),
             ExprKind::Range(start, step, end) => {
                 let mut diverges = false;
diff --git a/source/compiler/qsc_hir/src/hir.rs b/source/compiler/qsc_hir/src/hir.rs
index 81b2a371a8..936a5bb23b 100644
--- a/source/compiler/qsc_hir/src/hir.rs
+++ b/source/compiler/qsc_hir/src/hir.rs
@@ -708,6 +708,8 @@ pub enum ExprKind {
     Index(Box<Expr>, Box<Expr>),
     /// A literal.
     Lit(Lit),
+    /// A parallel expression: `parallel a` or `parallel within n a`.
+    Parallel(Option<Box<Expr>>, Box<Expr>),
     /// A range: `start..step..end`, `start..end`, `start...`, `...end`, or `...`.
     Range(Option<Box<Expr>>, Option<Box<Expr>>, Option<Box<Expr>>),
     /// A repeat-until loop with an optional fixup: `repeat { ... } until a fixup { ... }`.
@@ -762,6 +764,13 @@ impl Display for ExprKind {
             ExprKind::If(cond, body, els) => display_if(indent, cond, body, els.as_deref())?,
             ExprKind::Index(array, index) => display_index(indent, array, index)?,
             ExprKind::Lit(lit) => write!(indent, "Lit: {lit}")?,
+            ExprKind::Parallel(limit, expr) => {
+                if let Some(limit) = limit {
+                    write!(indent, "Parallel({limit}): {expr}")?;
+                } else {
+                    write!(indent, "Parallel: {expr}")?;
+                }
+            }
             ExprKind::Range(start, step, end) => {
                 display_range(indent, start.as_deref(), step.as_deref(), end.as_deref())?;
             }
diff --git a/source/compiler/qsc_hir/src/mut_visit.rs b/source/compiler/qsc_hir/src/mut_visit.rs
index 50a8fc4b56..d80e18574d 100644
--- a/source/compiler/qsc_hir/src/mut_visit.rs
+++ b/source/compiler/qsc_hir/src/mut_visit.rs
@@ -130,6 +130,7 @@ pub fn walk_stmt(vis: &mut impl MutVisitor, stmt: &mut Stmt) {
     }
 }
 
+#[allow(clippy::too_many_lines)]
 pub fn walk_expr(vis: &mut impl MutVisitor, expr: &mut Expr) {
     vis.visit_span(&mut expr.span);
 
@@ -181,6 +182,12 @@ pub fn walk_expr(vis: &mut impl MutVisitor, expr: &mut Expr) {
             vis.visit_expr(array);
             vis.visit_expr(index);
         }
+        ExprKind::Parallel(limit, expr) => {
+            if let Some(limit) = limit {
+                vis.visit_expr(limit);
+            }
+            vis.visit_expr(expr);
+        }
         ExprKind::Return(expr) | ExprKind::UnOp(_, expr) => {
             vis.visit_expr(expr);
         }
diff --git a/source/compiler/qsc_hir/src/visit.rs b/source/compiler/qsc_hir/src/visit.rs
index 510c79dece..1942c4752a 100644
--- a/source/compiler/qsc_hir/src/visit.rs
+++ b/source/compiler/qsc_hir/src/visit.rs
@@ -113,6 +113,7 @@ pub fn walk_stmt<'a>(vis: &mut impl Visitor<'a>, stmt: &'a Stmt) {
     }
 }
 
+#[allow(clippy::too_many_lines)]
 pub fn walk_expr<'a>(vis: &mut impl Visitor<'a>, expr: &'a Expr) {
     match &expr.kind {
         ExprKind::Array(exprs) => exprs.iter().for_each(|e| vis.visit_expr(e)),
@@ -162,6 +163,12 @@ pub fn walk_expr<'a>(vis: &mut impl Visitor<'a>, expr: &'a Expr) {
             vis.visit_expr(array);
             vis.visit_expr(index);
         }
+        ExprKind::Parallel(limit, expr) => {
+            if let Some(limit) = limit {
+                vis.visit_expr(limit);
+            }
+            vis.visit_expr(expr);
+        }
         ExprKind::Return(expr) | ExprKind::UnOp(_, expr) => {
             vis.visit_expr(expr);
         }
diff --git a/source/compiler/qsc_lowerer/src/lib.rs b/source/compiler/qsc_lowerer/src/lib.rs
index 10b035d4d1..4d07c04cf4 100644
--- a/source/compiler/qsc_lowerer/src/lib.rs
+++ b/source/compiler/qsc_lowerer/src/lib.rs
@@ -682,6 +682,15 @@ impl Lowerer {
                 let index = self.lower_expr(index);
                 fir::ExprKind::Index(container, index)
             }
+            hir::ExprKind::Parallel(limit, expr) => {
+                let limit = limit.as_ref().map(|l| self.lower_expr(l));
+
+                self.exec_graph
+                    .push(ExecGraphNode::ParStart(limit.is_some()));
+                let expr = self.lower_expr(expr);
+                self.exec_graph.push(ExecGraphNode::ParEnd);
+                fir::ExprKind::Parallel(limit, expr)
+            }
             hir::ExprKind::Lit(lit) => lower_lit(lit),
             hir::ExprKind::Range(start, step, end) => {
                 let start = start.as_ref().map(|s| self.lower_expr(s));
@@ -799,7 +808,8 @@ impl Lowerer {
             | fir::ExprKind::Block(..)
             | fir::ExprKind::If(..)
             | fir::ExprKind::Return(..)
-            | fir::ExprKind::While(..) => {}
+            | fir::ExprKind::While(..)
+            | fir::ExprKind::Parallel(..) => {}
 
             fir::ExprKind::Assign(..)
             | fir::ExprKind::AssignField(..)
diff --git a/source/compiler/qsc_parse/src/completion/word_kinds.rs b/source/compiler/qsc_parse/src/completion/word_kinds.rs
index 1d04a59220..2a4c05383c 100644
--- a/source/compiler/qsc_parse/src/completion/word_kinds.rs
+++ b/source/compiler/qsc_parse/src/completion/word_kinds.rs
@@ -115,6 +115,7 @@ bitflags! {
         const Open = keyword_bit(Keyword::Open);
         const Operation = keyword_bit(Keyword::Operation);
         const Or = keyword_bit(Keyword::Or);
+        const Parallel = keyword_bit(Keyword::Parallel);
         const PauliI = keyword_bit(Keyword::PauliI);
         const PauliX = keyword_bit(Keyword::PauliX);
         const PauliY = keyword_bit(Keyword::PauliY);
diff --git a/source/compiler/qsc_parse/src/expr.rs b/source/compiler/qsc_parse/src/expr.rs
index 6dd3741b9c..2f167d5e7e 100644
--- a/source/compiler/qsc_parse/src/expr.rs
+++ b/source/compiler/qsc_parse/src/expr.rs
@@ -101,7 +101,7 @@ pub(super) fn is_stmt_final(kind: &ExprKind) -> bool {
             | ExprKind::If(..)
             | ExprKind::Repeat(..)
             | ExprKind::While(..)
-    )
+    ) || matches!(kind, ExprKind::Parallel(expr) | ExprKind::ParallelLimited(.., expr) if is_stmt_final(&expr.kind))
 }
 
 fn expr_op(s: &mut ParserContext, context: OpContext) -> Result<Box<Expr>> {
@@ -174,6 +174,7 @@ fn expr_op(s: &mut ParserContext, context: OpContext) -> Result<Box<Expr>> {
     Ok(lhs)
 }
 
+#[allow(clippy::too_many_lines)]
 fn expr_base(s: &mut ParserContext) -> Result<Box<Expr>> {
     let lo = s.peek().span.lo;
     let kind = if token(s, TokenKind::Open(Delim::Paren)).is_ok() {
@@ -229,6 +230,15 @@ fn expr_base(s: &mut ParserContext) -> Result<Box<Expr>> {
         Ok(Box::new(ExprKind::Repeat(body, cond, fixup)))
     } else if token(s, TokenKind::Keyword(Keyword::Return)).is_ok() {
         Ok(Box::new(ExprKind::Return(expr(s)?)))
+    } else if token(s, TokenKind::Keyword(Keyword::Parallel)).is_ok() {
+        if s.peek().kind == TokenKind::Keyword(Keyword::Within) {
+            s.advance();
+            let limit = expr(s)?;
+            let body = expr(s)?;
+            Ok(Box::new(ExprKind::ParallelLimited(limit, body)))
+        } else {
+            Ok(Box::new(ExprKind::Parallel(expr(s)?)))
+        }
     } else if !s.contains_language_feature(LanguageFeatures::V2PreviewSyntax)
         && token(s, TokenKind::Keyword(Keyword::Set)).is_ok()
     {
diff --git a/source/compiler/qsc_parse/src/expr/tests.rs b/source/compiler/qsc_parse/src/expr/tests.rs
index 7786259ad3..29fd4d7d3d 100644
--- a/source/compiler/qsc_parse/src/expr/tests.rs
+++ b/source/compiler/qsc_parse/src/expr/tests.rs
@@ -3117,3 +3117,168 @@ fn call_with_incomplete_struct_arg() {
             ]"#]],
     );
 }
+
+#[test]
+fn parallel_expr() {
+    check(
+        expr,
+        "parallel x",
+        &expect![[
+            r#"Expr _id_ [0-10]: Parallel: Expr _id_ [9-10]: Path: Path _id_ [9-10] (Ident _id_ [9-10] "x")"#
+        ]],
+    );
+}
+
+#[test]
+fn parallel_with_block_body() {
+    check(
+        expr,
+        "parallel { x }",
+        &expect![[r#"
+            Expr _id_ [0-14]: Parallel: Expr _id_ [9-14]: Expr Block: Block _id_ [9-14]:
+                Stmt _id_ [11-12]: Expr: Expr _id_ [11-12]: Path: Path _id_ [11-12] (Ident _id_ [11-12] "x")"#]],
+    );
+}
+
+#[test]
+fn parallel_with_block_body_multiple_stmts() {
+    check(
+        expr,
+        "parallel { let a = 1; a }",
+        &expect![[r#"
+            Expr _id_ [0-25]: Parallel: Expr _id_ [9-25]: Expr Block: Block _id_ [9-25]:
+                Stmt _id_ [11-21]: Local (Immutable):
+                    Pat _id_ [15-16]: Bind:
+                        Ident _id_ [15-16] "a"
+                    Expr _id_ [19-20]: Lit: Int(1)
+                Stmt _id_ [22-23]: Expr: Expr _id_ [22-23]: Path: Path _id_ [22-23] (Ident _id_ [22-23] "a")"#]],
+    );
+}
+
+#[test]
+fn parallel_nested() {
+    check(
+        expr,
+        "parallel { parallel x }",
+        &expect![[r#"
+            Expr _id_ [0-23]: Parallel: Expr _id_ [9-23]: Expr Block: Block _id_ [9-23]:
+                Stmt _id_ [11-21]: Expr: Expr _id_ [11-21]: Parallel: Expr _id_ [20-21]: Path: Path _id_ [20-21] (Ident _id_ [20-21] "x")"#]],
+    );
+}
+
+#[test]
+fn parallel_limited_expr() {
+    check(
+        expr,
+        "parallel within 4 { }",
+        &expect![[r#"
+            Expr _id_ [0-21]: ParallelLimited: Expr _id_ [16-17]: Lit: Int(4) Expr _id_ [18-21]: Expr Block: Block _id_ [18-21]: <empty>"#]],
+    );
+}
+
+#[test]
+fn parallel_limited_with_path_body() {
+    check(
+        expr,
+        "parallel within 2 x",
+        &expect![[
+            r#"Expr _id_ [0-19]: ParallelLimited: Expr _id_ [16-17]: Lit: Int(2) Expr _id_ [18-19]: Path: Path _id_ [18-19] (Ident _id_ [18-19] "x")"#
+        ]],
+    );
+}
+
+#[test]
+fn parallel_limited_with_block_body() {
+    check(
+        expr,
+        "parallel within 3 { x }",
+        &expect![[r#"
+            Expr _id_ [0-23]: ParallelLimited: Expr _id_ [16-17]: Lit: Int(3) Expr _id_ [18-23]: Expr Block: Block _id_ [18-23]:
+                Stmt _id_ [20-21]: Expr: Expr _id_ [20-21]: Path: Path _id_ [20-21] (Ident _id_ [20-21] "x")"#]],
+    );
+}
+
+#[test]
+fn parallel_limited_with_computed_limit() {
+    check(
+        expr,
+        "parallel within (2 + 3) x",
+        &expect![[r#"
+            Expr _id_ [0-25]: ParallelLimited: Expr _id_ [16-23]: Paren: Expr _id_ [17-22]: BinOp (Add):
+                Expr _id_ [17-18]: Lit: Int(2)
+                Expr _id_ [21-22]: Lit: Int(3) Expr _id_ [24-25]: Path: Path _id_ [24-25] (Ident _id_ [24-25] "x")"#]],
+    );
+}
+
+#[test]
+fn parallel_limited_nested_in_parallel() {
+    check(
+        expr,
+        "parallel { parallel within 2 x }",
+        &expect![[r#"
+            Expr _id_ [0-32]: Parallel: Expr _id_ [9-32]: Expr Block: Block _id_ [9-32]:
+                Stmt _id_ [11-30]: Expr: Expr _id_ [11-30]: ParallelLimited: Expr _id_ [27-28]: Lit: Int(2) Expr _id_ [29-30]: Path: Path _id_ [29-30] (Ident _id_ [29-30] "x")"#]],
+    );
+}
+
+#[test]
+fn parallel_within_without_limit_parses_within_as_limit() {
+    // `parallel within { }` — the parser consumes `{ }` as the limit expression (a block),
+    // then fails to find a body expression since the input ends.
+    check(
+        expr,
+        "parallel within { }",
+        &expect![[r#"
+            Error(
+                Rule(
+                    "expression",
+                    Eof,
+                    Span {
+                        lo: 19,
+                        hi: 19,
+                    },
+                ),
+            )
+        "#]],
+    );
+}
+
+#[test]
+fn parallel_within_apply_is_error() {
+    // `parallel within {} apply {}` — the parser sees `parallel within` and tries to parse
+    // a ParallelLimited. It consumes `{}` as the limit and then `apply` as the start of the
+    // body expression, but `apply` is not a valid expression keyword, producing an error.
+    check(
+        expr,
+        "parallel within {} apply {}",
+        &expect![[r#"
+            Error(
+                Rule(
+                    "expression",
+                    Keyword(
+                        Apply,
+                    ),
+                    Span {
+                        lo: 19,
+                        hi: 24,
+                    },
+                ),
+            )
+        "#]],
+    );
+}
+
+#[test]
+fn parallel_with_paren_within_apply_is_conjugate() {
+    // `parallel (within {} apply {})` — the parser sees `parallel` (without an immediately
+    // following `within`), so it parses `parallel <expr>` where <expr> is the parenthesized
+    // within/apply (Conjugate) expression.
+    check(
+        expr,
+        "parallel (within {} apply {})",
+        &expect![[r#"
+            Expr _id_ [0-29]: Parallel: Expr _id_ [9-29]: Paren: Expr _id_ [10-28]: Conjugate:
+                Block _id_ [17-19]: <empty>
+                Block _id_ [26-28]: <empty>"#]],
+    );
+}
diff --git a/source/compiler/qsc_parse/src/keyword.rs b/source/compiler/qsc_parse/src/keyword.rs
index e87c0ef6a9..89e526cdff 100644
--- a/source/compiler/qsc_parse/src/keyword.rs
+++ b/source/compiler/qsc_parse/src/keyword.rs
@@ -47,6 +47,7 @@ pub enum Keyword {
     Open,
     Operation,
     Or,
+    Parallel,
     PauliI,
     PauliX,
     PauliY,
@@ -106,6 +107,7 @@ impl Keyword {
             Self::Open => "open",
             Self::Operation => "operation",
             Self::Or => "or",
+            Self::Parallel => "parallel",
             Self::PauliI => "PauliI",
             Self::PauliX => "PauliX",
             Self::PauliY => "PauliY",
@@ -197,6 +199,7 @@ impl FromStr for Keyword {
             // in the standard library for priority order.
             "PauliY" => Ok(Self::PauliY),
             "borrow" => Ok(Self::Borrow),
+            "parallel" => Ok(Self::Parallel),
             "_" => Ok(Self::Underscore),
             _ => Err(()),
         }
diff --git a/source/compiler/qsc_partial_eval/src/lib.rs b/source/compiler/qsc_partial_eval/src/lib.rs
index b71ce650c3..745ad9b9f8 100644
--- a/source/compiler/qsc_partial_eval/src/lib.rs
+++ b/source/compiler/qsc_partial_eval/src/lib.rs
@@ -869,6 +869,7 @@ impl<'a> PartialEvaluator<'a> {
         bin_op: BinOp,
         lhs_eval_var: Var,
         rhs_expr_id: ExprId,
+        bin_op_expr_span: PackageSpan,
     ) -> Result<EvalControlFlow, Error> {
         let result_var = match bin_op {
             BinOp::Eq | BinOp::Neq => {
@@ -877,12 +878,12 @@ impl<'a> PartialEvaluator<'a> {
             BinOp::AndL => {
                 // Logical AND Boolean operations short-circuit on false.
                 let lhs_rir_var = map_eval_var_to_rir_var(lhs_eval_var);
-                self.eval_logical_bool_bin_op(false, lhs_rir_var, rhs_expr_id)?
+                self.eval_logical_bool_bin_op(false, lhs_rir_var, rhs_expr_id, bin_op_expr_span)?
             }
             BinOp::OrL => {
                 // Logical OR Boolean operations short-circuit on true.
                 let lhs_rir_var = map_eval_var_to_rir_var(lhs_eval_var);
-                self.eval_logical_bool_bin_op(true, lhs_rir_var, rhs_expr_id)?
+                self.eval_logical_bool_bin_op(true, lhs_rir_var, rhs_expr_id, bin_op_expr_span)?
             }
             _ => panic!("invalid Boolean operator {bin_op:?}"),
         };
@@ -940,7 +941,10 @@ impl<'a> PartialEvaluator<'a> {
         short_circuit_on_true: bool,
         lhs_rir_var: rir::Variable,
         rhs_expr_id: ExprId,
+        bin_op_expr_span: PackageSpan,
     ) -> Result<Var, Error> {
+        self.fail_if_in_parallel_expr(bin_op_expr_span)?;
+
         // Create the variable where we will store the result of the Boolean operation and store a default value in it,
         // which will only be changed inside the conditional block where the RHS expression is evaluated.
         let result_var_id = self.resource_manager.next_var();
@@ -1132,9 +1136,12 @@ impl<'a> PartialEvaluator<'a> {
         bin_op_expr_span: PackageSpan, // For diagnostic purposes only.
     ) -> Result<EvalControlFlow, Error> {
         match lhs_eval_var.ty {
-            VarTy::Boolean => {
-                self.eval_bin_op_with_lhs_dynamic_bool_operand(bin_op, lhs_eval_var, rhs_expr_id)
-            }
+            VarTy::Boolean => self.eval_bin_op_with_lhs_dynamic_bool_operand(
+                bin_op,
+                lhs_eval_var,
+                rhs_expr_id,
+                bin_op_expr_span,
+            ),
             VarTy::Integer => {
                 let lhs_rir_var = map_eval_var_to_rir_var(lhs_eval_var);
                 let lhs_operand = Operand::Variable(lhs_rir_var);
@@ -1287,6 +1294,9 @@ impl<'a> PartialEvaluator<'a> {
                 "literal should have been classically evaluated".to_string(),
                 expr_package_span,
             )),
+            ExprKind::Parallel(limit_id, expr_id) => {
+                self.eval_expr_parallel(*expr_id, limit_id.as_ref())
+            }
             ExprKind::Range(_, _, _) => Err(Error::Unexpected(
                 "dynamic ranges are invalid".to_string(),
                 expr_package_span,
@@ -1961,6 +1971,7 @@ impl<'a> PartialEvaluator<'a> {
         // At this point the condition value is not classical, so we need to generate a branching instruction.
         // First, we pop the current block node and generate a new one which the new branches will jump to when their
         // instructions end.
+        self.fail_if_in_parallel_expr(self.get_expr_package_span(if_expr_id))?;
         let current_block_node = self.eval_context.pop_block_node();
         let continuation_block_node_id = self.create_program_block();
         let continuation_block_node = BlockNode {
@@ -2502,6 +2513,8 @@ impl<'a> PartialEvaluator<'a> {
             return Ok(EvalControlFlow::Continue(Value::unit()));
         }
 
+        self.fail_if_in_parallel_expr(self.get_expr_package_span(loop_expr_id))?;
+
         // Otherwise, branch to either the body block or the continuation block.
         let body_block_node_id = self.create_program_block();
         let body_block_node = BlockNode {
@@ -3136,7 +3149,6 @@ impl<'a> PartialEvaluator<'a> {
             ));
         };
         self.resource_manager.release_qubit(&qubit);
-
         // The value of a qubit release is unit.
         Ok(Value::unit())
     }
@@ -4043,6 +4055,49 @@ impl<'a> PartialEvaluator<'a> {
 
         Ok(Value::Var(eval_variable))
     }
+
+    /// Evaluates `parallel body` or `parallel within limit body` inside a delayed-release layer.
+    /// Fails if the limit expression returns early or is negative. The layer is always removed,
+    /// even when the body fails, so that start/end calls on the resource manager stay balanced.
+    fn eval_expr_parallel(
+        &mut self,
+        expr_id: ExprId,
+        limit_id: Option<&ExprId>,
+    ) -> Result<EvalControlFlow, Error> {
+        let limit = if let Some(&limit_id) = limit_id {
+            let limit_span = self.get_expr_package_span(limit_id);
+            let EvalControlFlow::Continue(limit_value) = self.try_eval_expr(limit_id)? else {
+                return Err(Error::Unexpected(
+                    "embedded return in parallel limit expression".to_string(),
+                    limit_span,
+                ));
+            };
+            let limit = limit_value.unwrap_int();
+            if limit < 0 {
+                return Err(EvalError::InvalidNegativeInt(limit, limit_span).into());
+            }
+            // Saturate instead of truncating where `usize` is narrower than `i64` (32-bit targets).
+            Some(usize::try_from(limit).unwrap_or(usize::MAX))
+        } else {
+            None
+        };
+
+        self.resource_manager.start_delayed_release_layer(limit);
+        // Do not use `?` here: the layer must be popped before any error is propagated.
+        let result = self.try_eval_expr(expr_id);
+        self.resource_manager.end_delayed_release_layer();
+        result
+    }
+
+    fn fail_if_in_parallel_expr(&self, span: PackageSpan) -> Result<(), Error> {
+        if self.resource_manager.is_delaying_release() {
+            return Err(Error::Unimplemented(
+                "dynamic branching within parallel expression".to_string(),
+                span,
+            ));
+        }
+        Ok(())
+    }
 }
 
 #[derive(Default)]
diff --git a/source/compiler/qsc_partial_eval/src/management.rs b/source/compiler/qsc_partial_eval/src/management.rs
index 3712772040..ab5f62dbb7 100644
--- a/source/compiler/qsc_partial_eval/src/management.rs
+++ b/source/compiler/qsc_partial_eval/src/management.rs
@@ -7,6 +7,7 @@ use num_bigint::BigUint;
 use num_complex::Complex;
 use qsc_data_structures::index_map::IndexMap;
 use qsc_eval::{
+    DelayedQubitReleaseStack,
     backend::Backend,
     val::{Qubit, QubitRef, Result, Value},
 };
@@ -19,6 +20,7 @@ pub struct ResourceManager {
     qubits_in_use: Vec<bool>,
     qubit_id_map: IndexMap<usize, usize>,
     qubit_tracker: FxHashSet<Rc<Qubit>>,
+    delayed_release_qubits: DelayedQubitReleaseStack,
     next_callable: CallableId,
     next_block: BlockId,
     next_result_register: usize,
@@ -46,6 +48,14 @@ impl ResourceManager {
 
     /// Allocates a qubit by favoring available qubit IDs before using new ones.
     pub fn allocate_qubit(&mut self) -> QubitRef {
+        if let Some(qubit) = self.delayed_release_qubits.allocate_delayed_qubit() {
+            return self
+                .qubit_tracker
+                .get(&qubit)
+                .expect("qubit should be in map")
+                .into();
+        }
+
         let qubit = if let Some(qubit) = self.qubits_in_use.iter().position(|in_use| !in_use) {
             self.qubits_in_use[qubit] = true;
             qubit
@@ -70,12 +80,39 @@ impl ResourceManager {
 
     /// Releases a qubit ID for future use.
+    ///
+    /// The qubit is first offered to the delayed-release stack; its ID is freed right
+    /// away only when `delay_release_qubit` returns `false`.
     pub fn release_qubit(&mut self, q: &QubitRef) {
-        let qubit = self.map_qubit(q);
-        self.qubits_in_use[qubit] = false;
-
         let q = q.deref();
-        self.qubit_id_map.remove(q.0);
-        self.qubit_tracker.remove(&q);
+        if !self.delayed_release_qubits.delay_release_qubit(*q) {
+            self.free_qubit_id(*q);
+        }
+    }
+
+    /// Adds a layer to the delayed-release stack, forwarding `limit` unchanged.
+    pub fn start_delayed_release_layer(&mut self, limit: Option<usize>) {
+        self.delayed_release_qubits.add_layer(limit);
+    }
+
+    /// Removes a layer from the delayed-release stack and frees the ID of every qubit
+    /// that `remove_layer` returns.
+    pub fn end_delayed_release_layer(&mut self) {
+        for qubit in self.delayed_release_qubits.remove_layer() {
+            self.free_qubit_id(qubit);
+        }
+    }
+
+    /// Marks the ID mapped to `qubit` as not in use and drops the qubit from both the ID
+    /// map and the tracker.
+    fn free_qubit_id(&mut self, qubit: Qubit) {
+        self.qubits_in_use[self.qubit_id_map[qubit.0]] = false;
+        self.qubit_id_map.remove(qubit.0);
+        self.qubit_tracker.remove(&qubit);
+    }
+
+    /// Returns `true` while the delayed-release stack is not empty.
+    pub fn is_delaying_release(&self) -> bool {
+        !self.delayed_release_qubits.is_empty()
     }
 
     /// Gets the next block ID.
diff --git a/source/compiler/qsc_partial_eval/src/tests.rs b/source/compiler/qsc_partial_eval/src/tests.rs
index d32db7eb31..b8e0f6ad26 100644
--- a/source/compiler/qsc_partial_eval/src/tests.rs
+++ b/source/compiler/qsc_partial_eval/src/tests.rs
@@ -14,6 +14,7 @@ mod loops;
 mod misc;
 mod operators;
 mod output_recording;
+mod parallel;
 mod qubits;
 mod results;
 mod returns;
diff --git a/source/compiler/qsc_partial_eval/src/tests/parallel.rs b/source/compiler/qsc_partial_eval/src/tests/parallel.rs
new file mode 100644
index 0000000000..a94c9aab94
--- /dev/null
+++ b/source/compiler/qsc_partial_eval/src/tests/parallel.rs
@@ -0,0 +1,397 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use super::{assert_blocks, get_rir_program, get_rir_program_with_capabilities};
+use expect_test::expect;
+use indoc::indoc;
+use qsc_data_structures::target::Profile;
+
+// Baseline without `parallel`: the IDs released by the inner block are recycled, so
+// q3/q4 land on Qubit(0)/Qubit(1) and the program needs only 2 qubits.
+#[test]
+fn baseline_qubit_ids_recycled_without_parallel() {
+    let program = get_rir_program(indoc! {
+        r#"
+        namespace Test {
+            operation op(q : Qubit) : Unit { body intrinsic; }
+            @EntryPoint()
+            operation Main() : Unit {
+                // q1 and q2 are allocated and released inside the inner block
+                { use q1 = Qubit(); op(q1); use q2 = Qubit(); op(q2); }
+                // q1 and q2 are now released; next allocations reuse the same IDs
+                use q3 = Qubit();
+                op(q3);
+                use q4 = Qubit();
+                op(q4);
+            }
+        }
+        "#,
+    });
+    assert_blocks(
+        &program,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+    assert_eq!(program.num_qubits, 2);
+    assert_eq!(program.num_results, 0);
+}
+
+// Same program wrapped in `parallel`: the inner block's releases are deferred, so
+// q3/q4 receive fresh IDs Qubit(2)/Qubit(3) and the program needs 4 qubits.
+#[test]
+fn parallel_defers_qubit_release() {
+    let program = get_rir_program(indoc! {
+        r#"
+        namespace Test {
+            operation op(q : Qubit) : Unit { body intrinsic; }
+            @EntryPoint()
+            operation Main() : Unit {
+                parallel {
+                    // q1 and q2 are allocated and released inside the inner block
+                    { use q1 = Qubit(); op(q1); use q2 = Qubit(); op(q2); }
+                    // inside parallel their release is deferred, so q3 and q4 get fresh ids
+                    use q3 = Qubit();
+                    op(q3);
+                    use q4 = Qubit();
+                    op(q4);
+                }
+            }
+        }
+        "#,
+    });
+    assert_blocks(
+        &program,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Call id(2), args( Qubit(3), )
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+    assert_eq!(program.num_qubits, 4);
+    assert_eq!(program.num_results, 0);
+}
+
+// IDs deferred by one `parallel` block are free again once that block ends: the
+// second block reuses Qubit(0).
+#[test]
+fn parallel_releases_available_after_block_ends() {
+    let program = get_rir_program(indoc! {
+        r#"
+        namespace Test {
+            operation op(q : Qubit) : Unit { body intrinsic; }
+            @EntryPoint()
+            operation Main() : Unit {
+                parallel {
+                    use q = Qubit();
+                    op(q);
+                }
+                parallel {
+                    use q = Qubit();
+                    op(q);
+                }
+            }
+        }
+        "#,
+    });
+    assert_blocks(
+        &program,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(0), )
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+    assert_eq!(program.num_qubits, 1);
+    assert_eq!(program.num_results, 0);
+}
+
+// Leaving a nested `parallel` does not free its qubits while the outer one is still
+// open, so `outer2` is given Qubit(3) rather than a recycled ID.
+#[test]
+fn parallel_nested_defers_inner_releases_to_outer() {
+    let program = get_rir_program(indoc! {
+        r#"
+        namespace Test {
+            operation op(q : Qubit) : Unit { body intrinsic; }
+            @EntryPoint()
+            operation Main() : Unit {
+                parallel {
+                    use outer = Qubit();
+                    op(outer);
+                    parallel {
+                        use inner1 = Qubit();
+                        op(inner1);
+                        use inner2 = Qubit();
+                        op(inner2);
+                    }
+                    // inner qubits are now deferred in the outer layer, so a fresh id is allocated
+                    use outer2 = Qubit();
+                    op(outer2);
+                }
+            }
+        }
+        "#,
+    });
+    assert_blocks(
+        &program,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Call id(2), args( Qubit(3), )
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+    assert_eq!(program.num_qubits, 4);
+    assert_eq!(program.num_results, 0);
+}
+
+// With `parallel within 2`, q3/q4 go back to Qubit(0)/Qubit(1) after q1 and q2 have
+// been released, so the program needs only 2 qubits.
+#[test]
+fn parallel_within_reuses_ids_after_limit() {
+    let program = get_rir_program(indoc! {
+        r#"
+        namespace Test {
+            operation op(q : Qubit) : Unit { body intrinsic; }
+            @EntryPoint()
+            operation Main() : Unit {
+                parallel within 2 {
+                    // Each nested block releases its qubit before the next allocation.
+                    { use q1 = Qubit(); op(q1); }
+                    { use q2 = Qubit(); op(q2); }
+                    // 2 qubits have now been deferred; limit reached so q3 and q4 reuse ids
+                    { use q3 = Qubit(); op(q3); }
+                    { use q4 = Qubit(); op(q4); }
+                }
+            }
+        }
+        "#,
+    });
+    assert_blocks(
+        &program,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+    assert_eq!(program.num_qubits, 2);
+    assert_eq!(program.num_results, 0);
+}
+
+// Outer `within 6`, inner `within 2`, three loop iterations that each use three IDs
+// (one for q0, two shared by q1..q4): the first two iterations take Qubit(0..=5) and
+// the third wraps around to Qubit(0..=2), for 6 qubits in total.
+#[test]
+fn parallel_within_nested_defers_through_outer_limit() {
+    let program = get_rir_program(indoc! {
+        r#"
+        namespace Test {
+            operation op(q : Qubit) : Unit { body intrinsic; }
+            @EntryPoint()
+            operation Main() : Unit {
+                parallel within 6 { for _ in 0..2 {
+                    { use q0 = Qubit(); op(q0); }
+                    parallel within 2 {
+                        { use q1 = Qubit(); op(q1); }
+                        { use q2 = Qubit(); op(q2); }
+                        { use q3 = Qubit(); op(q3); }
+                        { use q4 = Qubit(); op(q4); }
+                    }
+                } }
+            }
+        }
+        "#,
+    });
+    assert_blocks(
+        &program,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Variable(0, Integer) = Store Integer(0)
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Variable(0, Integer) = Store Integer(1)
+                Call id(2), args( Qubit(3), )
+                Call id(2), args( Qubit(4), )
+                Call id(2), args( Qubit(5), )
+                Call id(2), args( Qubit(4), )
+                Call id(2), args( Qubit(5), )
+                Variable(0, Integer) = Store Integer(2)
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Variable(0, Integer) = Store Integer(3)
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+    assert_eq!(program.num_qubits, 6);
+    assert_eq!(program.num_results, 0);
+}
+
+// Same shape with an unlimited outer `parallel`: no ID is reused across iterations, so
+// every iteration gets three fresh IDs and the program needs 9 qubits.
+#[test]
+fn parallel_nested_unlimited_outer_defers_all() {
+    let program = get_rir_program(indoc! {
+        r#"
+        namespace Test {
+            operation op(q : Qubit) : Unit { body intrinsic; }
+            @EntryPoint()
+            operation Main() : Unit {
+                parallel { for _ in 0..2 {
+                    { use q0 = Qubit(); op(q0); }
+                    parallel within 2 {
+                        { use q1 = Qubit(); op(q1); }
+                        { use q2 = Qubit(); op(q2); }
+                        { use q3 = Qubit(); op(q3); }
+                        { use q4 = Qubit(); op(q4); }
+                    }
+                } }
+            }
+        }
+        "#,
+    });
+    assert_blocks(
+        &program,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Variable(0, Integer) = Store Integer(0)
+                Call id(2), args( Qubit(0), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Call id(2), args( Qubit(1), )
+                Call id(2), args( Qubit(2), )
+                Variable(0, Integer) = Store Integer(1)
+                Call id(2), args( Qubit(3), )
+                Call id(2), args( Qubit(4), )
+                Call id(2), args( Qubit(5), )
+                Call id(2), args( Qubit(4), )
+                Call id(2), args( Qubit(5), )
+                Variable(0, Integer) = Store Integer(2)
+                Call id(2), args( Qubit(6), )
+                Call id(2), args( Qubit(7), )
+                Call id(2), args( Qubit(8), )
+                Call id(2), args( Qubit(7), )
+                Call id(2), args( Qubit(8), )
+                Variable(0, Integer) = Store Integer(3)
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+    assert_eq!(program.num_qubits, 9);
+    assert_eq!(program.num_results, 0);
+}
+
+// Under `Profile::AdaptiveRIFLA`, compares the RIR of a `for` loop with and without a
+// `parallel` prefix.
+#[test]
+fn parallel_forces_loop_unrolling_with_adaptive_rifla() {
+    // Without parallel, the loop uses backward branching (multiple blocks).
+    let program_no_parallel = get_rir_program_with_capabilities(
+        indoc! {
+            r#"
+        namespace Test {
+            @EntryPoint()
+            operation Main() : Unit {
+                for _ in 0..1 {
+                    use q = Qubit();
+                    H(q);
+                }
+            }
+        }
+        "#,
+        },
+        Profile::AdaptiveRIFLA.into(),
+    );
+    assert_blocks(
+        &program_no_parallel,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Variable(0, Integer) = Store Integer(0)
+                Jump(1)
+            Block 1:Block:
+                Variable(1, Boolean) = Icmp Sle, Variable(0, Integer), Integer(1)
+                Variable(2, Boolean) = Store Bool(true)
+                Branch Variable(1, Boolean), 3, 4
+            Block 2:Block:
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return
+            Block 3:Block:
+                Branch Variable(2, Boolean), 5, 2
+            Block 4:Block:
+                Variable(2, Boolean) = Store Bool(false)
+                Jump(3)
+            Block 5:Block:
+                Call id(2), args( Qubit(0), )
+                Variable(3, Integer) = Add Variable(0, Integer), Integer(1)
+                Variable(0, Integer) = Store Variable(3, Integer)
+                Jump(1)"#]],
+    );
+
+    // With parallel, the same loop is unrolled into a single block.
+    let program_parallel = get_rir_program_with_capabilities(
+        indoc! {
+            r#"
+        namespace Test {
+            @EntryPoint()
+            operation Main() : Unit {
+                parallel for _ in 0..1 {
+                    use q = Qubit();
+                    H(q);
+                }
+            }
+        }
+        "#,
+        },
+        Profile::AdaptiveRIFLA.into(),
+    );
+    assert_blocks(
+        &program_parallel,
+        &expect![[r#"
+            Blocks:
+            Block 0:Block:
+                Call id(1), args( Pointer, )
+                Variable(0, Integer) = Store Integer(0)
+                Call id(2), args( Qubit(0), )
+                Variable(0, Integer) = Store Integer(1)
+                Call id(2), args( Qubit(1), )
+                Variable(0, Integer) = Store Integer(2)
+                Call id(3), args( Integer(0), Tag(0, 3), )
+                Return"#]],
+    );
+}
diff --git a/source/compiler/qsc_passes/src/capabilitiesck.rs b/source/compiler/qsc_passes/src/capabilitiesck.rs
index 2bf707903d..46bc6574e1 100644
--- a/source/compiler/qsc_passes/src/capabilitiesck.rs
+++ b/source/compiler/qsc_passes/src/capabilitiesck.rs
@@ -177,6 +177,14 @@ impl<'a> Visitor<'a> for Checker<'a> {
             ExprKind::While(condition_expr_id, body_block_id) => {
                 self.check_expr_while(expr_id, *condition_expr_id, *body_block_id);
             }
+            // This arm does not pass the `parallel` expression itself to `check_expr`; it
+            // visits the optional limit, then the body (`expr_id` shadows the outer id here).
+            ExprKind::Parallel(limit_id, expr_id) => {
+                if let Some(limit_id) = limit_id {
+                    self.visit_expr(*limit_id);
+                }
+                self.visit_expr(*expr_id);
+            }
             _ => self.check_expr(expr_id),
         }
     }
diff --git a/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive.rs b/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive.rs
index 743240a038..1869946e74 100644
--- a/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive.rs
+++ b/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive.rs
@@ -8,14 +8,16 @@ use super::tests_common::{
     CALL_TO_CYCLIC_OPERATION_WITH_DYNAMIC_ARGUMENT, CALL_UNRESOLVED_FUNCTION, CUSTOM_MEASUREMENT,
     CUSTOM_MEASUREMENT_WITH_SIMULATABLE_INTRINSIC_ATTR, CUSTOM_RESET,
     CUSTOM_RESET_WITH_SIMULATABLE_INTRINSIC_ATTR, DYNAMIC_ARRAY_BINARY_OP,
-    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL,
-    RETURN_WITHIN_DYNAMIC_SCOPE, USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN,
-    USE_DYNAMIC_DOUBLE, USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT,
-    USE_DYNAMIC_LHS_EXP_BINOP, USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT,
-    USE_DYNAMIC_RANGE, USE_DYNAMIC_RHS_EXP_BINOP, USE_DYNAMIC_STRING, USE_DYNAMIC_UDT,
-    USE_DYNAMICALLY_SIZED_ARRAY, USE_ENTRY_POINT_INT_ARRAY_IN_TUPLE,
-    USE_ENTRY_POINT_STATIC_BIG_INT, USE_ENTRY_POINT_STATIC_BOOL, USE_ENTRY_POINT_STATIC_DOUBLE,
-    USE_ENTRY_POINT_STATIC_INT, USE_ENTRY_POINT_STATIC_INT_IN_TUPLE, USE_ENTRY_POINT_STATIC_PAULI,
+    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL, PARALLEL_STATIC_BODY,
+    PARALLEL_WITH_DYNAMIC_BRANCH, PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+    PARALLEL_WITHIN_DYNAMIC_LIMIT, PARALLEL_WITHIN_STATIC_LIMIT, RETURN_WITHIN_DYNAMIC_SCOPE,
+    USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN, USE_DYNAMIC_DOUBLE,
+    USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT, USE_DYNAMIC_LHS_EXP_BINOP,
+    USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT, USE_DYNAMIC_RANGE,
+    USE_DYNAMIC_RHS_EXP_BINOP, USE_DYNAMIC_STRING, USE_DYNAMIC_UDT, USE_DYNAMICALLY_SIZED_ARRAY,
+    USE_ENTRY_POINT_INT_ARRAY_IN_TUPLE, USE_ENTRY_POINT_STATIC_BIG_INT,
+    USE_ENTRY_POINT_STATIC_BOOL, USE_ENTRY_POINT_STATIC_DOUBLE, USE_ENTRY_POINT_STATIC_INT,
+    USE_ENTRY_POINT_STATIC_INT_IN_TUPLE, USE_ENTRY_POINT_STATIC_PAULI,
     USE_ENTRY_POINT_STATIC_RANGE, USE_ENTRY_POINT_STATIC_STRING, check, check_for_exe,
 };
 use expect_test::{Expect, expect};
@@ -790,3 +792,93 @@ fn binary_op_with_dynamic_array_succeeds() {
         "#]],
     );
 }
+
+// A `parallel` block with a fully static body passes the check.
+#[test]
+fn parallel_with_static_body_yields_no_errors() {
+    check_profile(
+        PARALLEL_STATIC_BODY,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// A literal `within` limit passes the check.
+#[test]
+fn parallel_within_with_static_limit_yields_no_errors() {
+    check_profile(
+        PARALLEL_WITHIN_STATIC_LIMIT,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// Branching on a measurement-derived Boolean inside `parallel` is reported on the
+// `if b { H(q); }` expression.
+#[test]
+fn parallel_with_dynamic_branch_yields_error() {
+    check_profile(
+        PARALLEL_WITH_DYNAMIC_BRANCH,
+        &expect![[r#"
+            [
+                UseOfDynamicBranchingInParallelExpr(
+                    Span {
+                        lo: 196,
+                        hi: 210,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// A measurement-derived `within` limit is reported at `n`, in addition to the
+// `UseOfDynamicInt` errors for the conditional expression and for `n`.
+#[test]
+fn parallel_within_with_dynamic_limit_yields_error() {
+    check_profile(
+        PARALLEL_WITHIN_DYNAMIC_LIMIT,
+        &expect![[r#"
+            [
+                UseOfDynamicInt(
+                    Span {
+                        lo: 107,
+                        hi: 130,
+                    },
+                ),
+                UseOfDynamicInt(
+                    Span {
+                        lo: 160,
+                        hi: 161,
+                    },
+                ),
+                UseOfDynamicLimitInParallelExpr(
+                    Span {
+                        lo: 160,
+                        hi: 161,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// A dynamic branch reached through a call is reported at the `Bar(q)` call site.
+#[test]
+fn parallel_with_indirect_branch_via_call_yields_error() {
+    check_profile(
+        PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+        &expect![[r#"
+            [
+                UseOfDynamicBranchingInParallelExpr(
+                    Span {
+                        lo: 217,
+                        hi: 223,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
diff --git a/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers.rs b/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers.rs
index b1c055d2ec..1aef9e5ee6 100644
--- a/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers.rs
+++ b/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers.rs
@@ -8,11 +8,13 @@ use super::tests_common::{
     CALL_TO_CYCLIC_FUNCTION_WITH_DYNAMIC_ARGUMENT,
     CALL_TO_CYCLIC_OPERATION_WITH_CLASSICAL_ARGUMENT,
     CALL_TO_CYCLIC_OPERATION_WITH_DYNAMIC_ARGUMENT, CALL_UNRESOLVED_FUNCTION, CUSTOM_MEASUREMENT,
-    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL,
-    RETURN_WITHIN_DYNAMIC_SCOPE, USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN,
-    USE_DYNAMIC_DOUBLE, USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT,
-    USE_DYNAMIC_LHS_EXP_BINOP, USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT,
-    USE_DYNAMIC_RHS_EXP_BINOP, USE_DYNAMIC_STRING, USE_DYNAMIC_UDT, USE_DYNAMICALLY_SIZED_ARRAY,
+    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL, PARALLEL_STATIC_BODY,
+    PARALLEL_WITH_DYNAMIC_BRANCH, PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+    PARALLEL_WITHIN_DYNAMIC_LIMIT, PARALLEL_WITHIN_STATIC_LIMIT, RETURN_WITHIN_DYNAMIC_SCOPE,
+    USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN, USE_DYNAMIC_DOUBLE,
+    USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT, USE_DYNAMIC_LHS_EXP_BINOP,
+    USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT, USE_DYNAMIC_RHS_EXP_BINOP,
+    USE_DYNAMIC_STRING, USE_DYNAMIC_UDT, USE_DYNAMICALLY_SIZED_ARRAY,
     USE_ENTRY_POINT_INT_ARRAY_IN_TUPLE, USE_ENTRY_POINT_STATIC_BIG_INT,
     USE_ENTRY_POINT_STATIC_BOOL, USE_ENTRY_POINT_STATIC_DOUBLE, USE_ENTRY_POINT_STATIC_INT,
     USE_ENTRY_POINT_STATIC_INT_IN_TUPLE, USE_ENTRY_POINT_STATIC_PAULI,
@@ -627,3 +629,80 @@ fn use_of_static_sized_array_in_tuple_allowed() {
         "#]],
     );
 }
+
+// A `parallel` block with a fully static body passes the check.
+#[test]
+fn parallel_with_static_body_yields_no_errors() {
+    check_profile(
+        PARALLEL_STATIC_BODY,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// A literal `within` limit passes the check.
+#[test]
+fn parallel_within_with_static_limit_yields_no_errors() {
+    check_profile(
+        PARALLEL_WITHIN_STATIC_LIMIT,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// Branching on a measurement-derived Boolean inside `parallel` is reported on the
+// `if b { H(q); }` expression.
+#[test]
+fn parallel_with_dynamic_branch_yields_error() {
+    check_profile(
+        PARALLEL_WITH_DYNAMIC_BRANCH,
+        &expect![[r#"
+            [
+                UseOfDynamicBranchingInParallelExpr(
+                    Span {
+                        lo: 196,
+                        hi: 210,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// A measurement-derived `within` limit is reported at `n`; it is the only error here.
+#[test]
+fn parallel_within_with_dynamic_limit_yields_error() {
+    check_profile(
+        PARALLEL_WITHIN_DYNAMIC_LIMIT,
+        &expect![[r#"
+            [
+                UseOfDynamicLimitInParallelExpr(
+                    Span {
+                        lo: 160,
+                        hi: 161,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// A dynamic branch reached through a call is reported at the `Bar(q)` call site.
+#[test]
+fn parallel_with_indirect_branch_via_call_yields_error() {
+    check_profile(
+        PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+        &expect![[r#"
+            [
+                UseOfDynamicBranchingInParallelExpr(
+                    Span {
+                        lo: 217,
+                        hi: 223,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
diff --git a/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers_and_floats.rs b/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers_and_floats.rs
index bcd333d0ab..cebdc3fcc8 100644
--- a/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers_and_floats.rs
+++ b/source/compiler/qsc_passes/src/capabilitiesck/tests_adaptive_plus_integers_and_floats.rs
@@ -8,11 +8,13 @@ use super::tests_common::{
     CALL_TO_CYCLIC_FUNCTION_WITH_DYNAMIC_ARGUMENT,
     CALL_TO_CYCLIC_OPERATION_WITH_CLASSICAL_ARGUMENT,
     CALL_TO_CYCLIC_OPERATION_WITH_DYNAMIC_ARGUMENT, CALL_UNRESOLVED_FUNCTION, CUSTOM_MEASUREMENT,
-    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL,
-    RETURN_WITHIN_DYNAMIC_SCOPE, USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN,
-    USE_DYNAMIC_DOUBLE, USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT,
-    USE_DYNAMIC_LHS_EXP_BINOP, USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT,
-    USE_DYNAMIC_RHS_EXP_BINOP, USE_DYNAMIC_STRING, USE_DYNAMIC_UDT, USE_DYNAMICALLY_SIZED_ARRAY,
+    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL, PARALLEL_STATIC_BODY,
+    PARALLEL_WITH_DYNAMIC_BRANCH, PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+    PARALLEL_WITHIN_DYNAMIC_LIMIT, PARALLEL_WITHIN_STATIC_LIMIT, RETURN_WITHIN_DYNAMIC_SCOPE,
+    USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN, USE_DYNAMIC_DOUBLE,
+    USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT, USE_DYNAMIC_LHS_EXP_BINOP,
+    USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT, USE_DYNAMIC_RHS_EXP_BINOP,
+    USE_DYNAMIC_STRING, USE_DYNAMIC_UDT, USE_DYNAMICALLY_SIZED_ARRAY,
     USE_ENTRY_POINT_INT_ARRAY_IN_TUPLE, USE_ENTRY_POINT_STATIC_BIG_INT,
     USE_ENTRY_POINT_STATIC_BOOL, USE_ENTRY_POINT_STATIC_DOUBLE, USE_ENTRY_POINT_STATIC_INT,
     USE_ENTRY_POINT_STATIC_INT_IN_TUPLE, USE_ENTRY_POINT_STATIC_PAULI,
@@ -605,3 +607,80 @@ fn use_of_static_sized_array_in_tuple_allowed() {
         "#]],
     );
 }
+
+// A `parallel` block with a fully static body passes the check.
+#[test]
+fn parallel_with_static_body_yields_no_errors() {
+    check_profile(
+        PARALLEL_STATIC_BODY,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// A literal `within` limit passes the check.
+#[test]
+fn parallel_within_with_static_limit_yields_no_errors() {
+    check_profile(
+        PARALLEL_WITHIN_STATIC_LIMIT,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// Branching on a measurement-derived Boolean inside `parallel` is reported on the
+// `if b { H(q); }` expression.
+#[test]
+fn parallel_with_dynamic_branch_yields_error() {
+    check_profile(
+        PARALLEL_WITH_DYNAMIC_BRANCH,
+        &expect![[r#"
+            [
+                UseOfDynamicBranchingInParallelExpr(
+                    Span {
+                        lo: 196,
+                        hi: 210,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// A measurement-derived `within` limit is reported at `n`; it is the only error here.
+#[test]
+fn parallel_within_with_dynamic_limit_yields_error() {
+    check_profile(
+        PARALLEL_WITHIN_DYNAMIC_LIMIT,
+        &expect![[r#"
+            [
+                UseOfDynamicLimitInParallelExpr(
+                    Span {
+                        lo: 160,
+                        hi: 161,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// A dynamic branch reached through a call is reported at the `Bar(q)` call site.
+#[test]
+fn parallel_with_indirect_branch_via_call_yields_error() {
+    check_profile(
+        PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+        &expect![[r#"
+            [
+                UseOfDynamicBranchingInParallelExpr(
+                    Span {
+                        lo: 217,
+                        hi: 223,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
diff --git a/source/compiler/qsc_passes/src/capabilitiesck/tests_base.rs b/source/compiler/qsc_passes/src/capabilitiesck/tests_base.rs
index 4fa3dad001..4d29f1829a 100644
--- a/source/compiler/qsc_passes/src/capabilitiesck/tests_base.rs
+++ b/source/compiler/qsc_passes/src/capabilitiesck/tests_base.rs
@@ -8,14 +8,16 @@ use super::tests_common::{
     CALL_TO_CYCLIC_OPERATION_WITH_DYNAMIC_ARGUMENT, CALL_UNRESOLVED_FUNCTION, CUSTOM_MEASUREMENT,
     CUSTOM_MEASUREMENT_WITH_SIMULATABLE_INTRINSIC_ATTR, CUSTOM_RESET,
     CUSTOM_RESET_WITH_SIMULATABLE_INTRINSIC_ATTR, DYNAMIC_ARRAY_BINARY_OP,
-    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL,
-    RETURN_WITHIN_DYNAMIC_SCOPE, USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN,
-    USE_DYNAMIC_DOUBLE, USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT,
-    USE_DYNAMIC_LHS_EXP_BINOP, USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT,
-    USE_DYNAMIC_RANGE, USE_DYNAMIC_RHS_EXP_BINOP, USE_DYNAMIC_STRING, USE_DYNAMIC_UDT,
-    USE_DYNAMICALLY_SIZED_ARRAY, USE_ENTRY_POINT_INT_ARRAY_IN_TUPLE,
-    USE_ENTRY_POINT_STATIC_BIG_INT, USE_ENTRY_POINT_STATIC_BOOL, USE_ENTRY_POINT_STATIC_DOUBLE,
-    USE_ENTRY_POINT_STATIC_INT, USE_ENTRY_POINT_STATIC_INT_IN_TUPLE, USE_ENTRY_POINT_STATIC_PAULI,
+    LOOP_WITH_DYNAMIC_CONDITION, MEASUREMENT_WITHIN_DYNAMIC_SCOPE, MINIMAL, PARALLEL_STATIC_BODY,
+    PARALLEL_WITH_DYNAMIC_BRANCH, PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+    PARALLEL_WITHIN_DYNAMIC_LIMIT, PARALLEL_WITHIN_STATIC_LIMIT, RETURN_WITHIN_DYNAMIC_SCOPE,
+    USE_CLOSURE_FUNCTION, USE_DYNAMIC_BIG_INT, USE_DYNAMIC_BOOLEAN, USE_DYNAMIC_DOUBLE,
+    USE_DYNAMIC_FUNCTION, USE_DYNAMIC_INDEX, USE_DYNAMIC_INT, USE_DYNAMIC_LHS_EXP_BINOP,
+    USE_DYNAMIC_OPERATION, USE_DYNAMIC_PAULI, USE_DYNAMIC_QUBIT, USE_DYNAMIC_RANGE,
+    USE_DYNAMIC_RHS_EXP_BINOP, USE_DYNAMIC_STRING, USE_DYNAMIC_UDT, USE_DYNAMICALLY_SIZED_ARRAY,
+    USE_ENTRY_POINT_INT_ARRAY_IN_TUPLE, USE_ENTRY_POINT_STATIC_BIG_INT,
+    USE_ENTRY_POINT_STATIC_BOOL, USE_ENTRY_POINT_STATIC_DOUBLE, USE_ENTRY_POINT_STATIC_INT,
+    USE_ENTRY_POINT_STATIC_INT_IN_TUPLE, USE_ENTRY_POINT_STATIC_PAULI,
     USE_ENTRY_POINT_STATIC_RANGE, USE_ENTRY_POINT_STATIC_STRING, check, check_for_exe,
 };
 use expect_test::{Expect, expect};
@@ -976,3 +978,119 @@ fn binary_op_with_dynamic_array_error() {
         "#]],
     );
 }
+
+// A `parallel` block with a fully static body passes the check.
+#[test]
+fn parallel_with_static_body_yields_no_errors() {
+    check_profile(
+        PARALLEL_STATIC_BODY,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// A literal `within` limit passes the check.
+#[test]
+fn parallel_within_with_static_limit_yields_no_errors() {
+    check_profile(
+        PARALLEL_WITHIN_STATIC_LIMIT,
+        &expect![[r#"
+            []
+        "#]],
+    );
+}
+
+// Only `UseOfDynamicBool` is expected, for `M(ctrl) == Zero` and for the `b` condition.
+// NOTE(review): unlike the indirect-call test below, no
+// `UseOfDynamicBranchingInParallelExpr` is expected here — confirm this is intended.
+#[test]
+fn parallel_with_dynamic_branch_yields_error() {
+    check_profile(
+        PARALLEL_WITH_DYNAMIC_BRANCH,
+        &expect![[r#"
+            [
+                UseOfDynamicBool(
+                    Span {
+                        lo: 107,
+                        hi: 122,
+                    },
+                ),
+                UseOfDynamicBool(
+                    Span {
+                        lo: 199,
+                        hi: 200,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// The dynamic limit `n` is reported as a dynamic Bool, a dynamic Int and a dynamic
+// `parallel` limit, on top of the `UseOfDynamicBool` for `M(ctrl) == Zero`.
+#[test]
+fn parallel_within_with_dynamic_limit_yields_error() {
+    check_profile(
+        PARALLEL_WITHIN_DYNAMIC_LIMIT,
+        &expect![[r#"
+            [
+                UseOfDynamicBool(
+                    Span {
+                        lo: 107,
+                        hi: 122,
+                    },
+                ),
+                UseOfDynamicBool(
+                    Span {
+                        lo: 160,
+                        hi: 161,
+                    },
+                ),
+                UseOfDynamicInt(
+                    Span {
+                        lo: 160,
+                        hi: 161,
+                    },
+                ),
+                UseOfDynamicLimitInParallelExpr(
+                    Span {
+                        lo: 160,
+                        hi: 161,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
+
+// `UseOfDynamicBool` is reported for `M(q) == Zero` in `Bar` and for the `Bar(q)` call,
+// which is also reported as dynamic branching inside `parallel`.
+#[test]
+fn parallel_with_indirect_branch_via_call_yields_error() {
+    check_profile(
+        PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL,
+        &expect![[r#"
+            [
+                UseOfDynamicBool(
+                    Span {
+                        lo: 79,
+                        hi: 91,
+                    },
+                ),
+                UseOfDynamicBool(
+                    Span {
+                        lo: 217,
+                        hi: 223,
+                    },
+                ),
+                UseOfDynamicBranchingInParallelExpr(
+                    Span {
+                        lo: 217,
+                        hi: 223,
+                    },
+                ),
+            ]
+        "#]],
+    );
+}
diff --git a/source/compiler/qsc_passes/src/capabilitiesck/tests_common.rs b/source/compiler/qsc_passes/src/capabilitiesck/tests_common.rs
index 0d557b02c3..5befcf0765 100644
--- a/source/compiler/qsc_passes/src/capabilitiesck/tests_common.rs
+++ b/source/compiler/qsc_passes/src/capabilitiesck/tests_common.rs
@@ -528,3 +528,60 @@ pub const DYNAMIC_ARRAY_BINARY_OP: &str = r#"
         MResetEachZ(qs) == [Zero, Zero];
     }
 "#;
+
+pub const PARALLEL_STATIC_BODY: &str = r#"
+    namespace Test {
+        operation Foo() : Unit {
+            parallel {
+                use q = Qubit();
+                H(q);
+            }
+        }
+    }"#;
+
+pub const PARALLEL_WITH_DYNAMIC_BRANCH: &str = r#"
+    namespace Test {
+        operation Foo() : Unit {
+            use ctrl = Qubit();
+            let b = M(ctrl) == Zero;
+            parallel {
+                use q = Qubit();
+                if b { H(q); }
+            }
+        }
+    }"#;
+
+pub const PARALLEL_WITHIN_STATIC_LIMIT: &str = r#"
+    namespace Test {
+        operation Foo() : Unit {
+            parallel within 4 {
+                use q = Qubit();
+                H(q);
+            }
+        }
+    }"#;
+
+pub const PARALLEL_WITHIN_DYNAMIC_LIMIT: &str = r#"
+    namespace Test {
+        operation Foo() : Unit {
+            use ctrl = Qubit();
+            let n = M(ctrl) == Zero ? 2 | 4;
+            parallel within n {
+                use q = Qubit();
+                H(q);
+            }
+        }
+    }"#;
+
+pub const PARALLEL_WITH_INDIRECT_BRANCH_VIA_CALL: &str = r#"
+    namespace Test {
+        operation Bar(q : Qubit) : Unit {
+            if M(q) == Zero { X(q); }
+        }
+        operation Foo() : Unit {
+            parallel {
+                use q = Qubit();
+                Bar(q);
+            }
+        }
+    }"#;
diff --git a/source/compiler/qsc_passes/src/logic_sep.rs b/source/compiler/qsc_passes/src/logic_sep.rs
index 433eaa4f9a..0b08113b9b 100644
--- a/source/compiler/qsc_passes/src/logic_sep.rs
+++ b/source/compiler/qsc_passes/src/logic_sep.rs
@@ -178,6 +178,14 @@ impl SepCheck {
             ExprKind::If(cond, then_expr, else_expr) => {
                 self.handle_if_expr(prior, cond, then_expr, else_expr.as_deref())
             }
+            ExprKind::Parallel(limit, body) => {
+                if let Some(limit) = limit {
+                    self.op_call_allowed = false;
+                    self.visit_expr(limit);
+                    self.op_call_allowed = prior;
+                }
+                self.handle_expr(body, prior)
+            }
 
             ExprKind::Array(_)
             | ExprKind::ArrayRepeat(..)
diff --git a/source/compiler/qsc_rca/src/core.rs b/source/compiler/qsc_rca/src/core.rs
index 30f0b7da32..178e47c4c3 100644
--- a/source/compiler/qsc_rca/src/core.rs
+++ b/source/compiler/qsc_rca/src/core.rs
@@ -33,6 +33,7 @@ pub struct Analyzer<'a> {
     package_store_compute_properties: InternalPackageStoreComputeProperties,
     active_contexts: Vec<AnalysisContext>,
     target_capabilities: TargetCapabilityFlags,
+    in_parallel_expr: bool,
 }
 
 impl<'a> Analyzer<'a> {
@@ -46,6 +47,7 @@ impl<'a> Analyzer<'a> {
             package_store_compute_properties,
             active_contexts: Vec::<AnalysisContext>::default(),
             target_capabilities,
+            in_parallel_expr: false,
         }
     }
 
@@ -237,6 +239,7 @@ impl<'a> Analyzer<'a> {
 
     fn analyze_expr_bin_op(
         &mut self,
+        bin_op: BinOp,
         lhs_expr_id: ExprId,
         rhs_expr_id: ExprId,
         expr_type: &Ty,
@@ -253,6 +256,21 @@ impl<'a> Analyzer<'a> {
         compute_kind = compute_kind.aggregate(lhs_compute_kind);
         compute_kind = compute_kind.aggregate(rhs_compute_kind);
 
+        if self.in_parallel_expr
+            && matches!(bin_op, BinOp::AndL | BinOp::OrL)
+            && lhs_compute_kind.is_variable_value_kind()
+        {
+            // Binary boolean operators with a variable left-hand side are short-circuiting expressions, which means they incur
+            // dynamic branching in code-gen. Since this is a parallel expression, we need to track this as a runtime feature.
+            compute_kind = compute_kind.aggregate_runtime_features(
+                ComputeKind::Dynamic {
+                    runtime_features: RuntimeFeatureFlags::UseOfDynamicBranchingInParallelExpr,
+                    value_kind: ValueKind::Constant,
+                },
+                ValueKind::Constant,
+            );
+        }
+
         // Additionally, since the new compute kind can be of a different type than its operands (e.g. 1 == 1),
         // aggregate additional runtime features depending on the binary operator expression's type (if it's dynamic).
         if let ComputeKind::Dynamic {
@@ -382,6 +400,15 @@ impl<'a> Analyzer<'a> {
         {
             *runtime_features |=
                 derive_runtime_features_for_value_kind_associated_to_type(*value_kind, expr_type);
+
+            if self.in_parallel_expr
+                && runtime_features.contains(RuntimeFeatureFlags::UseOfDynamicBool)
+            {
+                // A Call expression that includes use of a dynamic Boolean (before aggregating the features from the
+                // callee and arguments) means that the callable itself incurs the dynamic Boolean runtime feature.
+                // This would cause branching in a parallel expression, which requires an additional runtime feature to be tracked.
+                *runtime_features |= RuntimeFeatureFlags::UseOfDynamicBranchingInParallelExpr;
+            }
         }
 
         // Aggregate the runtime features of the callee and arguments expressions.
@@ -722,6 +749,11 @@ impl<'a> Analyzer<'a> {
                     }
                     _ => {}
                 }
+                if self.in_parallel_expr {
+                    // A dynamic branch in a parallel expression requires an additional runtime feature to be tracked.
+                    dynamic_runtime_features |=
+                        RuntimeFeatureFlags::UseOfDynamicBranchingInParallelExpr;
+                }
             }
             let dynamic_compute_kind = ComputeKind::Dynamic {
                 runtime_features: dynamic_runtime_features,
@@ -1088,7 +1120,11 @@ impl<'a> Analyzer<'a> {
     }
 
     fn analyze_expr_while(&mut self, condition_expr_id: ExprId, block_id: BlockId) -> ComputeKind {
-        let mut should_emit_classical_loop = self.should_emit_classical_loops();
+        // We only want to emit classical loops if the current target capabilities allow it and we are not in a parallel expression.
+        // Checking both conditions here avoids the speculative generation of loop capabilities in cases where we know it
+        // wouldn't be allowed anyway.
+        let mut should_emit_classical_loop =
+            self.should_emit_classical_loops() && !self.in_parallel_expr;
         let mut cached_locals_map = if should_emit_classical_loop {
             Some(self.get_current_application_instance().locals_map.clone())
         } else {
@@ -1183,11 +1219,33 @@ impl<'a> Analyzer<'a> {
                 panic!("if the loop condition is quantum, the loop expression must be quantum too");
             };
             *runtime_features |= RuntimeFeatureFlags::LoopWithDynamicCondition;
+            if self.in_parallel_expr {
+                // A dynamic loop in a parallel expression requires an additional runtime feature to be tracked.
+                *runtime_features |= RuntimeFeatureFlags::UseOfDynamicBranchingInParallelExpr;
+            }
         }
 
         compute_kind
     }
 
+    fn analyze_expr_parallel_limit(&mut self, limit: ExprId) {
+        self.visit_expr(limit);
+        // The limit of a parallel expression must be static; any other compute kind is flagged below.
+        let application_instance = self.get_current_application_instance_mut();
+        let limit_compute_kind = *application_instance.get_expr_compute_kind(limit);
+        if !matches!(limit_compute_kind, ComputeKind::Static) {
+            // Record the dynamic-limit runtime feature on the compute kind of the limit expression itself.
+            let new_limit_compute_kind = limit_compute_kind.aggregate_runtime_features(
+                ComputeKind::Dynamic {
+                    runtime_features: RuntimeFeatureFlags::UseOfDynamicLimitInParallelExpr,
+                    value_kind: ValueKind::Constant,
+                },
+                ValueKind::Constant,
+            );
+            application_instance.insert_expr_compute_kind(limit, new_limit_compute_kind);
+        }
+    }
+
     // Analyzes the currently active callable assuming it is intrinsic.
     fn analyze_intrinsic_callable(&mut self) {
         // Check whether the callable has already been analyzed.
@@ -1288,6 +1346,8 @@ impl<'a> Analyzer<'a> {
 
         // Push the context of the callable the specialization belongs to.
         self.push_item_context(id.callable);
+        let previous_in_parallel = self.in_parallel_expr;
+        self.in_parallel_expr = false;
         let package = self.package_store.get(id.callable.package);
         let input_params = package.derive_callable_input_params(callable_decl);
         let current_callable_context = self.get_current_item_context_mut();
@@ -1328,6 +1388,7 @@ impl<'a> Analyzer<'a> {
         // Since we are done analyzing the specialization, pop the active item context.
         let popped_item_id = self.pop_item_context();
         assert!(popped_item_id == id.callable);
+        self.in_parallel_expr = previous_in_parallel;
     }
 
     fn analyze_spec_decl(&mut self, decl: &'a SpecDecl, functor_set_value: FunctorSetValue) {
@@ -1921,8 +1982,8 @@ impl<'a> Visitor<'a> for Analyzer<'a> {
             ExprKind::BinOp(BinOp::Exp, lhs_expr_id, rhs_expr_id) => {
                 self.analyze_expr_bin_op_exp(*lhs_expr_id, *rhs_expr_id)
             }
-            ExprKind::BinOp(_, lhs_expr_id, rhs_expr_id) => {
-                self.analyze_expr_bin_op(*lhs_expr_id, *rhs_expr_id, &expr.ty)
+            ExprKind::BinOp(bin_op, lhs_expr_id, rhs_expr_id) => {
+                self.analyze_expr_bin_op(*bin_op, *lhs_expr_id, *rhs_expr_id, &expr.ty)
             }
             ExprKind::Block(block_id) => self.analyze_expr_block(*block_id),
             ExprKind::Call(callee_expr_id, args_expr_id) => {
@@ -1949,6 +2010,20 @@ impl<'a> Visitor<'a> for Analyzer<'a> {
             ExprKind::Index(array_expr_id, index_expr_id) => {
                 self.analyze_expr_index(*array_expr_id, *index_expr_id, &expr.ty)
             }
+            ExprKind::Parallel(limit, body) => {
+                if let Some(limit) = limit {
+                    self.analyze_expr_parallel_limit(*limit);
+                }
+                // We need to track when we are analyzing a parallel expression to understand whether certain constructs should
+                // be allowed.
+                let previous_in_parallel_expr = self.in_parallel_expr;
+                self.in_parallel_expr = true;
+                self.visit_expr(*body);
+                self.in_parallel_expr = previous_in_parallel_expr;
+                // The compute kind of a parallel expression is the same as the compute kind of its inner expression.
+                let application_instance = self.get_current_application_instance();
+                *application_instance.get_expr_compute_kind(*body)
+            }
             ExprKind::Range(start_expr_id, step_expr_id, end_expr_id) => self.analyze_expr_range(
                 start_expr_id.to_owned(),
                 step_expr_id.to_owned(),
diff --git a/source/compiler/qsc_rca/src/errors.rs b/source/compiler/qsc_rca/src/errors.rs
index 14b9249adf..bbdfa933b6 100644
--- a/source/compiler/qsc_rca/src/errors.rs
+++ b/source/compiler/qsc_rca/src/errors.rs
@@ -252,8 +252,25 @@ pub enum Error {
     #[diagnostic(url("https://aka.ms/qdk.qir#use-of-dynamic-generic"))]
     #[diagnostic(code("Qsc.CapabilitiesCk.UseOfDynamicGeneric"))]
     UseOfDynamicGeneric(#[label] Span),
+
+    #[error("cannot use dynamic branching in parallel expression")]
+    #[diagnostic(help(
+        "using branching based on a measurement result in a parallel expression is not supported by the configured target profile"
+    ))]
+    #[diagnostic(url("https://aka.ms/qdk.qir#use-of-dynamic-branching-in-parallel-expr"))]
+    #[diagnostic(code("Qsc.CapabilitiesCk.UseOfDynamicBranchingInParallelExpr"))]
+    UseOfDynamicBranchingInParallelExpr(#[label] Span),
+
+    #[error("cannot use dynamic limit for a parallel expression")]
+    #[diagnostic(help(
+        "using a dynamic limit for a parallel expression is not supported by the configured target profile"
+    ))]
+    #[diagnostic(url("https://aka.ms/qdk.qir#use-of-dynamic-limit-in-parallel-expr"))]
+    #[diagnostic(code("Qsc.CapabilitiesCk.UseOfDynamicLimitInParallelExpr"))]
+    UseOfDynamicLimitInParallelExpr(#[label] Span),
 }
 
+#[allow(clippy::too_many_lines)]
 #[must_use]
 pub fn generate_errors_from_runtime_features(
     runtime_features: RuntimeFeatureFlags,
@@ -356,6 +373,12 @@ pub fn generate_errors_from_runtime_features(
     if runtime_features.contains(RuntimeFeatureFlags::UseOfDynamicGeneric) {
         errors.push(Error::UseOfDynamicGeneric(span));
     }
+    if runtime_features.contains(RuntimeFeatureFlags::UseOfDynamicBranchingInParallelExpr) {
+        errors.push(Error::UseOfDynamicBranchingInParallelExpr(span));
+    }
+    if runtime_features.contains(RuntimeFeatureFlags::UseOfDynamicLimitInParallelExpr) {
+        errors.push(Error::UseOfDynamicLimitInParallelExpr(span));
+    }
     errors
 }
 
diff --git a/source/compiler/qsc_rca/src/lib.rs b/source/compiler/qsc_rca/src/lib.rs
index 2c1d8770b8..a60a60b0d0 100644
--- a/source/compiler/qsc_rca/src/lib.rs
+++ b/source/compiler/qsc_rca/src/lib.rs
@@ -601,7 +601,7 @@ bitflags! {
     /// Runtime features represent anything a program can do that is more complex than executing quantum operations on
     /// statically allocated qubits and using constant arguments.
     #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-    pub struct RuntimeFeatureFlags: u32 {
+    pub struct RuntimeFeatureFlags: u64 {
         /// Use of a dynamic `Bool`.
         const UseOfDynamicBool = 1 << 0;
         /// Use of a dynamic `Int`.
@@ -666,6 +666,10 @@ bitflags! {
         const CallToCustomReset = 1 << 30;
         /// Use of a dynamic generic parameter.
         const UseOfDynamicGeneric = 1 << 31;
+        /// Use of dynamic branching in a parallel expression.
+        const UseOfDynamicBranchingInParallelExpr = 1 << 32;
+        /// Use of a dynamic limit in a parallel expression.
+        const UseOfDynamicLimitInParallelExpr = 1 << 33;
     }
 }
 
@@ -684,6 +688,7 @@ impl RuntimeFeatureFlags {
     }
 
     /// Maps program constructs to target capabilities.
+    #[allow(clippy::too_many_lines)]
     #[must_use]
     pub fn target_capabilities(&self) -> TargetCapabilityFlags {
         let mut capabilities = TargetCapabilityFlags::empty();
@@ -788,6 +793,12 @@ impl RuntimeFeatureFlags {
         if self.contains(RuntimeFeatureFlags::UseOfDynamicGeneric) {
             capabilities |= TargetCapabilityFlags::HigherLevelConstructs;
         }
+        if self.contains(RuntimeFeatureFlags::UseOfDynamicBranchingInParallelExpr) {
+            capabilities |= TargetCapabilityFlags::HigherLevelConstructs;
+        }
+        if self.contains(RuntimeFeatureFlags::UseOfDynamicLimitInParallelExpr) {
+            capabilities |= TargetCapabilityFlags::HigherLevelConstructs;
+        }
         capabilities
     }
 
diff --git a/source/compiler/qsc_rca/src/tests.rs b/source/compiler/qsc_rca/src/tests.rs
index 2057229e68..525d089049 100644
--- a/source/compiler/qsc_rca/src/tests.rs
+++ b/source/compiler/qsc_rca/src/tests.rs
@@ -14,6 +14,7 @@ mod lambdas;
 mod loops;
 mod measurements;
 mod overrides;
+mod parallel;
 mod qubits;
 mod strings;
 mod structs;
diff --git a/source/compiler/qsc_rca/src/tests/parallel.rs b/source/compiler/qsc_rca/src/tests/parallel.rs
new file mode 100644
index 0000000000..266db8b489
--- /dev/null
+++ b/source/compiler/qsc_rca/src/tests/parallel.rs
@@ -0,0 +1,423 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use super::{
+    CompilationContext, check_callable_compute_properties, check_last_statement_compute_properties,
+};
+use expect_test::expect;
+
+#[test]
+fn check_rca_for_parallel_expr_with_static_body() {
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        let e = parallel { };
+        e"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_last_statement_compute_properties(
+        package_store_compute_properties,
+        &expect![[r#"
+            ApplicationsGeneratorSet:
+                inherent: Static
+                dynamic_param_applications: <empty>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_within_with_static_limit_and_body() {
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        let e = parallel within 4 { };
+        e"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_last_statement_compute_properties(
+        package_store_compute_properties,
+        &expect![[r#"
+            ApplicationsGeneratorSet:
+                inherent: Static
+                dynamic_param_applications: <empty>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_with_dynamic_operations_no_branching() {
+    // A parallel body that allocates a qubit, applies gates, and measures — making the overall
+    // operation dynamic — but contains no conditional branching and therefore
+    // does NOT produce UseOfDynamicBranchingInParallelExpr.
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            use q = Qubit();
+            let _ = parallel {
+                use p = Qubit();
+                H(p);
+                M(p)
+            };
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(0x0)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_within_with_dynamic_operations_no_branching() {
+    // Same validation as above but using `parallel within` with a static limit.
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            use q = Qubit();
+            let _ = parallel within 4 {
+                use p = Qubit();
+                H(p);
+                M(p)
+            };
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(0x0)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_with_dynamic_if_in_body() {
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            use q = Qubit();
+            parallel {
+                if M(q) == Zero {
+                    H(q);
+                }
+            }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicBranchingInParallelExpr)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_with_short_circuit_bool_in_body() {
+    // Short-circuiting logical `and`/`or` with a variable LHS incurs dynamic branching in code gen.
+    // When inside a parallel expression, this triggers UseOfDynamicBranchingInParallelExpr.
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            use q = Qubit();
+            parallel {
+                let b = (M(q) == Zero) and (M(q) == One);
+            }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicBranchingInParallelExpr)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_with_while_loop_with_dynamic_condition() {
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            use q = Qubit();
+            parallel {
+                while M(q) == Zero {
+                    H(q);
+                }
+            }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | MeasurementWithinDynamicScope | LoopWithDynamicCondition | UseOfDynamicBranchingInParallelExpr)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_within_with_dynamic_limit() {
+    // The UseOfDynamicLimitInParallelExpr runtime feature is stored on the limit expression's
+    // compute kind by the RCA, but is not propagated to the callable-level compute properties.
+    // The callable-level features reflect only the dynamic values used in the body (Bool and Int
+    // from the conditional). The UseOfDynamicLimitInParallelExpr flag is checked and surfaced as
+    // an error by the capabilities check pass (see capabilitiesck tests).
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            use q = Qubit();
+            let n = M(q) == Zero ? 2 | 4;
+            parallel within n { }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicInt)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_nested_parallel_with_dynamic_if_in_inner_body() {
+    // Dynamic branching in the inner parallel propagates out to the outer parallel's compute kind
+    // since the outer parallel's compute kind is derived from its body.
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            use q = Qubit();
+            parallel {
+                parallel {
+                    if M(q) == Zero {
+                        H(q);
+                    }
+                }
+            }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicBranchingInParallelExpr)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_calling_operation_that_branches_dynamically() {
+    // Bar measures a qubit and branches on the result, making it dynamic with UseOfDynamicBool.
+    // When Foo calls Bar inside a parallel expression, the RCA detects that the call involves a
+    // dynamic bool and adds UseOfDynamicBranchingInParallelExpr to Foo's compute properties.
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Bar(q : Qubit) : Unit {
+            if M(q) == Zero {
+                H(q);
+            }
+        }
+        operation Foo() : Unit {
+            use q = Qubit();
+            parallel {
+                Bar(q);
+            }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Bar",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool)
+                        value_kind: Constant
+                    dynamic_param_applications:
+                        [0]: [Parameter Type Element] Dynamic:
+                            runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicQubit)
+                            value_kind: Constant
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicBranchingInParallelExpr)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_within_calling_operation_that_branches_dynamically() {
+    // Same as above but using `parallel within` with a static limit.
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Bar(q : Qubit) : Unit {
+            if M(q) == Zero {
+                H(q);
+            }
+        }
+        operation Foo() : Unit {
+            use q = Qubit();
+            parallel within 4 {
+                Bar(q);
+            }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Bar",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool)
+                        value_kind: Constant
+                    dynamic_param_applications:
+                        [0]: [Parameter Type Element] Dynamic:
+                            runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicQubit)
+                            value_kind: Constant
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicBranchingInParallelExpr)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
+
+#[test]
+fn check_rca_for_parallel_with_dynamic_arg_to_rotation_does_not_branch() {
+    // A dynamic Double computed outside the parallel expression can be freely used as an
+    // argument to a gate inside it. Passing a dynamic value to a call does not incur branching,
+    // so UseOfDynamicBranchingInParallelExpr must NOT appear in the compute properties.
+    let mut compilation_context = CompilationContext::default();
+    compilation_context.update(
+        r#"
+        operation Foo() : Unit {
+            import Std.Convert.*;
+            use q = Qubit();
+            let angle = M(q) == Zero ? 1.0 | 2.0;
+            parallel {
+                use p = Qubit();
+                Rx(angle, p);
+            }
+        }"#,
+    );
+    let package_store_compute_properties = compilation_context.get_compute_properties();
+    check_callable_compute_properties(
+        &compilation_context.fir_store,
+        package_store_compute_properties,
+        "Foo",
+        &expect![[r#"
+            Callable: CallableComputeProperties:
+                body: ApplicationsGeneratorSet:
+                    inherent: Dynamic:
+                        runtime_features: RuntimeFeatureFlags(UseOfDynamicBool | UseOfDynamicDouble)
+                        value_kind: Constant
+                    dynamic_param_applications: <empty>
+                adj: <none>
+                ctl: <none>
+                ctl-adj: <none>"#]],
+    );
+}
diff --git a/source/vscode/syntaxes/qsharp.tmLanguage.json b/source/vscode/syntaxes/qsharp.tmLanguage.json
index d9c390c2a3..b695fe4980 100644
--- a/source/vscode/syntaxes/qsharp.tmLanguage.json
+++ b/source/vscode/syntaxes/qsharp.tmLanguage.json
@@ -142,7 +142,7 @@
       "patterns": [
         {
           "name": "keyword.control.qsharp",
-          "match": "\\b(use|borrow|mutable|let|set|if|elif|else|repeat|until|fixup|for|in|while|return|fail|within|apply)\\b"
+          "match": "\\b(use|borrow|mutable|let|set|if|elif|else|repeat|until|fixup|for|in|while|return|fail|within|apply|parallel)\\b"
         },
         {
           "name": "keyword.other.qsharp",