Hmmm that’s interesting. A brief test I ran:

import CoreGraphics
import Foundation

/// Walks the points of a rectangle in unit steps, row by row.
///
/// The rectangle is standardized first, so iteration always starts at its
/// origin and advances towards `maxX`, then `maxY`. Both bounds checks use
/// `<=`, so points lying on the `maxX` and `maxY` edges are produced too.
struct PointIterator {
  /// The standardized rectangle being traversed.
  let rect: CGRect
  /// The point that the next call to `next()` will return.
  var nextPoint: CGPoint

  let maxX: CGFloat
  let maxY: CGFloat

  /// Creates an iterator positioned at the origin of `rect.standardized`.
  init(rect: CGRect) {
    let standardized = rect.standardized
    self.rect      = standardized
    self.nextPoint = standardized.origin
    // Cache for fast iteration
    self.maxX      = standardized.maxX
    self.maxY      = standardized.maxY
  }

  /// Returns the current point and advances by one unit in x, wrapping to the
  /// start of the next row once x passes `maxX`.
  ///
  /// - Returns: The next point, or `nil` once the position is past `maxX` or `maxY`.
  mutating func next() -> CGPoint? {
    guard nextPoint.x <= maxX, nextPoint.y <= maxY else {
      return nil
    }
    // Advance after the return value has been read, so the caller receives
    // the pre-increment position.
    defer {
      nextPoint.x += 1
      if nextPoint.x > maxX {
        nextPoint.x  = rect.origin.x
        nextPoint.y += 1
      }
    }
    return nextPoint
  }
}

// Use iterator
/// Calls `with` once for every point produced by a `PointIterator` over `rect`.
///
/// - Parameters:
///   - rect: The rectangle to traverse.
///   - with: Invoked with each point, in the order the iterator yields them.
func iteratePoints_it(_ rect: CGRect, with: (CGPoint)->()) {
  var it = PointIterator(rect: rect)
  while let point = it.next() {
    with(point)
  }
}

// Basic unwrapping of the iterator as a function (no ‘defer’)
/// Calls `with` for each unit-spaced point of `rect.standardized`, row by row.
///
/// Both bounds checks use `<=`, so points on the `maxX` and `maxY` edges are
/// visited as well.
///
/// - Parameters:
///   - rect: The rectangle to traverse; it is standardized before use.
///   - with: Invoked with each point, starting at the origin.
func iteratePoints_fe(_ rect: CGRect, with: (CGPoint)->()) {
  let rect      = rect.standardized
  var nextPoint = rect.origin
  let maxX      = rect.maxX
  let maxY      = rect.maxY
  while true {
    guard nextPoint.x <= maxX, nextPoint.y <= maxY else {
      break
    }
    with(nextPoint)
    nextPoint.x += 1
    if nextPoint.x > maxX {
      // End of the row: wrap back to the left edge and move down one row.
      nextPoint.x  = rect.origin.x
      nextPoint.y += 1
    }
  }
}

// loop
/// Calls `with` for each unit-spaced point of `rect.standardized` using two
/// nested `stride` loops (y outer, x inner).
///
/// NOTE: `stride(from:to:by:)` excludes its end value, so points on the `maxX`
/// and `maxY` edges are NOT visited here, whereas `iteratePoints_it` and
/// `iteratePoints_fe` compare with `<=` and do visit them.
///
/// - Parameters:
///   - rect: The rectangle to traverse; it is standardized before use.
///   - with: Invoked with each point, row by row from the origin.
func iteratePoints_fe2(_ rect: CGRect, with: (CGPoint)->()) {
  let rect = rect.standardized
  let maxX = rect.maxX
  let maxY = rect.maxY
  for y in stride(from: rect.origin.y, to: maxY, by: 1) {
    for x in stride(from: rect.origin.x, to: maxX, by: 1) {
      with(CGPoint(x: x, y: y))
    }
  }
}

/// Runs `thing` repeatedly and returns the mean wall-clock duration of one run.
///
/// Timing is taken from `Date()` immediately before and after each call.
///
/// - Parameters:
///   - iterations: How many times to run `thing`.
///   - thing: The work to time.
/// - Returns: The average time per run in seconds, or 0 when `iterations`
///   is not positive (nothing is run in that case).
func profile(_ iterations: Int, _ thing: ()->()) -> TimeInterval {
  // Avoid a 0/0 division (NaN) and an invalid range for non-positive counts.
  guard iterations > 0 else { return 0 }
  var totalTime: TimeInterval = 0
  for _ in 0..<iterations {
    let start = Date().timeIntervalSinceReferenceDate
    thing()
    totalTime += (Date().timeIntervalSinceReferenceDate - start)
  }
  return totalTime/TimeInterval(iterations)
}

// Benchmark input: a 10 000 x 10 000 rectangle at the origin.
let bigRect = CGRect(x: 0, y: 0, width: 10_000, height: 10_000)

// Each variant is timed over 10 runs. The per-point check is always false,
// but it won't be optimised out.
let iterator = profile(10) {
  iteratePoints_it(bigRect) { if $0.x > 1_000_000 { print("?") } }
}
let foreach = profile(10) {
  iteratePoints_fe(bigRect) { if $0.x > 1_000_000 { print("?") } }
}
let foreach2 = profile(10) {
  iteratePoints_fe2(bigRect) { if $0.x > 1_000_000 { print("?") } }
}
print("iterator: \(iterator) \nforeach: \(foreach) \nforeach2: \(foreach2)")


iterator: 0.316907703876495 
foreach: 0.283202117681503 
foreach2: 0.568318998813629

That ranking is consistent, too. Using an iterator does appear marginally 
slower than a basic unwrapping of the iterator into a function.

> Hi,
> I'm working on some low-level pixel processing code (stuff that is not 
> possible to do using standard API or on eg GPU), and I had lots of eg these:
> for y in someStartY ..< someStopY {
>     for x in someStartX ..< someStopX {
>         ... pixels[x, y] ...
>     }
> }
> So I implemented some (value) types like eg IntPoint2D, IntSize2D, IntRect2D 
> and I made an IntRect2DIterator so that IntRect2D could be a Sequence over 
> its (discrete) points. With this I could rewrite the above like so:
> for pixelPosAsIntPoint2D in someIntRect2D {
>     ... pixels[pixelPosAsIntPoint2D] ...
> }
> For some reason the first version (two nested for loops for x and y) is 
> always a bit faster than the abstracted version no matter how I write it 
> (tried using eg &+ instead of + etc).
> Is it possible to write as a zero cost abstraction like this, if so, how? If 
> not, why?
> PS
> Note that eg this:
> for y in someStartY ..< someStopY {
>     for x in someStartX ..< someStopX {
>        let pixelPosAsIntPoint2D = IntPoint2D(x: x, y: y)
>         ... pixels[pixelPosAsIntPoint2D] ...
>     }
> }
> is exactly as fast as the top example (using just ... pixels[x, y] ...). So 
> the difference in execution time seems to be due to something in the Iterator 
> and not eg the pixel accessing subscript taking the 2d int point type instead 
> of separate x and y ints.
> Here is one Iterator variant that I have tested:
> struct Rect2DIntPointIterator : IteratorProtocol, Sequence {
>     let startX, startY, stopX, stopY: Int
>     var px, py: Int
>     init(rect: Rect2DInt) {
>         startX = rect.origin.x
>         startY = rect.origin.y
>         stopX = rect.maxX
>         stopY = rect.maxY
>         px = startX
>         py = startY
>     }
>     mutating func next() -> Point2DInt? {
>         defer { px = px &+ 1 }
>         if px == stopX {
>             px = startX
>             py = py &+ 1
>             if py == stopY { return nil }
>         }
>         return Point2DInt(x: px, y: py)
>     }
> }
> And here are typical execution times from an example test:
> 2.1 seconds using my Iterator (the fastest I can get it, no matter how I try 
> to rewrite it).
> 1.5 seconds using nested x, y for loops.
> I'm pretty sure my testing is done thoroughly (measuring average of many 
> runs, using random data, avoiding dead code elimination, whole module 
> optimization, etc).
> I have tried profiling the code and looking at the disassembly but I'm failing 
> to understand what's going on.
> So the ultimate answer would be in the form of a (2d, Int) Rectangle type 
> whose (2d, Int) Points can be iterated in a for loop, at zero cost compared 
> to doing the same using two nested for loops. Or an explanation of why this 
> is impossible.
> DS
> /Jens
