Hmmm that’s interesting. A brief test I ran: import CoreGraphics import Foundation
struct PointIterator { let rect: CGRect var nextPoint: CGPoint let maxX: CGFloat let maxY: CGFloat init(rect: CGRect) { self.rect = rect.standardized self.nextPoint = self.rect.origin // Cache for fast iteration self.maxX = self.rect.maxX self.maxY = self.rect.maxY } mutating func next() -> CGPoint? { guard nextPoint.x <= maxX, nextPoint.y <= maxY else { return .none } defer { nextPoint.x += 1 if nextPoint.x > maxX { nextPoint.x = rect.origin.x nextPoint.y += 1 } } return nextPoint } } // Use iterator func iteratePoints_it(_ rect: CGRect, with: (CGPoint)->()) { var it = PointIterator(rect: rect) while let point = it.next() { with(point) } } // Basic unwrapping of the iterator as a function (no ‘defer’) func iteratePoints_fe(_ rect: CGRect, with: (CGPoint)->()) { let rect = rect.standardized var nextPoint = rect.origin let maxX = rect.maxX let maxY = rect.maxY while true { guard nextPoint.x <= maxX, nextPoint.y <= maxY else { return } with(nextPoint) nextPoint.x += 1 if nextPoint.x > maxX { nextPoint.x = rect.origin.x nextPoint.y += 1 } } } // for..in loop func iteratePoints_fe2(_ rect: CGRect, with: (CGPoint)->()) { let rect = rect.standardized let maxX = rect.maxX let maxY = rect.maxY for y in stride(from: rect.origin.y, to: maxY, by: 1) { for x in stride(from: rect.origin.x, to: maxX, by: 1) { with(CGPoint(x: x, y: y)) } } } func profile(_ iterations: Int, _ thing: ()->()) -> TimeInterval { var totalTime: TimeInterval = 0 for _ in 0..<iterations { let start = Date().timeIntervalSinceReferenceDate thing() totalTime += (Date().timeIntervalSinceReferenceDate - start) } return totalTime/TimeInterval(iterations) } let bigRect = CGRect(x: 0, y: 0, width: 10_000, height: 10_000) let iterator = profile(10) { iteratePoints_it(bigRect) { if $0.x > 1_000_000 { print("?") } } } // always false, won't be optimised out. let foreach = profile(10) { iteratePoints_fe(bigRect) { if $0.x > 1_000_000 { print("?") } } } let foreach2 = profile(10) { iteratePoints_fe2(bigRect) { if $0.x > 1_000_000 { print("?") } } } print("iterator: \(iterator) \nforeach: \(foreach) \nforeach2: \(foreach2)") Results: iterator: 0.316907703876495 foreach: 0.283202117681503 foreach2: 0.568318998813629 That ranking is consistent, too. Using an iterator does appear marginally slower than a basic unwrapping of the iterator in to a function. > On 4 Jan 2017, at 09:56, Jens Persson via swift-users <swift-users@swift.org> > wrote: > > Hi, > > I'm working on some low-level pixel processing code (stuff that is not > possible to do using standard API or on eg GPU), and I had lots of eg these: > > for y in someStartY ..< someStopY { > for x in someStartX ..< someStopX { > ... pixels[x, y] ... > } > } > > So I implemented some (value) types like eg IntPoint2D, IntSize2D, IntRect2D > and I made an IntRect2DIterator so that IntRect2D could be a Sequence over > its (discrete) points. With this I could rewrite the above like so: > > for pixelPosAsIntPoint2D in someIntRect2D { > ... pixels[pixelPosAsIntPoint2D] ... > } > > For some reason the first version (two nested for loops for x and y) is > always a bit faster than the abstracted version no matter how I write it > (tried using eg &+ instead of + etc). > > > > Is it possible to write as a zero cost abstraction like this, if so, how? If > not, why? > > > > > PS > > Note that eg this: > > for y in someStartY ..< someStopY { > for x in someStartX ..< someStopX { > let pixelPosAsIntPoint2D = IntPoint2D(x: x, y: y) > ... pixels[pixelPosAsIntPoint2D] ... > } > } > > is exactly as fast as the top example (using just ... pixels[x, y] ...). So > the difference in execution time seems to be due to something in the Iterator > and not eg the pixel accessing subscript taking the 2d int point type instead > of separate x and y ints. > > Here is one Iterator variant that I have tested: > > struct Rect2DIntPointIterator : IteratorProtocol, Sequence { > let startX, startY, stopX, stopY: Int > var px, py: Int > init(rect: Rect2DInt) { > startX = rect.origin.x > startY = rect.origin.y > stopX = rect.maxX > stopY = rect.maxY > px = startX > py = startY > } > mutating func next() -> Point2DInt? { > defer { px = px &+ 1 } > if px == stopX { > px = startX > py = py &+ 1 > if py == stopY { return nil } > } > return Point2DInt(x: px, y: py) > } > } > > > And here are typical execution times from an example test: > 2.1 seconds using my Iterator (the fastest I can get it, no matter how I try > to rewrite it). > 1.5 seconds using nested x, y for loops. > > I'm pretty sure my testing is done thoroughly (measuring average of many > runs, using random data, avoiding dead code elimination, whole module > optimization, etc). > > I have tried profiling the code and looking at the disassmbly but I'm failing > to understand what's going on. > > So the ultimate answer would be in the form of a (2d, Int) Rectangle type > whose (2d, Int) Points can be iterated in a for loop, at zero cost compared > to doing the same using two nested for loops. Or an explanation of why this > is impossible. > > DS > > /Jens > > _______________________________________________ > swift-users mailing list > swift-users@swift.org > https://lists.swift.org/mailman/listinfo/swift-users
_______________________________________________ swift-users mailing list swift-users@swift.org https://lists.swift.org/mailman/listinfo/swift-users