Created
December 11, 2023 17:47
-
-
Save davidbalbert/f07c865defe6e070d4c2afdd1454f343 to your computer and use it in GitHub Desktop.
Partially working BigIndexSet + LineCache
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// BigIndexSet.swift | |
// Watt | |
// | |
// Created by David Albert on 12/9/23. | |
// | |
import Foundation | |
struct BigIndexSet: BTree { | |
var root: BTreeNode<BigIndexSetSummary> | |
init(_ root: BTreeNode<BigIndexSetSummary>) { | |
self.root = root | |
} | |
} | |
struct BigIndexSetSummary: BTreeSummary { | |
var indexCount: Int | |
static var zero: BigIndexSetSummary { | |
self.init(indexCount: 0) | |
} | |
static func += (lhs: inout BigIndexSetSummary, rhs: BigIndexSetSummary) { | |
lhs.indexCount += rhs.indexCount | |
} | |
init(summarizing leaf: BigIndexSetLeaf) { | |
indexCount = leaf.data.count | |
} | |
init(indexCount: Int) { | |
self.indexCount = indexCount | |
} | |
} | |
extension BigIndexSetSummary: BTreeDefaultMetric { | |
static var defaultMetric: BigIndexSet.IndicesBaseMetric { BigIndexSet.IndicesBaseMetric() } | |
} | |
struct BigIndexSetLeaf: BTreeLeaf { | |
static let minSize = 32 | |
static let maxSize = 64 | |
var count: Int | |
var data: [Int] | |
static var zero: BigIndexSetLeaf { | |
self.init(count: 0, data: []) | |
} | |
var isUndersized: Bool { | |
data.count < Self.minSize | |
} | |
mutating func pushMaybeSplitting(other: BigIndexSetLeaf) -> BigIndexSetLeaf? { | |
for i in other.data { | |
data.append(i + count) | |
} | |
count += other.count | |
if data.count <= Self.maxSize { | |
return nil | |
} else { | |
let splitIndex = data.count / 2 | |
let splitCount = data[splitIndex - 1] | |
var new = Array(data[splitIndex...]) | |
for i in 0..<new.count { | |
new[i] -= splitCount | |
} | |
let newCount = count - splitCount | |
data.removeLast(new.count) | |
count = splitCount | |
return BigIndexSetLeaf(count: newCount, data: Array(new)) | |
} | |
} | |
subscript(bounds: Range<Int>) -> BigIndexSetLeaf { | |
var d: [Int] = [] | |
for i in data { | |
if bounds.contains(i) { | |
d.append(i - bounds.lowerBound) | |
} | |
} | |
return BigIndexSetLeaf(count: bounds.count, data: d) | |
} | |
} | |
extension BigIndexSet { | |
var upperBound: Int { | |
root.count | |
} | |
} | |
extension BigIndexSet: BidirectionalCollection { | |
typealias Index = BTreeNode<BigIndexSetSummary>.Index | |
var count: Int { | |
root.measure(using: .indices) | |
} | |
var startIndex: Index { | |
return root.startIndex | |
} | |
var endIndex: Index { | |
return root.endIndex | |
} | |
subscript(position: Index) -> Int { | |
position.validate(for: root) | |
precondition(position >= startIndex && position < endIndex, "Index out of bounds") | |
let i = index(roundingDown: position) | |
let (_, offset) = i.read()! | |
return offset | |
} | |
func index(before i: Index) -> Index { | |
return root.index(before: i, using: .indices) | |
} | |
func index(after i: Index) -> Index { | |
return root.index(after: i, using: .indices) | |
} | |
func index(_ i: Index, offsetBy distance: Int) -> Index { | |
root.index(i, offsetBy: distance, using: .indices) | |
} | |
func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? { | |
root.index(i, offsetBy: distance, limitedBy: limit, using: .indices) | |
} | |
func distance(from start: Index, to end: Index) -> Int { | |
root.distance(from: start, to: end, using: .indices) | |
} | |
} | |
extension BigIndexSet { | |
func index(roundingDown i: Index) -> Index { | |
return root.index(roundingDown: i, using: .indices) | |
} | |
func isBoundary(_ i: Index) -> Bool { | |
i.validate(for: root) | |
return i.isBoundary(in: .indices) | |
} | |
} | |
extension BigIndexSet { | |
struct IndicesBaseMetric: BTreeMetric { | |
func measure(summary: BigIndexSetSummary, count: Int) -> Int { | |
count | |
} | |
func convertToBaseUnits(_ measuredUnits: Int, in leaf: BigIndexSetLeaf) -> Int { | |
measuredUnits | |
} | |
func convertFromBaseUnits(_ baseUnits: Int, in leaf: BigIndexSetLeaf) -> Int { | |
baseUnits | |
} | |
func isBoundary(_ offset: Int, in leaf: BigIndexSetLeaf) -> Bool { | |
IndicesMetric().isBoundary(offset, in: leaf) | |
} | |
func prev(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? { | |
IndicesMetric().prev(offset, in: leaf) | |
} | |
func next(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? { | |
IndicesMetric().next(offset, in: leaf) | |
} | |
var canFragment: Bool { | |
false | |
} | |
var type: BTreeMetricType { | |
.trailing | |
} | |
} | |
} | |
extension BTreeMetric<BigIndexSetSummary> where Self == BigIndexSet.IndicesBaseMetric { | |
static var indicesBaseMetric: BigIndexSet.IndicesBaseMetric { BigIndexSet.IndicesBaseMetric() } | |
} | |
extension BigIndexSet { | |
struct IndicesMetric: BTreeMetric { | |
func measure(summary: BigIndexSetSummary, count: Int) -> Int { | |
summary.indexCount | |
} | |
func convertToBaseUnits(_ measuredUnits: Int, in leaf: BigIndexSetLeaf) -> Int { | |
if measuredUnits >= leaf.data.count { | |
return leaf.count + 1 | |
} else { | |
return leaf.data[measuredUnits] | |
} | |
} | |
func convertFromBaseUnits(_ baseUnits: Int, in leaf: BigIndexSetLeaf) -> Int { | |
let (i, found) = leaf.data.binarySearch(for: baseUnits) | |
if found { | |
return i | |
} else { | |
assert(i > 0) | |
return i - 1 | |
} | |
} | |
func isBoundary(_ offset: Int, in leaf: BigIndexSetLeaf) -> Bool { | |
let (_, found) = leaf.data.binarySearch(for: offset) | |
return found | |
} | |
func prev(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? { | |
assert(offset > 0 && offset <= leaf.count) | |
var (i, found) = leaf.data.binarySearch(for: offset) | |
if found { | |
i -= 1 | |
} | |
if i < 0 { | |
return nil | |
} | |
return leaf.data[i - 1] | |
} | |
func next(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? { | |
assert(offset >= 0 && offset < leaf.count) | |
var (i, found) = leaf.data.binarySearch(for: offset) | |
if found { | |
i += 1 | |
} | |
if i == leaf.data.count { | |
return nil | |
} | |
return leaf.data[i] | |
} | |
var canFragment: Bool { | |
false | |
} | |
var type: BTreeMetricType { | |
.trailing | |
} | |
} | |
} | |
extension BTreeMetric<BigIndexSetSummary> where Self == BigIndexSet.IndicesBaseMetric { | |
static var indices: BigIndexSet.IndicesMetric { BigIndexSet.IndicesMetric() } | |
} | |
struct BigIndexSetBuilder { | |
var b: BTreeBuilder<BigIndexSet> | |
var leaf: BigIndexSetLeaf | |
var offsetOfLeaf: Int | |
var totalCount: Int | |
init(totalCount: Int) { | |
self.b = BTreeBuilder<BigIndexSet>() | |
self.leaf = .zero | |
self.offsetOfLeaf = 0 | |
self.totalCount = totalCount | |
} | |
mutating func append(_ index: Int) { | |
precondition((leaf.data.isEmpty && index >= offsetOfLeaf) || (index > offsetOfLeaf + leaf.data.last!), "Indices must be inserted in order") | |
if leaf.data.count == BigIndexSetLeaf.maxSize { | |
leaf.count = index - offsetOfLeaf | |
self.offsetOfLeaf = index | |
b.push(leaf: leaf) | |
leaf = .zero | |
} | |
leaf.data.append(index - offsetOfLeaf) | |
totalCount = max(totalCount, index + 1) | |
} | |
mutating func push(_ indices: inout BigIndexSet) { | |
push(&indices, slicedBy: indices.startIndex..<indices.endIndex) | |
} | |
mutating func push(_ indices: inout BigIndexSet, slicedBy range: Range<BigIndexSet.Index>) { | |
precondition(range.lowerBound >= indices.startIndex && range.upperBound <= indices.endIndex, "Index out of bounds") | |
if !leaf.data.isEmpty { | |
leaf.count = totalCount - offsetOfLeaf | |
b.push(leaf: leaf) | |
leaf = .zero | |
} | |
let r = Range(range, in: indices.root) | |
totalCount += r.count | |
b.push(&indices.root, slicedBy: r) | |
} | |
consuming func build() -> BigIndexSet { | |
assert(totalCount >= offsetOfLeaf) | |
leaf.count = totalCount - offsetOfLeaf | |
b.push(leaf: leaf) | |
return b.build() | |
} | |
} | |
extension BigIndexSet { | |
mutating func insert(_ integer: Int) { | |
if contains(integer) { | |
return | |
} | |
if isEmpty || integer > last! { | |
// If we're inserting at the end, and we're uniquely referenced | |
// it's ok to mutate self. | |
var b = BTreeBuilder<Self>() | |
b.push(&self.root) | |
b.push(leaf: BigIndexSetLeaf(count: 0, data: [0])) | |
self = b.build() | |
return | |
} | |
// We're not inserting at the end, so we need to make sure not to mutate | |
// self while pushing integer. Otherwise when we push the second slice, | |
// our indices will be off by 1. | |
var dup = self | |
var b = BTreeBuilder<Self>() | |
b.push(&dup.root, slicedBy: 0..<integer) | |
b.push(leaf: BigIndexSetLeaf(count: 0, data: [0])) | |
b.push(&dup.root, slicedBy: (integer+1)..<upperBound) | |
self = b.build() | |
} | |
func contains(_ integer: Int) -> Bool { | |
if integer == 128 { | |
print("?") | |
} | |
if integer >= upperBound || integer < 0 { | |
return false | |
} | |
let i = root.index(startIndex, offsetBy: integer, using: .indicesBaseMetric) | |
return isBoundary(i) | |
} | |
} | |
extension BigIndexSet: ExpressibleByArrayLiteral { | |
init(arrayLiteral elements: Int...) { | |
self.init(elements) | |
} | |
} | |
extension BigIndexSet { | |
init(_ sequence: some Sequence<Int>) { | |
var b = BigIndexSetBuilder(totalCount: 0) | |
for i in sequence { | |
b.append(i) | |
} | |
self = b.build() | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// BigIndexSetTests.swift | |
// WattTests | |
// | |
// Created by David Albert on 12/9/23. | |
// | |
import XCTest | |
@testable import Watt | |
final class BigIndexSetTests: XCTestCase { | |
func testInit() { | |
let indices = BigIndexSet(stride(from: 0, to: 200, by: 2)) | |
XCTAssertEqual(indices.upperBound, 199) | |
for i in 0..<200 { | |
if i % 2 == 0 { | |
XCTAssert(indices.contains(i), "should contain \(i)") | |
} else { | |
XCTAssert(!indices.contains(i), "should not contain \(i)") | |
} | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extension Range where Bound == Int { | |
init<Summary>(_ range: Range<BTreeNode<Summary>.Index>, in root: BTreeNode<Summary>) where Summary: BTreeSummary { | |
range.lowerBound.validate(for: root) | |
range.upperBound.validate(for: root) | |
self = range.lowerBound.position..<range.upperBound.position | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// LineCache.swift | |
// Watt | |
// | |
// Created by David Albert on 12/9/23. | |
// | |
import Foundation | |
struct LineCache { | |
var cache: SortedCache<Line> | |
subscript(offset: Int) -> Line? { | |
get { | |
cache[offset] | |
} | |
set { | |
cache[offset] = newValue | |
} | |
} | |
mutating func invalidate(range: Range<Int>) { | |
if cache.isEmpty { | |
return | |
} | |
// Invalidate the cache, but treat range as closed (e.g. invalidate range.lowerBound..<(range.upperBound + 1)). | |
// Also invalidate the first index below range.lowerBound | |
let i = cache.key(before: range.lowerBound) ?? 0 | |
let j = range.upperBound + 1 | |
cache.invalidate(range: i..<j) | |
} | |
// This isn't right. I believe the logic should be the same as IntervalCache.invalidate(delta:) | |
mutating func invalidate<Tree>(delta: BTreeDelta<Tree>) where Tree: BTree { | |
var count = 0 | |
for element in delta.elements { | |
switch element { | |
case .copy(let lowerBound, let upperBound): | |
if lowerBound > count { | |
// delete the gap | |
invalidate(range: count..<lowerBound) | |
cache.shiftKeys(startingAt: lowerBound, by: count - lowerBound) | |
} | |
count = upperBound | |
case .insert(let node): | |
// TODO: invalidate something here | |
cache.shiftKeys(startingAt: count, by: node.count) | |
count += node.count | |
} | |
} | |
if count < delta.baseCount { | |
let rangeToDelete = count..<delta.baseCount | |
cache.invalidate(range: count..<delta.baseCount) | |
cache.shiftKeys(startingAt: count, by: delta.baseCount - count) | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// SortedCache.swift | |
// Watt | |
// | |
// Created by David Albert on 12/8/23. | |
// | |
import Foundation | |
struct SortedCache<Element> { | |
typealias Index = IndexSet.Index | |
var dictionary: Dictionary<Int, Element> | |
var keys: IndexSet | |
subscript(key: Int) -> Element? { | |
get { | |
return dictionary[key] | |
} | |
set { | |
if let value = newValue { | |
if dictionary.updateValue(value, forKey: key) == nil { | |
assert(!keys.contains(key)) | |
keys.insert(key) | |
} | |
} else { | |
assert(dictionary.keys.contains(key) == keys.contains(key)) | |
if keys.contains(key) { | |
dictionary.removeValue(forKey: key) | |
keys.remove(key) | |
} | |
} | |
} | |
} | |
mutating func removeAll(keepingCapacity keepCapacity: Bool = false) { | |
dictionary.removeAll(keepingCapacity: keepCapacity) | |
keys.removeAll() | |
} | |
mutating func invalidate(range: Range<Int>) { | |
for key in keys[keys.indexRange(in: range)] { | |
dictionary.removeValue(forKey: key) | |
} | |
keys.remove(integersIn: range) | |
} | |
func key(before key: Int) -> Int? { | |
keys.integerLessThan(key) | |
} | |
mutating func shiftKeys(startingAt index: Int, by delta: Int) { | |
guard delta != 0 else { return } | |
let ks: any Collection<Int> = delta > 0 ? keys.reversed() : keys | |
for key in ks where key >= index { | |
let newKey = key + delta | |
dictionary[newKey] = dictionary[key]! | |
dictionary.removeValue(forKey: key) | |
} | |
keys.shift(startingAt: index, by: delta) | |
} | |
} | |
extension SortedCache: BidirectionalCollection { | |
var count: Int { | |
assert(dictionary.count == keys.count) | |
return dictionary.count | |
} | |
var startIndex: Index { | |
keys.startIndex | |
} | |
var endIndex: Index { | |
keys.endIndex | |
} | |
func index(before i: Index) -> Index { | |
keys.index(before: i) | |
} | |
func index(after i: Index) -> Index { | |
keys.index(after: i) | |
} | |
subscript(position: Index) -> (key: Int, element: Element) { | |
(keys[position], dictionary[keys[position]]!) | |
} | |
} | |
extension SortedCache: ExpressibleByDictionaryLiteral { | |
init(dictionaryLiteral elements: (Int, Element)...) { | |
self.init(elements) | |
} | |
} | |
extension SortedCache { | |
init<S: Sequence>(_ sequence: S) where S.Element == (Int, Element) { | |
let d = Dictionary(sequence) { x, y in y } | |
self.dictionary = d | |
self.keys = IndexSet(d.keys) | |
} | |
} | |
extension SortedCache: Equatable where Element: Equatable { | |
static func ==(lhs: SortedCache, rhs: SortedCache) -> Bool { | |
return lhs.dictionary == rhs.dictionary | |
} | |
} | |
extension SortedCache: CustomStringConvertible { | |
var description: String { | |
dictionary.description | |
} | |
} | |
extension SortedCache { | |
static func + (lhs: SortedCache, rhs: SortedCache) -> SortedCache { | |
var result = lhs | |
for (key, value) in rhs { | |
result[key] = value | |
} | |
return result | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// SortedCache.swift | |
// WattTests | |
// | |
// Created by David Albert on 12/8/23. | |
// | |
import XCTest | |
@testable import Watt | |
final class SortedCacheTests: XCTestCase { | |
func testSubscript() { | |
var dict: SortedCache = [1: "One", 2: "Two"] | |
XCTAssertEqual(dict[1], "One") | |
XCTAssertEqual(dict[2], "Two") | |
dict[1] = "New One" | |
XCTAssertEqual(dict[1], "New One") | |
dict[1] = nil | |
XCTAssertNil(dict[1]) | |
dict[3] = "Three" | |
XCTAssertEqual(dict[3], "Three") | |
} | |
func testRemoveAll() { | |
var dict: SortedCache = [1: "One", 2: "Two"] | |
dict.removeAll() | |
XCTAssertNil(dict[1]) | |
XCTAssertNil(dict[2]) | |
} | |
func testKeyBefore() { | |
var cache: SortedCache = [1: "One", 3: "Three", 5: "Five"] | |
XCTAssertEqual(cache.key(before: 6), 5) | |
XCTAssertEqual(cache.key(before: 5), 3) | |
XCTAssertEqual(cache.key(before: 4), 3) | |
XCTAssertEqual(cache.key(before: 3), 1) | |
XCTAssertEqual(cache.key(before: 2), 1) | |
XCTAssertNil(cache.key(before: 1)) | |
XCTAssertNil(cache.key(before: 0)) | |
cache = [:] | |
XCTAssertNil(cache.key(before: 1)) | |
} | |
func testInvalidate() { | |
func t<V>(_ range: Range<Int>, _ cache: SortedCache<V>, _ expected: SortedCache<V>, file: StaticString = #file, line: UInt = #line) where V: Equatable { | |
var c = cache | |
c.invalidate(range: range) | |
XCTAssertEqual(c, expected, file: file, line: line) | |
} | |
// unchanged | |
func u<V>(_ range: Range<Int>, _ cache: SortedCache<V>, file: StaticString = #file, line: UInt = #line) where V: Equatable { | |
var c = cache | |
c.invalidate(range: range) | |
XCTAssertEqual(c, cache, file: file, line: line) | |
} | |
// remove middle | |
t(2..<4, c(1...5, "a"..."e"), [1: "a", 4: "d", 5: "e"]) | |
// doesn't have to be contiguous | |
t(2..<4, c(1...5, "a"..."e", without: 4), [1: "a", 5: "e"]) | |
// empty ranges are unchanged | |
u(0..<0, c(1...4)) | |
u(1..<1, c(1...4)) | |
u(3..<3, c(1...4)) | |
u(4..<4, c(1...4)) | |
u(5..<5, c(1...4)) | |
// empty prefix | |
u(0..<2, c(2...5)) | |
// empty suffix | |
u(6..<10, c(2...5)) | |
// overlapping prefix | |
t(0..<2, c(1...5), c(2...5)) | |
// overlapping suffix | |
t(5..<10, c(1...5), c(1...4)) | |
// covers fully | |
t(5..<10, c(5..<10), [:]) | |
// subsumes | |
t(0..<15, c(5..<10), [:]) | |
// inside | |
t(5..<10, c(0..<20), c(0..<5) + c(10..<20)) | |
} | |
} | |
extension Unicode.Scalar: Strideable { | |
public func distance(to other: Unicode.Scalar) -> Int { | |
Int(other.value - value) | |
} | |
public func advanced(by n: Int) -> Unicode.Scalar { | |
Unicode.Scalar(Int(value) + n)! | |
} | |
} | |
fileprivate func c(_ kr: Range<Int>, without: Int...) -> SortedCache<Int> { | |
cn(kr, kr, 1, without: without) | |
} | |
fileprivate func c(_ kr: ClosedRange<Int>, without: Int...) -> SortedCache<Int> { | |
cn(kr, kr, 1, without: without) | |
} | |
fileprivate func c(_ kr: Range<Int>, _ vr: Range<Unicode.Scalar>, without: Int...) -> SortedCache<Unicode.Scalar> { | |
cn(kr, vr, 1, without: without) | |
} | |
fileprivate func c(_ kr: ClosedRange<Int>, _ vr: ClosedRange<Unicode.Scalar>, without: Int...) -> SortedCache<Unicode.Scalar> { | |
cn(kr, vr, 1, without: without) | |
} | |
fileprivate func c2(_ kr: Range<Int>, without: Int...) -> SortedCache<Int> { | |
cn(kr, kr, 2, without: without) | |
} | |
fileprivate func c2(_ kr: ClosedRange<Int>, without: Int...) -> SortedCache<Int> { | |
cn(kr, kr, 2, without: without) | |
} | |
fileprivate func c2(_ kr: Range<Int>, _ vr: Range<Unicode.Scalar>) -> SortedCache<Unicode.Scalar> { | |
cn(kr, vr, 2) | |
} | |
fileprivate func c2(_ kr: ClosedRange<Int>, _ vr: ClosedRange<Unicode.Scalar>) -> SortedCache<Unicode.Scalar> { | |
cn(kr, vr, 2) | |
} | |
fileprivate func cn<V>(_ kr: Range<Int>, _ vr: Range<V>, _ stride: Int, without: [Int] = []) -> SortedCache<V> where V: Hashable & Strideable, V.Stride == Int { | |
assert(kr.count == vr.count) | |
let keys = Swift.stride(from: kr.lowerBound, to: kr.upperBound, by: stride) | |
let vals = Swift.stride(from: vr.lowerBound, to: vr.upperBound, by: stride) | |
var dict = SortedCache(zip(keys, vals)) | |
for k in without { | |
dict[k] = nil | |
} | |
return dict | |
} | |
fileprivate func cn<V>(_ kr: ClosedRange<Int>, _ vr: ClosedRange<V>, _ stride: Int, without: [Int] = []) -> SortedCache<V> where V: Hashable & Strideable, V.Stride == Int { | |
assert(kr.count == vr.count) | |
let keys = Swift.stride(from: kr.lowerBound, through: kr.upperBound, by: stride) | |
let vals = Swift.stride(from: vr.lowerBound, through: vr.upperBound, by: stride) | |
var dict = SortedCache(zip(keys, vals)) | |
for k in without { | |
dict[k] = nil | |
} | |
return dict | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment