Skip to content

Instantly share code, notes, and snippets.

@pbk20191
Last active August 25, 2025 03:43
Show Gist options
  • Save pbk20191/d28edae070a9d775c8368c52cb145aee to your computer and use it in GitHub Desktop.
Save pbk20191/d28edae070a9d775c8368c52cb145aee to your computer and use it in GitHub Desktop.
//
// CSVParserSequence.swift
// MyTool
//
// Created by 박병관 on 8/25/25.
//
import Foundation
/// Wraps a source of `UnicodeScalar` values and describes how to parse it as CSV,
/// producing one `[String]` (the list of field values) per record.
public struct CSVParserSequence<Base> {
    /// A parsed record: the field values in the order they appeared.
    public typealias Element = [String]

    /// The wrapped source of Unicode scalars.
    public var base: Base
    /// The scalar that separates fields within a record.
    public var delimiter: UnicodeScalar
    /// When `true`, U+FEFF is dropped if it is the very first scalar of the input,
    /// and also from the start of the first field of every record.
    public var stripBOM: Bool

    /// Creates a parser description over `base`.
    /// - Parameters:
    ///   - base: The source of Unicode scalars to parse.
    ///   - delimiter: The field separator. Defaults to a comma.
    ///   - stripBOM: Whether byte-order marks (U+FEFF) are removed. Defaults to `true`.
    public init(base: Base, delimiter: UnicodeScalar = ",", stripBOM: Bool = true) {
        self.base = base
        self.delimiter = delimiter
        self.stripBOM = stripBOM
    }

    // MARK: - Core state machine

    /// Push-style CSV state machine: scalars are fed in one at a time and completed
    /// records are handed back as they are recognised.
    struct _CSVCore {
        // Configuration
        let delimiter: UnicodeScalar
        let quote: UnicodeScalar = "\""
        let lf: UnicodeScalar = "\n"
        let cr: UnicodeScalar = "\r"
        let stripBOM: Bool

        // State
        var row: [String] = []
        var field = ""
        var inQuote = false
        var afterQuote = false      // true right after a closing quote
        var seenCR = false          // true if the last character was CR (to check CRLF)
        var isFirstScalar = true    // true for the very first scalar (for BOM stripping)

        init(delimiter: UnicodeScalar, stripBOM: Bool) {
            self.delimiter = delimiter
            self.stripBOM = stripBOM
        }

        /// Space and tab are the only scalars treated as padding after a closing quote.
        @inline(__always) private func isWhitespace(_ s: UnicodeScalar) -> Bool {
            s == " " || s == "\t"
        }

        /// Feeds one scalar into the parser.
        ///
        /// - Parameter chOpt: The next scalar of the input, or `nil` to signal end of input.
        /// - Returns: The completed record when `chOpt` terminates one (LF, CR, or end of
        ///   input with pending data); otherwise `nil`. Feeding `nil` again after the final
        ///   record has been flushed returns `nil`.
        mutating func feed(_ chOpt: UnicodeScalar?) -> [String]? {
            // Strip BOM if it is the very first scalar
            if isFirstScalar {
                isFirstScalar = false
                if stripBOM, chOpt == "\u{FEFF}" {
                    return nil
                }
            }

            // A record was already emitted on CR; swallow the LF of a CRLF pair.
            if seenCR {
                seenCR = false
                if chOpt == lf {
                    return nil
                }
                // otherwise, continue processing this scalar normally
            }

            // End of input
            guard let ch = chOpt else {
                // A just-closed quoted field (e.g. a trailing `""`) is a real field even
                // when its content is empty, so it must still produce a record.
                let closedQuotedField = afterQuote
                inQuote = false
                afterQuote = false
                if closedQuotedField || !field.isEmpty || !row.isEmpty {
                    return finishRow()
                }
                return nil
            }

            // After closing quote: check what follows
            if afterQuote {
                // The delimiter takes precedence over padding, so a tab or space
                // delimiter still separates fields after a quoted value.
                if ch != delimiter, isWhitespace(ch) {
                    // ignore spaces or tabs after a closing quote
                    return nil
                }
                afterQuote = false
                switch ch {
                case delimiter:
                    finishField()
                    return nil
                case lf:
                    return finishRow()
                case cr:
                    seenCR = true
                    return finishRow()
                case quote:
                    // "" inside a quoted field → append one quote and stay quoted
                    field.unicodeScalars.append(quote)
                    inQuote = true
                    return nil
                default:
                    // Lenient: keep any other scalar as literal, unquoted content
                    field.unicodeScalars.append(ch)
                    return nil
                }
            }

            // Inside quotes: everything except a quote is literal content
            if inQuote {
                if ch == quote {
                    // Tentatively a closing quote; the next scalar decides (see above)
                    inQuote = false
                    afterQuote = true
                } else {
                    field.unicodeScalars.append(ch)
                }
                return nil
            }

            // Outside quotes
            switch ch {
            case quote:
                inQuote = true
                return nil
            case delimiter:
                finishField()
                return nil
            case lf:
                return finishRow()
            case cr:
                seenCR = true
                return finishRow()
            default:
                // Regular character
                field.unicodeScalars.append(ch)
                return nil
            }
        }

        /// Moves the accumulated field into the current row and clears the field buffer.
        private mutating func finishField() {
            row.append(post(field, isFirstField: row.isEmpty))
            field.removeAll()
        }

        /// Completes the pending field, then hands back the finished row and resets it.
        private mutating func finishRow() -> [String] {
            finishField()
            let out = row
            row.removeAll()
            return out
        }

        /// Strips a leading U+FEFF from the first field of a row when `stripBOM` is enabled.
        private func post(_ s: String, isFirstField: Bool) -> String {
            guard stripBOM, isFirstField else { return s }
            if s.unicodeScalars.first == "\u{FEFF}" {
                return String(s.unicodeScalars.dropFirst())
            }
            return s
        }
    }

    /// Iterator state shared by the synchronous and asynchronous conformances:
    /// the upstream iterator, the parser core, and an end-of-input flag.
    public struct CSVIterator<InnerBase> {
        var base: InnerBase
        var core: _CSVCore
        var eofEmitted = false

        /// Creates an iterator over `base`, copying `delimiter` and `stripBOM` from `parent`.
        internal init(base: InnerBase, parent: CSVParserSequence) {
            self.base = base
            self.core = _CSVCore(delimiter: parent.delimiter, stripBOM: parent.stripBOM)
        }
    }
}
// MARK: - Sequence
/// Synchronous row iteration, available when the base is a `Sequence` of Unicode scalars.
extension CSVParserSequence: Sequence where Base: Sequence, Base.Element == UnicodeScalar {
    /// Builds a row iterator that pulls scalars from a fresh iterator over `base`
    /// and is configured from this value (`parent: self`).
    public func makeIterator() -> CSVIterator<Base.Iterator> {
        let scalars = base.makeIterator()
        return CSVIterator<Base.Iterator>(base: scalars, parent: self)
    }
}
extension CSVParserSequence.CSVIterator: IteratorProtocol
where InnerBase: IteratorProtocol, InnerBase.Element == UnicodeScalar {
    /// Pulls scalars from the upstream iterator until a full record is available.
    ///
    /// - Returns: The next record, or `nil` once the upstream is exhausted and any
    ///   pending final record has been returned.
    public mutating func next() -> [String]? {
        while !eofEmitted {
            let scalar = base.next()
            // Record EOF *before* returning the flushed final row, so the exhausted
            // upstream iterator is never polled again on the following call.
            if scalar == nil { eofEmitted = true }
            if let row = core.feed(scalar) { return row }
        }
        return nil
    }
}
// MARK: - AsyncSequence
/// Asynchronous row iteration, available when the base is an `AsyncSequence` of Unicode scalars.
extension CSVParserSequence: AsyncSequence
where Base: AsyncSequence, Base.Element == UnicodeScalar {
    /// Builds a row iterator that pulls scalars from a fresh async iterator over `base`
    /// and is configured from this value (`parent: self`).
    public func makeAsyncIterator() -> CSVIterator<Base.AsyncIterator> {
        let scalars = base.makeAsyncIterator()
        return CSVIterator<Base.AsyncIterator>(base: scalars, parent: self)
    }
}
extension CSVParserSequence.CSVIterator: AsyncIteratorProtocol
where InnerBase: AsyncIteratorProtocol, InnerBase.Element == UnicodeScalar {
    /// Awaits scalars from the upstream iterator until a full record is available.
    ///
    /// - Parameter actor: The isolation forwarded to the upstream iterator's `next(isolation:)`.
    /// - Returns: The next record, or `nil` once the upstream is exhausted and any
    ///   pending final record has been returned.
    /// - Throws: Whatever the upstream iterator throws.
    @available(macOS 15.0, iOS 18.0, watchOS 11.0, tvOS 18.0, visionOS 2.0, *)
    public mutating func next(isolation actor: isolated (any Actor)?) async throws(InnerBase.Failure) -> [String]? {
        while !eofEmitted {
            let scalar = try await base.next(isolation: actor)
            // Record EOF *before* returning the flushed final row, so the finished
            // upstream iterator is never awaited again on the following call.
            if scalar == nil { eofEmitted = true }
            if let row = core.feed(scalar) { return row }
        }
        return nil
    }

    /// Awaits scalars from the upstream iterator until a full record is available.
    ///
    /// - Returns: The next record, or `nil` once the upstream is exhausted and any
    ///   pending final record has been returned.
    /// - Throws: Whatever the upstream iterator throws.
    public mutating func next() async rethrows -> [String]? {
        while !eofEmitted {
            let scalar = try await base.next()
            // Same EOF bookkeeping as the isolated overload above.
            if scalar == nil { eofEmitted = true }
            if let row = core.feed(scalar) { return row }
        }
        return nil
    }
}
//
// StringCatalog.swift
//
//
// Created by Evan Anderson on 5/16/24.
//
/// A `Codable` model of a string catalog document: a source language, a format
/// version, and a dictionary of entries.
///
/// NOTE(review): the shape presumably mirrors Xcode's `.xcstrings` JSON — confirm
/// against real catalogs before relying on it for round-tripping.
public struct StringCatalog: Codable, Hashable, Sendable {
    public var sourceLanguage: String
    public var version: String
    /// Catalog entries by string key.
    public var strings: [String: StringCatalog.Entry]

    /// Creates a catalog.
    /// - Parameters:
    ///   - sourceLanguage: Defaults to `"en"`.
    ///   - version: Defaults to `"1.0"`.
    ///   - strings: Defaults to an empty dictionary.
    public init(
        sourceLanguage: String = "en",
        version: String = "1.0",
        strings: [String: StringCatalog.Entry] = [:]
    ) {
        self.sourceLanguage = sourceLanguage
        self.version = version
        self.strings = strings
    }

    /// One catalog entry: optional comment, extraction state, per-language
    /// localizations, and a translate flag.
    public struct Entry: Codable, Hashable, Sendable {
        public var comment: String?
        // NOTE(review): non-optional, so decoding fails when the key is absent — confirm
        // that every catalog this must read always carries `extractionState`.
        public var extractionState: ExtractionState
        /// Localizations by language identifier.
        public var localizations: [String: StringCatalog.Entry.Localization]?
        /// Stored through `TrueIsNilCodableWrapper`, which treats `true` as the
        /// "absent / null" representation when coding.
        @TrueIsNilCodableWrapper
        public var shouldTranslate: Bool = true

        public init(
            comment: String? = nil,
            extractionState: ExtractionState = .manual,
            localizations: [String: StringCatalog.Entry.Localization]? = nil,
            shouldTranslate: Bool = true
        ) {
            self.comment = comment
            self.extractionState = extractionState
            self.localizations = localizations
            self.shouldTranslate = shouldTranslate
        }

        /// A `Bool` wrapper whose single-value coding maps `true` to null and
        /// null to `true`; `false` is coded as a plain `false`.
        @propertyWrapper
        public struct TrueIsNilCodableWrapper: Codable, Hashable, BitwiseCopyable, Sendable {
            public var wrappedValue: Bool = false

            public init(wrappedValue: Bool) {
                self.wrappedValue = wrappedValue
            }

            /// Decodes null as `true`; any other value is decoded as a `Bool`.
            public init(from decoder: any Decoder) throws {
                let single = try decoder.singleValueContainer()
                if single.decodeNil() {
                    self.wrappedValue = true
                } else {
                    self.wrappedValue = try single.decode(Bool.self)
                }
            }

            /// Encodes `true` as null and `false` as `false`.
            public func encode(to encoder: any Encoder) throws {
                var container = encoder.singleValueContainer()
                if wrappedValue {
                    try container.encodeNil()
                } else {
                    try container.encode(false)
                }
            }
        }

        public enum ExtractionState: String, Codable, Hashable, Sendable, BitwiseCopyable {
            case manual
            case migrated
        }

        /// Wrapper object holding a single `stringUnit` key.
        public struct StringUnit: Codable, Hashable, Sendable {
            public var stringUnit: Unit

            public init(stringUnit: Unit) {
                self.stringUnit = stringUnit
            }
        }

        /// A localized value together with its review state.
        public struct Unit: Codable, Hashable, Sendable {
            public var state: State
            public var value: String

            public enum State: String, Codable, Hashable, Sendable, BitwiseCopyable {
                case needs_review
                case new
                case stale
                case translated
            }

            /// - Parameters:
            ///   - state: Defaults to `.needs_review`.
            ///   - value: The localized text.
            public init(state: State = State.needs_review, value: String) {
                self.state = state
                self.value = value
            }
        }

        /// The localization of an entry for one language: a plain string unit,
        /// variations, and/or named substitutions.
        public struct Localization: Codable, Hashable, Sendable {
            public var stringUnit: Unit?
            public var variations: Variations?
            public var substitutions: [String: Substitution]?

            public init(
                stringUnit: Unit? = nil,
                variations: Variations? = nil,
                substitutions: [String: Substitution]? = nil
            ) {
                self.stringUnit = stringUnit
                self.variations = variations
                self.substitutions = substitutions
            }

            /// A named substitution: argument number, format specifier and its variations.
            public struct Substitution: Codable, Hashable, Sendable {
                // Public like every other model property in this file, so values passed
                // to the public initializer can be read back by clients.
                public var argNum: Int
                public var formatSpecifier: String
                public var variations: Variations

                public init(argNum: Int, formatSpecifier: String, variations: Variations) {
                    self.argNum = argNum
                    self.formatSpecifier = formatSpecifier
                    self.variations = variations
                }

                /// Variations available inside a substitution (plural only).
                public struct Variations: Codable, Hashable, Sendable {
                    public let plural: Plural?

                    public init(plural: Plural? = nil) {
                        self.plural = plural
                    }
                }
            }

            /// Top-level variations of a localization: by device and/or by plural category.
            public struct Variations: Codable, Hashable, Sendable {
                public var device: Device?
                public var plural: Plural?

                public init(device: Device? = nil, plural: Plural? = nil) {
                    self.device = device
                    self.plural = plural
                }
            }

            /// Per-device variations; every device slot is optional.
            public struct Device: Codable, Hashable, Sendable {
                public var appletv: RawVariations?
                public var applevision: RawVariations?
                public var applewatch: RawVariations?
                public var ipad: RawVariations?
                public var iphone: RawVariations?
                public var ipod: RawVariations?
                public var mac: RawVariations?
                public var other: RawVariations?

                public init(
                    appletv: RawVariations? = nil,
                    applevision: RawVariations? = nil,
                    applewatch: RawVariations? = nil,
                    ipad: RawVariations? = nil,
                    iphone: RawVariations? = nil,
                    ipod: RawVariations? = nil,
                    mac: RawVariations? = nil,
                    other: RawVariations? = nil
                ) {
                    self.appletv = appletv
                    self.applevision = applevision
                    self.applewatch = applewatch
                    self.ipad = ipad
                    self.iphone = iphone
                    self.ipod = ipod
                    self.mac = mac
                    self.other = other
                }
            }

            /// Plural-category variations.
            public struct Plural: Codable, Hashable, Sendable {
                public var zero: StringUnit?
                // NOTE(review): `one` is required here; languages whose plural rules only
                // use `other` would presumably fail to decode — confirm against real data.
                public var one: StringUnit
                public var two: StringUnit?
                public var few: StringUnit?
                public var many: StringUnit?
                public var other: StringUnit

                public init(
                    zero: StringUnit? = nil,
                    one: StringUnit,
                    two: StringUnit? = nil,
                    few: StringUnit? = nil,
                    many: StringUnit? = nil,
                    other: StringUnit
                ) {
                    self.zero = zero
                    self.one = one
                    self.two = two
                    self.few = few
                    self.many = many
                    self.other = other
                }
            }
        }

        /// The payload of one device slot: a plain string unit and/or nested plural variations.
        public struct RawVariations: Codable, Hashable, Sendable {
            public var variations: Variations?
            public var stringUnit: Unit?

            public init(variations: Variations? = nil, stringUnit: Unit? = nil) {
                self.variations = variations
                self.stringUnit = stringUnit
            }

            /// Variations nested under a device slot (plural only).
            public struct Variations: Codable, Hashable, Sendable {
                public var plural: StringCatalog.Entry.Localization.Plural?

                public init(plural: StringCatalog.Entry.Localization.Plural? = nil) {
                    self.plural = plural
                }
            }
        }
    }
}
extension KeyedEncodingContainer {
    /// Keyed encoding for `TrueIsNilCodableWrapper`: when the wrapped value is `true`
    /// nothing is written for `key` (the key is omitted entirely); otherwise the
    /// wrapped `Bool` is encoded under `key`.
    internal mutating func encode(_ value: StringCatalog.Entry.TrueIsNilCodableWrapper, forKey key: Key) throws {
        // `true` is represented by the key being absent, so there is nothing to emit.
        guard !value.wrappedValue else { return }
        try encode(value.wrappedValue, forKey: key)
    }
}
extension KeyedDecodingContainer {
    /// Keyed decoding for `TrueIsNilCodableWrapper`: returns the decoded wrapper when
    /// `decodeIfPresent` yields one, and a wrapper holding `true` when it yields `nil`.
    internal func decode(_ type: StringCatalog.Entry.TrueIsNilCodableWrapper.Type, forKey key: Key) throws -> StringCatalog.Entry.TrueIsNilCodableWrapper {
        guard let decoded = try decodeIfPresent(type, forKey: key) else {
            // No value available for this key: fall back to `true`.
            return StringCatalog.Entry.TrueIsNilCodableWrapper(wrappedValue: true)
        }
        return decoded
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment