Last active
August 25, 2025 03:43
-
-
Save pbk20191/d28edae070a9d775c8368c52cb145aee to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  CSVParserSequence.swift
//  MyTool
//
//  Created by 박병관 on 8/25/25.
//
import Foundation | |
/// Wraps a source of Unicode scalars and parses it as delimiter-separated
/// text, producing one `[String]` per record.
///
/// The parsing itself lives in the nested `_CSVCore` state machine, which is
/// fed one scalar at a time; `CSVIterator` pairs that state machine with an
/// iterator over the underlying scalars.
public struct CSVParserSequence<Base> {
    /// One parsed record: the field values of a single row, in order.
    public typealias Element = [String]

    /// The underlying source of Unicode scalars.
    public var base: Base
    /// The scalar that separates fields inside a record.
    public var delimiter: UnicodeScalar
    /// Whether a leading U+FEFF (byte-order mark) is removed.
    public var stripBOM: Bool

    /// Creates a parser over `base`.
    /// - Parameters:
    ///   - base: The source of Unicode scalars to parse.
    ///   - delimiter: The field separator. Defaults to a comma.
    ///   - stripBOM: Whether to drop a leading U+FEFF. Defaults to `true`.
    public init(base: Base, delimiter: UnicodeScalar = ",", stripBOM: Bool = true) {
        self.base = base
        self.delimiter = delimiter
        self.stripBOM = stripBOM
    }

    // MARK: - Core state machine

    /// Scalar-at-a-time CSV state machine.
    ///
    /// Call `feed(_:)` for every scalar of the input and once more with `nil`
    /// at end of input; each call returns a completed record or `nil`.
    struct _CSVCore {
        // Configuration
        let delimiter: UnicodeScalar
        let quote: UnicodeScalar = "\""
        let lf: UnicodeScalar = "\n"
        let cr: UnicodeScalar = "\r"
        let stripBOM: Bool

        // State
        var row: [String] = []
        var field = ""
        var inQuote = false
        var afterQuote = false     // true right after a closing quote
        var seenCR = false         // true if the last character was CR (to detect CRLF)
        var isFirstScalar = true   // true until the very first scalar is fed (for BOM stripping)

        init(delimiter: UnicodeScalar, stripBOM: Bool) {
            self.delimiter = delimiter
            self.stripBOM = stripBOM
        }

        /// Returns `true` for a space or a horizontal tab.
        @inline(__always) private func isWhitespace(_ s: UnicodeScalar) -> Bool {
            s == " " || s == "\t"
        }

        /// Moves the field being accumulated into the current row and clears it.
        private mutating func finishField() {
            row.append(post(field, isFirstField: row.isEmpty))
            field.removeAll()
        }

        /// Finishes the current field, then returns the completed row and resets it.
        private mutating func finishRecord() -> [String] {
            finishField()
            let completed = row
            row.removeAll()
            return completed
        }

        /// Feed one scalar into the parser.
        ///
        /// - Parameter chOpt: The next scalar of the input, or `nil` exactly once
        ///   at end of input to flush the last record (if any).
        /// - Returns: A row `[String]` when a record is completed, otherwise `nil`.
        mutating func feed(_ chOpt: UnicodeScalar?) -> [String]? {
            // Strip BOM if it is the very first scalar.
            if isFirstScalar {
                isFirstScalar = false
                if stripBOM, chOpt == "\u{FEFF}" {
                    return nil
                }
            }

            // A CR already terminated the record; swallow the LF of a CRLF pair.
            if seenCR {
                seenCR = false
                if chOpt == lf {
                    return nil
                }
                // Otherwise fall through and process this scalar normally.
            }

            // End of input.
            guard let ch = chOpt else {
                // A quoted field that was opened (and possibly closed) still
                // counts as a field even when it is empty, so `""` at EOF
                // yields `[""]` exactly like `""\n` does.
                let hadQuotedField = inQuote || afterQuote
                inQuote = false
                afterQuote = false
                guard hadQuotedField || !field.isEmpty || !row.isEmpty else {
                    return nil
                }
                return finishRecord()
            }

            // Right after a closing quote: decide what the quote meant.
            if afterQuote {
                // The delimiter is tested before the whitespace rule so that a
                // tab or space delimiter is not mistaken for ignorable padding.
                if ch == delimiter {
                    finishField()
                    afterQuote = false
                    return nil
                }
                if isWhitespace(ch) {
                    // Ignore spaces or tabs between a closing quote and the delimiter.
                    return nil
                }
                if ch == lf {
                    afterQuote = false
                    return finishRecord()
                }
                if ch == cr {
                    afterQuote = false
                    seenCR = true
                    return finishRecord()
                }
                if ch == quote {
                    // "" inside a quoted field → one literal quote, still quoted.
                    field.unicodeScalars.append(quote)
                    afterQuote = false
                    inQuote = true
                    return nil
                }
                // Anything else is kept as a literal character after the closing quote.
                field.unicodeScalars.append(ch)
                afterQuote = false
                return nil
            }

            // Inside quotes: everything is literal except the quote itself.
            if inQuote {
                if ch == quote {
                    inQuote = false
                    afterQuote = true
                } else {
                    field.unicodeScalars.append(ch)
                }
                return nil
            }

            // Outside quotes.
            if ch == quote {
                inQuote = true
                return nil
            }
            if ch == delimiter {
                finishField()
                return nil
            }
            if ch == lf {
                return finishRecord()
            }
            if ch == cr {
                seenCR = true
                return finishRecord()
            }

            // Regular character.
            field.unicodeScalars.append(ch)
            return nil
        }

        /// Removes a leading U+FEFF from `s` when `stripBOM` is enabled and the
        /// field is the first one of its row; otherwise returns `s` unchanged.
        private func post(_ s: String, isFirstField: Bool) -> String {
            guard stripBOM, isFirstField else { return s }
            if s.unicodeScalars.first == "\u{FEFF}" {
                return String(s.unicodeScalars.dropFirst())
            }
            return s
        }
    }

    // MARK: - Iterator container

    /// Pairs an iterator over the underlying scalars with the parser state.
    public struct CSVIterator<InnerBase> {
        /// Iterator over the underlying scalars.
        var base: InnerBase
        /// Parser state machine fed from `base`.
        var core: _CSVCore
        /// Set once end of input has been handled; further calls yield `nil`.
        var eofEmitted = false

        internal init(base: InnerBase, parent: CSVParserSequence) {
            self.base = base
            self.core = _CSVCore(delimiter: parent.delimiter, stripBOM: parent.stripBOM)
        }
    }
}
// MARK: - Sequence | |
extension CSVParserSequence: Sequence where Base: Sequence, Base.Element == UnicodeScalar {
    /// Creates an iterator that parses rows from a fresh iterator over `base`,
    /// using this sequence's `delimiter` and `stripBOM` settings.
    public func makeIterator() -> CSVIterator<Base.Iterator> {
        let scalars = base.makeIterator()
        return CSVIterator(base: scalars, parent: self)
    }
}
extension CSVParserSequence.CSVIterator: IteratorProtocol
where InnerBase: IteratorProtocol, InnerBase.Element == UnicodeScalar {
    /// Pulls scalars from `base` and feeds them to the parser until a record
    /// is completed.
    ///
    /// - Returns: The next parsed row, or `nil` once the input is exhausted
    ///   and any final row has been returned.
    public mutating func next() -> [String]? {
        while !eofEmitted {
            let scalar = base.next()
            // Record exhaustion before feeding, so that a final row flushed by
            // `feed(nil)` does not cause `base` to be polled again afterwards.
            if scalar == nil { eofEmitted = true }
            if let row = core.feed(scalar) { return row }
        }
        return nil
    }
}
// MARK: - AsyncSequence | |
extension CSVParserSequence: AsyncSequence
where Base: AsyncSequence, Base.Element == UnicodeScalar {
    /// Creates an asynchronous iterator that parses rows from a fresh async
    /// iterator over `base`, using this sequence's `delimiter` and `stripBOM`.
    public func makeAsyncIterator() -> CSVIterator<Base.AsyncIterator> {
        let scalars = base.makeAsyncIterator()
        return CSVIterator(base: scalars, parent: self)
    }
}
extension CSVParserSequence.CSVIterator: AsyncIteratorProtocol
where InnerBase: AsyncIteratorProtocol, InnerBase.Element == UnicodeScalar {
    /// Awaits scalars from `base` on the given isolation and feeds them to the
    /// parser until a record is completed.
    ///
    /// - Parameter actor: The isolation forwarded to `base.next(isolation:)`.
    /// - Returns: The next parsed row, or `nil` once the input is exhausted
    ///   and any final row has been returned.
    /// - Throws: Whatever `base.next(isolation:)` throws.
    @available(macOS 15.0, iOS 18.0, watchOS 11.0, tvOS 18.0, visionOS 2.0, *)
    public mutating func next(isolation actor: isolated (any Actor)?) async throws(InnerBase.Failure) -> [String]? {
        while !eofEmitted {
            let scalar = try await base.next(isolation: actor)
            // Record exhaustion before feeding, so that a final row flushed by
            // `feed(nil)` does not cause `base` to be awaited again afterwards.
            if scalar == nil { eofEmitted = true }
            if let row = core.feed(scalar) { return row }
        }
        return nil
    }

    /// Awaits scalars from `base` and feeds them to the parser until a record
    /// is completed.
    ///
    /// - Returns: The next parsed row, or `nil` once the input is exhausted
    ///   and any final row has been returned.
    /// - Throws: Rethrows whatever `base.next()` throws.
    public mutating func next() async rethrows -> [String]? {
        while !eofEmitted {
            let scalar = try await base.next()
            // Same exhaustion bookkeeping as the isolated overload.
            if scalar == nil { eofEmitted = true }
            if let row = core.feed(scalar) { return row }
        }
        return nil
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// StringCatalog.swift | |
// | |
// | |
// Created by Evan Anderson on 5/16/24. | |
// | |
/// Codable model of a string catalog document: a source language, a format
/// version, and a table of entries keyed by string.
///
/// NOTE(review): the property names look like they mirror Xcode's `.xcstrings`
/// JSON layout — confirm against a real catalog before relying on that.
public struct StringCatalog: Codable, Hashable, Sendable {
    public var sourceLanguage: String
    public var version: String
    /// Catalog entries keyed by string key.
    public var strings: [String: StringCatalog.Entry]

    /// Creates a catalog; defaults are `"en"`, `"1.0"` and no entries.
    public init(
        sourceLanguage: String = "en",
        version: String = "1.0",
        strings: [String: StringCatalog.Entry] = [:]
    ) {
        self.sourceLanguage = sourceLanguage
        self.version = version
        self.strings = strings
    }

    /// A single catalog entry with its optional comment and localizations.
    public struct Entry: Codable, Hashable, Sendable {
        public var comment: String?
        public var extractionState: ExtractionState
        /// Localizations keyed by a string (presumably a language code — confirm).
        public var localizations: [String: StringCatalog.Entry.Localization]?
        /// Stored through `TrueIsNilCodableWrapper`, which encodes `true` as nil.
        @TrueIsNilCodableWrapper
        public var shouldTranslate: Bool = true

        /// Creates an entry; by default it is `.manual`, has no comment or
        /// localizations, and `shouldTranslate` is `true`.
        public init(
            comment: String? = nil,
            extractionState: ExtractionState = .manual,
            localizations: [String: StringCatalog.Entry.Localization]? = nil,
            shouldTranslate: Bool = true
        ) {
            self.comment = comment
            self.extractionState = extractionState
            self.localizations = localizations
            self.shouldTranslate = shouldTranslate
        }

        /// Property wrapper for a `Bool` whose `true` value is represented by
        /// nil when coded through a single-value container.
        @propertyWrapper
        public struct TrueIsNilCodableWrapper: Codable, Hashable, BitwiseCopyable, Sendable {
            public var wrappedValue: Bool = false

            public init(wrappedValue: Bool) {
                self.wrappedValue = wrappedValue
            }

            /// Decodes nil as `true`; any other value is decoded as a `Bool`.
            public init(from decoder: any Decoder) throws {
                let single = try decoder.singleValueContainer()
                if single.decodeNil() {
                    self.wrappedValue = true
                } else {
                    self.wrappedValue = try single.decode(Bool.self)
                }
            }

            /// Encodes `true` as nil and `false` as the literal `false`.
            public func encode(to encoder: any Encoder) throws {
                var container = encoder.singleValueContainer()
                if wrappedValue {
                    try container.encodeNil()
                } else {
                    try container.encode(false)
                }
            }
        }

        public enum ExtractionState: String, Codable, Hashable, Sendable, BitwiseCopyable {
            case manual
            case migrated
        }

        /// Wrapper object holding a single `Unit` under the `stringUnit` key.
        public struct StringUnit: Codable, Hashable, Sendable {
            public var stringUnit: Unit

            public init(stringUnit: Unit) {
                self.stringUnit = stringUnit
            }
        }

        /// A string value together with its state.
        public struct Unit: Codable, Hashable, Sendable {
            public var state: State
            public var value: String

            public enum State: String, Codable, Hashable, Sendable, BitwiseCopyable {
                case needs_review
                case new
                case stale
                case translated
            }

            /// Creates a unit; `state` defaults to `.needs_review`.
            public init(state: State = State.needs_review, value: String) {
                self.state = state
                self.value = value
            }
        }

        /// One localization of an entry: a plain string unit, variations,
        /// and/or named substitutions.
        public struct Localization: Codable, Hashable, Sendable {
            public var stringUnit: Unit?
            public var variations: Variations?
            public var substitutions: [String: Substitution]?

            public init(
                stringUnit: Unit?,
                variations: Variations? = nil,
                substitutions: [String: Substitution]? = nil
            ) {
                self.stringUnit = stringUnit
                self.variations = variations
                self.substitutions = substitutions
            }

            /// A substitution with an argument number, a format specifier and
            /// its own plural variations.
            public struct Substitution: Codable, Hashable, Sendable {
                // These are public like every other model in this file, so that
                // values built with the public initializer can be read back
                // from outside the module.
                public var argNum: Int
                public var formatSpecifier: String
                public var variations: Variations

                public init(argNum: Int, formatSpecifier: String, variations: Variations) {
                    self.argNum = argNum
                    self.formatSpecifier = formatSpecifier
                    self.variations = variations
                }

                /// Variations available to a substitution (plural only).
                public struct Variations: Codable, Hashable, Sendable {
                    public let plural: Plural?

                    public init(plural: Plural?) {
                        self.plural = plural
                    }
                }
            }

            /// Variations of a localization by device and/or plural category.
            public struct Variations: Codable, Hashable, Sendable {
                public var device: Device?
                public var plural: Plural?

                public init(device: Device?, plural: Plural?) {
                    self.device = device
                    self.plural = plural
                }
            }

            /// Per-device variations; every device slot is optional.
            public struct Device: Codable, Hashable, Sendable {
                public var appletv: RawVariations?
                public var applevision: RawVariations?
                public var applewatch: RawVariations?
                public var ipad: RawVariations?
                public var iphone: RawVariations?
                public var ipod: RawVariations?
                public var mac: RawVariations?
                public var other: RawVariations?

                public init(
                    appletv: RawVariations?,
                    applevision: RawVariations?,
                    applewatch: RawVariations?,
                    ipad: RawVariations?,
                    iphone: RawVariations?,
                    ipod: RawVariations?,
                    mac: RawVariations?,
                    other: RawVariations?
                ) {
                    self.appletv = appletv
                    self.applevision = applevision
                    self.applewatch = applewatch
                    self.ipad = ipad
                    self.iphone = iphone
                    self.ipod = ipod
                    self.mac = mac
                    self.other = other
                }
            }

            /// Plural variations. `one` and `other` are required by this model;
            /// the remaining categories are optional.
            public struct Plural: Codable, Hashable, Sendable {
                public var zero: StringUnit?
                public var one: StringUnit
                public var two: StringUnit?
                public var few: StringUnit?
                public var many: StringUnit?
                public var other: StringUnit

                public init(
                    zero: StringUnit?,
                    one: StringUnit,
                    two: StringUnit?,
                    few: StringUnit?,
                    many: StringUnit?,
                    other: StringUnit
                ) {
                    self.zero = zero
                    self.one = one
                    self.two = two
                    self.few = few
                    self.many = many
                    self.other = other
                }
            }
        }

        /// Payload of a device slot: nested plural variations and/or a plain
        /// string unit.
        public struct RawVariations: Codable, Hashable, Sendable {
            public var variations: Variations?
            public var stringUnit: Unit?

            public init(variations: Variations?, stringUnit: Unit?) {
                self.variations = variations
                self.stringUnit = stringUnit
            }

            /// Variations available inside a device slot (plural only).
            public struct Variations: Codable, Hashable, Sendable {
                public var plural: StringCatalog.Entry.Localization.Plural?

                public init(plural: StringCatalog.Entry.Localization.Plural?) {
                    self.plural = plural
                }
            }
        }
    }
}
extension KeyedEncodingContainer {
    /// Encodes a `TrueIsNilCodableWrapper` for `key`.
    ///
    /// When the wrapped value is `true`, nothing is written for `key` at all;
    /// otherwise the wrapped `Bool` is encoded under `key`.
    internal mutating func encode(_ value: StringCatalog.Entry.TrueIsNilCodableWrapper, forKey key: Key) throws {
        // `true` is the implicit value, so the key is left out entirely.
        guard !value.wrappedValue else { return }
        try encode(value.wrappedValue, forKey: key)
    }
}
extension KeyedDecodingContainer {
    /// Decodes a `TrueIsNilCodableWrapper` for `key`, substituting a wrapper
    /// holding `true` when `decodeIfPresent` finds no value for that key.
    internal func decode(_ type: StringCatalog.Entry.TrueIsNilCodableWrapper.Type, forKey key: Key) throws -> StringCatalog.Entry.TrueIsNilCodableWrapper {
        guard let decoded = try decodeIfPresent(type, forKey: key) else {
            return StringCatalog.Entry.TrueIsNilCodableWrapper(wrappedValue: true)
        }
        return decoded
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment