Created
December 4, 2014 00:27
-
-
Save kongtomorrow/3e35d926ec17d9b621c1 to your computer and use it in GitHub Desktop.
simd adoption?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import <Foundation/Foundation.h>
#import <mach/mach_time.h>
#import <simd/simd.h>
#include <string.h>
/// Converts a duration measured in mach absolute-time ticks into seconds.
///
/// The ticks-to-seconds factor is derived from mach_timebase_info() and cached in a
/// function-local static. While the timebase query has not succeeded the cached factor
/// stays 0.0, so the function returns 0 and repeats the query on the next call.
///
/// @param machTimeInterval A tick count, e.g. the difference of two mach_absolute_time() values.
/// @return The same duration expressed in seconds.
NSTimeInterval SecondsFromMachTimeInterval(uint64_t machTimeInterval) {
    static double secondsPerTick = 0.0;

    // Fast path: the conversion factor has already been computed.
    if (secondsPerTick != 0.0) {
        return secondsPerTick * machTimeInterval;
    }

    mach_timebase_info_data_t timebase;
    kern_return_t status = mach_timebase_info(&timebase);
    if (status == KERN_SUCCESS) {
        // numer/denom scales ticks to nanoseconds; step down to microseconds, then to seconds.
        double nanosPerTick = (double)timebase.numer / (double)timebase.denom;
        double microsPerTick = nanosPerTick / 1000;
        secondsPerTick = microsPerTick / 1000000;
    }
    return secondsPerTick * machTimeInterval;
}
/// Runs a block a fixed number of times and returns the total elapsed wall time.
///
/// @param blockToTime The work to measure; invoked once per repetition.
/// @param repCount    Number of invocations. Zero or negative values run the block zero times.
/// @return Seconds elapsed across all invocations, including the loop overhead itself.
NSTimeInterval TimeToPerformBlock(void (^blockToTime)(void), long repCount) {
    const uint64_t startTicks = mach_absolute_time();

    for (long remaining = repCount; remaining > 0; remaining--) {
        blockToTime();
    }

    const uint64_t elapsedTicks = mach_absolute_time() - startTicks;
    return SecondsFromMachTimeInterval(elapsedTicks);
}
/// Logs one benchmark result line via NSLog: the identifier left-justified in an
/// 80-character column, followed by the duration in seconds.
///
/// @param seconds    The measured duration, in seconds.
/// @param identifier A label for the measurement; it is logged as its UTF-8 C string.
void logMachTimeInterval_withIdentifier_(NSTimeInterval seconds, NSString *identifier) {
    const char *label = [identifier UTF8String];
    NSLog(@"%-80s %g seconds\n", label, seconds);
}
/// Times `repCount` invocations of a block and logs the result under the given label.
///
/// @param identifier  Label printed next to the measurement.
/// @param repCount    How many times the block is invoked.
/// @param blockToTime The work being measured.
__attribute__((noinline)) void PresentTimeToPerformBlockWithIdentifier(NSString *identifier, long repCount, void (^blockToTime)(void)) {
    NSTimeInterval elapsedSeconds = TimeToPerformBlock(blockToTime, repCount);
    logMachTimeInterval_withIdentifier_(elapsedSeconds, identifier);
}
/// Reports whether every byte in a buffer is 7-bit ASCII (i.e. has its high bit clear).
///
/// The buffer is scanned a machine word at a time where possible. Words are loaded with
/// memcpy rather than by casting the byte pointer, so the function is well-defined for
/// buffers at any alignment.
///
/// @param bytes Start of the buffer. Only read when `len` is positive.
/// @param len   Number of bytes to examine. Zero or negative lengths are treated as an
///              empty buffer.
/// @return true if no examined byte has bit 0x80 set (vacuously true for an empty buffer),
///         false as soon as a chunk containing such a byte is found.
__attribute__((noinline)) Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
    // A negative length would otherwise send the byte-wise tail loop past the buffer.
    if (len <= 0) return true;
#if __LP64__
    const uint64_t highBits64 = 0x8080808080808080ULL;
    /* A bit of unrolling; go by 32s, 16s, and 8s first */
    while (len >= 32) {
        uint64_t words[4];
        memcpy(words, bytes, sizeof(words));
        // More efficient to collect the high bits than to branch after every word.
        if ((words[0] | words[1] | words[2] | words[3]) & highBits64) return false;
        bytes += 32;
        len -= 32;
    }
    while (len >= 16) {
        uint64_t words[2];
        memcpy(words, bytes, sizeof(words));
        if ((words[0] | words[1]) & highBits64) return false;
        bytes += 16;
        len -= 16;
    }
    while (len >= 8) {
        uint64_t word;
        memcpy(&word, bytes, sizeof(word));
        if (word & highBits64) return false;
        bytes += 8;
        len -= 8;
    }
#endif
    /* Go by 4s */
    while (len >= 4) {
        uint32_t word;
        memcpy(&word, bytes, sizeof(word));
        if (word & 0x80808080U) return false;
        bytes += 4;
        len -= 4;
    }
    /* Handle the rest one byte at a time */
    while (len > 0) {
        if (*bytes++ & 0x80) return false;
        len--;
    }
    return true;
}
/// SIMD variant of __CFBytesInASCII: reports whether every byte in a buffer has its
/// high bit clear, testing 32, 16, 8 and 4 bytes at a time with <simd/simd.h> vectors.
///
/// Each chunk is copied into a vector local with memcpy instead of dereferencing the
/// byte pointer through a vector pointer type. The vector types are over-aligned relative
/// to uint8_t, and callers pass arbitrary byte pointers, so a direct dereference would
/// be an unaligned access through an aligned type.
///
/// @param bytes Start of the buffer. Only read when `len` is positive.
/// @param len   Number of bytes to examine. Zero or negative lengths are treated as an
///              empty buffer.
/// @return true if no examined byte has bit 0x80 set (vacuously true for an empty buffer),
///         false as soon as a chunk containing such a byte is found.
__attribute__((noinline)) Boolean __CFBytesInASCII_vec(const uint8_t *bytes, CFIndex len) {
    // A negative length would otherwise send the byte-wise tail loop past the buffer.
    if (len <= 0) return true;
#if __LP64__
    /* A bit of unrolling; go by 32s, 16s, and 8s first */
    while (len >= 32) {
        vector_uchar32 val;
        memcpy(&val, bytes, sizeof(val));
        if (vector_any(val & 0x80U)) {
            return false;
        }
        bytes += 32;
        len -= 32;
    }
    while (len >= 16) {
        vector_uchar16 val;
        memcpy(&val, bytes, sizeof(val));
        if (vector_any(val & 0x80U)) {
            return false;
        }
        bytes += 16;
        len -= 16;
    }
    while (len >= 8) {
        vector_uchar8 val;
        memcpy(&val, bytes, sizeof(val));
        if (vector_any(val & 0x80U)) {
            return false;
        }
        bytes += 8;
        len -= 8;
    }
#endif
    /* Go by 4s */
    while (len >= 4) {
        vector_uchar4 val;
        memcpy(&val, bytes, sizeof(val));
        if (vector_any(val & 0x80U)) {
            return false;
        }
        bytes += 4;
        len -= 4;
    }
    /* Handle the rest one byte at a time */
    while (len > 0) {
        if (*bytes++ & 0x80) return false;
        len--;
    }
    return true;
}
// Accumulates every scan result so the benchmarked calls have an observable effect.
long total = 0;
// Number of times each benchmark block is invoked.
long repeatCount = 10000;

/// Benchmarks the scalar and SIMD ASCII scanners on three inputs: a short literal, the
/// system word list, and the word list prefixed with a non-ASCII character (so both
/// scanners should bail out on the first chunk).
///
/// @return 0 on success, 1 if the word list could not be read.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSString *shortString = @"hello!";

        // Read the word list once and check the result: a nil string would otherwise reach
        // strlen(NULL) / -stringByAppendingString:nil below.
        NSError *readError = nil;
        NSString *longString = [NSString stringWithContentsOfFile:@"/usr/share/dict/words"
                                                         encoding:NSUTF8StringEncoding
                                                            error:&readError];
        if (longString == nil) {
            NSLog(@"Could not read /usr/share/dict/words: %@", readError);
            return 1;
        }
        NSString *longEarlyOutString = [@"ü" stringByAppendingString:longString];

        const uint8_t *shortStringBytes = (const uint8_t *)[shortString UTF8String];
        CFIndex shortLen = strlen((const char *)shortStringBytes);
        const uint8_t *longStringBytes = (const uint8_t *)[longString UTF8String];
        CFIndex longLen = strlen((const char *)longStringBytes);
        const uint8_t *longEarlyOutStringBytes = (const uint8_t *)[longEarlyOutString UTF8String];
        CFIndex longEarlyLen = strlen((const char *)longEarlyOutStringBytes);

        NSLog(@"shortLen: %ld longLen:%ld longEarlyLen:%ld", shortLen, longLen, longEarlyLen);

        PresentTimeToPerformBlockWithIdentifier(@"short no-vec", repeatCount, ^{
            total += __CFBytesInASCII(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"short vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(shortStringBytes, shortLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longStringBytes, longLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out no-vec", repeatCount, ^{
            total += __CFBytesInASCII(longEarlyOutStringBytes, longEarlyLen);
        });
        PresentTimeToPerformBlockWithIdentifier(@"long early-out vec", repeatCount, ^{
            total += __CFBytesInASCII_vec(longEarlyOutStringBytes, longEarlyLen);
        });
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment