-
-
Save captainpete/973608 to your computer and use it in GitHub Desktop.
Text processor for some strangely formatted geo data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h>  // for errno and EINTR
#include <fcntl.h>  // for O_RDONLY and open()
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h> // for close() and read()
// Capacity of the read buffer in bytes: 128 KiB, i.e. 32 blocks of 4 KiB.
#define BUF_SIZE (4096 * 32) // 128k, 4k aligned
// Progress through the three-byte sequence CR, backslash, LF being matched.
// Values start at 1, so a zero-initialized state_t is not a valid state.
typedef enum {
  state_want_cr = 1, // nothing held back; a '\r' would start a match
  state_want_slash,  // '\r' seen; a '\\' would continue the match
  state_want_lf      // '\r' '\\' seen; a '\n' would complete the match
} state_t;
typedef struct { | |
ssize_t buf_has; | |
off_t offset; | |
state_t cs; // current_state | |
unsigned char buf[BUF_SIZE]; | |
} processor_t; | |
// Write to stdout the partially matched input the state machine is holding back.
inline void flush(processor_t *p);
// flush(), then write the byte at p->offset to stdout and restart matching.
inline void flush_and_reset(processor_t *p);
// Filter everything readable from fd to stdout; returns 0 on success, -1 on failure.
int processor_process(processor_t *p, int fd);
int main(int argc, char * const *argv) { | |
if (argc < 2) { | |
fprintf(stderr, "Usage: main file.crs\n"); | |
exit(2); | |
} | |
const char *crs_path = argv[1]; | |
int fd = open(crs_path, O_RDONLY); | |
if (fd < 0) { | |
perror("Error opening file"); | |
return EXIT_FAILURE; | |
} | |
processor_t p = { | |
.buf_has = 0, | |
.offset = 0, | |
.cs = state_want_cr | |
}; | |
if(processor_process(&p, fd) != 0) { | |
close(fd); | |
return EXIT_FAILURE; | |
} | |
close(fd); | |
return EXIT_SUCCESS; | |
} | |
/**
 * Copy everything readable from fd to stdout, replacing each occurrence of the
 * three bytes CR, backslash, LF with a single CR.
 * (Original author's shorthand for the transformation: 's/\r\\\n/\\r/'.)
 *
 * Bytes belonging to a possible match are held back (tracked in p->cs) until
 * the match either completes or is broken; a broken match is written out via
 * flush() / flush_and_reset().
 *
 * @param p   processor state; p->buf is refilled from fd whenever p->offset
 *            reaches p->buf_has.
 * @param fd  descriptor to read from.
 * @return 0 on success, -1 if reading fails or stdout reports a write error.
 */
int processor_process(processor_t *p, int fd) {
  while (1) {
    if (p->offset >= p->buf_has) {
      // Buffer exhausted: refill it, retrying when a signal interrupts read().
      do {
        p->buf_has = read(fd, p->buf, BUF_SIZE);
      } while (p->buf_has < 0 && errno == EINTR);

      if (p->buf_has < 0) {
        perror("Error reading from file");
        return -1;
      }
      if (p->buf_has == 0) {
        // eof: write out any unfinished match and leave the state clean so
        // the held-back bytes cannot be emitted a second time.
        flush(p);
        p->cs = state_want_cr;
        break;
      }
      p->offset = 0;
    }

    switch (p->buf[p->offset]) {
      case '\r':
        if (p->cs != state_want_cr) {
          // This CR breaks the match in progress, so release what was held.
          flush(p);
        }
        // Either way this CR may itself begin a match, so hold it back.
        p->cs = state_want_slash;
        break;

      case '\\':
        if (p->cs == state_want_slash) {
          p->cs = state_want_lf;
        }
        else {
          flush_and_reset(p);
        }
        break;

      case '\n':
        if (p->cs == state_want_lf) {
          // We have a winner
          fputc('\r', stdout);   // Output the replacement char
          p->cs = state_want_cr; // reset
        }
        else {
          flush_and_reset(p);
        }
        break;

      default:
        // Any other byte breaks a match in progress: release the held-back
        // bytes before it and start over. With nothing held back this just
        // writes the byte.
        flush_and_reset(p);
        break;
    }
    p->offset++;
  }

  // Surface write failures (e.g. closed pipe, full disk) instead of ignoring them.
  if (fflush(stdout) != 0 || ferror(stdout)) {
    fputs("Error writing to stdout\n", stderr);
    return -1;
  }
  return 0;
}
/**
 * Write to stdout the bytes of an unfinished CR, backslash, LF match that were
 * consumed from the input but not yet emitted, exactly as they were read.
 *
 * What is held back is determined by p->cs:
 *   state_want_lf    -> CR and backslash were consumed
 *   state_want_slash -> CR was consumed
 *   state_want_cr    -> nothing is held back
 *
 * Does not modify p->cs.
 */
void flush(processor_t *p) {
  switch (p->cs) {
    case state_want_lf:
      fputc('\r', stdout);
      fputc('\\', stdout);
      break;
    case state_want_slash:
      fputc('\r', stdout);
      break;
    case state_want_cr:
      break;
  }
}
/**
 * Abandon the match in progress: write out whatever flush() emits for the
 * current state, then the current input byte p->buf[p->offset] unchanged,
 * and return the matcher to state_want_cr.
 */
void flush_and_reset(processor_t *p) {
  flush(p);
  fputc(p->buf[p->offset], stdout);
  p->cs = state_want_cr;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the `main` binary from every .c file in this directory.
# Extra compiler flags can be passed on the command line via OPTFLAGS.
CFLAGS = -O2 -Wall $(OPTFLAGS)
CC = clang
SOURCES = $(wildcard *.c)
OBJECTS := $(patsubst %.c,%.o,$(SOURCES))

# `all` and `clean` name actions, not files; without this a file called
# `all` or `clean` in the directory would stop the target from running.
.PHONY : all clean

all : main

main : $(OBJECTS)
	$(CC) -o main $(CFLAGS) $(OBJECTS)

clean :
	rm -f $(OBJECTS)
	rm -f main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment