Created
March 31, 2024 12:13
-
-
Save dutchLuck/aa7a01873957e409ed5f45691e0e5d48 to your computer and use it in GitHub Desktop.
typos word frequency count
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* T Y P O S . C | |
* | |
* modified version that can be compiled by gcc | |
* Original T Y P O S is shown in a listing found on URL | |
* https://archive.org/details/68micro-vol-04-num-11/page/n35/mode/1up | |
*/ | |
/*************************************************************/ | |
/* Written by: Jodie A. Zoeller, SWTPc, August 1982 */ | |
/* */ | |
/* TYPOS is a program designed to help catch spelling errors.*/ | |
/* It keeps a frequency count of all alphabetic words in the */ | |
/* specified file. If no tolerance is supplied by the user */
/* TYPOS uses 3. This means that all words occurring 3 times */
/* or less will be displayed after the program is finished */ | |
/* scanning the specified file. */ | |
/* */ | |
/* Syntax: typos <filename> [tolerance] */ | |
/*************************************************************/ | |
#include <ctype.h> /* isalpha() */
#include <limits.h> /* LONG_MAX */
#include <stdio.h> /* printf() */
#include <stdlib.h> /* exit() */
#include <string.h> /* strlen(), strcpy() */

#include <sys/types.h>
#include <sys/uio.h>
#include <fcntl.h> /* open() */
#include <unistd.h> /* read() */
/*
 * Compile-time constants.
 * ERR is parenthesised so that the negative literal always expands as a
 * single operand, whatever expression the macro is used in.
 */
#define MAX 100 /* size of the word buffer, terminating NUL included */
#define CR 0x0D /* hex value for carriage return */
#define ERR (-1) /* open file error return code */
#define HYPHEN '-' /* accepted inside a word, never as its first character */
#define QUOTE 0x27 /* ASCII apostrophe; accepted inside a word */
#define SEVEN_BIT_ASCII_MASK 0x7F /* clears the high bit of each input byte */
#define DEFAULT_TOLERANCE 3 /* tolerance used when none (or < 1) is given */
/*
 * One entry of the binary word tree: a distinct word together with the
 * number of times it has been seen.
 */
typedef struct symbol {
    struct symbol *left;  /* child subtree reached through the left link */
    struct symbol *right; /* child subtree reached through the right link */
    char *text;           /* NUL-terminated copy of the word */
    long count;           /* occurrences counted so far */
} SYMBOL;
char word[ MAX ]; /* buffer to collect word in */ | |
int infd; /* file descriptor for input */ | |
SYMBOL * treeroot = NULL; | |
long getDcml( char * ptr ); | |
int scanFileAndBuildWordTree( void ); | |
int treeWord( void ); | |
SYMBOL * find( char * token, SYMBOL ** parent ); | |
SYMBOL * node( char * key ); | |
void traverse( SYMBOL * tree, long t ); | |
void giveUp( char * what ); | |
int main( int argc, char * argv[] ) { | |
long t; | |
printf("\n T Y P O S Word Frequency Counter, Version 1.0 \n" ); | |
if( argc < 2 ) printf( "Useage Syntax: TYPOS <filename> [tolerance]\n" ); | |
else { | |
if(( infd = open( argv[1],0)) == ERR ) { | |
printf( "?? Unable to open file %s\n", argv[1]); | |
exit( EXIT_FAILURE ); | |
} | |
t = ( argc > 2 ) ? getDcml( argv[2]) : DEFAULT_TOLERANCE; | |
if( t < 1 ) t = DEFAULT_TOLERANCE; | |
scanFileAndBuildWordTree(); | |
close( infd ); | |
/* Output section */ | |
for( ; t > 0; t-- ) { | |
printf("\nWords occuring %ld time%c:\n", t, ( t > 1 ) ? 's' : ' ' ); | |
traverse( treeroot, t ); | |
} | |
} /* end ELSE */ | |
return( EXIT_SUCCESS ); | |
} /* end MAIN */ | |
int scanFileAndBuildWordTree( void ) { | |
char chr; | |
int cc; | |
cc = 0; | |
while(( read( infd, &chr, 1 )) != 0) { | |
chr &= SEVEN_BIT_ASCII_MASK; | |
if( isalpha( chr ) || ( cc > 0 && ( chr == QUOTE || chr == HYPHEN ))) | |
word[ cc++ ] = chr; | |
else if( cc > 0 ) { | |
word[ cc ] = '\0'; | |
treeWord(); | |
cc = 0; | |
} | |
if( cc == MAX ) { | |
printf("A word greater than %d characters encountered.\n", cc); | |
printf("TYPOS cannot continue.\n"); | |
printf("See your utilities manual for TYPOS criteria.\n\n"); | |
return( 1 ); | |
} | |
} | |
return( 1 ); | |
} | |
int treeWord( void ) { | |
SYMBOL * node; | |
node = find( word, &treeroot ); | |
node->count += 1; | |
return( 1 ); | |
} | |
/*
 * Looks token up in the tree whose root link is *parent and returns its
 * entry.  When the word is absent a new entry is created with node() and
 * hooked into the empty link where the search ended.
 *
 * Ordering: a token that compares greater than the current entry goes down
 * the left link, a smaller one down the right link.
 */
SYMBOL * find( char * token, SYMBOL ** parent ) {
    SYMBOL **link;

    for (link = parent; *link != NULL; ) {
        SYMBOL *here = *link;
        int order = strcmp(token, here->text);

        if (order == 0) {
            return here;
        }
        link = (order > 0) ? &here->left : &here->right;
    }

    /* Fell off the tree: the word is new. */
    *link = node(token);
    return *link;
}
SYMBOL * node( char * key ) { | |
char *cptr; SYMBOL *loc; | |
if(( loc = ( SYMBOL * ) malloc( sizeof( SYMBOL ))) == NULL ) | |
giveUp( "Word Record"); | |
loc->left = loc->right = NULL; | |
loc->count = 0; | |
if(( cptr = ( char * ) malloc( strlen( key ) + 1 )) == NULL ) | |
giveUp( "Word Definition"); | |
loc->text = cptr; | |
strcpy( cptr, key ); | |
return( loc ); | |
} | |
/*
 * Walks the tree left subtree first, then the entry itself, then the right
 * subtree, printing every word whose count equals t.
 * An empty tree prints nothing.
 */
void traverse( SYMBOL * tree, long t ) {
    /* The left side is handled recursively; the right side by moving on. */
    for (; tree != NULL; tree = tree->right) {
        traverse(tree->left, t);
        if (tree->count == t) {
            printf(" %s\n", tree->text);
        }
    }
}
/*
 * Converts the run of decimal digits at the start of ptr to a long.
 *
 * Conversion stops at the first non-digit, so "12abc" yields 12.  A string
 * with no leading digit, or a NULL pointer, yields 0.  A value too large for
 * a long yields LONG_MAX rather than overflowing, because signed overflow is
 * undefined behaviour.
 */
long getDcml( char * ptr ) {
    long num = 0;

    if (ptr == NULL) {
        return 0;
    }
    for (; *ptr >= '0' && *ptr <= '9'; ptr++) {
        int digit = *ptr - '0';

        /* Would 10 * num + digit exceed LONG_MAX?  Test before computing. */
        if (num > (LONG_MAX - digit) / 10) {
            return LONG_MAX;
        }
        num = 10 * num + digit;
    }
    return num;
}
/*
 * Out-of-memory handler: reports on stderr which kind of object could not
 * be allocated, closes the input file and ends the program with
 * EXIT_FAILURE.  Does not return.
 */
void giveUp( char * what ) {
    extern int infd; /* the input descriptor defined at file scope */

    fprintf(stderr, "?? Not enough memory left for a %s\n", what);
    close(infd);
    fprintf(stderr, "Word table is full. TYPOS Aborting!\n");

    exit(EXIT_FAILURE);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
typos
Code in this gist owes its existence to Jodie A. Zoeller as his name appears as the author of T Y P O S in a listing shown on
pages 34 and 35 of the 68 Micro Journal (Volume IV Issue XI) November 1982. The file typos.c is a descendant of the original, but has been modified to allow it to be compiled by a modern gcc compatible C compiler.