Skip to content

Instantly share code, notes, and snippets.

@dutchLuck
Created March 31, 2024 12:13
Show Gist options
  • Save dutchLuck/aa7a01873957e409ed5f45691e0e5d48 to your computer and use it in GitHub Desktop.
Save dutchLuck/aa7a01873957e409ed5f45691e0e5d48 to your computer and use it in GitHub Desktop.
typos word frequency count
/*
* T Y P O S . C
*
* modified version that can be compiled by gcc
* Original T Y P O S is shown in a listing found on URL
* https://archive.org/details/68micro-vol-04-num-11/page/n35/mode/1up
*/
/*************************************************************/
/* Written by: Jodie A. Zoeller, SWTPc, August 1982 */
/* */
/* TYPOS is a program designed to help catch spelling errors.*/
/* It keeps a frequency count of all alphabetic words in the */
/* specified file. If no tolerance is supplied by the user */
/* TYPOS uses 3. This means that all words occurring 3 times */
/* or less will be displayed after the program is finished */
/* scanning the specified file. */
/* */
/* Syntax: typos <filename> [tolerance] */
/*************************************************************/
#include <stdio.h> /* printf() */
#include <stdlib.h> /* exit() */
#include <string.h> /* strlen(), strcpy() */
#include <ctype.h> /* isalpha() */
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h> /* read() */
#include <fcntl.h> /* open() */
/* Size of the word[] collection buffer, including the terminating NUL. */
#define MAX 100 /* maximum length of buffer */
/* ASCII carriage return; not referenced by the code visible in this file. */
#define CR 0x0D /* hex value for carriage return */
/* Value compared against the result of open() to detect failure. */
#define ERR -1 /* open file error return code */
/* A hyphen is accepted inside a word (but not as its first character). */
#define HYPHEN '-'
/* ASCII apostrophe (0x27); accepted inside a word like HYPHEN. */
#define QUOTE 0x27
/* ANDed with every input byte to discard the top bit before classification. */
#define SEVEN_BIT_ASCII_MASK 0x7F
/* Tolerance used when none is given, or when the given one is below 1. */
#define DEFAULT_TOLERANCE 3
/*
 * One node of the binary word tree: a distinct word together with the
 * number of times it has been seen, plus links to its two subtrees.
 */
typedef struct symbol {
/* subtree searched by find() when the token compares greater than text */
struct symbol *left;
/* subtree searched by find() when the token compares less than text */
struct symbol *right;
/* heap-allocated, NUL-terminated copy of the word (allocated in node()) */
char *text;
/* occurrence count; set to 0 by node() and incremented by treeWord() */
long count;
} SYMBOL;
/* Shared state: the scanner fills word[], treeWord() looks it up. */
char word[ MAX ]; /* buffer to collect word in */
/* Set by main() from open(); read by the scanner, closed by main()/giveUp(). */
int infd; /* file descriptor for input */
/* Root of the word tree; NULL until the first word is inserted by find(). */
SYMBOL * treeroot = NULL;
/* Convert the leading decimal digits of ptr to a long. */
long getDcml( char * ptr );
/* Read infd and pass every collected word to treeWord(). */
int scanFileAndBuildWordTree( void );
/* Count one occurrence of the word currently held in word[]. */
int treeWord( void );
/* Locate token in the tree rooted at *parent, inserting it if absent. */
SYMBOL * find( char * token, SYMBOL ** parent );
/* Allocate a new tree node holding a copy of key with a count of 0. */
SYMBOL * node( char * key );
/* Print every word in tree whose count equals t. */
void traverse( SYMBOL * tree, long t );
/* Report an out-of-memory condition for `what` and terminate the program. */
void giveUp( char * what );
/*
 * Program entry point.
 *
 * Usage: typos <filename> [tolerance]
 *
 * Opens <filename> read-only, builds the word tree from it and then, for
 * each count from the tolerance down to 1, lists the words that occurred
 * exactly that many times.  A tolerance that is missing or below 1 is
 * replaced by DEFAULT_TOLERANCE.
 *
 * Returns EXIT_SUCCESS after a completed report, or EXIT_FAILURE when no
 * filename was given or the file could not be opened.  Diagnostics are
 * written to stderr so they do not mix with the report on stdout.
 */
int main( int argc, char * argv[] ) {
    long t;

    printf("\n T Y P O S Word Frequency Counter, Version 1.0 \n" );

    /* A missing filename is a usage error, so report failure to the caller. */
    if( argc < 2 ) {
        fprintf( stderr, "Useage Syntax: TYPOS <filename> [tolerance]\n" );
        return( EXIT_FAILURE );
    }

    if(( infd = open( argv[1], O_RDONLY )) == ERR ) {
        fprintf( stderr, "?? Unable to open file %s\n", argv[1] );
        return( EXIT_FAILURE );
    }

    t = ( argc > 2 ) ? getDcml( argv[2] ) : DEFAULT_TOLERANCE;
    if( t < 1 ) {
        t = DEFAULT_TOLERANCE;
    }

    scanFileAndBuildWordTree();
    close( infd );

    /* Output section: highest count first, one tree walk per count. */
    for( ; t > 0; t-- ) {
        printf("\nWords occuring %ld time%c:\n", t, ( t > 1 ) ? 's' : ' ' );
        traverse( treeroot, t );
    }
    return( EXIT_SUCCESS );
} /* end MAIN */
/*
 * Read the whole of infd and record every word found in it.
 *
 * Each input byte is masked to 7 bits.  A word starts with an alphabetic
 * character and continues through alphabetic characters, apostrophes
 * (QUOTE) and hyphens; any other character ends it.  Each completed word
 * is left NUL-terminated in word[] and handed to treeWord().
 *
 * Returns 1 once the input has been consumed.  Two conditions are fatal:
 * a read error, and a word that reaches MAX characters (it would leave no
 * room in word[] for the terminator).  In both cases a diagnostic is
 * written to stderr, infd is closed and the program exits with
 * EXIT_FAILURE, the same way giveUp() handles running out of memory.
 */
int scanFileAndBuildWordTree( void ) {
    char buf[ 4096 ];   /* input is read in chunks rather than a byte per call */
    ssize_t got;
    ssize_t i;
    int cc = 0;         /* number of characters collected in word[] so far */

    /* read() yields > 0 for data, 0 at end of file and -1 on error; only */
    /* the first case may be treated as input.                            */
    while(( got = read( infd, buf, sizeof buf )) > 0 ) {
        for( i = 0; i < got; i++ ) {
            char chr = (char)( buf[ i ] & SEVEN_BIT_ASCII_MASK );

            if( isalpha( chr ) || ( cc > 0 && ( chr == QUOTE || chr == HYPHEN ))) {
                word[ cc++ ] = chr;
                if( cc == MAX ) {
                    fprintf( stderr, "A word greater than %d characters encountered.\n", cc );
                    fprintf( stderr, "TYPOS cannot continue.\n" );
                    fprintf( stderr, "See your utilities manual for TYPOS criteria.\n\n" );
                    close( infd );
                    exit( EXIT_FAILURE );
                }
            }
            else if( cc > 0 ) {
                word[ cc ] = '\0';
                treeWord();
                cc = 0;
            }
        }
    }

    if( got < 0 ) {
        perror( "?? Error reading input file" );
        close( infd );
        exit( EXIT_FAILURE );
    }

    /* A word that runs right up to end of file has no delimiter after it, */
    /* so it must be counted here.                                         */
    if( cc > 0 ) {
        word[ cc ] = '\0';
        treeWord();
    }
    return( 1 );
}
/*
 * Count one occurrence of the word currently held in word[].
 * find() supplies the matching tree entry, creating it when the word has
 * not been seen before.  Always returns 1.
 */
int treeWord( void ) {
    SYMBOL * entry = find( word, &treeroot );

    entry->count++;
    return( 1 );
}
/*
 * Look token up in the tree whose root pointer is *parent.
 *
 * Returns the node whose text equals token.  When no such node exists a
 * new one is created with node(), linked into the empty slot where the
 * search ended, and returned.
 *
 * Tokens that compare greater than a node descend into its left subtree
 * and smaller ones into its right subtree, so a left-node-right walk
 * visits the words in descending strcmp() order.
 */
SYMBOL * find( char * token, SYMBOL ** parent ) {
    SYMBOL * here;
    int order;

    for( here = *parent; here != NULL; here = *parent ) {
        order = strcmp( token, here->text );
        if( order == 0 ) {
            return( here );
        }
        if( order > 0 ) {
            parent = &here->left;
        } else {
            parent = &here->right;
        }
    }

    /* parent now addresses the NULL link where the new word belongs. */
    *parent = node( token );
    return( *parent );
}
/*
 * Create a leaf node for key.
 *
 * The node starts with no children and a count of 0, and owns a private
 * heap copy of key.  If either allocation fails, giveUp() is called with
 * a description of what could not be allocated.
 */
SYMBOL * node( char * key ) {
    SYMBOL *fresh;
    char *copy;

    fresh = malloc( sizeof *fresh );
    if( fresh == NULL ) {
        giveUp( "Word Record");
    }
    fresh->count = 0;
    fresh->right = NULL;
    fresh->left = NULL;

    copy = malloc( strlen( key ) + 1 );   /* +1 for the terminating NUL */
    if( copy == NULL ) {
        giveUp( "Word Definition");
    }
    strcpy( copy, key );
    fresh->text = copy;

    return( fresh );
}
/*
 * Print, one per line, every word in tree whose count equals t.
 * Nodes are visited left subtree first, then the node itself, then the
 * right subtree.  An empty tree prints nothing.
 */
void traverse( SYMBOL * tree, long t ) {
    /* The left side is handled by recursion; the right side by moving */
    /* down the tree in this loop.                                      */
    while( tree != NULL ) {
        traverse( tree->left, t );
        if( tree->count == t ) {
            printf(" %s\n", tree->text );
        }
        tree = tree->right;
    }
}
/*
 * Convert the run of decimal digits at the start of ptr to a long.
 *
 * Conversion stops at the first character that is not a digit.  If ptr is
 * NULL or does not begin with a digit (this includes leading blanks and
 * signs) the result is 0.  A value too large for a long yields LONG_MAX
 * instead of overflowing.
 */
long getDcml( char * ptr ) {
    if( ptr == NULL || *ptr < '0' || *ptr > '9' ) {
        return( 0 );
    }
    /* The string is known to start with a digit, so strtol() skips no  */
    /* whitespace or sign here; it clamps out-of-range values for us.   */
    return( strtol( ptr, NULL, 10 ));
}
/*
 * Fatal out-of-memory handler.
 * Reports on stderr that there was no memory for `what`, closes the
 * input file descriptor and terminates the program with EXIT_FAILURE.
 * This function does not return.
 */
void giveUp( char * what ) {
    extern int infd;   /* the input descriptor defined at file scope */

    fprintf( stderr, "?? Not enough memory left for a %s\n", what );
    (void) close( infd );
    fprintf( stderr, "Word table is full. TYPOS Aborting!\n" );

    exit( EXIT_FAILURE );
}
@dutchLuck
Copy link
Author

dutchLuck commented Mar 31, 2024

typos

Code in this gist owes its existence to Jodie A. Zoeller, whose name appears as the author of T Y P O S in a listing shown on
pages 34 and 35 of the 68 Micro Journal (Volume IV Issue XI), November 1982. The file typos.c is a descendant of the original, but has been modified so that it can be compiled by a modern gcc-compatible C compiler.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment