/*
Function: Nagy terjedelmű, egy megánál nagyobb méretű szövegben betűk számlálása openMP és R nyelv segítségével. A szöveg csak angol karakterekből állhat. Betűk választása: 2-től 20-ig terjedően.
Version: 2015.11.29.
Copyright: Harkály Gergő | Miskolci Egyetem
Install:
- CMakeLists.txt:
	CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
	PROJECT(CharacterCounterOpenMP C)
	SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
	SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
	SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
	SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -fopenmp")
	SUBDIRS(src/app)
	SET(CMAKE_VERBOSE_MAKEFILE on)
- src/app/CMakeLists.txt:
	SET(TARGET_NAME CharacterCounterOpenMP)
	LINK_DIRECTORIES(${LIBRARY_OUTPUT_PATH})
	SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
	AUX_SOURCE_DIRECTORY(. SRC)
	ADD_EXECUTABLE(${TARGET_NAME} ${SRC})
	TARGET_LINK_LIBRARIES(${TARGET_NAME} -lm)
- R/figure.R:
	library('ggplot2')
	library('reshape')
	library('Cairo')
	library("grid")
	plottitle <- "Karakterszámlálás"
	plotlabels <- c("Szekvenciális", "Automatikus párhuzamosítás", "Kontrollált párhuzamosítás")
	xlabel <- "Vizsgált karakterek száma"
	ylabel <- "Idő (s)"
	data0 <- read.table('R/results_sequence.dat')
	data1 <- read.table('R/results_simple.dat')
	data2 <- read.table('R/results_controlled.dat')
	d <- data.frame(data0$V2, data0$V1, data1$V1, data2$V1)
	colnames(d) <- c('alpha', 'runtime1', 'runtime2', 'runtime3')
	d <- melt(d, id='alpha', variable_name='series')
	CairoPDF("R/results.pdf", width=30, height=12)  
	p<-ggplot(d, 
		aes_string(x=names(d)[1], y=names(d)[3], colour=names(d)[2]), 
		labeller=label_parsed) + 
		geom_point(size=4) + 
		geom_line(size=1.5) + 
		labs(title=plottitle) + 
		xlab(xlabel) + 
		ylab(ylabel) + 
		scale_colour_manual(values=c("black", "blue", "red", "green", "purple"), name="", labels=plotlabels, guide=guide_legend(keyheight=unit(2, "line"), keywidth=unit(5, "line"))) +
		theme_gray(24) +
		scale_x_continuous(breaks=round(seq(1.0, 20.0, by=1.0), 1)) +
		scale_y_continuous(breaks=sort(c(round(seq(0, max(d$value)+1, by=0.5), 1)))) +
		theme(legend.position="bottom")
	print(p)
	dev.off()
- R/results_simple.dat
- R/results_sequence.dat
- R/results_controlled.dat
Run: cmake .; make clean; make; ./bin/CharacterCounterOpenMP; Rscript R/figure.R
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <omp.h>

#define NUM_THREADS 4

/*
 * Opens a results file for appending.
 * Returns the stream, or NULL after reporting the failure with perror().
 */
static FILE *open_results(const char *path)
{
	FILE *stream = fopen(path, "a");
	if(!stream)
	{
		perror(path);
	}
	return stream;
}

/*
 * Reads the whole file at `path` into a freshly allocated, NUL-terminated
 * buffer and stores its length in bytes in `*size`.
 * Returns the buffer (the caller must free() it), or NULL after printing an
 * error message when the file cannot be opened, measured, allocated or read.
 */
static char *load_file(const char *path, long *size)
{
	// binary mode: the byte count reported by ftell() must match what fread() delivers
	FILE *fp = fopen(path, "rb");
	if(!fp)
	{
		perror(path);
		return NULL;
	}
	long len = -1;
	if(fseek(fp, 0, SEEK_END) == 0) // jump to the end of the file...
	{
		len = ftell(fp); // ...so the position equals the file's length
	}
	if(len < 0)
	{
		fclose(fp);
		printf("Fájlbeolvasási hiba!\n");
		return NULL;
	}
	rewind(fp); // back to the start of the file
	char *data = calloc(1, (size_t)len + 1); // +1 keeps a terminating NUL
	if(!data)
	{
		fclose(fp);
		printf("Memóriafoglalási hiba!\n");
		return NULL;
	}
	// counting single bytes (instead of one lSize-sized item) lets an empty file pass
	if(fread(data, 1, (size_t)len, fp) != (size_t)len)
	{
		fclose(fp);
		free(data);
		printf("Fájlbeolvasási hiba!\n");
		return NULL;
	}
	fclose(fp);
	*size = len;
	return data;
}

/* Sets the first `n` counters to zero. */
static void clear_counts(long *counts, int n)
{
	for(int i=0; i<n; i++)
	{
		counts[i] = 0;
	}
}

/* Prints one line per searched character with its number of occurrences. */
static void print_counts(const char *chars, const long *counts, int n)
{
	for(int i=0; i<n; i++)
	{
		printf("%c előfordulásainak a száma: %ld\n", chars[i], counts[i]);
	}
}

/*
 * Counts the occurrences of 2-20 user-chosen characters in "text.txt" three
 * times: sequentially, with an OpenMP "parallel for" over the characters, and
 * with a parallel region in which every thread scans its own slice of the text.
 * The wall-clock time of each run and the number of characters are appended
 * to R/results_sequence.dat, R/results_simple.dat and R/results_controlled.dat.
 *
 * Returns 0 on success and 1 on any I/O, allocation, input or threading error.
 */
int main(void)
{
	enum { MIN_CHARS = 2, MAX_CHARS = 20 }; // limits announced in the prompt below

	int status = 1; // pessimistic default, cleared once everything succeeded
	int i;
	int char_len = 0;
	int team_mismatch = 0; // set when OpenMP delivers a different team size
	long lSize = 0; // length of the text in bytes
	long lSize_partial; // length of the slice scanned by each thread
	double start, end, sumtime;
	char *buffer = NULL;
	char char_list[MAX_CHARS]; // stores the searched characters
	long char_num_list[MAX_CHARS] = {0}; // stores the occurrence count of each searched character
	const char *filename = "text.txt";

	FILE *results_simple = open_results("R/results_simple.dat");
	FILE *results_sequence = open_results("R/results_sequence.dat");
	FILE *results_controlled = open_results("R/results_controlled.dat");
	if(!results_simple || !results_sequence || !results_controlled)
	{
		goto cleanup;
	}

	buffer = load_file(filename, &lSize);
	if(!buffer)
	{
		goto cleanup;
	}

	// ask how many characters to count until a value in range is given
	for(;;)
	{
		printf("Megszámolni kívánt karakterek száma (2-20): ");
		int got = scanf("%d", &char_len);
		if(got == EOF)
		{
			printf("Bemeneti hiba!\n");
			goto cleanup;
		}
		if(got != 1)
		{
			// not a number: drop the rest of the line, otherwise scanf would
			// keep failing on the same unread input forever
			int ch;
			while((ch = getchar()) != '\n' && ch != EOF)
			{
			}
			continue;
		}
		if(char_len >= MIN_CHARS && char_len <= MAX_CHARS)
		{
			break;
		}
	}

	// read the characters to search for
	for(i=0; i<char_len; i++)
	{
		printf("%d: ", i+1);
		// the leading space skips the newline left behind by the previous input
		if(scanf(" %c", &char_list[i]) != 1)
		{
			printf("Bemeneti hiba!\n");
			goto cleanup;
		}
	}

	// sequential run
	start = omp_get_wtime();
	for(i=0; i<char_len; i++)
	{
		for(long j=0; j<lSize; j++)
		{
			if(buffer[j] == char_list[i])
			{
				char_num_list[i]++;
			}
		}
	}
	end = omp_get_wtime();
	sumtime = end-start;
	print_counts(char_list, char_num_list, char_len);
	printf("Futási idő szekvenciálisan: %f\n", sumtime);
	fprintf(results_sequence, "%f\t%d\n", sumtime, char_len);
	clear_counts(char_num_list, char_len);

	// simple run: the searched characters are distributed among the threads;
	// both loop variables are declared in the loops, so each thread has its own
	start = omp_get_wtime();
	#pragma omp parallel for num_threads(NUM_THREADS) schedule(dynamic)
	for(int c=0; c<char_len; c++)
	{
		for(long j=0; j<lSize; j++)
		{
			if(buffer[j] == char_list[c])
			{
				char_num_list[c]++;
			}
		}
	}
	end = omp_get_wtime();
	sumtime = end-start;
	print_counts(char_list, char_num_list, char_len);
	printf("Futási idő automatikus párhuzamosítással %d szálon: %f\n", NUM_THREADS, sumtime);
	fprintf(results_simple, "%f\t%d\n", sumtime, char_len);
	clear_counts(char_num_list, char_len);

	// controlled run: the text is split into NUM_THREADS consecutive slices and
	// every thread scans its slice directly in `buffer`, so no copies of the
	// text (and no large stack arrays) are needed
	lSize_partial = lSize/NUM_THREADS;
	start = omp_get_wtime();
	omp_set_num_threads(NUM_THREADS);
	// parallel region instead of parallel for
	#pragma omp parallel
	{
		int ID = omp_get_thread_num();
		long char_num_list_p[MAX_CHARS] = {0}; // per-thread counters
		if(omp_get_num_threads() != NUM_THREADS)
		{
			// the slicing below assumes exactly NUM_THREADS threads; report the
			// problem after the region instead of exiting while threads run
			if(ID == 0)
			{
				team_mismatch = 1;
			}
		}
		else
		{
			long from = (long)ID*lSize_partial;
			// the last thread also takes the bytes left over by the division
			long to = (ID == NUM_THREADS-1) ? lSize : from+lSize_partial;
			for(int n=0; n<char_len; n++)
			{
				for(long m=from; m<to; m++)
				{
					if(buffer[m] == char_list[n])
					{
						char_num_list_p[n] += 1;
					}
				}
			}
			// merge the per-thread counters into the shared totals one thread at a time
			#pragma omp critical
			{
				for(int n=0; n<char_len; n++)
				{
					char_num_list[n] += char_num_list_p[n];
				}
			}
		}
	}
	if(team_mismatch)
	{
		printf("Nem sikerült megfelelő számú szálat lefoglalni!\n");
		goto cleanup;
	}
	end = omp_get_wtime();
	sumtime = end - start;
	print_counts(char_list, char_num_list, char_len);
	printf("Futási idő %d szálon: %f\n", NUM_THREADS, sumtime);
	fprintf(results_controlled, "%f\t%d\n", sumtime, char_len);
	status = 0;

cleanup:
	// release whatever was acquired; every path through main ends here
	if(results_simple)
	{
		fclose(results_simple);
	}
	if(results_sequence)
	{
		fclose(results_sequence);
	}
	if(results_controlled)
	{
		fclose(results_controlled);
	}
	free(buffer);
	return status;
}