// I N C L U D E S /////////////////////////////////////////////////////////// #include #include #include #include #include // D E F I N E S ///////////////////////////////////////////////////////////// #define MAX_STRING_LENGTH 64 #define MAX_FILE_NAME 256 // T Y P E S ///////////////////////////////////////////////////////////////// typedef enum { FALSE = 0, TRUE = 1 } boolean_t; typedef unsigned long dword_t; // S T R U C T U R E S /////////////////////////////////////////////////////// typedef struct node_s { char str[MAX_STRING_LENGTH]; struct node_s* next; } node_t; typedef struct { node_t* head; node_t* tail; dword_t size; } linked_list; typedef struct { dword_t** matrix; dword_t num_rows; dword_t num_cols; size_t spacing; linked_list row_labels; linked_list col_labels; dword_t threshold; } table_t; // G L O B A L S ////////////////////////////////////////////////////////////// // F U N C T I O N S ////////////////////////////////////////////////////////// void InitList(linked_list *list) { list->head = NULL; list->tail = NULL; list->size = 0; } // end ListInit /////////////////////////////////////////////////////////////////////////////// void AddToList(linked_list *list, const char *str) { node_t *new_node; new_node = (node_t*)malloc(sizeof(node_t)); strcpy(new_node->str, str); new_node->next = NULL; if (list->head == NULL) { list->head = new_node; list->tail = list->head; } // end if else { list->tail->next = new_node; list->tail = list->tail->next; } // end list->size++; } // end AddToList /////////////////////////////////////////////////////////////////////////////// boolean_t InTableRowLabels(table_t *table, char* str, dword_t curr_file) { node_t *p; dword_t curr_index; if (table->row_labels.head == NULL ) return FALSE; curr_index = 0; for (p = table->row_labels.head; p != NULL; p = p->next) { if (strcmp(p->str, str) == 0) { table->matrix[curr_index][curr_file]++; return TRUE; } curr_index++; } return FALSE; } // end InTableRowLabels /////////////////////////////////////////////////////////////////////////////// void DestroyList(linked_list *list) { node_t *p, *prev; p = list->head; while (p != NULL) { prev = p; p = p->next; if (prev) { free(prev); prev = NULL; } } // end while InitList(list); } // end DestroyList /////////////////////////////////////////////////////////////////////////////// void StrToLower(char* str) { char *p = NULL; for( p = str; p < str + strlen(str); p++ ) { *p = tolower(*p); } } // end StrToLower /////////////////////////////////////////////////////////////////////////////// boolean_t GetWords(table_t *table, char *filename, dword_t curr_file) { char ch, word[MAX_STRING_LENGTH]; boolean_t word_found; dword_t num_words, word_num_chars; FILE *fp; num_words = 0; word_found = FALSE; if (!(fp = fopen(filename, "r"))) { return FALSE; } while((ch = fgetc(fp)) != EOF) { if ( !word_found ) { if (isalpha(ch)) { word_found = TRUE; word_num_chars = 0; word[word_num_chars++] = ch; } // end } // end if else { if (!isalpha(ch))// && (ch != '-')) { word_found = FALSE; if ( word_num_chars > 1 ) { word[word_num_chars++] = '\0'; StrToLower(word); InTableRowLabels(table, word, curr_file); num_words++; } // end if } // end if else { word[word_num_chars++] = ch; } // end else } // end else } // end for i fclose(fp); return TRUE; } // end GetWords /////////////////////////////////////////////////////////////////////////////// void InitTable(table_t *table) { table->num_rows = 0; table->num_cols = 0; table->matrix = NULL; table->spacing = 0; InitList(&table->row_labels); InitList(&table->col_labels); } // end InitTable /////////////////////////////////////////////////////////////////////////////// void CreateTable(table_t *table) { dword_t row, col; table->num_rows = table->row_labels.size; table->num_cols = table->col_labels.size; table->matrix = (dword_t**)malloc(sizeof(dword_t*) * table->num_rows); for (row=0; rownum_rows; row++) { table->matrix[row] = (dword_t*)malloc(sizeof(dword_t) * table->num_cols); for (col=0; colnum_cols; col++) { table->matrix[row][col] = 0; } } } // end CreateTable /////////////////////////////////////////////////////////////////////////////// void DestroyTable(table_t *table) { dword_t row; if (table->matrix) { for (row=0; rownum_rows; row++) { if (table->matrix[row]) free(table->matrix[row]); } free(table->matrix); } table->num_rows = 0; table->num_cols = 0; DestroyList(&table->row_labels); DestroyList(&table->col_labels); InitTable(table); } // end CreateTable /////////////////////////////////////////////////////////////////////////////// boolean_t PrintToFileTable(table_t *table, char *filename) { node_t *p; dword_t row, col, num_files; FILE *fp; if (table->row_labels.head == NULL ) return FALSE; if (!(fp = fopen(filename, "w"))) return FALSE; // print colunm labels and data row = 0; for (p = table->row_labels.head; p != NULL; p = p->next) { num_files = 0; for (col = 0; col < table->num_cols; col++) { if (table->matrix[row][col]) num_files++; } if (num_files >= table->threshold) { fprintf(fp, "%s ", p->str); for (col = 0; col < table->num_cols; col++) { fprintf(fp, "%d ", table->matrix[row][col]); } fprintf(fp, "%d\n", num_files); } row++; } fclose(fp); return TRUE; } // end PrintToFileTable /////////////////////////////////////////////////////////////////////////////// int main(int argc, char* argv[]) { table_t table; node_t *p; FILE *fp; char file_in_1[MAX_FILE_NAME], file_in_2[MAX_FILE_NAME], file_out[MAX_FILE_NAME]; // char file_in_1[]="words.txt", // file_in_2[]="grimmstories.txt", // file_out[]="words_enhanced.txt"; char word[MAX_STRING_LENGTH], filename[MAX_FILE_NAME]; dword_t frequency, file_no, threshold; if (argv[1]) { strcpy(file_in_1, argv[1]); } else { printf("\nparameters: words file_names threshold [file_out]\n"); return 0; } if (argv[2]) { strcpy(file_in_2, argv[2]); } else { printf("Error: second parameter is missing\n"); return 0; } if (argv[3]) { threshold = atoi(argv[3]); } else { printf("Error: threshold is missing\n"); return 0; } if (argv[4]) strcpy(file_out, argv[4]); else strcpy(file_out, "words_enhanced.txt"); InitTable(&table); table.threshold = threshold; if (!(fp = fopen(file_in_1, "r")) ) { printf("Error: cannot open %s\n", file_in_1); return 0; } while(fscanf(fp, "%s %d", word, &frequency) != EOF) { AddToList(&table.row_labels, word); } // end while fclose(fp); if (!(fp = fopen(file_in_2, "r")) ) { printf("Error: cannot open %s\n", file_in_2); return 0; } while(fscanf(fp, "%s", filename) != EOF) { AddToList(&table.col_labels, filename); } fclose(fp); CreateTable(&table); file_no = 0; for (p = table.col_labels.head; p != NULL; p = p->next) { GetWords(&table, p->str, file_no); file_no++; } // end while PrintToFileTable(&table, file_out); DestroyTable(&table); printf("%s is created\n", file_out); return 0; } // end main