In this program, we are going to share a C++ program to find most frequent words in a file. If you are a beginner and want to start learning the C++ programming, then keep your close attention in this tutorial as I am going to share a program for C++ program to generate random numbers.
To increase your C++ knowledge, practice all C++ programs:
Copy the below C++ program and execute it with the help of GCC compiler.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
#include <iostream> #include <cstdio> #include <cstdlib> #include <cstring> #include <cctype> # define MAX_CHARS 26 # define MAX_WORD_SIZE 30 using namespace std; struct TrieNode { bool isEnd; unsigned frequency; int indexMinHeap; TrieNode* child[MAX_CHARS]; }; struct MinHeapNode { TrieNode* root; unsigned frequency; char* word; }; struct MinHeap { unsigned capacity; int count; MinHeapNode* array; }; TrieNode* newTrieNode() { TrieNode* trieNode = new TrieNode; trieNode->isEnd = 0; trieNode->frequency = 0; trieNode->indexMinHeap = -1; for (int i = 0; i < MAX_CHARS; ++i) trieNode->child[i] = NULL; return trieNode; } MinHeap* createMinHeap(int capacity) { MinHeap* minHeap = new MinHeap; minHeap->capacity = capacity; minHeap->count = 0; minHeap->array = new MinHeapNode [minHeap->capacity]; return minHeap; } void swapMinHeapNodes (MinHeapNode* a, MinHeapNode* b) { MinHeapNode temp = *a; *a = *b; *b = temp; } void minHeapify(MinHeap* minHeap, int idx) { int left, right, smallest; left = 2 * idx + 1; right = 2 * idx + 2; smallest = idx; if (left < minHeap->count && minHeap->array[left].frequency < minHeap->array[smallest].frequency) smallest = left; if (right < minHeap->count && minHeap->array[right].frequency < minHeap->array[smallest].frequency) smallest = right; if (smallest != idx) { minHeap->array[smallest].root->indexMinHeap = idx; minHeap->array[idx].root->indexMinHeap = smallest; swapMinHeapNodes (&minHeap->array[smallest], &minHeap->array[idx]); minHeapify(minHeap, smallest); } } void buildMinHeap(MinHeap* minHeap) { int n, i; n = minHeap->count - 1; for (i = ( n - 1 ) / 2; i >= 0; --i) minHeapify(minHeap, i); } void insertInMinHeap(MinHeap* minHeap, TrieNode** root, const char* word) { if ((*root)->indexMinHeap != -1) { ++( minHeap->array[(*root)->indexMinHeap].frequency); minHeapify(minHeap, (*root)->indexMinHeap); } else if (minHeap->count < minHeap->capacity) { int count = minHeap->count; minHeap->array[count].frequency = (*root)->frequency; minHeap->array[count].word = new char [strlen( word ) + 1]; strcpy(minHeap->array[count].word, word); minHeap->array[count].root = *root; (*root)->indexMinHeap = minHeap->count; ++( minHeap->count ); buildMinHeap( minHeap ); } else if ((*root)->frequency > minHeap->array[0].frequency) { minHeap->array[0].root->indexMinHeap = -1; minHeap->array[0].root = *root; minHeap->array[0].root->indexMinHeap = 0; minHeap->array[0].frequency = (*root)->frequency; delete [] minHeap->array[0].word; minHeap->array[0]. word = new char [strlen( word ) + 1]; strcpy( minHeap->array[0].word, word ); minHeapify (minHeap, 0); } } void insertUtil (TrieNode** root, MinHeap* minHeap, const char* word, const char* dupWord) { if (*root == NULL) *root = newTrieNode(); if (*word != '\0') insertUtil (&((*root)->child[tolower( *word ) - 97]), minHeap, word + 1, dupWord); else { if ((*root)->isEnd) ++((*root)->frequency); else { (*root)->isEnd = 1; (*root)->frequency = 1; } insertInMinHeap(minHeap, root, dupWord); } } void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap) { insertUtil(root, minHeap, word, word); } void displayMinHeap(MinHeap* minHeap) { int i; for (i = 0; i < minHeap->count; ++i) { cout<<minHeap->array[i].word<<" : "<<minHeap->array[i].frequency<<endl; } } void printKMostFreq(FILE* fp, int k) { MinHeap* minHeap = createMinHeap(k); TrieNode* root = NULL; char buffer[MAX_WORD_SIZE]; while (fscanf(fp, "%s", buffer) != EOF) insertTrieAndHeap(buffer, &root, minHeap); displayMinHeap(minHeap); } int main() { int k = 5; FILE *fp = fopen("file.txt", "r"); if (fp == NULL) printf ("File doesn't exist "); else printKMostFreq (fp, k); return 0; } |
The smallest distance is 1.41421
If you like FreeWebMentor and you would like to contribute, you can write an article and mail your article to [email protected] Your article will appear on the FreeWebMentor main page and help other developers.