Assignment question, submitted solution and professor's solution
This commit is contained in:
parent
b438684c5b
commit
de49d402e5
BIN
Assignment 1.pdf
Normal file
BIN
Assignment 1.pdf
Normal file
Binary file not shown.
362
ass1.c
Normal file
362
ass1.c
Normal file
@ -0,0 +1,362 @@
|
||||
/* License: AGPLv3 or later. https://www.gnu.org/licenses/licenses.html
 *
 * Assignment 1 - Text Analysis
 * Manish
 * Student Login: *****
 *
 * Compile it as:
 * gcc -Wall -std=c11 -o ass1 ass1.c
 *
 * Relevant code has been carried over from Lab Exercises
 *
 * Words are ordered by decreasing count and then alphabetically in
 * ascending order. Eg:
 * 15 hello
 * 15 world
 */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* One distinct word, stored as a node of an array-backed AVL tree.
 *
 * Nodes refer to each other by index into the global `tree` array rather
 * than by pointer; an index of -1 means "no child".
 */
typedef struct
{
    // Word
    int start;  // index in `text` of the word's first character
    /* No `end` field is stored: a word ends where the word of the next
     * node (by array index) starts, and the word of the last node ends at
     * text_size.
     */
    int count;  // number of occurrences seen so far

    // AVL tree
    int left;   // index of left child, or -1
    int right;  // index of right child, or -1
    int height; // 0 for a leaf; an empty subtree counts as -1
} node;
|
||||
|
||||
// This is the AVL tree
|
||||
int root = -1;
|
||||
node tree[50000];
|
||||
int tree_size = 0;
|
||||
|
||||
// All words in string pool. Their index in AVL tree
|
||||
char text[500000];
|
||||
int text_size = 0;
|
||||
|
||||
/* Alphabetical index is built by traversing through tree.
|
||||
* Thereafter, sort indexes by count using merge sort.
|
||||
*/
|
||||
int indexes[50000];
|
||||
int indexes_size = 0;
|
||||
|
||||
/* Small integer helpers. */
int min(int i, int j);
int max(int i, int j);

/* AVL tree of words (nodes are addressed by index into `tree`). */
int string_compare(int node_i, int index_j);
int new_entry(int word_start);
int height(int node);
void update_hegiht(int node);
int insert(int word_start, int node);
int rotate_right(int old_parent);
int rotate_left(int old_parent);
void build_index(int node);

/* Sorts `indexes` by decreasing word count. */
void merge_sort(void);

/* Prints "<count> <word>" for one tree node. */
void print_word(int node);
|
||||
|
||||
int main(void)
|
||||
{
|
||||
printf("Enter file name: ");
|
||||
char filename[257];
|
||||
// Assuming filename/file path won't be longer than 256 characters
|
||||
scanf("%256s", filename);
|
||||
|
||||
FILE* file = fopen(filename, "r");
|
||||
if (!file)
|
||||
{
|
||||
perror(filename);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// read word and add node to AVL tree
|
||||
int word_start = text_size;
|
||||
int tree_len = tree_size;
|
||||
char c;
|
||||
bool in_word = false;
|
||||
while ((c = fgetc(file)) != EOF)
|
||||
{
|
||||
if (isalpha(c))
|
||||
{
|
||||
if (!in_word)
|
||||
{
|
||||
in_word = true;
|
||||
word_start = text_size;
|
||||
}
|
||||
text[text_size++] = tolower(c);
|
||||
}
|
||||
else if (isspace(c))
|
||||
{
|
||||
if (in_word)
|
||||
{
|
||||
root = insert(word_start, root);
|
||||
// if word not added because duplicate
|
||||
if (tree_len == tree_size)
|
||||
{
|
||||
text_size = word_start;
|
||||
}
|
||||
else
|
||||
{
|
||||
tree_len = tree_size;
|
||||
}
|
||||
}
|
||||
in_word = false;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
|
||||
build_index(root);
|
||||
|
||||
merge_sort();
|
||||
|
||||
printf("First 10 words in index:\n");
|
||||
int upto = min(indexes_size, 10);
|
||||
for (int i = 0; i < upto; i++)
|
||||
{
|
||||
print_word(indexes[i]);
|
||||
}
|
||||
printf("\n\nLast 10 words in index:\n");
|
||||
int from = max(indexes_size - 11, 0);
|
||||
for (int i = from; i < indexes_size; i++)
|
||||
{
|
||||
print_word(indexes[i]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns the smaller of i and j. */
int min(int i, int j)
{
    return (i <= j) ? i : j;
}
|
||||
|
||||
/* Returns the larger of i and j. */
int max(int i, int j)
{
    return (i >= j) ? i : j;
}
|
||||
|
||||
/* node_i is a word added to tree and index_j is last word in
|
||||
* text that needs to be compared.
|
||||
*/
|
||||
int string_compare(int node_i, int index_j)
|
||||
{
|
||||
/* If alphabetically node_i comes before than -1
|
||||
* if node_i == index_j than 0
|
||||
* if alphabetically nojde_j comes before than 1
|
||||
*/
|
||||
int i = tree[node_i].start;
|
||||
int j = index_j;
|
||||
int i_end;
|
||||
if (node_i + 1 == tree_size) // node_i is last entry
|
||||
i_end = index_j;
|
||||
else
|
||||
i_end = tree[node_i + 1].start; // start of next word in text array
|
||||
int j_end = text_size;
|
||||
while (i < i_end && j < j_end)
|
||||
{
|
||||
if (text[i] < text[j])
|
||||
return -1;
|
||||
if (text[i++] > text[j++])
|
||||
return 1;
|
||||
}
|
||||
// till now both are equal
|
||||
if (i < i_end) // i is longer
|
||||
return 1;
|
||||
if (j < j_end) // j is longer
|
||||
return -1;
|
||||
return 0; // Both equal and same length
|
||||
}
|
||||
|
||||
int new_entry(int word_start)
|
||||
{
|
||||
tree[tree_size].start = word_start;
|
||||
tree[tree_size].count = 1;
|
||||
tree[tree_size].left = -1;
|
||||
tree[tree_size].right = -1;
|
||||
tree[tree_size].height = 0;
|
||||
return tree_size++;
|
||||
}
|
||||
|
||||
int height(int node)
|
||||
{
|
||||
if (node >= 0)
|
||||
{
|
||||
return tree[node].height;
|
||||
}
|
||||
return node; // -1
|
||||
}
|
||||
|
||||
void update_hegiht(int node)
|
||||
{
|
||||
int left_height = height(tree[node].left);
|
||||
int right_height = height(tree[node].right);
|
||||
tree[node].height = max(left_height, right_height) + 1;
|
||||
}
|
||||
|
||||
int insert(int word_start, int node)
|
||||
{
|
||||
if (node < 0)
|
||||
{
|
||||
return new_entry(word_start);
|
||||
}
|
||||
int comparison = string_compare(node, word_start);
|
||||
// new_word == our_word
|
||||
if (comparison == 0)
|
||||
tree[node].count++;
|
||||
// new_word < our_word
|
||||
else if (comparison > 0)
|
||||
{
|
||||
tree[node].left = insert(word_start, tree[node].left);
|
||||
// if tree imbalance
|
||||
if (height(tree[node].left) - height(tree[node].right) >= 2)
|
||||
{
|
||||
if (string_compare(tree[node].left, word_start) > 0) // Case 1
|
||||
{
|
||||
node = rotate_right(node);
|
||||
}
|
||||
else // Case 2
|
||||
{
|
||||
tree[node].left = rotate_left(tree[node].left);
|
||||
node = rotate_right(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
// new_word > our_word
|
||||
else if (comparison < 0)
|
||||
{
|
||||
tree[node].right = insert(word_start, tree[node].right);
|
||||
// if tree imbalance
|
||||
if (height(tree[node].right) - height(tree[node].left) >= 2)
|
||||
{
|
||||
if (string_compare(tree[node].right, word_start) > 0) // Case 3
|
||||
{
|
||||
tree[node].right = rotate_right(tree[node].right);
|
||||
node = rotate_left(node);
|
||||
}
|
||||
else // Case 4
|
||||
{
|
||||
node = rotate_left(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
update_hegiht(node);
|
||||
return node;
|
||||
}
|
||||
|
||||
int rotate_right(int old_parent)
|
||||
{
|
||||
int new_parent = tree[old_parent].left;
|
||||
tree[old_parent].left = tree[new_parent].right;
|
||||
tree[new_parent].right = old_parent;
|
||||
update_hegiht(old_parent);
|
||||
update_hegiht(new_parent);
|
||||
return new_parent; // to be updated in it's grand parent node
|
||||
}
|
||||
|
||||
int rotate_left(int old_parent)
|
||||
{
|
||||
int new_parent = tree[old_parent].right;
|
||||
tree[old_parent].right = tree[new_parent].left;
|
||||
tree[new_parent].left = old_parent;
|
||||
update_hegiht(old_parent);
|
||||
update_hegiht(new_parent);
|
||||
return new_parent; // to be updated in it's grand parent node
|
||||
}
|
||||
|
||||
void build_index(int node)
|
||||
{
|
||||
if (node < 0)
|
||||
return;
|
||||
build_index(tree[node].left);
|
||||
indexes[indexes_size++] = node;
|
||||
build_index(tree[node].right);
|
||||
}
|
||||
|
||||
void merge_sort()
|
||||
{
|
||||
int temp[indexes_size];
|
||||
|
||||
// These pointers will be swapped at every iteration
|
||||
int* src = indexes;
|
||||
int* dst = temp;
|
||||
int* tmp;
|
||||
|
||||
int merge_size = 1;
|
||||
// It looks O(scary!) but is as efficient as other loop based merge sort
|
||||
while (merge_size < indexes_size)
|
||||
{
|
||||
int i = 0;
|
||||
while (i < indexes_size)
|
||||
{
|
||||
int left_upto = min(i + merge_size, indexes_size);
|
||||
int l = i;
|
||||
int r = left_upto;
|
||||
int right_upto = min(i + (2 * merge_size), indexes_size);
|
||||
while (l < left_upto && r < right_upto)
|
||||
{
|
||||
if (tree[src[l]].count >= tree[src[r]].count)
|
||||
{
|
||||
dst[i++] = src[l++];
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[i++] = src[r++];
|
||||
}
|
||||
}
|
||||
for (; l < left_upto; l++)
|
||||
{
|
||||
dst[i++] = src[l];
|
||||
}
|
||||
for (; r < right_upto; r++)
|
||||
{
|
||||
dst[i++] = src[r];
|
||||
}
|
||||
}
|
||||
tmp = src;
|
||||
src = dst;
|
||||
dst = tmp;
|
||||
merge_size *= 2;
|
||||
}
|
||||
/* if loop ended with final merge in temp array than copy it back to
|
||||
* indexes array
|
||||
*/
|
||||
if (&indexes[0] != &src[0])
|
||||
{
|
||||
for (int i = 0; i < indexes_size; i++)
|
||||
{
|
||||
indexes[i] = src[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_word(int node)
|
||||
{
|
||||
int word_length;
|
||||
if (node + 1 == tree_size) // last word in tree
|
||||
word_length = text_size - tree[node].start;
|
||||
else
|
||||
word_length = tree[node + 1].start - tree[node].start;
|
||||
printf(
|
||||
"%d %.*s\n",
|
||||
tree[node].count,
|
||||
word_length,
|
||||
&text[tree[node].start]);
|
||||
}
|
Loading…
Reference in New Issue
Block a user