Compare commits
3 commits
e4e27d421a
...
744d0f7a3a
Author | SHA1 | Date | |
---|---|---|---|
|
744d0f7a3a | ||
|
f9f3674fde | ||
|
7d93d5dbfa |
10 changed files with 229 additions and 53 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -2,3 +2,6 @@
|
||||||
*.d
|
*.d
|
||||||
*a.out*
|
*a.out*
|
||||||
build
|
build
|
||||||
|
.cache/
|
||||||
|
words.txt
|
||||||
|
compile_commands.json
|
||||||
|
|
2
Makefile
Normal file
2
Makefile
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# Reformat every C/C++ source and header below this directory in place.
# -i makes clang-format rewrite the files instead of printing the result
# to standard output.
.PHONY: format
format:
	find . -regex '.*\.\(c\|cpp\|cc\|cxx\|h\|hpp\|hh\|hxx\)' -exec clang-format -i {} +
|
24
lab2/Makefile
Normal file
24
lab2/Makefile
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
# Compiler and flags shared by every compile and link step.
CXX = g++
CXXFLAGS = -g3 -Werror -Wall -Wpedantic -Wunused-variable -std=c++17

# Every .cc file in this directory and its matching object file.
SRC = $(wildcard *.cc)
OBJ = $(SRC:.cc=.o)

# Default target: both executables plus any remaining object files.
all: spell edit $(OBJ)

edit: test_edit_distance.o edit_distance.o
	@echo "Building & linking $@"
	@$(CXX) $(CXXFLAGS) $^ -o $@

spell: spell.o word.o dictionary.o
	@echo "Building & linking $@"
	@$(CXX) $(CXXFLAGS) $^ -o $@

# Compile a single translation unit.
%.o: %.cc
	@echo "Building $@"
	@$(CXX) -c $(CXXFLAGS) $< -o $@

clean:
	rm -f *.o spell edit

# Neither target produces a file of its own name, so both are phony.
.PHONY: all clean
|
|
@ -1,22 +1,82 @@
|
||||||
#include <string>
|
#include "dictionary.h"
|
||||||
#include <vector>
|
#include "word.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <filesystem>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <algorithm>
|
#include <string>
|
||||||
#include "word.h"
|
#include <vector>
|
||||||
#include "dictionary.h"
|
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
// using std::filesystem::path;
|
||||||
|
|
||||||
Dictionary::Dictionary() {
|
Dictionary::Dictionary() {}
|
||||||
}
|
|
||||||
|
|
||||||
bool Dictionary::contains(const string& word) const {
|
bool Dictionary::contains(const string &word) const { return true; }
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<string> Dictionary::get_suggestions(const string &word) const {
|
vector<string> Dictionary::get_suggestions(const string &word) const {
|
||||||
vector<string> suggestions;
|
vector<string> suggestions;
|
||||||
|
// add_trigram_suggestions(suggestions, word);
|
||||||
|
// rank_suggestions(suggestions, word);
|
||||||
|
// trim_suggestions(suggestions);
|
||||||
return suggestions;
|
return suggestions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns every run of three consecutive characters in text, in the order
// the runs occur. Repeated trigrams are kept. The result is empty when text
// is shorter than three characters.
std::vector<std::string> get_trigrams(const std::string &text) {
  std::vector<std::string> trigrams;
  if (text.size() < 3) {
    return trigrams; // Nothing to extract from such a short input
  }

  // A string of length n holds exactly n - 2 trigrams; reserving that many
  // slots up front avoids reallocating while the vector grows.
  const std::size_t count = text.size() - 2;
  trigrams.reserve(count);

  for (std::size_t start = 0; start < count; ++start) {
    trigrams.push_back(text.substr(start, 3));
  }

  return trigrams;
}
|
||||||
|
|
||||||
|
// Writes every stored word to the file at p, one entry per line.
//
// Each line holds the Word itself (streamed through its operator<<), a
// space, the number of trigrams obtained from get_trigrams(get_word()), and
// then each of those trigrams preceded by a space.
//
// Returns 0 on success and 1 if the file could not be opened or if the
// stream reports a failure after writing.
int Dictionary::spit(path p) {
  std::ofstream file(p);

  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  // Iterate over the bucket array itself so the loop always matches the
  // declared array size instead of repeating it as a literal.
  for (const auto &bucket : words) {
    for (const auto &word : bucket) {
      const std::vector<std::string> trias = get_trigrams(word.get_word());
      file << word << " " << trias.size();

      for (const auto &tria : trias) {
        file << " " << tria;
      }

      // '\n' rather than std::endl: no need to flush after every entry.
      file << '\n';
    }
  }

  // Flush once at the end so buffered write errors become visible here;
  // the stream closes itself when it goes out of scope.
  file.flush();
  if (!file) {
    std::cerr << "Error writing file! " << std::endl;
    return 1;
  }
  return 0;
}
|
||||||
|
|
||||||
|
// Reads the file at p line by line and stores each line as a Word, together
// with its trigrams, in the bucket indexed by the line's length.
//
// Lines whose length has no matching bucket are skipped, because indexing
// the bucket array with them would write outside of it.
//
// Returns 0 on success and 1 if the file could not be opened.
int Dictionary::slurp(path p) {
  std::ifstream file(p.string());

  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  // Number of length buckets, taken from the array so it cannot drift from
  // the declaration.
  const std::size_t bucket_count = std::size(words);

  std::string line;
  while (std::getline(file, line)) {
    if (line.size() >= bucket_count) {
      continue; // Too long for any bucket; storing it would be out of bounds
    }
    words[line.size()].push_back(Word(line, get_trigrams(line)));
  }

  // The stream closes itself when it goes out of scope.
  return 0;
}
|
||||||
|
|
|
@ -1,15 +1,24 @@
|
||||||
#ifndef DICTIONARY_H
|
#ifndef DICTIONARY_H
|
||||||
#define DICTIONARY_H
|
#define DICTIONARY_H
|
||||||
|
|
||||||
|
#include "word.h"
|
||||||
|
#include <filesystem>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
using std::vector;
|
||||||
|
using std::filesystem::path;
|
||||||
|
|
||||||
class Dictionary {
|
class Dictionary {
|
||||||
public:
|
public:
|
||||||
Dictionary();
|
Dictionary();
|
||||||
bool contains(const std::string &word) const;
|
bool contains(const std::string &word) const;
|
||||||
std::vector<std::string> get_suggestions(const std::string &word) const;
|
std::vector<std::string> get_suggestions(const std::string &word) const;
|
||||||
|
int slurp(path p);
|
||||||
|
int spit(path p);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
vector<Word> words[25];
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
35
lab2/edit_distance.cc
Normal file
35
lab2/edit_distance.cc
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
// Computes the Levenshtein distance between s1 and s2: the minimum number of
// single-character insertions, deletions and substitutions that turn one
// string into the other.
//
// Only two rows of the dynamic-programming table are kept, because each cell
// depends solely on the current and the previous row.
int edit_distance(const std::string &s1, const std::string &s2) {
  const std::size_t m = s1.size();
  const std::size_t n = s2.size();

  // previous[j]: distance between the first i - 1 characters of s1 and the
  // first j characters of s2. Row 0 is pure insertion cost.
  std::vector<int> previous(n + 1);
  std::vector<int> current(n + 1);
  for (std::size_t j = 0; j <= n; ++j) {
    previous[j] = static_cast<int>(j);
  }

  for (std::size_t i = 1; i <= m; ++i) {
    current[0] = static_cast<int>(i); // Deleting the first i characters of s1

    for (std::size_t j = 1; j <= n; ++j) {
      if (s1[i - 1] == s2[j - 1]) {
        current[j] = previous[j - 1]; // Characters match: no operation needed
      } else {
        current[j] = 1 + std::min({previous[j],     // Deletion
                                   current[j - 1],  // Insertion
                                   previous[j - 1]  // Substitution
                                  });
      }
    }

    // The row just filled becomes the "previous" row of the next iteration.
    previous.swap(current);
  }

  return previous[n];
}
|
18
lab2/edit_distance.h
Normal file
18
lab2/edit_distance.h
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
#ifndef EDIT_DISTANCE_H
#define EDIT_DISTANCE_H

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

/**
 * @brief Computes the edit distance (Levenshtein distance) between two strings.
 *
 * The edit distance is defined as the minimum number of single-character edits
 * (insertions, deletions, or substitutions) required to transform one string
 * into the other.
 *
 * @param s1 The first string.
 * @param s2 The second string.
 * @return The edit distance between the two strings.
 */
int edit_distance(const std::string &s1, const std::string &s2);

#endif
|
|
@ -1,18 +1,17 @@
|
||||||
|
#include "dictionary.h"
|
||||||
|
#include <cctype>
|
||||||
|
#include <filesystem>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <algorithm>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cctype>
|
|
||||||
#include "dictionary.h"
|
|
||||||
|
|
||||||
using std::string;
|
|
||||||
using std::vector;
|
|
||||||
using std::cin;
|
using std::cin;
|
||||||
using std::cout;
|
using std::cout;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
void check_word(const string& word, const Dictionary& dict)
|
void check_word(const string &word, const Dictionary &dict) {
|
||||||
{
|
|
||||||
if (dict.contains(word)) {
|
if (dict.contains(word)) {
|
||||||
cout << "Correct." << endl;
|
cout << "Correct." << endl;
|
||||||
} else {
|
} else {
|
||||||
|
@ -27,12 +26,16 @@ void check_word(const string& word, const Dictionary& dict)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
Dictionary dict;
|
Dictionary dict;
|
||||||
string word;
|
string word;
|
||||||
while (cin >> word) {
|
dict.slurp(std::filesystem::path("/usr/share/dict/words"));
|
||||||
transform(word.begin(), word.end(), word.begin(), ::tolower);
|
// dict.spit(std::filesystem::path("words.txt"));
|
||||||
check_word(word, dict);
|
|
||||||
}
|
// while (cin >> word) {
|
||||||
|
// transform(word.begin(), word.end(), word.begin(), ::tolower);
|
||||||
|
// check_word(word, dict);
|
||||||
|
// }
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
32
lab2/word.cc
32
lab2/word.cc
|
@ -1,16 +1,34 @@
|
||||||
|
#include "word.h"
|
||||||
|
#include <algorithm>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "word.h"
|
|
||||||
|
|
||||||
using std::vector;
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
Word::Word(const string& w, const vector<string>& t) {}
|
Word::Word(const string &w, const vector<string> &t) : word(w), triagrams(t) {
|
||||||
|
std::sort(triagrams.begin(), triagrams.end());
|
||||||
string Word::get_word() const {
|
|
||||||
return string();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string Word::get_word() const { return string(); }
|
||||||
|
|
||||||
unsigned int Word::get_matches(const vector<string> &t) const {
|
unsigned int Word::get_matches(const vector<string> &t) const {
|
||||||
return 0;
|
unsigned int matches = 0;
|
||||||
|
|
||||||
|
for (const auto &triagram : t) {
|
||||||
|
if (std::binary_search(triagrams.begin(), triagrams.end(), triagram)) {
|
||||||
|
++matches;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return matches;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ostream &operator<<(std::ostream &out, const Word &w) {
|
||||||
|
auto space = string(" ");
|
||||||
|
out << w.word;
|
||||||
|
out << space;
|
||||||
|
for (const auto &tria : w.triagrams) {
|
||||||
|
out << space << tria;
|
||||||
|
}
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
20
lab2/word.h
20
lab2/word.h
|
@ -1,21 +1,25 @@
|
||||||
#ifndef WORD_H
|
#pragma once
|
||||||
#define WORD_H
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Contains a word and its triagrams
|
||||||
|
*/
|
||||||
class Word {
|
class Word {
|
||||||
public:
|
public:
|
||||||
/* Creates a word w with the sorted trigrams t */
|
/** Creates a word w with the sorted trigrams t */
|
||||||
Word(const std::string &w, const std::vector<std::string> &t);
|
Word(const std::string &w, const std::vector<std::string> &t);
|
||||||
|
|
||||||
/* Returns the word */
|
/** Returns the word */
|
||||||
std::string get_word() const;
|
std::string get_word() const;
|
||||||
|
|
||||||
/* Returns how many of the trigrams in t that are present
|
/** Returns how many of the trigrams in t that are present
|
||||||
in this word's trigram vector */
|
in this word's trigram vector */
|
||||||
unsigned int get_matches(const std::vector<std::string> &t) const;
|
unsigned int get_matches(const std::vector<std::string> &t) const;
|
||||||
private:
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
private:
|
||||||
|
const std::string word;
|
||||||
|
const std::vector<std::string> triagrams;
|
||||||
|
friend std::ostream &operator<<(std::ostream &out, const Word &o);
|
||||||
|
};
|
||||||
|
|
Loading…
Reference in a new issue