Created
October 15, 2018 19:19
-
-
Save brimston3/27ebef93dc64d99d6085f86d4ade6a47 to your computer and use it in GitHub Desktop.
T-9 dict search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// g++ -Wall -std=c++14 -o treescan treescan.cpp && time ./treescan | |
// just goofin' around, searching for words in phone T-9 numbers. | |
#include <iostream> | |
#include <fstream> | |
#include <assert.h> | |
#include <string> | |
#include <array> | |
#include <algorithm> | |
#include <regex> | |
#include <limits> | |
struct dfs_ctx { | |
const char * pn; // the search string | |
int pnoff; // the offset from the original string that pn starts at | |
unsigned pnlen; // length of the search string | |
char * workbuf; // a work buffer for printing matches | |
}; | |
class dnode { | |
public: | |
dnode(); | |
virtual ~dnode(); | |
bool add(const std::string & s, int linenum=1, int depth = 0); | |
int pn_dfs(const std::string & pn) const; | |
int pn_dfs_impl(struct dfs_ctx & dc, unsigned depth) const; | |
std::array<dnode*, 26> nl; | |
int complete; | |
} dnode_st; | |
dnode::dnode() : | |
nl{0,}, | |
complete(0) | |
{} | |
dnode::~dnode() | |
{ | |
for (auto & nlp : nl) { | |
if (nlp) { | |
delete nlp; | |
} | |
} | |
} | |
__attribute__((pure)) | |
static int map_char_to_radix(const char c) { | |
assert(c >= 'a' && c <= 'z'); | |
return c-'a'; | |
} | |
bool dnode::add(const std::string & s, int linenum, int depth) { | |
assert(depth>=0); | |
if (s.length() == static_cast<unsigned>(depth)) { | |
complete = linenum; | |
return true; | |
} | |
dnode *& rn = nl[map_char_to_radix(s[depth])]; | |
if (rn == nullptr) { | |
rn = new dnode(); | |
if (!rn) { | |
return false; | |
} | |
} | |
return rn->add(s, linenum, depth+1); | |
} | |
const std::map<char, const std::string> anum_map = { | |
{'2', "abc"}, | |
{'3', "def"}, | |
{'4', "ghi"}, | |
{'5', "jkl"}, | |
{'6', "mno"}, | |
{'7', "pqrs"}, | |
{'8', "tuv"}, | |
{'9', "wxyz"} | |
}; | |
int dnode::pn_dfs(const std::string & pn) const { | |
char workbuf[pn.length()+1] = {0,}; | |
if (pn.length() == 0) return 0; | |
int cnt = 0; | |
assert(pn.length() <= std::numeric_limits<unsigned>::max()); | |
struct dfs_ctx dc = { | |
0, 0, static_cast<unsigned>(pn.length()), workbuf | |
}; | |
for (unsigned i = 0; i < pn.length(); i++) { | |
dc.pn = pn.c_str()+i; | |
dc.pnoff = i; | |
cnt += pn_dfs_impl(dc, 0); | |
} | |
return cnt; | |
} | |
int dnode::pn_dfs_impl(struct dfs_ctx & dc, unsigned depth) const { | |
if (!!complete) { | |
std::cout << std::string{dc.workbuf, dc.workbuf + depth} << " @" << dc.pnoff | |
<< " [l#" << complete << "]" << std::endl; | |
} | |
int count = !!complete; | |
if (dc.pnlen == depth) | |
return count; | |
auto pos = anum_map.find(dc.pn[depth]); | |
if (pos == anum_map.end()) | |
return count; | |
for (char c : pos->second) { | |
dc.workbuf[depth] = c; | |
const dnode * const & rn = nl[map_char_to_radix(c)]; | |
if (rn == nullptr) continue; | |
count += rn->pn_dfs_impl(dc, depth+1); | |
} | |
return count; | |
} | |
static int load_dict(dnode & d, std::string filename) { | |
std::ifstream infile(filename); | |
std::string l; | |
std::regex r("^[a-zA-Z]+$"); | |
unsigned total_added = 0; | |
unsigned linenum = 1; // should be at least 1 | |
while(std::getline(infile, l)) { | |
if (regex_match(l,r)) { | |
std::transform(l.begin(), l.end(), l.begin(), ::tolower); | |
if (!d.add(l,linenum)) { | |
std::cerr << "Error adding word \"" << l << "\" to tree" << std::endl; | |
} else { | |
++total_added; | |
} | |
} | |
/* | |
else { | |
omit illegal string. | |
} | |
*/ | |
++linenum; | |
} | |
std::cerr << total_added << " words added" << std::endl; | |
return total_added; | |
} | |
int main() { | |
dnode root; | |
load_dict(root, "/usr/share/dict/american-english"); | |
int cnt = root.pn_dfs("3825633"); | |
std::cout << "Found " << cnt << " matches" << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment