Download code

Jump to: navigation, search

Back to Finite_automaton_string_search_algorithm_(C_Plus_Plus)

Download for Windows: single file, zip

Download for UNIX: single file, zip, tar.gz, tar.bz2

dfa_string_search.cpp

  1 /* The authors of this work have released all rights to it and placed it
  2 in the public domain under the Creative Commons CC0 1.0 waiver
  3 (http://creativecommons.org/publicdomain/zero/1.0/).
  4 
  5 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  6 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  7 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  8 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  9 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 10 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 11 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 12 
 13 Retrieved from: http://en.literateprograms.org/Finite_automaton_string_search_algorithm_(C_Plus_Plus)?oldid=18976
 14 */
 15 
 16 #include <set>
 17 #include <map>
 18 #include <vector>
 19 
 20 #include <string>
 21 
 22 #include <string>
 23 #include <iostream>
 24 
 25 using std::set;
 26 using std::map;
 27 using std::vector;
 28 
 29 using std::string;
 30 using std::cout;
 31 using std::endl;
 32 
 33 typedef int string_pos;
 34 typedef int state_num;
 35 
 36 const int NUM_CHARS = 256;
 37 
 38 
 39 // Maps the set of string positions a state represents to the state number
 40 map< set<string_pos>, state_num > state_map;
 41 // Maps a state number to its set of string positions
 42 vector< set<string_pos> > states;
 43 
 44 vector< vector<state_num> > transition_table;
 45 vector<bool> final;
 46 
 47 
 48 state_num get_state(set<string_pos>& s)
 49 {
 50     static int next_state_num = 0;
 51     map< set<string_pos>, state_num >::iterator iter =
 52         state_map.find(s);
 53     if (iter == state_map.end())
 54     {
 55         state_map[s] = next_state_num;
 56         states.push_back(s);
 57         transition_table.push_back(vector<state_num>(NUM_CHARS));
 58 	final.push_back(false);
 59 
 60         next_state_num++;
 61         return next_state_num - 1;
 62     }
 63     else
 64     {
 65         return iter->second;
 66     }
 67         
 68 }
 69 void fill_transition_table_entry(string pattern, int state, char c_next)
 70 {
 71     set<string_pos> new_state;
 72     new_state.insert(0);
 73     set<string_pos>::iterator iter = states[state].begin();
 74     for( ; iter != states[state].end(); iter++)
 75     {
 76         if (pattern[*iter] == c_next)
 77         {
 78             new_state.insert(*iter + 1);
 79         }
 80     }
 81     state_num temp = get_state(new_state); // fix concurrent modification bug
 82     transition_table[state][c_next] = temp;
 83     #if TRACE
 84         cout << "Adding edge " << state << " -" << c_next << "-> "
 85              << transition_table[state][c_next] << endl;
 86     #endif
 87 }
 88 
 89 void fill_transition_table(string pattern)
 90 {
 91     set<string_pos> initial_pos;
 92     initial_pos.insert(0);
 93     get_state(initial_pos);
 94     for (unsigned int state=0; state < states.size(); state++)
 95     {
 96     repeat_inner_loop:
 97         set<string_pos>::iterator iter = states[state].begin();
 98         for( ; iter != states[state].end(); iter++)
 99         {
100             if (*iter == (string_pos)pattern.length())
101 	    {
102 	        final[state] = true;
103 	        continue;
104 	    }
105             char c_next = pattern[*iter];
106             if (transition_table[state][c_next] == 0) {
107                 fill_transition_table_entry(pattern, state, c_next);
108                 goto repeat_inner_loop;
109             }
110         }
111     }
112     state_map.clear();
113     states.clear();
114 }
115 
116 int dfa_string_search(string search_for, string search_in)
117 {
118     state_num cur_state = 0;
119     string_pos cur_pos = 0;
120     while (cur_pos < (string_pos)search_in.length() && !final[cur_state])
121     {
122         cur_state = transition_table[cur_state][search_in[cur_pos]];
123         cur_pos++;
124     }
125     if (final[cur_state])
126         return cur_pos - search_for.length();
127     else
128         return -1;
129 }
130 int main(int argc, char* argv[])
131 {
132     fill_transition_table(argv[1]);
133     int result = dfa_string_search(argv[1], argv[2]);
134     if (result == -1)
135         cout << "No match found." << endl;
136     else
137         cout << "Matched at position " << result << ":" << endl
138              << string(argv[2]).substr(0, result) << "|"
139              << string(argv[2]).substr(result) << endl;
140     return 0;
141 }
142 


hijacker
hijacker
hijacker
hijacker