Go to the source code of this file.
Functions
search_results ()
matches_text ($num)
report_matches ()
end_form ($value)
readInt ($file)
readString ($file)
readHeader ($file)
computeIndex ($word)
search ($file, $word, &$statsList)
combine_results ($results, &$docs)
filter_results ($docs, &$requiredWords, &$forbiddenWords)
compare_rank ($a, $b)
sort_results ($docs, &$sorted)
report_results (&$docs)
main ()
combine_results ($results, &$docs)
Definition at line 197 of file search.php.
Referenced by main().
/**
 * Merges the per-word search results into one record per document.
 *
 * Every element of $results carries the keys "word", "match" and "docs";
 * every entry of "docs" carries "url", "name", "rank" and "freq".
 * Documents are keyed by URL in $docs. When several words hit the same
 * document its ranks are summed, and each hit is appended to the
 * document's "words" list.
 *
 * @param array $results  list of word matches (main() passes the list filled by search())
 * @param array $docs     in/out: map from URL to document record
 * @return array          the updated $docs map
 */
function combine_results($results, &$docs)
{
    foreach ($results as $wordInfo)
    {
        foreach ($wordInfo["docs"] as $di)
        {
            $key  = $di["url"];
            $rank = $di["rank"];
            // Look the URL up as a key: constant time and exact, whereas
            // scanning array_keys() is linear per hit and compares loosely.
            if (array_key_exists($key, $docs))
            {
                $docs[$key]["rank"] += $rank;
            }
            else
            {
                $docs[$key] = array(
                    "url"   => $key,
                    "name"  => $di["name"],
                    "rank"  => $rank,
                    "words" => array()
                );
            }
            // Record which query word matched this document and how often.
            $docs[$key]["words"][] = array(
                "word"  => $wordInfo["word"],
                "match" => $wordInfo["match"],
                "freq"  => $di["freq"]
            );
        }
    }
    return $docs;
}
compare_rank ($a, $b)
Definition at line 267 of file search.php.
/**
 * Comparison callback that orders document records by descending rank.
 *
 * @param array $a  record with a "rank" entry
 * @param array $b  record with a "rank" entry
 * @return int      0 when the ranks are equal, -1 when $a ranks higher,
 *                  1 when $b ranks higher
 */
function compare_rank($a, $b)
{
    $left  = $a["rank"];
    $right = $b["rank"];
    if ($left == $right)
    {
        return 0;
    }
    if ($left > $right)
    {
        return -1;
    }
    return 1;
}
computeIndex ($word)
Definition at line 75 of file search.php.
Referenced by search().
/**
 * Computes the index slot for a word from its first two bytes.
 *
 * Simple hashing that allows for substring (prefix) search: all words
 * sharing the same first two bytes map to the same slot.
 *
 * @param string $word  word to hash
 * @return int          hi*256+lo of the first two bytes, or -1 when the
 *                      word is shorter than two bytes or either byte is NUL
 */
function computeIndex($word)
{
    if (strlen($word) < 2) return -1;
    // Square-bracket offsets: the curly-brace form ($word{0}) no longer
    // parses on PHP 8.
    // high char of the index
    $hi = ord($word[0]);
    if ($hi == 0) return -1;
    // low char of the index
    $lo = ord($word[1]);
    if ($lo == 0) return -1;
    // return index
    return $hi*256 + $lo;
}
end_form ($value)
Definition at line 49 of file search.php.
Referenced by main().
/**
 * Emits the query text field and closes the search form markup.
 *
 * @param string $value  text to pre-fill the query field with; main()
 *                       passes the raw "query" request parameter
 */
function end_form($value)
{
    // The value comes straight from the request, so escape it (including
    // both quote styles) before placing it inside the attribute.
    $safeValue = htmlspecialchars($value, ENT_QUOTES);
    echo " <td><input type=\"text\" name=\"query\" value=\"$safeValue\" size=\"20\" accesskey=\"s\"/></td>\n </tr>\n </table>\n </form>\n </li>\n </ul>\n</div>\n";
}
filter_results ($docs, &$requiredWords, &$forbiddenWords)
Definition at line 227 of file search.php.
Referenced by main().
/**
 * Drops documents that miss a required word or contain a forbidden word.
 *
 * A document is kept only when every entry of $requiredWords appears
 * among the "word" values of its "words" list and no entry of
 * $forbiddenWords does. Words are compared exactly (===), so
 * numeric-looking words such as "10" and "1e1" are not conflated.
 *
 * @param array $docs            map from key to document record (each with a "words" list)
 * @param array $requiredWords   words every kept document must contain (not modified)
 * @param array $forbiddenWords  words no kept document may contain (not modified)
 * @return array                 the surviving documents, with their original keys
 */
function filter_results($docs, &$requiredWords, &$forbiddenWords)
{
    $filteredDocs = array();
    // foreach instead of each(): each() no longer exists on PHP 8.
    foreach ($docs as $key => $doc)
    {
        $words = $doc["words"];
        $copy = true; // copy entry by default

        foreach ($requiredWords as $reqWord)
        {
            $found = false;
            foreach ($words as $wordInfo)
            {
                if ($wordInfo["word"] === $reqWord)
                {
                    $found = true;
                    break;
                }
            }
            if (!$found)
            {
                $copy = false; // document lacks one of the required words
                break;
            }
        }

        if ($copy)
        {
            foreach ($words as $wordInfo)
            {
                foreach ($forbiddenWords as $badWord)
                {
                    if ($wordInfo["word"] === $badWord)
                    {
                        $copy = false; // document contains a forbidden word
                        break 2;
                    }
                }
            }
        }

        if ($copy) $filteredDocs[$key] = $doc;
    }
    return $filteredDocs;
}
main (void)
Definition at line 325 of file search.php.
References combine_results(), end_form(), filter_results(), main(), readHeader(), report_results(), search(), and sort_results().
/**
 * Entry point: runs the query from the request against search.idx and
 * prints the result page fragment.
 *
 * Steps: check the PHP version, open the index and verify its "DOXS"
 * header, echo the form tail with the current query, split the query on
 * spaces, search each distinct word, then combine, filter, sort and
 * report the matching documents.
 *
 * A leading '+' marks a word as required and a leading '-' marks it as
 * forbidden. Terminates the script via die() when the PHP version is too
 * old or the index cannot be opened or has a bad header.
 */
function main()
{
    // version_compare() orders the components numerically; a plain
    // strcmp() would wrongly reject versions such as "10.0.0".
    if (version_compare(phpversion(), '4.1.0') < 0)
    {
        die("Error: PHP version 4.1.0 or above required!");
    }
    if (!($file=fopen("search.idx","rb")))
    {
        die("Error: Search index file could NOT be opened!");
    }
    if (readHeader($file)!="DOXS")
    {
        die("Error: Header of index file is invalid!");
    }
    $query="";
    // Ignore non-string input such as query[]=x, which cannot be tokenized.
    if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
    {
        $query=$_GET["query"];
    }
    end_form($query);
    echo " \n<div class=\"searchresults\">\n";
    $results = array();
    $requiredWords = array();
    $forbiddenWords = array();
    $seenWords = array(); // set of words already searched, keyed by word
    // Compare against false explicitly so that a token "0" does not end the loop.
    for ($token = strtok($query, " "); $token !== false; $token = strtok(" "))
    {
        // Lowercase first: the index is searched in lower case, so the
        // required/forbidden lists must use the same form to ever match.
        $word = strtolower($token);
        if ($word[0] == '+')
        {
            $word = (string)substr($word, 1);
            if ($word !== "") $requiredWords[] = $word;
        }
        if ($word !== "" && $word[0] == '-')
        {
            $word = (string)substr($word, 1);
            if ($word !== "") $forbiddenWords[] = $word;
        }
        // A bare "+" or "-" leaves nothing to search for.
        if ($word !== "" && !isset($seenWords[$word]))
        {
            $seenWords[$word] = true;
            search($file, $word, $results);
        }
    }
    $docs = array();
    combine_results($results,$docs);
    // filter out documents with forbidden word or that do not contain
    // required words
    $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
    // sort the results based on rank
    $sorted = array();
    sort_results($filteredDocs,$sorted);
    // report results to the user
    report_results($sorted);
    echo "</div>\n";
    fclose($file);
}
matches_text ($num)
Definition at line 29 of file search.php.
/**
 * Builds the summary sentence for a given number of matching documents.
 *
 * @param int $num  number of matching documents
 * @return string   HTML text for zero, exactly one, or several matches
 */
function matches_text($num)
{
    if ($num==0)
    {
        return "Sorry, no documents matching your query.";
    }
    if ($num==1)
    {
        return "Found <b>1</b> document matching your query.";
    }
    // more than one match
    return "Found <b>$num</b> documents matching your query. Showing best matches first.";
}
readHeader ($file)
Definition at line 68 of file search.php.
Referenced by main().
/**
 * Reads the next four characters from the file as the header tag.
 *
 * @param resource $file  open file handle, positioned at the header
 * @return string         the characters read (main() expects "DOXS")
 */
function readHeader($file)
{
    $tag = "";
    for ($n = 0; $n < 4; $n++)
    {
        $tag .= fgetc($file);
    }
    return $tag;
}
readInt ($file)
Definition at line 54 of file search.php.
Referenced by search().
/**
 * Reads four bytes from the file and assembles them into an integer,
 * most significant byte first.
 *
 * @param resource $file  open file handle
 * @return int            the value (b1<<24)|(b2<<16)|(b3<<8)|b4
 */
function readInt($file)
{
    $value = 0;
    for ($n = 0; $n < 4; $n++)
    {
        // shift the bytes read so far up and append the next one
        $value = ($value << 8) | ord(fgetc($file));
    }
    return $value;
}
readString ($file)
Definition at line 61 of file search.php.
Referenced by search().
/**
 * Reads characters from the file up to, and consuming, the next NUL byte.
 *
 * Reading also stops when fgetc() yields nothing more, because ord() of
 * that result is 0 as well.
 *
 * @param resource $file  open file handle
 * @return string         the characters read, without the terminator
 */
function readString($file)
{
    $text = "";
    for (;;)
    {
        $c = fgetc($file);
        if (!ord($c))
        {
            break;
        }
        $text .= $c;
    }
    return $text;
}
report_matches ()
report_results (&$docs)
Definition at line 283 of file search.php.
Referenced by main().
/**
 * Prints the result table: a heading, a summary line and, per document,
 * a numbered link row followed by a row listing the matched words.
 *
 * Each document record must provide "url", "name" and a "words" list
 * whose entries carry "word", "match" and "freq".
 *
 * @param array $docs  documents to report, in display order (not modified)
 */
function report_results(&$docs)
{
    echo "<table cellspacing=\"2\">\n";
    echo " <tr>\n";
    echo " <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
    echo " </tr>\n";
    $numDocs = sizeof($docs);
    if ($numDocs==0)
    {
        echo " <tr>\n";
        echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
        echo " </tr>\n";
    }
    else
    {
        echo " <tr>\n";
        echo " <td colspan=\"2\">".matches_text($numDocs);
        echo "\n";
        echo " </td>\n";
        echo " </tr>\n";
        $num=1;
        foreach ($docs as $doc)
        {
            // NOTE(review): url and name are emitted verbatim; they appear to
            // come from the index file rather than the request - confirm
            // before adding escaping here.
            echo " <tr>\n";
            echo " <td align=\"right\">$num.</td>";
            echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
            // Close the link row before opening the row with the matches;
            // leaving it open produces invalid nested table markup.
            echo " </tr>\n";
            echo " <tr>\n";
            echo " <td></td><td class=\"tiny\">".report_matches()." ";
            foreach ($doc["words"] as $wordInfo)
            {
                $word = $wordInfo["word"];
                // The part of the matched index word that follows the query word.
                $matchRight = substr($wordInfo["match"],strlen($word));
                // The word originates from the query string, so escape it
                // (and the remainder of the match) before echoing.
                $safeWord  = htmlspecialchars($word, ENT_QUOTES);
                $safeRight = htmlspecialchars((string)$matchRight, ENT_QUOTES);
                echo "<b>$safeWord</b>$safeRight(".$wordInfo["freq"].") ";
            }
            echo " </td>\n";
            echo " </tr>\n";
            $num++;
        }
    }
    echo "</table>\n";
}
search ($file, $word, &$statsList)
Definition at line 99 of file search.php.
References computeIndex(), readInt(), and readString().
Referenced by main().
/**
 * Looks up $word in the index file and appends one entry per matching
 * index word to $statsList.
 *
 * An index word matches when it starts with $word. Each appended entry
 * has the keys "word" (the query word), "match" (the index word),
 * "index" (file offset of its statistics), "full" (true when the match
 * is the whole word) and "docs". Every "docs" element holds "idx",
 * "freq", "rank", "hi", "name" and "url".
 *
 * File layout as read here: a table of 4-byte offsets after the 4-byte
 * header, indexed by computeIndex(); each offset leads to a list of
 * NUL-terminated words, each followed by a 4-byte statistics offset and
 * ended by an empty string.
 *
 * @param resource $file       open index file handle
 * @param string   $word       word to search for
 * @param array    $statsList  in/out: list of match entries
 * @return array               the updated $statsList
 */
function search($file, $word, &$statsList)
{
    $slot = computeIndex($word);
    if ($slot == -1) return $statsList; // no valid index for this word

    fseek($file, $slot*4+4); // 4 bytes per entry, skip header
    $listOffset = readInt($file);
    if (!$listOffset) return $statsList; // no words for this hash key

    // Entries $start .. $end-1 of $statsList are the ones added by this call.
    $start = sizeof($statsList);
    $end = $start;
    $wordLen = strlen($word);
    fseek($file, $listOffset);
    // The word list ends with an empty string; compare explicitly so that
    // a stored "0" is not mistaken for the terminator.
    for ($w = readString($file); $w !== ""; $w = readString($file))
    {
        $statIdx = readInt($file);
        // Exact prefix comparison; a loose == would treat numeric-looking
        // strings such as "0e1" and "0e2" as equal.
        if (substr($w, 0, $wordLen) === $word)
        { // found word that matches (as substring)
            $statsList[$end++] = array(
                "word"  => $word,
                "match" => $w,
                "index" => $statIdx,
                "full"  => strlen($w) == $wordLen,
                "docs"  => array()
            );
        }
    }

    $totalHi = 0;
    $totalFreqHi = 0;
    $totalFreqLo = 0;
    for ($k = $start; $k < $end; $k++)
    {
        // whole word matches have a double weight
        $multiplier = $statsList[$k]["full"] ? 2 : 1;
        fseek($file, $statsList[$k]["index"]);
        $numDocs = readInt($file);
        $docInfo = array();
        // read docs info + occurrence frequency of the word
        for ($i = 0; $i < $numDocs; $i++)
        {
            $idx = readInt($file);
            $freq = readInt($file);
            // The lowest bit flags a high priority doc; the remaining bits
            // hold the frequency.
            $docInfo[$i] = array(
                "idx"  => $idx,
                "freq" => $freq>>1,
                "rank" => 0.0,
                "hi"   => $freq&1
            );
            // NOTE(review): the totals add the raw value (flag bit included)
            // while the rank below uses $freq>>1 - kept as found; confirm
            // this is intended.
            if ($freq&1) // word occurs in high priority doc
            {
                $totalHi++;
                $totalFreqHi += $freq*$multiplier;
            }
            else // word occurs in low priority doc
            {
                $totalFreqLo += $freq*$multiplier;
            }
        }
        // read name and url info for the doc
        for ($i = 0; $i < $numDocs; $i++)
        {
            fseek($file, $docInfo[$i]["idx"]);
            $docInfo[$i]["name"] = readString($file);
            $docInfo[$i]["url"] = readString($file);
        }
        $statsList[$k]["docs"] = $docInfo;
    }

    $totalFreq = ($totalHi+1)*$totalFreqLo + $totalFreqHi;
    for ($k = $start; $k < $end; $k++)
    {
        // whole word matches have a double weight
        $multiplier = $statsList[$k]["full"] ? 2 : 1;
        $numDocs = sizeof($statsList[$k]["docs"]);
        for ($i = 0; $i < $numDocs; $i++)
        {
            // compute frequency rank of the word in each doc
            $weighted = $statsList[$k]["docs"][$i]["freq"]*$multiplier;
            if ($statsList[$k]["docs"][$i]["hi"])
            {
                $weighted += $totalFreqLo;
            }
            // All-zero frequencies give a zero total; rank such docs 0.0
            // instead of dividing by zero.
            $statsList[$k]["docs"][$i]["rank"] =
                ($totalFreq != 0) ? (float)($weighted)/$totalFreq : 0.0;
        }
    }
    return $statsList;
}
search_results ()
sort_results ($docs, &$sorted)
1.5.1