Chameleon

Chameleon Svn Source Tree

Root/branches/xZenu/src/util/doxygen/src/search.php

Source at commit 1322 created 12 years 8 months ago.
By meklort, Add doxygen to utils folder
/**
 * Reads four bytes from the stream and combines them into one integer,
 * treating the first byte read as the most significant one.
 *
 * @param resource $file open, readable stream positioned at the integer
 * @return int the combined value of the four bytes
 */
function readInt($file)
{
  $value = 0;
  // fold in one byte at a time, most significant byte first
  for ($n=0;$n<4;$n++)
  {
    $value = ($value<<8) | ord(fgetc($file));
  }
  return $value;
}
7
/**
 * Reads characters from the stream up to (and consuming) the next NUL byte.
 * Reading also stops when fgetc() reports that no more data is available.
 *
 * @param resource $file open, readable stream positioned at the string
 * @return string the characters read, without the terminating NUL byte
 */
function readString($file)
{
  $chars = "";
  for (;;)
  {
    $c = fgetc($file);
    // a NUL byte or a failed read both end the string
    if ($c===false || $c==="\0") break;
    $chars .= $c;
  }
  return $chars;
}
14
/**
 * Reads the next four characters from the stream and returns them as one
 * string (main() compares the result against the "DOXS" signature).
 *
 * @param resource $file open, readable stream
 * @return string the four characters read
 */
function readHeader($file)
{
  $magic = "";
  for ($n=0;$n<4;$n++)
  {
    $magic .= fgetc($file);
  }
  return $magic;
}
21
/**
 * Computes the hash key of a word from its first two characters.
 *
 * Because only the first two characters take part in the hash, every word
 * sharing that two character prefix maps to the same key, which is what
 * allows prefix (substring) searches.
 *
 * @param string $word the word to hash
 * @return int $hi*256+$lo for the first two bytes of the word, or -1 when
 *             the word is shorter than two characters or one of the first
 *             two bytes is a NUL byte
 */
function computeIndex($word)
{
  // Simple hashing that allows for substring search
  if (strlen($word)<2) return -1;
  // high char of the index
  // (square brackets: the $word{0} offset syntax is a parse error in PHP 8)
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
35
/**
 * Looks up all indexed words that start with $word and appends one entry
 * per matching word to $statsList, including the documents the word occurs
 * in and a rank for each of those documents.
 *
 * File layout as read by this function:
 *  - at offset computeIndex($word)*4+4: a 4 byte offset to the list of words
 *    sharing the hash key (0 means there is no such list);
 *  - the word list: repeated (NUL terminated word, 4 byte offset to the
 *    word's statistics), terminated by an empty word;
 *  - the statistics: a 4 byte document count followed by that many
 *    (4 byte document offset, 4 byte frequency) pairs; the lowest bit of the
 *    frequency value flags a high priority document, the remaining bits are
 *    the occurrence count;
 *  - at each document offset: NUL terminated name, then NUL terminated url.
 *
 * Each appended entry is an array with the keys "word" (the search word),
 * "match" (the indexed word), "index" (statistics offset), "full" (true for
 * an exact match) and "docs" (list of arrays with the keys "idx", "freq",
 * "rank", "hi", "name" and "url").
 *
 * @param resource $file       open search index, readable and seekable
 * @param string   $word       word to search for (main() passes it lowercased)
 * @param array    $statsList  result list, new entries are appended to it
 * @return array the updated $statsList
 */
function search($file,$word,&$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // no valid hash key for this word

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words stored for this hash key

  $start = sizeof($statsList);
  $count = $start;
  $wordLen = strlen($word);

  // Pass 1: collect every word of the list that starts with $word.
  fseek($file,$listOffset);
  $w = readString($file);
  // Compare against "" explicitly: a plain truth test would also stop at
  // an indexed word "0".
  while ($w!=="")
  {
    $statIdx = readInt($file);
    // Strict comparison: with == two numeric looking strings such as
    // "000" and "00" compare equal although one is no prefix of the other.
    if (substr($w,0,$wordLen)===$word)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
          );
    }
    $w = readString($file);
  }
  $end = sizeof($statsList);

  // Pass 2: read the document statistics of every match and accumulate
  // the totals used for ranking.
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    fseek($file,$statsList[$count]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"  => $idx,
                         "freq" => $freq>>1,
                         "rank" => 0.0,
                         "hi"   => $freq&1
                        );
      // NOTE(review): the totals add the raw value (flag bit included)
      // while the ranks below use the shifted "freq" - kept as is, confirm
      // whether this is intended.
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi+=$freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo+=$freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$count]["docs"]=$docInfo;
  }

  // Pass 3: compute the frequency rank of the word in each doc.
  $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    $numDocs = sizeof($statsList[$count]["docs"]);
    for ($i=0;$i<$numDocs;$i++)
    {
      $weight = $statsList[$count]["docs"][$i]["freq"]*$multiplier;
      if ($statsList[$count]["docs"][$i]["hi"])
      {
        $weight += $totalFreqLo;
      }
      // guard against a zero total (e.g. only zero frequencies in the
      // index) instead of dividing by zero
      $statsList[$count]["docs"][$i]["rank"] =
        ($totalFreq!=0) ? (float)($weight)/$totalFreq : 0.0;
    }
  }
  return $statsList;
}
133
/**
 * Merges per-word search results into a per-document table.
 *
 * Every document listed under any word of $results gets one entry in $docs,
 * keyed by its url. The entry holds "url", "name", the sum of the ranks of
 * all occurrences ("rank") and a "words" list with one array
 * ("word", "match", "freq") per occurrence.
 *
 * @param array $results list of word entries, each with the keys "word",
 *                       "match" and "docs"
 * @param array $docs    table to add to; modified in place
 * @return array the updated $docs
 */
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $url = $di["url"];
      if (!isset($docs[$url]))
      {
        // first time this document shows up: create its entry
        $docs[$url] = array("url"=>$url,
                            "name"=>$di["name"],
                            "rank"=>$di["rank"]
                           );
      }
      else
      {
        // document already known: accumulate the rank
        $docs[$url]["rank"] += $di["rank"];
      }
      // remember which word matched and how often
      $docs[$url]["words"][] = array(
          "word"=>$wordInfo["word"],
          "match"=>$wordInfo["match"],
          "freq"=>$di["freq"]
          );
    }
  }
  return $docs;
}
163
/**
 * Returns the documents that contain every required word and none of the
 * forbidden words.
 *
 * A document "contains" a word when one of the entries of its "words" list
 * has that word as its "word" value. Keys of $docs are preserved.
 *
 * @param array $docs            documents keyed by url, each with a "words" list
 * @param array $requiredWords   words every returned document must contain
 * @param array $forbiddenWords  words no returned document may contain
 * @return array the documents that passed both checks
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  // foreach instead of each(): each() no longer exists in PHP 8
  foreach ($docs as $key => $doc)
  {
    $words = $doc["words"];
    $copy=1; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      $found=0;
      foreach ($words as $wordInfo)
      {
        // strict comparison so that numeric looking words such as "10"
        // and "1e1" are not treated as equal
        if ($wordInfo["word"]===$reqWord)
        {
          $found=1;
          break;
        }
      }
      if (!$found)
      {
        $copy=0; // document lacks at least one of the required words
        break;
      }
    }
    if ($copy)
    {
      foreach ($words as $wordInfo)
      {
        if (in_array($wordInfo["word"],$forbiddenWords,true))
        {
          $copy=0; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}
203
/**
 * Comparison callback that orders entries by descending "rank".
 *
 * @param array $a first entry, must have a "rank" value
 * @param array $b second entry, must have a "rank" value
 * @return int -1 when $a ranks higher than $b, 0 when both ranks are equal,
 *             1 otherwise
 */
function compare_rank($a,$b)
{
  if ($a["rank"] > $b["rank"])
  {
    return -1; // higher rank sorts first
  }
  return ($a["rank"] == $b["rank"]) ? 0 : 1;
}
212
/**
 * Sorts documents with compare_rank() and hands the sorted list back both
 * through $sorted and as return value. usort() renumbers the list, so the
 * keys of $docs are not kept.
 *
 * @param array $docs    documents to sort, each with a "rank" value
 * @param array $sorted  receives the sorted list
 * @return array the sorted list
 */
function sort_results($docs,&$sorted)
{
  $ranked = $docs;
  usort($ranked,"compare_rank");
  $sorted = $ranked;
  return $sorted;
}
219
/**
 * Writes the search results as HTML to the output.
 *
 * Emits the page header block, opens the "searchresults" div and prints a
 * table with one numbered link row plus one row of matched words per
 * document. The "searchresults" div is not closed here; main() emits the
 * closing tag after calling this function.
 *
 * The texts come from search_results(), matches_text() and
 * report_matches(), which are not defined in this file.
 *
 * @param array $docs list of documents, each with the keys "url", "name"
 *                    and "words" (list of arrays with "word", "match" and
 *                    "freq")
 * @return void
 */
function report_results(&$docs)
{
  echo "<div class=\"header\">";
  echo " <div class=\"headertitle\">\n";
  echo " <h1>".search_results()."</h1>\n";
  echo " </div>\n";
  echo "</div>\n";
  echo "<div class=\"searchresults\">\n";
  echo "<table cellspacing=\"2\">\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo " </tr>\n";
  }
  else
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo " </td>\n";
    echo " </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      echo " <tr>\n";
      echo " <td align=\"right\">$num.</td>";
      // NOTE(review): url and name are taken from the index and written
      // unescaped - presumably they are already valid HTML; verify against
      // the index generator before adding escaping here.
      echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      // close the link row before opening the row with the matched words
      echo " </tr>\n";
      echo " <tr>\n";
      echo " <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        // the word originates from the user's query, so escape it (and the
        // rest of the matched word) before writing it into the page
        $word = htmlspecialchars($wordInfo["word"]);
        $matchRight = htmlspecialchars(
            (string)substr($wordInfo["match"],strlen($wordInfo["word"])));
        echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
      }
      echo " </td>\n";
      echo " </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
264
/**
 * Entry point of the search page.
 *
 * Opens search/search.idx, verifies its "DOXS" signature, splits the
 * "query" request parameter into words, searches the index for each word
 * and reports the combined, filtered and ranked documents.
 *
 * A word prefixed with '+' must occur in a reported document, a word
 * prefixed with '-' must not. Words are lowercased before they are
 * searched and recorded, so they compare equal to the words stored by
 * search().
 *
 * end_form() and end_page() are not defined in this file.
 *
 * @return void
 */
function main()
{
  // version_compare() instead of strcmp(): a plain string comparison
  // would rank "10.0.0" below "4.1.0"
  if (version_compare(phpversion(),'4.1.0','<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search/search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  // only accept a plain string; "query[]=x" would deliver an array
  if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  end_form(preg_replace("/[^a-zA-Z0-9\-\_\.]/i", " ", $query ));
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  // strtok() returns false when there are no more tokens; testing for
  // that explicitly keeps a token "0" from ending the loop early
  while ($word!==false) // for each word in the search query
  {
    // search() stores the lowercased word, so record the same form here;
    // otherwise "+Foo" could never be matched by filter_results()
    $word=strtolower($word);
    // square brackets: the $word{0} offset syntax is a parse error in PHP 8
    if ($word!=="" && $word[0]=='+')
    {
      $word=(string)substr($word,1);
      // a lone "+" carries no word and must not become a requirement
      if ($word!=="") $requiredWords[]=$word;
    }
    if ($word!=="" && $word[0]=='-')
    {
      $word=(string)substr($word,1);
      if ($word!=="") $forbiddenWords[]=$word;
    }
    if ($word!=="" && !in_array($word,$foundWords,true))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  echo "</div>\n"; // closes the "searchresults" div opened by report_results()
  end_page();
  fclose($file);
}

main();
317
318

Archive Download this file

Revision: 1322