Chameleon

Chameleon Svn Source Tree

Root/branches/xZenu/src/util/doxygen/src/search_php.h

Source at commit 1322 created 12 years 11 months ago.
By meklort, Add doxygen to utils folder
/* Embedded PHP function readInt($file).
 * Reads four characters from $file with fgetc and combines their byte values
 * into one integer, first byte in the most significant position
 * (b1<<24 | b2<<16 | b3<<8 | b4). Returns that integer. */
1"function readInt($file)\n"
2"{\n"
3" $b1 = ord(fgetc($file)); $b2 = ord(fgetc($file));\n"
4" $b3 = ord(fgetc($file)); $b4 = ord(fgetc($file));\n"
5" return ($b1<<24)|($b2<<16)|($b3<<8)|$b4;\n"
6"}\n"
7"\n"
/* Embedded PHP function readString($file).
 * Appends characters read with fgetc to a result string until a character
 * whose ord value is 0 is read; that terminating character is consumed but
 * not appended. Returns the accumulated string (possibly empty). */
8"function readString($file)\n"
9"{\n"
10" $result=\"\";\n"
11" while (ord($c=fgetc($file))) $result.=$c;\n"
12" return $result;\n"
13"}\n"
14"\n"
/* Embedded PHP function readHeader($file).
 * Reads four characters from $file with fgetc and returns them concatenated
 * in read order. main compares the result against the tag DOXS. */
15"function readHeader($file)\n"
16"{\n"
17" $header =fgetc($file); $header.=fgetc($file);\n"
18" $header.=fgetc($file); $header.=fgetc($file);\n"
19" return $header;\n"
20"}\n"
21"\n"
/* Embedded PHP function computeIndex($word).
 * Maps the first two bytes of $word to a slot number: first byte * 256 +
 * second byte. Returns -1 when $word is shorter than two bytes or when either
 * of those bytes has the value 0.
 * The string offsets are written with square brackets. The curly brace offset
 * form used previously was deprecated in PHP 7.4 and is a parse error from
 * PHP 8.0 on, which would stop the whole generated script from loading. */
22"function computeIndex($word)\n"
23"{\n"
24" // Simple hashing that allows for substring search\n"
25" if (strlen($word)<2) return -1;\n"
26" // high char of the index\n"
27" $hi = ord($word[0]);\n"
28" if ($hi==0) return -1;\n"
29" // low char of the index\n"
30" $lo = ord($word[1]);\n"
31" if ($lo==0) return -1;\n"
32" // return index\n"
33" return $hi*256+$lo;\n"
34"}\n"
35"\n"
/* Embedded PHP function search($file,$word,&$statsList).
 * Looks up $word in the open index $file, appends one entry to $statsList
 * (passed by reference) for every indexed word that starts with $word, fills
 * in the documents and rank values for the appended entries, and returns
 * $statsList. Nothing is appended when computeIndex rejects $word or the
 * slot for $word holds 0.
 *
 * File layout as read by this code:
 *  - a table of 4-byte entries starting at offset 4; entry number
 *    computeIndex($word) holds the file offset of a word list, or 0;
 *  - a word list is a run of zero-terminated strings, each followed by a
 *    4-byte offset to that word's statistics; the loop stops at the first
 *    string that PHP treats as false (empty string, or the string 0);
 *  - statistics are a 4-byte document count followed by that many pairs of
 *    4-byte values (document offset, frequency value); the low bit of the
 *    frequency value flags a high priority document and the remaining bits
 *    are stored as freq;
 *  - at each document offset there are two zero-terminated strings, read as
 *    name and url. */
36"function search($file,$word,&$statsList)\n"
37"{\n"
38" $index = computeIndex($word);\n"
39" if ($index!=-1) // found a valid index\n"
40" {\n"
41" fseek($file,$index*4+4); // 4 bytes per entry, skip header\n"
42" $index = readInt($file);\n"
43" if ($index) // found words matching the hash key\n"
44" {\n"
/* $start marks where the entries added by this call begin, so the later
 * passes only touch the new entries and leave earlier results alone. */
45" $start=sizeof($statsList);\n"
46" $count=$start;\n"
47" fseek($file,$index);\n"
48" $w = readString($file);\n"
49" while ($w)\n"
50" {\n"
/* The statistics offset is read for every word, matching or not, so the
 * file position advances to the next word in the list. */
51" $statIdx = readInt($file);\n"
52" if ($word==substr($w,0,strlen($word)))\n"
53" { // found word that matches (as substring)\n"
54" $statsList[$count++]=array(\n"
55" \"word\"=>$word,\n"
56" \"match\"=>$w,\n"
57" \"index\"=>$statIdx,\n"
58" \"full\"=>strlen($w)==strlen($word),\n"
59" \"docs\"=>array()\n"
60" );\n"
61" }\n"
62" $w = readString($file);\n"
63" }\n"
/* First pass over the new entries: load the document lists and accumulate
 * the totals used to normalise the ranks. */
64" $totalHi=0;\n"
65" $totalFreqHi=0;\n"
66" $totalFreqLo=0;\n"
67" for ($count=$start;$count<sizeof($statsList);$count++)\n"
68" {\n"
69" $statInfo = &$statsList[$count];\n"
70" $multiplier = 1;\n"
71" // whole word matches have a double weight\n"
72" if ($statInfo[\"full\"]) $multiplier=2;\n"
73" fseek($file,$statInfo[\"index\"]); \n"
74" $numDocs = readInt($file);\n"
75" $docInfo = array();\n"
76" // read docs info + occurrence frequency of the word\n"
77" for ($i=0;$i<$numDocs;$i++)\n"
78" {\n"
79" $idx=readInt($file); \n"
80" $freq=readInt($file); \n"
81" $docInfo[$i]=array(\"idx\" => $idx,\n"
82" \"freq\" => $freq>>1,\n"
83" \"rank\" => 0.0,\n"
84" \"hi\" => $freq&1\n"
85" );\n"
/* NOTE(review): the totals below add the unshifted $freq (flag bit
 * included), while the stored freq and the later rank numerator use
 * $freq>>1. Confirm whether that difference is intended. */
86" if ($freq&1) // word occurs in high priority doc\n"
87" {\n"
88" $totalHi++;\n"
89" $totalFreqHi+=$freq*$multiplier;\n"
90" }\n"
91" else // word occurs in low priority doc\n"
92" {\n"
93" $totalFreqLo+=$freq*$multiplier;\n"
94" }\n"
95" }\n"
96" // read name and url info for the doc\n"
97" for ($i=0;$i<$numDocs;$i++)\n"
98" {\n"
99" fseek($file,$docInfo[$i][\"idx\"]);\n"
100" $docInfo[$i][\"name\"]=readString($file);\n"
101" $docInfo[$i][\"url\"]=readString($file);\n"
102" }\n"
103" $statInfo[\"docs\"]=$docInfo;\n"
104" }\n"
/* Second pass: rank of a document is its weighted frequency divided by
 * $totalFreq; high priority documents additionally get $totalFreqLo added
 * to the numerator. */
105" $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;\n"
106" for ($count=$start;$count<sizeof($statsList);$count++)\n"
107" {\n"
108" $statInfo = &$statsList[$count];\n"
109" $multiplier = 1;\n"
110" // whole word matches have a double weight\n"
111" if ($statInfo[\"full\"]) $multiplier=2;\n"
112" for ($i=0;$i<sizeof($statInfo[\"docs\"]);$i++)\n"
113" {\n"
114" $docInfo = &$statInfo[\"docs\"];\n"
115" // compute frequency rank of the word in each doc\n"
116" $freq=$docInfo[$i][\"freq\"];\n"
117" if ($docInfo[$i][\"hi\"])\n"
118" {\n"
119" $statInfo[\"docs\"][$i][\"rank\"]=\n"
120" (float)($freq*$multiplier+$totalFreqLo)/$totalFreq;\n"
121" }\n"
122" else\n"
123" {\n"
124" $statInfo[\"docs\"][$i][\"rank\"]=\n"
125" (float)($freq*$multiplier)/$totalFreq;\n"
126" }\n"
127" }\n"
128" }\n"
129" }\n"
130" }\n"
131" return $statsList;\n"
132"}\n"
133"\n"
/* Embedded PHP function combine_results($results,&$docs).
 * Folds the per-word entries in $results (as built by search) into $docs,
 * which is passed by reference and keyed by document url. For each document
 * of each word entry:
 *  - if the url is already present, the rank of the entry is increased by
 *    the rank of this occurrence;
 *  - otherwise a new entry with url, name and rank is created;
 *  - in both cases a record with the query word, the matched index word and
 *    the frequency is appended to the words list of the document.
 * Returns $docs. */
134"function combine_results($results,&$docs)\n"
135"{\n"
136" foreach ($results as $wordInfo)\n"
137" {\n"
138" $docsList = &$wordInfo[\"docs\"];\n"
139" foreach ($docsList as $di)\n"
140" {\n"
141" $key=$di[\"url\"];\n"
142" $rank=$di[\"rank\"];\n"
143" if (isset($docs[$key]))\n"
144" {\n"
145" $docs[$key][\"rank\"]+=$rank;\n"
146" }\n"
147" else\n"
148" {\n"
149" $docs[$key] = array(\"url\"=>$key,\n"
150" \"name\"=>$di[\"name\"],\n"
151" \"rank\"=>$rank\n"
152" );\n"
153" }\n"
154" $docs[$key][\"words\"][] = array(\n"
155" \"word\"=>$wordInfo[\"word\"],\n"
156" \"match\"=>$wordInfo[\"match\"],\n"
157" \"freq\"=>$di[\"freq\"]\n"
158" );\n"
159" }\n"
160" }\n"
161" return $docs;\n"
162"}\n"
163"\n"
/* Embedded PHP function filter_results($docs,&$requiredWords,&$forbiddenWords).
 * Returns the subset of $docs (same keys) that passes both filters:
 *  - every word in $requiredWords must equal the word field of at least one
 *    record in the words list of the document;
 *  - no record in the words list may have a word field that is contained in
 *    $forbiddenWords.
 * An empty filter list disables that filter.
 * The loop over $docs uses foreach: the each function used previously was
 * deprecated in PHP 7.2 and removed in PHP 8.0. Iteration order and the
 * key and value seen per step are the same. */
164"function filter_results($docs,&$requiredWords,&$forbiddenWords)\n"
165"{\n"
166" $filteredDocs=array();\n"
167" foreach ($docs as $key => $val)\n"
168" {\n"
169" $words = &$docs[$key][\"words\"];\n"
170" $copy=1; // copy entry by default\n"
171" if (sizeof($requiredWords)>0)\n"
172" {\n"
173" foreach ($requiredWords as $reqWord)\n"
174" {\n"
175" $found=0;\n"
176" foreach ($words as $wordInfo)\n"
177" { \n"
178" $found = $wordInfo[\"word\"]==$reqWord;\n"
179" if ($found) break;\n"
180" }\n"
181" if (!$found) \n"
182" {\n"
183" $copy=0; // document lacks at least one required word\n"
184" break;\n"
185" }\n"
186" }\n"
187" }\n"
188" if (sizeof($forbiddenWords)>0)\n"
189" {\n"
190" foreach ($words as $wordInfo)\n"
191" {\n"
192" if (in_array($wordInfo[\"word\"],$forbiddenWords))\n"
193" {\n"
194" $copy=0; // document contains a forbidden word\n"
195" break;\n"
196" }\n"
197" }\n"
198" }\n"
199" if ($copy) $filteredDocs[$key]=$docs[$key];\n"
200" }\n"
201" return $filteredDocs;\n"
202"}\n"
203"\n"
/* Embedded PHP function compare_rank($a,$b).
 * Comparison callback on the rank field of two entries: returns 0 when the
 * ranks are equal, -1 when $a has the larger rank and 1 otherwise, so a sort
 * using it places larger ranks first. Passed by name to usort in
 * sort_results. */
204"function compare_rank($a,$b)\n"
205"{\n"
206" if ($a[\"rank\"] == $b[\"rank\"]) \n"
207" {\n"
208" return 0;\n"
209" }\n"
210" return ($a[\"rank\"]>$b[\"rank\"]) ? -1 : 1; \n"
211"}\n"
212"\n"
/* Embedded PHP function sort_results($docs,&$sorted).
 * Copies $docs into $sorted (passed by reference), sorts $sorted with usort
 * and the compare_rank callback, and returns it. usort reindexes the array,
 * so the keys of $docs are not kept in the result. */
213"function sort_results($docs,&$sorted)\n"
214"{\n"
215" $sorted = $docs;\n"
216" usort($sorted,\"compare_rank\");\n"
217" return $sorted;\n"
218"}\n"
219"\n"
/* Embedded PHP function report_results(&$docs).
 * Echoes the result page body: a header div whose title comes from
 * search_results(), then a div of class searchresults containing a table.
 * With no documents the table holds one row with matches_text(0); otherwise
 * a row with matches_text(count) followed, per document, by a numbered row
 * linking name to url and a row listing each matched word with its
 * frequency. search_results, matches_text and report_matches are called
 * here but not defined in this part of the script.
 * The searchresults div opened here is not closed in this function; main
 * echoes a closing div right after calling it.
 * NOTE(review): url, name, word and match values are echoed without HTML
 * escaping - confirm the index contents and query words are safe to emit. */
220"function report_results(&$docs)\n"
221"{\n"
222" echo \"<div class=\\\"header\\\">\";\n"
223" echo \" <div class=\\\"headertitle\\\">\\n\";\n"
224" echo \" <h1>\".search_results().\"</h1>\\n\";\n"
225" echo \" </div>\\n\";\n"
226" echo \"</div>\\n\";\n"
227" echo \"<div class=\\\"searchresults\\\">\\n\";\n"
228" echo \"<table cellspacing=\\\"2\\\">\\n\";\n"
229" $numDocs = sizeof($docs);\n"
230" if ($numDocs==0)\n"
231" {\n"
232" echo \" <tr>\\n\";\n"
233" echo \" <td colspan=\\\"2\\\">\".matches_text(0).\"</td>\\n\";\n"
234" echo \" </tr>\\n\";\n"
235" }\n"
236" else\n"
237" {\n"
238" echo \" <tr>\\n\";\n"
239" echo \" <td colspan=\\\"2\\\">\".matches_text($numDocs);\n"
240" echo \"\\n\";\n"
241" echo \" </td>\\n\";\n"
242" echo \" </tr>\\n\";\n"
243" $num=1;\n"
244" foreach ($docs as $doc)\n"
245" {\n"
246" echo \" <tr>\\n\";\n"
247" echo \" <td align=\\\"right\\\">$num.</td>\";\n"
248" echo \"<td><a class=\\\"el\\\" href=\\\"\".$doc[\"url\"].\"\\\">\".$doc[\"name\"].\"</a></td>\\n\";\n"
/* NOTE(review): the next line opens a second table row without an explicit
 * end tag for the row opened above - confirm this is intended. */
249" echo \" <tr>\\n\";\n"
250" echo \" <td></td><td class=\\\"tiny\\\">\".report_matches().\" \";\n"
251" foreach ($doc[\"words\"] as $wordInfo)\n"
252" {\n"
/* The query word is printed in bold, followed by the rest of the matched
 * index word and the frequency in parentheses. */
253" $word = $wordInfo[\"word\"];\n"
254" $matchRight = substr($wordInfo[\"match\"],strlen($word));\n"
255" echo \"<b>$word</b>$matchRight(\".$wordInfo[\"freq\"].\") \";\n"
256" }\n"
257" echo \" </td>\\n\";\n"
258" echo \" </tr>\\n\";\n"
259" $num++;\n"
260" }\n"
261" }\n"
262" echo \"</table>\\n\";\n"
263"}\n"
264"\n"
/* Embedded PHP function main().
 * Entry routine of the search page:
 *  1. stops with an error message when the PHP version check fails, when
 *     search/search.idx cannot be opened, or when its first four bytes are
 *     not DOXS;
 *  2. takes the query from the query GET parameter (empty when absent) and
 *     passes a copy with every character outside a-z, A-Z, 0-9, dash,
 *     underscore and dot replaced by a space to end_form;
 *  3. splits the query on spaces; a leading + marks a required word and a
 *     leading - a forbidden word; each distinct word is searched once, in
 *     lower case;
 *  4. combines, filters, sorts and reports the results, echoes a closing
 *     div, calls end_page and closes the index file.
 * end_form and end_page are called here but not defined in this part of the
 * script.
 *
 * Changes compared with the previous text of this function:
 *  - string offsets use square brackets; the curly brace form was deprecated
 *    in PHP 7.4 and is a parse error from PHP 8.0 on;
 *  - required and forbidden words are stored in lower case. search records
 *    the lower-cased query word in every result, and filter_results
 *    compares against that value, so a word stored with capitals could never
 *    match: a required word then rejected every document and a forbidden
 *    word filtered nothing.
 * NOTE(review): the version check compares version strings with strcmp,
 * which is a plain string comparison; version_compare would be the numeric
 * aware alternative. Left unchanged here. */
265"function main()\n"
266"{\n"
267" if(strcmp('4.1.0', phpversion()) > 0) \n"
268" {\n"
269" die(\"Error: PHP version 4.1.0 or above required!\");\n"
270" }\n"
271" if (!($file=fopen(\"search/search.idx\",\"rb\"))) \n"
272" {\n"
273" die(\"Error: Search index file could NOT be opened!\");\n"
274" }\n"
275" if (readHeader($file)!=\"DOXS\")\n"
276" {\n"
277" die(\"Error: Header of index file is invalid!\");\n"
278" }\n"
279" $query=\"\";\n"
280" if (array_key_exists(\"query\", $_GET))\n"
281" {\n"
282" $query=$_GET[\"query\"];\n"
283" }\n"
284" end_form(preg_replace(\"/[^a-zA-Z0-9\\-\\_\\.]/i\", \" \", $query ));\n"
285" $results = array();\n"
286" $requiredWords = array();\n"
287" $forbiddenWords = array();\n"
288" $foundWords = array();\n"
289" $word=strtok($query,\" \");\n"
290" while ($word) // for each word in the search query\n"
291" {\n"
292" if (($word[0]=='+')) { $word=substr($word,1); $requiredWords[]=strtolower($word); }\n"
293" if (($word[0]=='-')) { $word=substr($word,1); $forbiddenWords[]=strtolower($word); }\n"
294" if (!in_array($word,$foundWords))\n"
295" {\n"
296" $foundWords[]=$word;\n"
297" search($file,strtolower($word),$results);\n"
298" }\n"
299" $word=strtok(\" \");\n"
300" }\n"
301" $docs = array();\n"
302" combine_results($results,$docs);\n"
303" // filter out documents with forbidden word or that do not contain\n"
304" // required words\n"
305" $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);\n"
306" // sort the results based on rank\n"
307" $sorted = array();\n"
308" sort_results($filteredDocs,$sorted);\n"
309" // report results to the user\n"
310" report_results($sorted);\n"
311" echo \"</div>\\n\";\n"
312" end_page();\n"
313" fclose($file);\n"
314"}\n"
315"\n"
/* Top-level statement of the embedded script: invokes main. */
316"main();\n"
317"\n"
318

Archive Download this file

Revision: 1322