Chameleon

Chameleon Svn Source Tree

Root/branches/xZenu/src/util/doxygen/src/searchindex.cpp

Source at commit 1322 created 12 years 8 months ago.
By meklort, Add doxygen to utils folder
1/******************************************************************************
2 *
3 * $Id: searchindex.cpp,v 1.7 2001/03/19 19:27:41 root Exp $
4 *
5 * Copyright (C) 1997-2011 by Dimitri van Heesch.
6 *
7 * Permission to use, copy, modify, and distribute this software and its
8 * documentation under the terms of the GNU General Public License is hereby
9 * granted. No representations are made about the suitability of this software
10 * for any purpose. It is provided "as is" without express or implied warranty.
11 * See the GNU General Public License for more details.
12 *
13 * Documents produced by Doxygen are derivative works derived from the
14 * input used in their production; they are not affected by this license.
15 *
16 */
17
18#include "qtbc.h"
19#include "searchindex.h"
20#include "config.h"
21#include "util.h"
22#include <qfile.h>
23#include <ctype.h>
24#include <qregexp.h>
25
26
27// file format: (all multi-byte values are stored in big endian format)
28// 4 byte header
29// 256*256*4 byte index (4 bytes)
30// for each index entry: a zero terminated list of words
31// for each word: a \0 terminated string + 4 byte offset to the stats info
32// padding bytes to align at 4 byte boundary
33// for each word: the number of urls (4 bytes)
34// + for each url containing the word 8 bytes statistics
35// (4 bytes index to url string + 4 bytes frequency counter)
36// for each url: a \0 terminated string
37
38const int numIndexEntries = 256*256;
39
40//--------------------------------------------------------------------
41
42IndexWord::IndexWord(const char *word) : m_word(word), m_urls(17)
43{
44 m_urls.setAutoDelete(TRUE);
45 //printf("IndexWord::IndexWord(%s)\n",word);
46}
47
48void IndexWord::addUrlIndex(int idx,bool hiPriority)
49{
50 //printf("IndexWord::addUrlIndex(%d,%d)\n",idx,hiPriority);
51 URLInfo *ui = m_urls.find(idx);
52 if (ui==0)
53 {
54 //printf("URLInfo::URLInfo(%d)\n",idx);
55 ui=new URLInfo(idx,0);
56 m_urls.insert(idx,ui);
57 }
58 ui->freq+=2;
59 if (hiPriority) ui->freq|=1; // mark as high priority document
60}
61
62//--------------------------------------------------------------------
63
64SearchIndex::SearchIndex() : m_words(328829), m_index(numIndexEntries), m_urlIndex(-1)
65{
66 int i;
67 m_words.setAutoDelete(TRUE);
68 m_urls.setAutoDelete(TRUE);
69 m_index.setAutoDelete(TRUE);
70 for (i=0;i<numIndexEntries;i++) m_index.insert(i,new QList<IndexWord>);
71}
72
73void SearchIndex::setCurrentDoc(const char *name,const char *baseName,const char *anchor)
74{
75 if (name==0 || baseName==0) return;
76 //printf("SearchIndex::setCurrentDoc(%s,%s,%s)\n",name,baseName,anchor);
77 QCString url=baseName+Config_getString("HTML_FILE_EXTENSION");
78 if (anchor) url+=(QCString)"#"+anchor;
79 m_urlIndex++;
80 m_urls.insert(m_urlIndex,new URL(name,url));
81}
82
83static int charsToIndex(const char *word)
84{
85 if (word==0) return -1;
86
87 // Fast string hashing algorithm
88 //register ushort h=0;
89 //const char *k = word;
90 //ushort mask=0xfc00;
91 //while ( *k )
92 //{
93 // h = (h&mask)^(h<<6)^(*k++);
94 //}
95 //return h;
96
97 // Simple hashing that allows for substring searching
98 uint c1=word[0];
99 if (c1==0) return -1;
100 uint c2=word[1];
101 if (c2==0) return -1;
102 return c1*256+c2;
103}
104
105void SearchIndex::addWord(const char *word,bool hiPriority,bool recurse)
106{
107 static QRegExp nextPart("[_a-z:][A-Z]");
108 //printf("SearchIndex::addWord(%s,%d)\n",word,hiPriority);
109 QString wStr(word);
110 if (wStr.isEmpty()) return;
111 wStr=wStr.lower();
112 IndexWord *w = m_words[wStr];
113 if (w==0)
114 {
115 int idx=charsToIndex(wStr);
116 if (idx<0) return;
117 w = new IndexWord(wStr);
118 //fprintf(stderr,"addWord(%s) at index %d\n",word,idx);
119 m_index[idx]->append(w);
120 m_words.insert(wStr,w);
121 }
122 w->addUrlIndex(m_urlIndex,hiPriority);
123 int i;
124 bool found=FALSE;
125 if (!recurse) // the first time we check if we can strip the prefix
126 {
127 i=getPrefixIndex(word);
128 if (i>0)
129 {
130 addWord(word+i,hiPriority,TRUE);
131 found=TRUE;
132 }
133 }
134 if (!found) // no prefix stripped
135 {
136 if ((i=nextPart.match(word))>=1)
137 {
138 addWord(word+i+1,hiPriority,TRUE);
139 }
140 }
141}
142
143
144static void writeInt(QFile &f,int index)
145{
146 f.putch(((uint)index)>>24);
147 f.putch((((uint)index)>>16)&0xff);
148 f.putch((((uint)index)>>8)&0xff);
149 f.putch(((uint)index)&0xff);
150}
151
152static void writeString(QFile &f,const char *s)
153{
154 const char *p = s;
155 while (*p) f.putch(*p++);
156 f.putch(0);
157}
158
159void SearchIndex::write(const char *fileName)
160{
161 int i;
162 int size=4; // for the header
163 size+=4*numIndexEntries; // for the index
164 int wordsOffset = size;
165 // first pass: compute the size of the wordlist
166 for (i=0;i<numIndexEntries;i++)
167 {
168 QList<IndexWord> *wlist = m_index[i];
169 if (!wlist->isEmpty())
170 {
171 QListIterator<IndexWord> iwi(*wlist);
172 IndexWord *iw;
173 for (iwi.toFirst();(iw=iwi.current());++iwi)
174 {
175 int ws = iw->word().length()+1;
176 size+=ws+4; // word + url info list offset
177 }
178 size+=1; // zero list terminator
179 }
180 }
181
182 // second pass: compute the offsets in the index
183 int indexOffsets[numIndexEntries];
184 int offset=wordsOffset;
185 for (i=0;i<numIndexEntries;i++)
186 {
187 QList<IndexWord> *wlist = m_index[i];
188 if (!wlist->isEmpty())
189 {
190 indexOffsets[i]=offset;
191 QListIterator<IndexWord> iwi(*wlist);
192 IndexWord *iw;
193 for (iwi.toFirst();(iw=iwi.current());++iwi)
194 {
195 offset+= iw->word().length()+1;
196 offset+=4; // word + offset to url info array
197 }
198 offset+=1; // zero list terminator
199 }
200 else
201 {
202 indexOffsets[i]=0;
203 }
204 }
205 int padding = size;
206 size = (size+3)&~3; // round up to 4 byte boundary
207 padding = size - padding;
208
209 //int statsOffset = size;
210 QDictIterator<IndexWord> wdi(m_words);
211 //IndexWord *iw;
212 int *wordStatOffsets = new int[m_words.count()];
213
214 int count=0;
215
216 // third pass: compute offset to stats info for each word
217 for (i=0;i<numIndexEntries;i++)
218 {
219 QList<IndexWord> *wlist = m_index[i];
220 if (!wlist->isEmpty())
221 {
222 QListIterator<IndexWord> iwi(*wlist);
223 IndexWord *iw;
224 for (iwi.toFirst();(iw=iwi.current());++iwi)
225 {
226 //printf("wordStatOffsets[%d]=%d\n",count,size);
227 wordStatOffsets[count++] = size;
228 size+=4+iw->urls().count()*8; // count + (url_index,freq) per url
229 }
230 }
231 }
232 int *urlOffsets = new int[m_urls.count()];
233 //int urlsOffset = size;
234 QIntDictIterator<URL> udi(m_urls);
235 URL *url;
236 for (udi.toFirst();(url=udi.current());++udi)
237 {
238 urlOffsets[udi.currentKey()]=size;
239 size+=url->name.length()+1+
240 url->url.length()+1;
241 }
242 //printf("Total size %x bytes (word=%x stats=%x urls=%x)\n",size,wordsOffset,statsOffset,urlsOffset);
243 QFile f(fileName);
244 if (f.open(IO_WriteOnly))
245 {
246 // write header
247 f.putch('D'); f.putch('O'); f.putch('X'); f.putch('S');
248 // write index
249 for (i=0;i<numIndexEntries;i++)
250 {
251 writeInt(f,indexOffsets[i]);
252 }
253 // write word lists
254 count=0;
255 for (i=0;i<numIndexEntries;i++)
256 {
257 QList<IndexWord> *wlist = m_index[i];
258 if (!wlist->isEmpty())
259 {
260 QListIterator<IndexWord> iwi(*wlist);
261 IndexWord *iw;
262 for (iwi.toFirst();(iw=iwi.current());++iwi)
263 {
264 writeString(f,iw->word());
265 writeInt(f,wordStatOffsets[count++]);
266 }
267 f.putch(0);
268 }
269 }
270 // write extra padding bytes
271 for (i=0;i<padding;i++) f.putch(0);
272 // write word statistics
273 for (i=0;i<numIndexEntries;i++)
274 {
275 QList<IndexWord> *wlist = m_index[i];
276 if (!wlist->isEmpty())
277 {
278 QListIterator<IndexWord> iwi(*wlist);
279 IndexWord *iw;
280 for (iwi.toFirst();(iw=iwi.current());++iwi)
281 {
282 int numUrls = iw->urls().count();
283 writeInt(f,numUrls);
284 QIntDictIterator<URLInfo> uli(iw->urls());
285 URLInfo *ui;
286 for (uli.toFirst();(ui=uli.current());++uli)
287 {
288 writeInt(f,urlOffsets[ui->urlIdx]);
289 writeInt(f,ui->freq);
290 }
291 }
292 }
293 }
294 // write urls
295 QIntDictIterator<URL> udi(m_urls);
296 URL *url;
297 for (udi.toFirst();(url=udi.current());++udi)
298 {
299 writeString(f,url->name);
300 writeString(f,url->url);
301 }
302 }
303
304 delete[] urlOffsets;
305 delete[] wordStatOffsets;
306}
307
308

Archive Download this file

Revision: 1322