branches/xZenu/src/util/doxygen/src/searchindex.cpp - Chameleon Svn Source Tree - Chameleon open source boot loader project.

Root/branches/xZenu/src/util/doxygen/src/searchindex.cpp

Source at commit 1322 created 12 years 8 months ago. By meklort, Add doxygen to utils folder
1	/******************************************************************************␊
2	*␊
3	* $Id: searchindex.cpp,v 1.7 2001/03/19 19:27:41 root Exp $␊
4	*␊
5	* Copyright (C) 1997-2011 by Dimitri van Heesch.␊
6	*␊
7	* Permission to use, copy, modify, and distribute this software and its␊
8	* documentation under the terms of the GNU General Public License is hereby ␊
9	* granted. No representations are made about the suitability of this software ␊
10	* for any purpose. It is provided "as is" without express or implied warranty.␊
11	* See the GNU General Public License for more details.␊
12	*␊
13	* Documents produced by Doxygen are derivative works derived from the␊
14	* input used in their production; they are not affected by this license.␊
15	*␊
16	*/␊
17	␊
18	#include "qtbc.h"␊
19	#include "searchindex.h"␊
20	#include "config.h"␊
21	#include "util.h"␊
22	#include <qfile.h>␊
23	#include <ctype.h>␊
24	#include <qregexp.h>␊
25	␊
26	␊
27	// file format: (all multi-byte values are stored in big endian format)␊
28	// 4 byte header␊
29	// 2562564 byte index (4 bytes)␊
30	// for each index entry: a zero terminated list of words ␊
31	// for each word: a \0 terminated string + 4 byte offset to the stats info␊
32	// padding bytes to align at 4 byte boundary␊
33	// for each word: the number of urls (4 bytes) ␊
34	// + for each url containing the word 8 bytes statistics␊
35	// (4 bytes index to url string + 4 bytes frequency counter)␊
36	// for each url: a \0 terminated string␊
37	␊
38	const int numIndexEntries = 256*256;␊
39	␊
40	//--------------------------------------------------------------------␊
41	␊
42	IndexWord::IndexWord(const char *word) : m_word(word), m_urls(17)␊
43	{␊
44	m_urls.setAutoDelete(TRUE);␊
45	//printf("IndexWord::IndexWord(%s)\n",word);␊
46	}␊
47	␊
48	void IndexWord::addUrlIndex(int idx,bool hiPriority)␊
49	{␊
50	//printf("IndexWord::addUrlIndex(%d,%d)\n",idx,hiPriority);␊
51	URLInfo *ui = m_urls.find(idx);␊
52	if (ui==0)␊
53	{␊
54	//printf("URLInfo::URLInfo(%d)\n",idx);␊
55	ui=new URLInfo(idx,0);␊
56	m_urls.insert(idx,ui);␊
57	}␊
58	ui->freq+=2;␊
59	if (hiPriority) ui->freq\|=1; // mark as high priority document␊
60	}␊
61	␊
62	//--------------------------------------------------------------------␊
63	␊
64	SearchIndex::SearchIndex() : m_words(328829), m_index(numIndexEntries), m_urlIndex(-1)␊
65	{␊
66	int i;␊
67	m_words.setAutoDelete(TRUE);␊
68	m_urls.setAutoDelete(TRUE);␊
69	m_index.setAutoDelete(TRUE);␊
70	for (i=0;i<numIndexEntries;i++) m_index.insert(i,new QList<IndexWord>);␊
71	}␊
72	␊
73	void SearchIndex::setCurrentDoc(const char name,const char baseName,const char *anchor)␊
74	{␊
75	if (name==0 \|\| baseName==0) return;␊
76	//printf("SearchIndex::setCurrentDoc(%s,%s,%s)\n",name,baseName,anchor);␊
77	QCString url=baseName+Config_getString("HTML_FILE_EXTENSION");␊
78	if (anchor) url+=(QCString)"#"+anchor; ␊
79	m_urlIndex++;␊
80	m_urls.insert(m_urlIndex,new URL(name,url));␊
81	}␊
82	␊
83	static int charsToIndex(const char *word)␊
84	{␊
85	if (word==0) return -1;␊
86	␊
87	// Fast string hashing algorithm␊
88	//register ushort h=0;␊
89	//const char *k = word;␊
90	//ushort mask=0xfc00;␊
91	//while ( *k ) ␊
92	//{␊
93	// h = (h&mask)^(h<<6)^(*k++);␊
94	//}␊
95	//return h;␊
96	␊
97	// Simple hashing that allows for substring searching␊
98	uint c1=word[0];␊
99	if (c1==0) return -1;␊
100	uint c2=word[1];␊
101	if (c2==0) return -1;␊
102	return c1*256+c2;␊
103	}␊
104	␊
105	void SearchIndex::addWord(const char *word,bool hiPriority,bool recurse)␊
106	{␊
107	static QRegExp nextPart("[_a-z:][A-Z]");␊
108	//printf("SearchIndex::addWord(%s,%d)\n",word,hiPriority);␊
109	QString wStr(word);␊
110	if (wStr.isEmpty()) return;␊
111	wStr=wStr.lower();␊
112	IndexWord *w = m_words[wStr];␊
113	if (w==0)␊
114	{␊
115	int idx=charsToIndex(wStr);␊
116	if (idx<0) return;␊
117	w = new IndexWord(wStr);␊
118	//fprintf(stderr,"addWord(%s) at index %d\n",word,idx);␊
119	m_index[idx]->append(w);␊
120	m_words.insert(wStr,w);␊
121	}␊
122	w->addUrlIndex(m_urlIndex,hiPriority);␊
123	int i;␊
124	bool found=FALSE;␊
125	if (!recurse) // the first time we check if we can strip the prefix␊
126	{␊
127	i=getPrefixIndex(word);␊
128	if (i>0)␊
129	{␊
130	addWord(word+i,hiPriority,TRUE);␊
131	found=TRUE;␊
132	}␊
133	}␊
134	if (!found) // no prefix stripped␊
135	{␊
136	if ((i=nextPart.match(word))>=1)␊
137	{␊
138	addWord(word+i+1,hiPriority,TRUE);␊
139	}␊
140	}␊
141	}␊
142	␊
143	␊
144	static void writeInt(QFile &f,int index)␊
145	{␊
146	f.putch(((uint)index)>>24);␊
147	f.putch((((uint)index)>>16)&0xff);␊
148	f.putch((((uint)index)>>8)&0xff);␊
149	f.putch(((uint)index)&0xff);␊
150	}␊
151	␊
152	static void writeString(QFile &f,const char *s)␊
153	{␊
154	const char *p = s;␊
155	while (p) f.putch(p++);␊
156	f.putch(0);␊
157	}␊
158	␊
159	void SearchIndex::write(const char *fileName)␊
160	{␊
161	int i;␊
162	int size=4; // for the header␊
163	size+=4*numIndexEntries; // for the index␊
164	int wordsOffset = size;␊
165	// first pass: compute the size of the wordlist␊
166	for (i=0;i<numIndexEntries;i++)␊
167	{␊
168	QList<IndexWord> *wlist = m_index[i];␊
169	if (!wlist->isEmpty())␊
170	{␊
171	QListIterator<IndexWord> iwi(*wlist);␊
172	IndexWord *iw;␊
173	for (iwi.toFirst();(iw=iwi.current());++iwi)␊
174	{␊
175	int ws = iw->word().length()+1; ␊
176	size+=ws+4; // word + url info list offset␊
177	}␊
178	size+=1; // zero list terminator␊
179	}␊
180	}␊
181	␊
182	// second pass: compute the offsets in the index␊
183	int indexOffsets[numIndexEntries];␊
184	int offset=wordsOffset;␊
185	for (i=0;i<numIndexEntries;i++)␊
186	{␊
187	QList<IndexWord> *wlist = m_index[i];␊
188	if (!wlist->isEmpty())␊
189	{␊
190	indexOffsets[i]=offset;␊
191	QListIterator<IndexWord> iwi(*wlist);␊
192	IndexWord *iw;␊
193	for (iwi.toFirst();(iw=iwi.current());++iwi)␊
194	{␊
195	offset+= iw->word().length()+1; ␊
196	offset+=4; // word + offset to url info array ␊
197	}␊
198	offset+=1; // zero list terminator␊
199	}␊
200	else␊
201	{␊
202	indexOffsets[i]=0;␊
203	}␊
204	}␊
205	int padding = size;␊
206	size = (size+3)&~3; // round up to 4 byte boundary␊
207	padding = size - padding;␊
208	␊
209	//int statsOffset = size;␊
210	QDictIterator<IndexWord> wdi(m_words);␊
211	//IndexWord *iw;␊
212	int *wordStatOffsets = new int[m_words.count()];␊
213	␊
214	int count=0;␊
215	␊
216	// third pass: compute offset to stats info for each word␊
217	for (i=0;i<numIndexEntries;i++)␊
218	{␊
219	QList<IndexWord> *wlist = m_index[i];␊
220	if (!wlist->isEmpty())␊
221	{␊
222	QListIterator<IndexWord> iwi(*wlist);␊
223	IndexWord *iw;␊
224	for (iwi.toFirst();(iw=iwi.current());++iwi)␊
225	{␊
226	//printf("wordStatOffsets[%d]=%d\n",count,size);␊
227	wordStatOffsets[count++] = size;␊
228	size+=4+iw->urls().count()*8; // count + (url_index,freq) per url␊
229	}␊
230	}␊
231	}␊
232	int *urlOffsets = new int[m_urls.count()];␊
233	//int urlsOffset = size;␊
234	QIntDictIterator<URL> udi(m_urls);␊
235	URL *url;␊
236	for (udi.toFirst();(url=udi.current());++udi)␊
237	{␊
238	urlOffsets[udi.currentKey()]=size;␊
239	size+=url->name.length()+1+␊
240	url->url.length()+1;␊
241	}␊
242	//printf("Total size %x bytes (word=%x stats=%x urls=%x)\n",size,wordsOffset,statsOffset,urlsOffset);␊
243	QFile f(fileName);␊
244	if (f.open(IO_WriteOnly))␊
245	{␊
246	// write header␊
247	f.putch('D'); f.putch('O'); f.putch('X'); f.putch('S');␊
248	// write index␊
249	for (i=0;i<numIndexEntries;i++)␊
250	{␊
251	writeInt(f,indexOffsets[i]);␊
252	}␊
253	// write word lists␊
254	count=0;␊
255	for (i=0;i<numIndexEntries;i++)␊
256	{␊
257	QList<IndexWord> *wlist = m_index[i];␊
258	if (!wlist->isEmpty())␊
259	{␊
260	QListIterator<IndexWord> iwi(*wlist);␊
261	IndexWord *iw;␊
262	for (iwi.toFirst();(iw=iwi.current());++iwi)␊
263	{␊
264	writeString(f,iw->word());␊
265	writeInt(f,wordStatOffsets[count++]);␊
266	}␊
267	f.putch(0);␊
268	}␊
269	}␊
270	// write extra padding bytes␊
271	for (i=0;i<padding;i++) f.putch(0);␊
272	// write word statistics␊
273	for (i=0;i<numIndexEntries;i++)␊
274	{␊
275	QList<IndexWord> *wlist = m_index[i];␊
276	if (!wlist->isEmpty())␊
277	{␊
278	QListIterator<IndexWord> iwi(*wlist);␊
279	IndexWord *iw;␊
280	for (iwi.toFirst();(iw=iwi.current());++iwi)␊
281	{␊
282	int numUrls = iw->urls().count();␊
283	writeInt(f,numUrls);␊
284	QIntDictIterator<URLInfo> uli(iw->urls());␊
285	URLInfo *ui;␊
286	for (uli.toFirst();(ui=uli.current());++uli)␊
287	{␊
288	writeInt(f,urlOffsets[ui->urlIdx]);␊
289	writeInt(f,ui->freq);␊
290	}␊
291	}␊
292	}␊
293	}␊
294	// write urls␊
295	QIntDictIterator<URL> udi(m_urls);␊
296	URL *url;␊
297	for (udi.toFirst();(url=udi.current());++udi)␊
298	{␊
299	writeString(f,url->name);␊
300	writeString(f,url->url);␊
301	}␊
302	}␊
303	␊
304	delete[] urlOffsets;␊
305	delete[] wordStatOffsets;␊
306	}␊
307	␊
308

Download this file