Luke.NET으로 이 작업을 할 수 있습니다. 다음은 참고가 될 만한 소스 코드입니다.
/// <summary>
/// Scans every term of the index stored in <paramref name="dir"/> and keeps the
/// entries that survive in a bounded <c>TermInfoQueue</c> of capacity
/// <paramref name="numTerms"/>.
/// </summary>
/// <param name="dir">Index directory to open read-only. When null, an empty array is returned.</param>
/// <param name="junkWords">
/// Optional lookup keyed by term text; a term whose text maps to a non-null value is skipped.
/// May be null, in which case nothing is filtered by text.
/// </param>
/// <param name="numTerms">Maximum number of terms to return. Values below 1 yield an empty array.</param>
/// <param name="fields">
/// Field names to restrict the scan to. An empty array means "all fields";
/// null yields an empty array.
/// </param>
/// <returns>
/// At most <paramref name="numTerms"/> entries, stored in reverse pop order of the queue
/// (the entry popped last is at index 0). NOTE(review): this is "most frequent first"
/// provided TermInfoQueue orders by ascending DocFreq - confirm against its LessThan.
/// </returns>
public static TermInfo[] GetHighFreqTerms(Directory dir,
                                          Hashtable junkWords,
                                          int numTerms,
                                          String[] fields)
{
    // numTerms < 1 used to end in a null dereference on tiq.Top(); treat it as "nothing requested".
    if (dir == null || fields == null || numTerms < 1) return new TermInfo[0];

    // using-blocks release the reader and the term enumerator even if the scan throws.
    using (IndexReader reader = IndexReader.Open(dir, true))
    using (TermEnum terms = reader.Terms())
    {
        TermInfoQueue tiq = new TermInfoQueue(numTerms);
        int minFreq = 0;
        bool restrictFields = fields.Length > 0;

        while (terms.Next())
        {
            // Field filter: skip terms that belong to a field the caller did not ask for.
            if (restrictFields && Array.IndexOf(fields, terms.Term.Field) < 0)
                continue;

            // Junk-word filter: any non-null entry for the term text excludes the term.
            if (junkWords != null && junkWords[terms.Term.Text] != null)
                continue;

            int docFreq = terms.DocFreq();
            if (docFreq <= minFreq)
                continue;

            // Make room BEFORE adding so the queue never exceeds its capacity and
            // still ends up holding numTerms entries (the previous code popped right
            // after the add and therefore kept only numTerms - 1).
            if (tiq.Size() >= numTerms)
                tiq.Pop(); // remove lowest in tiq

            tiq.Add(new TermInfo(terms.Term, docFreq));

            // Once the queue is full, only terms beating its current head can enter.
            if (tiq.Size() >= numTerms)
                minFreq = ((TermInfo)tiq.Top()).DocFreq;
        }

        // Drain the queue back-to-front so the first popped entry lands at the end.
        TermInfo[] res = new TermInfo[tiq.Size()];
        for (int i = res.Length - 1; i >= 0; i--)
        {
            res[i] = (TermInfo)tiq.Pop();
        }
        return res;
    }
}
훌륭하네요, 감사합니다. :-) – MaYaN