// A way to construct hds on the fly: an example using Lucene
package com.indent.lucene.similarity;
import com.ai.application.interfaces.*;
import java.io.*;
import java.util.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.Query;
import com.ai.application.utils.*;
import com.ai.data.*;
import com.ai.common.*;
/**
* LocateSimilarDocumentsPart
* ******************************
* 1. Searches for similar documents based on input
* 2. Collects term frequency vector from description to search for similar documents
* 3. The returned documents are packaged as an IDataCollection
* 4. This will allow for using this part in page design directly
*
* Expected input args
* ******************************
* app: Indent app name
* id: Indent lucene document id
* numofdocs: Maximum number of similar docs to be returned
*
* Output/Behaviour
* 1. Returns IDataCollection
* 2. resultName: Completed hello word string
* 3. Will write a debug message to the log
*
*/
public class LocateSimilarDocumentsPart2 extends AFactoryPart
{
protected Object executeRequestForPart(String requestName, Map inArgs)
throws RequestExecutionException
{
IndentLuceneIndex li = null;
try
{
li = (IndentLuceneIndex)AppObjects.getObject("indentluceneindex",null);
//Collect input args
String app = (String)inArgs.get("app");
String id = (String)inArgs.get("id");
String numOfDocs = (String)inArgs.get("numofdocs");
int iNumOfDocs = Integer.parseInt(numOfDocs);
List documentList =
getSimilarDocuments(li,app,id,iNumOfDocs);
AppObjects.log("Number of documents found:" + documentList.size());
AppObjects.log("Number of documents requested:" + iNumOfDocs);
Iterator itr = documentList.iterator();
int stopCount = 0;
while(itr.hasNext())
{
Document doc = (Document)itr.next();
li.printDocDetails(doc);
stopCount++;
if (stopCount >= iNumOfDocs)
{
AppObjects.log("Number of documents quota fulfilled:" + iNumOfDocs);
break;
}
}
return getDocumentCollection(documentList);
}
catch(IOException x)
{
throw new RequestExecutionException("Error:Getting similar documents from IndentLuceneIndex",x);
}
finally
{
if (li != null)
{
li.closeSessionQuietly();
}
}
}//eof-function
/**
* Construct an IDataCollection from the
* lucene document list and return.
*
* @param documentList
* @return
*/
private IDataCollection getDocumentCollection(List documentList)
{
Vector columnNamesVector = new Vector();
columnNamesVector.add(IndentLuceneIndex.FIELD_ID);
columnNamesVector.add(IndentLuceneIndex.FIELD_APP);
columnNamesVector.add(IndentLuceneIndex.FIELD_DOC);
columnNamesVector.add(IndentLuceneIndex.FIELD_TITLE);
columnNamesVector.add(IndentLuceneIndex.FIELD_DESCRIPTION);
ListDataCollection luceneDocumentCollection
= new ListDataCollection(columnNamesVector);
//Fill it up with rows
Iterator luceneDocItr = documentList.iterator();
while(luceneDocItr.hasNext())
{
Document doc = (Document)luceneDocItr.next();
IDataRow collectionRow = getDataRow(doc
,new VectorMetaData(columnNamesVector));
luceneDocumentCollection.addDataRow(collectionRow);
}
return luceneDocumentCollection;
}
private IDataRow getDataRow(Document luceneDoc, IMetaData columnMetaData)
{
List columnValues = new ArrayList();
columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_ID));
columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_APP));
columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_DOC));
columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_TITLE));
columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_DESCRIPTION));
return new ListDataRow(columnMetaData,columnValues);
}
public List getSimilarDocuments(IndentLuceneIndex li, String app, String id, int numOfDocs)
throws IOException
{
int docnum = li.searchForDocumentNumber(app,id);
logSearchWords(li,docnum,IndentLuceneIndex.FIELD_TITLE);
logSearchWords(li,docnum,IndentLuceneIndex.FIELD_DESCRIPTION);
logSearchWords(li,docnum,IndentLuceneIndex.FIELD_CONTENTS);
Document doc = li.searchForDocument(app,id);
Query q = RelevanceUtils.getRelevanceQuerySimple("contents",getSearchWords(li,doc,docnum));
return li.searchForDocsUsingQuery(q);
}
/**
* Override this method to optimize your search
* @param li
* @param doc
* @param docnum
* @return
* @throws IOException
*/
protected List getSearchWords(IndentLuceneIndex li, Document doc,int docnum)
throws IOException
{
List sampleList = new ArrayList();
String titleWords[] = li.getTermVectors(docnum,IndentLuceneIndex.FIELD_TITLE).getTerms();
String descWords[] = li.getTermVectors(docnum,IndentLuceneIndex.FIELD_DESCRIPTION).getTerms();
for(int i=0;i<titleWords.length;i++)
{
sampleList.add(titleWords[i]);
AppObjects.info(this,"Adding searchword:" + titleWords[i]);
}
for(int i=0;i<descWords.length;i++)
{
sampleList.add(descWords[i]);
AppObjects.info(this,"Adding searchword:" + descWords[i]);
}
return sampleList;
}
private void logSearchWords(IndentLuceneIndex li, int docnum, String fieldName)
throws IOException
{
// TermFreqVector tfv = li.getTermVectors(docnum,IndentLuceneIndex.FIELD_DESCRIPTION);
TermFreqVector tfv = li.getTermVectors(docnum,fieldName);
String words[] = tfv.getTerms();
AppObjects.log("Number of terms:" + words.length + " in " + fieldName);
//AppObjects.log(words.toString());
}
}//eof-class