9-Aug-06 (Created: 9-Aug-06) | More in 'Howto-Advanced'

A way to construct hds on the fly: an example using Lucene

package com.indent.lucene.similarity;

import com.ai.application.interfaces.*;
import java.io.*;
import java.util.*;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.Query;

import com.ai.application.utils.*;
import com.ai.data.*;
import com.ai.common.*;

/**
 * LocateSimilarDocumentsPart
 * ******************************
 * 1. Searches for similar documents based on input
 * 2. Collects term frequency vector from description to search for similar documents 
 * 3. The returned documents are packaged as an IDataCollection
 * 4. This will allow for using this part in page design directly
 *
 * Expected input args
 * ******************************
 * app: Indent app name
 * id:  Indent lucene document id
 * numofdocs: Maximum number of similar docs to be returned
 * 
 * Output/Behaviour
 * 1. Returns IDataCollection
 * 2. resultName: Completed hello word string
 * 3. Will write a debug message to the log
 *
 */

public class LocateSimilarDocumentsPart2 extends AFactoryPart
{
    protected Object executeRequestForPart(String requestName, Map inArgs)
            throws RequestExecutionException
    {
    	
    	IndentLuceneIndex li = null; 
    	try
		{
	    	li = (IndentLuceneIndex)AppObjects.getObject("indentluceneindex",null);
	    	
	    	//Collect input args
	    	String app = (String)inArgs.get("app");
	    	String id = (String)inArgs.get("id");
	    	String numOfDocs = (String)inArgs.get("numofdocs");
	    	int iNumOfDocs = Integer.parseInt(numOfDocs);
	    	
	    	List documentList = 
	    		getSimilarDocuments(li,app,id,iNumOfDocs);
	    	
	        AppObjects.log("Number of documents found:" + documentList.size());
	        AppObjects.log("Number of documents requested:" + iNumOfDocs);

	        Iterator itr = documentList.iterator();
	        int stopCount = 0;
	        while(itr.hasNext())
	        {
	        	Document doc = (Document)itr.next();
	        	li.printDocDetails(doc);
	        	stopCount++;
	        	if (stopCount >= iNumOfDocs)
	        	{
	    	        AppObjects.log("Number of documents quota fulfilled:" + iNumOfDocs);
	        		break;
	        	}
	        }
	    	return getDocumentCollection(documentList);
		}
    	catch(IOException x)
		{
    		throw new RequestExecutionException("Error:Getting similar documents from IndentLuceneIndex",x);
		}
    	finally
		{
    		if (li != null)
    		{
    			li.closeSessionQuietly();
    		}
		}
    }//eof-function
    
    /**
     * Construct an IDataCollection from the 
     * lucene document list and return.
     * 
     * @param documentList
     * @return
     */
    private IDataCollection getDocumentCollection(List documentList)
    {
    	Vector columnNamesVector = new Vector();
    	columnNamesVector.add(IndentLuceneIndex.FIELD_ID);
    	columnNamesVector.add(IndentLuceneIndex.FIELD_APP);
    	columnNamesVector.add(IndentLuceneIndex.FIELD_DOC);
    	columnNamesVector.add(IndentLuceneIndex.FIELD_TITLE);
    	columnNamesVector.add(IndentLuceneIndex.FIELD_DESCRIPTION);
    	
    	ListDataCollection luceneDocumentCollection 
			= new ListDataCollection(columnNamesVector);
    	//Fill it up with rows
    	Iterator luceneDocItr = documentList.iterator();
    	while(luceneDocItr.hasNext())
    	{
    		Document doc = (Document)luceneDocItr.next();
    		IDataRow collectionRow = getDataRow(doc
    									,new VectorMetaData(columnNamesVector));
    		luceneDocumentCollection.addDataRow(collectionRow);
    	}
    	return luceneDocumentCollection;
    }
    
    private IDataRow getDataRow(Document luceneDoc, IMetaData columnMetaData)
    {
    	List columnValues = new ArrayList();
    	columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_ID));
    	columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_APP));
    	columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_DOC));
    	columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_TITLE));
    	columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_DESCRIPTION));
    	
    	return new ListDataRow(columnMetaData,columnValues);
    }
    
    public List getSimilarDocuments(IndentLuceneIndex li, String app, String id, int numOfDocs)
    throws IOException
    {
    	
    	int docnum = li.searchForDocumentNumber(app,id);
    	logSearchWords(li,docnum,IndentLuceneIndex.FIELD_TITLE);
    	logSearchWords(li,docnum,IndentLuceneIndex.FIELD_DESCRIPTION);
    	logSearchWords(li,docnum,IndentLuceneIndex.FIELD_CONTENTS);
    	
    	Document doc = li.searchForDocument(app,id);
    	Query q = RelevanceUtils.getRelevanceQuerySimple("contents",getSearchWords(li,doc,docnum));
    	return li.searchForDocsUsingQuery(q);
    }
    /**
     * Override this method to optimize your search
     * @param li
     * @param doc
     * @param docnum
     * @return
     * @throws IOException
     */
    protected List getSearchWords(IndentLuceneIndex li, Document doc,int docnum)
    throws IOException
    {
    	List sampleList = new ArrayList();
    	String titleWords[] = li.getTermVectors(docnum,IndentLuceneIndex.FIELD_TITLE).getTerms();
    	String descWords[] = li.getTermVectors(docnum,IndentLuceneIndex.FIELD_DESCRIPTION).getTerms();
    	for(int i=0;i<titleWords.length;i++)
    	{
    		sampleList.add(titleWords[i]);
    		AppObjects.info(this,"Adding searchword:" + titleWords[i]);
    	}
    	for(int i=0;i<descWords.length;i++)
    	{
    		sampleList.add(descWords[i]);
    		AppObjects.info(this,"Adding searchword:" + descWords[i]);
    	}
    	
    	return sampleList;
    }
    
    private void logSearchWords(IndentLuceneIndex li, int  docnum, String fieldName)
    throws IOException
    {
//    	TermFreqVector tfv = li.getTermVectors(docnum,IndentLuceneIndex.FIELD_DESCRIPTION);
    	TermFreqVector tfv = li.getTermVectors(docnum,fieldName);
		String words[] = tfv.getTerms();
		AppObjects.log("Number of terms:" + words.length + " in " + fieldName);
		//AppObjects.log(words.toString());
    }
}//eof-class