package com.indent.lucene.similarity;

import com.ai.application.interfaces.*;
import java.io.*;
import java.util.*;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.Query;

import com.ai.application.utils.*;
import com.ai.data.*;
import com.ai.common.*;

/**
 * LocateSimilarDocumentsPart2
 * ******************************
 * 1. Searches for documents similar to a given indexed document
 * 2. Collects the terms of the document's title and description
 *    term vectors and uses them as the search words
 * 3. The returned documents are packaged as an IDataCollection
 * 4. This allows using this part in page design directly
 *
 * Expected input args
 * ******************************
 * app:       Indent app name
 * id:        Indent lucene document id
 * numofdocs: Maximum number of similar docs to be returned
 *
 * Output/Behaviour
 * ******************************
 * 1. Returns an IDataCollection with one row per similar document and
 *    the columns FIELD_ID, FIELD_APP, FIELD_DOC, FIELD_TITLE and
 *    FIELD_DESCRIPTION of IndentLuceneIndex
 * 2. At most "numofdocs" rows are returned
 * 3. Writes debug messages (counts, search words) to the log
 */
public class LocateSimilarDocumentsPart2 extends AFactoryPart
{
    /**
     * Looks up the documents similar to the one identified by the
     * "app" and "id" arguments and returns them as an IDataCollection.
     *
     * @param requestName name of the request being executed (not used here)
     * @param inArgs      input arguments; reads "app", "id" and "numofdocs"
     * @return an IDataCollection holding at most "numofdocs" documents
     * @throws RequestExecutionException if "numofdocs" is missing or not an
     *         integer, or if the index access fails with an IOException
     */
    protected Object executeRequestForPart(String requestName, Map inArgs)
            throws RequestExecutionException
    {
        IndentLuceneIndex li = null;
        try
        {
            li = (IndentLuceneIndex)AppObjects.getObject("indentluceneindex",null);

            //Collect input args
            String app = (String)inArgs.get("app");
            String id = (String)inArgs.get("id");
            String numOfDocs = (String)inArgs.get("numofdocs");
            int iNumOfDocs = parseNumOfDocs(numOfDocs);

            List documentList = getSimilarDocuments(li,app,id,iNumOfDocs);
            AppObjects.log("Number of documents found:" + documentList.size());
            AppObjects.log("Number of documents requested:" + iNumOfDocs);

            // Apply the cap to the list that is returned, not just to the
            // documents that get printed: numofdocs is a maximum.
            List limitedList = limitDocuments(documentList, iNumOfDocs);
            Iterator itr = limitedList.iterator();
            while(itr.hasNext())
            {
                Document doc = (Document)itr.next();
                li.printDocDetails(doc);
            }
            if (!documentList.isEmpty() && documentList.size() >= iNumOfDocs)
            {
                AppObjects.log("Number of documents quota fulfilled:" + iNumOfDocs);
            }
            return getDocumentCollection(limitedList);
        }
        catch(IOException x)
        {
            throw new RequestExecutionException(
                "Error:Getting similar documents from IndentLuceneIndex",x);
        }
        finally
        {
            // Always release the index session, including on failure.
            if (li != null)
            {
                li.closeSessionQuietly();
            }
        }
    }//eof-function

    /**
     * Parses the "numofdocs" argument.
     *
     * @param numOfDocs raw argument value, may be null
     * @return the parsed integer
     * @throws RequestExecutionException if the value is null or not an integer
     */
    private int parseNumOfDocs(String numOfDocs) throws RequestExecutionException
    {
        try
        {
            // Integer.parseInt(null) also throws NumberFormatException.
            return Integer.parseInt(numOfDocs);
        }
        catch(NumberFormatException x)
        {
            throw new RequestExecutionException(
                "Error:Invalid numofdocs argument:" + numOfDocs,x);
        }
    }

    /**
     * Returns the leading part of the list, holding at most maxDocs entries.
     *
     * @param documentList the full result list
     * @param maxDocs      maximum number of entries; a value of zero or
     *                     less yields an empty list
     * @return the same list if it is already short enough, otherwise a copy
     *         of its first maxDocs entries
     */
    private List limitDocuments(List documentList, int maxDocs)
    {
        if (maxDocs <= 0)
        {
            return new ArrayList();
        }
        if (documentList.size() <= maxDocs)
        {
            return documentList;
        }
        // Copy so the result does not stay backed by the larger list.
        return new ArrayList(documentList.subList(0, maxDocs));
    }

    /**
     * Construct an IDataCollection from the
     * lucene document list and return.
     *
     * @param documentList list of lucene Document objects
     * @return a collection with one row per document
     */
    private IDataCollection getDocumentCollection(List documentList)
    {
        Vector columnNamesVector = new Vector();
        columnNamesVector.add(IndentLuceneIndex.FIELD_ID);
        columnNamesVector.add(IndentLuceneIndex.FIELD_APP);
        columnNamesVector.add(IndentLuceneIndex.FIELD_DOC);
        columnNamesVector.add(IndentLuceneIndex.FIELD_TITLE);
        columnNamesVector.add(IndentLuceneIndex.FIELD_DESCRIPTION);

        ListDataCollection luceneDocumentCollection =
            new ListDataCollection(columnNamesVector);

        // The column metadata is the same for every row, so build it once.
        IMetaData columnMetaData = new VectorMetaData(columnNamesVector);

        //Fill it up with rows
        Iterator luceneDocItr = documentList.iterator();
        while(luceneDocItr.hasNext())
        {
            Document doc = (Document)luceneDocItr.next();
            luceneDocumentCollection.addDataRow(getDataRow(doc, columnMetaData));
        }
        return luceneDocumentCollection;
    }

    /**
     * Builds one data row from a lucene document. The values are added in
     * the same order as the column names in getDocumentCollection.
     *
     * @param luceneDoc      the document to read the field values from
     * @param columnMetaData metadata describing the row's columns
     * @return the populated row
     */
    private IDataRow getDataRow(Document luceneDoc, IMetaData columnMetaData)
    {
        List columnValues = new ArrayList();
        columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_ID));
        columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_APP));
        columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_DOC));
        columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_TITLE));
        columnValues.add(luceneDoc.get(IndentLuceneIndex.FIELD_DESCRIPTION));
        return new ListDataRow(columnMetaData,columnValues);
    }

    /**
     * Finds the documents similar to the one identified by app and id.
     * The search words come from getSearchWords and are matched against
     * the "contents" field.
     *
     * @param li        the index to search
     * @param app       Indent app name
     * @param id        Indent lucene document id
     * @param numOfDocs requested maximum; not applied by this method, the
     *                  full result of the index query is returned
     * @return the list returned by the index for the relevance query
     * @throws IOException if the index access fails
     */
    public List getSimilarDocuments(IndentLuceneIndex li,
            String app,
            String id,
            int numOfDocs)
            throws IOException
    {
        int docnum = li.searchForDocumentNumber(app,id);

        // Diagnostic logging of the term counts per field.
        logSearchWords(li,docnum,IndentLuceneIndex.FIELD_TITLE);
        logSearchWords(li,docnum,IndentLuceneIndex.FIELD_DESCRIPTION);
        logSearchWords(li,docnum,IndentLuceneIndex.FIELD_CONTENTS);

        Document doc = li.searchForDocument(app,id);
        Query q = RelevanceUtils.getRelevanceQuerySimple(
            "contents",getSearchWords(li,doc,docnum));
        return li.searchForDocsUsingQuery(q);
    }

    /**
     * Override this method to optimize your search.
     * This implementation returns the title terms followed by the
     * description terms of the document's term vectors.
     *
     * @param li     the index holding the document
     * @param doc    the source document (not used by this implementation)
     * @param docnum lucene document number of the source document
     * @return list of search words (Strings); empty if no terms are available
     * @throws IOException if the index access fails
     */
    protected List getSearchWords(IndentLuceneIndex li, Document doc,int docnum)
            throws IOException
    {
        List sampleList = new ArrayList();
        addFieldTerms(sampleList, li, docnum, IndentLuceneIndex.FIELD_TITLE);
        addFieldTerms(sampleList, li, docnum, IndentLuceneIndex.FIELD_DESCRIPTION);
        return sampleList;
    }

    /**
     * Appends the terms of one field's term vector to the search word list.
     */
    private void addFieldTerms(List searchWords,
            IndentLuceneIndex li,
            int docnum,
            String fieldName)
            throws IOException
    {
        TermFreqVector tfv = li.getTermVectors(docnum,fieldName);
        // NOTE(review): Lucene returns null when a field has no stored term
        // vector; presumably getTermVectors passes that through - confirm.
        if (tfv == null)
        {
            return;
        }
        String words[] = tfv.getTerms();
        for(int i=0;i<words.length;i++)
        {
            searchWords.add(words[i]);
            AppObjects.info(this,"Adding searchword:" + words[i]);
        }
    }

    /**
     * Logs how many terms the term vector of the given field holds.
     * Purely diagnostic; a missing term vector must not fail the request.
     *
     * @param li        the index holding the document
     * @param docnum    lucene document number
     * @param fieldName name of the field to inspect
     * @throws IOException if the index access fails
     */
    private void logSearchWords(IndentLuceneIndex li, int docnum, String fieldName)
            throws IOException
    {
        TermFreqVector tfv = li.getTermVectors(docnum,fieldName);
        if (tfv == null)
        {
            AppObjects.log("No term vector available for " + fieldName);
            return;
        }
        String words[] = tfv.getTerms();
        AppObjects.log("Number of terms:" + words.length + " in " + fieldName);
    }
}//eof-class