This is an automated email from the git hooks/post-receive script. New commit to branch feature/7852-suppression-mots-liaison-nuage in repository coselmar. See http://git.codelutin.com/coselmar.git commit 5f6bbc8d98f774497f1b536abe34b1f34fb89f02 Author: Yannick Martel <martel@©odelutin.com> Date: Tue Jan 5 15:15:56 2016 +0100 Revue de l'indexation : utilisation du FrenchAnalyzer et refonte de la façon de stocker et requeter --- .../indexation/DocumentsIndexationService.java | 140 ++++++++++----------- .../coselmar/services/indexation/LuceneUtils.java | 12 +- .../indexation/QuestionsIndexationService.java | 122 ++++++++++-------- .../indexation/TransverseIndexationService.java | 49 ++++++-- .../coselmar/services/v1/AdminWebService.java | 3 +- .../coselmar/services/v1/DocumentsWebService.java | 2 +- .../coselmar/services/v1/QuestionsWebService.java | 6 +- .../indexation/DocumentsIndexationServiceTest.java | 6 +- coselmar-ui/src/main/webapp/index.html | 2 +- .../src/main/webapp/js/coselmar-admin-services.js | 2 +- 10 files changed, 202 insertions(+), 142 deletions(-) diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java index 144f4a2..c771dd2 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java @@ -38,12 +38,9 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.WildcardQuery; /** * This Services provides operation about {@link fr.ifremer.coselmar.persistence.entity.Document} @@ -67,21 +64,28 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { protected static final String DOCUMENT_SUMMARY_INDEX_PROPERTY = "documentSummary"; protected static final String DOCUMENT_KEYWORD_INDEX_PROPERTY = "documentKeyword"; protected static final String DOCUMENT_TYPE = "documentindextype"; + // Not Analyzed value, used for top words + protected static final String NOT_ANALYZED_DOCUMENT_KEYWORD_INDEX_PROPERTY = "notAnalyzedDocumentKeyword"; public void indexDocument(DocumentBean document) throws IOException { Document doc = new Document(); doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); +// doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); +// doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); +// doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); + doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); doc.add(new Field("type", DOCUMENT_TYPE, TextField.TYPE_STORED)); Set<String> keywords = document.getKeywords(); if (keywords != null) { for (String keyword : keywords) { - doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED)); +// doc.add(new TextField(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, Field.Store.NO)); + doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword.toLowerCase(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(NOT_ANALYZED_DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword.toLowerCase(), TransverseIndexationService.TYPE_STORED_NO_TOKENIZED)); } } @@ -96,33 +100,26 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { String[] words = text.split(" "); - // Parse a simple query that searches for the "text": - BooleanQuery query = new BooleanQuery(); + QueryParser queryParser = getLuceneUtils().getQueryParser(DOCUMENT_SUMMARY_INDEX_PROPERTY); + + // Create specific query for each fields with "AND" clause inside them, and "OR" clause between them + String nameQuery = ""; + String summaryQuery = ""; + String authorsQuery = ""; + String keywordsQuery = ""; - BooleanQuery nameQuery = new BooleanQuery(); - BooleanQuery summaryQuery = new BooleanQuery(); - BooleanQuery authorsQuery = new BooleanQuery(); for (String word : words) { - String wildWord = String.format("*%s*", word.toLowerCase()); - nameQuery.add(new WildcardQuery(new Term(DOCUMENT_NAME_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); - summaryQuery.add(new WildcardQuery(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); - authorsQuery.add(new WildcardQuery(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); + String wildWord = String.format("%s", word.toLowerCase()); + nameQuery = String.format("%s (%s:%s OR %s:%s*)", nameQuery.isEmpty() ? nameQuery : nameQuery + " AND ", DOCUMENT_NAME_INDEX_PROPERTY, wildWord, DOCUMENT_NAME_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() - 1)); + summaryQuery = String.format("%s (%s:%s OR %s:%s*)", summaryQuery.isEmpty() ? summaryQuery : summaryQuery + " AND ", DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord, DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() -1)); + authorsQuery = String.format("%s (%s:%s OR %s:%s*)", authorsQuery.isEmpty() ? authorsQuery : authorsQuery + " AND ", DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord, DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() -1)); } + keywordsQuery = String.format("%s:'%s'", DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase()); - query.add(nameQuery, BooleanClause.Occur.SHOULD); - query.add(summaryQuery, BooleanClause.Occur.SHOULD); - query.add(authorsQuery, BooleanClause.Occur.SHOULD); - - query.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase())), BooleanClause.Occur.SHOULD); - + String fullQuery = String.format("((type:%s) AND ((%s) OR (%s) OR (%s) OR (%s)))", DOCUMENT_TYPE, nameQuery, summaryQuery, authorsQuery, keywordsQuery); - // Combine that with the type - BooleanQuery fullQuery = new BooleanQuery(); - fullQuery.add(query, BooleanClause.Occur.MUST); - fullQuery.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST); - - ScoreDoc[] hits = isearcher.search(fullQuery, null, 1000).scoreDocs; + ScoreDoc[] hits = isearcher.search(queryParser.parse(fullQuery), null, 1000).scoreDocs; List<String> documentIds = new ArrayList(hits.length); @@ -140,43 +137,36 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false); IndexSearcher isearcher = new IndexSearcher(ireader); + QueryParser queryParser = getLuceneUtils().getQueryParser(DOCUMENT_SUMMARY_INDEX_PROPERTY); - BooleanQuery keywordsQuery = new BooleanQuery(); + String query = ""; for (String text : texts) { String[] words = text.split(" "); // Parse a simple query that searches for the "text": - BooleanQuery query = new BooleanQuery(); - - BooleanQuery nameQuery = new BooleanQuery(); - BooleanQuery summaryQuery = new BooleanQuery(); - BooleanQuery authorsQuery = new BooleanQuery(); + String nameQuery = ""; + String summaryQuery = ""; + String authorsQuery = ""; for (String word : words) { - String wildWord = "*" + word.toLowerCase() + "*"; - nameQuery.add(new WildcardQuery(new Term(DOCUMENT_NAME_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); - summaryQuery.add(new WildcardQuery(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); - authorsQuery.add(new WildcardQuery(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); + String wildWord = String.format("%s", word.toLowerCase()); + nameQuery = String.format("%s (%s:%s OR %s:%s*)", nameQuery.isEmpty() ? "" : nameQuery + " AND ", DOCUMENT_NAME_INDEX_PROPERTY, wildWord, DOCUMENT_NAME_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() - 1)); + summaryQuery = String.format("%s (%s:%s OR %s:%s*)", summaryQuery.isEmpty() ? "" : summaryQuery + " AND ", DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord, DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() -1)); + authorsQuery = String.format("%s (%s:%s OR %s:%s*)", authorsQuery.isEmpty() ? "" : authorsQuery + " AND ", DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord, DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() -1)); } + String keywordsQuery = String.format("%s:'%s'", DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase()); - query.add(nameQuery, BooleanClause.Occur.SHOULD); - query.add(summaryQuery, BooleanClause.Occur.SHOULD); - query.add(authorsQuery, BooleanClause.Occur.SHOULD); - - query.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase())), BooleanClause.Occur.SHOULD); - + String fullQuery = String.format("((type:%s) AND ((%s) OR (%s) OR (%s) OR (%s)))", DOCUMENT_TYPE, nameQuery, summaryQuery, authorsQuery, keywordsQuery); // Combine that with the type //XXX ymartel : put to Occur.SHOULD to make an "OR" - keywordsQuery.add(query, BooleanClause.Occur.MUST); + query = String.format("%s (%s)", query.isEmpty() ? "" : query + " AND ", fullQuery); } - BooleanQuery fullQuery = new BooleanQuery(); - fullQuery.add(keywordsQuery, BooleanClause.Occur.MUST); - fullQuery.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST); + String completeQuery = String.format("%s AND type:%s", query, DOCUMENT_TYPE); - ScoreDoc[] hits = isearcher.search(fullQuery, null, 1000).scoreDocs; + ScoreDoc[] hits = isearcher.search(queryParser.parse(completeQuery), null, 1000).scoreDocs; List<String> documentIds = new ArrayList(hits.length); @@ -190,28 +180,35 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { return documentIds; } - public void updateDocument(DocumentBean document) throws IOException { + public void updateDocument(DocumentBean document) throws IOException, ParseException { DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false); IndexSearcher isearcher = new IndexSearcher(ireader); // Retrieve document - BooleanQuery query = new BooleanQuery(); - query.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, document.getId())), BooleanClause.Occur.MUST); - query.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST); + QueryParser queryParser = getLuceneUtils().getQueryParser(DOCUMENT_ID_INDEX_PROPERTY); + + // Query is +documentId:document.getId() +type=documentType + String query = String.format("+%s:%s +type:%s", DOCUMENT_ID_INDEX_PROPERTY, document.getId(), DOCUMENT_TYPE); + + ScoreDoc[] hits = isearcher.search(queryParser.parse(query), null, 1000).scoreDocs; - ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; if (hits.length > 0) { Document doc = new Document(); doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); +// doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); +// doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); +// doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); + doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); Set<String> keywords = document.getKeywords(); if (keywords != null) { for (String keyword : keywords) { - doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED)); +// doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED)); + doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword.toLowerCase(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(NOT_ANALYZED_DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword.toLowerCase(), TransverseIndexationService.TYPE_STORED_NO_TOKENIZED)); } } @@ -224,25 +221,26 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { ireader.close(); } - public void deleteDocument(String documentId) throws IOException { + public void deleteDocument(String documentId) throws IOException, ParseException { // Retrieve document - BooleanQuery query = new BooleanQuery(); - query.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, documentId)), BooleanClause.Occur.MUST); - query.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST); + QueryParser queryParser = getLuceneUtils().getQueryParser(DOCUMENT_ID_INDEX_PROPERTY); - getLuceneUtils().getIndexWriter().deleteDocuments(query); + // Query is +documentId:{{documentId}} +type=documentType + String query = String.format("+%s:%s +type:%s", DOCUMENT_ID_INDEX_PROPERTY, documentId, DOCUMENT_TYPE); + + getLuceneUtils().getIndexWriter().deleteDocuments(queryParser.parse(query)); getLuceneUtils().getIndexWriter().commit(); } - protected void cleanIndex() throws IOException { - BooleanQuery query = new BooleanQuery.Builder() - .add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST) - //XXX ymartel 20151215 : Clean older DOCUMENT_TYPE value too (less or equals V1.0.1), should be removed after V2.0 - .add(new TermQuery(new Term("type", "document")), BooleanClause.Occur.SHOULD) - .build(); - getLuceneUtils().getIndexWriter().deleteDocuments(query); + protected void cleanIndex() throws IOException, ParseException { + QueryParser queryParser = getLuceneUtils().getQueryParser(DOCUMENT_ID_INDEX_PROPERTY); + + // Query is on documentType, and should clear older DOCUMENT_TYPE value too (less or equals V1.0.1), should be removed after V2.0 + String query = String.format("type:%s OR type:%s", DOCUMENT_TYPE, "document"); + + getLuceneUtils().getIndexWriter().deleteDocuments(queryParser.parse(query)); getLuceneUtils().getIndexWriter().commit(); } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java index c471fac..bb5a36e 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java @@ -31,9 +31,10 @@ import fr.ifremer.coselmar.config.CoselmarServicesConfig; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; @@ -58,7 +59,8 @@ public class LuceneUtils { if (analyzer == null) { // analyzer = new StandardAnalyzer(); //Use simple analyzer to index all words and be able to search with "close word" classified in StandardAnalyzer - analyzer = new SimpleAnalyzer(); +// analyzer = new SimpleAnalyzer(); + analyzer = new FrenchAnalyzer(); } return analyzer; @@ -92,4 +94,10 @@ public class LuceneUtils { getIndexWriter().commit(); } + public QueryParser getQueryParser(String defaultField) { + QueryParser parser = new QueryParser(defaultField, getAnalyzer()); + parser.setAllowLeadingWildcard(true); + parser.setDefaultOperator(QueryParser.Operator.AND); + return parser; + } } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java index b59fb41..892b159 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java @@ -26,26 +26,28 @@ package fr.ifremer.coselmar.services.indexation; import fr.ifremer.coselmar.beans.QuestionBean; import fr.ifremer.coselmar.beans.QuestionSearchBean; -import fr.ifremer.coselmar.beans.QuestionSearchExample; import fr.ifremer.coselmar.services.CoselmarSimpleServiceSupport; import org.apache.commons.lang3.StringUtils; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.misc.HighFreqTerms; import org.apache.lucene.misc.HighFreqTermsMultiFields; import org.apache.lucene.misc.TermStats; import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.WildcardQuery; import java.io.IOException; import java.util.ArrayList; @@ -77,32 +79,39 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { protected static final String QUESTION_STATUS_INDEX_PROPERTY = "questionStatus"; protected static final String QUESTION_PRIVACY_INDEX_PROPERTY = "questionPrivacy"; protected static final String DOCUMENT_TYPE = "questionindextype"; + // Not Analyzed value, used for top words + protected static final String NOT_ANALYZED_QUESTION_THEME_INDEX_PROPERTY = "notAnalyzedQuestionTheme"; - public void indexQuestion(QuestionBean question) throws IOException { + public void indexQuestion(QuestionBean question) throws IOException, ParseException { // First : try to find if already exist to update it DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false); IndexSearcher isearcher = new IndexSearcher(ireader); // Retrieve document - BooleanQuery query = new BooleanQuery.Builder() - .add(new TermQuery(new Term(QUESTION_ID_INDEX_PROPERTY, question.getId())), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST) - .build(); + QueryParser queryParser = getLuceneUtils().getQueryParser(QUESTION_ID_INDEX_PROPERTY); + + // Query is +documentId:qiestion.getId() +type=questionType + String query = String.format("+%s:%s +type:%s", QUESTION_ID_INDEX_PROPERTY, question.getId(), DOCUMENT_TYPE); + + ScoreDoc[] hits = isearcher.search(queryParser.parse(query), 1000).scoreDocs; - ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs; if (hits.length > 0) { Document doc = new Document(); doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); - doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); - doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); +// doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); +// doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); + doc.add(new Field(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); Set<String> themes = question.getThemes(); if (themes != null) { for (String theme : themes) { - doc.add(new TextField(QUESTION_THEME_INDEX_PROPERTY, theme, Field.Store.YES)); +// doc.add(new TextField(QUESTION_THEME_INDEX_PROPERTY, theme, Field.Store.YES)); + doc.add(new Field(QUESTION_THEME_INDEX_PROPERTY, theme.toLowerCase(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(NOT_ANALYZED_QUESTION_THEME_INDEX_PROPERTY, theme.toLowerCase(), TransverseIndexationService.TYPE_STORED_NO_TOKENIZED)); } } @@ -120,8 +129,10 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { Document doc = new Document(); doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); - doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); - doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); +// doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); +// doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); + doc.add(new Field(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); doc.add(new TextField(QUESTION_STATUS_INDEX_PROPERTY, question.getStatus(), Field.Store.YES)); doc.add(new TextField(QUESTION_PRIVACY_INDEX_PROPERTY, question.getPrivacy(), Field.Store.YES)); @@ -129,7 +140,9 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { Set<String> themes = question.getThemes(); if (themes != null) { for (String theme : themes) { - doc.add(new Field(QUESTION_THEME_INDEX_PROPERTY, theme, TextField.TYPE_STORED)); +// doc.add(new TextField(QUESTION_THEME_INDEX_PROPERTY, theme, Field.Store.YES)); + doc.add(new Field(QUESTION_THEME_INDEX_PROPERTY, theme.toLowerCase(), TransverseIndexationService.TYPE_STORED_TOKENIZED)); + doc.add(new Field(NOT_ANALYZED_QUESTION_THEME_INDEX_PROPERTY, theme.toLowerCase(), TransverseIndexationService.TYPE_STORED_NO_TOKENIZED)); } } @@ -149,24 +162,36 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false); IndexSearcher isearcher = new IndexSearcher(ireader); - // Combine that with the type - BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); - queryBuilder.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST); + QueryParser queryParser = getLuceneUtils().getQueryParser(QUESTION_SUMMARY_INDEX_PROPERTY); + + // First : query on the document type + String completeQuery = "type:" + DOCUMENT_TYPE; String searchPrivacy = searchBean.getPrivacy(); if(StringUtils.isNotBlank(searchPrivacy)) { - queryBuilder.add(new TermQuery(new Term(QUESTION_PRIVACY_INDEX_PROPERTY, searchPrivacy.toLowerCase())), BooleanClause.Occur.MUST); + completeQuery = String.format("%s AND %s:%s", completeQuery, QUESTION_PRIVACY_INDEX_PROPERTY, searchPrivacy.toLowerCase()); } String searchStatus = searchBean.getStatus(); if(StringUtils.isNotBlank(searchStatus)) { - queryBuilder.add(new TermQuery(new Term(QUESTION_STATUS_INDEX_PROPERTY, searchStatus.toLowerCase())), BooleanClause.Occur.MUST); + completeQuery = String.format("%s AND %s:%s", completeQuery, QUESTION_STATUS_INDEX_PROPERTY, searchStatus.toLowerCase()); + } + + String searchTitle = searchBean.getTitle(); + if(StringUtils.isNotBlank(searchTitle)) { + completeQuery = String.format("%s AND %s:%s", completeQuery, QUESTION_TITLE_INDEX_PROPERTY, searchTitle.toLowerCase()); + } + + String searchSummary = searchBean.getSummary(); + if(StringUtils.isNotBlank(searchSummary)) { + completeQuery = String.format("%s AND %s:%s", completeQuery, QUESTION_SUMMARY_INDEX_PROPERTY, searchSummary.toLowerCase()); } // Keywords part List<String> keywords = searchBean.getFullTextSearch(); if (keywords != null && !keywords.isEmpty()) { - BooleanQuery.Builder keywordsQueryBuilder = new BooleanQuery.Builder(); + + String keywordsQuery = ""; for (String text : keywords) { @@ -174,34 +199,26 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { // Parse a simple query that searches for the "text": - BooleanQuery.Builder nameQueryBuilder = new BooleanQuery.Builder(); - BooleanQuery.Builder summaryQueryBuilder = new BooleanQuery.Builder(); + String titleQuery = ""; + String summaryQuery = ""; for (String word : words) { - String wildWord = String.format("*%s*", word.toLowerCase()); - nameQueryBuilder.add(new WildcardQuery(new Term(QUESTION_TITLE_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); - summaryQueryBuilder.add(new WildcardQuery(new Term(QUESTION_SUMMARY_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST); + String wildWord = String.format("%s", word.toLowerCase()); + titleQuery = String.format("%s (%s:%s OR %s:%s*)", titleQuery.isEmpty() ? "" : titleQuery + " AND ", QUESTION_TITLE_INDEX_PROPERTY, wildWord, QUESTION_TITLE_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() - 1)); + summaryQuery = String.format("%s (%s:%s OR %s:%s*)", summaryQuery.isEmpty() ? "" : summaryQuery + " AND ", QUESTION_SUMMARY_INDEX_PROPERTY, wildWord, QUESTION_SUMMARY_INDEX_PROPERTY, wildWord.substring(0, wildWord.length() -1)); } - BooleanQuery nameQuery = nameQueryBuilder.build(); - BooleanQuery summaryQuery = summaryQueryBuilder.build(); - + String themeQuery = String.format("%s:'%s'", QUESTION_THEME_INDEX_PROPERTY, text.toLowerCase()); - BooleanQuery query = new BooleanQuery.Builder() - .add(nameQuery, BooleanClause.Occur.SHOULD) - .add(summaryQuery, BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term(QUESTION_THEME_INDEX_PROPERTY, text.toLowerCase())), BooleanClause.Occur.SHOULD) - .build(); + keywordsQuery = String.format("%s ( (%s) OR (%s) OR (%s) )", keywordsQuery.isEmpty() ? "" : keywordsQuery + " AND ", titleQuery, summaryQuery, themeQuery); - keywordsQueryBuilder.add(query, BooleanClause.Occur.MUST); } - BooleanQuery keywordsQuery = keywordsQueryBuilder.build(); // add to complete query - queryBuilder.add(keywordsQuery, BooleanClause.Occur.MUST); + completeQuery = String.format("%s AND (%s)", completeQuery, keywordsQuery); } - BooleanQuery fullQuery = queryBuilder.build(); + Query fullQuery = queryParser.parse(completeQuery); ScoreDoc[] hits = isearcher.search(fullQuery, 1000).scoreDocs; List<String> documentIds = new ArrayList(hits.length); @@ -216,25 +233,29 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { return documentIds; } - public void deleteQuestion(String documentId) throws IOException { + public void deleteQuestion(String questionId) throws IOException, ParseException { // Retrieve document - BooleanQuery query = new BooleanQuery.Builder() - .add(new TermQuery(new Term(QUESTION_ID_INDEX_PROPERTY, documentId)), BooleanClause.Occur.MUST) - .add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST) - .build(); + QueryParser queryParser = getLuceneUtils().getQueryParser(QUESTION_ID_INDEX_PROPERTY); + + // Query is +documentId:questionId +type=documentType + String stringQuery = String.format("+%s:%s +type:%s", QUESTION_ID_INDEX_PROPERTY, questionId, DOCUMENT_TYPE); + + Query query = queryParser.parse(stringQuery); getLuceneUtils().getIndexWriter().deleteDocuments(query); getLuceneUtils().getIndexWriter().commit(); } - protected void cleanIndex() throws IOException { - BooleanQuery query = new BooleanQuery.Builder() - .add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.SHOULD) - //XXX ymartel 20151215 : Clean older DOCUMENT_TYPE value too (less or equals V1.0.1), should be removed after V2.0 - .add(new TermQuery(new Term("type", "question")), BooleanClause.Occur.SHOULD) - .build(); + protected void cleanIndex() throws IOException, ParseException { + QueryParser queryParser = getLuceneUtils().getQueryParser(QUESTION_ID_INDEX_PROPERTY); + + // Query is on questionType, and should clear older DOCUMENT_TYPE value too (less or equals V1.0.1), should be removed after V2.0 + String stringQuery = String.format("type:%s OR type:%s", DOCUMENT_TYPE, "question"); + + Query query = queryParser.parse(stringQuery); + getLuceneUtils().getIndexWriter().deleteDocuments(query); getLuceneUtils().getIndexWriter().commit(); } @@ -245,12 +266,13 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { Map<String, Long> result = new LinkedHashMap<>(); try { - String[] searchedFields = {QUESTION_TITLE_INDEX_PROPERTY, QUESTION_SUMMARY_INDEX_PROPERTY, QUESTION_THEME_INDEX_PROPERTY}; - TermStats[] highFreqTerms = HighFreqTermsMultiFields.getHighFreqTermsMultiFields(indexReader, 20, searchedFields, new HighFreqTerms.TotalTermFreqComparator()); + String[] searchedFields = {QUESTION_TITLE_INDEX_PROPERTY, QUESTION_SUMMARY_INDEX_PROPERTY, NOT_ANALYZED_QUESTION_THEME_INDEX_PROPERTY}; + TermStats[] highFreqTerms = HighFreqTermsMultiFields.getHighFreqTermsMultiFields(indexReader, 50, searchedFields, new HighFreqTerms.TotalTermFreqComparator()); for (TermStats termStats : highFreqTerms) { long totalTermFreq = termStats.totalTermFreq; String value = termStats.termtext.utf8ToString(); + if (result.containsKey(value)) { result.put(value, result.get(value) + totalTermFreq); } else { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java index 7510e9a..7ad917c 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java @@ -26,14 +26,18 @@ package fr.ifremer.coselmar.services.indexation; import fr.ifremer.coselmar.beans.QuestionBean; import fr.ifremer.coselmar.services.CoselmarSimpleServiceSupport; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.misc.HighFreqTerms; import org.apache.lucene.misc.HighFreqTermsMultiFields; import org.apache.lucene.misc.TermStats; import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import java.io.IOException; @@ -54,14 +58,37 @@ import java.util.Map; */ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { - protected void cleanAllIndex() throws IOException { - BooleanQuery query = new BooleanQuery.Builder() - .add(new TermQuery(new Term("type", QuestionsIndexationService.DOCUMENT_TYPE)), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("type", DocumentsIndexationService.DOCUMENT_TYPE)), BooleanClause.Occur.SHOULD) - //XXX ymartel 20151215 : Clean older DOCUMENT_TYPE value too (less or equals V1.0.1), should be removed after V2.0 - .add(new TermQuery(new Term("type", "question")), BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term("type", "document")), BooleanClause.Occur.SHOULD) - .build(); + /** Indexed, no tokenized, stored Field Type. */ + public static final FieldType TYPE_STORED_NO_TOKENIZED = new FieldType(); + public static final FieldType TYPE_STORED_TOKENIZED = new FieldType(); + + static { + TYPE_STORED_NO_TOKENIZED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + TYPE_STORED_NO_TOKENIZED.setTokenized(false); + TYPE_STORED_NO_TOKENIZED.setStored(true); + TYPE_STORED_NO_TOKENIZED.setStoreTermVectors(true); + TYPE_STORED_NO_TOKENIZED.freeze(); + + TYPE_STORED_TOKENIZED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + TYPE_STORED_TOKENIZED.setTokenized(true); + TYPE_STORED_TOKENIZED.setStored(true); + TYPE_STORED_TOKENIZED.setStoreTermVectors(true); + TYPE_STORED_TOKENIZED.freeze(); + } + + protected void cleanAllIndex() throws IOException, ParseException { + QueryParser queryParser = getLuceneUtils().getQueryParser(null); + + // Query is on documentType, and should clear older DOCUMENT_TYPE value too (less or equals V1.0.1), should be removed after V2.0 + String cleanQuery = String.format("type:%s OR type:%s OR type:%s OR type:%s", + QuestionsIndexationService.DOCUMENT_TYPE, + DocumentsIndexationService.DOCUMENT_TYPE, + //XXX ymartel 20151215 : Clean older DOCUMENT_TYPE value too (less or equals V1.0.1), should be removed after V2.0 + "question", + "document"); + + Query query = queryParser.parse(cleanQuery); + getLuceneUtils().getIndexWriter().deleteDocuments(query); getLuceneUtils().getIndexWriter().commit(); } @@ -75,10 +102,12 @@ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { String[] searchedFields = { QuestionsIndexationService.QUESTION_TITLE_INDEX_PROPERTY, QuestionsIndexationService.QUESTION_SUMMARY_INDEX_PROPERTY, - QuestionsIndexationService.QUESTION_THEME_INDEX_PROPERTY, +// QuestionsIndexationService.QUESTION_THEME_INDEX_PROPERTY, + QuestionsIndexationService.NOT_ANALYZED_QUESTION_THEME_INDEX_PROPERTY, DocumentsIndexationService.DOCUMENT_NAME_INDEX_PROPERTY, DocumentsIndexationService.DOCUMENT_SUMMARY_INDEX_PROPERTY, - DocumentsIndexationService.DOCUMENT_KEYWORD_INDEX_PROPERTY +// DocumentsIndexationService.DOCUMENT_KEYWORD_INDEX_PROPERTY + DocumentsIndexationService.NOT_ANALYZED_DOCUMENT_KEYWORD_INDEX_PROPERTY }; TermStats[] highFreqTerms = HighFreqTermsMultiFields.getHighFreqTermsMultiFields(indexReader, 20, searchedFields, new HighFreqTerms.TotalTermFreqComparator()); for (TermStats termStats : highFreqTerms) { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java index 675b49a..8ee2767 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java @@ -42,6 +42,7 @@ import fr.ifremer.coselmar.services.indexation.DocumentsIndexationService; import fr.ifremer.coselmar.services.indexation.QuestionsIndexationService; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; +import org.apache.lucene.queryparser.classic.ParseException; import static org.apache.commons.logging.LogFactory.getLog; @@ -92,7 +93,7 @@ public class AdminWebService extends CoselmarWebServiceSupport { String message = String.format("Index was refreshed"); log.debug(message); } - } catch (IOException e) { + } catch (IOException | ParseException e) { if (log.isErrorEnabled()) { log.error("Unable to index new document", e); } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java index 91f56cf..93be6ba 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java @@ -587,7 +587,7 @@ public class DocumentsWebService extends CoselmarWebServiceSupport { String message = String.format("Document '%s' removed from index", documentId); log.debug(message); } - } catch (IOException e) { + } catch (IOException | ParseException e) { if (log.isErrorEnabled()) { log.error("Unable to remove document entry from index", e); } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java index d264076..590b35c 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java @@ -228,7 +228,7 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { String message = String.format("Question '%s' added to index", result.getTitle()); log.debug(message); } - } catch (IOException e) { + } catch (IOException | ParseException e) { if (log.isErrorEnabled()) { log.error("Unable to index new question", e); } @@ -344,7 +344,7 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { String message = String.format("Question '%s' deleted from index", questionId); log.debug(message); } - } catch (IOException e) { + } catch (IOException | ParseException e) { if (log.isErrorEnabled()) { log.error("Unable to remove question from index", e); } @@ -770,7 +770,7 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { String message = String.format("Question '%s' added to index", result.getTitle()); log.debug(message); } - } catch (IOException e) { + } catch (IOException | ParseException e) { if (log.isErrorEnabled()) { log.error("Unable to index new question", e); } diff --git a/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java b/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java index c08624a..4ed6fa6 100644 --- a/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java +++ b/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java @@ -96,8 +96,10 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest Assert.assertEquals("document3", documentMatchingPartOfSummaryIds.get(0)); List<String> documentMatchingPartOfWordIds = documentsIndexationService.searchDocuments("documenta"); - Assert.assertEquals(1, documentMatchingPartOfWordIds.size()); - Assert.assertEquals("document3", documentMatchingPartOfWordIds.get(0)); + Assert.assertEquals(3, documentMatchingPartOfWordIds.size()); + Assert.assertTrue(documentMatchingPartOfWordIds.contains("document1")); + Assert.assertTrue(documentMatchingPartOfWordIds.contains("document2")); + Assert.assertTrue(documentMatchingPartOfWordIds.contains("document3")); documentMatchingPartOfWordIds = documentsIndexationService.searchDocuments("Thi"); Assert.assertEquals(3, documentMatchingPartOfWordIds.size()); diff --git a/coselmar-ui/src/main/webapp/index.html b/coselmar-ui/src/main/webapp/index.html index d4b6fd7..f6feb96 100644 --- a/coselmar-ui/src/main/webapp/index.html +++ b/coselmar-ui/src/main/webapp/index.html @@ -163,7 +163,7 @@ </ul> </nav> - <nav class="visible-xs" collapse="!isCollapsed"> + <nav class="visible-xs" uib-collapse="!isCollapsed"> <ul class="nav navbar-nav"> <li ng-if="context.currentUser.role == 'ADMIN' || context.currentUser.role == 'SUPERVISOR'"> <a href="#users" ng-click="isCollapsed = !isCollapsed">{{ 'menu.item.users' | translate}}</a> diff --git a/coselmar-ui/src/main/webapp/js/coselmar-admin-services.js b/coselmar-ui/src/main/webapp/js/coselmar-admin-services.js index 77a8820..5f152a1 100644 --- a/coselmar-ui/src/main/webapp/js/coselmar-admin-services.js +++ b/coselmar-ui/src/main/webapp/js/coselmar-admin-services.js @@ -21,7 +21,7 @@ * <http://www.gnu.org/licenses/gpl-3.0.html>. * #L% */ -coselmarServices.factory('adminService', ['$resource', 'coselmar-config', function($resource, coselmarConfig){ +coselmarServices.factory('adminService', ['$resource', 'coselmarConfig', function($resource, coselmarConfig){ return new Admin($resource, coselmarConfig); }]); -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.