[Coselmar-commits] 01/05: refs #9197 Utilisation de Apache Tika pour indexer les documents

24 May 2017

This is an automated email from the git hooks/post-receive script.

New commit to branch develop in repository coselmar.

See https://gitlab.nuiton.org/codelutin/coselmar.git

commit 8517e8f45186e4080ce56c7eb489d9592e2a4802
Author: Yannick Martel <martel@codelutin.com>
Date:   Mon May 15 22:01:04 2017 +0200

    refs #9197 Utilisation de Apache Tika pour indexer les documents
---
 coselmar-rest/pom.xml                              |   8 +-
 .../indexation/DocumentsIndexationService.java     | 127 ++++++++++++++-------
 .../coselmar/services/indexation/LuceneUtils.java  |  14 ++-
 .../coselmar/services/v1/AdminWebService.java      |   2 +-
 .../coselmar/services/v1/DocumentsWebService.java  |  38 ++++--
 .../indexation/DocumentsIndexationServiceTest.java |  16 +--
 pom.xml                                            |  13 ++-
 7 files changed, 146 insertions(+), 72 deletions(-)

diff --git a/coselmar-rest/pom.xml b/coselmar-rest/pom.xml
index bcf9b30..76b67c6 100644
--- a/coselmar-rest/pom.xml
+++ b/coselmar-rest/pom.xml
@@ -138,7 +138,7 @@
     </dependency>
 
     <dependency>
-      <groupId>postgresql</groupId>
+      <groupId>org.postgresql</groupId>
       <artifactId>postgresql</artifactId>
       <scope>runtime</scope>
     </dependency>
@@ -165,6 +165,12 @@
         <artifactId>lucene-backward-codecs</artifactId>
     </dependency>
 
+    <!-- Tika for document indexation -->
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+    </dependency>
+
     <!-- Others -->
     <dependency>
       <groupId>com.github.spullara.mustache.java</groupId>
diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java
index 16bca16..92402fb 100644
--- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java
+++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java
@@ -24,6 +24,7 @@ package fr.ifremer.coselmar.services.indexation;
  * #L%
  */
 
+import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -32,6 +33,8 @@ import java.util.Set;
 import fr.ifremer.coselmar.beans.DocumentBean;
 import fr.ifremer.coselmar.services.CoselmarSimpleServiceSupport;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
@@ -45,6 +48,7 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.tika.exception.TikaException;
 
 /**
  * This Services provides operation about {@link fr.ifremer.coselmar.persistence.entity.Document}
@@ -62,6 +66,8 @@ import org.apache.lucene.search.WildcardQuery;
  */
 public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
 
+    private static final Log log = LogFactory.getLog(DocumentsIndexationService.class);
+
     protected static final String DOCUMENT_ID_INDEX_PROPERTY = "documentId";
     protected static final String DOCUMENT_NAME_INDEX_PROPERTY = "documentName";
     protected static final String DOCUMENT_AUTHORS_INDEX_PROPERTY = "documentAuthors";
@@ -70,9 +76,10 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
     protected static final String DOCUMENT_NAME_CLOUD_TAG_PROPERTY = "documentCloudTagName";
     protected static final String DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY = "documentCloudTagSummary";
     protected static final String DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY = "documentCloudTagKeyword";
+    protected static final String DOCUMENT_FILE_CONTENT_INDEX_PROPERTY = "documentFileContent";
     protected static final String DOCUMENT_TYPE = "documentindextype";
 
-    public void indexDocument(DocumentBean document) throws IOException {
+    public void indexDocument(DocumentBean document, String filepath) throws IOException {
 
         Document doc = new Document();
         doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES));
@@ -107,6 +114,20 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
             }
         }
 
+        if (StringUtils.isNotBlank(filepath)) {
+            try {
+                File documentFile = new File(filepath);
+                String parsedDocumentFile = getLuceneUtils().getTika().parseToString(documentFile);
+                doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, parsedDocumentFile, TextField.TYPE_STORED));
+            } catch (TikaException te) {
+                if (log.isErrorEnabled()) {
+                    String message = String.format("Unable to index document '%s'", filepath);
+                    log.error(message);
+                }
+            }
+
+        }
+
         getLuceneUtils().getIndexWriter().addDocument(doc);
         getLuceneUtils().getIndexWriter().commit();
 
@@ -119,32 +140,35 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
         String[] words = text.split(" ");
 
         // Parse a simple query that searches for the "text":
-        BooleanQuery query = new BooleanQuery();
+        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
 
-        BooleanQuery nameQuery = new BooleanQuery();
-        BooleanQuery summaryQuery = new BooleanQuery();
-        BooleanQuery authorsQuery = new BooleanQuery();
+        BooleanQuery.Builder nameQueryBuilder = new BooleanQuery.Builder();
+        BooleanQuery.Builder summaryQueryBuilder = new BooleanQuery.Builder();
+        BooleanQuery.Builder authorsQueryBuilder = new BooleanQuery.Builder();
+        BooleanQuery.Builder documentFileQueryBuilder = new BooleanQuery.Builder();
 
         for (String word : words) {
             String wildWord = String.format("*%s*", word.toLowerCase());
-            nameQuery.add(new WildcardQuery(new Term(DOCUMENT_NAME_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
-            summaryQuery.add(new WildcardQuery(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
-            authorsQuery.add(new WildcardQuery(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+            nameQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_NAME_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+            summaryQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+            authorsQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+            documentFileQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
         }
 
-        query.add(nameQuery, BooleanClause.Occur.SHOULD);
-        query.add(summaryQuery, BooleanClause.Occur.SHOULD);
-        query.add(authorsQuery, BooleanClause.Occur.SHOULD);
+        queryBuilder.add(nameQueryBuilder.build(), BooleanClause.Occur.SHOULD);
+        queryBuilder.add(summaryQueryBuilder.build(), BooleanClause.Occur.SHOULD);
+        queryBuilder.add(authorsQueryBuilder.build(), BooleanClause.Occur.SHOULD);
+        queryBuilder.add(documentFileQueryBuilder.build(), BooleanClause.Occur.SHOULD);
 
-        query.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase())), BooleanClause.Occur.SHOULD);
+        queryBuilder.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase())), BooleanClause.Occur.SHOULD);
 
 
         // Combine that with the type
-        BooleanQuery fullQuery = new BooleanQuery();
-        fullQuery.add(query, BooleanClause.Occur.MUST);
-        fullQuery.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
+        BooleanQuery.Builder fullQueryBuilder = new BooleanQuery.Builder();
+        fullQueryBuilder.add(queryBuilder.build(), BooleanClause.Occur.MUST);
+        fullQueryBuilder.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
 
-        ScoreDoc[] hits = isearcher.search(fullQuery, null, 1000).scoreDocs;
+        ScoreDoc[] hits = isearcher.search(fullQueryBuilder.build(), 1000).scoreDocs;
 
         List<String> documentIds = new ArrayList(hits.length);
 
@@ -163,42 +187,45 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
         IndexSearcher isearcher = new IndexSearcher(ireader);
 
 
-        BooleanQuery keywordsQuery = new BooleanQuery();
+        BooleanQuery.Builder keywordsQueryBuilder = new BooleanQuery.Builder();
 
         for (String text : texts) {
             String[] words = text.split(" ");
 
             // Parse a simple query that searches for the "text":
-            BooleanQuery query = new BooleanQuery();
+            BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
 
-            BooleanQuery nameQuery = new BooleanQuery();
-            BooleanQuery summaryQuery = new BooleanQuery();
-            BooleanQuery authorsQuery = new BooleanQuery();
+            BooleanQuery.Builder nameQueryBuilder = new BooleanQuery.Builder();
+            BooleanQuery.Builder summaryQueryBuilder = new BooleanQuery.Builder();
+            BooleanQuery.Builder authorsQueryBuilder = new BooleanQuery.Builder();
+            BooleanQuery.Builder documentFileQueryBuilder = new BooleanQuery.Builder();
 
             for (String word : words) {
                 String wildWord = "*" + word.toLowerCase() + "*";
-                nameQuery.add(new WildcardQuery(new Term(DOCUMENT_NAME_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
-                summaryQuery.add(new WildcardQuery(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
-                authorsQuery.add(new WildcardQuery(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+                nameQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_NAME_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+                summaryQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+                authorsQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
+                documentFileQueryBuilder.add(new WildcardQuery(new Term(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, wildWord)), BooleanClause.Occur.MUST);
             }
 
-            query.add(nameQuery, BooleanClause.Occur.SHOULD);
-            query.add(summaryQuery, BooleanClause.Occur.SHOULD);
-            query.add(authorsQuery, BooleanClause.Occur.SHOULD);
+            queryBuilder.add(nameQueryBuilder.build(), BooleanClause.Occur.SHOULD);
+            queryBuilder.add(summaryQueryBuilder.build(), BooleanClause.Occur.SHOULD);
+            queryBuilder.add(authorsQueryBuilder.build(), BooleanClause.Occur.SHOULD);
+            queryBuilder.add(documentFileQueryBuilder.build(), BooleanClause.Occur.SHOULD);
 
-            query.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase())), BooleanClause.Occur.SHOULD);
+            queryBuilder.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text.toLowerCase())), BooleanClause.Occur.SHOULD);
 
 
             // Combine that with the type
             //XXX ymartel : put to Occur.SHOULD to make an "OR"
-            keywordsQuery.add(query, BooleanClause.Occur.MUST);
+            keywordsQueryBuilder.add(queryBuilder.build(), BooleanClause.Occur.MUST);
         }
 
-        BooleanQuery fullQuery = new BooleanQuery();
-        fullQuery.add(keywordsQuery, BooleanClause.Occur.MUST);
-        fullQuery.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
+        BooleanQuery.Builder fullQueryBuilder = new BooleanQuery.Builder();
+        fullQueryBuilder.add(keywordsQueryBuilder.build(), BooleanClause.Occur.MUST);
+        fullQueryBuilder.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
 
-        ScoreDoc[] hits = isearcher.search(fullQuery, null, 1000).scoreDocs;
+        ScoreDoc[] hits = isearcher.search(fullQueryBuilder.build(), 1000).scoreDocs;
 
         List<String> documentIds = new ArrayList(hits.length);
 
@@ -212,16 +239,16 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
         return documentIds;
     }
 
-    public void updateDocument(DocumentBean document) throws IOException {
+    public void updateDocument(DocumentBean document, String filepath) throws IOException {
         DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false);
         IndexSearcher isearcher = new IndexSearcher(ireader);
 
         // Retrieve document
-        BooleanQuery query = new BooleanQuery();
-        query.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, document.getId())), BooleanClause.Occur.MUST);
-        query.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
+        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+        queryBuilder.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, document.getId())), BooleanClause.Occur.MUST);
+        queryBuilder.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
 
-        ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
+        ScoreDoc[] hits = isearcher.search(queryBuilder.build(), 1000).scoreDocs;
         if (hits.length > 0) {
             Document doc = new Document();
             doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES));
@@ -249,13 +276,27 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
 
                     // Cloud Tag management
                     if (keyword.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) {
-                        doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword.replaceAll("'", " "), TextField.TYPE_STORED));
+                        doc.add(new TextField(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword.replaceAll("'", " "), Field.Store.YES));
                     }
                 }
             }
 
             doc.add(new Field("type", DOCUMENT_TYPE, TextField.TYPE_STORED));
 
+            if (StringUtils.isNotBlank(filepath)) {
+                try {
+                    File documentFile = new File(filepath);
+                    String parsedDocumentFile = getLuceneUtils().getTika().parseToString(documentFile);
+                    doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, parsedDocumentFile, TextField.TYPE_STORED));
+                } catch (TikaException te) {
+                    if (log.isErrorEnabled()) {
+                        String message = String.format("Unable to index document '%s'", filepath);
+                        log.error(message);
+                    }
+                }
+
+            }
+
             getLuceneUtils().getIndexWriter().updateDocument(new Term(DOCUMENT_ID_INDEX_PROPERTY, document.getId()), doc);
             getLuceneUtils().getIndexWriter().commit();
         }
@@ -266,11 +307,11 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {
     public void deleteDocument(String documentId) throws IOException {
 
         // Retrieve document
-        BooleanQuery query = new BooleanQuery();
-        query.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, documentId)), BooleanClause.Occur.MUST);
-        query.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
+        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+        queryBuilder.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, documentId)), BooleanClause.Occur.MUST);
+        queryBuilder.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
 
-        getLuceneUtils().getIndexWriter().deleteDocuments(query);
+        getLuceneUtils().getIndexWriter().deleteDocuments(queryBuilder.build());
         getLuceneUtils().getIndexWriter().commit();
 
     }
diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java
index b6c0736..43a3c43 100644
--- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java
+++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java
@@ -26,22 +26,18 @@ package fr.ifremer.coselmar.services.indexation;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStreamReader;
 
 import fr.ifremer.coselmar.config.CoselmarServicesConfig;
-import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.en.EnglishAnalyzer;
-import org.apache.lucene.analysis.fr.FrenchAnalyzer;
-import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.NIOFSDirectory;
+import org.apache.tika.Tika;
 
 /**
  * @author ymartel <martel@codelutin.com>
@@ -53,6 +49,7 @@ public class LuceneUtils {
     public Analyzer analyzer;
     public final IndexWriterConfig indexationConfig = new IndexWriterConfig(getAnalyzer());
     public IndexWriter indexWriter;
+    protected Tika tika;
 
     protected CoselmarServicesConfig servicesConfig;
 
@@ -81,6 +78,13 @@ public class LuceneUtils {
         return indexWriter;
     }
 
+    public Tika getTika() {
+        if (tika == null) {
+            this.tika = new Tika();
+        }
+        return tika;
+    }
+
     public void closeWriter() {
         if (indexWriter != null) {
             try {
diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java
index 675b49a..33b3e57 100644
--- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java
+++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java
@@ -78,7 +78,7 @@ public class AdminWebService extends CoselmarWebServiceSupport {
             for (Document document : documents) {
                 String lightId = getPersistenceContext().getTopiaIdFactory().getRandomPart(document.getTopiaId());
                 DocumentBean documentBean = BeanEntityConverter.toBean(getPersistenceContext().getTopiaIdFactory(), document);
-                documentsIndexationService.indexDocument(documentBean);
+                documentsIndexationService.indexDocument(documentBean, document.getFilePath());
             }
 
             // Get all questions
diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java
index 61cf842..9a1d010 100644
--- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java
+++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java
@@ -445,7 +445,7 @@ public class DocumentsWebService extends CoselmarWebServiceSupport {
 
         DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class);
         try {
-            documentsIndexationService.indexDocument(result);
+            documentsIndexationService.indexDocument(result, filePath);
             if (log.isDebugEnabled()) {
                 String message = String.format("Document '%s' added to index", documentName);
                 log.debug(message);
@@ -471,11 +471,11 @@ public class DocumentsWebService extends CoselmarWebServiceSupport {
         CoselmarUser currentUser = checkUserAuthentication(authorization);
 
         String documentFullId = getFullIdFromShort(Document.class, documentId);
-        Document document = getDocumentDao().forTopiaIdEquals(documentFullId).findAny();
+        Document documentEntity = getDocumentDao().forTopiaIdEquals(documentFullId).findAny();
 
         // Only Owner Expert or Supervisor/Admin can add document file
         if (!DOCUMENT_SUPER_USER_ROLES.contains(currentUser.getRole().name())
-            && document.getOwner() != currentUser) {
+            && documentEntity.getOwner() != currentUser) {
             String message = String.format("User %s %s ('%s') is not allowed to add document file",
                 currentUser.getFirstname(), currentUser.getName(), getShortIdFromFull(currentUser.getTopiaId()));
             if (log.isWarnEnabled()) {
@@ -485,21 +485,37 @@ public class DocumentsWebService extends CoselmarWebServiceSupport {
         }
 
         // Get owner to place correctly the file
-        CoselmarUser owner = document.getOwner();
+        CoselmarUser owner = documentEntity.getOwner();
         Pair<String, String> pathAndContentType = managerDocumentFile(uploadFile, owner);
         String filePath = pathAndContentType.getLeft();
         String contentType = pathAndContentType.getRight();
 
         // If document has already a file, remove it
-        if (StringUtils.isNotBlank(document.getFilePath())) {
-            File documentFile = new File(document.getFilePath());
+        if (StringUtils.isNotBlank(documentEntity.getFilePath())) {
+            File documentFile = new File(documentEntity.getFilePath());
             FileUtils.deleteQuietly(documentFile);
         }
 
-        document.setWithFile(true);
-        document.setMimeType(contentType);
-        document.setFilePath(filePath);
-        document.setFileName(uploadFile.getName());
+        documentEntity.setWithFile(true);
+        documentEntity.setMimeType(contentType);
+        documentEntity.setFilePath(filePath);
+        documentEntity.setFileName(uploadFile.getName());
+
+        // Should update document index information to put the file
+        DocumentBean documentBean = BeanEntityConverter.toBean(getPersistenceContext().getTopiaIdFactory(), documentEntity);
+
+        DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class);
+        try {
+            documentsIndexationService.updateDocument(documentBean, filePath); // no document file for the moment here
+            if (log.isDebugEnabled()) {
+                String message = String.format("Document '%s' was updated in index", documentEntity.getName());
+                log.debug(message);
+            }
+        } catch (IOException e) {
+            if (log.isErrorEnabled()) {
+                log.error("Unable to update document index information", e);
+            }
+        }
 
         commit();
 
@@ -654,7 +670,7 @@ public class DocumentsWebService extends CoselmarWebServiceSupport {
 
         DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class);
         try {
-            documentsIndexationService.indexDocument(result);
+            documentsIndexationService.indexDocument(result,  null); // no document file for the moment here
             if (log.isDebugEnabled()) {
                 String message = String.format("Document '%s' was updated in index", document.getName());
                 log.debug(message);
diff --git a/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java b/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java
index 9f74687..acd53aa 100644
--- a/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java
+++ b/coselmar-rest/src/test/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationServiceTest.java
@@ -67,7 +67,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             "This is not a fake document used for test", "fr", null, "Jack, Jane",
             null, null, false, null, "http://somewhere", "no comment", null, null);
 
-        documentsIndexationService.indexDocument(documentOne);
+        documentsIndexationService.indexDocument(documentOne, null);
 
     }
 
@@ -124,7 +124,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             "This is not a fake document used for test", "fr", null, "Jack, Jane",
             null, null, false, null, "http://somewhere", "no comment", null, null);
 
-        documentsIndexationService.indexDocument(documentOne);
+        documentsIndexationService.indexDocument(documentOne, null);
 
         List<String> documentMatchingDocumentIds = documentsIndexationService.searchDocuments("document");
         Assert.assertEquals(1, documentMatchingDocumentIds.size());
@@ -149,7 +149,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             "This is a faked doct updated for test", "fr", null, "James, JJ",
             null, null, false, null, "http://somewhere", "no comment", null, null);
 
-        documentsIndexationService.updateDocument(documentOne);
+        documentsIndexationService.updateDocument(documentOne, null);
 
         documentMatchingDocumentIds = documentsIndexationService.searchDocuments("document");
         Assert.assertTrue(documentMatchingDocumentIds.isEmpty());
@@ -222,7 +222,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             "This is not a fake document used for test", "fr", null, "Jack, Jane",
             null, null, false, null, "http://somewhere", "no comment", null, null);
 
-        documentsIndexationService.indexDocument(documentOne);
+        documentsIndexationService.indexDocument(documentOne, null);
 
 
         String documentTwoId = "testSearchMultiple_document2";
@@ -231,7 +231,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             new Date(), Lists.newArrayList("tardis", "documentation", "old", "new", "borrowed", "blue"), "testDocument",
             "This is part of document about the TARDIS", "fr", null, "The Doctor, Rose, Amy, River, Clara",
             null, null, false, null, "http://tardis.wikia.com/wiki/TARDIS", "no comment", null, null);
-        documentsIndexationService.indexDocument(documentTwo);
+        documentsIndexationService.indexDocument(documentTwo, null);
 
 
         List<String> documentMatchingDoctorIds = documentsIndexationService.searchDocuments(Arrays.asList("doctor"));
@@ -266,7 +266,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             "This is not a fake document used for test", "fr", null, "Jack, Jane",
             null, null, false, null, "http://somewhere", "no comment", null, null);
 
-        documentsIndexationService.indexDocument(documentOne);
+        documentsIndexationService.indexDocument(documentOne, null);
 
         DocumentBean documentTwo = new DocumentBean("document2",
             "Another document", "Amy Pond", "user002", Privacy.PUBLIC.name(),
@@ -274,7 +274,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             "This is just an other document used for test", "fr", null, "Amy, Rory",
             null, null, false, null, "http://somewhere", "no comment", null, null);
 
-        documentsIndexationService.indexDocument(documentTwo);
+        documentsIndexationService.indexDocument(documentTwo, null);
 
         DocumentBean documentThree = new DocumentBean("document3",
             "Tardis documentation", "The Doctor", "user003", Privacy.PUBLIC.name(),
@@ -282,7 +282,7 @@ public class DocumentsIndexationServiceTest extends AbstractCoselmarServiceTest
             "This is part of documentation about the TARDIS", "fr", null, "The Doctor, Rose, Amy, River, Clara",
             null, null, false, null, "http://tardis.wikia.com/wiki/TARDIS", "no comment", null, null);
 
-        documentsIndexationService.indexDocument(documentThree);
+        documentsIndexationService.indexDocument(documentThree, null);
 
     }
 
diff --git a/pom.xml b/pom.xml
index 2c1ad62..f4bd15e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -134,11 +134,12 @@
     <nuitonValidatorVersion>3.0</nuitonValidatorVersion>
     <nuitonConvertorVersion>1.0</nuitonConvertorVersion>
 
-    <hibernateVersion>4.3.8.Final</hibernateVersion>
-    <postgresqlVersion>9.1-901-1.jdbc4</postgresqlVersion>
+    <hibernateVersion>4.3.11.Final</hibernateVersion>
+    <postgresqlVersion>9.4.1212.jre7</postgresqlVersion>
     <h2Version>1.4.190</h2Version>
 
     <luceneVersion>5.4.0</luceneVersion>
+    <tikaVersion>1.14</tikaVersion>
 
     <tomcatEmbedVersion>7.0.50</tomcatEmbedVersion>
 
@@ -279,7 +280,7 @@
       </dependency>
 
       <dependency>
-        <groupId>postgresql</groupId>
+        <groupId>org.postgresql</groupId>
         <artifactId>postgresql</artifactId>
         <version>${postgresqlVersion}</version>
       </dependency>
@@ -328,6 +329,12 @@
         <scope>runtime</scope>
       </dependency>
 
+      <dependency>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parsers</artifactId>
+        <version>${tikaVersion}</version>
+      </dependency>
+
       <!-- Commons -->
       <dependency>
         <groupId>org.apache.commons</groupId>

-- 
To stop receiving notification emails like this one, please contact
codelutin.com SCM administrator <admin+scm@codelutin.com>.

    

[Coselmar-commits] 01/05: refs #9197 Utilisation de Apache Tika pour indexer les documents

codelutin.com scm