Skip to Content.
Sympa Menu

idok-commit - [idok-commit] idok commit r336 - in trunk: java/ch/idok/common/config java/ch/idok/dmsd/impl/controller java/ch/idok/dmsd/impl/extractor/microsoft java/ch/idok/dmsd/impl/indexer/lucene java/ch/idok/dmsd/main lib

idok-commit AT lists.psi.ch

Subject: Commit emails of the iDok project

List archive

[idok-commit] idok commit r336 - in trunk: java/ch/idok/common/config java/ch/idok/dmsd/impl/controller java/ch/idok/dmsd/impl/extractor/microsoft java/ch/idok/dmsd/impl/indexer/lucene java/ch/idok/dmsd/main lib


Chronological Thread 
  • From: "AFS account Stadler Hans Christian" <stadler_h AT savannah.psi.ch>
  • To: idok-commit AT lists.psi.ch
  • Subject: [idok-commit] idok commit r336 - in trunk: java/ch/idok/common/config java/ch/idok/dmsd/impl/controller java/ch/idok/dmsd/impl/extractor/microsoft java/ch/idok/dmsd/impl/indexer/lucene java/ch/idok/dmsd/main lib
  • Date: Thu, 12 Mar 2009 11:31:10 +0100
  • List-archive: <https://lists.web.psi.ch/pipermail/idok-commit/>
  • List-id: Commit emails of the iDok project <idok-commit.lists.psi.ch>

Author: stadler_h
Date: Thu Mar 12 11:31:10 2009
New Revision: 336

Log:
Going back to old method for Microsoft Office Indexing - no oodaemon

Modified:
trunk/java/ch/idok/common/config/Setup.java
trunk/java/ch/idok/dmsd/impl/controller/SimpleController.java
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java
trunk/java/ch/idok/dmsd/impl/indexer/lucene/Indexer.java
trunk/java/ch/idok/dmsd/main/Daemon.java
trunk/lib/juh.jar
trunk/lib/jurt.jar
trunk/lib/ridl.jar
trunk/lib/unoil.jar

Modified: trunk/java/ch/idok/common/config/Setup.java
==============================================================================
--- trunk/java/ch/idok/common/config/Setup.java (original)
+++ trunk/java/ch/idok/common/config/Setup.java Thu Mar 12 11:31:10 2009
@@ -251,7 +251,7 @@
}

/**
- * The meta data key for the MD5 digest of the file content.
+ * @brief The meta data key for the MD5 digest of the file content.
*/
public String getChecksumKey() {
return System.getProperty("common.config.checksumKey",
@@ -259,6 +259,14 @@
}

/**
+ * @brief The meta data key for the log message of an exception raised during indexing.
+ */
+ public String getIndexingExceptionKey() {
+ return System.getProperty("common.config.indexingExceptionKey",
+ "auto:indexer:exception");
+ }
+
+ /**
* Perform initial setup for all iDok client and server programs
*
* @throws DmsException

Modified: trunk/java/ch/idok/dmsd/impl/controller/SimpleController.java
==============================================================================
--- trunk/java/ch/idok/dmsd/impl/controller/SimpleController.java (original)
+++ trunk/java/ch/idok/dmsd/impl/controller/SimpleController.java Thu Mar 12 11:31:10 2009
@@ -206,11 +206,9 @@
// Retry with default mime type
if (badMetaData(metaData))
return;
- metaData.put(config.mimeKey(),
- ContentExtractor.defaultMimeType);
+ metaData.put(config.mimeKey(),
ContentExtractor.defaultMimeType);
data.retryCounter++;
- msg
- .append("\nRetrying operation with mime-type
'default'.");
+ msg.append("\nRetrying operation with mime-type
'default'.");
logger.warning(msg.toString());
data.stage.push(data);
return;
@@ -220,11 +218,9 @@
if (badMetaData(metaData))
return;
metaData.put(origMimeKey,
metaData.get(config.mimeKey()));
- metaData.put(config.mimeKey(),
- ContentExtractor.defaultMimeType);
+ metaData.put(config.mimeKey(),
ContentExtractor.defaultMimeType);
data.retryCounter++;
- msg
- .append("\nRetrying operation with mime-type
'default'.");
+ msg.append("\nRetrying operation with mime-type
'default'.");
logger.warning(msg.toString());
data.stage.push(data);
return;

Modified: trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java
==============================================================================
--- trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java (original)
+++ trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java Thu Mar 12 11:31:10 2009
@@ -33,6 +33,13 @@
import java.util.TimerTask;
import java.util.logging.Logger;

+import com.sun.star.bridge.XUnoUrlResolver;
+import com.sun.star.comp.helper.Bootstrap;
+import com.sun.star.frame.XDesktop;
+import com.sun.star.lang.XMultiComponentFactory;
+import com.sun.star.uno.UnoRuntime;
+import com.sun.star.uno.XComponentContext;
+
import ch.idok.common.errorhandling.DmsException;
import ch.idok.common.errorhandling.ErrorType;
import ch.idok.common.errorhandling.Util;
@@ -63,6 +70,50 @@
static Logger logger;

/**
+ * @brief The local OpenOffice context.
+ */
+ private static XComponentContext localContext;
+
+ /**
+ * @brief The service manager stub connected to the OpenOffice instance.
+ */
+ static XMultiComponentFactory serviceManager;
+
+ /**
+ * @brief The OpenOffice desktop object.
+ */
+ static XDesktop desktop;
+
+ /**
+ * @brief Process builder for OpenOffice.
+ */
+ private static ProcessBuilder procBuilder;
+
+ /**
+ * @brief The OpenOffice process.
+ */
+ static Process soffice;
+
+ /**
+ * @brief The port OpenOffice should listen on.
+ */
+ static String unoPort;
+
+ /**
+ * @brief Restart counter for OpenOffice.
+ *
+ * OpenOffice seems to leak file descriptors.
+ */
+ int ooRestartCounter;
+
+ /**
+ * @brief Restart threshold for OpenOffice.
+ *
+ * After that many extractions, OpenOffice will be restarted.
+ */
+ private static final int ooRestartThreshold = 10;
+
+ /**
* @brief Maximum time for finishing text extraction
*
* Default is 10s, can be changed using the
@@ -199,6 +250,10 @@

/** @brief Retrieve the searchable plain text. */
public byte[] getText() throws DmsException {
+ if (++ooRestartCounter >= ooRestartThreshold) {
+ logger.finer("Restarting OpenOffice");
+ initOpenOffice();
+ }
File tf = null;
FileOutputStream fos = null;
InputStream is = null;
@@ -207,7 +262,6 @@
Process proc = null;

try {
- String unoPort =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.unoport");
if (unoPort == null)
DmsException.throwIt(
ErrorType.MISSING_ARG,
@@ -331,6 +385,141 @@
type = docType;
}

+ /** @brief Safely try to kill the OO process */
+ void killOO() {
+ try {
+ if (soffice != null) {
+ try {
+ soffice.getInputStream().close();
+ soffice.getOutputStream().close();
+ } catch (Throwable th) {}
+ soffice.destroy();
+ }
+ } catch (Throwable th) {
+ logger.finer("Can't destroy OO process\n"+th);
+ }
+ }
+
+ /** @brief Initialize connection to OpenOffice. */
+ void initOpenOffice() throws DmsException {
+ try {
+ if (desktop != null) {
+ try {
+ desktop.terminate();
+ } catch (Throwable th) {
+ logger.finer("Can't terminate OO desktop\n"+th);
+ }finally {
+ desktop = null;
+ }
+ }
+ if (soffice != null) {
+ // Kill soffice if it is already running
+ killOO();
+ soffice = null;
+ }
+
+ if (procBuilder == null) {
+ String screen =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.screen", ":0");
+ String tmpDir =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.tmpdir", "/tmp");
+ unoPort =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.unoport");
+ if (screen == null)
+ DmsException.throwIt(
+ ErrorType.MISSING_ARG,
+ this,
+ "Missing property:
ch.idok.dmsd.impl.extractor.microsoft.screen",
+ "The property must be set to the X screen that
openoffice requires");
+ if (unoPort == null)
+ DmsException.throwIt(
+ ErrorType.MISSING_ARG,
+ this,
+ "Missing property:
ch.idok.dmsd.impl.extractor.microsoft.unoport",
+ "The property must be set to the port that
openoffice must listen to");
+ String soffice =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.soffice",
+ "soffice");
+ procBuilder = new ProcessBuilder("bash", "-c",
+ soffice +
+ " -accept='socket,host=localhost,port=" + unoPort +
";urp;'" +
+ " -headless" +
+ " -invisible" +
+ " -display " + screen);
+ procBuilder = procBuilder.redirectErrorStream(true);
+ procBuilder = procBuilder.directory(new File(tmpDir));
+ logger.finest("Command for starting OO is
"+procBuilder.command());
+ }
+
+ // Get the office context
+ localContext = Bootstrap.createInitialComponentContext(null);
+ XMultiComponentFactory localServiceManager =
localContext.getServiceManager();
+ Object unoObj = localServiceManager.createInstanceWithContext(
+ "com.sun.star.bridge.UnoUrlResolver", localContext);
+ XUnoUrlResolver urlResolver = (XUnoUrlResolver) UnoRuntime
+ .queryInterface(XUnoUrlResolver.class, unoObj);
+ unoObj = null;
+ Throwable connectEx = null;
+ logger.finest("Starting OO process");
+ soffice = procBuilder.start();
+ for (int retry = 30; (unoObj == null) && (retry > 0); --retry) {
+ try {
+ unoObj =
urlResolver.resolve("uno:socket,host=localhost,port=" + unoPort
+ + ";urp;StarOffice.ServiceManager");
+ } catch (Throwable ex) {
+ logger.finest("Can't connect to OO, "+retry+" retries
remaining\n"+ex);
+ connectEx = ex;
+ Thread.sleep(200);
+ }
+ }
+
+ if (unoObj == null) {
+ InputStream pOut = null;
+ StringBuffer sb = new StringBuffer();
+ try {
+ pOut = soffice.getInputStream();
+ while (true) {
+ if (pOut.available() == 0)
+ break;
+ int ch = pOut.read();
+ if (ch == -1)
+ break;
+ sb.append((char) ch);
+ }
+ killOO();
+ int retval = soffice.exitValue();
+ logger.warning("OpenOffice exited with return value " +
retval + "\n" + sb);
+ } catch (Throwable th) {
+ logger.severe("Unexpected exception while killing OO " +
th + "\nOO output\n" + sb);
+ } finally {
+ soffice = null;
+ sb = null;
+ }
+ DmsException.throwIt(ErrorType.TOOL_ACCESS, this,
+ "Cannot connect to OpenOffice", "", connectEx);
+ }
+
+ serviceManager = (XMultiComponentFactory) UnoRuntime
+ .queryInterface(XMultiComponentFactory.class, unoObj);
+ unoObj = serviceManager.createInstanceWithContext(
+ "com.sun.star.frame.Desktop", localContext);
+ desktop = (XDesktop) UnoRuntime.queryInterface(XDesktop.class,
+ unoObj);
+ if (desktop == null) {
+ killOO();
+ soffice = null;
+ DmsException.throwIt(
+ ErrorType.TOOL_ACCESS,
+ this,
+ "Cannot connect to OpenOffice",
+ "Unable to retrieve desktop object");
+ }
+ ooRestartCounter = 0;
+ logger.fine("Established connection to OpenOffice");
+ } catch (DmsException ex) {
+ throw ex;
+ } catch (Throwable th) {
+ DmsException.throwIt(ErrorType.TOOL_ACCESS, this,
+ "Cannot connect to OpenOffice", "", th);
+ }
+ }
+
/** @see ContentExtractorFactory.initialize() */
public void initialize(Config config) throws DmsException {
try {
@@ -338,6 +527,13 @@
logger.finest("Initializing extractor factory for MS Office
Documents.");
interruptDelay
=Long.parseLong(System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.interruptdelay",
Long.toString(interruptDelay)));
+ initOpenOffice();
+ Runtime.getRuntime().addShutdownHook(new Thread (new Runnable() {
+ @Override
+ public void run() {
+ killOO();
+ }
+ }));
// Register content extractor factories
config.registerContentExtractor(getMimeType(), this);
int docType = MSExtractorProcess.getNumDocTypes();
@@ -377,5 +573,8 @@
}

public void destroy() {
+ logger.finest("Releasing all resources - killing OO process");
+ killOO();
+ soffice = null;
}
}

Modified: trunk/java/ch/idok/dmsd/impl/indexer/lucene/Indexer.java
==============================================================================
--- trunk/java/ch/idok/dmsd/impl/indexer/lucene/Indexer.java (original)
+++ trunk/java/ch/idok/dmsd/impl/indexer/lucene/Indexer.java Thu Mar 12 11:31:10 2009
@@ -250,6 +250,10 @@

metadata.put(ch.idok.common.config.Setup.getInstance()
.getSearchableVersionKey(),
searchableVersion);
}
+ if (data.exception != null) {
+
metadata.put(ch.idok.common.config.Setup.getInstance()
+ .getIndexingExceptionKey(),
data.exception.getLogMessage());
+ }
for (String metaDatum : metadata.keySet()) {
String metaValue = metadata.get(metaDatum);
luceneDocument.add(new Field(metaDatum, metaValue,

Modified: trunk/java/ch/idok/dmsd/main/Daemon.java
==============================================================================
--- trunk/java/ch/idok/dmsd/main/Daemon.java (original)
+++ trunk/java/ch/idok/dmsd/main/Daemon.java Thu Mar 12 11:31:10 2009
@@ -41,6 +41,10 @@
* top level Logger.
*/
public final class Daemon {
+
+ /** @brief The pipeline configuration object */
+ static Config config;
+
/**
* @brief Print usage info.
*/
@@ -117,7 +121,6 @@
public static void main(String args[]) {
if ((args.length > 1) && (args[0].equals("test")))
Test.run(args); // exits
- Config config = null;
Setup setup = Setup.getSetup();
Logger initialLogger = setup.getInitialLogger();
try {
@@ -127,6 +130,12 @@
controlCommand(initialLogger,
config.getExternalDaemonControl(), args); // exits
config.initialize(args); // Construct pipeline
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ config.destroy();
+ }
+ }));
} catch (DmsException ex) {
initialLogger
.severe(usage(config == null ? null : config.argDesc())

Modified: trunk/lib/juh.jar
==============================================================================
Binary files. No diff available.

Modified: trunk/lib/jurt.jar
==============================================================================
Binary files. No diff available.

Modified: trunk/lib/ridl.jar
==============================================================================
Binary files. No diff available.

Modified: trunk/lib/unoil.jar
==============================================================================
Binary files. No diff available.



  • [idok-commit] idok commit r336 - in trunk: java/ch/idok/common/config java/ch/idok/dmsd/impl/controller java/ch/idok/dmsd/impl/extractor/microsoft java/ch/idok/dmsd/impl/indexer/lucene java/ch/idok/dmsd/main lib, AFS account Stadler Hans Christian, 03/12/2009

Archive powered by MHonArc 2.6.19.

Top of Page