Skip to Content.
Sympa Menu

idok-commit - [idok-commit] idok commit r347 - in trunk: java/ch/idok/dmsd/impl/extractor/microsoft lib sites/psi/scripts/admin/dmsd sites/psi/scripts/admin/dmsd/dist

idok-commit AT lists.psi.ch

Subject: Commit emails of the iDok project

List archive

[idok-commit] idok commit r347 - in trunk: java/ch/idok/dmsd/impl/extractor/microsoft lib sites/psi/scripts/admin/dmsd sites/psi/scripts/admin/dmsd/dist


Chronological Thread 
  • From: "AFS account Stadler Hans Christian" <stadler_h AT savannah.psi.ch>
  • To: idok-commit AT lists.psi.ch
  • Subject: [idok-commit] idok commit r347 - in trunk: java/ch/idok/dmsd/impl/extractor/microsoft lib sites/psi/scripts/admin/dmsd sites/psi/scripts/admin/dmsd/dist
  • Date: Thu, 19 Mar 2009 16:14:06 +0100
  • List-archive: <https://lists.web.psi.ch/pipermail/idok-commit/>
  • List-id: Commit emails of the iDok project <idok-commit.lists.psi.ch>

Author: stadler_h
Date: Thu Mar 19 16:14:06 2009
New Revision: 347

Log:
changed to pipe communication with Open Office; all uno stuff is done in
separate processes

Added:
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/OOProcess.java
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ProcessArgs.java
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ProcessTimeout.java
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ThreadTimeout.java
trunk/lib/java_uno.jar (contents, props changed)
trunk/lib/unoloader.jar (contents, props changed)
Modified:
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSExtractorProcess.java

trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java
trunk/sites/psi/scripts/admin/dmsd/build.xml
trunk/sites/psi/scripts/admin/dmsd/dist/dmsd.config
trunk/sites/psi/scripts/admin/dmsd/dist/run-dmsd.sh

Modified:
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSExtractorProcess.java
==============================================================================
--- trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSExtractorProcess.java
(original)
+++ trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSExtractorProcess.java
Thu Mar 19 16:14:06 2009
@@ -288,14 +288,8 @@
*/
private XDesktop getDesktop() throws DmsException {
try {
- String unoPort =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.unoport");
- if (unoPort == null)
- DmsException.throwIt(
- ErrorType.MISSING_ARG,
- this,
- "Missing property:
ch.idok.dmsd.impl.extractor.microsoft.unoport",
- "The property must be set to the port that
openoffice listens to");
-
+ String tmpDir = OOProcess.getTmpDir();
+ String pipeName = OOProcess.getPipeName(tmpDir);
// Get the office context
XComponentContext localContext =
Bootstrap.createInitialComponentContext(null);
XMultiComponentFactory localServiceManager =
localContext.getServiceManager();
@@ -307,9 +301,8 @@
Throwable connectEx = null;
for (int retry = 3; (unoObj == null) && (retry > 0); --retry) {
try {
- unoObj = urlResolver.resolve("uno:" +
- MSOfficeExtractorFactory.connectString(unoPort) +
- "StarOffice.ServiceManager");
+ unoObj = urlResolver.resolve("uno:" +
OOProcess.getConnectString(pipeName)
+ + "StarOffice.ServiceManager");
} catch (Throwable ex) {
logger.finest("OpenOffice connection error, remaining
retries: "+(retry - 1));
connectEx = ex;

Modified:
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java
==============================================================================
---
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java
(original)
+++
trunk/java/ch/idok/dmsd/impl/extractor/microsoft/MSOfficeExtractorFactory.java
Thu Mar 19 16:14:06 2009
@@ -27,19 +27,13 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
import java.nio.CharBuffer;
import java.util.Map;
import java.util.Timer;
-import java.util.TimerTask;
import java.util.logging.Logger;

-import com.sun.star.bridge.XUnoUrlResolver;
-import com.sun.star.comp.helper.Bootstrap;
-import com.sun.star.frame.XDesktop;
-import com.sun.star.lang.XMultiComponentFactory;
-import com.sun.star.uno.UnoRuntime;
-import com.sun.star.uno.XComponentContext;
-
import ch.idok.common.errorhandling.DmsException;
import ch.idok.common.errorhandling.ErrorType;
import ch.idok.common.errorhandling.Util;
@@ -70,36 +64,11 @@
static Logger logger;

/**
- * @brief The local OpenOffice context.
- */
- private static XComponentContext localContext;
-
- /**
- * @brief The service manager stub connect to the OpenOffice instance.
- */
- static XMultiComponentFactory serviceManager;
-
- /**
- * @brief The OpenOffice desktop object.
- */
- static XDesktop desktop;
-
- /**
- * @brief Process builder for OpenOffice.
- */
- private static ProcessBuilder procBuilder;
-
- /**
* @brief The OpenOffice process.
*/
static Process soffice;

/**
- * @brief The port OpenOffice should listen to
- */
- static String unoPort;
-
- /**
* @brief Restart counter for OpenOffice.
*
* OpenOffice seems to leak file descriptors.
@@ -120,119 +89,100 @@
* ch.idok.dmsd.impl.extractor.microsoft.interruptdelay
* property.
*/
- static long interruptDelay = 10000;
-
- /**
- * @brief Delay for reconnection retries to Open Office in milliseconds
- *
- * This time span is also used for the Open Office connection timeout
- * watchdog timer.
- */
- static long reconnectDelay = 500;
+ static long interruptDelay = 5000;

/**
- * @brief Delay for letting Open Office start properly
+ * @brief Maximum delay in ms for process shutdown
*/
- static long startupDelay = 1000;
+ static long processKillTimeout = 10000;

/**
- * @brief Delay for letting Open Office shutdown properly
+ * @brief Maximum delay in ms for process startup
*/
- static long destroyDelay = 1000;
+ private long processStartupTimeout = 5000;

/**
- * @brief Timer task for shooting down the Extractor due to lack of
progress
- */
- final class InterruptTimerTask extends TimerTask {
- private Process process;
- InterruptTimerTask(Process proc) {
- process = proc;
- }
-
- @Override
- public void run() {
- process.destroy();
- logger.fine("Killed process "+process+" due to lack of
progress");
- }
- }
-
- /**
- * @brief Timer task for interrupting OpenOffice connection attempts
- */
- final class TimeoutTimerTask extends TimerTask {
- private Thread snail;
- TimeoutTimerTask(Thread thread) {
- snail = thread;
- }
-
- @Override
- public void run() {
- snail.interrupt();
- logger.fine("Interrupted thread " + snail + "due to lack of
progress");
- }
- }
-
- /**
* @brief Class implementing a thread for reading an error stream.
*/
- final class StdErrReader implements Runnable {
+ final class InputReader implements Runnable {

/** @brief The error input stream to be read by this thread. */
- private InputStreamReader err;
+ private InputStreamReader inputStream;

/** @brief String builder for collecting the error information. */
private StringBuilder out;

/** @brief Exception saved by the run method. */
- private DmsException throwable;
+ public DmsException throwable;
+
+ volatile boolean stopNow;
+
+ Thread me;

/**
* @brief Constructor
* @param eis The error input stream to be read.
*/
- StdErrReader(InputStream eis) {
- err = new InputStreamReader(eis);
+ InputReader(InputStream is) {
+ inputStream = new InputStreamReader(is);
throwable = null;
out = new StringBuilder();
+ stopNow = false;
}

/**
* @brief Get the error output.
* @return The error output as a string
*/
- String getErrorOutput() throws DmsException {
+ String getErrorOutput() {
synchronized (out) {
- if (throwable != null)
- throw throwable;
return out.toString();
}
}

+ void interrupt() {
+ stopNow = true;
+ me.interrupt();
+ }
+
/**
* @brief Run method that reads the error stream.
*/
public void run() {
Throwable ex = null;
+ me = Thread.currentThread();
+ long now = System.currentTimeMillis();
try {
CharBuffer chb = CharBuffer.allocate(1024);
int len;
do {
- len = err.read(chb);
- while (len > 0) {
+ while (! inputStream.ready()) {
+ try { Thread.sleep(10); } catch (Throwable th) {}
+ synchronized(this) {
+ if (stopNow)
+ break;
+ }
+ if (System.currentTimeMillis() - now > 20000) {
+ logger.warning("See no point in idling any
longer");
+ break;
+ }
+ }
+ len = inputStream.read(chb);
+ if (len > 0) {
+ chb.flip();
synchronized (out) {
- out.append(chb.position(0).toString());
+ out.append(chb);
}
chb.clear();
- len = err.read(chb);
}
} while (len >= 0);
} catch (Throwable th) {
ex = th;
} finally {
- try { err.close(); } catch (Throwable t) { /*ignore*/ }
+ try { inputStream.close(); } catch (Throwable t) {
/*ignore*/ }
}
if (ex != null)
- throwable = new DmsException(ErrorType.DOC_HANDLING, this,
"Unable to handle document", "Failed to read the error stream from MS Office
Extractor Process", ex);
+ throwable = new DmsException(ErrorType.DOC_HANDLING, this,
"Unable to handle document", "Failed to read the error stream", ex);
logger.finest("Thread "+Thread.currentThread().getName()+":
exit");
}

@@ -283,9 +233,10 @@

/** @brief Retrieve the searchable plain text. */
public byte[] getText() throws DmsException {
- if (++ooRestartCounter >= ooRestartThreshold) {
+ DmsException exception = null;
+ if ((soffice == null) || (++ooRestartCounter >=
ooRestartThreshold)) {
logger.finer("Restarting OpenOffice");
- initOpenOffice();
+ initOO();
}
File tf = null;
FileOutputStream fos = null;
@@ -295,14 +246,8 @@
Process proc = null;

try {
- if (unoPort == null)
- DmsException.throwIt(
- ErrorType.MISSING_ARG,
- this,
- "Missing property:
ch.idok.dmsd.impl.extractor.microsoft.unoport",
- "The property must be set to the port that
openoffice listens to");
// Save the document in a temporary file
- tf = File.createTempFile("Indexer", getDesc().fileExtension);
+ tf = File.createTempFile("Indexer", "." +
getDesc().fileExtension);
fos = new FileOutputStream(tf);
fos.write(raw);
fos.close();
@@ -310,23 +255,22 @@
// Start the timer that kills the extraction process after 3
seconds
// if the extraction fails to produce a result and start the
extraction process
timer = new Timer("InterruptTimer for thread
"+Thread.currentThread().getName());
- String cmd = System.getProperty("java.home")+"/bin/java";
- String cp = System.getProperty("java.class.path");
- String port =
"-Dch.idok.dmsd.impl.extractor.microsoft.unoport="+unoPort;
- String cls =
"ch.idok.dmsd.impl.extractor.microsoft.MSExtractorProcess";
+ String cls = MSExtractorProcess.class.getCanonicalName();
+ //
"ch.idok.dmsd.impl.extractor.microsoft.MSExtractorProcess";
String doc = tf.getCanonicalPath();
String tp = Integer.toString(type);
String level = Setup.getSetup().getLogLevel().toString();
+ ProcessBuilder procBuilder = new ProcessBuilder(new
ProcessArgs(logger, cls, doc,tp, level));
logger.finest("Calling MSExtractor process with\n"+
- cmd+" -classpath "+cp+" "+port+" "+cls+"
"+doc+" "+tp+" "+level);
- ProcessBuilder procBuilder = new ProcessBuilder(cmd,
"-classpath", cp, port, cls, doc,tp, level);
+ procBuilder.command().toString());
procBuilder.directory(null);
proc = procBuilder.start();
- timer.schedule(new InterruptTimerTask(proc), interruptDelay);
// Start the thread for reading the stderr output of the
extraction process
- StdErrReader errOut = new
StdErrReader(proc.getErrorStream());
- errThread = new Thread(errOut, "StdErrReader");
+ InputReader errOut = new InputReader(proc.getErrorStream());
+ errThread = new Thread(errOut, "StdErrReader-getText");
errThread.start();
+ // Start the timeout thread
+ timer.schedule(new ProcessTimeout(proc, logger),
interruptDelay);
// Read the stdout output of the extraction process
is = proc.getInputStream();
int length = readLength(is)+1; // allow for 1 character of
slack
@@ -348,20 +292,29 @@
} while (length > 0);
}
is.close(); is = null;
+ timer.cancel();
logger.finest("Finished reading the input stream,
nchars="+offset);
// Log the stderr output, if any, of the extraction process
- errThread.join(interruptDelay);
- String errStr = errOut.getErrorOutput();
- if (errStr.length() != 0)
- logger.warning("Error stream of the MS Office Extractor
Process:\n"+errStr);
int rval = proc.waitFor();
+ errOut.interrupt();
+ errThread.join(interruptDelay);
+ try {
+ String errStr = errOut.getErrorOutput();
+ if (errStr.length() != 0)
+ logger.warning("Error stream of the MS Office
Extractor Process:\n"+
+ "------- BEGIN ------\n" + errStr +
"\n------- END ------");
+ if (errOut.throwable != null)
+ throw errOut.throwable;
+ } catch (Throwable th) {
+ logger.warning("Failed to retrieve error stream of MS
Office Extractor process\n" + th);
+ }
if (rval != 0)
DmsException.throwIt(ErrorType.DOC_HANDLING, this,
"Cannot handle document.", "MS Office Extractor Process terminated with
status "+rval);
return buf;
} catch (DmsException ex) {
- throw ex;
+ exception = ex;
} catch (Throwable th) {
- DmsException.throwIt(ErrorType.DOC_HANDLING, this,
+ exception = new DmsException(ErrorType.DOC_HANDLING, this,
"Failed to handle document",
Util.stackTraceToString(th), th);
} finally {
if (timer != null)
@@ -393,7 +346,8 @@
dispose();
}
}
- return null;
+ try { killOO(); } catch (Throwable th) {}
+ throw exception;
}

/** @brief Retrieve the modified metadata. */
@@ -417,189 +371,120 @@
private MSOfficeExtractorFactory(int docType) {
type = docType;
}
-
- /** @brief Safely try to kill the OO process */
+
void killOO() {
+ if (soffice == null) {
+ logger.finest("Nothing to do");
+ return;
+ }
+ Timer timer = null;
try {
- if (soffice != null) {
- try {
- soffice.getInputStream().close();
- soffice.getOutputStream().close();
- Timer timer = new Timer("OpenOffice Shutdown Watchdog");
- timer.schedule(new
TimeoutTimerTask(Thread.currentThread()), reconnectDelay);
- desktop.terminate();
- timer.cancel();
- Thread.sleep(destroyDelay);
- } catch (Throwable th) {}
- soffice.destroy();
+ PrintStream command = new PrintStream(soffice.getOutputStream());
+ InputReader errOut = new InputReader(soffice.getErrorStream());
+ Thread errThread = new Thread(errOut, "StdErrReader-killOO");
+ errThread.start();
+ command.write("stop\n".getBytes());
+ command.close();
+ timer = new Timer("Destruction timer for process " + soffice);
+ timer.schedule(new ProcessTimeout(soffice, logger),
processKillTimeout);
+ soffice.waitFor();
+ timer.cancel();
+ errOut.interrupt();
+ errThread.join(interruptDelay);
+ try {
+ String errMsg = errOut.getErrorOutput();
+ if (errMsg.length()!= 0)
+ logger.warning("Error output of the OO control
process:\n" +
+ "------ BEGIN ------\n" + errMsg + "\n------ END
------");
+ if (errOut.throwable != null)
+ throw errOut.throwable;
+ } catch (Throwable th) {
+ logger.finest("Error: OO control process error stream
reader\n" + th);
}
+ logger.finest("OO control process terminated with exit value " +
soffice.exitValue());
} catch (Throwable th) {
- logger.finer("Can't destroy OO process\n"+th);
+ logger.severe("Internal bug detected\n" + th);
} finally {
- desktop = null;
+ if (timer != null)
+ timer.cancel();
+ if (soffice != null)
+ soffice.destroy();
soffice = null;
}
}

/** @brief Initialize connection to OpenOffice. */
- void initOpenOffice() throws DmsException {
+ void initOO() {
+ killOO();
+ Timer timer = null;
try {
- if (desktop != null) {
- try {
- desktop.terminate();
- } catch (Throwable th) {
- logger.finer("Can't terminate OO desktop\n"+th);
- }finally {
- desktop = null;
- }
- }
- if (soffice != null) {
- // Kill soffice if it is already running
- killOO();
- soffice = null;
- }
-
- if (procBuilder == null) {
- String screen =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.screen", ":0");
- String tmpDir =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.tmpdir", "/tmp");
- unoPort =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.unoport");
- if (screen == null)
- DmsException.throwIt(
- ErrorType.MISSING_ARG,
- this,
- "Missing property:
ch.idok.dmsd.impl.extractor.microsoft.screen",
- "The property must be set to the X screen that
openoffice requires");
- if (unoPort == null)
- DmsException.throwIt(
- ErrorType.MISSING_ARG,
- this,
- "Missing property:
ch.idok.dmsd.impl.extractor.microsoft.unoport",
- "The property must be set to the port that
openoffice must listen to");
- String soffice =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.soffice",
- "soffice");
- procBuilder = new ProcessBuilder("bash", "-c",
- soffice +
- " -accept='" + connectString(unoPort) + "'" +
- " -headless" +
- " -invisible" +
- " -display " + screen);
- procBuilder = procBuilder.redirectErrorStream(true);
- procBuilder = procBuilder.directory(new File(tmpDir));
- logger.finest("Command for starting OO is
"+procBuilder.command());
- }
-
- // Get the office context
- localContext = Bootstrap.createInitialComponentContext(null);
- XMultiComponentFactory localServiceManager =
localContext.getServiceManager();
- Object unoObj = localServiceManager.createInstanceWithContext(
- "com.sun.star.bridge.UnoUrlResolver", localContext);
- XUnoUrlResolver urlResolver = (XUnoUrlResolver) UnoRuntime
- .queryInterface(XUnoUrlResolver.class, unoObj);
- unoObj = null;
- Throwable connectEx = null;
- logger.finest("Starting OO process");
+ String cls = OOProcess.class.getCanonicalName();
+ String level = Setup.getSetup().getLogLevel().toString();
+ ProcessBuilder procBuilder = new ProcessBuilder(new
ProcessArgs(logger, cls, level));
+ logger.finest("Calling OO control process with\n"+
+ procBuilder.command().toString());
+ procBuilder.directory(null);
soffice = procBuilder.start();
- Thread.sleep(startupDelay);
- Timer timer = new Timer("OpenOffice Connection Timeout
Watchdog");
- TimeoutTimerTask watchdog = new
TimeoutTimerTask(Thread.currentThread());
- for (int retry = 30; (unoObj == null) && (retry > 0); --retry) {
- try {
- timer.schedule(watchdog , reconnectDelay);
- unoObj = urlResolver.resolve("uno:" +
connectString(unoPort) +
- "StarOffice.ServiceManager");
- } catch (Throwable ex) {
- logger.finest("Can't connect to OO, "+retry+" retries
remaining\n"+ex);
- connectEx = ex;
- } finally {
- watchdog.cancel();
- }
- if (unoObj == null)
- Thread.sleep(reconnectDelay * (30 - retry));

- }
+ InputReader errOut = new InputReader(soffice.getErrorStream());
+ Thread errThread = new Thread(errOut, "StdErrReader-initOO");
+ errThread.start();
+ OutputStream command = soffice.getOutputStream();
+ logger.finest("Sending start command...");
+ command.write("start\n".getBytes());
+ command.flush();
+ Thread.yield();
+ timer = new Timer("Destruction timer for process " + soffice);
+ timer.schedule(new ProcessTimeout(soffice, logger),
processStartupTimeout);
+ logger.finest("Waiting for answer...");
+ String answer = OOProcess.readLine(soffice.getInputStream());
timer.cancel();
-
- if (unoObj == null) {
- InputStream pOut = null;
- StringBuffer sb = new StringBuffer();
- try {
- pOut = soffice.getInputStream();
- while (true) {
- if (pOut.available() == 0)
- break;
- int ch = pOut.read();
- if (ch == -1)
- break;
- sb.append((char) ch);
- }
- killOO();
- int retval = soffice.exitValue();
- logger.warning("OpenOffice exited with return value " +
retval + "\n" + sb);
- } catch (Throwable th) {
- logger.severe("Unexpected exception while killing OO " +
th + "\nOO output\n" + sb);
- } finally {
- soffice = null;
- sb = null;
- }
- DmsException.throwIt(ErrorType.TOOL_ACCESS, this,
- "Cannot connect to OpenOffice", "", connectEx);
+ logger.finest("Received answer " + answer);
+ errOut.interrupt();
+ errThread.join(interruptDelay);
+ try {
+ String errMsg = errOut.getErrorOutput();
+ if (errMsg.length()!= 0)
+ logger.warning("Error output of the OO control
process:\n" +
+ "------ BEGIN ------\n" + errMsg + "\n------ END
------");
+ if (errOut.throwable != null)
+ throw errOut.throwable;
+ } catch (Throwable th) {
+ logger.warning("Error: OO control process error stream
reader\n" + th);
}
-
- serviceManager = (XMultiComponentFactory) UnoRuntime
- .queryInterface(XMultiComponentFactory.class, unoObj);
- unoObj = serviceManager.createInstanceWithContext(
- "com.sun.star.frame.Desktop", localContext);
- desktop = (XDesktop) UnoRuntime.queryInterface(XDesktop.class,
- unoObj);
- if (desktop == null) {
+ if ((answer == null) || ! answer.equals("started")) {
+ logger.warning("Did not receive correct answer from OO
control process!");
killOO();
- soffice = null;
- DmsException.throwIt(
- ErrorType.TOOL_ACCESS,
- this,
- "Cannot connect to OpenOffice",
- "Unable to retrieve desktop object");
+ } else {
+ logger.finest("OO control has been started sucessfully");
}
- ooRestartCounter = 0;
- logger.fine("Established connection to OpenOffice");
- } catch (DmsException ex) {
- throw ex;
+ ooRestartCounter = 0;
} catch (Throwable th) {
- DmsException.throwIt(ErrorType.TOOL_ACCESS, this,
- "Cannot connect to OpenOffice", "", th);
- }
+ logger.severe("Internal bug detected\n" +
Util.stackTraceToString(th));
+ if (timer != null)
+ timer.cancel();
+ if (soffice != null)
+ soffice.destroy();
+ soffice = null;
+ }
}

- /** @brief Return the OpenOffice connection method string */
- static String connectString(String unoPort) {
- return "socket,host=localhost,port=" + unoPort +
",tcpNoDelay=1;urp;";
- //return "pipe,name=indexer-pipe-" + unoPort + ";urp;";
- }
-
/** @see ContentExtractorFactory.initialize() */
public void initialize(Config config) throws DmsException {
try {
logger = config.getLogger("dmsd.impl.extractor.microsoft");
logger.finest("Initializing extractor factory for MS Office
Documents.");
- interruptDelay
=Long.parseLong(System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.interruptdelay",
+ interruptDelay =
Long.parseLong(System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.interruptdelay",
Long.toString(interruptDelay)));
- int loop=5;
- do {
- try {
- initOpenOffice();
- break;
- } catch (Throwable th) {
- if (loop == 1)
- throw th;
- loop--;
- logger.warning("Couldn't initialize Open Office, " + loop
- + "retries remaining");
- Thread.sleep(1000);
- }
- } while(true);
+ processKillTimeout =
Long.parseLong(System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.killtimeout",
+ Long.toString(processKillTimeout)));
+ processStartupTimeout =
Long.parseLong(System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.starttimeout",
+ Long.toString(processStartupTimeout)));
+ initOO();
Runtime.getRuntime().addShutdownHook(new Thread (new Runnable() {
@Override
public void run() {
- killOO();
+ if (soffice != null)
+ soffice.destroy();
}
}));
// Register content extractor factories
@@ -612,10 +497,13 @@
}
} catch (NumberFormatException ex) {
DmsException.throwIt(ErrorType.BAD_ARG, this, "Initialization
error.",
- "ch.idok.dmsd.impl.extractor.microsoft.interruptdelay
property is no long value.",
+ "One of the following properties has no long value:\n" +
+ "ch.idok.dmsd.impl.extractor.microsoft.interruptdelay\n"
+
+ "ch.idok.dmsd.impl.extractor.microsoft.starttimeout\n" +
+ "ch.idok.dmsd.impl.extractor.microsoft.killtimeout",
ex);
} catch (Throwable th) {
- DmsException.throwIt(ErrorType.INTERNAL, this, "Bug detected",
"", th);
+ DmsException.throwIt(ErrorType.INTERNAL, this, "Bug detected",
Util.stackTraceToString(th), th);
}
}


Added: trunk/java/ch/idok/dmsd/impl/extractor/microsoft/OOProcess.java
==============================================================================
--- (empty file)
+++ trunk/java/ch/idok/dmsd/impl/extractor/microsoft/OOProcess.java Thu
Mar 19 16:14:06 2009
@@ -0,0 +1,378 @@
+/*
+ * Copyright (C) 2006-2008 iDok team.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
+ */
+
+package ch.idok.dmsd.impl.extractor.microsoft;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Timer;
+import java.util.logging.ConsoleHandler;
+import java.util.logging.Handler;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import ch.idok.common.errorhandling.DmsException;
+import ch.idok.common.errorhandling.ErrorType;
+import ch.idok.common.errorhandling.Util;
+
+import com.sun.star.bridge.XUnoUrlResolver;
+import com.sun.star.comp.helper.Bootstrap;
+import com.sun.star.frame.XDesktop;
+import com.sun.star.lang.XMultiComponentFactory;
+import com.sun.star.uno.UnoRuntime;
+import com.sun.star.uno.XComponentContext;
+
+/**
+ * @brief Startup/Kill Open Office Instance
+ */
+public final class OOProcess {
+
+ /**
+ * @brief The logger instance for this class.
+ */
+ private static Logger logger;
+
+ /**
+ * @brief The OpenOffice desktop object.
+ */
+ private XDesktop desktop;
+
+ /**
+ * @brief Process builder for OpenOffice.
+ */
+ private ProcessBuilder procBuilder;
+
+ /**
+ * @brief The OpenOffice process.
+ */
+ private Process soffice;
+
+ /**
+ * @brief Delay in ms after terminating the OO desktop
+ */
+ private int desktopTerminationDelay = 1000;
+
+ /**
+ * @brief Delay in ms after killing the OO process
+ */
+ private int processKillDelay = 2000;
+
+ /**
+ * @brief Delay in ms after starting the OO process
+ */
+ private int processStartupDelay = 2000;
+
+ /**
+ * @brief Delay in ms after a failed initial connection attempt
+ */
+ private int initialConnectDelay = 500;
+
+ /**
+ * @brief X11 screen for OO
+ */
+ private String screen = ":0";
+
+ /**
+ * @brief Directory for temporary files for OO text extraction
+ */
+ private static String tmpDir = "/tmp";
+
+ /**
+ * @brief Name of the OO pipe connection
+ *
+ * The name must not start with /; getPipeName() rejects such values and returns the name unchanged otherwise
+ */
+ private static String pipeName = "indexer-msextractor-pipe";
+
+ /**
+ * @brief The OO executable
+ */
+ private static String executable = "soffice";
+
+ public static String readLine(InputStream is) throws IOException {
+ StringBuilder sb = new StringBuilder();
+ int ch = is.read();
+ boolean valid = false;
+ while (ch >= 0) {
+ char c = (char)ch;
+ valid = true;
+ if (c == '\n')
+ break;
+ sb.append(c);
+ ch = is.read();
+ }
+ if (valid)
+ return sb.toString();
+ return null;
+ }
+
+ /**
+ * @brief Initialize OO and parse commands from stdin
+ * @param args 0 - log level
+ */
+ public static void main(String[] args) {
+ int retval = 1;
+ Level level = Level.parse(args[0]);
+ logger = Logger.getLogger("dmsd.impl.extractor.microsoft");
+ try {
+ Handler handler = new ConsoleHandler();
+ handler.setLevel(level);
+ logger.addHandler(handler);
+ logger.setLevel(level);
+ logger.setUseParentHandlers(false);
+ logger.finest("test1 //// loglevel="+args[0]);
+
+ retval = 2;
+ OOProcess ooproc = new OOProcess();
+ retval = 3;
+ String command;
+ try {
+ do {
+ retval = 5;
+ command = readLine(System.in);
+ logger.finest("Received command " + command);
+ retval = 6;
+ if (command == null) {
+ logger.warning("Error: input stream was closed,
terminating!");
+ break; // input stream was closed
+ }
+ retval = 7;
+ if (command.equals("start")) {
+ boolean success = ooproc.start();
+ if (success) {
+ logger.finest("Sending ack");
+ System.out.write("started\n".getBytes());
+ } else {
+ logger.finest("Sending nack");
+ System.out.println("failed");
+ }
+ System.out.flush();
+ Thread.sleep(5000);
+ continue;
+ }
+ retval = 8;
+ if (command.equals("stop")) {
+ break;
+ }
+ logger.warning("Error: unknown command");
+ } while(true);
+ retval = 10;
+ } catch (IOException ex) {
+ logger.warning("Exception while processing Indexer
commands\n"+ex);
+ }
+ ooproc.stop();
+ retval = 0;
+ logger.finest("Terminating with retval="+retval);
+ System.exit(retval);
+ } catch (Throwable th) {
+ logger.warning("OOProcess failed!\n" + Util.getStackTrace(th));
+ }
+ System.exit(retval);
+ }
+
+ public static String getConnectString(String pipeName) {
+ return "pipe,name=" + pipeName + ";urp;";
+ }
+
+ public static String getTmpDir() throws DmsException, IOException {
+ String td =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.tmpdir", tmpDir);
+ File dir = new File(td);
+ if (! dir.isDirectory())
+ DmsException.throwIt(ErrorType.BAD_ARG, dir, "Invalid directory
for text extraction",
+ "ch.idok.dmsd.impl.extractor.microsoft.tmpdir property value
is invalid");
+ return dir.getCanonicalPath();
+ }
+
+ public static String getPipeName(String tmpDir) throws DmsException {
+ String pn =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.pipeName",
pipeName);
+ if (pn.charAt(0) == '/')
+ DmsException.throwIt(ErrorType.BAD_ARG, tmpDir, "Invalid name
for OO connection",
+ "ch.idok.dmsd.impl.extractor.microsoft.pipeName property
value is invalid");
+ return pn;
+ }
+
+ private OOProcess() {
+ try {
+ executable =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.soffice",
executable);
+ if (! new File(executable).canExecute())
+ DmsException.throwIt(ErrorType.BAD_ARG, this, "Invalid
executable path",
+ "ch.idok.dmsd.impl.extractor.microsoft.soffice property
value is invalid");
+ tmpDir = getTmpDir();
+ pipeName = getPipeName(tmpDir);
+ screen =
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.screen", screen);
+ desktopTerminationDelay = Integer.parseInt(
+
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.desktoptTerminationDelay",
+ Integer.toString(processKillDelay)));
+ processKillDelay = Integer.parseInt(
+
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.processKillDelay",
+ Integer.toString(processKillDelay)));
+ processStartupDelay = Integer.parseInt(
+
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.processStartupDelay",
+ Integer.toString(processStartupDelay)));
+ initialConnectDelay = Integer.parseInt(
+
System.getProperty("ch.idok.dmsd.impl.extractor.microsoft.initialConnectDelay",
+ Integer.toString(initialConnectDelay)));
+ procBuilder = new ProcessBuilder(executable,
+ "-accept=" + getConnectString(pipeName),
+ "-headless",
+ "-norestore",
+ "-nodefault");
+ // "-display ", screen);
+ procBuilder = procBuilder.redirectErrorStream(true);
+ procBuilder = procBuilder.directory(new File(tmpDir));
+ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
+ @Override
+ public void run() {
+ stop();
+ }
+ }));
+ } catch (Throwable th) {
+ logger.warning("Unable to set all properties");
+ }
+
+ }
+
+ /**
+ * @brief Stop Open Office
+ */
+ void stop() {
+ if (desktop != null) {
+ logger.finest("Terminating desktop");
+ try {
+ desktop.terminate();
+ Thread.sleep(desktopTerminationDelay);
+ } catch (Throwable th) {
+ logger.finer("Can't terminate OO desktop\n"+th);
+ }finally {
+ desktop = null;
+ }
+ }
+ if (soffice != null) {
+ logger.finest("Closing streams and killing OO process");
+ try {
+ try { soffice.getInputStream().close(); } catch (Throwable
th) {}
+ try { soffice.getOutputStream().close(); } catch (Throwable
th) {}
+ } finally {
+ soffice.destroy();
+ try { Thread.sleep(processKillDelay); } catch (Throwable th)
{}
+ soffice = null;
+ logger.finest("Killed OO process!");
+ }
+ }
+ }
+
+ /**
+ * @brief Start Open Office
+ *
+ * @return true if the startup succeeds
+ */
+ boolean start() {
+ try {
+ logger.finest("Stopping old OO process, if any...");
+ stop();
+
+ // Get the OO context
+ logger.finest("Get OO context");
+ XComponentContext localContext =
Bootstrap.createInitialComponentContext(null);
+ XMultiComponentFactory localServiceManager =
localContext.getServiceManager();
+ Object unoObj = localServiceManager.createInstanceWithContext(
+ "com.sun.star.bridge.UnoUrlResolver", localContext);
+ XUnoUrlResolver urlResolver = (XUnoUrlResolver) UnoRuntime
+ .queryInterface(XUnoUrlResolver.class, unoObj);
+ unoObj = null;
+
+ // Start OO process
+ Throwable connectEx = null;
+ logger.finest("Starting OO process\n" +
procBuilder.command().toString());
+ soffice = procBuilder.start();
+
+ // Initial OO connection
+ logger.finest("Trying to establish connection...");
+ try {
+ Thread.sleep(processStartupDelay);
+ Timer timer = new Timer("OpenOffice Connection Timeout
Watchdog");
+ final int maxRetries = 5;
+ for (int retry = maxRetries; (unoObj == null) && (retry >
0); --retry) {
+ ThreadTimeout watchdog = new
ThreadTimeout(Thread.currentThread(), logger);
+ try {
+ timer.schedule(watchdog , initialConnectDelay);
+ String url = "uno:" + getConnectString(pipeName) +
"StarOffice.ServiceManager";
+ logger.finest("Attempting connecting to " + url);
+ unoObj = urlResolver.resolve(url);
+ } catch (Throwable ex) {
+ logger.finest("Can't connect to OO, "+retry+"
retries remaining\n"+ex);
+ connectEx = ex;
+ } finally {
+ watchdog.cancel();
+ }
+ if (unoObj == null)
+ Thread.sleep(initialConnectDelay * (maxRetries -
retry));
+ }
+ timer.cancel();
+ } catch (Throwable th) {
+ logger.warning("Failed to establish initial OO connection\n"
+ th);
+ }
+
+ if (unoObj == null) {
+ InputStream pOut = null;
+ StringBuffer sb = new StringBuffer();
+ try {
+ pOut = soffice.getInputStream();
+ while (true) {
+ if (pOut.available() == 0)
+ break;
+ int ch = pOut.read();
+ if (ch == -1)
+ break;
+ sb.append((char) ch);
+ }
+ soffice.destroy();
+ try { Thread.sleep(processKillDelay); } catch (Throwable
th) {}
+ int retval = soffice.exitValue();
+ logger.warning("OpenOffice exited with return value " +
retval + "\n" + connectEx + "\n" + sb);
+ } catch (Throwable th) {
+ logger.severe("Unexpected exception while killing OO " +
th + "\nOO output\n" + sb);
+ } finally {
+ soffice = null;
+ sb = null;
+ }
+ return false;
+ }
+
+ // Create OO desktop object
+ XMultiComponentFactory serviceManager = (XMultiComponentFactory)
UnoRuntime
+ .queryInterface(XMultiComponentFactory.class, unoObj);
+ unoObj = serviceManager.createInstanceWithContext(
+ "com.sun.star.frame.Desktop", localContext);
+ desktop = (XDesktop) UnoRuntime.queryInterface(XDesktop.class,
+ unoObj);
+ if (desktop == null) {
+ stop();
+ logger.warning("Unable to create desktop object");
+ return false;
+ }
+ logger.fine("Established connection to OpenOffice");
+ return true;
+ } catch (Throwable th) {
+ logger.severe("Bug detected\n" + Util.stackTraceToString(th));
+ }
+ return false;
+ }
+}

Added: trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ProcessArgs.java
==============================================================================
--- (empty file)
+++ trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ProcessArgs.java Thu
Mar 19 16:14:06 2009
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2006-2008 iDok team.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+package ch.idok.dmsd.impl.extractor.microsoft;
+
+import java.util.LinkedList;
+import java.util.Properties;
+import java.util.logging.Logger;
+
/**
 * Argument list for launching a child JVM that mirrors the current one.
 *
 * The list is filled in this order: the {@code java} launcher found under
 * the {@code java.home} system property, {@code -classpath} followed by the
 * current {@code java.class.path}, one {@code -Dkey=value} entry for every
 * system property whose name starts with {@code ch.idok}, and finally the
 * caller-supplied arguments.
 */
public final class ProcessArgs extends LinkedList<String> {
    private static final long serialVersionUID = 1L;

    /**
     * Build the argument list.
     *
     * @param logger not referenced by this constructor; kept so that
     *               existing callers continue to compile
     * @param args   arguments appended, in order, after the JVM options
     */
    public ProcessArgs(Logger logger, String... args) {
        super();
        Properties prop = System.getProperties();
        add(prop.getProperty("java.home") + "/bin/java");
        add("-classpath");
        add(prop.getProperty("java.class.path"));
        // stringPropertyNames() hands back a snapshot that contains only
        // String keys with String values. Iterating it cannot hit a
        // ClassCastException on a foreign key type, and it is not affected
        // by other threads changing system properties while we iterate.
        for (String key : prop.stringPropertyNames()) {
            if (key.startsWith("ch.idok")) {
                add("-D" + key + "=" + prop.getProperty(key));
            }
        }
        for (String arg : args) {
            add(arg);
        }
    }
}

Added: trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ProcessTimeout.java
==============================================================================
--- (empty file)
+++ trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ProcessTimeout.java Thu Mar 19 16:14:06 2009
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2006-2008 iDok team.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+package ch.idok.dmsd.impl.extractor.microsoft;
+
+import java.util.TimerTask;
+import java.util.logging.Logger;
+
/**
 * Timer task that destroys a process when it fires.
 *
 * Schedule an instance on a {@code java.util.Timer}; calling
 * {@code cancel()} on the task before it fires keeps the process alive.
 */
class ProcessTimeout extends TimerTask {
    /** Process to destroy when the task runs. */
    private final Process process;
    /** Receives a FINE level record after the process was destroyed. */
    private final Logger logger;

    /**
     * @param proc process to destroy when the timeout fires
     * @param log  logger that records the kill
     */
    ProcessTimeout(Process proc, Logger log) {
        process = proc;
        logger = log;
    }

    /** Destroy the process, then log that it was killed. */
    @Override
    public void run() {
        process.destroy();
        logger.fine("Killed process " + process + " due to lack of progress");
    }
}

Added: trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ThreadTimeout.java
==============================================================================
--- (empty file)
+++ trunk/java/ch/idok/dmsd/impl/extractor/microsoft/ThreadTimeout.java Thu Mar 19 16:14:06 2009
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2006-2008 iDok team.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+package ch.idok.dmsd.impl.extractor.microsoft;
+
+import java.util.TimerTask;
+import java.util.logging.Logger;
+
/**
 * @brief Timer task for interrupting a thread
 *
 * Schedule an instance on a {@code java.util.Timer}; calling
 * {@code cancel()} on the task before it fires leaves the thread alone.
 */
class ThreadTimeout extends TimerTask {
    /** Thread to interrupt when the task runs. */
    private final Thread snail;
    /** Receives a FINE level record after the interrupt was sent. */
    private final Logger logger;

    /**
     * @param thread thread to interrupt when the timeout fires
     * @param log    logger that records the interrupt
     */
    ThreadTimeout(Thread thread, Logger log) {
        snail = thread;
        logger = log;
    }

    /** Interrupt the thread, then log that it was interrupted. */
    @Override
    public void run() {
        snail.interrupt();
        // The leading space before "due" was missing, which glued the
        // thread description to the rest of the message.
        logger.fine("Interrupted thread " + snail + " due to lack of progress");
    }
}

Added: trunk/lib/java_uno.jar
==============================================================================
Binary file. No diff available.

Added: trunk/lib/unoloader.jar
==============================================================================
Binary file. No diff available.

Modified: trunk/sites/psi/scripts/admin/dmsd/build.xml
==============================================================================
--- trunk/sites/psi/scripts/admin/dmsd/build.xml (original)
+++ trunk/sites/psi/scripts/admin/dmsd/build.xml Thu Mar 19 16:14:06 2009
@@ -56,12 +56,14 @@
<property name="uno-2" value="${unodir}/ridl.jar"/>
<property name="uno-3" value="${unodir}/juh.jar"/>
<property name="uno-4" value="${unodir}/jurt.jar"/>
+ <property name="uno-5" value="${unodir}/unoloader.jar"/>
+ <property name="uno-6" value="${unodir}/java_uno.jar"/>
<property name="exif" value="${distlib}/exif.jar"/>
<property name="tar" value="${distlib}/tar.jar"/>
<property name="htmlparser" value="${distlib}/htmlparser.jar"/>

<!-- Library sets -->
- <property name="uno-all" value="${uno-1}:${uno-2}:${uno-3}:${uno-4}"/>
+ <property name="uno-all"
value="${uno-1}:${uno-2}:${uno-3}:${uno-4}:${uno-5}:${uno-6}"/>
<property name="lib-all"
value="${jsvn}:${lucene}:${pdf}:${fontbox}:${bcprov}:${bcmail}:${uno-all}:${exif}:${tar}:${htmlparser}"/>

<target name="init" description="Initialize timestamp properties and
create necessary directories">

Modified: trunk/sites/psi/scripts/admin/dmsd/dist/dmsd.config
==============================================================================
--- trunk/sites/psi/scripts/admin/dmsd/dist/dmsd.config (original)
+++ trunk/sites/psi/scripts/admin/dmsd/dist/dmsd.config Thu Mar 19 16:14:06 2009
@@ -9,11 +9,9 @@
# The screen for Xvfb and Open Office
DUMMY_DISPLAY=:2

-# Path to the Open Office "soffice" executable
+# Path to the Open Office "soffice" executable and libraries
SOFFICE_PATH=/opt/openoffice.org3/program/soffice
-
-# The port Open Office should listen to
-OOUNO_PORT=2705
+SOFFICE_LIBS=/opt/openoffice.org/ure/lib

# Delay in milliseconds before the OO Extraction Process is killed
OOKILLER_DELAY=20000

Modified: trunk/sites/psi/scripts/admin/dmsd/dist/run-dmsd.sh
==============================================================================
--- trunk/sites/psi/scripts/admin/dmsd/dist/run-dmsd.sh (original)
+++ trunk/sites/psi/scripts/admin/dmsd/dist/run-dmsd.sh Thu Mar 19 16:14:06 2009
@@ -68,7 +68,7 @@
while true; do
rm -f /tmp/lucene-*.lock
info "Starting dmsd with loglevel ${NEW_LOG_LEVEL}"
- HOME="$PROG_HOME" ${JAVA} \
+ HOME="$PROG_HOME" LD_LIBRARY_PATH=${SOFFICE_LIBS} ${JAVA} \
-server \
-XX:NewRatio=2 \
-Xmx${VM_MAX_HEAP_SIZE} \
@@ -77,7 +77,6 @@
-Dcom.sun.management.jmxremote.ssl=false \
-Dcom.sun.management.jmxremote.port=${JMX_PORT} \
-Dch.idok.dmsd.impl.extractor.microsoft.screen=${DUMMY_DISPLAY} \
- -Dch.idok.dmsd.impl.extractor.microsoft.unoport=${OOUNO_PORT} \
-Dch.idok.dmsd.impl.extractor.microsoft.soffice=${SOFFICE_PATH} \

-Dch.idok.dmsd.impl.extractor.microsoft.interruptdelay=${OOKILLER_DELAY} \
-Dch.idok.dmsd.impl.extractor.mimetypes=${MIME_TYPES_FILE} \



  • [idok-commit] idok commit r347 - in trunk: java/ch/idok/dmsd/impl/extractor/microsoft lib sites/psi/scripts/admin/dmsd sites/psi/scripts/admin/dmsd/dist, AFS account Stadler Hans Christian, 03/19/2009

Archive powered by MHonArc 2.6.19.

Top of Page