monit-general
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Monit problem with tomcat


From: Martin Pala
Subject: Re: Monit problem with tomcat
Date: Wed, 25 Apr 2012 20:52:44 +0200

You can try the pattern based process test, for example something like this:

--8<--
check process fisheye_test matching "/eng/apps/atlassian/fecru/fecru_test_current/fisheyeboot.jar"
--8<--

You can test the pattern using CLI like this:

--8<--
monit procmatch "/eng/apps/atlassian/fecru/fecru_test_current/fisheyeboot.jar"
--8<--

Regards,
Martin


On Apr 24, 2012, at 5:26 PM, Chad Neal wrote:

 
I am hoping one of you monit gurus can help me. I am using monit to monitor a tomcat process and having some issues getting actions monit takes to succeed. The tomcat app I am monitoring is a product called Fisheye from Atlassian and comes bundled with a version of tomcat that doesn’t write a pid file. To that create the pid file I use pgrep to locate the running process, collect its pid, and write that into to a file. Using this method Monit is able to tell when my app is running or not. The problem happens if I ask Monit to stop or start tomcat. When this happens monit attempts to stop the wrong pid or at start doesn’t seem to read the pid file I write to test if the start was successful. I am attaching a lot of config and log data for your review. Thanks much for any help you can provide.
 
 
Monitrc:
set daemon  60              # check services at 1-minute intervals
set logfile /apps/monit/var/log/monit.log
set idfile /apps/monit/var/.monit.id
set statefile /apps/monit/var/.monit.state
set mailserver mailhost.xxx.com               # primary mailserver
## --8<--
set mail-format {
reply-to: address@hidden
subject: ETG Monit alert --  $EVENT $SERVICE
message: $EVENT Service $SERVICE
Date:        $DATE
Action:      $ACTION
Host:        $HOST
Description: $DESCRIPTION
 
This email was generated by the ETG Monit service.
}
## --8<--
set alert address@hidden only on { instance } # receive all instance alerts
set httpd port 5280 and
    allow localhost        # allow localhost to connect to the server and
    allow 169.143.0.0/16
    allow xxx:xxx      # required for command line use
    allow @etgtools        # allow etg admin team access / also allows access to monit.d directory
    allow @bbaowner        # allow bamboo agent owners control over agents
 
check directory monit.d with path /apps/monit/etc/monit.d
  if changed timestamp then exec "/apps/monit/bin/monit reload"
include /apps/monit/etc/monit.d/*.monit
#
 
Tomcat.monit (/apps/monit/etc/monit.d/tomcat.monit)
check process fisheye_test with pidfile /eng/data/fecru_test/fecru.pid
        alert address@hidden
        start program = "/etc/init.d/fecru_test start" with timeout 300 seconds
        stop program  = "/etc/init.d/fecru_test stop"
 
 
init.d script:
RUN_AS_USER=etgfecru
unset CATALINA_OPTS
CATALINA_HOME=/eng/apps/atlassian/fecru/fecru_test_current
export FISHEYE_INST=/eng/data/fecru_test
export FISHEYE_OPTS="-d64 -Xms6g -Xmx6g -XX:MaxPermSize=768m -Dfile.encoding=UTF-8"
export JAVA_HOME=/eng/apps/oracle/java/jdk/fecru_current
PIDFILE="$FISHEYE_INST/fecru.pid"
 
start() {
        echo "Starting FECRU: "
        if [ "$USER" == "root" ]; then
                su $RUN_AS_USER -c "$CATALINA_HOME/bin/start.sh"
        elif [ "$USER" == "$RUN_AS_USER" ]; then
                $CATALINA_HOME/bin/start.sh
        else
                echo "You must be root or $RUN_AS_USER to start fecru\n"
        fi
        # sleep 20s
        pid=`pgrep -f "java -DSERVER_INFO=FECRU_TEST_15160"`
        if [ $pid ]; then
                echo $pid>${PIDFILE}
        fi
        echo "done."
}
stop() {
        echo "Shutting down FECRU: "
        if [ "$USER" == "root" ]; then
                su $RUN_AS_USER -c "$CATALINA_HOME/bin/stop.sh"
        elif [ "$USER" == "$RUN_AS_USER" ]; then
                $CATALINA_HOME/bin/stop.sh
        else
                echo "You must be root or $RUN_AS_USER to stop fecru\n"
        fi
        if [ -e ${PIDFILE} ]; then
                rm ${PIDFILE}
                echo "rm ${PIDFILE}"
        fi
        echo "done."
}
status(){
        pid=`pgrep -f "java -DSERVER_INFO=FECRU_TEST_15160"`
        if [ ! $pid ]; then
          echo "FECRU Test is not running"
        else
          echo "FECRU Test is running with a pid $pid"
        fi
}
pid(){
        pid=`pgrep -f "java -DSERVER_INFO=FECRU_TEST_15160"`
        if [ ! $pid ]; then
          echo "-1"
        else
          echo "$pid"
        fi
}
backup() {
        echo "backing up FECRU: "
        if [ "$USER" == "root" ]; then
                su $RUN_AS_USER -c "$CATALINA_HOME/bin/fisheyectl.sh backup"
        elif [ "$USER" == "$RUN_AS_USER" ]; then
                        $CATALINA_HOME/bin/fisheyectl.sh backup
        else
                        echo "You must be root or $RUN_AS_USER to backup fecru\n"
        fi
        echo "done."
}
restore() {
       echo "restore FECRU: "
                if [ -f $1 ]; then
                        if [ "$USER" == "root" ]; then
                                su $RUN_AS_USER -c "$CATALINA_HOME/bin/fisheyectl.sh restore --force -f $1"
                        elif [ "$USER" == "$RUN_AS_USER" ]; then
                $CATALINA_HOME/bin/fisheyectl.sh restore --force -f $1
                        else
                echo "You must be root or $RUN_AS_USER to restore fecru\n"
                        fi
                else
                        echo "The file $1 does not exist. Restore will not be completed."
                fi
        echo "done."
}
 
case "$1" in
        start)
                start
                ;;
        stop)
                stop
                ;;
                status)
                                status
                                ;;
                pid)
                                pid
                                ;;
        restart)
                stop
                sleep 10
                start
                ;;
        backup)
                backup
                ;;
        restore)
                restore $2
                ;;
        *)
        echo "Usage: $0 {start|stop|status|pid|restart|backup|restore <filename>}"
esac
exit 0
 
Running tomcat info:
address@hidden /eng/apps/atlassian/fecru
218 > cat /eng/data/fecru_test/fecru.pid
22148
address@hidden /eng/apps/atlassian/fecru
219 > ps -ef | grep -i TEST
etgfecru 22148     1 31 09:11 pts/1    00:00:44 /eng/apps/oracle/java/jdk/fecru_current/bin/java -DSERVER_INFO=FECRU_TEST_15160 -Xms6g -Xmx6g -XX:MaxPermSize=768m -Dfile.encoding=UTF-8 -XX:MaxNewSize=128m -Dfisheye.library.path= -Dfisheye.inst=/eng/data/fecru_test -Djava.awt.headless=true -Djava.endorsed.dirs=/eng/apps/atlassian/fecru/fecru_test_current/lib/endorsed -jar /eng/apps/atlassian/fecru/fecru_test_current/fisheyeboot.jar start
etgfecru 22531 13063  0 09:13 pts/1    00:00:00 grep -i test
address@hidden /eng/apps/atlassian/fecru
220 > ./fecru_test.init status
FECRU Test is running with a pid 22148
 
 
Now when attempting to stop tomcat this is found in the monit log files:
[MDT Apr 24 09:15:58] info     : 'fisheye_test' stop on user request
[MDT Apr 24 09:15:58] info     : monit daemon at 22396 awakened
[MDT Apr 24 09:15:58] info     : Awakened by User defined signal 1
[MDT Apr 24 09:15:58] info     : 'fisheye_test' stop: /eng/apps/atlassian/fecru/fecru_test.init
[MDT Apr 24 09:15:58] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist
[MDT Apr 24 09:15:58] debug    : 'fisheye_test' monitoring disabled
[MDT Apr 24 09:15:58] info     : 'fisheye_test' stop action done
 
What is odd is the pid file was removed however the app remains running:
address@hidden /eng/apps/atlassian/fecru
223 > cat /eng/data/fecru_test/fecru.pid
cat: /eng/data/fecru_test/fecru.pid: No such file or directory
address@hidden /eng/apps/atlassian/fecru
224 > ./fecru_test.init status
FECRU Test is running with a pid 22148
 
Now after using the same init script to stop tomcat I enable monitoring for tomcat and this is found in the logs:
[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist
[MDT Apr 24 09:20:13] error    : 'fisheye_test' process is not running
[MDT Apr 24 09:20:13] debug    : -------------------------------------------------------------------------------
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit [0x418653]
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(LogError+0x9f) [0x418e0f]
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(Event_post+0x429) [0x415e79]
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(check_process+0xa1) [0x425321]
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(validate+0x1f4) [0x4257d4]
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit [0x41265d]
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit(main+0x4df) [0x412f2f]
[MDT Apr 24 09:20:13] debug    :     /lib64/libc.so.6(__libc_start_main+0xf4) [0x3c0b41d994]
[MDT Apr 24 09:20:13] debug    :     /apps/monit/bin/monit [0x409a19]
[MDT Apr 24 09:20:13] debug    : -------------------------------------------------------------------------------
[MDT Apr 24 09:20:13] info     : 'fisheye_test' trying to restart
[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist
[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist
[MDT Apr 24 09:20:13] info     : 'fisheye_test' start: /eng/apps/atlassian/fecru/fecru_test.init
[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist
[MDT Apr 24 09:20:13] debug    : monit: pidfile '/eng/data/fecru_test/fecru.pid' does not exist
[MDT Apr 24 09:21:43] error    : 'fisheye_test' failed to start
[MDT Apr 24 09:21:43] debug    : -------------------------------------------------------------------------------
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x418653]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(LogError+0x9f) [0x418e0f]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(Event_post+0x429) [0x415e79]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x413e13]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(control_service+0x137) [0x4140c7]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x415774]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(Event_post+0x46a) [0x415eba]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(check_process+0xa1) [0x425321]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(validate+0x1f4) [0x4257d4]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x41265d]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit(main+0x4df) [0x412f2f]
[MDT Apr 24 09:21:43] debug    :     /lib64/libc.so.6(__libc_start_main+0xf4) [0x3c0b41d994]
[MDT Apr 24 09:21:43] debug    :     /apps/monit/bin/monit [0x409a19]
 
Thanks for any help you can provide-
Chad
 
 
Chad Neal
_________________________________________________
Information Technology Services
Making Every Mission Possible
 
(o): 303.328.6592 | (c) 720.226.8225 | address@hidden
55 Inverness Drive East | Englewood, CO | 80112-5498 | www.jeppesen.com
 
--
To unsubscribe:
https://lists.nongnu.org/mailman/listinfo/monit-general


reply via email to

[Prev in Thread] Current Thread [Next in Thread]