服务器安装monit进程监控服务

2019-02-19 14:31:03   php分享记录

  monit,centos7,进程监控  

monit的安装部署

  1. 下载源码包
    1. cd /usr/local/src/
    2. git clone https://tildeslash@bitbucket.org/tildeslash/monit.git   # 下载源码包
    3. cd monit
    4. ./bootstrap
    5. ./configure
    6. make && make install
    7. 安装完成
    8. 新建/etc/monit/目录
    9. 目录结构
    10. /etc/monit/conf/monitrc   (主配置文件)
    11. /etc/monit/conf/sys/      (各服务的监控配置目录)
    12. /etc/monit/logs           (日志,文件)
    13. /etc/monit/var/           (存放pid文件和state文件的目录)

其中monitrc是从源码目录复制到/etc/monit/conf/下的(monit要求monitrc的权限不超过0700,例如执行 chmod 600 /etc/monit/conf/monitrc)
配置修改为以下内容

  1. ###############################################################################
  2. ## Monit control file
  3. ###############################################################################
  4. ##
  5. ## Comments begin with a '#' and extend through the end of the line. Keywords
  6. ## are case insensitive. All path's MUST BE FULLY QUALIFIED, starting with '/'.
  7. ##
  8. ## Below you will find examples of some frequently used statements. For
  9. ## information about the control file and a complete list of statements and
  10. ## options, please have a look in the Monit manual.
  11. ##
  12. ##
  13. ###############################################################################
  14. ## Global section
  15. ###############################################################################
  16. ##
  17. ## Start Monit in the background (run as a daemon):
  18. #
  19. set daemon 10 # check services at 10 seconds intervals
  20. # with start delay 240 # optional: delay the first check by 4-minutes (by
  21. # # default Monit check immediately after Monit start)
  22. #
  23. #
  24. ## Set syslog logging. If you want to log to a standalone log file instead,
  25. ## specify the full path to the log file
  26. #
  27. set log /etc/monit/logs
  28. #
  29. #
  30. ## Set the location of the Monit lock file which stores the process id of the
  31. ## running Monit instance. By default this file is stored in $HOME/.monit.pid
  32. #
  33. set pidfile /etc/monit/var/monit.pid
  34. #
  35. ## Set the location of the Monit id file which stores the unique id for the
  36. ## Monit instance. The id is generated and stored on first Monit start. By
  37. ## default the file is placed in $HOME/.monit.id.
  38. #
  39. # set idfile /var/.monit.id
  40. #
  41. ## Set the location of the Monit state file which saves monitoring states
  42. ## on each cycle. By default the file is placed in $HOME/.monit.state. If
  43. ## the state file is stored on a persistent filesystem, Monit will recover
  44. ## the monitoring state across reboots. If it is on temporary filesystem, the
  45. ## state will be lost on reboot which may be convenient in some situations.
  46. #
  47. set statefile /etc/monit/var/.monit.state
  48. #
  49. #
  50. ## Set limits for various tests. The following example shows the default values:
  51. ##
  52. # set limits {
  53. # programOutput: 512 B, # check program's output truncate limit
  54. # sendExpectBuffer: 256 B, # limit for send/expect protocol test
  55. # fileContentBuffer: 512 B, # limit for file content test
  56. # httpContentBuffer: 1 MB, # limit for HTTP content test
  57. # networkTimeout: 5 seconds # timeout for network I/O
  58. # programTimeout: 300 seconds # timeout for check program
  59. # stopTimeout: 30 seconds # timeout for service stop
  60. # startTimeout: 30 seconds # timeout for service start
  61. # restartTimeout: 30 seconds # timeout for service restart
  62. # }
  63. ## Set global SSL options (just most common options showed, see manual for
  64. ## full list).
  65. #
  66. # set ssl {
  67. # verify : enable, # verify SSL certificates (disabled by default but STRONGLY RECOMMENDED)
  68. # selfsigned : allow # allow self signed SSL certificates (reject by default)
  69. # }
  70. #
  71. #
  72. ## Set the list of mail servers for alert delivery. Multiple servers may be
  73. ## specified using a comma separator. If the first mail server fails, Monit
  74. ## will use the second mail server in the list and so on. By default Monit uses
  75. ## port 25 - it is possible to override this with the PORT option.
  76. #
  77. set mailserver localhost
  78. #set mailserver mail.bar.baz, # primary mailserver
  79. # backup.bar.baz port 10025, # backup mailserver on port 10025
  80. # localhost # fallback relay
  81. #
  82. #
  83. ## By default Monit will drop alert events if no mail servers are available.
  84. ## If you want to keep the alerts for later delivery retry, you can use the
  85. ## EVENTQUEUE statement. The base directory where undelivered alerts will be
  86. ## stored is specified by the BASEDIR option. You can limit the queue size
  87. ## by using the SLOTS option (if omitted, the queue is limited by space
  88. ## available in the back end filesystem).
  89. #
  90. # set eventqueue
  91. # basedir /var/monit # set the base directory where events will be stored
  92. # slots 100 # optionally limit the queue size
  93. #
  94. #
  95. ## Send status and events to M/Monit (for more informations about M/Monit
  96. ## see https://mmonit.com/). By default Monit registers credentials with
  97. ## M/Monit so M/Monit can smoothly communicate back to Monit and you don't
  98. ## have to register Monit credentials manually in M/Monit. It is possible to
  99. ## disable credential registration using the commented out option below.
  100. ## Though, if safety is a concern we recommend instead using https when
  101. ## communicating with M/Monit and send credentials encrypted. The password
  102. ## should be URL encoded if it contains URL-significant characters like
  103. ## ":", "?", "@". Default timeout is 5 seconds, you can customize it by
  104. ## adding the timeout option.
  105. #
  106. # set mmonit http://monit:monit@192.168.1.10:8080/collector
  107. # # with timeout 30 seconds # Default timeout is 5 seconds
  108. # # and register without credentials # Don't register credentials
  109. #
  110. #
  111. ## Monit by default uses the following format for alerts if the mail-format
  112. ## statement is missing::
  113. ## --8<--
  114. set mail-format {
  115. from: Monit <monit@$HOST>
  116. subject: monit alert -- $EVENT $SERVICE
  117. message: $EVENT Service $SERVICE
  118. Date: $DATE
  119. Action: $ACTION
  120. Host: $HOST
  121. Description: $DESCRIPTION
  122. Your faithful employee,
  123. Monit
  124. }
  125. ## --8<--
  126. ##
  127. ## You can override this message format or parts of it, such as subject
  128. ## or sender using the MAIL-FORMAT statement. Macros such as $DATE, etc.
  129. ## are expanded at runtime. For example, to override the sender, use:
  130. #
  131. set mail-format { from: monit@foo.bar }
  132. #
  133. #
  134. ## You can set alert recipients whom will receive alerts if/when a
  135. ## service defined in this file has errors. Alerts may be restricted on
  136. ## events by using a filter as in the second example below.
  137. #
  138. set alert 1776243356@qq.com # receive all alerts
  139. #
  140. ## Do not alert when Monit starts, stops or performs a user initiated action.
  141. ## This filter is recommended to avoid getting alerts for trivial cases.
  142. #
  143. # set alert your-name@your.domain not on { instance, action }
  144. #
  145. #
  146. ## Monit has an embedded HTTP interface which can be used to view status of
  147. ## services monitored and manage services from a web interface. The HTTP
  148. ## interface is also required if you want to issue Monit commands from the
  149. ## command line, such as 'monit status' or 'monit restart service' The reason
  150. ## for this is that the Monit client uses the HTTP interface to send these
  151. ## commands to a running Monit daemon. See the Monit Wiki if you want to
  152. ## enable SSL for the HTTP interface.
  153. #
  154. set httpd port 2812 and
  155. use address localhost # only accept connection from localhost (drop if you use M/Monit)
  156. allow localhost # allow localhost to connect to the server and
  157. allow admin:monit # require user 'admin' with password 'monit'
  158. #with ssl { # enable SSL/TLS and set path to server certificate
  159. # pemfile: /etc/ssl/certs/monit.pem
  160. #}
  161. ###############################################################################
  162. ## Services
  163. ###############################################################################
  164. ##
  165. ## Check general system resources such as load average, cpu and memory
  166. ## usage. Each test specifies a resource, conditions and the action to be
  167. ## performed should a test fail.
  168. #
  169. # check system $HOST
  170. # if loadavg (1min) > 4 then alert
  171. # if loadavg (5min) > 2 then alert
  172. # if cpu usage > 95% for 10 cycles then alert
  173. # if memory usage > 75% then alert
  174. # if swap usage > 25% then alert
  175. #
  176. #
  177. ## Check if a file exists, checksum, permissions, uid and gid. In addition
  178. ## to alert recipients in the global section, customized alert can be sent to
  179. ## additional recipients by specifying a local alert handler. The service may
  180. ## be grouped using the GROUP option. More than one group can be specified by
  181. ## repeating the 'group name' statement.
  182. #
  183. # check file apache_bin with path /usr/local/apache/bin/httpd
  184. # if failed checksum and
  185. # expect the sum 8f7f419955cefa0b33a2ba316cba3659 then unmonitor
  186. # if failed permission 755 then unmonitor
  187. # if failed uid "root" then unmonitor
  188. # if failed gid "root" then unmonitor
  189. # alert security@foo.bar on {
  190. # checksum, permission, uid, gid, unmonitor
  191. # } with the mail-format { subject: Alarm! }
  192. # group server
  193. #
  194. #
  195. ## Check that a process is running, in this case Apache, and that it respond
  196. ## to HTTP and HTTPS requests. Check its resource usage such as cpu and memory,
  197. ## and number of children. If the process is not running, Monit will restart
  198. ## it by default. In case the service is restarted very often and the
  199. ## problem remains, it is possible to disable monitoring using the TIMEOUT
  200. ## statement. This service depends on another service (apache_bin) which
  201. ## is defined above.
  202. #
  203. # check process apache with pidfile /usr/local/apache/logs/httpd.pid
  204. # start program = "/etc/init.d/httpd start" with timeout 60 seconds
  205. # stop program = "/etc/init.d/httpd stop"
  206. # if cpu > 60% for 2 cycles then alert
  207. # if cpu > 80% for 5 cycles then restart
  208. # if totalmem > 200.0 MB for 5 cycles then restart
  209. # if children > 250 then restart
  210. # if disk read > 500 kb/s for 10 cycles then alert
  211. # if disk write > 500 kb/s for 10 cycles then alert
  212. # if failed host www.tildeslash.com port 80 protocol http and request "/somefile.html" then restart
  213. # if failed port 443 protocol https with timeout 15 seconds then restart
  214. # if 3 restarts within 5 cycles then unmonitor
  215. # depends on apache_bin
  216. # group server
  217. #
  218. #
  219. ## Check filesystem permissions, uid, gid, space usage, inode usage and disk I/O.
  220. ## Other services, such as databases, may depend on this resource and an automatically
  221. ## graceful stop may be cascaded to them before the filesystem will become full and data
  222. ## lost.
  223. #
  224. # check filesystem datafs with path /dev/sdb1
  225. # start program = "/bin/mount /data"
  226. # stop program = "/bin/umount /data"
  227. # if failed permission 660 then unmonitor
  228. # if failed uid "root" then unmonitor
  229. # if failed gid "disk" then unmonitor
  230. # if space usage > 80% for 5 times within 15 cycles then alert
  231. # if space usage > 99% then stop
  232. # if inode usage > 30000 then alert
  233. # if inode usage > 99% then stop
  234. # if read rate > 1 MB/s for 5 cycles then alert
  235. # if read rate > 500 operations/s for 5 cycles then alert
  236. # if write rate > 1 MB/s for 5 cycles then alert
  237. # if write rate > 500 operations/s for 5 cycles then alert
  238. # if service time > 10 milliseconds for 3 times within 5 cycles then alert
  239. # group server
  240. #
  241. #
  242. ## Check a file's timestamp. In this example, we test if a file is older
  243. ## than 15 minutes and assume something is wrong if its not updated. Also,
  244. ## if the file size exceed a given limit, execute a script
  245. #
  246. # check file database with path /data/mydatabase.db
  247. # if failed permission 700 then alert
  248. # if failed uid "data" then alert
  249. # if failed gid "data" then alert
  250. # if timestamp > 15 minutes then alert
  251. # if size > 100 MB then exec "/my/cleanup/script" as uid dba and gid dba
  252. #
  253. #
  254. ## Check directory permission, uid and gid. An event is triggered if the
  255. ## directory does not belong to the user with uid 0 and gid 0. In addition,
  256. ## the permissions have to match the octal description of 755 (see chmod(1)).
  257. #
  258. # check directory bin with path /bin
  259. # if failed permission 755 then unmonitor
  260. # if failed uid 0 then unmonitor
  261. # if failed gid 0 then unmonitor
  262. #
  263. #
  264. ## Check a remote host availability by issuing a ping test and check the
  265. ## content of a response from a web server. Up to three pings are sent and
  266. ## connection to a port and an application level network check is performed.
  267. #
  268. # check host myserver with address 192.168.1.1
  269. # if failed ping then alert
  270. # if failed port 3306 protocol mysql with timeout 15 seconds then alert
  271. # if failed port 80 protocol http
  272. # and request /some/path with content = "a string"
  273. # then alert
  274. #
  275. #
  276. ## Check a network link status (up/down), link capacity changes, saturation
  277. ## and bandwidth usage.
  278. #
  279. # check network public with interface eth0
  280. # if failed link then alert
  281. # if changed link then alert
  282. # if saturation > 90% then alert
  283. # if download > 10 MB/s then alert
  284. # if total uploaded > 1 GB in last hour then alert
  285. #
  286. #
  287. ## Check custom program status output.
  288. #
  289. # check program myscript with path /usr/local/bin/myscript.sh
  290. # if status != 0 then alert
  291. #
  292. #
  293. ###############################################################################
  294. ## Includes
  295. ###############################################################################
  296. ##
  297. ## It is possible to include additional configuration parts from other files or
  298. ## directories.
  299. #
  300. include /etc/monit/conf/sys/*
  301. #

2.修改完毕后,新增服务配置
在sys目录下,新增一个conf文件,文件内容如下

  1. check process freeswitch
  2. with pidfile /usr/local/freeswitch/run/freeswitch.pid
  3. start program = "/usr/local/freeswitch/bin/freeswitch -nc -nonat"
  4. stop program = "/usr/local/freeswitch/bin/freeswitch -stop"
  5. if memory > 1500.0 MB for 5 cycles then alert
  6. if memory > 1700.0 MB for 5 cycles then alert
  7. if memory > 2000.0 MB for 5 cycles then restart
  8. if cpu > 60% for 5 cycles then alert

3.开启服务
/usr/local/bin/monit -c /etc/monit/conf/monitrc
4.常用命令

  1. monit -t # 配置文件检测
  2. monit # 启动monit daemon
  3. monit -c /etc/monit/conf/monitrc # 启动monit daemon时指定配置文件
  4. monit reload # 当更新了配置文件需要重载
  5. monit status # 查看所有服务状态
  6. monit status nginx # 查看nginx服务状态
  7. monit stop all # 停止所有服务
  8. monit stop nginx # 停止nginx服务
  9. monit start all # 启动所有服务
  10. monit start nginx # 启动nginx服务
  11. monit -V # 查看版本

5.monit.service配置

  1. vim /usr/lib/systemd/system/monit.service
  2. [Unit]
  3. Description=monit daemon
  4. After=network.target
  5. [Service]
  6. Type=simple
  7. KillMode=process
  8. ExecStart=/usr/local/bin/monit -I -c /etc/monit/conf/monitrc
  9. ExecStop=/usr/local/bin/monit -c /etc/monit/conf/monitrc quit
  10. ExecReload=/usr/local/bin/monit -c /etc/monit/conf/monitrc reload
  11. Restart=on-abnormal
  12. StandardOutput=null
  13. [Install]
  14. WantedBy=multi-user.target