In this post I'll demonstrate how to collect various metrics using a simple Python daemon, send them to Graphite, and create dashboards with Grafana that are stored in Elasticsearch.
For a more detailed explanation of what Graphite is and its Carbon and Whisper components, please read my other blog post here.
First, let's install and configure Graphite:
[root@server1 ~] apt-get install -y python2.6 python-pip python-cairo python-django python-django-tagging
[root@server1 ~] apt-get install -y libapache2-mod-wsgi python-twisted python-memcache python-pysqlite2 python-simplejson
[root@server1 ~] pip install whisper
[root@server1 ~] pip install carbon
[root@server1 ~] pip install graphite-web
[root@server1 ~] cat /etc/apache2/sites-available/graphite
WSGISocketPrefix /etc/httpd/wsgi/
<VirtualHost *:80>
    Header set Access-Control-Allow-Origin "*"
    Header set Access-Control-Allow-Methods "GET, OPTIONS"
    Header set Access-Control-Allow-Headers "origin, authorization, accept"
    ServerName graphite.prod.lon3.example.net
    DocumentRoot "/opt/graphite/webapp"
    ErrorLog /opt/graphite/storage/log/webapp/error.log
    CustomLog /opt/graphite/storage/log/webapp/access.log common
    WSGIDaemonProcess graphite processes=5 threads=5 display-name='%{GROUP}' inactivity-timeout=120
    WSGIProcessGroup graphite
    WSGIApplicationGroup %{GLOBAL}
    WSGIImportScript /opt/graphite/conf/graphite.wsgi process-group=graphite application-group=%{GLOBAL}
    WSGIScriptAlias / /opt/graphite/conf/graphite.wsgi
    Alias /content/ /opt/graphite/webapp/content/
    <Location "/content/">
        SetHandler None
    </Location>
    Alias /media/ "@DJANGO_ROOT@/contrib/admin/media/"
    <Location "/media/">
        SetHandler None
    </Location>
    <Directory /opt/graphite/conf/>
        Order deny,allow
        Allow from all
    </Directory>
</VirtualHost>
[root@server1 ~] mkdir -p /etc/httpd/wsgi/
[root@server1 ~] cd /opt/graphite/conf/
[root@server1 /opt/graphite/conf] cat carbon.conf
[cache]
ENABLE_LOGROTATION = True
USER =
MAX_CACHE_SIZE = inf
MAX_UPDATES_PER_SECOND = 500
MAX_CREATES_PER_MINUTE = 50
LINE_RECEIVER_INTERFACE = 0.0.0.0
LINE_RECEIVER_PORT = 2003
ENABLE_UDP_LISTENER = False
UDP_RECEIVER_INTERFACE = 0.0.0.0
UDP_RECEIVER_PORT = 2003
PICKLE_RECEIVER_INTERFACE = 0.0.0.0
PICKLE_RECEIVER_PORT = 2004
LOG_LISTENER_CONNECTIONS = True
USE_INSECURE_UNPICKLER = False
CACHE_QUERY_INTERFACE = 0.0.0.0
CACHE_QUERY_PORT = 7002
USE_FLOW_CONTROL = True
LOG_UPDATES = False
LOG_CACHE_HITS = False
LOG_CACHE_QUEUE_SORTS = True
CACHE_WRITE_STRATEGY = sorted
WHISPER_AUTOFLUSH = False
WHISPER_FALLOCATE_CREATE = True
[relay]
LINE_RECEIVER_INTERFACE = 0.0.0.0
LINE_RECEIVER_PORT = 2013
PICKLE_RECEIVER_INTERFACE = 0.0.0.0
PICKLE_RECEIVER_PORT = 2014
LOG_LISTENER_CONNECTIONS = True
RELAY_METHOD = rules
REPLICATION_FACTOR = 1
DESTINATIONS = 127.0.0.1:2004
MAX_DATAPOINTS_PER_MESSAGE = 500
MAX_QUEUE_SIZE = 10000
USE_FLOW_CONTROL = True
[aggregator]
LINE_RECEIVER_INTERFACE = 0.0.0.0
LINE_RECEIVER_PORT = 2023
PICKLE_RECEIVER_INTERFACE = 0.0.0.0
PICKLE_RECEIVER_PORT = 2024
LOG_LISTENER_CONNECTIONS = True
FORWARD_ALL = True
DESTINATIONS = 127.0.0.1:2004
REPLICATION_FACTOR = 1
MAX_QUEUE_SIZE = 10000
USE_FLOW_CONTROL = True
MAX_DATAPOINTS_PER_MESSAGE = 500
MAX_AGGREGATION_INTERVALS = 5
[root@server1 /opt/graphite/conf] cp graphite.wsgi.example graphite.wsgi
[root@server1 /opt/graphite/conf] cat storage-schemas.conf
[carbon]
pattern = ^carbon\.
retentions = 300s:90d
[default_5min_for_90day]
pattern = .*
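# one datapoint every 300 seconds (5 minutes), retained for 90 days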
retentions = 300s:90d
[root@server1 /opt/graphite/conf] cat storage-aggregation.conf
[min]
pattern = \.min$
xFilesFactor = 0.1
aggregationMethod = min
[max]
pattern = \.max$
xFilesFactor = 0.1
aggregationMethod = max
[sum]
pattern = \.count$
xFilesFactor = 0
aggregationMethod = sum
[default_average]
pattern = .*
xFilesFactor = 0.5
aggregationMethod = average
[root@server1 /opt/graphite/conf] cd /opt/graphite/webapp/graphite
[root@server1 /opt/graphite/webapp/graphite] python manage.py syncdb
[root@server1 /opt/graphite/webapp/graphite] chown -R www-data:www-data /opt/graphite/storage/
[root@server1 /opt/graphite/webapp/graphite] a2ensite graphite
[root@server1 /opt/graphite/webapp/graphite] a2dissite default
[root@server1 /opt/graphite/webapp/graphite] a2enmod headers
[root@server1 /opt/graphite/webapp/graphite] /etc/init.d/apache2 restart
[root@server1 /opt/graphite/webapp/graphite] /opt/graphite/bin/carbon-cache.py start
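With carbon-cache running, it's worth a quick sanity check: push one datapoint through the plaintext listener on port 2003 and read it back through the graphite-web render API. A minimal sketch, using the hostname configured above (the metric name test.sanity.value is just a placeholder):

# Plaintext protocol is one datapoint per line: "metric-path value unix-timestamp"
echo "test.sanity.value 42 $(date +%s)" | nc -q0 localhost 2003
# Read it back as JSON via the render endpoint
curl -s "http://graphite.prod.lon3.example.net/render?target=test.sanity.value&from=-15min&format=json"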
Next, let's install Grafana:
[root@server1 ~] cd /opt/
[root@server1 /opt] wget http://grafanarel.s3.amazonaws.com/grafana-1.6.1.tar.gz
[root@server1 /opt] tar zxfv grafana-1.6.1.tar.gz
[root@server1 /opt] mv grafana-1.6.1 grafana
[root@server1 /opt] rm grafana-1.6.1.tar.gz
[root@server1 /opt] cd grafana/
[root@server1 /opt/grafana] mv config.sample.js config.js
[root@server1 /opt/grafana] cat config.js
///// @scratch /configuration/config.js/1
// == Configuration
// config.js is where you will find the core Grafana configuration. This file contains parameters that
// must be set before Grafana is run for the first time.
///
define(['settings'],
function (Settings) {
  return new Settings({

    // datasources, you can add multiple
    datasources: {
      graphite: {
        type: 'graphite',
        url: "http://graphite.prod.lon3.example.net",
        default: true
      },
      influxdb: {
        type: 'influxdb',
        url: "http://my_influxdb_server:8086/db/database_name",
        username: 'admin',
        password: 'admin'
      },
    },

    // elasticsearch url
    // used for storing and loading dashboards, optional
    // For Basic authentication use: http://username:password@domain.com:9200
    elasticsearch: "http://elasticsearch.prod.lon3.example.net:9200",

    // default start dashboard
    default_route: '/dashboard/file/default.json',

    // Elasticsearch index for storing dashboards
    grafana_index: "grafana-dash",

    // timezoneOffset:
    // If you experience problems with zoom, it is probably caused by a timezone diff between
    // your browser and the graphite-web application. The timezoneOffset setting can be used to have
    // Grafana translate absolute time ranges to the graphite-web timezone.
    // Example:
    // If TIME_ZONE in the graphite-web config file local_settings.py is set to America/New_York, then set
    // timezoneOffset to "-0500" (for UTC - 5 hours)
    // Example:
    // If TIME_ZONE is set to UTC, set this to "0000"
    timezoneOffset: null,

    // set to false to disable unsaved changes warning
    unsaved_changes_warning: true,

    // set the default timespan for the playlist feature
    // Example: "1m", "1h"
    playlist_timespan: "1m",

    // Add your own custom panels
    plugins: {
      panels: []
    }
  });
});
[root@server1 /opt/grafana] cat /etc/apache2/sites-available/grafana
<VirtualHost *:80>
    Header set Access-Control-Allow-Origin "*"
    Header set Access-Control-Allow-Methods "GET, OPTIONS"
    Header set Access-Control-Allow-Headers "origin, authorization, accept"
    ServerName grafana.prod.lon3.example.net
    DocumentRoot "/opt/grafana"
</VirtualHost>
[root@server1 /opt/grafana] a2ensite grafana
[root@server1 /opt/grafana] service apache2 reload
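Grafana 1.x runs entirely in the browser and queries graphite-web directly, which is why the Access-Control headers in the Graphite virtual host matter. Before moving on, a quick check that Apache is actually serving them (against the hostname used above):

curl -sI http://graphite.prod.lon3.example.net/ | grep -i access-control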
Lastly, let's install Elasticsearch to store the dashboards created with Grafana:
[root@server1 /opt] curl -s http://packages.elasticsearch.org/GPG-KEY-elasticsearch | apt-key add -
[root@server1 /opt] echo "deb http://packages.elasticsearch.org/elasticsearch/1.0/debian stable main" > /etc/apt/sources.list.d/elasticsearch.list
[root@server1 /opt] apt-get update
[root@server1 /opt] apt-get install -y elasticsearch openjdk-7-jre-headless
[root@server1 /opt] update-rc.d elasticsearch defaults
[root@server1 /opt] cat /etc/elasticsearch/elasticsearch.yml
cluster.name: elasticsearch
node.data: true
transport.tcp.port: 8180
http.port: 9200
[root@server1 /opt] /etc/init.d/elasticsearch start
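With Elasticsearch up, two quick curl calls verify that the HTTP API is reachable and, once you save your first dashboard from Grafana, that it lands in the grafana-dash index configured in config.js:

# Cluster should answer with its name and version
curl -s http://localhost:9200/
# After saving a dashboard in Grafana, it shows up here
curl -s "http://localhost:9200/grafana-dash/_search?pretty"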
There are many fine tools for collecting metrics from logs and SNMP, such as collectd, Diamond, Ganglia, Graphene, and statsd, to name a few. In most cases, however, I needed to graph data stored in MySQL or exposed over SNMP, so I wrote this small, highly configurable Python daemon that is perfect for fetching numbers:
https://github.com/kaivanov/Scripts/tree/master/DBaaS/mcollector
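As a rough illustration of the idea, here is the equivalent one-off fetch in shell: pull a single number out of MySQL and push it to carbon over the plaintext protocol (the shop database, orders table, and metric path are made-up names for the example; the daemon essentially does this on a schedule for whatever queries you configure):

# Fetch a single value from MySQL (-N skips column headers, -B is batch mode)
count=$(mysql -N -B -e "SELECT COUNT(*) FROM orders" shop)
# Push it to carbon's plaintext listener with the current timestamp
echo "db.shop.orders.count ${count} $(date +%s)" | nc -q0 localhost 2003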