How to combine BATS, PyTest and MySQL X Plugin for tests

Hi,
In this post I am going to show some tricks for combining the BATS framework, PyTest and the Python X Plugin (mysqlx) module into a simple, unified test solution.

Let’s start with the MySQL X Plugin side; here is the full class:

# Connecting to MySQL and working with a Session
import mysqlx

class MyXPlugin:
    """Small wrapper around a MySQL X Plugin session used by the MyRocks tests.

    On construction it opens a session, looks up the given schema and creates
    (or reuses) the given collection inside it. The remaining methods run the
    individual statements exercised by the tests.
    """

    def __init__(self, schema_name, collection_name):
        """Open the session and prepare the collection.

        :param schema_name: name of the schema that holds the collection.
        :param collection_name: name of the collection to create or reuse.
        """
        # Connect to a dedicated MySQL server
        self.session = mysqlx.get_session({
            'host': 'localhost',
            'port': 33060,
            'user': 'bakux',
            'password': 'Baku12345',
            'ssl-mode': mysqlx.SSLMode.DISABLED
        })

        self.schema_name = schema_name
        self.collection_name = collection_name

        # Getting schema object
        self.schema = self.session.get_schema(self.schema_name)
        # Creating collection; reuse=True is passed so an existing collection is reused
        self.schema.create_collection(self.collection_name, reuse=True)
        # Getting collection object
        self.collection_obj = self.schema.get_collection(self.collection_name)

    def _execute_sql(self, command):
        """Run a raw SQL statement on the session and return its result."""
        return self.session.sql(command).execute()

    def insert_into_collection(self):
        """Add three documents (_id 2, 3 and 4) to the collection in one call."""
        # You can also add multiple documents at once
        print("Inserting 3 rows into collection")
        self.collection_obj.add({'_id': '2', 'name': 'Sakila', 'age': 15},
                    {'_id': '3', 'name': 'Jack', 'age': 15},
                    {'_id': '4', 'name': 'Clare', 'age': 37}).execute()

    def remove_from_collection(self):
        """Issue a remove for '_id = 1', an _id that insert_into_collection never adds."""
        # Removing non-existing _id
        self.collection_obj.remove('_id = 1').execute()

    def alter_table_engine(self):
        """Alter the collection's table to engine=rocksdb.

        :returns: 0 when the statement succeeds.
        :raises mysqlx.errors.OperationalError: when the statement fails.
            NOTE: any exception is reported with the same fixed message.
        """
        command = "alter table {}.{} engine=rocksdb".format(self.schema_name, self.collection_name)
        try:
            self._execute_sql(command)
        except Exception:
            raise mysqlx.errors.OperationalError("Could not alter engine of table here!")
        return 0

    def alter_table_drop_column(self):
        """Drop the generated `_id` column from the collection's table.

        :returns: 0 when the statement succeeds; any error propagates unchanged.
        """
        print("Altering default collection to drop generated column")
        command = "alter table {}.{} drop column `_id`".format(self.schema_name, self.collection_name)
        self._execute_sql(command)
        return 0

    def return_table_obj(self):
        """Return a mysqlx.Table object built from the schema and the collection name."""
        return mysqlx.Table(self.schema, self.collection_name)

    def create_view_from_collection(self, view_name):
        """Create a view selecting everything from the collection's table.

        :param view_name: name of the view to create in the same schema.
        :returns: 0 when the statement succeeds; any error propagates unchanged.
        """
        print("Trying to create view based on MyRocks collection")
        command = "create view {}.{} as select * from {}.{}".format(self.schema_name, view_name, self.schema_name, self.collection_name)
        self._execute_sql(command)
        return 0

    def select_from_view(self, view_name):
        """Select all rows from the given view and fetch them.

        :param view_name: name of the view in the same schema.
        :returns: 0 when the select and the fetch succeed.
        :raises mysqlx.errors.OperationalError: when either step fails.
            NOTE: any exception is reported with the same fixed message.
        """
        print("Trying to select from view [Should raise an OperationalError]")
        command = "select * from {}.{}".format(self.schema_name, view_name)
        try:
            self._execute_sql(command).fetch_all()
        except Exception:
            raise mysqlx.errors.OperationalError("The JSON binary value contains invalid data")
        return 0

    def select_from_table(self):
        """Select all rows from the collection's table and fetch them.

        :returns: 0 when the select and the fetch succeed.
        :raises mysqlx.errors.OperationalError: when either step fails.
            NOTE: any exception is reported with the same fixed message.
        """
        print("Trying to select from table [Should raise an OperationalError]")
        command = "select * from {}.{}".format(self.schema_name, self.collection_name)
        try:
            self._execute_sql(command).fetch_all()
        except Exception:
            raise mysqlx.errors.OperationalError("The JSON binary value contains invalid data")
        return 0

For testing purposes I have added the following PyTest tests:


import pytest
from mysqlx.errors import OperationalError

@pytest.mark.usefixtures("return_plugin_obj")
class TestXPlugin:
    """
    Tests for XPlugin + MyRocks

    The tests work on the same collection and build on each other's changes
    (e.g. test_check_table_count expects the three rows inserted by
    test_check_collection_count), so they are meant to run in definition order.
    """

    def test_check_if_collection_exists(self, return_plugin_obj):
        assert return_plugin_obj.collection_obj.exists_in_database()

    def test_check_collection_count(self, return_plugin_obj):
        # Inserting data
        return_plugin_obj.insert_into_collection()
        # Removing data (targets _id = 1, which was never inserted)
        return_plugin_obj.remove_from_collection()
        # Checking count
        assert return_plugin_obj.collection_obj.count() == 3

    def test_alter_table_engine_raises(self, return_plugin_obj):
        # Should raise error here
        print("Altering default collection engine from InnoDB to MyRocks [Should raise an OperationalError]")
        with pytest.raises(OperationalError) as er:
            return_plugin_obj.alter_table_engine()
        print(er)

    def test_alter_table_drop_column(self, return_plugin_obj):
        assert return_plugin_obj.alter_table_drop_column() == 0

    def test_alter_table_engine(self, return_plugin_obj):
        print("Altering default collection engine from InnoDB to MyRocks [Should NOT raise an OperationalError]")
        assert return_plugin_obj.alter_table_engine() == 0

    def helper_function(self, return_plugin_obj):
        """Return the Table object for the collection (not collected as a test)."""
        return return_plugin_obj.return_table_obj()

    def test_check_if_table_exists(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).exists_in_database()

    def test_check_table_count(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).count() == 3

    def test_check_table_name(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).get_name() == "my_collection"

    def test_check_schema_name(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).get_schema().get_name() == "generated_columns_test"

    def test_check_if_table_is_view(self, return_plugin_obj):
        assert not self.helper_function(return_plugin_obj).is_view()

    def test_create_view_from_collection(self, return_plugin_obj):
        # create_view_from_collection returns 0 on success
        assert return_plugin_obj.create_view_from_collection("my_collection_view") == 0

    def test_select_from_view(self, return_plugin_obj):
        # The select is expected to succeed and return 0
        assert return_plugin_obj.select_from_view("my_collection_view") == 0

    def test_select_from_table(self, return_plugin_obj):
        # The select is expected to succeed and return 0
        assert return_plugin_obj.select_from_table() == 0

Previously I described running the full pytest suite; now I am going to call each test method individually from a bats file:

#!/usr/bin/env bats

# Created by Shahriyar Rzayev from Percona

DIRNAME=$BATS_TEST_DIRNAME
# Path of the pytest module that holds the TestXPlugin class
TEST_MODULE="${DIRNAME}/myrocks_mysqlx_plugin_test/test_module01.py"

# Runs a single TestXPlugin method through pytest, echoes the pytest output
# and fails unless pytest exited with status 0.
#   $1 - name of the test method to run
run_pytest_case() {
  run python -m pytest -vv "${TEST_MODULE}::TestXPlugin::$1"
  echo "$output"
  [ "$status" -eq 0 ]
}

@test "Running test_check_if_collection_exists" {
  run_pytest_case test_check_if_collection_exists
}

@test "Running test_check_collection_count" {
  run_pytest_case test_check_collection_count
}

@test "Running test_alter_table_engine_raises" {
  run_pytest_case test_alter_table_engine_raises
}

@test "Running test_alter_table_drop_column" {
  run_pytest_case test_alter_table_drop_column
}

@test "Running test_alter_table_engine" {
  run_pytest_case test_alter_table_engine
}

@test "Running test_check_if_table_exists" {
  run_pytest_case test_check_if_table_exists
}

@test "Running test_check_table_count" {
  run_pytest_case test_check_table_count
}

@test "Running test_check_table_name" {
  run_pytest_case test_check_table_name
}

@test "Running test_check_schema_name" {
  run_pytest_case test_check_schema_name
}

@test "Running test_check_if_table_is_view" {
  run_pytest_case test_check_if_table_is_view
}

@test "Running test_create_view_from_collection" {
  run_pytest_case test_create_view_from_collection
}

@test "Running test_select_from_table [Should not raise an OperationalError after MYR-151/152]" {
  run_pytest_case test_select_from_table
}

@test "Running test_select_from_view [Should not raise an OperationalError after MYR-151/152]" {
  run_pytest_case test_select_from_view
}

The idea behind the bats framework is quite simple and handy:
just pick one test and run it inside a bats test case.

The final sample output looks something like this, and it is very well structured:

#Running X Plugin tests#
 ✓ Running test_check_if_collection_exists
 ✓ Running test_check_collection_count
 ✓ Running test_alter_table_engine_raises
 ✓ Running test_alter_table_drop_column
 ✓ Running test_alter_table_engine
 ✓ Running test_check_if_table_exists
 ✓ Running test_check_table_count
 ✓ Running test_check_table_name
 ✓ Running test_check_schema_name
 ✓ Running test_check_if_table_is_view
 ✓ Running test_create_view_from_collection
 ✓ Running test_select_from_table
 ✓ Running test_select_from_view

13 tests, 0 failures

For full code please see:

https://github.com/Percona-QA/percona-qa/tree/master/myrocks-tests

Using MySQL Connector/Python X (mysqlx module) MyRocks episode

This post is about how I tried to make a simple Python script that uses the mysqlx module work with MyRocks.
It is also related to pytest, as I have implemented simple pytest tests that are called from a bash file.

So let’s discuss the problem description:
The base problem is that, by default, when you create a collection using the Python X Plugin, the collection will have one JSON column called `doc` and one generated column called `_id`, which is extracted from the `doc` column.
So basically, you cannot alter the table engine to MyRocks, because it will give an error like:

ERROR 3106 (HY000): 'Specified storage engine' is not supported for generated columns.

The result:

Well, it can be solved by dropping the generated `_id` column. Here we encounter another issue: if you have a table with JSON data, please do NOT alter it to MyRocks; otherwise, you will get some weird results, as described here:

https://jira.percona.com/browse/MYR-152

So basically, when I select from this collection, it raises an OperationalError:

mysqlx.errors.OperationalError("The JSON binary value contains invalid data")

Now the sample code portions:

The main worker class code:


import mysqlx

class MyXPlugin:
    """Small wrapper around a MySQL X Plugin session used by the MyRocks tests.

    On construction it opens a session, looks up the given schema and creates
    the given collection inside it. The remaining methods run the individual
    statements exercised by the tests.
    """

    def __init__(self, schema_name, collection_name):
        """Open the session and create the collection.

        :param schema_name: name of the schema that holds the collection.
        :param collection_name: name of the collection to create.
        """
        # Connect to a dedicated MySQL server
        self.session = mysqlx.get_session({
            'host': 'localhost',
            'port': 33060,
            'user': 'bakux',
            'password': 'Baku12345',
            'ssl-mode': mysqlx.SSLMode.DISABLED
        })

        self.schema_name = schema_name
        self.collection_name = collection_name

        # Getting schema object
        self.schema = self.session.get_schema(self.schema_name)
        # Creating collection
        self.schema.create_collection(self.collection_name)
        # Getting collection object
        self.collection_obj = self.schema.get_collection(self.collection_name)

    def _execute_sql(self, command):
        """Run a raw SQL statement on the session and return its result."""
        return self.session.sql(command).execute()

    def insert_into_collection(self):
        """Add three documents (_id 2, 3 and 4) to the collection in one call."""
        # You can also add multiple documents at once
        print("Inserting 3 rows into collection")
        self.collection_obj.add({'_id': '2', 'name': 'Sakila', 'age': 15},
                    {'_id': '3', 'name': 'Jack', 'age': 15},
                    {'_id': '4', 'name': 'Clare', 'age': 37}).execute()

    def remove_from_collection(self):
        """Issue a remove for '_id = 1', an _id that insert_into_collection never adds."""
        # Removing non-existing _id
        self.collection_obj.remove('_id = 1').execute()

    def alter_table_engine(self):
        """Alter the collection's table to engine=rocksdb.

        :returns: 0 when the statement succeeds.
        :raises mysqlx.errors.OperationalError: when the statement fails.
            NOTE: any exception is reported with the same fixed message.
        """
        command = "alter table {}.{} engine=rocksdb".format(self.schema_name, self.collection_name)
        try:
            self._execute_sql(command)
        except Exception:
            raise mysqlx.errors.OperationalError("Could not alter engine of table here!")
        return 0

    def alter_table_drop_column(self):
        """Drop the generated `_id` column from the collection's table.

        :returns: 0 when the statement succeeds; any error propagates unchanged.
        """
        print("Altering default collection to drop generated column")
        command = "alter table {}.{} drop column `_id`".format(self.schema_name, self.collection_name)
        self._execute_sql(command)
        return 0

    def return_table_obj(self):
        """Return a mysqlx.Table object built from the schema and the collection name."""
        return mysqlx.Table(self.schema, self.collection_name)

    def create_view_from_collection(self, view_name):
        """Create a view selecting everything from the collection's table.

        :param view_name: name of the view to create in the same schema.
        :returns: 0 when the statement succeeds; any error propagates unchanged.
        """
        print("Trying to create view based on MyRocks collection")
        command = "create view {}.{} as select * from {}.{}".format(self.schema_name, view_name, self.schema_name, self.collection_name)
        self._execute_sql(command)
        return 0

    def select_from_view(self, view_name):
        """Execute a select of all rows from the given view (rows are not fetched).

        :param view_name: name of the view in the same schema.
        :returns: 0 when the statement executes without an exception.
        :raises mysqlx.errors.OperationalError: when execution fails.
            NOTE: any exception is reported with the same fixed message.
        """
        print("Trying to select from view [Should raise an OperationalError]")
        command = "select * from {}.{}".format(self.schema_name, view_name)
        try:
            self._execute_sql(command)
        except Exception:
            raise mysqlx.errors.OperationalError("The JSON binary value contains invalid data")
        return 0

    def select_from_table(self):
        """Execute a select of all rows from the collection's table (rows are not fetched).

        :returns: 0 when the statement executes without an exception.
        :raises mysqlx.errors.OperationalError: when execution fails.
            NOTE: any exception is reported with the same fixed message.
        """
        print("Trying to select from table [Should raise an OperationalError]")
        command = "select * from {}.{}".format(self.schema_name, self.collection_name)
        try:
            self._execute_sql(command)
        except Exception:
            raise mysqlx.errors.OperationalError("The JSON binary value contains invalid data")
        return 0

The PyTest module:

import pytest
from mysqlx.errors import OperationalError

@pytest.mark.usefixtures("return_plugin_obj")
class TestXPlugin:
    """
    Tests for XPlugin + MyRocks

    The tests work on the same collection and build on each other's changes
    (e.g. test_check_table_count expects the three rows inserted by
    test_check_collection_count), so they are meant to run in definition order.
    """

    def test_check_if_collection_exists(self, return_plugin_obj):
        assert return_plugin_obj.collection_obj.exists_in_database()

    def test_check_collection_count(self, return_plugin_obj):
        # Inserting data
        return_plugin_obj.insert_into_collection()
        # Removing data (targets _id = 1, which was never inserted)
        return_plugin_obj.remove_from_collection()
        # Checking count
        assert return_plugin_obj.collection_obj.count() == 3

    def test_alter_table_engine_raises(self, return_plugin_obj):
        # Should raise error here
        print("Altering default collection engine from InnoDB to MyRocks [Should raise an OperationalError]")
        with pytest.raises(OperationalError) as er:
            return_plugin_obj.alter_table_engine()
        print(er)

    def test_alter_table_drop_column(self, return_plugin_obj):
        assert return_plugin_obj.alter_table_drop_column() == 0

    def test_alter_table_engine(self, return_plugin_obj):
        print("Altering default collection engine from InnoDB to MyRocks [Should NOT raise an OperationalError]")
        assert return_plugin_obj.alter_table_engine() == 0

    def helper_function(self, return_plugin_obj):
        """Return the Table object for the collection (not collected as a test)."""
        return return_plugin_obj.return_table_obj()

    def test_check_if_table_exists(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).exists_in_database()

    def test_check_table_count(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).count() == 3

    def test_check_table_name(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).get_name() == "my_collection"

    def test_check_schema_name(self, return_plugin_obj):
        assert self.helper_function(return_plugin_obj).get_schema().get_name() == "generated_columns_test"

    def test_check_if_table_is_view(self, return_plugin_obj):
        assert not self.helper_function(return_plugin_obj).is_view()

    def test_create_view_from_collection(self, return_plugin_obj):
        # create_view_from_collection returns 0 on success
        assert return_plugin_obj.create_view_from_collection("my_collection_view") == 0

    def test_select_from_view(self, return_plugin_obj):
        # Selecting from the view is expected to fail with an OperationalError
        with pytest.raises(OperationalError) as er:
            return_plugin_obj.select_from_view("my_collection_view")
        print(er)

    def test_select_from_table(self, return_plugin_obj):
        # Selecting from the table is expected to fail with an OperationalError
        with pytest.raises(OperationalError) as er:
            return_plugin_obj.select_from_table()
        print(er)

The PyTest fixture code:

from myrocks_mysqlx_plugin.myrocks_mysqlx_plugin import MyXPlugin
import pytest
# Schema and collection the tests run against
SCHEMA_NAME = "generated_columns_test"
COLLECTION_NAME = "my_collection"

# Created once at import time; the fixture below hands this same object to every test.
plugin_obj = MyXPlugin(SCHEMA_NAME, COLLECTION_NAME)

@pytest.fixture()
def return_plugin_obj():
    """Return the module-level MyXPlugin instance."""
    return plugin_obj

The final bash runner test output:

#Running X Plugin tests#
========================================================================== test session starts ==========================================================================
platform linux2 -- Python 2.7.5, pytest-3.2.1, py-1.4.34, pluggy-0.4.0 -- /usr/bin/python
cachedir: ../../.cache
rootdir: /home/shahriyar.rzaev, inifile:
collected 13 items                                                                                                                                                       

../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_check_if_collection_exists PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_check_collection_count PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_alter_table_engine_raises PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_alter_table_drop_column PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_alter_table_engine PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_check_if_table_exists PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_check_table_count PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_check_table_name PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_check_schema_name PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_check_if_table_is_view PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_create_view_from_collection PASSED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_select_from_view FAILED
../../percona-qa/myrocks-tests/myrocks_mysqlx_plugin_test/test_module01.py::TestXPlugin::test_select_from_table PASSED

I also reported an interesting issue here:
https://bugs.mysql.com/bug.php?id=87531

Using MySQL Connector/Python X (mysqlx module)

This post is about simple usage of the mysqlx module, i.e. the X Plugin, with the latest Connector/Python DMR.
The version of Connector/Python is 8.0.4.

Installing:

wget https://dev.mysql.com/get/Downloads/Connector-Python/mysql-connector-python-8.0.4-0.1.dmr.el7.x86_64.rpm
sudo yum install mysql-connector-python-8.0.4-0.1.dmr.el7.x86_64.rpm

Sample Python code:

# Connecting to MySQL and working with a Session
import mysqlx

# Connect to a dedicated MySQL server
session = mysqlx.get_session({
    'host': 'localhost',
    'port': 33060,
    'user': 'bakux',
    'password': 'Baku12345',
    'ssl-mode': mysqlx.SSLMode.DISABLED
})

schema = session.get_schema('generated_columns_test')

# Create 'my_collection' in schema
schema.create_collection('my_collection')

# Get 'my_collection' from schema
collection = schema.get_collection('my_collection')

assert collection.exists_in_database()

# You can also add multiple documents at once
collection.add({'_id': '2', 'name': 'Sakila', 'age': 15},
            {'_id': '3', 'name': 'Jack', 'age': 15},
            {'_id': '4', 'name': 'Clare', 'age': 37}).execute()

# Remove targets _id = 1, which was not inserted above, so the count stays at 3
collection.remove('_id = 1').execute()

assert collection.count() == 3

# Close the session once we are done with it
session.close()

The code is quite clean: basically, we have created a collection and inserted some data.

From the MySQL client side, this is the structure of the “table” (i.e. the collection):

CREATE TABLE `my_collection` (
  `doc` json DEFAULT NULL,
  `_id` varchar(32) GENERATED ALWAYS AS (json_unquote(json_extract(`doc`,'$._id'))) STORED NOT NULL,
  PRIMARY KEY (`_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4

The collection has a stored generated column, `_id`, which is extracted from the `doc` column.

select * from my_collection;
+-------------------------------------------+-----+
| doc                                       | _id |
+-------------------------------------------+-----+
| {"_id": "2", "age": 15, "name": "Sakila"} | 2   |
| {"_id": "3", "age": 15, "name": "Jack"}   | 3   |
| {"_id": "4", "age": 37, "name": "Clare"}  | 4   |
+-------------------------------------------+-----+
3 rows in set (0.00 sec)

This default behavior restricted my tests on other storage engines, because it is impossible to alter the engine of this default “collection”:

ERROR 3106 (HY000): 'Specified storage engine' is not supported for generated columns.

BTW, while testing, I found some bugs and reported them:

https://bugs.mysql.com/bug.php?id=87472
https://bugs.mysql.com/bug.php?id=87475
https://bugs.mysql.com/bug.php?id=87492
https://bugs.mysql.com/bug.php?id=87493

Partial table recovery from physical backup

In the previous topic, we covered the “Transportable Tablespace” concept by copying and importing a table’s tablespace to a remote server (see -> Copying Tablespaces to Remote Server). The idea is to copy the tablespace file to the remote server; on the remote server you must manually create a database and a table with identical names, then discard the new table’s tablespace file and import the copied one. To achieve this you must be running MySQL version >= 5.6 with innodb_file_per_table=1, and you must know the “CREATE statement” of the table. Let’s change our test condition. Assume that you have a MySQL server and you have taken a physical backup of it (using Percona XtraBackup or a cold backup, for example). But one wonderful day somebody deletes all of a table’s data (say -> delete from table_name). In this case the table still exists (.frm and .ibd), so you can easily discard the table’s tablespace and import the tablespace from the backup folder. But what if the table is dropped and you don’t know its create statement? Or even the whole database is dropped? Then our path will differ from the previous one:

1. Create the dropped database manually.
2. Create the dropped table by extracting the table’s create statement from the .frm file in the backup directory. To extract the create statement from the .frm file you can use the mysqlfrm tool from MySQL Utilities.
3. Discard the table’s tablespace (ALTER TABLE t DISCARD TABLESPACE;)
4. Copy the .ibd file from the backup directory to the database directory inside MySQL’s datadir.
5. Import the copied tablespace file (ALTER TABLE t IMPORT TABLESPACE;)

You can also read about this concept in the documentation -> tablespace-copying. I have automated this process by adding functionality for extracting the table create statement to the MySQL-AutoXtraBackup project, as the –partial recovery option. Here is a demo usage video:

If you test it and find issues, please report them to help improve this open-source project.

mysqldiskusage – source code examination

As you know, there is a great toolset named “MySQL Utilities”, which you can use for solving various administrative tasks. The mysqldiskusage utility calculates MySQL Server’s disk usage and generates informative reports. Of course, this project is open source and everybody can review the source code. A few words about how mysqldiskusage calculates database disk usage will be crucial for understanding the algorithm. The source file is: mysql-utilities-1.5.4/scripts/mysqldiskusage.py If you open this Python file you will see (lines 169-175):

# We do database disk usage by default.
try:
    diskusage.show_database_usage(servers[0], datadir, args, options)
except UtilError:
    _, e, _ = sys.exc_info()
    print("ERROR: %s" % e.errmsg)
    sys.exit(1)

By default it shows database disk usage by calling another function named show_database_usage from the mysql-utilities-1.5.4/mysql/utilities/command/diskusage.py file. Now if we open this diskusage.py file and search for the show_database_usage function, we see that this function in turn uses another function named _build_db_list. From _build_db_list it gets back all the necessary information, as the code states clearly (lines 550-562):

# Get list of databases with sizes and formatted when necessary
columns, rows, db_total = _build_db_list(server, res, dblist, datadir, fmt == "grid", have_read, verbosity, include_empty or do_all, is_remote)
if not quiet:
    print "# Database totals:"
    print_list(sys.stdout, fmt, columns, rows, no_headers)
if not quiet:
    _print_size("\nTotal database disk usage = ", db_total)
    print

Now we know that all calculations happen in the _build_db_list function. If you find this function (it begins at line 360), you can see that mysqldiskusage in fact calculates database disk usage as follows:

It finds (data_length + index_length) from information_schema.tables on a per-database basis.

Then it sums (data_length + index_length) with the value of the misc_files variable, which is in fact returned by the _get_db_dir_size function. But what is misc_files? Logically, misc_files should be the .opt and .frm files. So misc_files must not include “.MYD”, “.MYI” or “.IBD” files, nor “general_log” or “slow_log”.

So in fact mysqldiskusage calculates database disk usage as -> (data_length + index_length)[size in bytes] + (.opt+.frm)[size in bytes]. First of all, we must insist on not using information_schema for accurate disk usage calculation, because of a simple rule: “InnoDB preallocates pages (16 KiB each) for further table usage, but the data_length column will not show these pages.”

As a proof of concept, let’s create a sample empty table:

mysql> create database test;
Query OK, 1 row affected (0,01 sec)

mysql> use test;
Database changed

mysql> select data_length, index_length from information_schema.tables where table_schema='test' and table_name='t1';
+-------------+--------------+
| data_length | index_length |
+-------------+--------------+
|       16384 |            0 |
+-------------+--------------+
1 row in set (0,00 sec)

If we conclude that our exact table size is 16384 bytes, we are heading in the wrong direction. In fact, if we use OS commands, we can see that the exact size of the table is 98304 bytes:

[root@node1 ~]# ls -lt /var/lib/mysql/test/ 
total 208 
-rw-rw----. 1 mysql mysql 98304 Apr 18 11:44 t1.ibd 

So when we create an InnoDB table, it preallocates 6 pages (16 KiB * 6 = 98304 bytes), but only 1 page shows up in the data_length column. Now let’s come back to our misc_files, or more exactly to the _get_db_dir_size() function. From the source code we can see that there is no check for “.IBD” files:

... 
for item in os.listdir(folder): 
    name, ext = os.path.splitext(item) 
    if ext.upper() not in (".MYD", ".MYI") and  name.upper() not in ('SLOW_LOG', 'GENERAL_LOG'): 
        itemfolder = os.path.join(folder, item) 
    ... 

Because of this, the calculation is wrong for databases, as we can see from the output:

[root@node1 ~]# mysqldiskusage --server=root_pass -vvv 
# Source on localhost: ... connected. 
# Database totals: 
+---------------------+--------------+--------------+--------------+--------------+
| db_name             | db_dir_size  | data_size    | misc_files   | total        |
+---------------------+--------------+--------------+--------------+--------------+
| employees           | 242.523.049  | 205.979.648  | 242.523.049  | 448.502.697  |

As you can see, it sums data_size with misc_files and gets a total of 448.502.697 bytes. But in fact our employees database is exactly 242.523.049 bytes. And of course the reported “Total database disk usage = 450.940.391 bytes or 430,05 MB” is wrong as well. For further exploration and how to patch the source code, see the related BUG REPORT #76703.