diff -r 9d8383ab5cec sql/log.cc --- a/sql/log.cc Mon Jun 30 23:12:15 2008 +0700 +++ b/sql/log.cc Tue Jul 01 00:34:18 2008 +0700 @@ -981,6 +981,7 @@ if (!query_length) { thd->sent_row_count= thd->examined_row_count= 0; + thd->innodb_was_used= FALSE; thd->query_plan_flags= QPLAN_NONE; thd->query_plan_fsort_passes= 0; } @@ -2264,7 +2265,30 @@ ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"), ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"), thd->query_plan_fsort_passes) == (uint) -1) - tmp_errno=errno; + tmp_errno=errno; + if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && thd->innodb_was_used) + { + char buff[5][20]; + sprintf(buff[2], "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0); + sprintf(buff[3], "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0); + sprintf(buff[4], "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0); + if (my_b_printf(&log_file, + "# InnoDB_IO_r_ops: %lu InnoDB_IO_r_bytes: %lu InnoDB_IO_r_wait: %s\n" \ + "# InnoDB_rec_lock_wait: %s InnoDB_queue_wait: %s\n" \ + "# InnoDB_pages_distinct: %lu\n", + (ulong) thd->innodb_io_reads, + (ulong) thd->innodb_io_read, + buff[2], buff[3], buff[4], + (ulong) thd->innodb_page_access) == (uint) -1) + tmp_errno=errno; + } + else + { + if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && + my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1) + tmp_errno=errno; + } + if (thd->db && strcmp(thd->db, db)) { // Database changed if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1) diff -r 9d8383ab5cec sql/mysql_priv.h --- a/sql/mysql_priv.h Mon Jun 30 23:12:15 2008 +0700 +++ b/sql/mysql_priv.h Tue Jul 01 00:34:18 2008 +0700 @@ -605,6 +605,7 @@ }; #define SLOG_V_QUERY_PLAN 1 << 1 +#define SLOG_V_INNODB 1 << 2 /* ... */ #define SLOG_V_INVALID 1 << 31 #define SLOG_V_NONE 1 << 0 @@ -614,6 +615,7 @@ /* Basic flags */ { SLOG_V_QUERY_PLAN, "query_plan" }, + { SLOG_V_INNODB, "innodb" }, /* End of baisc flags */ @@ -622,7 +624,7 @@ /* Complex flags */ { SLOG_V_QUERY_PLAN, "standard" }, - { SLOG_V_INVALID, "full" }, + { SLOG_V_QUERY_PLAN|SLOG_V_INNODB, "full" }, /* End of complex flags */ diff -r 9d8383ab5cec sql/sql_class.cc --- a/sql/sql_class.cc Mon Jun 30 23:12:15 2008 +0700 +++ b/sql/sql_class.cc Tue Jul 01 00:34:18 2008 +0700 @@ -2900,6 +2900,12 @@ first_successful_insert_id_in_prev_stmt; backup->first_successful_insert_id_in_cur_stmt= first_successful_insert_id_in_cur_stmt; + backup->innodb_io_reads= innodb_io_reads; + backup->innodb_io_read= innodb_io_read; + backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer; + backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer; + backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer; + backup->innodb_page_access= innodb_page_access; if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) && !current_stmt_binlog_row_based) @@ -2919,6 +2925,12 @@ cuted_fields= 0; transaction.savepoints= 0; first_successful_insert_id_in_cur_stmt= 0; + innodb_io_reads= 0; + innodb_io_read= 0; + innodb_io_reads_wait_timer= 0; + innodb_lock_que_wait_timer= 0; + innodb_innodb_que_wait_timer= 0; + innodb_page_access= 0; } @@ -2979,6 +2991,12 @@ */ examined_row_count+= backup->examined_row_count; cuted_fields+= backup->cuted_fields; + innodb_io_reads+= backup->innodb_io_reads; + innodb_io_read+= backup->innodb_io_read; + innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer; + innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer; + innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer; + innodb_page_access+= backup->innodb_page_access; } diff -r 9d8383ab5cec sql/sql_class.h --- a/sql/sql_class.h Mon Jun 30 23:12:15 2008 +0700 +++ b/sql/sql_class.h Tue Jul 01 00:34:18 2008 +0700 @@ -400,7 +400,12 @@ ulong log_slow_rate_limit; ulong log_slow_filter; ulong log_slow_verbosity; - + ulong innodb_io_reads; + ulonglong innodb_io_read; + ulong innodb_io_reads_wait_timer; + ulong innodb_lock_que_wait_timer; + ulong innodb_innodb_que_wait_timer; + ulong innodb_page_access; }; @@ -985,6 +990,14 @@ uint in_sub_stmt; bool enable_slow_log; bool last_insert_id_used; + + ulong innodb_io_reads; + ulonglong innodb_io_read; + ulong innodb_io_reads_wait_timer; + ulong innodb_lock_que_wait_timer; + ulong innodb_innodb_que_wait_timer; + ulong innodb_page_access; + SAVEPOINT *savepoints; }; @@ -1326,6 +1339,14 @@ Delayed_insert *di; bool write_to_slow_log; + + bool innodb_was_used; + ulong innodb_io_reads; + ulonglong innodb_io_read; + ulong innodb_io_reads_wait_timer; + ulong innodb_lock_que_wait_timer; + ulong innodb_innodb_que_wait_timer; + ulong innodb_page_access; ulong query_plan_flags; ulong query_plan_fsort_passes; diff -r 9d8383ab5cec sql/sql_parse.cc --- a/sql/sql_parse.cc Mon Jun 30 23:12:15 2008 +0700 +++ b/sql/sql_parse.cc Tue Jul 01 00:34:18 2008 +0700 @@ -5430,6 +5430,14 @@ thd->rand_used= 0; thd->sent_row_count= thd->examined_row_count= 0; + thd->innodb_was_used= FALSE; + thd->innodb_io_reads= 0; + thd->innodb_io_read= 0; + thd->innodb_io_reads_wait_timer= 0; + thd->innodb_lock_que_wait_timer= 0; + thd->innodb_innodb_que_wait_timer= 0; + thd->innodb_page_access= 0; + thd->query_plan_flags= QPLAN_NONE; thd->query_plan_fsort_passes= 0; diff -r 9d8383ab5cec storage/innobase/buf/buf0buf.c --- a/storage/innobase/buf/buf0buf.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/buf/buf0buf.c Tue Jul 01 00:34:18 2008 +0700 @@ -37,6 +37,7 @@ #include "log0log.h" #include "trx0undo.h" #include "srv0srv.h" +#include "thr0loc.h" /* IMPLEMENTATION OF THE BUFFER POOL @@ -1138,6 +1139,31 @@ return(block); } +inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) +{ + ulint block_hash; + ulint block_hash_byte; + byte block_hash_offset; + + ut_ad(block); + + if (!trx || !trx->distinct_page_access_hash) + return; + + block_hash = ut_hash_ulint((block->space << 20) + block->space + + block->offset, DPAH_SIZE << 3); + block_hash_byte = block_hash >> 3; + block_hash_offset = (byte) block_hash & 0x07; + if (block_hash_byte < 0 || block_hash_byte >= DPAH_SIZE) + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset); + if (block_hash_offset < 0 || block_hash_offset > 7) + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %lu !!!\n", block_hash_byte, block_hash_offset); + if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) + trx->distinct_page_access++; + trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; + return; +} + /************************************************************************ This is the general function used to get access to a database page. */ @@ -1160,6 +1186,11 @@ ulint fix_type; ibool success; ibool must_read; + trx_t* trx; + ulint sec; + ulint ms; + ib_longlong start_time; + ib_longlong finish_time; ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) @@ -1171,6 +1202,7 @@ #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside() || ibuf_page(space, offset)); #endif + trx = thr_local_get_trx(os_thread_get_curr_id()); buf_pool->n_page_gets++; loop: block = NULL; @@ -1200,7 +1232,7 @@ return(NULL); } - buf_read_page(space, offset); + buf_read_page(space, offset, trx); #ifdef UNIV_DEBUG buf_dbg_counter++; @@ -1305,6 +1337,11 @@ /* Let us wait until the read operation completes */ + if (trx) + { + ut_usectime(&sec, &ms); + start_time = (ib_longlong)sec * 1000000 + ms; + } for (;;) { mutex_enter(&block->mutex); @@ -1319,6 +1356,12 @@ break; } + } + if (trx) + { + ut_usectime(&sec, &ms); + finish_time = (ib_longlong)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); } } @@ -1340,12 +1383,15 @@ /* In the case of a first access, try to apply linear read-ahead */ - buf_read_ahead_linear(space, offset); + buf_read_ahead_linear(space, offset, trx); } #ifdef UNIV_IBUF_DEBUG ut_a(ibuf_count_get(block->space, block->offset) == 0); #endif + + _increment_page_get_statistics(block, trx); + return(block->frame); } @@ -1370,6 +1416,7 @@ ibool accessed; ibool success; ulint fix_type; + trx_t* trx; ut_ad(mtr && block); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); @@ -1467,13 +1514,16 @@ read-ahead */ buf_read_ahead_linear(buf_frame_get_space_id(guess), - buf_frame_get_page_no(guess)); + buf_frame_get_page_no(guess), trx); } #ifdef UNIV_IBUF_DEBUG ut_a(ibuf_count_get(block->space, block->offset) == 0); #endif buf_pool->n_page_gets++; + + trx = thr_local_get_trx(os_thread_get_curr_id()); + _increment_page_get_statistics(block, trx); return(TRUE); } @@ -1497,6 +1547,7 @@ buf_block_t* block; ibool success; ulint fix_type; + trx_t* trx; ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); @@ -1577,6 +1628,9 @@ || (ibuf_count_get(block->space, block->offset) == 0)); #endif buf_pool->n_page_gets++; + + trx = thr_local_get_trx(os_thread_get_curr_id()); + _increment_page_get_statistics(block, trx); return(TRUE); } diff -r 9d8383ab5cec storage/innobase/buf/buf0rea.c --- a/storage/innobase/buf/buf0rea.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/buf/buf0rea.c Tue Jul 01 00:34:18 2008 +0700 @@ -70,7 +70,8 @@ treat the tablespace as dropped; this is a timestamp we use to stop dangling page reads from a tablespace which we have DISCARDed + IMPORTed back */ - ulint offset) /* in: page number */ + ulint offset, /* in: page number */ + trx_t* trx) { buf_block_t* block; ulint wake_later; @@ -129,10 +130,10 @@ ut_a(block->state == BUF_BLOCK_FILE_PAGE); - *err = fil_io(OS_FILE_READ | wake_later, + *err = _fil_io(OS_FILE_READ | wake_later, sync, space, offset, 0, UNIV_PAGE_SIZE, - (void*)block->frame, (void*)block); + (void*)block->frame, (void*)block, trx); ut_a(*err == DB_SUCCESS); if (sync) { @@ -163,8 +164,9 @@ the page at the given page number does not get read even if we return a value > 0! */ ulint space, /* in: space id */ - ulint offset) /* in: page number of a page which the current thread + ulint offset, /* in: page number of a page which the current thread wants to access */ + trx_t* trx) { ib_longlong tablespace_version; buf_block_t* block; @@ -260,7 +262,7 @@ count += buf_read_page_low( &err, FALSE, ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, tablespace_version, i); + space, tablespace_version, i, trx); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -306,7 +308,8 @@ /* out: number of page read requests issued: this can be > 1 if read-ahead occurred */ ulint space, /* in: space id */ - ulint offset) /* in: page number */ + ulint offset, /* in: page number */ + trx_t* trx) { ib_longlong tablespace_version; ulint count; @@ -315,13 +318,13 @@ tablespace_version = fil_space_get_version(space); - count = buf_read_ahead_random(space, offset); + count = buf_read_ahead_random(space, offset, trx); /* We do the i/o in the synchronous aio mode to save thread switches: hence TRUE */ count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, - tablespace_version, offset); + tablespace_version, offset, trx); srv_buf_pool_reads+= count2; if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); @@ -368,8 +371,9 @@ /*==================*/ /* out: number of page read requests issued */ ulint space, /* in: space id */ - ulint offset) /* in: page number of a page; NOTE: the current thread + ulint offset, /* in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ + trx_t* trx) { ib_longlong tablespace_version; buf_block_t* block; @@ -552,7 +556,7 @@ count += buf_read_page_low( &err, FALSE, ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, tablespace_version, i); + space, tablespace_version, i, trx); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -625,7 +629,7 @@ (i + 1 == n_stored) && sync, BUF_READ_ANY_PAGE, space_ids[i], space_versions[i], - page_nos[i]); + page_nos[i], NULL); if (err == DB_TABLESPACE_DELETED) { /* We have deleted or are deleting the single-table @@ -704,12 +708,12 @@ if ((i + 1 == n_stored) && sync) { buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, tablespace_version, - page_nos[i]); + page_nos[i], NULL); } else { buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER, space, tablespace_version, - page_nos[i]); + page_nos[i], NULL); } } diff -r 9d8383ab5cec storage/innobase/fil/fil0fil.c --- a/storage/innobase/fil/fil0fil.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/fil/fil0fil.c Tue Jul 01 00:34:18 2008 +0700 @@ -3634,7 +3634,7 @@ node->name, node->handle, buf, offset_low, offset_high, UNIV_PAGE_SIZE * n_pages, - NULL, NULL); + NULL, NULL, NULL); #endif if (success) { node->size += n_pages; @@ -3962,7 +3962,7 @@ Reads or writes data. This operation is asynchronous (aio). */ ulint -fil_io( +_fil_io( /*===*/ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace @@ -3988,8 +3988,9 @@ void* buf, /* in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /* in: message for aio handler if non-sync + void* message, /* in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx) { fil_system_t* system = fil_system; ulint mode; @@ -4133,7 +4134,7 @@ #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset_low, offset_high, len, node, message); + offset_low, offset_high, len, node, message, trx); #endif ut_a(ret); diff -r 9d8383ab5cec storage/innobase/include/buf0rea.h --- a/storage/innobase/include/buf0rea.h Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/include/buf0rea.h Tue Jul 01 00:34:18 2008 +0700 @@ -10,6 +10,7 @@ #define buf0rea_h #include "univ.i" +#include "trx0types.h" #include "buf0types.h" /************************************************************************ @@ -25,7 +26,8 @@ /* out: number of page read requests issued: this can be > 1 if read-ahead occurred */ ulint space, /* in: space id */ - ulint offset);/* in: page number */ + ulint offset, /* in: page number */ + trx_t* trx); /************************************************************************ Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. @@ -55,8 +57,9 @@ /*==================*/ /* out: number of page read requests issued */ ulint space, /* in: space id */ - ulint offset);/* in: page number of a page; NOTE: the current thread + ulint offset, /* in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ + trx_t* trx); /************************************************************************ Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like diff -r 9d8383ab5cec storage/innobase/include/fil0fil.h --- a/storage/innobase/include/fil0fil.h Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/include/fil0fil.h Tue Jul 01 00:34:18 2008 +0700 @@ -567,8 +567,11 @@ /************************************************************************ Reads or writes data. This operation is asynchronous (aio). */ +#define fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message) \ + _fil_io(type, sync, space_id, block_offset, byte_offset, len, buf, message, NULL) + ulint -fil_io( +_fil_io( /*===*/ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace @@ -594,8 +597,9 @@ void* buf, /* in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message); /* in: message for aio handler if non-sync + void* message, /* in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx); /************************************************************************ Reads data from a space to a buffer. Remember that the possible incomplete blocks at the end of file are ignored: they are not taken into account when diff -r 9d8383ab5cec storage/innobase/include/os0file.h --- a/storage/innobase/include/os0file.h Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/include/os0file.h Tue Jul 01 00:34:18 2008 +0700 @@ -10,6 +10,8 @@ #define os0file_h #include "univ.i" + +#include "trx0types.h" #ifndef __WIN__ #include @@ -424,8 +426,11 @@ /*********************************************************************** Requests a synchronous read operation. */ +#define os_file_read(file, buf, offset, offset_high, n) \ + _os_file_read(file, buf, offset, offset_high, n, NULL) + ibool -os_file_read( +_os_file_read( /*=========*/ /* out: TRUE if request was successful, FALSE if fail */ @@ -435,7 +440,8 @@ offset where to read */ ulint offset_high,/* in: most significant 32 bits of offset */ - ulint n); /* in: number of bytes to read */ + ulint n, /* in: number of bytes to read */ + trx_t* trx); /*********************************************************************** Rewind file to its start, read at most size - 1 bytes from it to str, and NUL-terminate str. All errors are silently ignored. This function is @@ -587,7 +593,8 @@ can be used to identify a completed aio operation); if mode is OS_AIO_SYNC, these are ignored */ - void* message2); + void* message2, + trx_t* trx); /**************************************************************************** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ diff -r 9d8383ab5cec storage/innobase/include/thr0loc.h --- a/storage/innobase/include/thr0loc.h Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/include/thr0loc.h Tue Jul 01 00:34:18 2008 +0700 @@ -15,6 +15,7 @@ #include "univ.i" #include "os0thread.h" +#include "trx0trx.h" /******************************************************************** Initializes the thread local storage module. */ @@ -36,6 +37,14 @@ /*===========*/ os_thread_id_t id); /* in: thread id */ /*********************************************************************** +Gets trx */ + +trx_t* +thr_local_get_trx( +/*==================*/ + /* out: trx for mysql */ + os_thread_id_t id); /* in: thread id of the thread */ +/*********************************************************************** Gets the slot number in the thread table of a thread. */ ulint @@ -43,6 +52,14 @@ /*==================*/ /* out: slot number */ os_thread_id_t id); /* in: thread id of the thread */ +/*********************************************************************** +Sets in the local storage the slot number in the thread table of a thread. */ + +void +thr_local_set_trx( +/*==================*/ + os_thread_id_t id, /* in: thread id of the thread */ + trx_t* trx); /* in: slot number */ /*********************************************************************** Sets in the local storage the slot number in the thread table of a thread. */ diff -r 9d8383ab5cec storage/innobase/include/trx0trx.h --- a/storage/innobase/include/trx0trx.h Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/include/trx0trx.h Tue Jul 01 00:34:18 2008 +0700 @@ -662,6 +662,17 @@ /*------------------------------*/ char detailed_error[256]; /* detailed error message for last error, or empty. */ + /*------------------------------*/ + os_thread_id_t trx_thread_id; + ulint io_reads; + ib_longlong io_read; + ulint io_reads_wait_timer; + ib_longlong lock_que_wait_ustarted; + ulint lock_que_wait_timer; + ulint innodb_que_wait_timer; + ulint distinct_page_access; +#define DPAH_SIZE 8192 + byte* distinct_page_access_hash; }; #define TRX_MAX_N_THREADS 32 /* maximum number of diff -r 9d8383ab5cec storage/innobase/lock/lock0lock.c --- a/storage/innobase/lock/lock0lock.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/lock/lock0lock.c Tue Jul 01 00:34:19 2008 +0700 @@ -1770,6 +1770,8 @@ { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -1824,6 +1826,8 @@ trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms; ut_a(que_thr_stop(thr)); @@ -3469,6 +3473,8 @@ { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -3518,6 +3524,9 @@ return(DB_SUCCESS); } + trx->wait_started = time(NULL); + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_longlong)sec * 1000000 + ms; trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); @@ -4230,7 +4239,7 @@ ulint i; mtr_t mtr; trx_t* trx; - + fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); /* First print info on non-active transactions */ diff -r 9d8383ab5cec storage/innobase/os/os0file.c --- a/storage/innobase/os/os0file.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/os/os0file.c Tue Jul 01 00:34:19 2008 +0700 @@ -14,6 +14,7 @@ #include "srv0start.h" #include "fil0fil.h" #include "buf0buf.h" +#include "trx0sys.h" #if defined(UNIV_HOTBACKUP) && defined(__WIN__) /* Add includes for the _stat() call to compile on Windows */ @@ -99,6 +100,7 @@ struct aiocb control; /* Posix control block for aio request */ #endif + trx_t* trx; }; /* The aio array structure */ @@ -1960,9 +1962,13 @@ #ifndef __WIN__ /*********************************************************************** Does a synchronous read operation in Posix. */ + +#define os_file_pread(file, buf, n, offset, offset_high) \ + _os_file_pread(file, buf, n, offset, offset_high, NULL); + static ssize_t -os_file_pread( +_os_file_pread( /*==========*/ /* out: number of bytes read, -1 if error */ os_file_t file, /* in: handle to a file */ @@ -1970,12 +1976,17 @@ ulint n, /* in: number of bytes to read */ ulint offset, /* in: least significant 32 bits of file offset from where to read */ - ulint offset_high) /* in: most significant 32 bits of - offset */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + trx_t* trx) { off_t offs; ssize_t n_bytes; - + ulint sec; + ulint ms; + ib_longlong start_time; + ib_longlong finish_time; + ut_a((offset & 0xFFFFFFFFUL) == offset); /* If off_t is > 4 bytes in size, then we assume we can pass a @@ -1994,7 +2005,13 @@ } os_n_file_reads++; - + if (trx) + { + trx->io_reads++; + trx->io_read += n; + ut_usectime(&sec, &ms); + start_time = (ib_longlong)sec * 1000000 + ms; + } #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) os_mutex_enter(os_file_count_mutex); os_file_n_pending_preads++; @@ -2007,6 +2024,13 @@ os_file_n_pending_preads--; os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + + if (trx) + { + ut_usectime(&sec, &ms); + finish_time = (ib_longlong)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } return(n_bytes); #else @@ -2038,6 +2062,13 @@ os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + if (trx) + { + ut_usectime(&sec, &ms); + finish_time = (ib_longlong)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + return(ret); } #endif @@ -2161,7 +2192,7 @@ Requests a synchronous positioned read operation. */ ibool -os_file_read( +_os_file_read( /*=========*/ /* out: TRUE if request was successful, FALSE if fail */ @@ -2171,7 +2202,8 @@ offset where to read */ ulint offset_high, /* in: most significant 32 bits of offset */ - ulint n) /* in: number of bytes to read */ + ulint n, /* in: number of bytes to read */ + trx_t* trx) { #ifdef __WIN__ BOOL ret; @@ -2186,7 +2218,6 @@ os_n_file_reads++; os_bytes_read_since_printout += n; - try_again: ut_ad(file); ut_ad(buf); @@ -2235,7 +2266,7 @@ os_bytes_read_since_printout += n; try_again: - ret = os_file_pread(file, buf, n, offset, offset_high); + ret = _os_file_pread(file, buf, n, offset, offset_high, trx); if ((ulint)ret == n) { @@ -3217,7 +3248,8 @@ offset */ ulint offset_high, /* in: most significant 32 bits of offset */ - ulint len) /* in: length of the block to read or write */ + ulint len, /* in: length of the block to read or write */ + trx_t* trx) { os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO @@ -3470,7 +3502,8 @@ can be used to identify a completed aio operation); if mode is OS_AIO_SYNC, these are ignored */ - void* message2) + void* message2, + trx_t* trx) { os_aio_array_t* array; os_aio_slot_t* slot; @@ -3509,8 +3542,8 @@ wait in the Windows case. */ if (type == OS_FILE_READ) { - return(os_file_read(file, buf, offset, - offset_high, n)); + return(_os_file_read(file, buf, offset, + offset_high, n, trx)); } ut_a(type == OS_FILE_WRITE); @@ -3543,8 +3576,13 @@ ut_error; } + if (trx && type == OS_FILE_READ) + { + trx->io_reads++; + trx->io_read += n; + } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, offset_high, n); + name, buf, offset, offset_high, n, trx); if (type == OS_FILE_READ) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO @@ -4136,7 +4174,7 @@ ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, consecutive_ios[i]->len); - offs += consecutive_ios[i]->len; + offs += consecutive_ios[i]->len; } } @@ -4148,9 +4186,8 @@ /* Mark the i/os done in slots */ - for (i = 0; i < n_consecutive; i++) { + for (i = 0; i < n_consecutive; i++) consecutive_ios[i]->io_already_done = TRUE; - } /* We return the messages for the first slot now, and if there were several slots, the messages will be returned with subsequent calls diff -r 9d8383ab5cec storage/innobase/srv/srv0srv.c --- a/storage/innobase/srv/srv0srv.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/srv/srv0srv.c Tue Jul 01 00:34:19 2008 +0700 @@ -987,6 +987,10 @@ ibool has_slept = FALSE; srv_conc_slot_t* slot = NULL; ulint i; + ib_longlong start_time = 0L; + ib_longlong finish_time = 0L; + ulint sec; + ulint ms; if (trx->mysql_thd != NULL && thd_is_replication_slave_thread(trx->mysql_thd)) { @@ -1055,6 +1059,7 @@ switches. */ if (SRV_THREAD_SLEEP_DELAY > 0) { os_thread_sleep(SRV_THREAD_SLEEP_DELAY); + trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY; } trx->op_info = ""; @@ -1110,11 +1115,18 @@ /* Go to wait for the event; when a thread leaves InnoDB it will release this thread */ + ut_usectime(&sec, &ms); + start_time = (ib_longlong)sec * 1000000 + ms; + trx->op_info = "waiting in InnoDB queue"; os_event_wait(slot->event); trx->op_info = ""; + + ut_usectime(&sec, &ms); + finish_time = (ib_longlong)sec * 1000000 + ms; + trx->innodb_que_wait_timer += (ulint)(finish_time - start_time); os_fast_mutex_lock(&srv_conc_mutex); diff -r 9d8383ab5cec storage/innobase/thr/thr0loc.c --- a/storage/innobase/thr/thr0loc.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/thr/thr0loc.c Tue Jul 01 00:34:19 2008 +0700 @@ -45,6 +45,7 @@ for this thread */ ibool in_ibuf;/* TRUE if the the thread is doing an ibuf operation */ + trx_t* trx; hash_node_t hash; /* hash chain node */ ulint magic_n; }; @@ -111,6 +112,29 @@ } /*********************************************************************** +Gets trx */ + +trx_t* +thr_local_get_trx( +/*==================*/ + /* out: trx for mysql */ + os_thread_id_t id) /* in: thread id of the thread */ +{ + trx_t* trx; + thr_local_t* local; + + mutex_enter(&thr_local_mutex); + + local = thr_local_get(id); + + trx = local->trx; + + mutex_exit(&thr_local_mutex); + + return(trx); +} + +/*********************************************************************** Sets the slot number in the thread table of a thread. */ void @@ -126,6 +150,26 @@ local = thr_local_get(id); local->slot_no = slot_no; + + mutex_exit(&thr_local_mutex); +} + +/*********************************************************************** +Sets trx */ + +void +thr_local_set_trx( +/*==================*/ + os_thread_id_t id, /* in: thread id of the thread */ + trx_t* trx) /* in: trx */ +{ + thr_local_t* local; + + mutex_enter(&thr_local_mutex); + + local = thr_local_get(id); + + local->trx = trx; mutex_exit(&thr_local_mutex); } @@ -170,6 +214,7 @@ local->magic_n = THR_LOCAL_MAGIC_N; local->in_ibuf = FALSE; + local->trx = NULL; mutex_enter(&thr_local_mutex); diff -r 9d8383ab5cec storage/innobase/trx/trx0trx.c --- a/storage/innobase/trx/trx0trx.c Mon Jun 30 23:12:15 2008 +0700 +++ b/storage/innobase/trx/trx0trx.c Tue Jul 01 00:34:19 2008 +0700 @@ -185,6 +185,16 @@ trx->global_read_view_heap = mem_heap_create(256); trx->global_read_view = NULL; trx->read_view = NULL; + + trx->io_reads = 0; + trx->io_read = 0; + trx->io_reads_wait_timer = 0; + trx->lock_que_wait_timer = 0; + trx->innodb_que_wait_timer = 0; + trx->distinct_page_access = 0; + trx->distinct_page_access_hash = NULL; + trx->trx_thread_id = os_thread_get_curr_id(); + thr_local_set_trx(trx->trx_thread_id, NULL); /* Set X/Open XA transaction identification to NULL */ memset(&trx->xid, 0, sizeof(trx->xid)); @@ -226,6 +236,10 @@ trx->mysql_thread_id = os_thread_get_curr_id(); trx->mysql_process_no = os_proc_get_number(); + + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); + thr_local_set_trx(trx->mysql_thread_id, trx); return(trx); } @@ -345,6 +359,8 @@ ut_a(trx->read_view == NULL); + thr_local_free(trx->trx_thread_id); + mem_free(trx); } @@ -356,6 +372,12 @@ /*===============*/ trx_t* trx) /* in, own: trx object */ { + if (trx->distinct_page_access_hash) + { + mem_free(trx->distinct_page_access_hash); + trx->distinct_page_access_hash= NULL; + } + mutex_enter(&kernel_mutex); UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); @@ -1049,6 +1071,9 @@ trx_t* trx) /* in: transaction */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_longlong now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1063,6 +1088,9 @@ thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + ut_usectime(&sec, &ms); + now = (ib_longlong)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); trx->que_state = TRX_QUE_RUNNING; } @@ -1076,6 +1104,9 @@ trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_longlong now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1090,6 +1121,9 @@ thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + ut_usectime(&sec, &ms); + now = (ib_longlong)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); trx->que_state = TRX_QUE_RUNNING; }