/* Copyright (c) 2005, 2016, Oracle and/or its affiliates.
Copyright (c) 2009, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#ifndef LOG_H
#define LOG_H
#include "handler.h" /* my_xid */
#include "rpl_constants.h"
class Relay_log_info;
class Format_description_log_event;
bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream);
void setup_log_handling();
bool trans_has_updated_trans_table(const THD* thd);
bool stmt_has_updated_trans_table(const THD *thd);
bool use_trans_cache(const THD* thd, bool is_transactional);
bool ending_trans(THD* thd, const bool all);
bool ending_single_stmt_trans(THD* thd, const bool all);
bool trans_has_updated_non_trans_table(const THD* thd);
bool stmt_has_updated_non_trans_table(const THD* thd);
/*
Transaction Coordinator log - a base abstract class
for two different implementations
*/
class TC_LOG
{
public:
int using_heuristic_recover();
TC_LOG() = default;
virtual ~TC_LOG() = default;
virtual int open(const char *opt_name)=0;
virtual void close()=0;
/*
Transaction coordinator 2-phase commit.
Must invoke the run_prepare_ordered and run_commit_ordered methods, as
described below for these methods.
In addition, must invoke THD::wait_for_prior_commit(), or equivalent
wait, to ensure that one commit waits for another if registered to do so.
*/
virtual int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered,
bool need_commit_ordered) = 0;
virtual int unlog(ulong cookie, my_xid xid)=0;
virtual int unlog_xa_prepare(THD *thd, bool all)= 0;
virtual void commit_checkpoint_notify(void *cookie)= 0;
protected:
/*
These methods are meant to be invoked from log_and_order() implementations
to run any prepare_ordered() respectively commit_ordered() methods in
participating handlers.
They must be called using suitable thread syncronisation to ensure that
they are each called in the correct commit order among all
transactions. However, it is only necessary to call them if the
corresponding flag passed to log_and_order is set (it is safe, but not
required, to call them when the flag is false).
The caller must be holding LOCK_prepare_ordered respectively
LOCK_commit_ordered when calling these methods.
*/
void run_prepare_ordered(THD *thd, bool all);
void run_commit_ordered(THD *thd, bool all);
};
/*
Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
and TC_LOG::run_commit_ordered(), or any other code that calls handler
prepare_ordered() or commit_ordered() methods.
*/
extern mysql_mutex_t LOCK_prepare_ordered;
extern mysql_cond_t COND_prepare_ordered;
extern mysql_mutex_t LOCK_after_binlog_sync;
extern mysql_mutex_t LOCK_commit_ordered;
#ifdef HAVE_PSI_INTERFACE
extern PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered;
extern PSI_mutex_key key_LOCK_after_binlog_sync;
extern PSI_cond_key key_COND_prepare_ordered;
#endif
class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
{
public:
TC_LOG_DUMMY() = default;
int open(const char *opt_name) override { return 0; }
void close() override { }
/*
TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
only use internal XA during commit when >= 2 XA-capable engines
participate.
*/
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered) override
{
DBUG_ASSERT(0);
return 1;
}
int unlog(ulong cookie, my_xid xid) override { return 0; }
int unlog_xa_prepare(THD *thd, bool all) override
{
return 0;
}
void commit_checkpoint_notify(void *cookie) override { DBUG_ASSERT(0); };
};
#define TC_LOG_PAGE_SIZE 8192
#ifdef HAVE_MMAP
class TC_LOG_MMAP: public TC_LOG
{
public: // only to keep Sun Forte on sol9x86 happy
typedef enum {
PS_POOL, // page is in pool
PS_ERROR, // last sync failed
PS_DIRTY // new xids added since last sync
} PAGE_STATE;
struct pending_cookies {
uint count;
uint pending_count;
ulong cookies[1];
};
private:
typedef struct st_page {
struct st_page *next; // page a linked in a fifo queue
my_xid *start, *end; // usable area of a page
my_xid *ptr; // next xid will be written here
int size, free; // max and current number of free xid slots on the page
int waiters; // number of waiters on condition
PAGE_STATE state; // see above
mysql_mutex_t lock; // to access page data or control structure
mysql_cond_t cond; // to wait for a sync
} PAGE;
/* List of THDs for which to invoke commit_ordered(), in order. */
struct commit_entry
{
struct commit_entry *next;
THD *thd;
};
char logname[FN_REFLEN];
File fd;
my_off_t file_length;
uint npages, inited;
uchar *data;
struct st_page *pages, *syncing, *active, *pool, **pool_last_ptr;
/*
note that, e.g. LOCK_active is only used to protect
'active' pointer, to protect the content of the active page
one has to use active->lock.
Same for LOCK_pool and LOCK_sync
*/
mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync, LOCK_pending_checkpoint;
mysql_cond_t COND_pool, COND_active;
/*
Queue of threads that need to call commit_ordered().
Access to this queue must be protected by LOCK_prepare_ordered.
*/
commit_entry *commit_ordered_queue;
/*
This flag and condition is used to reserve the queue while threads in it
each run the commit_ordered() methods one after the other. Only once the
last commit_ordered() in the queue is done can we start on a new queue
run.
Since we start this process in the first thread in the queue and finish in
the last (and possibly different) thread, we need a condition variable for
this (we cannot unlock a mutex in a different thread than the one who
locked it).
The condition is used together with the LOCK_prepare_ordered mutex.
*/
mysql_cond_t COND_queue_busy;
my_bool commit_ordered_queue_busy;
pending_cookies* pending_checkpoint;
public:
TC_LOG_MMAP(): inited(0), pending_checkpoint(0) {}
int open(const char *opt_name) override;
void close() override;
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered) override;
int unlog(ulong cookie, my_xid xid) override;
int unlog_xa_prepare(THD *thd, bool all) override
{
return 0;
}
void commit_checkpoint_notify(void *cookie) override;
int recover();
private:
int log_one_transaction(my_xid xid);
void get_active_from_pool();
int sync();
int overflow();
int delete_entry(ulong cookie);
};
#else
#define TC_LOG_MMAP TC_LOG_DUMMY
#endif
extern TC_LOG *tc_log;
extern TC_LOG_MMAP tc_log_mmap;
extern TC_LOG_DUMMY tc_log_dummy;
/* log info errors */
#define LOG_INFO_EOF -1
#define LOG_INFO_IO -2
#define LOG_INFO_INVALID -3
#define LOG_INFO_SEEK -4
#define LOG_INFO_MEM -6
#define LOG_INFO_FATAL -7
#define LOG_INFO_IN_USE -8
#define LOG_INFO_EMFILE -9
/* bitmap to SQL_LOG::close() */
#define LOG_CLOSE_INDEX 1
#define LOG_CLOSE_TO_BE_OPENED 2
#define LOG_CLOSE_STOP_EVENT 4
#define LOG_CLOSE_DELAYED_CLOSE 8
/*
Maximum unique log filename extension.
Note: setting to 0x7FFFFFFF due to atol windows
overflow/truncate.
*/
#define MAX_LOG_UNIQUE_FN_EXT 0x7FFFFFFF
/*
Number of warnings that will be printed to error log
before extension number is exhausted.
*/
#define LOG_WARN_UNIQUE_FN_EXT_LEFT 1000
class Relay_log_info;
/*
Note that we destroy the lock mutex in the desctructor here.
This means that object instances cannot be destroyed/go out of scope,
until we have reset thd->current_linfo to NULL;
*/
typedef struct st_log_info
{
char log_file_name[FN_REFLEN];
my_off_t index_file_offset, index_file_start_offset;
my_off_t pos;
bool fatal; // if the purge happens to give us a negative offset
st_log_info() : index_file_offset(0), index_file_start_offset(0),
pos(0), fatal(0)
{
DBUG_ENTER("LOG_INFO");
log_file_name[0] = '\0';
DBUG_VOID_RETURN;
}
} LOG_INFO;
/*
Currently we have only 3 kinds of logging functions: old-fashioned
logs, stdout and csv logging routines.
*/
#define MAX_LOG_HANDLERS_NUM 3
/* log event handler flags */
#define LOG_NONE 1U
#define LOG_FILE 2U
#define LOG_TABLE 4U
class Log_event;
class Rows_log_event;
enum enum_log_type { LOG_UNKNOWN, LOG_NORMAL, LOG_BIN };
enum enum_log_state { LOG_OPENED, LOG_CLOSED, LOG_TO_BE_OPENED };
/*
Use larger buffers when reading from and to binary log
We make it one step smaller than 64K to account for malloc overhead.
*/
#define LOG_BIN_IO_SIZE MY_ALIGN_DOWN(65536-1, IO_SIZE)
/*
TODO use mmap instead of IO_CACHE for binlog
(mmap+fsync is two times faster than write+fsync)
*/
class MYSQL_LOG
{
public:
MYSQL_LOG();
virtual ~MYSQL_LOG() = default;
void init_pthread_objects();
void cleanup();
bool open(
#ifdef HAVE_PSI_INTERFACE
PSI_file_key log_file_key,
#endif
const char *log_name,
enum_log_type log_type,
const char *new_name, ulong next_file_number,
enum cache_type io_cache_type_arg);
void close(uint exiting);
inline bool is_open() { return log_state != LOG_CLOSED; }
const char *generate_name(const char *log_name,
const char *suffix,
bool strip_ext, char *buff);
virtual int generate_new_name(char *new_name, const char *log_name,
ulong next_log_number);
protected:
/* LOCK_log is inited by init_pthread_objects() */
mysql_mutex_t LOCK_log;
char *name;
char log_file_name[FN_REFLEN];
char time_buff[20], db[NAME_LEN + 1];
bool write_error, inited;
IO_CACHE log_file;
enum_log_type log_type;
volatile enum_log_state log_state;
enum cache_type io_cache_type;
friend class Log_event;
#ifdef HAVE_PSI_INTERFACE
/** Instrumentation key to use for file io in @c log_file */
PSI_file_key m_log_file_key;
#endif
bool init_and_set_log_file_name(const char *log_name,
const char *new_name,
ulong next_log_number,
enum_log_type log_type_arg,
enum cache_type io_cache_type_arg);
};
/* Tell the io thread if we can delay the master info sync. */
#define SEMI_SYNC_SLAVE_DELAY_SYNC 1
/* Tell the io thread if the current event needs a ack. */
#define SEMI_SYNC_NEED_ACK 2
class MYSQL_QUERY_LOG: public MYSQL_LOG
{
public:
MYSQL_QUERY_LOG() : last_time(0) {}
void reopen_file();
bool write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id,
const char *command_type, size_t command_type_len,
const char *sql_text, size_t sql_text_len);
bool write(THD *thd, time_t current_time,
const char *user_host, size_t user_host_len,
ulonglong query_utime, ulonglong lock_utime, bool is_command,
const char *sql_text, size_t sql_text_len);
bool open_slow_log(const char *log_name)
{
char buf[FN_REFLEN];
return open(
#ifdef HAVE_PSI_INTERFACE
key_file_slow_log,
#endif
generate_name(log_name, "-slow.log", 0, buf),
LOG_NORMAL, 0, 0, WRITE_CACHE);
}
bool open_query_log(const char *log_name)
{
char buf[FN_REFLEN];
return open(
#ifdef HAVE_PSI_INTERFACE
key_file_query_log,
#endif
generate_name(log_name, ".log", 0, buf),
LOG_NORMAL, 0, 0, WRITE_CACHE);
}
private:
time_t last_time;
};
/*
We assign each binlog file an internal ID, used to identify them for unlog().
The IDs start from 0 and increment for each new binlog created.
In unlog() we need to know the ID of the binlog file that the corresponding
transaction was written into. We also need a special value for a corner
case where there is no corresponding binlog id (since nothing was logged).
And we need an error flag to mark that unlog() must return failure.
We use the following macros to pack all of this information into the single
ulong available with log_and_order() / unlog().
Note that we cannot use the value 0 for cookie, as that is reserved as error
return value from log_and_order().
*/
#define BINLOG_COOKIE_ERROR_RETURN 0
#define BINLOG_COOKIE_DUMMY_ID 1
#define BINLOG_COOKIE_BASE 2
#define BINLOG_COOKIE_DUMMY(error_flag) \
( (BINLOG_COOKIE_DUMMY_ID<<1) | ((error_flag)&1) )
#define BINLOG_COOKIE_MAKE(id, error_flag) \
( (((id)+BINLOG_COOKIE_BASE)<<1) | ((error_flag)&1) )
#define BINLOG_COOKIE_GET_ERROR_FLAG(c) ((c) & 1)
#define BINLOG_COOKIE_GET_ID(c) ( ((ulong)(c)>>1) - BINLOG_COOKIE_BASE )
#define BINLOG_COOKIE_IS_DUMMY(c) \
( ((ulong)(c)>>1) == BINLOG_COOKIE_DUMMY_ID )
class binlog_cache_mngr;
class binlog_cache_data;
struct rpl_gtid;
struct wait_for_commit;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
{
#ifdef HAVE_PSI_INTERFACE
/** The instrumentation key to use for @ LOCK_index. */
PSI_mutex_key m_key_LOCK_index;
/** The instrumentation key to use for @ COND_relay_log_updated */
PSI_cond_key m_key_relay_log_update;
/** The instrumentation key to use for @ COND_bin_log_updated */
PSI_cond_key m_key_bin_log_update;
/** The instrumentation key to use for opening the log file. */
PSI_file_key m_key_file_log, m_key_file_log_cache;
/** The instrumentation key to use for opening the log index file. */
PSI_file_key m_key_file_log_index, m_key_file_log_index_cache;
PSI_cond_key m_key_COND_queue_busy;
/** The instrumentation key to use for LOCK_binlog_end_pos. */
PSI_mutex_key m_key_LOCK_binlog_end_pos;
#else
static constexpr PSI_mutex_key m_key_LOCK_index= 0;
static constexpr PSI_cond_key m_key_relay_log_update= 0;
static constexpr PSI_cond_key m_key_bin_log_update= 0;
static constexpr PSI_file_key m_key_file_log= 0, m_key_file_log_cache= 0;
static constexpr PSI_file_key m_key_file_log_index= 0;
static constexpr PSI_file_key m_key_file_log_index_cache= 0;
static constexpr PSI_cond_key m_key_COND_queue_busy= 0;
static constexpr PSI_mutex_key m_key_LOCK_binlog_end_pos= 0;
#endif
struct group_commit_entry
{
struct group_commit_entry *next;
THD *thd;
binlog_cache_mngr *cache_mngr;
bool using_stmt_cache;
bool using_trx_cache;
/*
Extra events (COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
written during group commit. The incident_event is only valid if
trx_data->has_incident() is true.
*/
Log_event *end_event;
Log_event *incident_event;
/* Set during group commit to record any per-thread error. */
int error;
int commit_errno;
IO_CACHE *error_cache;
/* This is the `all' parameter for ha_commit_ordered(). */
bool all;
/*
True if we need to increment xid_count in trx_group_commit_leader() and
decrement in unlog() (this is needed if there is a participating engine
that does not implement the commit_checkpoint_request() handlerton
method).
*/
bool need_unlog;
/*
Fields used to pass the necessary information to the last thread in a
group commit, only used when opt_optimize_thread_scheduling is not set.
*/
bool check_purge;
/* Flag used to optimise around wait_for_prior_commit. */
bool queued_by_other;
ulong binlog_id;
bool ro_1pc; // passes the binlog_cache_mngr::ro_1pc value to Gtid ctor
};
/*
When this is set, a RESET MASTER is in progress.
Then we should not write any binlog checkpoints into the binlog (that
could result in deadlock on LOCK_log, and we will delete all binlog files
anyway). Instead we should signal COND_xid_list whenever a new binlog
checkpoint arrives - when all have arrived, RESET MASTER will complete.
*/
uint reset_master_pending;
ulong mark_xid_done_waiting;
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
mysql_mutex_t LOCK_index;
mysql_mutex_t LOCK_binlog_end_pos;
mysql_mutex_t LOCK_xid_list;
mysql_cond_t COND_xid_list;
mysql_cond_t COND_relay_log_updated, COND_bin_log_updated;
ulonglong bytes_written;
IO_CACHE index_file;
char index_file_name[FN_REFLEN];
/*
purge_file is a temp file used in purge_logs so that the index file
can be updated before deleting files from disk, yielding better crash
recovery. It is created on demand the first time purge_logs is called
and then reused for subsequent calls. It is cleaned up in cleanup().
*/
IO_CACHE purge_index_file;
char purge_index_file_name[FN_REFLEN];
/*
The max size before rotation (usable only if log_type == LOG_BIN: binary
logs and relay logs).
For a binlog, max_size should be max_binlog_size.
max_size is set in init(), and dynamically changed (when one does SET
GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) from sys_vars.cc
*/
ulong max_size;
/*
Number generated by last call of find_uniq_filename(). Corresponds
closely with current_binlog_id
*/
ulong last_used_log_number;
// current file sequence number for load data infile binary logging
uint file_id;
uint open_count; // For replication
int readers_count;
/* Queue of transactions queued up to participate in group commit. */
group_commit_entry *group_commit_queue;
/*
Condition variable to mark that the group commit queue is busy.
Used when each thread does it's own commit_ordered() (when
binlog_optimize_thread_scheduling=1).
Used with the LOCK_commit_ordered mutex.
*/
my_bool group_commit_queue_busy;
mysql_cond_t COND_queue_busy;
/* Total number of committed transactions. */
ulonglong num_commits;
/* Number of group commits done. */
ulonglong num_group_commits;
/* The reason why the group commit was grouped */
ulonglong group_commit_trigger_count, group_commit_trigger_timeout;
ulonglong group_commit_trigger_lock_wait;
/* binlog encryption data */
struct Binlog_crypt_data crypto;
/* pointer to the sync period variable, for binlog this will be
sync_binlog_period, for relay log this will be
sync_relay_log_period
*/
uint *sync_period_ptr;
uint sync_counter;
bool state_file_deleted;
bool binlog_state_recover_done;
inline uint get_sync_period()
{
return *sync_period_ptr;
}
int write_to_file(IO_CACHE *cache);
/*
This is used to start writing to a new log file. The difference from
new_file() is locking. new_file_without_locking() does not acquire
LOCK_log.
*/
int new_file_without_locking();
int new_file_impl();
void do_checkpoint_request(ulong binlog_id);
void purge();
int write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id);
int queue_for_group_commit(group_commit_entry *entry);
bool write_transaction_to_binlog_events(group_commit_entry *entry);
void trx_group_commit_leader(group_commit_entry *leader);
bool is_xidlist_idle_nolock();
public:
/*
A list of struct xid_count_per_binlog is used to keep track of how many
XIDs are in prepared, but not committed, state in each binlog. And how
many commit_checkpoint_request()'s are pending.
When count drops to zero in a binlog after rotation, it means that there
are no more XIDs in prepared state, so that binlog is no longer needed
for XA crash recovery, and we can log a new binlog checkpoint event.
The list is protected against simultaneous access from multiple
threads by LOCK_xid_list.
*/
struct xid_count_per_binlog : public ilink {
char *binlog_name;
uint binlog_name_len;
ulong binlog_id;
/* Total prepared XIDs and pending checkpoint requests in this binlog. */
long xid_count;
long notify_count;
/* For linking in requests to the binlog background thread. */
xid_count_per_binlog *next_in_queue;
xid_count_per_binlog(char *log_file_name, uint log_file_name_len)
:binlog_id(0), xid_count(0), notify_count(0)
{
binlog_name_len= log_file_name_len;
binlog_name= (char *) my_malloc(PSI_INSTRUMENT_ME, binlog_name_len, MYF(MY_ZEROFILL));
if (binlog_name)
memcpy(binlog_name, log_file_name, binlog_name_len);
}
~xid_count_per_binlog()
{
my_free(binlog_name);
}
};
I_List