|
一、问题发现
在一次数据迁移中,用到了INSERT INTO t1 SELECT * FROM t2这样的 SQL 用来搬迁大表,为了提高插入效率关闭了Binlog,考虑用多线程来插入提高速度。表的类型信息和插入效率如下所示。
测试环境:
- Linux node-76-11 4.19.90-17.ky10.aarch64,128核CPU,512G内存。
GreatSQL参数配置如下(为降低 I/O 因素影响,关闭 Binlog):- #**********************Performance*********************
- #******connect
- max_connections=10000
- max_connect_errors=1000000
- open_files_limit=65535
- back_log=1500
- table_definition_cache=10000
- thread_stack=256K
- thread_cache_size=3000
- #******session
- sort_buffer_size=4M
- join_buffer_size=4M
- read_buffer_size=4M
- read_rnd_buffer_size=4M
- bulk_insert_buffer_size=64M
- tmp_table_size=64M
- max_heap_table_size=64M
- net_buffer_length=16K
- max_allowed_packet=1G
- #******timeout
- lock_wait_timeout=600
- connect_timeout=10
- interactive_timeout=31536000
- wait_timeout=31536000
- net_read_timeout=86400
- net_write_timeout=86400
- net_retry_count=10
- #**********************InnoDB**************************
- transaction_isolation=READ-COMMITTED
- innodb_buffer_pool_size=200G
- innodb_buffer_pool_instances=16
- innodb_max_dirty_pages_pct=90
- innodb_flush_log_at_trx_commit=0
- innodb_log_buffer_size=1G
- innodb_page_cleaners=8
- innodb_buffer_pool_dump_at_shutdown=ON
- innodb_buffer_pool_load_at_startup=ON
- innodb_buffer_pool_dump_pct=100
- innodb_checksum_algorithm=NONE
- innodb_log_checksums=NO
- innodb_undo_log_truncate=OFF
- innodb_change_buffering = none
- innodb_spin_wait_delay=6
- innodb_spin_wait_pause_multiplier=50
- innodb_sync_spin_loops=30
- #******feature
- innodb_open_files=65535
- innodb_flush_method=O_DIRECT
- innodb_flush_neighbors=0
- innodb_flush_sync=ON
- innodb_io_capacity=20000
- innodb_io_capacity_max=40000
- innodb_lru_scan_depth=9000
- innodb_lock_wait_timeout=30
- innodb_print_all_deadlocks=ON
- innodb_online_alter_log_max_size=4G
- innodb_thread_concurrency=0
- innodb_read_io_threads=32
- innodb_write_io_threads=32
- innodb_doublewrite=ON
- innodb_doublewrite_pages=64
- innodb_adaptive_hash_index=OFF
- innodb_status_file=OFF
复制代码 1、窄表 + 有自增主键
- greatsql> CREATE TABLE t1 (
- c1 int invisible auto_increment primary key,
- c2 int,
- str1 int DEFAULT(100) NOT NULL,
- str2 int DEFAULT(100) NOT NULL,
- str3 int DEFAULT(100) NOT NULL,
- str4 int DEFAULT(100) NOT NULL
- ) engine=InnoDB;
- greatsql> CREATE TABLE t2 LIKE t1;
复制代码行平均长度约 30 字节
行数插入sql线程数总用时解释1000万行insert into t2 select * from t1;单线程1 min 46.47 sec1000万行insert into t2 select * from t1;21 min 15.45 sec1000万行insert into t2 select * from t1;655.74 sec1000万行insert into t2 select * from t1;1045.34 sec1000万行insert into t2 select * from t1;1643.78 sec最佳1000万行insert into t2 select * from t1;21 min 11.62 sec1000万行insert into t2 select * from t1;61 min 2.53 sec1000万行insert into t2 select * from t1;101 min 0.55 sec1000万行insert into t2 select * from t1;1655.46 sec1000万行insert into t2 select * from t1;21 min 24.13 sec1000万行insert into t2 select * from t1;61 min 14.43 sec1000万行insert into t2 select * from t1;101 min 10.64 sec2、中等宽度表 + 有自增主键
- greatsql> CREATE TABLE t1 (
- c1 int invisible auto_increment primary key,
- c2 int,
- str1 varchar(512) CHARACTER SET latin1 DEFAULT(repeat('a',512)) NOT NULL
- ) engine=InnoDB;
- greatsql> CREATE TABLE t2 LIKE t1;
复制代码行平均长度约 500多字节
行数插入sql线程数总用时解释1000万行insert into t2 select * from t1;单线程4 min 30.41 sec1000万行insert into t2 select * from t1;24 min 36.58 sec效率无提升1000万行insert into t2 select * from t1;64 min 46.58 sec效率无提升1000万行insert into t2 select * from t1;104 min 50.43 sec效率无提升1000万行insert into t2 select * from t1;24 min 26.28 sec效率无提升1000万行insert into t2 select * from t1;64 min 38.84 sec效率无提升1000万行insert into t2 select * from t1;104 min 49.42 sec效率无提升1000万行insert into t2 select * from t1;24 min 25.67 sec效率无提升1000万行insert into t2 select * from t1;64 min 40.42 sec效率无提升1000万行insert into t2 select * from t1;104 min 46.70 sec效率无提升3、宽表 + 有自增主键
- greatsql> CREATE TABLE t1 (
- c1 int invisible auto_increment primary key,
- c2 int,
- str1 varchar(1024) CHARACTER SET latin1 DEFAULT(repeat('a1',512)) NOT NULL,
- str2 varchar(1024) CHARACTER SET latin1 DEFAULT(repeat('b2',512)) NOT NULL,
- str3 varchar(1024) CHARACTER SET latin1 DEFAULT(repeat('c3',512)) NOT NULL,
- str4 varchar(1024) CHARACTER SET latin1 DEFAULT(repeat('c3',512)) NOT NULL
- ) engine=InnoDB;
- greatsql> CREATE TABLE t2 LIKE t1;
复制代码行平均长度约 5000多字节
行数插入sql线程数总用时/s解释100万行insert into t2 select * from t1;单线程4 min 32.90 sec100万行insert into t2 select * from t1;24 min 52.36 sec性能劣化100万行insert into t2 select * from t1;24 min 52.31 sec性能劣化100万行insert into t2 select * from t1;65 min 36.07 sec性能劣化100万行insert into t2 select * from t1;24 min 48.34 sec性能劣化4、窄表 + 无主键(GIPK关)
- greatsql> CREATE TABLE t1 (
- c1 int invisible auto_increment primary key,
- c2 int,
- str1 int DEFAULT(100) NOT NULL,
- str2 int DEFAULT(100) NOT NULL,
- str3 int DEFAULT(100) NOT NULL,
- str4 int DEFAULT(100) NOT NULL
- ) engine=InnoDB;
- greatsql> CREATE TABLE t2 (
- c2 int,
- str1 int DEFAULT(100) NOT NULL,
- str2 int DEFAULT(100) NOT NULL,
- str3 int DEFAULT(100) NOT NULL,
- str4 int DEFAULT(100) NOT NULL
- ) engine=InnoDB;
复制代码行平均宽度约 30 字节,没用 GIPKs 特性
行数插入sql线程数总用时解释1000万行insert into t2 select * from t1;单线程1 min 29.08 sec1000万行insert into t2 select * from t1;21 min 38.84 sec性能劣化1000万行insert into t2 select * from t1;62 min 18.88 sec性能劣化1000万行insert into t2 select * from t1;102 min 14.51 sec性能劣化1000万行insert into t2 select * from t1;21 min 35.96 sec性能劣化1000万行insert into t2 select * from t1;62 min 3.55 sec性能劣化1000万行insert into t2 select * from t1;102 min 1.52 sec性能劣化5、结论
从上面的测试结果可以看出,无主键表并发插入劣化严重,有主键的情况下,窄表的并发性能提升效果好,但是对于宽表反而会造成劣化。
二、问题调查过程
1、首先调查无主键的情况下长数据造成性能劣化的情况。用perf查看造成性能劣化的瓶颈在哪里。- + 12.55% 0.01% mysqld mysqld [.] lock_clust_rec_read_check_and_lock
- + 12.34% 0.13% mysqld [kernel.kallsyms] [k] x64_sys_call
- + 12.03% 0.01% mysqld mysqld [.] Buf_fetch<Buf_fetch_normal>::mtr_add_page
- + 11.92% 0.01% mysqld mysqld [.] btr_cur_ins_lock_and_undo
- + 11.27% 0.19% mysqld [kernel.kallsyms] [k] __x64_sys_futex
- - 11.18% 11.11% mysqld mysqld [.] ut_delay
- 10.89% start_thread
- pfs_spawn_thread
- handle_connection
- do_command
- dispatch_command
- dispatch_sql_command
- mysql_execute_command
- Sql_cmd_dml::execute
- Sql_cmd_dml::execute_inner
- Query_expression::execute
- - Query_expression::ExecuteIteratorQuery
- - 10.75% Query_result_insert::send_data
- write_record
- handler::ha_write_row
- ha_innobase::write_row
- row_insert_for_mysql
- row_insert_for_mysql_using_ins_graph
- row_ins_step
- row_ins
- row_ins_index_entry_step
- row_ins_index_entry
- row_ins_clust_index_entry
- - row_ins_clust_index_entry_low
- - 10.16% btr_pcur_t::open
- - btr_cur_search_to_nth_level
- - 10.10% buf_page_get_gen
- - 10.08% Buf_fetch<Buf_fetch_normal>::single_page
- - 10.07% Buf_fetch<Buf_fetch_normal>::mtr_add_page
- rw_lock_x_lock_gen
- - pfs_rw_lock_x_lock_func
- - 10.07% rw_lock_x_lock_func
- ut_delay
- + 11.08% 0.21% mysqld [kernel.kallsyms] [k] do_futex
- + 10.90% 0.02% mysqld mysqld [.] rw_lock_x_lock_func
- + 10.90% 0.00% mysqld mysqld [.] pfs_rw_lock_x_lock_func
- + 10.90% 0.01% mysqld mysqld [.] rw_lock_x_lock_gen
- + 9.44% 0.01% mysqld mysqld [.] locksys::owns_page_shard
- + 9.39% 0.05% mysqld mysqld [.] locksys::Latches::owns_page_shard
- + 9.29% 0.64% mysqld mysqld [.] PolicyMutex<TTASEventMutex<GenericPolicy> >::trylock
- + 7.66% 0.02% mysqld mysqld [.] locksys::rec_queue_latch_and_validate
- + 7.65% 0.17% mysqld mysqld [.] rw_lock_debug_mutex_exit
- + 7.39% 0.07% mysqld mysqld [.] trx_undo_report_row_operation
- + 7.17% 0.01% mysqld mysqld [.] buf_pool_validate_instance
- + 7.17% 0.00% mysqld mysqld [.] buf_validate
- - 6.63% 5.87% mysqld mysqld [.] unlikely
- - 5.84% start_thread
- pfs_spawn_thread
- handle_connection
- do_command
- dispatch_command
- dispatch_sql_command
- mysql_execute_command
- Sql_cmd_dml::execute
- Sql_cmd_dml::execute_inner
- Query_expression::execute
- - Query_expression::ExecuteIteratorQuery
- - 4.44% Query_result_insert::send_data
- write_record
- handler::ha_write_row
- ha_innobase::write_row
- - row_insert_for_mysql
- - 4.43% row_insert_for_mysql_using_ins_graph
- - 4.42% row_ins_step
- - 4.41% row_ins
- - 4.41% row_ins_index_entry_step
- - 4.35% row_ins_index_entry
- row_ins_clust_index_entry
- - row_ins_clust_index_entry_low
- - 1.89% btr_pcur_t::open
- - btr_cur_search_to_nth_level
- - 1.08% page_cur_search_with_match
- 0.52% dtuple_t::compare
- - 1.87% btr_cur_optimistic_insert
- 1.13% page_cur_tuple_insert
- 0.56% btr_cur_ins_lock_and_undo
- + 1.40% TableScanIterator::Read
- 0.76% unlikely
- + 6.38% 0.45% mysqld [kernel.kallsyms] [k] futex_wait
- + 5.88% 0.02% mysqld mysqld [.] Buf_fetch_normal::get
- + 5.76% 0.01% mysqld mysqld [.] lock_rec_lock
复制代码 可以看到多线程数据插入时候,在无主键的情况下频繁的索引分裂影响性能,所以导致性能劣化严重。
那么有自增主键的情况下性能是什么情况呢?
看一下有自增主键的情况下性能情况,在有自增主键的时候与上面无主键的性能瓶颈处差不多,但是没有那么严重的频繁的索引分裂的性能问题。- + 10.16% 0.67% mysqld mysqld [.] PolicyMutex<TTASEventMutex<GenericPolicy> >::trylock
- + 9.25% 0.03% mysqld mysqld [.] trx_undo_report_row_operation
- + 8.49% 0.31% mysqld [kernel.kallsyms] [k] futex_wait
- + 7.77% 0.00% mysqld libstdc++.so.6.0.32 [.] 0x00007ffff7958793
- + 7.59% 0.17% mysqld mysqld [.] rw_lock_debug_mutex_exit
- - 7.30% 6.87% mysqld mysqld [.] unlikely
- 6.58% start_thread
- pfs_spawn_thread
- handle_connection
- do_command
- dispatch_command
- dispatch_sql_command
- mysql_execute_command
- Sql_cmd_dml::execute
- Sql_cmd_dml::execute_inner
- Query_expression::execute
- - Query_expression::ExecuteIteratorQuery
- - 5.01% Query_result_insert::send_data
- - 5.00% write_record
- handler::ha_write_row
- - ha_innobase::write_row
- - 4.99% row_insert_for_mysql
- - row_insert_for_mysql_using_ins_graph
- - 4.96% row_ins_step
- - row_ins
- - 4.95% row_ins_index_entry_step
- - 4.91% row_ins_index_entry
- row_ins_clust_index_entry
- - row_ins_clust_index_entry_low
- - 2.63% btr_cur_optimistic_insert
- - 1.35% page_cur_tuple_insert
- 0.67% page_cur_insert_rec_low
- - 0.87% btr_cur_ins_lock_and_undo
- 0.59% trx_undo_report_row_operation
- - 1.22% btr_pcur_t::open
- 1.20% btr_cur_search_to_nth_level
- - 0.55% btr_cur_pessimistic_insert
- - 0.55% btr_page_split_and_insert
- 0.51% page_validate
- + 1.57% TableScanIterator::Read
- + 7.06% 0.03% mysqld mysqld [.] mtr_t::commit
- + 7.02% 0.00% mysqld mysqld [.] buf_validate
- + 7.01% 0.02% mysqld mysqld [.] buf_pool_validate_instance
复制代码 我们知道对自增主键插入新数据时,是先获取一个区段锁,这样可以避免频繁持锁造成的性能劣化。而无显式定义主键表(其他可用于聚集索引的非空唯一索引也没有)时,会采用实例级的 DB_ROW_ID 作为该表的聚集索引,这个 DB_ROW_ID 每插入一行都需要请求加锁,因此会比自增主键表更加耗时。所以上面可以看到无主键表的多线程插入性能反而比单线程劣化。
2、接着调查有主键的情况下宽表造成性能劣化的情况。用perf查看造成性能劣化的瓶颈在哪里。- + 12.66% 0.05% mysqld [kernel.kallsyms] [k] do_syscall_64
- - 12.65% 12.30% mysqld mysqld [.] ut_delay
- - 7.46% start_thread
- pfs_spawn_thread
- handle_connection
- do_command
- dispatch_command
- dispatch_sql_command
- mysql_execute_command
- Sql_cmd_dml::execute
- Sql_cmd_dml::execute_inner
- Query_expression::execute
- - Query_expression::ExecuteIteratorQuery
- - 7.27% Query_result_insert::send_data
- write_record
- handler::ha_write_row
- ha_innobase::write_row
- row_insert_for_mysql
- row_insert_for_mysql_using_ins_graph
- row_ins_step
- row_ins
- row_ins_index_entry_step
- row_ins_index_entry
- row_ins_clust_index_entry
- - row_ins_clust_index_entry_low
- - 4.53% btr_pcur_t::open
- - btr_cur_search_to_nth_level
- - 2.52% mtr_t::sx_lock
- rw_lock_sx_lock_gen
- - pfs_rw_lock_sx_lock_func
- + 2.52% rw_lock_sx_lock_func
- - 1.85% buf_page_get_gen
- - 1.84% Buf_fetch<Buf_fetch_normal>::single_page
- - 1.81% Buf_fetch<Buf_fetch_normal>::mtr_add_page
- rw_lock_s_lock_gen
- rw_lock_s_lock_func
- rw_lock_s_lock_spin
- ut_delay
- - 2.07% btr_height_get
- btr_root_block_get
- btr_block_get
- btr_block_get_func
- - buf_page_get_gen
- - 2.06% Buf_fetch<Buf_fetch_normal>::single_page
- - 2.05% Buf_fetch<Buf_fetch_normal>::mtr_add_page
- rw_lock_s_lock_gen
- rw_lock_s_lock_func
- rw_lock_s_lock_spin
- ut_delay
- 0.50% btr_cur_optimistic_insert
- - 4.85% 0x7ffff7958793
- - 4.70% std::thread::_State_impl<std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> > >::_M_run
- std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> >::operator()
- std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> >::_M_invoke<0ul, 1ul, 2ul>
- std::__invoke<Detached_thread, void (*)(unsigned long), unsigned long>
- std::__invoke_impl<void, Detached_thread, void (*)(unsigned long), unsigned long>
- Detached_thread::operator()<void (*)(unsigned long), unsigned long>
- std::_Bind<void (*(unsigned long))(unsigned long)>::operator()<, void>
- std::_Bind<void (*(unsigned long))(unsigned long)>::__call<void, , 0ul>
- std::__invoke<void (*&)(unsigned long), unsigned long&>
- - std::__invoke_impl<void, void (*&)(unsigned long), unsigned long&>
- - 4.53% io_handler_thread
- - fil_aio_wait
- - 4.14% buf_page_io_complete
- - 1.48% buf_flush_write_complete
- - 1.48% mutex_enter_inline<PolicyMutex<TTASEventMutex<GenericPolicy> > >
- PolicyMutex<TTASEventMutex<GenericPolicy> >::enter
- TTASEventMutex<GenericPolicy>::enter
- TTASEventMutex<GenericPolicy>::spin_and_try_lock
- TTASEventMutex<GenericPolicy>::is_free
- ut_delay
- - 1.38% ibuf_merge_or_delete_for_page
- - 1.19% ibuf_bitmap_get_map_page
- buf_page_get_gen
- Buf_fetch<Buf_fetch_normal>::single_page
- Buf_fetch<Buf_fetch_normal>::mtr_add_page
- rw_lock_x_lock_gen
- pfs_rw_lock_x_lock_func
- rw_lock_x_lock_func
- ut_delay
- - 1.04% mutex_enter_inline<PolicyMutex<TTASEventMutex<GenericPolicy> > >
- PolicyMutex<TTASEventMutex<GenericPolicy> >::enter
- TTASEventMutex<GenericPolicy>::enter
- TTASEventMutex<GenericPolicy>::spin_and_try_lock
- TTASEventMutex<GenericPolicy>::is_free
- ut_delay
- + 12.57% 0.41% mysqld [kernel.kallsyms] [k] x64_sys_call
- + 9.93% 0.00% mysqld mysqld [.] buf_validate
- + 9.91% 0.06% mysqld mysqld [.] buf_pool_validate_instance
- + 9.73% 0.11% mysqld [kernel.kallsyms] [k] __x64_sys_futex
- + 9.72% 0.30% mysqld mysqld [.] rw_lock_debug_mutex_enter
- + 9.61% 0.13% mysqld [kernel.kallsyms] [k] do_futex
- + 8.23% 0.03% mysqld mysqld [.] TableScanIterator::Read
- + 8.20% 0.02% mysqld mysqld [.] handler::ha_rnd_next
- + 8.17% 0.00% mysqld mysqld [.] ha_innobase::rnd_next
- + 8.17% 0.00% mysqld mysqld [.] ha_innobase::general_fetch
- + 8.08% 0.05% mysqld mysqld [.] row_search_mvcc
- + 7.75% 0.02% mysqld mysqld [.] buf_page_t::Latching_rules_helpers::assert_latches_let_distinguish
- + 7.71% 0.03% mysqld mysqld [.] buf_page_t::is_io_fix_write
- + 7.41% 0.04% mysqld mysqld [.] ut::Stateful_latching_rules<buf_io_fix, 3ul>::assert_latches_let_distinguish
- + 7.10% 0.01% mysqld mysqld [.] btr_cur_optimistic_insert
- + 6.98% 0.02% mysqld mysqld [.] Buf_fetch_normal::get
- + 6.76% 0.29% mysqld mysqld [.] PolicyMutex<TTASEventMutex<GenericPolicy> >::trylock
- + 6.50% 0.02% mysqld mysqld [.] Buf_fetch<Buf_fetch_normal>::mtr_add_page
- + 6.15% 0.04% mysqld mysqld [.] ut::Stateful_latching_rules<buf_io_fix, 3ul>::assert_latches_let_distinguish
- + 6.11% 0.02% mysqld mysqld [.] ut::Stateful_latching_rules<buf_io_fix, 3ul>::can_leave
- + 5.92% 0.27% mysqld [kernel.kallsyms] [k] futex_wait
- + 5.89% 0.11% mysqld mysqld [.] rw_lock_debug_mutex_exit
- + 5.89% 0.01% mysqld mysqld [.] btr_block_get
- + 5.88% 0.00% mysqld mysqld [.] btr_block_get_func
- + 5.74% 0.02% mysqld mysqld [.] rw_lock_s_lock_func
- + 5.50% 0.01% mysqld mysqld [.] PolicyMutex<TTASEventMutex<GenericPolicy> >::enter
- - 5.49% 4.80% mysqld mysqld [.] unlikely
- - 3.89% start_thread
- pfs_spawn_thread
- handle_connection
- do_command
- dispatch_command
- dispatch_sql_command
- mysql_execute_command
- Sql_cmd_dml::execute
- Sql_cmd_dml::execute_inner
- Query_expression::execute
- - Query_expression::ExecuteIteratorQuery
- - 3.37% Query_result_insert::send_data
- write_record
- handler::ha_write_row
- ha_innobase::write_row
- row_insert_for_mysql
- row_insert_for_mysql_using_ins_graph
- row_ins_step
- row_ins
- - row_ins_index_entry_step
- - 3.33% row_ins_index_entry
- row_ins_clust_index_entry
- - row_ins_clust_index_entry_low
- - 1.34% btr_pcur_t::open
- - 1.34% btr_cur_search_to_nth_level
- 0.87% page_cur_search_with_match
- - 1.26% btr_cur_pessimistic_insert
- 1.19% btr_page_split_and_insert
- + 0.52% TableScanIterator::Read
- 0.90% 0x7ffff7958793
- 0.69% unlikely
复制代码 可以看到除了上面第一点提到的索引分裂的影响外,还增加了页分裂以及页读写相关的 I/O 操作影响性能,宽表情况下对于读写性能比短数据更加依赖机器的性能和配置,并且会随着写入数据的增加降低写入效率。
同样的,看看单线程情况下宽表的 perf 情况。- + 8.05% 0.02% mysqld mysqld [.] ut::Stateful_latching_rules<buf_io_fix, 3ul>::can_leave
- + 7.95% 0.00% mysqld mysqld [.] mutex_enter_inline<PolicyMutex<TTASEventMutex<GenericPolicy> > >
- + 7.81% 0.03% mysqld [kernel.kallsyms] [k] x64_sys_call
- - 7.50% 7.50% mysqld mysqld [.] ut_delay
- - 7.41% 0x7ffff7958793
- - 7.36% std::thread::_State_impl<std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> > >::_M_run
- std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> >::operator()
- std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> >::_M_invoke<0ul, 1ul, 2ul>
- std::__invoke<Detached_thread, void (*)(unsigned long), unsigned long>
- std::__invoke_impl<void, Detached_thread, void (*)(unsigned long), unsigned long>
- Detached_thread::operator()<void (*)(unsigned long), unsigned long>
- std::_Bind<void (*(unsigned long))(unsigned long)>::operator()<, void>
- std::_Bind<void (*(unsigned long))(unsigned long)>::__call<void, , 0ul>
- std::__invoke<void (*&)(unsigned long), unsigned long&>
- - std::__invoke_impl<void, void (*&)(unsigned long), unsigned long&>
- - 7.34% io_handler_thread
- fil_aio_wait
- - buf_page_io_complete
- - 5.01% buf_flush_write_complete
- mutex_enter_inline<PolicyMutex<TTASEventMutex<GenericPolicy> > >
- PolicyMutex<TTASEventMutex<GenericPolicy> >::enter
- TTASEventMutex<GenericPolicy>::enter
- TTASEventMutex<GenericPolicy>::spin_and_try_lock
- TTASEventMutex<GenericPolicy>::is_free
- ut_delay
- - 1.25% ibuf_merge_or_delete_for_page
- ibuf_bitmap_get_map_page
- buf_page_get_gen
- Buf_fetch<Buf_fetch_normal>::single_page
- Buf_fetch<Buf_fetch_normal>::mtr_add_page
- rw_lock_x_lock_gen
- pfs_rw_lock_x_lock_func
- rw_lock_x_lock_func
- ut_delay
- - 1.08% mutex_enter_inline<PolicyMutex<TTASEventMutex<GenericPolicy> > >
- PolicyMutex<TTASEventMutex<GenericPolicy> >::enter
- TTASEventMutex<GenericPolicy>::enter
- TTASEventMutex<GenericPolicy>::spin_and_try_lock
- TTASEventMutex<GenericPolicy>::is_free
- ut_delay
- + 7.30% 0.04% mysqld mysqld [.] buf_page_t::Latching_rules_helpers::assert_latches_let_distinguish
- + 4.37% 0.00% mysqld mysqld [.] Fil_system::flush_file_spaces
- - 4.14% 3.88% mysqld mysqld [.] unlikely
- - 2.75% start_thread
- pfs_spawn_thread
- handle_connection
- do_command
- dispatch_command
- dispatch_sql_command
- mysql_execute_command
- Sql_cmd_dml::execute
- Sql_cmd_dml::execute_inner
- Query_expression::execute
- - Query_expression::ExecuteIteratorQuery
- - 2.46% Query_result_insert::send_data
- write_record
- handler::ha_write_row
- ha_innobase::write_row
- row_insert_for_mysql
- row_insert_for_mysql_using_ins_graph
- row_ins_step
- - row_ins
- - 2.44% row_ins_index_entry_step
- - 2.41% row_ins_index_entry
- row_ins_clust_index_entry
- - row_ins_clust_index_entry_low
- - 0.97% btr_pcur_t::open
- - btr_cur_search_to_nth_level
- 0.55% page_cur_search_with_match
- - 0.80% btr_cur_pessimistic_insert
- 0.70% btr_page_split_and_insert
- - 1.13% 0x7ffff7958793
- - 1.02% std::thread::_State_impl<std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> > >::_M_run
- std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> >::operator()
- std::thread::_Invoker<std::tuple<Detached_thread, void (*)(unsigned long), unsigned long> >::_M_invoke<0ul, 1ul, 2ul>
- std::__invoke<Detached_thread, void (*)(unsigned long), unsigned long>
- std::__invoke_impl<void, Detached_thread, void (*)(unsigned long), unsigned long>
- Detached_thread::operator()<void (*)(unsigned long), unsigned long>
- std::_Bind<void (*(unsigned long))(unsigned long)>::operator()<, void>
- std::_Bind<void (*(unsigned long))(unsigned long)>::__call<void, , 0ul>
- std::__invoke<void (*&)(unsigned long), unsigned long&>
- - std::__invoke_impl<void, void (*&)(unsigned long), unsigned long&>
- - 0.70% io_handler_thread
- - fil_aio_wait
- - 0.65% buf_page_io_complete
- buf_flush_write_complete
- dblwr::write_complete
- Double_write::write_complete
- fil_flush_file_spaces
- Fil_system::flush_file_spaces
- - Fil_shard::flush_file_spaces
- + 0.65% Fil_shard::acquire
- + 3.97% 0.58% mysqld mysqld [.] Fil_shard::flush_file_spaces
复制代码 对比多线程和单线程的堆栈信息,可以看到多线程下抢 I/O 资源情况变严重,同时索引分裂和页分裂更严重,要花更多时间进行页位置比较、计算数据占用的空间和插入,另外由于抢锁导致多线程性能劣化更严重。
三、问题解决方案
通过上面分析发现,要想保证多线程插入速度比单线程快需要满足一定的条件:
- 目标表有主键,当主键列没有设置自增属性的情况下,每个线程间的主键值需要是离散且单个线程内主键值需要连续,这是为了保证索引插入的性能最高。比如线程1主键值为[1-1000],线程2主键值为[10000-11000],线程3主键值为[30000-31000]这样排列效率会比[1-3000]分三个线程高很多。
- 如果目标表主键是自增主键的话,因为无法定义自增主键的值,只能按照插入时候确定键值,因此自增主键会比非自增主键多线程离散值性能慢,但是因为自增主键持有的是区段锁,因此不会频繁持锁方面性能又会比非自增主键好。实际使用的时候还是要看源表的数据分布情况以及目标表结构来决定要怎么分配多线程的插入主键值。
- 目标表是宽表时,从上面的测试数据来看单行数据超过 520Bytes 多线程就开始劣化了,因此多线程插入只针对窄表有效。
四、问题总结
一般我们都认为多线程插入数据肯定比单线程插入快,但是通过本次的测试和分析可以看到实际情况需要结合数据库参数配置、索引、存储页以及磁盘的 I/O 性能等一起看,还要看源表主键值分布情况和目标表的表结构,并不是所有多线程插入都能产生更好的效果节省时间,有时候反而可能多线程比单线程更耗时间以及资源。
Enjoy GreatSQL
来源:https://www.cnblogs.com/greatsql/p/18310838
免责声明:由于采集信息均来自互联网,如果侵犯了您的权益,请联系我们【E-Mail:cb@itdo.tech】 我们会及时删除侵权内容,谢谢合作! |
|