-- Q1.1
select sum(lo_extendedprice*lo_discount) as
revenue
from lineorder join dim_date on lo_orderdatekey = d_datekey
where
d_year = 1993
and lo_discount between 1 and 3
and lo_quantity < 25;
-- Q1.2
select sum(lo_extendedprice*lo_discount) as revenue
from lineorder
join dim_date on lo_orderdatekey = d_datekey
where d_yearmonth = 199401 and lo_discount
between 4 and 6 and lo_quantity between 26 and 35;
-- Q1.3
select sum(lo_extendedprice*lo_discount) as revenue
from lineorder
join dim_date on lo_orderdatekey = d_datekey
where d_weeknuminyear = 6
and d_year = 1994
and lo_discount between 5 and 7 and lo_quantity between 26 and 35;
-- Q2.1
select sum(lo_revenue), d_year, p_brand
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join part
on lo_partkey = p_partkey join supplier
on lo_suppkey = s_suppkey
where p_category = 'MFGR#12'
and s_region = 'AMERICA'
group by d_year, p_brand
order by d_year, p_brand;
-- Q2.2
select sum(lo_revenue), d_year, p_brand
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join part
on lo_partkey = p_partkey
join supplier
on lo_suppkey = s_suppkey
where p_brand between 'MFGR#2221' and 'MFGR#2228'
and s_region = 'ASIA'
group by d_year, p_brand
order by d_year, p_brand;
-- Q2.3
select sum(lo_revenue), d_year, p_brand
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join part
on lo_partkey = p_partkey
join supplier
on lo_suppkey = s_suppkey
where p_brand= 'MFGR#2239'
and s_region = 'EUROPE'
group by d_year, p_brand
order by d_year, p_brand;
-- Q3.1
select c_nation, s_nation, d_year, sum(lo_revenue) as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join dim_date on lo_orderdatekey = d_datekey
where c_region = 'ASIA'
and s_region = 'ASIA'
and d_year >= 1992 and d_year <= 1997
group by c_nation, s_nation, d_year
order by d_year asc, revenue desc;
-- Q3.2
select c_city, s_city, d_year, sum(lo_revenue) as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join dim_date
on lo_orderdatekey = d_datekey
where c_nation = 'UNITED STATES'
and s_nation = 'UNITED STATES'
and d_year >= 1992
and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, revenue desc;
-- Q3.3
select c_city, s_city, d_year, sum(lo_revenue) as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier on lo_suppkey = s_suppkey
join dim_date on lo_orderdatekey = d_datekey
where (c_city='UNITED KI1' or c_city='UNITED KI5')
and (s_city='UNITED KI1' or s_city='UNITED KI5')
and d_year >= 1992
and d_year <= 1997
group by c_city, s_city, d_year
order by d_year asc, revenue desc;
-- Q3.4
select c_city, s_city, d_year, sum(lo_revenue)
as revenue
from customer
join lineorder
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join dim_date
on lo_orderdatekey = d_datekey
where
(c_city='UNITED KI1' or c_city='UNITED KI5')
and (s_city='UNITED KI1' or s_city='UNITED KI5')
and d_yearmonth = 'Dec1997'
group by c_city, s_city, d_year
order by d_year asc, revenue desc;
-- Q4.1
select d_year, c_nation,
sum(lo_revenue - lo_supplycost) as profit
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join customer
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join part
on lo_partkey = p_partkey
where
c_region = 'AMERICA'
and s_region = 'AMERICA'
and (p_mfgr = 'MFGR#1'
or p_mfgr = 'MFGR#2')
group by d_year, c_nation
order by d_year, c_nation;
-- Q4.2
select d_year, s_nation, p_category,
sum(lo_revenue - lo_supplycost) as profit
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join customer
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join part
on lo_partkey = p_partkey
where
c_region = 'AMERICA'
and s_region = 'AMERICA'
and (d_year = 1997 or d_year = 1998)
and (p_mfgr = 'MFGR#1'
or p_mfgr = 'MFGR#2')
group by d_year, s_nation, p_category
order by d_year, s_nation, p_category;
-- Q4.3
select d_year, s_city, p_brand,
sum(lo_revenue - lo_supplycost) as profit
from lineorder
join dim_date
on lo_orderdatekey = d_datekey
join customer
on lo_custkey = c_customerkey
join supplier
on lo_suppkey = s_suppkey
join part
on lo_partkey = p_partkey
where
s_nation = 'UNITED STATES'
and (d_year = 1997 or d_year = 1998)
and p_category = 'MFGR#14'
group by d_year, s_city, p_brand
order by d_year, s_city, p_brand;
Thanks Justin, I do not see conclusion section. So you suggest innodb_old_blocks_time default value should be zero?
“This blog post is not a definitive conclusion about innodb_old_blocks_pct or innodb_old_blocks_time. It does highlight how a data set much larger than the buffer pool may perform worse with innodb_old_blocks_time=1000, but as I said this needs further investigation.”
Khan,
This setting (as many others) is workload dependent. I think the default of 1000 is better choice than 0 for many, yet I think you better to test it out to see what works best for you.
Note this variable is something what you can change online without restarting the server which makes it easier to play with in production
Khan, almost all settings have defaults that work reasonably but can be tuned for specific workloads.
Justin is using a star schema sort of workload and that’s different from OLTP. In the OLTP case we expect occasional queries that do table scans but generally it’s best to prevent them from flushing the pages used by most queries. Analytical processing workloads for which the star schema tends to be used don’t necessarily have that property and may benefit from smaller or bigger, depending on workload, innodb_old_blocks_pct and innodb_old_blocks_time = 0.
If Justin wanted to he could show settings that work better for this job and worse for pure OLTP on the same hardware. It’s routine tuning for what’s happening with a specific server. Not good or bad settings, just different.
With the previous default it was useful to show benchmarks illustrating the value of setting innodb_old_blocks_pct to a non-zero value. Now we’ve changed the default it’s useful to show the ones where 0 can be better. Just to help people to know that defaults can be tuned and when it might be a good idea.
Defaults and general recommendations are good but can’t ever replace testing on specific combinations to find out what works best.
Views are my own, for an official Oracle view consult a PR person.
James Day, MySQL Senior Principal Support Engineer, Oracle
The queries aren’t applicable to only star schema. It is likely that a reporting workload on 5.6.10 on an any type of schema could be slower than 5.5.30 if the working set is larger than the buffer pool, even with very fast io (the FS cache is the fastest IO possible). It seems to me (just my humble opinion) that MySQL should be usable for more than sysbench! So people have to know that this setting could have negative impact and test it on their workload.
You can’t just suggest that everybody upgrade to 5.6.10 and it will be all roses and faster and better! Some people from Oracle have been suggesting just that.
>If Justin wanted to he could show settings that work better for this job and worse for pure OLTP on the same hardware.
>It’s routine tuning for what’s happening with a specific server. Not good or bad settings, just different.
Actually it isn’t clear if I can, because I haven’t tested the other settings. I tested with basic out of the box settings and saw an immediate very large performance difference on the SSB. It might be that enabling new optimizer features will help, or it may not. I don’t have the data to make such predictions. So as I said, I’ll be doing more testing.
If it isn’t obvious, I had to spend a LOT of time this week figuring out why 5.6.10 was so much slower out of the box, then documenting it and taking the time to post about it. I want to test other settings and see how they work, but I can’t put that all in a single post. Thus, this post is mainly about the huge out-of-the-box difference I saw with innodb_old_blocks_time.
I did not expect my performance to drop so significantly when testing 5.6.10 and people testing it on their workloads may not be aware of all the defaults changes and which ones might result in significant (and possibly negative) performance differences. Most of the defaults changes are good for all (or nearly all) workloads, but this one seems good only for OLTP and even then, maybe not in all cases (once again, needs more testing). The only testing I’ve seen on innodb_old_blocks_time is “sysbench + mysqldump” and even then, the tests were only done when the data fits in the buffer pool. There are many workloads that are significantly different from the synthetic sysbench test and many databases have working sets larger than the buffer pool.
Note:
It would be very nice if InnoDB had multiple buffer pools and supported placing tables into named BPs, or if it supported pinning tables into the BP. But it doesn’t do those things. In either case, the dimension tables could be placed (or pinned) into a specific buffer pool and it would eliminate the problem.
In this particular case, it might be beneficial to set up an event which does an FTS on the dimension tables frequently. They are small and this will keep them in the cache.
I agree that it’s good for people to know that this setting can have a negative effect for some workloads. I think it’s a good thing that you’re writing about one of those cases and I appreciate you doing it.
James Day
Why did you choose not to use referential integrity with foreign keys?