﻿/* GAP ANALYSIS */
/* Setup: switch to the working schema and build a one-column integer table.
   The inserted values cover 1..10 but leave out 3, 4 and 7, so the queries
   further down have gaps to find. */
set schema cdkoen;

create table gap (
    counter integer
);

/* Load all sample values with a single multi-row insert. */
insert into gap (counter)
values (1), (2), (5), (6), (8), (9), (10);

/* Show the raw contents of the table. */
select counter
from gap;

/* Finding gaps with an exclusion join */
/* Recursive CTE producing the integers 1 through 10: the anchor row is
   selected from SYSIBM.SYSDUMMY1, and each recursive step adds 1 to the
   previous value as long as that value is still below 10. */
with dummy (id) as (
    select 1
    from SYSIBM.SYSDUMMY1

    union all

    select seq.id + 1
    from dummy as seq
    where seq.id < 10
)
select seq.id
from dummy as seq;


/* Left join the generated sequence 1..10 against gap. Every sequence value
   is kept; counter is NULL wherever gap has no row with that value. */
with dummy (id) as (
    select 1
    from SYSIBM.SYSDUMMY1

    union all

    select seq.id + 1
    from dummy as seq
    where seq.id < 10
)
select seq.id,
       g.counter
from dummy as seq
left join gap as g
    on seq.id = g.counter;

/* Exclusion join: same left join of the sequence 1..10 against gap, but only
   the unmatched rows are kept, so the result lists the sequence values that
   are absent from gap (counter is NULL on every returned row). */
with dummy (id) as (
    select 1
    from SYSIBM.SYSDUMMY1

    union all

    select seq.id + 1
    from dummy as seq
    where seq.id < 10
)
select seq.id,
       g.counter
from dummy as seq
left join gap as g
    on seq.id = g.counter
where g.counter is null;

/* Finding ranges of missing values (gaps) */

/* Candidate gap starts: look for each row's immediate successor. Where no
   successor exists, counter + 1 is reported as the first missing value.
   The largest value in the table has no successor either, so its
   counter + 1 is reported as well. */
select cur.counter + 1 as start
from gap as cur
left join gap as nxt
    on cur.counter = nxt.counter - 1
where nxt.counter is null;


/* For every row, pair it with all strictly larger values and report the
   smallest of them minus one. A row with no larger value (the maximum)
   has nothing to pair with and therefore produces no output row. */
select min(higher.counter) - 1 as stop
from gap as cur
inner join gap as higher
    on cur.counter < higher.counter
group by cur.counter;

/* Ranges of missing values. A row opens a gap when no row holds its
   immediate successor; the gap runs from counter + 1 up to one below the
   next larger value present in the table. The inner join on larger values
   drops the maximum, which has no following value to close a gap. */
select cur.counter + 1 as start,
       min(higher.counter) - 1 as stop
from gap as cur
inner join gap as higher
    on cur.counter < higher.counter
where not exists (
    select 1
    from gap as nxt
    where cur.counter = nxt.counter - 1
)
group by cur.counter;
         
/* Finding ranges of continuous values */

/* Pair every value with its immediate predecessor (counter - 1). The second
   column is NULL when the predecessor is absent, i.e. when the row is the
   first value of a run of consecutive values. */
select cur.counter,
       prev.counter
from gap as cur
left join gap as prev
    on prev.counter = cur.counter - 1;

/* Run ends: rows whose immediate successor (counter + 1) is absent.
   The second column comes from the unmatched side of the join and is
   therefore NULL on every returned row. */
select cur.counter as stop,
       nxt.counter
from gap as cur
left join gap as nxt
    on nxt.counter = cur.counter + 1
where nxt.counter is null;

/* Ranges of consecutive values.
   Outer query: a row starts a run when its predecessor (counter - 1) is
   absent from the table.
   Scalar subquery: among all run ends (rows whose successor counter + 1 is
   absent) at or after that start, take the nearest one. min() is used rather
   than an unordered "limit 1": without an ORDER BY the engine is free to
   return any qualifying run end, not necessarily the closest. */
select run_start.counter as start,
       (
           select min(run_end.counter)
           from gap as run_end
           left join gap as nxt
               on nxt.counter = run_end.counter + 1
           where nxt.counter is null
             and run_end.counter >= run_start.counter
       ) as stop
from gap as run_start
left join gap as prev
    on prev.counter = run_start.counter - 1
where prev.counter is null;

/* mailchimp data */
/* Table for the campaign statistics analysed below: campaign id, send time,
   subject line, an e-mail address, and open/click counters.
   NOTE(review): presumably one row per campaign/recipient pair - confirm
   against the data load, which is not part of this script. */
create table stats (
    campaign_id varchar(16),
    sent_time   timestamp,
    subject     varchar(256),
    email       varchar(64),
    open_count  integer,
    click_count integer
);

/* Show the raw contents of the table, all columns in declaration order. */
select campaign_id,
       sent_time,
       subject,
       email,
       open_count,
       click_count
from stats;

/* First step of a hand-computed Pearson correlation between subject length
   and open count: centre both variables on their means and list the
   per-row deviations. */
with table_mean as (
    /* Single-row CTE holding both means. The inputs are cast to double
       first: Db2's avg() over an INTEGER argument returns an INTEGER, which
       would silently truncate the mean. */
    select avg(cast(length(subject) as double)) as mean_subject_length,
           avg(cast(open_count as double)) as mean_open_rate
    from stats
),
/* No comma may follow the last CTE: the final select comes directly after
   its closing parenthesis. */
table_corrected as (
    /* table_mean has exactly one row, so the cross join simply attaches the
       two means to every stats row. */
    select length(subject) - mean_subject_length as mean_subject_length_corrected,
           open_count - mean_open_rate as mean_open_rate_corrected
    from stats
    cross join table_mean
)
select mean_subject_length_corrected,
       mean_open_rate_corrected
from table_corrected;
    
/* Hand-computed Pearson correlation coefficient between subject length (x)
   and open count (y):
       r = sum(dx * dy) / sqrt(sum(dx * dx) * sum(dy * dy))
   where dx and dy are the per-row deviations from the respective means. */
with table_mean as (
    /* Single-row CTE holding both means. The inputs are cast to double
       first: Db2's avg() over an INTEGER argument returns an INTEGER, and a
       truncated mean would make r disagree with the built-in corr(). */
    select avg(cast(length(subject) as double)) as mean_subject_length,
           avg(cast(open_count as double)) as mean_open_rate
    from stats
),
table_corrected as (
    /* table_mean has exactly one row, so the cross join simply attaches the
       two means to every stats row. */
    select length(subject) - mean_subject_length as mean_subject_length_corrected,
           open_count - mean_open_rate as mean_open_rate_corrected
    from stats
    cross join table_mean
)
select sum(mean_subject_length_corrected * mean_open_rate_corrected)
       / nullif(
             /* A constant column makes one sum of squares 0; nullif turns
                the resulting division by zero into a NULL result. */
             sqrt(
                 sum(mean_subject_length_corrected * mean_subject_length_corrected)
                 * sum(mean_open_rate_corrected * mean_open_rate_corrected)
             ),
             0
         ) as r
from table_corrected;
                   
/* Correlation between subject length and open count, this time computed by
   the built-in corr() aggregate. */
select corr(length(subject), open_count) as r
from stats;



/* Correlation coefficients in long form: one row per (rij, col) pair, where
   rij labels the matrix row and col the matrix column. Only the diagonal and
   the pairs above it are computed (6 rows for 3 variables).
   The branches are combined with UNION ALL: each branch yields a single row
   with a distinct (rij, col) label pair, so there is nothing for a
   de-duplicating UNION to remove. */
select 'subject_length' as rij,
       'subject_length' as col,
       corr(length(subject), length(subject)) as coeff
from stats

union all

select 'subject_length' as rij,
       'open_rate' as col,
       corr(length(subject), open_count) as coeff
from stats

union all

select 'subject_length' as rij,
       'click_rate' as col,
       corr(length(subject), click_count) as coeff
from stats

union all

select 'open_rate' as rij,
       'open_rate' as col,
       corr(open_count, open_count) as coeff
from stats

union all

select 'open_rate' as rij,
       'click_rate' as col,
       corr(open_count, click_count) as coeff
from stats

union all

select 'click_rate' as rij,
       'click_rate' as col,
       corr(click_count, click_count) as coeff
from stats;

/* Standard deviation of the click counts over all rows of stats. */
select stddev(click_count)
from stats;



/* Correlation matrix in wide form: pivots the long (rij, col, coeff) rows
   into one output row per variable with one column per variable.
   Only the diagonal and the pairs above it are computed in the derived
   table, so the cells below the diagonal fall through to the "else 0"
   branch and show 0. The descending sort on rij yields the row order
   subject_length, open_rate, click_rate, matching the column order. */
select pairs.rij,
       sum(case when pairs.col = 'subject_length' then pairs.coeff else 0 end) as subject_length,
       sum(case when pairs.col = 'open_rate' then pairs.coeff else 0 end) as open_rate,
       sum(case when pairs.col = 'click_rate' then pairs.coeff else 0 end) as click_rate
from (
    /* UNION ALL: each branch yields a single row with a distinct (rij, col)
       label pair, so a de-duplicating UNION has nothing to remove. */
    select 'subject_length' as rij,
           'subject_length' as col,
           corr(length(subject), length(subject)) as coeff
    from stats

    union all

    select 'subject_length' as rij,
           'open_rate' as col,
           corr(length(subject), open_count) as coeff
    from stats

    union all

    select 'subject_length' as rij,
           'click_rate' as col,
           corr(length(subject), click_count) as coeff
    from stats

    union all

    select 'open_rate' as rij,
           'open_rate' as col,
           corr(open_count, open_count) as coeff
    from stats

    union all

    select 'open_rate' as rij,
           'click_rate' as col,
           corr(open_count, click_count) as coeff
    from stats

    union all

    select 'click_rate' as rij,
           'click_rate' as col,
           corr(click_count, click_count) as coeff
    from stats
) as pairs  /* explicit correlation name for the derived table */
group by pairs.rij
order by pairs.rij desc;



   
                   
