function PopulateValues(lastkey) { avg_domain_rating[lastkey]=total_domain_ratings/resultcount; avg_ref_domains[lastkey]=total_ref_domains/resultcount; zero_ref_domain_count[lastkey]=zero_ref_domain; weak_domain_rating_count[lastkey]=weak_domain_rating; serp_count[lastkey] = resultcount; if (lastkey~/how[[:space:]]*to|instruction|guide|manual/){ is_buyer[lastkey]="NO" } else { is_buyer[lastkey]="YES" } resultcount=zero_ref_domain=weak_domain_rating=total_ref_domains=total_domain_ratings=0; } BEGIN { FS=","; OFS=","; print "Keyword","Difficulty","# of 0 Refering Domains","# of DA less than 10","Buyer Keyword","Volume","Return Rate","Clicks","Average Refering Domain","Average Domain Authority","Number of SERP analysed" } { nicheSpecificFilterWords="adapter|cable|splitter"; filterWords="best[[:space:]]*buy|bestbuy|walmart|target|free|used|adapter|cable|splitter"; filterURLs="bestbuy.com|walmart.com|target.com" if ($NF<=10 && NR>1 && # $1!~/best[[:space:]]*buy|bestbuy|walmart|target|free|used|adapter|cable|splitter/ && $1!~filterWords && $1!~nicheSpecificFilterWords && # $2!~/bestbuy.com|walmart.com|target.com/) { $2!~filterURLs) { currentkey=$1; if (lastkey && currentkey!=lastkey){ PopulateValues(lastkey) } if (!resultcount || resultcount==0){ keyword[currentkey]; difficulty[currentkey]=$4; volome[currentkey]=$5; clicks[currentkey]=$6; return_rate[currentkey]=$8; } if ($12==0) zero_ref_domain++; if ($13<=10) weak_domain_rating++; total_ref_domains+=$12; total_domain_ratings+=$13; resultcount++; lastkey=currentkey; } } END { # Since we populate values AFTER the "key" rows are processed by awk # Last "key" needs to be populated here PopulateValues(lastkey) for(key in keyword) { print key, difficulty[key], zero_ref_domain_count[key], weak_domain_rating_count[key], is_buyer[key], volome[key], return_rate[key], clicks[key], int(avg_ref_domains[key]), int(avg_domain_rating[key]), serp_count[key] | "sort -k3,4 -t, -r" } }