Created
July 2, 2013 22:20
-
-
Save andry1/5913776 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Job-level configuration for this script.
-- Compress the job output with Snappy, using block-level compression.
SET mapreduce.output.fileoutputformat.compress true
SET mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.SnappyCodec
SET mapreduce.output.fileoutputformat.compress.type BLOCK
-- Enable Pig's map-side partial aggregation.
-- NOTE(review): no GROUP statement appears in this script, so this setting
-- presumably has no effect here -- confirm before removing.
SET pig.exec.mapPartAgg true
-- Submit the job to the 'testing' queue.
SET mapreduce.job.queuename testing
-- Load the impression table through HCatalog.
xfp_all = LOAD 'chris_295640.impression' USING org.apache.hcatalog.pig.HCatLoader();

-- Restrict to a single hour: 2013-06-19, hour 20.
-- This filter is kept on its own, directly after the LOAD, so that HCatLoader
-- can apply it as a partition filter.
-- NOTE(review): assumes year/month/day/hour are partition columns of the
-- table -- confirm against the table definition.
xfp_partition = FILTER xfp_all BY year=='2013' AND month=='06' AND day=='19' AND hour=='20';

-- Drop rows with a zero line item id and rows whose time field is the literal
-- 'Time' (presumably header rows that were ingested as data -- confirm).
xfp = FILTER xfp_partition BY lineitemid != 0 AND time != 'Time';
-- Lookup tables, all read through HCatalog. Each one is used below as the
-- right-hand side of a 'replicated' join.

-- Joined on zone_source_id; supplies site_source_id and zone_source_id.
spm = LOAD 'chris_295640.site_page_mapping' USING org.apache.hcatalog.pig.HCatLoader();

-- Joined on code; supplies source_id.
dma = LOAD 'google_295640.designated_market_areas' USING org.apache.hcatalog.pig.HCatLoader();

-- Joined on xfp_id; supplies source_id.
countries = LOAD 'google_295640.countries' USING org.apache.hcatalog.pig.HCatLoader();

-- Joined on xfp_id; supplies abbr.
states = LOAD 'google_295640.states' USING org.apache.hcatalog.pig.HCatLoader();
-- Enrich the impressions with the lookup tables. Every join is 'replicated'
-- (fragment-replicate), so each right-hand relation must be small enough to
-- be held in memory by the join tasks.

-- Inner join: impressions whose adunitid has no matching zone_source_id in
-- the site/page mapping are dropped.
xfp_join1 = JOIN xfp BY adunitid, spm BY zone_source_id USING 'replicated';

-- The remaining joins are LEFT OUTER: impressions without a match are kept
-- and the lookup columns come through as null.

-- NOTE(review): the 200000 offset presumably converts metroid into the DMA
-- code space -- confirm against the data source.
xfp_join2 = JOIN xfp_join1 BY (metroid - 200000) LEFT OUTER, dma BY code USING 'replicated';

xfp_join3 = JOIN xfp_join2 BY countryid LEFT OUTER, countries BY xfp_id USING 'replicated';

xfp_join4 = JOIN xfp_join3 BY regionid LEFT OUTER, states BY xfp_id USING 'replicated';
-- Build the output record, one field per line, in output column order.
--  * '0.0.0.0' is emitted as a constant third column (presumably a
--    placeholder for an IP address -- confirm with the consumer of the file).
--  * The country, state and DMA lookups come from LEFT OUTER joins, so a
--    missing match is null; those nulls are replaced with '0'.
--  * bandwidthgroupid values 1..5 are shifted down to 0..4; any other
--    non-null value becomes 0.
--    NOTE(review): a null bandwidthgroupid makes the condition null rather
--    than false -- confirm the resulting output value is acceptable.
xfp_out = FOREACH xfp_join4 GENERATE
    time,
    userid,
    '0.0.0.0',
    lineitemid,
    creativeid,
    creativeversion,
    xfp_join1::spm::site_source_id,
    xfp_join1::spm::zone_source_id,
    targetedcustomcriteria,
    (xfp_join3::countries::source_id IS NULL ? '0' : xfp_join3::countries::source_id),
    (states::abbr IS NULL ? '0' : states::abbr),
    browserid,
    (xfp_join2::dma::source_id IS NULL ? '0' : xfp_join2::dma::source_id),
    ((bandwidthgroupid >= 1 AND bandwidthgroupid <= 5) ? bandwidthgroupid - 1 : 0),
    customtargeting,
    postalcode;
-- Write the result to HDFS with PigStorage.
-- The first argument is the field delimiter (presumably the single byte 0xFE,
-- written as an escaped hex sequence -- confirm). 'noschema' is passed as the
-- PigStorage options string.
STORE xfp_out INTO '/user/chris/xfp_pig' USING PigStorage('\\xFE','noschema');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment