Java代码统计网站中不同省份用户的访问数
一、需求
针对log日志中给定的信息,统计网站中不同省份用户的访问数
二、编程代码
packageorg.apache.hadoop.studyhdfs.mapreduce; importjava.io.IOException; importorg.apache.commons.lang.StringUtils; importorg.apache.hadoop.conf.Configuration; importorg.apache.hadoop.conf.Configured; importorg.apache.hadoop.fs.Path; importorg.apache.hadoop.io.IntWritable; importorg.apache.hadoop.io.LongWritable; importorg.apache.hadoop.io.Text; importorg.apache.hadoop.mapreduce.Job; importorg.apache.hadoop.mapreduce.Mapper; importorg.apache.hadoop.mapreduce.Mapper.Context; importorg.apache.hadoop.mapreduce.Reducer; importorg.apache.hadoop.mapreduce.lib.input.FileInputFormat; importorg.apache.hadoop.mapreduce.lib.output.FileOutputFormat; importorg.apache.hadoop.util.Tool; importorg.apache.hadoop.util.ToolRunner; importorg.jboss.netty.util.internal.StringUtil; publicclassProvinceCountMapReduceextendsConfiguredimplementsTool{ //1.map /* *<KEYIN,VALUEIN,KEYOUT,VALUEOUT> */ publicstaticclassWordCountMapperextendsMapper<LongWritable,Text,IntWritable,IntWritable>{ privateIntWritablemapOutputKey=newIntWritable(); privateIntWritablemapOutputValue=newIntWritable(1); @Override publicvoidmap(LongWritablekey,Textvalue,Contextcontext) throwsIOException,InterruptedException{ //getlineValue StringlineValue=value.toString(); //split String[]strs=lineValue.split("\t"); //lineblank Stringurl=strs[1]; StringprovinceIdValue=strs[23]; //guolv if(strs.length<30||StringUtils.isBlank(provinceIdValue)||StringUtils.isBlank(url)){ return; } intprovinceId=Integer.MAX_VALUE; try{ provinceId=Integer.valueOf(provinceIdValue); }catch(Exceptione){ return; } if(provinceId==Integer.MAX_VALUE){ return; } mapOutputKey.set(provinceId); context.write(mapOutputKey,mapOutputValue); } } //2.reduce publicstaticclassWordCountReduceextendsReducer<IntWritable,IntWritable,IntWritable,IntWritable>{ privateIntWritableoutputValue=newIntWritable(); @Override publicvoidreduce(IntWritablekey,Iterable<IntWritable>values,Contextcontext) throwsIOException,InterruptedException{ //todo intsum=0; 
for(IntWritablevalue:values){ sum+=value.get(); } outputValue.set(sum); context.write(key,outputValue); } } publicintrun(String[]args)throwsException{ //1.getConfiguration Configurationconf=super.getConf(); //2.createjob Jobjob=Job.getInstance(conf,this.getClass().getSimpleName()); job.setJarByClass(ProvinceCountMapReduce.class); //3.setjob //3.1setinput PathinputPath=newPath(args[0]); FileInputFormat.addInputPath(job,inputPath); //3.2setmapper job.setMapperClass(WordCountMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); //3.3setreduce job.setReducerClass(WordCountReduce.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); //3.4setinput PathoutputPath=newPath(args[1]); FileOutputFormat.setOutputPath(job,outputPath); //4.submmit booleanisSuccess=job.waitForCompletion(true); returnisSuccess?0:1; } publicstaticvoidmain(String[]args)throwsException{ args=newString[]{ "hdfs://Hadoop-senior02.beifeng.com:8020/input/2015082818", "hdfs://Hadoop-senior02.beifeng.com:8020/output15/" }; Configurationconf=newConfiguration(); conf.set("mapreduce.map.output.compress","true"); intstatus=ToolRunner.run(conf,newProvinceCountMapReduce(),args); System.exit(status); } }
3、运行结果
1)查看结果命令:bin/hdfs dfs -text /output15/par*
2)运行结果:
1	3527
2	1672
3	511
4	325
5	776
6	661
7	95
8	80
9	183
10	93
11	135
12	289
13	264
14	374
15	163
16	419
17	306
18	272
19	226
20	2861
21	124
22	38
23	96
24	100
25	20
26	157
27	49
28	21
29	85
30	42
32	173
以上所述是小编给大家介绍的Java代码统计网站中不同省份用户的访问数的相关介绍,希望对大家有所帮助,在此小编也非常感谢大家对毛票票网站的支持!