你好,游客 登录 注册 搜索
背景:
阅读新闻

hadoop权威指南中天气数据运行

[日期:2015-08-03] 来源:CSDN博客  作者: [字体: ]

1) hdfs文件系统准备工作
a) # hadoop fs -ls /user/root #查看hdfs文件系统
b) # hadoop fs -rm /user/root/output02/part-r-00000
c) 删除文档,删除文件夹
d) # hadoop fs -rm -r /user/root/output02
e) # hadoop fs -mkdir -p input/ncdc
f) 解压缩输入文件,hadoop无法识别.zip或者.rar
g) # hadoop fs -put ../input_tmp/* input/ncdc
h) 同一个任务对应的input文件内容格式必须一致。
2) 编写程序
a) MaxTemperature.java

// cc MaxTemperature Application to find the maximum temperature in the weather dataset
// vv MaxTemperature
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the maximum-temperature MapReduce job.
 *
 * <p>Wires {@code MaxTemperatureMapper} and {@code MaxTemperatureReducer} together and
 * declares {@code Text} / {@code IntWritable} as the job's output key / value types.
 */
public class MaxTemperature {

  /**
   * Runs the job and terminates the JVM with its outcome.
   *
   * @param args exactly two entries: the input path and the output path
   * @throws Exception if configuring or running the job fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: MaxTemperature <input path> <output path>");
      System.exit(-1);
    }

    // Job.getInstance() is the supported factory; the no-arg Job constructor is deprecated.
    Job job = Job.getInstance();
    // Identifies the jar to ship to the cluster by locating the one containing this class.
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("Max temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Block until the job finishes (printing progress); exit 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
// ^^ MaxTemperature
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34

b) MaxTemperatureMapper.java

// cc MaxTemperatureMapper Mapper for maximum temperature example
// vv MaxTemperatureMapper
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper for the maximum-temperature example.
 *
 * <p>Reads one fixed-width record per input line and emits a (year, air temperature) pair
 * for every record whose temperature is present and whose quality code is accepted.
 */
public class MaxTemperatureMapper
  extends Mapper<LongWritable, Text, Text, IntWritable> {

  /** Sentinel temperature value that marks a missing reading. */
  private static final int MISSING = 9999;

  /** Quality codes (one character each) whose readings are kept. */
  private static final String ACCEPTED_QUALITY_CODES = "01459";

  /** A record must reach past the quality-code column (index 92) to be parsed. */
  private static final int MIN_RECORD_LENGTH = 93;

  /**
   * Extracts the year and air temperature from one record and emits them when valid.
   *
   * @param key     input key supplied by the framework; not used here
   * @param value   one line of input text
   * @param context sink for the (year, temperature) output pair
   * @throws IOException          if writing the output pair fails
   * @throws InterruptedException if the task is interrupted while writing
   */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {

    String line = value.toString();
    if (line.length() < MIN_RECORD_LENGTH) {
      // A blank or truncated line would otherwise throw StringIndexOutOfBoundsException
      // and fail the whole task; count it and move on instead.
      context.getCounter("MaxTemperature", "TRUNCATED_RECORDS").increment(1);
      return;
    }

    String year = line.substring(15, 19);
    // Skip an explicit leading plus sign: older parseInt implementations reject it.
    int temperatureStart = (line.charAt(87) == '+') ? 88 : 87;
    int airTemperature = Integer.parseInt(line.substring(temperatureStart, 92));

    // Single-character lookup; equivalent to matching "[01459]" without compiling a
    // regular expression for every record.
    char quality = line.charAt(92);
    if (airTemperature != MISSING && ACCEPTED_QUALITY_CODES.indexOf(quality) >= 0) {
      context.write(new Text(year), new IntWritable(airTemperature));
    }
  }
}
// ^^ MaxTemperatureMapper
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33

c) MaxTemperatureReducer.java

// cc MaxTemperatureReducer Reducer for maximum temperature example
// vv MaxTemperatureReducer
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer for the maximum-temperature example: collapses all values received for a key
 * into the single largest one.
 */
public class MaxTemperatureReducer
  extends Reducer<Text, IntWritable, Text, IntWritable> {

  /**
   * Writes the largest of {@code values} under {@code key}.
   *
   * <p>If {@code values} yields nothing, {@link Integer#MIN_VALUE} is written.
   *
   * @param key     the key shared by all of the values
   * @param values  the values grouped under {@code key}
   * @param context sink for the (key, maximum) output pair
   * @throws IOException          if writing the output pair fails
   * @throws InterruptedException if the task is interrupted while writing
   */
  @Override
  public void reduce(Text key, Iterable<IntWritable> values,
      Context context)
      throws IOException, InterruptedException {

    int highest = Integer.MIN_VALUE;
    for (IntWritable reading : values) {
      int candidate = reading.get();
      if (candidate > highest) {
        highest = candidate;
      }
    }

    IntWritable result = new IntWritable(highest);
    context.write(key, result);
  }
}
// ^^ MaxTemperatureReducer
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24

3) eclipse上运行
a) 创建包org.hadoop.ncdc,编译上述3个Java类(每个源文件首行需加上 package org.hadoop.ncdc; 声明,否则后面的 org/hadoop/ncdc 目录结构和主类名对不上)。
b) run as → Java application → arguments
hdfs://master:9000/user/root/input/ncdc hdfs://master:9000/user/root/output/ncdc
c) # hadoop fs -ls output/ncdc
d) # hadoop fs -cat output/ncdc/part-r-00000
4) javac方式执行
a) vi classpath.sh添加

# Builds the CLASSPATH used to compile and run the MapReduce examples with plain javac/java.
# Written for POSIX sh because the article appends it to /etc/profile and hadoop-env.sh.
export HADOOP_HOME=/usr/local/hadoop2.5
export CLASSPATH=.:/usr/local/jdk1.7/lib:/usr/local/jdk1.7/jre/lib

# Hadoop's own jars, one module directory at a time.
for component in common hdfs mapreduce yarn; do
     for f in "$HADOOP_HOME"/share/hadoop/"$component"/hadoop-*.jar; do
          # An unmatched glob is left as the literal pattern; do not add it as a bogus entry.
          if [ -e "$f" ]; then
               CLASSPATH=$CLASSPATH:$f
          fi
     done
done

# Third-party dependencies of each module. The lib directories live under share/hadoop/,
# and a bare directory entry only exposes .class files, so use the JVM's "dir/*" wildcard
# (quoted so that the shell passes it through unexpanded) to pick up every jar inside.
for component in common hdfs mapreduce tools yarn; do
     CLASSPATH="$CLASSPATH:$HADOOP_HOME/share/hadoop/$component/lib/*"
done
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16

b) cat /usr/local/hadoop2.5/classpath.sh>> /etc/profile
c) cat /usr/local/hadoop2.5/classpath.sh >> /usr/local/hadoop2.5/etc/hadoop/hadoop-env.sh
d) source /etc/profile
e) 重启hadoop服务
f) # javac MaxTemperatureMapper.java -d .
其他类也一样,注意先编译最底层类,编译完成的class文件在Java程序的package路径下
g) # jar -cvf MaxTemperature.jar org #打成jar包
h) # jar -tvf MaxTemperature.jar #查看jar包目录结构
i) # hadoop jar MaxTemperature.jar org/hadoop/ncdc/MaxTemperature input/ncdc output/ncdc #运行jar包
hadoop jar 包名 程序主类名 输入文件夹 输出文件夹
j) # hadoop fs -ls output/ncdc
k) # hadoop fs -cat output/ncdc/part-r-00000
5) eclipse输出jar包运行
a) 右键export→jar→生成jar包
由于跨系统,故无需在打jar包过程中附加.classpath
b) 因已经配置了classpath,Linux上运作jar包即可。





收藏 推荐 打印 | 录入:Cstor | 阅读:
本文评论   查看全部评论 (0)
表情: 表情 姓名: 字数
点评:
       
评论声明
  • 尊重网上道德,遵守中华人民共和国的各项有关法律法规
  • 承担一切因您的行为而直接或间接导致的民事或刑事法律责任
  • 本站管理人员有权保留或删除其管辖留言中的任意内容
  • 本站有权在网站内转载或引用您的评论
  • 参与本评论即表明您已经阅读并接受上述条款