
Implementing an E-commerce Recommendation System with Hadoop

[Date: 2014-07-04]  Source: uml.org.cn  Author: 潇洒子弦's blog

ShopxxProductRecommend <recommending products based on shared purchases>

The project has two steps: first, group the products bought by each user; second, compute the product co-occurrence matrix. The code follows below.
package xian.zhang.common;

import java.util.regex.Pattern;

public class Util {
    // Shared delimiter: fields are separated by a tab or a comma.
    public static final Pattern DELIMITER = Pattern.compile("[\t,]");
}
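DELIMITER splits on either a tab or a comma, so it can tokenize both the raw userid,productid input and the tab-separated key/value lines that one job writes and the next job reads. A quick standalone check (illustration only, not part of the original project):

package xian.zhang.common;

// Demo class (hypothetical): verifies how Util.DELIMITER tokenizes a line
// in the shape produced by the grouping job.
public class UtilDemo {
    public static void main(String[] args) {
        // "1\t101,102" is the shape of a line produced by the grouping job.
        String[] parts = Util.DELIMITER.split("1\t101,102");
        for (String p : parts) {
            System.out.println(p); // prints 1, 101, 102 on separate lines
        }
    }
}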

 

package xian.zhang.core;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Merges the input records
 *     userid1,product1
 *     userid1,product2
 *     userid1,product3
 * into one line per user: userid1 \t product1,product2,product3
 * @author zx
 */
public class CombinProductInUser {

    public static class CombinProductMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line is "userid,productid"; key the record by user id.
            String[] items = value.toString().split(",");
            context.write(new IntWritable(Integer.parseInt(items[0])), new Text(items[1]));
        }
    }

    public static class CombinProductReducer extends Reducer<IntWritable, Text, IntWritable, Text> {
        @Override
        protected void reduce(IntWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Join all products of one user into a comma-separated list.
            StringBuilder sb = new StringBuilder();
            Iterator<Text> it = values.iterator();
            sb.append(it.next().toString());
            while (it.hasNext()) {
                sb.append(",").append(it.next().toString());
            }
            context.write(key, new Text(sb.toString()));
        }
    }

    @SuppressWarnings("deprecation")
    public static boolean run(Path inPath, Path outPath)
            throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        Job job = new Job(conf, "CombinProductInUser");

        job.setJarByClass(CombinProductInUser.class);
        job.setMapperClass(CombinProductMapper.class);
        job.setReducerClass(CombinProductReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);

        return job.waitForCompletion(true);
    }
}
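For intuition, here is a minimal trace with made-up IDs (not from the original article).

Input (one "userid,productid" pair per line):

    1,101
    1,102
    1,103
    2,101
    2,102

Output of CombinProductInUser:

    1    101,102,103
    2    101,102

Within a group, the product order follows the order in which the reducer receives the values, which Hadoop does not guarantee.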

 

package xian.zhang.core;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import xian.zhang.common.Util;

/**
 * Product co-occurrence matrix: counts how often two products appear
 * in the same user's purchase list.
 * @author zx
 */
public class ProductCo_occurrenceMatrix {

    public static class Co_occurrenceMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Input line: "userid \t product1,product2,..."; index 0 is the user id.
            String[] products = Util.DELIMITER.split(value.toString());
            for (int i = 1; i < products.length; i++) {
                for (int j = 1; j < products.length; j++) {
                    if (i != j) {
                        context.write(new Text(products[i] + ":" + products[j]), one);
                    }
                }
            }
        }
    }

    public static class Co_occurrenceReducer extends Reducer<Text, IntWritable, NullWritable, Text> {
        NullWritable nullKey = NullWritable.get();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the occurrences of each product pair.
            int sum = 0;
            Iterator<IntWritable> it = values.iterator();
            while (it.hasNext()) {
                sum += it.next().get();
            }
            context.write(nullKey, new Text(key.toString().replace(":", ",") + "," + sum));
        }
    }

    @SuppressWarnings("deprecation")
    public static boolean run(Path inPath, Path outPath)
            throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        Job job = new Job(conf, "ProductCo_occurrenceMatrix");

        job.setJarByClass(ProductCo_occurrenceMatrix.class);
        job.setMapperClass(Co_occurrenceMapper.class);
        job.setReducerClass(Co_occurrenceReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);

        return job.waitForCompletion(true);
    }
}
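Continuing the made-up trace: from "1 101,102,103" the mapper emits the ordered pairs 101:102, 101:103, 102:101, 102:103, 103:101, 103:102, and from "2 101,102" it emits 101:102 and 102:101, each with a count of 1. The reducer then sums identical pairs:

    101,102,2
    101,103,1
    102,101,2
    102,103,1
    103,101,1
    103,102,1

Each line reads productA,productB,count: products 101 and 102 were bought together by two users, so 102 is the strongest recommendation for a buyer of 101.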


 

package xian.zhang.core;

import java.io.IOException;
import org.apache.hadoop.fs.Path;

public class Main {

    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

        if (args.length < 2) {
            throw new IllegalArgumentException("Two arguments are required: the input path and the output path");
        }

        // Step 1 writes its intermediate output to a sibling directory of the input.
        Path inPath1 = new Path(args[0]);
        Path outPath1 = new Path(inPath1.getParent() + "/CombinProduct");

        Path inPath2 = outPath1;
        Path outPath2 = new Path(args[1]);

        // Run step 2 only if step 1 succeeds.
        if (CombinProductInUser.run(inPath1, outPath1)) {
            System.exit(ProductCo_occurrenceMatrix.run(inPath2, outPath2) ? 0 : 1);
        }
    }
}
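A typical launch, assuming the project is packed into a jar named ShopxxProductRecommend.jar (the jar name and paths are illustrative, not from the original article):

    hadoop jar ShopxxProductRecommend.jar xian.zhang.core.Main /shopxx/input/buys.csv /shopxx/output/productMatrix

With this input path, step 1 writes its intermediate output to /shopxx/input/CombinProduct, a sibling of the input file, before step 2 produces the final matrix under /shopxx/output/productMatrix.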

ShopxxUserRecommend <recommending friends with similar shopping tastes>

The whole project has two steps: first, group the users under each product; second, compute the user co-occurrence matrix.

The principle is the same as ShopxxProductRecommend, with the roles of user and product swapped.

A miniature trace illustrates both steps, followed by the code.
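A miniature trace with made-up IDs:

Step 1 (CombinUserInProduct) groups the buyers of each product:

    input:   1,101
             2,101
             3,101
    output:  101    1,2,3

Step 2 (UserCo_occurrenceMatrix) counts, for every ordered pair of users, how many products both bought:

    1,2,1
    1,3,1
    2,1,1
    2,3,1
    3,1,1
    3,2,1

Each line reads userA,userB,count; the larger the count, the more similar the two users' shopping tastes.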

package xian.zhang.common;

import java.util.regex.Pattern;

public class Util {
    // Shared delimiter: fields are separated by a tab or a comma.
    public static final Pattern DELIMITER = Pattern.compile("[\t,]");
}

 

package xian.zhang.core;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Merges the input records
 *     userid1,product1
 *     userid1,product2
 *     userid1,product3
 * into one line per product: productid1 \t user1,user2,user3
 * @author zx
 */
public class CombinUserInProduct {

    public static class CombinUserMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line is "userid,productid"; key the record by product id.
            String[] items = value.toString().split(",");
            context.write(new IntWritable(Integer.parseInt(items[1])), new Text(items[0]));
        }
    }

    public static class CombinUserReducer extends Reducer<IntWritable, Text, IntWritable, Text> {
        @Override
        protected void reduce(IntWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Join all buyers of one product into a comma-separated list.
            StringBuilder sb = new StringBuilder();
            Iterator<Text> it = values.iterator();
            sb.append(it.next().toString());
            while (it.hasNext()) {
                sb.append(",").append(it.next().toString());
            }
            context.write(key, new Text(sb.toString()));
        }
    }

    @SuppressWarnings("deprecation")
    public static boolean run(Path inPath, Path outPath)
            throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        Job job = new Job(conf, "CombinUserInProduct");

        job.setJarByClass(CombinUserInProduct.class);
        job.setMapperClass(CombinUserMapper.class);
        job.setReducerClass(CombinUserReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);

        return job.waitForCompletion(true);
    }
}

 

package xian.zhang.core;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import xian.zhang.common.Util;

/**
 * User co-occurrence matrix: counts how often two users bought
 * the same product.
 * @author zx
 */
public class UserCo_occurrenceMatrix {

    public static class Co_occurrenceMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Input line: "productid \t user1,user2,..."; index 0 is the product id.
            String[] users = Util.DELIMITER.split(value.toString());
            for (int i = 1; i < users.length; i++) {
                for (int j = 1; j < users.length; j++) {
                    if (i != j) {
                        context.write(new Text(users[i] + ":" + users[j]), one);
                    }
                }
            }
        }
    }

    public static class Co_occurrenceReducer extends Reducer<Text, IntWritable, NullWritable, Text> {
        NullWritable nullKey = NullWritable.get();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the occurrences of each user pair.
            int sum = 0;
            Iterator<IntWritable> it = values.iterator();
            while (it.hasNext()) {
                sum += it.next().get();
            }
            context.write(nullKey, new Text(key.toString().replace(":", ",") + "," + sum));
        }
    }

    @SuppressWarnings("deprecation")
    public static boolean run(Path inPath, Path outPath)
            throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();
        Job job = new Job(conf, "UserCo_occurrenceMatrix");

        job.setJarByClass(UserCo_occurrenceMatrix.class);
        job.setMapperClass(Co_occurrenceMapper.class);
        job.setReducerClass(Co_occurrenceReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);

        return job.waitForCompletion(true);
    }
}
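The nested loop in the mapper starts at index 1 because index 0 holds the product id written by the previous job, and it emits every ordered pair of distinct users. A self-contained sketch of just that loop, runnable without Hadoop (the demo class is not part of the project):

package xian.zhang.core;

import xian.zhang.common.Util;

// Illustration only: replays the mapper's pair-generation loop on one line
// of CombinUserInProduct output.
public class PairEmissionDemo {
    public static void main(String[] args) {
        String line = "101\t1,2,3";                  // productid \t userid list
        String[] users = Util.DELIMITER.split(line); // ["101", "1", "2", "3"]
        for (int i = 1; i < users.length; i++) {     // start at 1: index 0 is the product id
            for (int j = 1; j < users.length; j++) {
                if (i != j) {
                    System.out.println(users[i] + ":" + users[j] + "\t1");
                }
            }
        }
        // prints 1:2, 1:3, 2:1, 2:3, 3:1, 3:2, each with count 1
    }
}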

 

package xian.zhang.core;

import java.io.IOException;
import org.apache.hadoop.fs.Path;

public class Main {

    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

        if (args.length < 2) {
            throw new IllegalArgumentException("Two arguments are required: the input path and the output path");
        }

        // Step 1 writes its intermediate output to a sibling directory of the input.
        Path inPath1 = new Path(args[0]);
        Path outPath1 = new Path(inPath1.getParent() + "/CombinUser");

        Path inPath2 = outPath1;
        Path outPath2 = new Path(args[1]);

        // Run step 2 only if step 1 succeeds.
        if (CombinUserInProduct.run(inPath1, outPath1)) {
            System.exit(UserCo_occurrenceMatrix.run(inPath2, outPath2) ? 0 : 1);
        }
    }
}
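As with the product pipeline, the driver can be launched with hadoop jar (the jar name and paths are illustrative):

    hadoop jar ShopxxUserRecommend.jar xian.zhang.core.Main /shopxx/input/buys.csv /shopxx/output/userMatrix

The intermediate grouping is written to /shopxx/input/CombinUser before the user co-occurrence job runs.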

The code is available on GitHub:

git@github.com:chaoku/ShopxxProductRecommend.git




