发布时间:2022-08-19 13:15
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.recommendation.ALS;
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
import org.apache.spark.mllib.recommendation.Rating;
import scala.Tuple2;
/**
 * Trains a Spark MLlib ALS matrix-factorization model on part of a ratings file
 * and reports how often the predicted rating matches the actual rating on the
 * remaining part.
 *
 * <p>Each input line is expected to look like {@code user::product::rating[::...]};
 * only the first three fields are read. The data is randomly split with weights
 * 0.3 / 0.7: the first split trains the model, the second split is used for evaluation.
 * A prediction counts as a hit when the predicted and actual ratings have the same
 * integer part (both values are truncated with an {@code (int)} cast).
 */
public class myAls {

    /** Field separator used in the ratings file. */
    private static final String FIELD_SEPARATOR = "::";

    /**
     * Runs the train / predict / evaluate pipeline and prints the hit count,
     * the number of evaluated pairs and their ratio.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("als").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile(
                    "file:///home/gyq/下载/spark-2.3.2-bin-hadoop2.7/data/mllib/als/sample.data");

            // Convert every text line into a (user, product, rating) triple.
            JavaRDD<Rating> ratings = lines.map(myAls::parseRating);

            JavaRDD<Rating>[] splits = ratings.randomSplit(new double[]{0.3, 0.7});
            JavaRDD<Rating> training = splits[0];
            JavaRDD<Rating> evaluation = splits[1];

            // Build the model: rank 15, 10 iterations.
            MatrixFactorizationModel model = ALS.train(training.rdd(), 15, 10);

            // (user, product) pairs of the evaluation split whose rating is to be predicted.
            JavaRDD<Tuple2<Object, Object>> userProducts =
                    evaluation.map(r -> new Tuple2<>(r.user(), r.product()));

            // Predicted ratings keyed by "user,product".
            JavaPairRDD<String, Double> predicted = model.predict(userProducts.rdd())
                    .toJavaRDD()
                    .mapToPair(r -> new Tuple2<>(r.user() + "," + r.product(), r.rating()));

            // Actual ratings of the evaluation split, keyed the same way.
            JavaPairRDD<String, Double> actual =
                    evaluation.mapToPair(r -> new Tuple2<>(r.user() + "," + r.product(), r.rating()));

            // Joined layout: ("user,product", (actualRating, predictedRating)).
            // Cached because it is traversed twice below (filtered count and total count).
            JavaPairRDD<String, Tuple2<Double, Double>> joined = actual.join(predicted).cache();

            // To persist the model:
            // model.save(sc.sc(), "file:///home/gyq/下载/spark-2.3.2-bin-hadoop2.7/data/mllib/als/myals");

            JavaPairRDD<String, Tuple2<Double, Double>> hits =
                    joined.filter(pair -> sameIntegerPart(pair._2()._1(), pair._2()._2()));

            // Kept as double so the ratio is a floating-point division and the
            // printed values keep their original format (e.g. "12.0").
            double count = hits.count();
            double counts = joined.count();
            double accur = count / counts;
            System.out.println("count=" + count);
            System.out.println("counts=" + counts);
            System.out.println("accur=" + accur);
        } finally {
            // Always release the Spark context, even when a job fails.
            sc.stop();
        }
    }

    /**
     * Parses one {@code user::product::rating} line into a {@link Rating}.
     *
     * @param line a single line of the ratings file
     * @return the rating built from the first three fields of the line
     * @throws NumberFormatException if a field is not a valid number
     * @throws ArrayIndexOutOfBoundsException if the line has fewer than three fields
     */
    private static Rating parseRating(String line) {
        // Split once instead of once per field.
        String[] fields = line.split(FIELD_SEPARATOR);
        return new Rating(
                Integer.parseInt(fields[0]),
                Integer.parseInt(fields[1]),
                Double.parseDouble(fields[2]));
    }

    /**
     * Tells whether two ratings share the same integer part after an {@code (int)} cast.
     *
     * @param first one rating
     * @param second the other rating
     * @return {@code true} when both values truncate to the same {@code int}
     */
    private static boolean sameIntegerPart(double first, double second) {
        return (int) first == (int) second;
    }
}
数据类似这种(每行字段以“::”分隔):用户::产品::评分::时间戳(代码只使用前三个字段)
利用自带的函数:
ALS.train(data,rank,iterations,lambda)
各参数意义:
ALS.train(评分数据,隐因子个数 rank,迭代次数,正则化参数 lambda)
还摆个屁的烂?用Python画如此漂亮的专业插图 ?简直So easy!
翻译:对测试自动化来说,为什么说Python是非常好的(选择)WHY PYTHON IS GREAT FOR TEST AUTOMATION...
Pytorch: inplace operation runtimeError
摸了俩小时docker-compose的鱼,Dockerfile常用姿势5分钟速成~
【北京迅为】i.MX6ULL终结者Linux 电容触摸屏实验运行测试
我想转行做软件测试,有必要报培训班吗?我听说好多人说自学就行...
11月21日科技资讯|罗永浩将举办「老人与海」发布会;微软疑似遭遇大范围全球宕机;Python 3.9 首个测试版发布