// Load the input file into an RDD of text lines.
// Each line is expected to list a group of related words, for example:
//   word1 word2 word3 word4 ...
//   word2 word1 word4 word3 ...
// NOTE(review): the example above shows spaces between words, but the
// pair-generation step that consumes `lines` splits each line on "\t" --
// confirm which delimiter the real input uses.
JavaRDD<String> lines = sc.textFile(input);
// 2. 生成所有相关联的pair (Step 2: generate all associated pairs)
// Build a symmetric link table: each token is keyed to every token that
// shares an input line with it.
JavaPairRDD<String, Iterable<String>> links = lines
        .flatMapToPair(new PairFlatMapFunction<String, String, String>() {
            /**
             * Emits both orderings of every pair of tab-separated tokens found on one line.
             *
             * @param line a single line of the input
             * @return for every two positions i &lt; j on the line, the tuples
             *         (token[i], token[j]) and (token[j], token[i]), in that order
             */
            @Override
            public Iterable<Tuple2<String, String>> call(String line) {
                final String[] tokens = line.split("\t");
                final List<Tuple2<String, String>> pairs = Lists.newArrayList();
                for (int first = 0; first + 1 < tokens.length; first++) {
                    final String left = tokens[first];
                    for (int second = first + 1; second < tokens.length; second++) {
                        final String right = tokens[second];
                        // Record the link in both directions so lookups work from either token.
                        pairs.add(new Tuple2<>(left, right));
                        pairs.add(new Tuple2<>(right, left));
                    }
                }
                return pairs;
            }
        })
        .distinct()    // drop pairs emitted more than once
        .groupByKey()  // gather all partners of each token under one key
        .cache();      // persist the grouped RDD for reuse