Ⅰ 关于数据挖掘中的apriori算法,帮忙推出关联规则 事务数为 5 支持度为0.6,置信度为0.6
abc的支持数P1=3,acd的支持数P2=3,bcd的支持数P3=3,关联规则的输出就是在由频繁项集的项组成的关联规则中,找出置信度大于等于最小置信度阈值的关联规则。因为由频繁项集的项组成的关联规则的支持度大于等于最小支持阈值,所以规则产生过程就是在由频繁项集的项组成的关联规则中,找出置信度大于等于最小置信度阈值的强关联规则,基本步骤如下:
1) 对于每个频繁项集L,产生L的所有非空真子集。
2) 对于L的每个非空真子集 ,如果L的支持计数除以 的支持计数大于等于最小置信度阈值min_conf,则输出强关联规则 =>(L- )。
例如:abc的非空真子集有a,b,c,ab,ac,bc。分别算出他们的支持数,再除以abc的支持数,若结果大于0.6则可输出强关联规则。Pa/P1=4/3>0.6,则可以输出关联规则:a=>bc;同理可输出强关联规则:b=>ac,c=>ab;
同理,对于acd,bcd采用同样地方法。
Ⅱ 急需C++实现的Apriori算法代码
用C++ 实现的 可以 到http://download.csdn.net/down/188143/chanjuanzz下载 不过要注册扣积分的
算法实现
(一)核心类
Apriori算法的核心实现类为AprioriAlgorithm,实现的java代码如下所示:
package org.shirdrn.datamining.association;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
/**
* <B>关联规则挖掘:Apriori算法</B>
*
* <P>该算法基本上按照Apriori算法的基本思想来实现的。
*
* @author shirdrn
* @date 2009/07/22 22:56:23
* @msn shirdrn#hotmail.com(#→@)
* @qq 187071722
*/
public class AprioriAlgorithm {
private Map<Integer, Set<String>> txDatabase; // 事务数据库
private Float minSup; // 最小支持度
private Float minConf; // 最小置信度
private Integer txDatabaseCount; // 事务数据库中的事务数
private Map<Integer, Set<Set<String>>> freqItemSet; // 频繁项集集合
private Map<Set<String>, Set<Set<String>>> assiciationRules; // 频繁关联规则集合
public AprioriAlgorithm(
Map<Integer, Set<String>> txDatabase,
Float minSup,
Float minConf) {
this.txDatabase = txDatabase;
this.minSup = minSup;
this.minConf = minConf;
this.txDatabaseCount = this.txDatabase.size();
freqItemSet = new TreeMap<Integer, Set<Set<String>>>();
assiciationRules = new HashMap<Set<String>, Set<Set<String>>>();
}
/**
* 扫描事务数据库,计算频繁1-项集
* @return
*/
public Map<Set<String>, Float> getFreq1ItemSet() {
Map<Set<String>, Float> freq1ItemSetMap = new HashMap<Set<String>, Float>();
Map<Set<String>, Integer> candFreq1ItemSet = this.getCandFreq1ItemSet();
Iterator<Map.Entry<Set<String>, Integer>> it = candFreq1ItemSet.entrySet().iterator();
while(it.hasNext()) {
Map.Entry<Set<String>, Integer> entry = it.next();
// 计算支持度
Float supported = new Float(entry.getValue().toString())/new Float(txDatabaseCount);
if(supported>=minSup) {
freq1ItemSetMap.put(entry.getKey(), supported);
}
}
return freq1ItemSetMap;
}
/**
* 计算候选频繁1-项集
* @return
*/
public Map<Set<String>, Integer> getCandFreq1ItemSet() {
Map<Set<String>, Integer> candFreq1ItemSetMap = new HashMap<Set<String>, Integer>();
Iterator<Map.Entry<Integer, Set<String>>> it = txDatabase.entrySet().iterator();
// 统计支持数,生成候选频繁1-项集
while(it.hasNext()) {
Map.Entry<Integer, Set<String>> entry = it.next();
Set<String> itemSet = entry.getValue();
for(String item : itemSet) {
Set<String> key = new HashSet<String>();
key.add(item.trim());
if(!candFreq1ItemSetMap.containsKey(key)) {
Integer value = 1;
candFreq1ItemSetMap.put(key, value);
}
else {
Integer value = 1+candFreq1ItemSetMap.get(key);
candFreq1ItemSetMap.put(key, value);
}
}
}
return candFreq1ItemSetMap;
}
/**
* 根据频繁(k-1)-项集计算候选频繁k-项集
*
* @param m 其中m=k-1
* @param freqMItemSet 频繁(k-1)-项集
* @return
*/
public Set<Set<String>> aprioriGen(int m, Set<Set<String>> freqMItemSet) {
Set<Set<String>> candFreqKItemSet = new HashSet<Set<String>>();
Iterator<Set<String>> it = freqMItemSet.iterator();
Set<String> originalItemSet = null;
while(it.hasNext()) {
originalItemSet = it.next();
Iterator<Set<String>> itr = this.getIterator(originalItemSet, freqMItemSet);
while(itr.hasNext()) {
Set<String> identicalSet = new HashSet<String>(); // 两个项集相同元素的集合(集合的交运算)
identicalSet.addAll(originalItemSet);
Set<String> set = itr.next();
identicalSet.retainAll(set); // identicalSet中剩下的元素是identicalSet与set集合中公有的元素
if(identicalSet.size() == m-1) { // (k-1)-项集中k-2个相同
Set<String> differentSet = new HashSet<String>(); // 两个项集不同元素的集合(集合的差运算)
differentSet.addAll(originalItemSet);
differentSet.removeAll(set); // 因为有k-2个相同,则differentSet中一定剩下一个元素,即differentSet大小为1
differentSet.addAll(set); // 构造候选k-项集的一个元素(set大小为k-1,differentSet大小为k)
candFreqKItemSet.add(differentSet); // 加入候选k-项集集合
}
}
}
return candFreqKItemSet;
}
/**
* 根据一个频繁k-项集的元素(集合),获取到频繁k-项集的从该元素开始的迭代器实例
* @param itemSet
* @param freqKItemSet 频繁k-项集
* @return
*/
private Iterator<Set<String>> getIterator(Set<String> itemSet, Set<Set<String>> freqKItemSet) {
Iterator<Set<String>> it = freqKItemSet.iterator();
while(it.hasNext()) {
if(itemSet.equals(it.next())) {
break;
}
}
return it;
}
/**
* 根据频繁(k-1)-项集,调用aprioriGen方法,计算频繁k-项集
*
* @param k
* @param freqMItemSet 频繁(k-1)-项集
* @return
*/
public Map<Set<String>, Float> getFreqKItemSet(int k, Set<Set<String>> freqMItemSet) {
Map<Set<String>, Integer> candFreqKItemSetMap = new HashMap<Set<String>, Integer>();
// 调用aprioriGen方法,得到候选频繁k-项集
Set<Set<String>> candFreqKItemSet = this.aprioriGen(k-1, freqMItemSet);
// 扫描事务数据库
Iterator<Map.Entry<Integer, Set<String>>> it = txDatabase.entrySet().iterator();
// 统计支持数
while(it.hasNext()) {
Map.Entry<Integer, Set<String>> entry = it.next();
Iterator<Set<String>> kit = candFreqKItemSet.iterator();
while(kit.hasNext()) {
Set<String> kSet = kit.next();
Set<String> set = new HashSet<String>();
set.addAll(kSet);
set.removeAll(entry.getValue()); // 候选频繁k-项集与事务数据库中元素做差元算
if(set.isEmpty()) { // 如果拷贝set为空,支持数加1
if(candFreqKItemSetMap.get(kSet) == null) {
Integer value = 1;
candFreqKItemSetMap.put(kSet, value);
}
else {
Integer value = 1+candFreqKItemSetMap.get(kSet);
candFreqKItemSetMap.put(kSet, value);
}
}
}
}
// 计算支持度,生成频繁k-项集,并返回
return support(candFreqKItemSetMap);
}
/**
* 根据候选频繁k-项集,得到频繁k-项集
*
* @param candFreqKItemSetMap 候选k项集(包含支持计数)
*/
public Map<Set<String>, Float> support(Map<Set<String>, Integer> candFreqKItemSetMap) {
Map<Set<String>, Float> freqKItemSetMap = new HashMap<Set<String>, Float>();
Iterator<Map.Entry<Set<String>, Integer>> it = candFreqKItemSetMap.entrySet().iterator();
while(it.hasNext()) {
Map.Entry<Set<String>, Integer> entry = it.next();
// 计算支持度
Float supportRate = new Float(entry.getValue().toString())/new Float(txDatabaseCount);
if(supportRate<minSup) { // 如果不满足最小支持度,删除
it.remove();
}
else {
freqKItemSetMap.put(entry.getKey(), supportRate);
}
}
return freqKItemSetMap;
}
/**
* 挖掘全部频繁项集
*/
public void mineFreqItemSet() {
// 计算频繁1-项集
Set<Set<String>> freqKItemSet = this.getFreq1ItemSet().keySet();
freqItemSet.put(1, freqKItemSet);
// 计算频繁k-项集(k>1)
int k = 2;
while(true) {
Map<Set<String>, Float> freqKItemSetMap = this.getFreqKItemSet(k, freqKItemSet);
if(!freqKItemSetMap.isEmpty()) {
this.freqItemSet.put(k, freqKItemSetMap.keySet());
freqKItemSet = freqKItemSetMap.keySet();
}
else {
break;
}
k++;
}
}
/**
* <P>挖掘频繁关联规则
* <P>首先挖掘出全部的频繁项集,在此基础上挖掘频繁关联规则
*/
public void mineAssociationRules() {
freqItemSet.remove(1); // 删除频繁1-项集
Iterator<Map.Entry<Integer, Set<Set<String>>>> it = freqItemSet.entrySet().iterator();
while(it.hasNext()) {
Map.Entry<Integer, Set<Set<String>>> entry = it.next();
for(Set<String> itemSet : entry.getValue()) {
// 对每个频繁项集进行关联规则的挖掘
mine(itemSet);
}
}
}
/**
* 对从频繁项集集合freqItemSet中每迭代出一个频繁项集元素,执行一次关联规则的挖掘
* @param itemSet 频繁项集集合freqItemSet中的一个频繁项集元素
*/
public void mine(Set<String> itemSet) {
int n = itemSet.size()/2; // 根据集合的对称性,只需要得到一半的真子集
for(int i=1; i<=n; i++) {
// 得到频繁项集元素itemSet的作为条件的真子集集合
Set<Set<String>> properSubset = ProperSubsetCombination.getProperSubset(i, itemSet);
// 对条件的真子集集合中的每个条件项集,获取到对应的结论项集,从而进一步挖掘频繁关联规则
for(Set<String> conditionSet : properSubset) {
Set<String> conclusionSet = new HashSet<String>();
conclusionSet.addAll(itemSet);
conclusionSet.removeAll(conditionSet); // 删除条件中存在的频繁项
confide(conditionSet, conclusionSet); // 调用计算置信度的方法,并且挖掘出频繁关联规则
}
}
}
/**
* 对得到的一个条件项集和对应的结论项集,计算该关联规则的支持计数,从而根据置信度判断是否是频繁关联规则
* @param conditionSet 条件频繁项集
* @param conclusionSet 结论频繁项集
*/
public void confide(Set<String> conditionSet, Set<String> conclusionSet) {
// 扫描事务数据库
Iterator<Map.Entry<Integer, Set<String>>> it = txDatabase.entrySet().iterator();
// 统计关联规则支持计数
int conditionToConclusionCnt = 0; // 关联规则(条件项集推出结论项集)计数
int conclusionToConditionCnt = 0; // 关联规则(结论项集推出条件项集)计数
int supCnt = 0; // 关联规则支持计数
while(it.hasNext()) {
Map.Entry<Integer, Set<String>> entry = it.next();
Set<String> txSet = entry.getValue();
Set<String> set1 = new HashSet<String>();
Set<String> set2 = new HashSet<String>();
set1.addAll(conditionSet);
set1.removeAll(txSet); // 集合差运算:set-txSet
if(set1.isEmpty()) { // 如果set为空,说明事务数据库中包含条件频繁项conditionSet
// 计数
conditionToConclusionCnt++;
}
set2.addAll(conclusionSet);
set2.removeAll(txSet); // 集合差运算:set-txSet
if(set2.isEmpty()) { // 如果set为空,说明事务数据库中包含结论频繁项conclusionSet
// 计数
conclusionToConditionCnt++;
}
if(set1.isEmpty() && set2.isEmpty()) {
supCnt++;
}
}
// 计算置信度
Float conditionToConclusionConf = new Float(supCnt)/new Float(conditionToConclusionCnt);
if(conditionToConclusionConf>=minConf) {
if(assiciationRules.get(conditionSet) == null) { // 如果不存在以该条件频繁项集为条件的关联规则
Set<Set<String>> conclusionSetSet = new HashSet<Set<String>>();
conclusionSetSet.add(conclusionSet);
assiciationRules.put(conditionSet, conclusionSetSet);
}
else {
assiciationRules.get(conditionSet).add(conclusionSet);
}
}
Float conclusionToConditionConf = new Float(supCnt)/new Float(conclusionToConditionCnt);
if(conclusionToConditionConf>=minConf) {
if(assiciationRules.get(conclusionSet) == null) { // 如果不存在以该结论频繁项集为条件的关联规则
Set<Set<String>> conclusionSetSet = new HashSet<Set<String>>();
conclusionSetSet.add(conditionSet);
assiciationRules.put(conclusionSet, conclusionSetSet);
}
else {
assiciationRules.get(conclusionSet).add(conditionSet);
}
}
}
/**
* 经过挖掘得到的频繁项集Map
*
* @return 挖掘得到的频繁项集集合
*/
public Map<Integer, Set<Set<String>>> getFreqItemSet() {
return freqItemSet;
}
/**
* 获取挖掘到的全部的频繁关联规则的集合
* @return 频繁关联规则集合
*/
public Map<Set<String>, Set<Set<String>>> getAssiciationRules() {
return assiciationRules;
}
}
(二)辅助类
ProperSubsetCombination类是一个辅助类,在挖掘频繁关联规则的过程中,用于生成一个频繁项集元素的非空真子集,实现代码如下:
package org.shirdrn.datamining.association;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;
/**
* <B>求频繁项集元素(集合)的非空真子集集合</B>
* <P>从一个集合(大小为n)中取出m(m属于2~n/2的闭区间)个元素的组合实现类,获取非空真子集的集合
*
* @author shirdrn
* @date 2009/07/22 22:56:23
* @msn shirdrn#hotmail.com(#→@)
* @qq 187071722
*/
public class ProperSubsetCombination {
private static String[] array;
private static BitSet startBitSet; // 比特集合起始状态
private static BitSet endBitSet; // 比特集合终止状态,用来控制循环
private static Set<Set<String>> properSubset; // 真子集集合
/**
* 计算得到一个集合的非空真子集集合
*
* @param n 真子集的大小
* @param itemSet 一个频繁项集元素
* @return 非空真子集集合
*/
public static Set<Set<String>> getProperSubset(int n, Set<String> itemSet) {
String[] array = new String[itemSet.size()];
ProperSubsetCombination.array = itemSet.toArray(array);
properSubset = new HashSet<Set<String>>();
startBitSet = new BitSet();
endBitSet = new BitSet();
// 初始化startBitSet,左侧占满1
for (int i=0; i<n; i++) {
startBitSet.set(i, true);
}
// 初始化endBit,右侧占满1
for (int i=array.length-1; i>=array.length-n; i--) {
endBitSet.set(i, true);
}
// 根据起始startBitSet,将一个组合加入到真子集集合中
get(startBitSet);
while(!startBitSet.equals(endBitSet)) {
int zeroCount = 0; // 统计遇到10后,左边0的个数
int oneCount = 0; // 统计遇到10后,左边1的个数
int pos = 0; // 记录当前遇到10的索引位置
// 遍历startBitSet来确定10出现的位置
for (int i=0; i<array.length; i++) {
if (!startBitSet.get(i)) {
zeroCount++;
}
if (startBitSet.get(i) && !startBitSet.get(i+1)) {
pos = i;
oneCount = i - zeroCount;
// 将10变为01
startBitSet.set(i, false);
startBitSet.set(i+1, true);
break;
}
}
// 将遇到10后,左侧的1全部移动到最左侧
int counter = Math.min(zeroCount, oneCount);
int startIndex = 0;
int endIndex = 0;
if(pos>1 && counter>0) {
pos--;
endIndex = pos;
for (int i=0; i<counter; i++) {
startBitSet.set(startIndex, true);
startBitSet.set(endIndex, false);
startIndex = i+1;
pos--;
if(pos>0) {
endIndex = pos;
}
}
}
get(startBitSet);
}
return properSubset;
}
/**
* 根据一次移位操作得到的startBitSet,得到一个真子集
* @param bitSet
*/
private static void get(BitSet bitSet) {
Set<String> set = new HashSet<String>();
for(int i=0; i<array.length; i++) {
if(bitSet.get(i)) {
set.add(array[i]);
}
}
properSubset.add(set);
}
}
测试用例
对上述Apriori算法的实现进行了简单的测试,测试用例如下所示:
package org.shirdrn.datamining.association;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.shirdrn.datamining.association.AprioriAlgorithm;
import junit.framework.TestCase;
/**
* <B>Apriori算法测试类</B>
*
* @author shirdrn
* @date 2009/07/22 22:56:23
* @msn shirdrn#hotmail.com(#→@)
* @qq 187071722
*/
public class TestAprioriAlgorithm extends TestCase {
private AprioriAlgorithm apriori;
private Map<Integer, Set<String>> txDatabase;
private Float minSup = new Float("0.50");
private Float minConf = new Float("0.70");
@Override
protected void setUp() throws Exception {
create(); // 构造事务数据库
apriori = new AprioriAlgorithm(txDatabase, minSup, minConf);
}
/**
* 构造模拟事务数据库txDatabase
*/
public void create() {
txDatabase = new HashMap<Integer, Set<String>>();
Set<String> set1 = new TreeSet<String>();
set1.add("A");
set1.add("B");
set1.add("C");
set1.add("E");
txDatabase.put(1, set1);
Set<String> set2 = new TreeSet<String>();
set2.add("A");
set2.add("B");
set2.add("C");
txDatabase.put(2, set2);
Set<String> set3 = new TreeSet<String>();
set3.add("C");
set3.add("D");
txDatabase.put(3, set3);
Set<String> set4 = new TreeSet<String>();
set4.add("A");
set4.add("B");
set4.add("E");
txDatabase.put(4, set4);
}
/**
* 测试挖掘频繁1-项集
*/
public void testFreq1ItemSet() {
System.out.println("挖掘频繁1-项集 : " + apriori.getFreq1ItemSet());
}
/**
* 测试aprioriGen方法,生成候选频繁项集
*/
public void testAprioriGen() {
System.out.println(
"候选频繁2-项集 : " +
this.apriori.aprioriGen(1, this.apriori.getFreq1ItemSet().keySet())
);
}
/**
* 测试挖掘频繁2-项集
*/
public void testGetFreq2ItemSet() {
System.out.println(
"挖掘频繁2-项集 :" +
this.apriori.getFreqKItemSet(2, this.apriori.getFreq1ItemSet().keySet())
);
}
/**
* 测试挖掘频繁3-项集
*/
public void testGetFreq3ItemSet() {
System.out.println(
"挖掘频繁3-项集 :" +
this.apriori.getFreqKItemSet(
3,
this.apriori.getFreqKItemSet(2, this.apriori.getFreq1ItemSet().keySet()).keySet()
)
);
}
/**
* 测试挖掘全部频繁项集
*/
public void testGetFreqItemSet() {
this.apriori.mineFreqItemSet(); // 挖掘频繁项集
System.out.println("挖掘频繁项集 :" + this.apriori.getFreqItemSet());
}
/**
* 测试挖掘全部频繁关联规则
*/
public void testMineAssociationRules() {
this.apriori.mineFreqItemSet(); // 挖掘频繁项集
this.apriori.mineAssociationRules();
System.out.println("挖掘频繁关联规则 :" + this.apriori.getAssiciationRules());
}
}
测试结果:
挖掘频繁1-项集 : {[E]=0.5, [A]=0.75, [B]=0.75, [C]=0.75}
候选频繁2-项集 : [[E, C], [A, B], [B, C], [A, C], [E, B], [E, A]]
挖掘频繁2-项集 :{[A, B]=0.75, [B, C]=0.5, [A, C]=0.5, [E, B]=0.5, [E, A]=0.5}
挖掘频繁3-项集 :{[E, A, B]=0.5, [A, B, C]=0.5}
挖掘频繁项集 :{1=[[E], [A], [B], [C]], 2=[[A, B], [B, C], [A, C], [E, B], [E, A]], 3=[[E, A, B], [A, B, C]]}
挖掘频繁关联规则 :{[E]=[[A], [B], [A, B]], [A]=[[B]], [B]=[[A]], [B, C]=[[A]], [A, C]=[[B]], [E, B]=[[A]], [E, A]=[[B]]}
从测试结果看到,使用Apriori算法挖掘得到的全部频繁项集为:
{1=[[E], [A], [B], [C]], 2=[[A, B], [B, C], [A, C], [E, B], [E, A]], 3=[[E, A, B], [A, B, C]]}
使用Apriori算法挖掘得到的全部频繁关联规则为:
{E}→{A}、{E}→{B}、{E}→{A,B}、{A}→{B}、{B}→{A}、{B,C}→{A}、{A,C}→{B}、{B,E}→{A}、{A,E}→{B}。
Ⅲ 用Matlab实现apriori算法关联规则的挖掘程序,完整有详细注解
下面这段是apriori算法中由2频繁项集找k频繁项集的程序,程序中有两个问题:
1、似乎while循环的K永远都是固定的,也就是都是频繁2项集的个数。得到频繁3项集后K的个数不是要变吗?如何体现呢?
2、程序中有两个for的大循环,但是发现结果是只要找到一个频繁3项集第二个for循环就会结束,但是其实还应该有其它的频繁3项集。for循环不是应该无条件执行到参数k结束吗?当时k值是15,可是程序结束的时候i=2,j=3,然后j就不执行4以及一直到k的部分了。是什么原因呢?麻烦高手指点一下。急啊……
while( k>0)
le=length(candidate{1});
num=2;
nl=0;
for i=1:k-1
for j=i+1:k
x1=candidate{i}; %candidate初始值为频繁2项集,这个表示频繁项集的第i项
x2=candidate{j};
c = intersect(x1, x2);
M=0;
r=1;
nn=0;
l1=0;
if (length(c)==le-1) & (sum(c==x1(1:le-1))==le-1)
houxuan=union(x1(1:le),x2(le));
%树剪枝,若一个候选项的某个K-1项子集为非频繁,则剪枝掉
sub_set=subset(houxuan);
%生成该候选项的所有K-1项子集
NN=length(sub_set);
%判断这些K-1项自己是否都为频繁的
while(r & M<NN)
M=M+1;
r=in(sub_set{M},candidate);
end
if M==NN
nl=nl+1;
%候选k项集
cand{nl}=houxuan;
%记录每个候选k项集出现的次数
le=length(cand{1});
for i=1:m
s=cand{nl};
x=X(i,:);
if sum(x(s))==le
nn=nn+1;
end
end
end
end
%从候选集中找频繁项集
if nn>=th
ll=ll+1;
candmid{nl}=cand{nl};
pfxj(nl).element=cand{nl};
pfxj(nl).time=nn;
disp('得到的频繁项集为:')
result=(candmid{nl});
disp(result);
end
end
end
end
Ⅳ apriori算法
Apriori算法是第一个关联规则挖掘算法,也是最经典的算法。它利用逐层搜索的迭代方法找出数据库中项集的关系,以形成规则,其过程由连接(类矩阵运算)与剪枝(去掉那些没必要的中间结果)组成。
Ⅳ 怎么用java实现apriori算法
fromoperatorimportand_:
def__init__(self,inputfile):
self.transactions=[]
self.itemSet=set([])
inf=open(inputfile,'rb')
forlineininf.readlines():
elements=set(filter(lambdaentry:len(entry)>0,line.strip().split(',')))
iflen(elements)>0:
self.transactions.append(elements)
forelementinelements:
self.itemSet.add(element)
inf.close()
self.toRetItems={}
self.associationRules=[]
defgetSupport(self,itemcomb):
iftype(itemcomb)!=frozenset:
itemcomb=frozenset([itemcomb])
within_transaction=lambdatransaction:rece(and_,[(itemintransaction)foriteminitemcomb])
count=len(filter(within_transaction,self.transactions))
returnfloat(count)/float(len(self.transactions))
defrunApriori(self,minSupport=0.15,minConfidence=0.6):
itemCombSupports=filter(lambdafreqpair:freqpair[1]>=minSupport,
map(lambdaitem:(frozenset([item]),self.getSupport(item)),self.itemSet))
currentLset=set(map(lambdafreqpair:freqpair[0],itemCombSupports))
k=2
whilelen(currentLset)>0:
currentCset=set([i.union(j)(i.union(j))==k])
currentItemCombSupports=filter(lambdafreqpair:freqpair[1]>=minSupport,
map(lambdaitem:(item,self.getSupport(item)),currentCset))
currentLset=set(map(lambdafreqpair:freqpair[0],currentItemCombSupports))
itemCombSupports.extend(currentItemCombSupports)
k+=1
forkey,supportValinitemCombSupports:
self.toRetItems[key]=supportVal
self.calculateAssociationRules(minConfidence=minConfidence)
defcalculateAssociationRules(self,minConfidence=0.6):
forkeyinself.toRetItems:
subsets=[frozenset(item)forkinrange(1,len(key))foritemincombinations(key,k)]
forsubsetinsubsets:
confidence=self.toRetItems[key]/self.toRetItems[subset]
ifconfidence>minConfidence:
self.associationRules.append([subset,key-subset,confidence])
用Scala也大概六十多行:
importscala.io.Sourceimportscala.collection.immutable.Listimportscala.collection.immutable.Setimportjava.io.Fileimportscala.collection.mutable.MapclassAprioriAlgorithm(inputFile:File){
vartransactions:List[Set[String]]=List()
varitemSet:Set[String]=Set()
for(line<-Source.fromFile(inputFile).getLines()){
valelementSet=line.trim.split(',').toSet
if(elementSet.size>0){
transactions=transactions:+elementSet
itemSet=itemSet++elementSet
}
}
vartoRetItems:Map[Set[String],Double]=Map()
varassociationRules:List[(Set[String],Set[String],Double)]=List()
defgetSupport(itemComb:Set[String]):Double={
defwithinTransaction(transaction:Set[String]):Boolean=itemComb
.map(x=>transaction.contains(x))
.receRight((x1,x2)=>x1&&x2)
valcount=transactions.filter(withinTransaction).size
count.toDouble/transactions.size.toDouble
}
defrunApriori(minSupport:Double=0.15,minConfidence:Double=0.6)={
varitemCombs=itemSet.map(word=>(Set(word),getSupport(Set(word))))
.filter(wordSupportPair=>(wordSupportPair._2>minSupport))
varcurrentLSet:Set[Set[String]]=itemCombs.map(wordSupportPair=>wordSupportPair._1).toSet
vark:Int=2
while(currentLSet.size>0){
valcurrentCSet:Set[Set[String]]=currentLSet.map(wordSet=>currentLSet.map(wordSet1=>wordSet|wordSet1))
.receRight((set1,set2)=>set1|set2)
.filter(wordSet=>(wordSet.size==k))
valcurrentItemCombs=currentCSet.map(wordSet=>(wordSet,getSupport(wordSet)))
.filter(wordSupportPair=>(wordSupportPair._2>minSupport))
currentLSet=currentItemCombs.map(wordSupportPair=>wordSupportPair._1).toSet
itemCombs=itemCombs|currentItemCombs
k+=1
}
for(itemComb<-itemCombs){
toRetItems+=(itemComb._1->itemComb._2)
}
calculateAssociationRule(minConfidence)
}
defcalculateAssociationRule(minConfidence:Double=0.6)={
toRetItems.keys.foreach(item=>
item.subsets.filter(wordSet=>(wordSet.size<item.size&wordSet.size>0))
.foreach(subset=>{associationRules=associationRules:+(subset,itemdiffsubset,
toRetItems(item).toDouble/toRetItems(subset).toDouble)
}
)
)
associationRules=associationRules.filter(rule=>rule._3>minConfidence)
}}
我不建议用Java,应改用python或Scala一类的语言。如果用Python,代码大概50行左右,但可以想象用Java便看起来复杂得多。看如下:
Ⅵ python apriori算法代码怎么实现
classApriori(object):
def__init__(self,filename,min_support,item_start,item_end):
self.filename=filename
self.min_support=min_support#最小支持度
self.min_confidence=50
self.line_num=0#item的行数
self.item_start=item_start#取哪行的item
self.item_end=item_end
self.location=[[i]foriinrange(self.item_end-self.item_start+1)]
self.support=self.sut(self.location)
self.num=list(sorted(set([jforiinself.locationforjini])))#记录item
self.pre_support=[]#保存前一个support,location,num
self.pre_location=[]
self.pre_num=[]
self.item_name=[]#项目名
self.find_item_name()
self.loop()
self.confidence_sup()
defdeal_line(self,line):
"提取出需要的项"
return[i.strip()foriinline.split('')ifi][self.item_start-1:self.item_end]
deffind_item_name(self):
"根据第一行抽取item_name"
withopen(self.filename,'r')asF:
forindex,lineinenumerate(F.readlines()):
ifindex==0:
self.item_name=self.deal_line(line)
break
defsut(self,location):
"""
输入[[1,2,3],[2,3,4],[1,3,5]...]
输出每个位置集的support[123,435,234...]
"""
withopen(self.filename,'r')asF:
support=[0]*len(location)
forindex,lineinenumerate(F.readlines()):
ifindex==0:continue
#提取每信息
item_line=self.deal_line(line)
forindex_num,iinenumerate(location):
flag=0
forjini:
ifitem_line[j]!='T':
flag=1
break
ifnotflag:
support[index_num]+=1
self.line_num=index#一共多少行,出去第一行的item_name
returnsupport
defselect(self,c):
"返回位置"
stack=[]
foriinself.location:
forjinself.num:
ifjini:
iflen(i)==c:
stack.append(i)
else:
stack.append([j]+i)
#多重列表去重
importitertools
s=sorted([sorted(i)foriinstack])
location=list(sfors,_initertools.groupby(s))
returnlocation
defdel_location(self,support,location):
"清除不满足条件的候选集"
#小于最小支持度的剔除
forindex,iinenumerate(support):
ifi<self.line_num*self.min_support/100:
support[index]=0
#apriori第二条规则,剔除
forindex,jinenumerate(location):
sub_location=[j[:index_loc]+j[index_loc+1:]forindex_locinrange(len(j))]
flag=0
forkinsub_location:
ifknotinself.location:
flag=1
break
ifflag:
support[index]=0
#删除没用的位置
location=[ifori,jinzip(location,support)ifj!=0]
support=[iforiinsupportifi!=0]
returnsupport,location
defloop(self):
"s级频繁项级的迭代"
s=2
whileTrue:
print'-'*80
print'The',s-1,'loop'
print'location',self.location
print'support',self.support
print'num',self.num
print'-'*80
#生成下一级候选集
location=self.select(s)
support=self.sut(location)
support,location=self.del_location(support,location)
num=list(sorted(set([jforiinlocationforjini])))
s+=1
iflocationandsupportandnum:
self.pre_num=self.num
self.pre_location=self.location
self.pre_support=self.support
self.num=num
self.location=location
self.support=support
else:
break
defconfidence_sup(self):
"计算confidence"
ifsum(self.pre_support)==0:
print'min_supporterror'#第一次迭代即失败
else:
forindex_location,each_locationinenumerate(self.location):
del_num=[each_location[:index]+each_location[index+1:]forindexinrange(len(each_location))]#生成上一级频繁项级
del_num=[iforiindel_numifiinself.pre_location]#删除不存在上一级频繁项级子集
del_support=[self.pre_support[self.pre_location.index(i)]foriindel_numifiinself.pre_location]#从上一级支持度查找
#printdel_num
#printself.support[index_location]
#printdel_support
forindex,iinenumerate(del_num):#计算每个关联规则支持度和自信度
index_support=0
iflen(self.support)!=1:
index_support=index
support=float(self.support[index_location])/self.line_num*100#支持度
s=[jforindex_item,jinenumerate(self.item_name)ifindex_itemini]
ifdel_support[index]:
confidence=float(self.support[index_location])/del_support[index]*100
ifconfidence>self.min_confidence:
print','.join(s),'->>',self.item_name[each_location[index]],'min_support:',str(support)+'%','min_confidence:',str(confidence)+'%'
defmain():
c=Apriori('basket.txt',14,3,13)
d=Apriori('simple.txt',50,2,6)
if__name__=='__main__':
main()
Apriori(filename, min_support, item_start, item_end)
参数说明
filename:(路径)文件名
min_support:最小支持度
item_start:item起始位置
item_end:item结束位置
importapriori
c=apriori.Apriori('basket.txt',11,3,13)
输出:
Ⅶ 怎么用java实现apriori算法
作者:何史提
链接:https://www.hu.com/question/22590018/answer/26646688
来源:知乎
着作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
Apriori算法的理念其实很简单,可是实现起上来却复杂无比,因为当中无可避免用Set和Hash Table等高阶的数据结构,而且有很多loop用以读取数据。
我不建议用Java,应改用Python或Scala一类的语言。如果用Python,代码大概50行左右,但可以想象用Java便看起来复杂得多。看如下:
from operator import and_
from itertools import combinations
class AprioriAssociationRule:
def __init__(self, inputfile):
self.transactions = []
self.itemSet = set([])
inf = open(inputfile, 'rb')
for line in inf.readlines():
elements = set(filter(lambda entry: len(entry)>0, line.strip().split(',')))
if len(elements)>0:
self.transactions.append(elements)
for element in elements:
self.itemSet.add(element)
inf.close()
self.toRetItems = {}
self.associationRules = []
def getSupport(self, itemcomb):
if type(itemcomb) != frozenset:
itemcomb = frozenset([itemcomb])
within_transaction = lambda transaction: rece(and_, [(item in transaction) for item in itemcomb])
count = len(filter(within_transaction, self.transactions))
return float(count)/float(len(self.transactions))
def runApriori(self, minSupport=0.15, minConfidence=0.6):
itemCombSupports = filter(lambda freqpair: freqpair[1]>=minSupport,
map(lambda item: (frozenset([item]), self.getSupport(item)), self.itemSet))
currentLset = set(map(lambda freqpair: freqpair[0], itemCombSupports))
k = 2
while len(currentLset)>0:
currentCset = set([i.union(j) for i in currentLset for j in currentLset if len(i.union(j))==k])
currentItemCombSupports = filter(lambda freqpair: freqpair[1]>=minSupport,
map(lambda item: (item, self.getSupport(item)), currentCset))
currentLset = set(map(lambda freqpair: freqpair[0], currentItemCombSupports))
itemCombSupports.extend(currentItemCombSupports)
k += 1
for key, supportVal in itemCombSupports:
self.toRetItems[key] = supportVal
self.calculateAssociationRules(minConfidence=minConfidence)
def calculateAssociationRules(self, minConfidence=0.6):
for key in self.toRetItems:
subsets = [frozenset(item) for k in range(1, len(key)) for item in combinations(key, k)]
for subset in subsets:
confidence = self.toRetItems[key] / self.toRetItems[subset]
if confidence > minConfidence:
self.associationRules.append([subset, key-subset, confidence])
Ⅷ apriori算法是什么
经典的关联规则挖掘算法包括Apriori算法和FP-growth算法。
apriori算法多次扫描交易数据库,每次利用候选频繁集产生频繁集;而FP-growth则利用树形结构,无需产生候选频繁集而是直接得到频繁集,大大减少扫描交易数据库的次数,从而提高了算法的效率,但是apriori的算法扩展性较好,可以用于并行计算等领域。
(8)apriori算法c代码扩展阅读:
Apriori algorithm是关联规则里一项基本算法
Apriori算法将发现关联规则的过程分:
第一通过迭代,检索出事务数据库1中的所有频繁项集,即支持度不低于用户设定的阈值的项集;
第二利用频繁项集构造出满足用户最小信任度的规则。其中,挖掘或识别出所有频繁项集是该算法的核心,占整个计算量的大部分。
Ⅸ 急求用C实现的Apriori算法的 代码
http://www.csc.liv.ac.uk/~frans/Notes/KDD/AssocRuleMine/apriori.html
.h
====================================
/*----------------------------------------------------------------------
File : apriori.h
Contents: apriori algorithm for finding frequent item sets
(specialized version for FIMI 2003 workshop)
Author : Christian Borgelt
History : 15.08.2003 file created from normal apriori.c
16.08.2003 parameter for transaction filtering added
18.08.2003 dynamic filtering decision based on times added
21.08.2003 transaction sort changed to heapsort
20.09.2003 output file made optional
----------------------------------------------------------------------*/
/*
Modified by : Frédéric Flouvat
Modifications : store the positive and negative border into an
an input trie for ABS
process stastical informations on dataset to stop
the apriori classical iterations
Author : Frédéric Flouvat
----------------------------------------------------------------------*/
#ifndef APRIRORI_H
#define APRIRORI_H
#include <iostream>
using namespace std;
#define MAXIMAL
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#include "tract.h"
#include "istree.h"
#include "Application.h"
/*----------------------------------------------------------------------
Preprocessor Definitions
----------------------------------------------------------------------*/
#define PRGNAME "fim/apriori"
#define DESCRIPTION "frequent item sets miner for FIMI 2003"
#define VERSION "version 1.7 (2003.12.02) " \
"(c) 2003 Christian Borgelt"
/* --- error codes --- */
#define E_OPTION (-5) /* unknown option */
#define E_OPTARG (-6) /* missing option argument */
#define E_ARGCNT (-7) /* too few/many arguments */
#define E_SUPP (-8) /* invalid minimum support */
#define E_NOTAS (-9) /* no items or transactions */
#define E_UNKNOWN (-18) /* unknown error */
#ifndef QUIET /* if not quiet version */
#define MSG(x) x /* print messages */
#else /* if quiet version */
#define MSG(x) /* suppress messages */
#endif
#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)
#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \
+ ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))
#define BUFFER(s) tfs_buf(is_tfscan(s))
/*----------------------------------------------------------------------
Constants
----------------------------------------------------------------------*/
#ifndef QUIET /* if not quiet version */
/* --- error messages --- */
static const char *errmsgs[] = {
/* E_NONE 0 */ "no error\n",
/* E_NOMEM -1 */ "not enough memory\n",
/* E_FOPEN -2 */ "cannot open file %s\n",
/* E_FREAD -3 */ "read error on file %s\n",
/* E_FWRITE -4 */ "write error on file %s\n",
/* E_OPTION -5 */ "unknown option -%c\n",
/* E_OPTARG -6 */ "missing option argument\n",
/* E_ARGCNT -7 */ "wrong number of arguments\n",
/* E_SUPP -8 */ "invalid minimal support %d\n",
/* E_NOTAS -9 */ "no items or transactions to work on\n",
/* -10 to -15 */ NULL, NULL, NULL, NULL, NULL, NULL,
/* E_ITEMEXP -16 */ "file %s, record %d: item expected\n",
/* E_DUPITEM -17 */ "file %s, record %d: plicate item %s\n",
/* E_UNKNOWN -18 */ "unknown error\n"
};
#endif
/*----------------------------------------------------------------------
Global Variables
----------------------------------------------------------------------*/
#ifndef QUIET
static char *prgname; /* program name for error messages */
#endif
static ITEMSET *itemset = NULL; /* item set */
static TASET *taset = NULL; /* transaction set */
static TATREE *tatree = NULL; /* transaction tree */
static ISTREE *istree = NULL; /* item set tree */
static FILE *in = NULL; /* input file */
static FILE *out = NULL; /* output file */
extern "C" TATREE * apriori( char*fn_in, char*fn_out, int supp, int & level,
Trie * bdPapriori, Trie * bdn, set<Element> * relist, double ratioNfC, double & eps, int ismax,
vector< unsigned int > * stat, int & maxBdP, bool & generatedFk, bool verbose ) ;
#endif
.c
============================================
/*----------------------------------------------------------------------
File : apriori.c
Contents: apriori algorithm for finding frequent item sets
(specialized version for FIMI 2003 workshop)
Author : Christian Borgelt
History : 15.08.2003 file created from normal apriori.c
16.08.2003 parameter for transaction filtering added
18.08.2003 dynamic filtering decision based on times added
21.08.2003 transaction sort changed to heapsort
20.09.2003 output file made optional
----------------------------------------------------------------------*/
/*
Modified by : Frédéric Flouvat
Modifications : store the positive and negative border into an
an input trie for ABS
process stastical informations on dataset to stop
the apriori classical iterations
Author : Frédéric Flouvat
----------------------------------------------------------------------*/
#include "apriori.h"
/*----------------------------------------------------------------------
Main Functions
----------------------------------------------------------------------*/
static void error (int code, ...)
{ /* --- print an error message */
#ifndef QUIET /* if not quiet version */
va_list args; /* list of variable arguments */
const char *msg; /* error message */
assert(prgname); /* check the program name */
if (code < E_UNKNOWN) code = E_UNKNOWN;
if (code < 0) { /* if to report an error, */
msg = errmsgs[-code]; /* get the error message */
if (!msg) msg = errmsgs[-E_UNKNOWN];
fprintf(stderr, "\n%s: ", prgname);
va_start(args, code); /* get variable arguments */
vfprintf(stderr, msg, args);/* print error message */
va_end(args); /* end argument evaluation */
}
#endif
#ifndef NDEBUG /* if debug version */
if (istree) ist_delete(istree);
if (tatree) tat_delete(tatree);
if (taset) tas_delete(taset, 0);
if (itemset) is_delete(itemset);
if (in) fclose(in); /* clean up memory */
if (out) fclose(out); /* and close files */
#endif
exit(code); /* abort the program */
} /* error() */
/*--------------------------------------------------------------------*/
TATREE * apriori( char*fn_in, char*fn_out, int supp, int & level, Trie * bdPapriori,
Trie * bdn , set<Element> * relist , double ratioNfC, double & eps,int ismax,
vector< unsigned int > * stat, int & maxBdP, bool & generatedFk, bool verbose )
{
int i, k, n; /* loop variables, counters */
int tacnt = 0; /* number of transactions */
int max = 0; /* maximum transaction size */
int empty = 1; /* number of empty item sets */
int *map, *set; /* identifier map, item set */
char *usage; /* flag vector for item usage */
clock_t t, tt, tc, x; /* timer for measurements */
double actNfC = 1 ;
double avgNfC = 0 ;
int nbgen = 0 ;
int nbfreq = 0 ;
level = 1 ;
bool endApriori = false ; // boolean to stop the initial classial apriori approach
int bdnsize = 0 ; // number of itemsets found infrequent
/* --- create item set and transaction set --- */
itemset = is_create(); /* create an item set and */
if (!itemset) error(E_NOMEM); /* set the special characters */
taset = tas_create(itemset); /* create a transaction set */
if (!taset) error(E_NOMEM); /* to store the transactions */
if( verbose ) MSG(fprintf(stderr, "\n")); /* terminate the startup message */
/* --- read transactions --- */
if( verbose )MSG(fprintf(stderr, "reading %s ... ", fn_in));
t = clock(); /* start the timer and */
in = fopen(fn_in, "r"); /* open the input file */
if (!in) error(E_FOPEN, fn_in);
for (tacnt = 0; 1; tacnt++) { /* transaction read loop */
k = is_read(itemset, in); /* read the next transaction */
if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset));
if (k > 0) break; /* check for error and end of file */
k = is_tsize(itemset); /* update the maximal */
if (k > max) max = k; /* transaction size */
if (taset && (tas_add(taset, NULL, 0) != 0))
error(E_NOMEM); /* add the loaded transaction */
} /* to the transaction set */
fclose(in); in = NULL; /* close the input file */
n = is_cnt(itemset); /* get the number of items */
if( verbose ) MSG(fprintf(stderr, "[%d item(s),", n));
if( verbose ) MSG(fprintf(stderr, " %d transaction(s)] done ", tacnt));
if( verbose ) MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));
/* --- sort and recode items --- */
if( verbose ) MSG(fprintf(stderr, "sorting and recoding items ... "));
t = clock(); /* start the timer */
map = (int*)malloc(is_cnt(itemset) *sizeof(int));
if (!map) error(E_NOMEM); /* create an item identifier map */
n = is_recode(itemset, supp, 2, map); /* 2: sorting mode */
tas_recode(taset, map, n); /* recode the loaded transactions */
max = tas_max(taset); /* get the new maximal t.a. size */
// use in the other part of the implementation to have the corresponding
// identifiant to an internal id
stat->reserve( n+2 ) ;
stat->push_back( 0 ) ;
for(int j= 0; j< n ; j++ )
{
stat->push_back( 0 ) ;
relist->insert( Element( atoi( is_name( itemset, j ) ) ,j) );
}
if( verbose ) MSG(fprintf(stderr, "[%d item(s)] ", n));
if( verbose ) MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
/* --- create a transaction tree --- */
if( verbose ) MSG(fprintf(stderr, "creating transaction tree ... "));
t = clock(); /* start the timer */
tatree = tat_create(taset,1); /* create a transaction tree */
if (!tatree) error(E_NOMEM); /* (compactify transactions) */
tt = clock() -t; /* note the construction time */
if( verbose ) MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));
/* --- create an item set tree --- */
if( verbose ) MSG(fprintf(stderr, "checking subsets of size 1"));
t = clock(); tc = 0; /* start the timer and */
istree = ist_create(n, supp); /* create an item set tree */
if (!istree) error(E_NOMEM);
for (k = n; --k >= 0; ) /* set single item frequencies */
ist_setcnt(istree, k, is_getfrq(itemset, k));
ist_settac(istree, tacnt); /* set the number of transactions */
usage = (char*)malloc(n *sizeof(char));
if (!usage) error(E_NOMEM); /* create a item usage vector */
/* --- check item subsets --- */
while (ist_height(istree) < max && ( ( ismax == -1 && endApriori == false )
|| ist_height(istree) < ismax )
)
{
nbgen = 0 ;
nbfreq = 0 ;
level ++ ;
i = ist_check(istree,usage);/* check current item usage */
if (i < max) max = i; /* update the maximum set size */
if (ist_height(istree) >= i) break;
k = ist_addlvl(istree, nbgen); /* while max. height is not reached, */
if (k < 0) error(E_NOMEM); /* add a level to the item set tree */
if (k != 0) break; /* if no level was added, abort */
if( verbose ) MSG(fprintf(stderr, " %d", ist_height(istree)));
if ((i < n) /* check item usage on current level */
&& (i *(double)tt < 0.1 *n *tc)) {
n = i; x = clock(); /* if items were removed and */
tas_filter(taset, usage); /* the counting time is long enough, */
tat_delete(tatree); /* remove unnecessary items */
tatree = tat_create(taset, 1);
if (!tatree) error(E_NOMEM);
tt = clock() -x; /* rebuild the transaction tree and */
} /* note the new construction time */
x = clock(); /* start the timer */
ist_countx(istree, tatree, nbfreq, istree->supp ); /* count the transaction tree */
tc = clock() -x; /* in the item set tree */
actNfC = 1-double(nbfreq)/double(nbgen) ;
avgNfC = avgNfC + actNfC ;
if( verbose )
{
cout<<" \t Fk : "<<nbfreq<<" Ck : "<<nbgen<<" NFk/Ck "<<actNfC<<" avg NFk/Ck "<<avgNfC/(level-1)<<endl;
}
bdnsize += nbgen - nbfreq ;
if( level >=4 && ( bdnsize / nbgen < 1.5 ) && ( bdnsize > 100 ) )
{
if( actNfC < ratioNfC )
{
eps = 0 ;
endApriori = true ;
}
else if( actNfC > 0.25 )
endApriori = true ;
}
} /* and note the new counting time */
if( verbose ) MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));
/* --- filter item sets --- */
t = clock(); /* start the timer */
#ifdef MAXIMAL /* filter maximal item sets */
if( verbose ) MSG(fprintf(stderr, "filtering maximal item sets ... "));
if( ratioNfC == 0 || nbgen < k+1 || ist_height(istree)>= max )
ist_filter2(istree, IST_MAXFRQ, 0);
else
ist_filter2(istree, IST_MAXFRQ, bdn);
if( verbose ) MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));
empty = (n <= 0) ? 1 : 0; /* check whether the empty item set */
#endif /* is maximal */
#ifdef CLOSED /* filter closed item sets */
if( verbose ) MSG(fprintf(stderr, "filtering closed item sets ... "));
ist_filter(istree, IST_CLOSED);
if( verbose ) MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));
for (k = n; --k >= 0; ) /* check for an item in all t.a. */
if (is_getfrq(itemset, k) == tacnt) break;
empty = (k <= 0) ? 1 : 0; /* check whether the empty item set */
#endif /* is closed */
/* --- print item sets --- */
for (i = ist_height(istree); --i >= 0; )
map[i] = 0; /* clear the item set counters */
if( verbose ) MSG(fprintf(stderr, "writing %s ... ", (fn_out) ? fn_out : "<none>"));
t = clock(); /* start the timer and */
if (fn_out) { /* if an output file is given, */
out = fopen(fn_out, "w"); /* open the output file */
if (!out) error(E_FOPEN, fn_out);
if (empty) fprintf(out, " (%d)\n", tacnt);
} /* report empty item set */
ist_init(istree); /* init. the item set extraction */
set = is_tract(itemset); /* get the transaction buffer */
for (n = empty; 1; n++) { /* extract item sets from the tree */
k = ist_set(istree, set, &supp);
if (k <= 0) break; /* get the next frequent item set */
map[k-1]++; /* count the item set */
if (fn_out) { /* if an output file is given */
for (i = 0; i < k; i++) { /* traverse the items */
fputs(is_name(itemset, set[i]), out);
fputc(' ', out); /* print the name of the next item */
} /* followed by a separator */
fprintf(out, "(%d)\n", supp);
} /* print the item set's support */
else
{
short unsigned * is = new short unsigned[k] ;
for (i = 0; i < k; i++) /* traverse the items */
{
is[i] = set[i] ;
}
if( k < level || nbgen < k+1 || ist_height(istree)>= max )
{
bdPapriori->insert(is, k ,supp ) ;
(*stat)[ 0 ] ++;
(*stat)[ k+1 ]++;
if( maxBdP < k )
maxBdP = k ;
}
else
{
generatedFk = true ;
}
delete[] is;
}
}
if (fn_out) { /* if an output file is given */
if (fflush(out) != 0) error(E_FWRITE, fn_out);
if (out != stdout) fclose(out);
out = NULL; /* close the output file */
}
if( verbose ) MSG(fprintf(stderr, "[%d set(s)] done ", n));
if( verbose ) MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));
/* --- print item set statistics --- */
k = ist_height(istree); /* find last nonzero counter */
if ((k > 0) && (map[k-1] <= 0)) k--;
if( verbose ){
printf("%d\n", empty); /* print the numbers of item sets */
for (i = 0; i < k; i++) printf("%d\n", map[i]);
}
/* --- clean up --- */
#ifndef NDEBUG /* if this is a debug version */
free(usage); /* delete the item usage vector */
free(map); /* and the identifier map */
ist_delete(istree); /* delete the item set tree, */
if (taset) tas_delete(taset, 0); /* the transaction set, */
is_delete(itemset); /* and the item set */
#endif
return tatree ;
}