
Apriori algorithm C code

Published: 2023-01-11 04:13:28

Ⅰ About the Apriori algorithm in data mining: please help derive the association rules. There are 5 transactions, minimum support 0.6, minimum confidence 0.6

The support count of abc is P1=3, of acd is P2=3, and of bcd is P3=3. Outputting the association rules means finding, among the rules formed from the items of the frequent itemsets, those whose confidence is at least the minimum confidence threshold. Because every rule formed from the items of a frequent itemset already has support at least the minimum support threshold, rule generation reduces to picking out the strong rules whose confidence reaches min_conf. The basic steps are:
1) For each frequent itemset L, generate all non-empty proper subsets of L.
2) For each non-empty proper subset s of L, if the support count of L divided by the support count of s is at least the minimum confidence threshold min_conf, output the strong association rule s => (L - s).
For example, the non-empty proper subsets of abc are a, b, c, ab, ac, bc. Compute each subset's support count, then divide abc's support count by it; if the result is at least 0.6, a strong rule can be output. With the support count of a being Pa=4, the confidence is P1/Pa = 3/4 = 0.75 >= 0.6, so the rule a => bc can be output; likewise the strong rules b => ac and c => ab can be output.
The same method is applied to acd and bcd.
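A minimal Python sketch of this rule-generation step (a sketch only: the single-item and pair support counts below are assumed for illustration, consistent with the counts quoted above, since the question only gives P1 = support(abc) = 3; support_count is a hypothetical lookup table, not part of the original question):

from itertools import combinations

# Assumed support counts for the example above.
support_count = {
    frozenset('abc'): 3,
    frozenset('a'): 4, frozenset('b'): 4, frozenset('c'): 4,
    frozenset('ab'): 3, frozenset('ac'): 3, frozenset('bc'): 3,
}

def rules_from_itemset(itemset, min_conf=0.6):
    """Yield every rule s => (itemset - s) whose confidence meets min_conf."""
    itemset = frozenset(itemset)
    for k in range(1, len(itemset)):
        for s in combinations(itemset, k):
            s = frozenset(s)
            conf = support_count[itemset] / support_count[s]
            if conf >= min_conf:
                yield (set(s), set(itemset - s), conf)

for antecedent, consequent, conf in rules_from_itemset('abc'):
    print(antecedent, '=>', consequent, 'confidence =', round(conf, 2))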

Ⅱ Urgently need C++ code implementing the Apriori algorithm

A C++ implementation can be downloaded from http://download.csdn.net/down/188143/chanjuanzz, but it requires registering and spending points.

Algorithm implementation

(1) Core class

The core implementation class of the Apriori algorithm is AprioriAlgorithm; the Java code is as follows:

package org.shirdrn.datamining.association;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/**
* <B>Association rule mining: the Apriori algorithm</B>
*
* <P>The implementation basically follows the core idea of the Apriori algorithm.
*
* @author shirdrn
* @date 2009/07/22 22:56:23
* @msn shirdrn#hotmail.com(#→@)
* @qq 187071722
*/
public class AprioriAlgorithm {

private Map<Integer, Set<String>> txDatabase; // transaction database
private Float minSup; // minimum support
private Float minConf; // minimum confidence
private Integer txDatabaseCount; // number of transactions in the database

private Map<Integer, Set<Set<String>>> freqItemSet; // collection of frequent itemsets
private Map<Set<String>, Set<Set<String>>> assiciationRules; // collection of strong association rules

public AprioriAlgorithm(
Map<Integer, Set<String>> txDatabase,
Float minSup,
Float minConf) {
this.txDatabase = txDatabase;
this.minSup = minSup;
this.minConf = minConf;
this.txDatabaseCount = this.txDatabase.size();
freqItemSet = new TreeMap<Integer, Set<Set<String>>>();
assiciationRules = new HashMap<Set<String>, Set<Set<String>>>();
}

/**
* Scan the transaction database and compute the frequent 1-itemsets
* @return
*/
public Map<Set<String>, Float> getFreq1ItemSet() {
Map<Set<String>, Float> freq1ItemSetMap = new HashMap<Set<String>, Float>();
Map<Set<String>, Integer> candFreq1ItemSet = this.getCandFreq1ItemSet();
Iterator<Map.Entry<Set<String>, Integer>> it = candFreq1ItemSet.entrySet().iterator();
while(it.hasNext()) {
Map.Entry<Set<String>, Integer> entry = it.next();
// compute the support
Float supported = new Float(entry.getValue().toString())/new Float(txDatabaseCount);
if(supported>=minSup) {
freq1ItemSetMap.put(entry.getKey(), supported);
}
}
return freq1ItemSetMap;
}

/**
* Compute the candidate frequent 1-itemsets
* @return
*/
public Map<Set<String>, Integer> getCandFreq1ItemSet() {
Map<Set<String>, Integer> candFreq1ItemSetMap = new HashMap<Set<String>, Integer>();
Iterator<Map.Entry<Integer, Set<String>>> it = txDatabase.entrySet().iterator();
// count the supports and build the candidate frequent 1-itemsets
while(it.hasNext()) {
Map.Entry<Integer, Set<String>> entry = it.next();
Set<String> itemSet = entry.getValue();
for(String item : itemSet) {
Set<String> key = new HashSet<String>();
key.add(item.trim());
if(!candFreq1ItemSetMap.containsKey(key)) {
Integer value = 1;
candFreq1ItemSetMap.put(key, value);
}
else {
Integer value = 1+candFreq1ItemSetMap.get(key);
candFreq1ItemSetMap.put(key, value);
}
}
}
return candFreq1ItemSetMap;
}

/**
* Compute the candidate frequent k-itemsets from the frequent (k-1)-itemsets
*
* @param m here m = k-1
* @param freqMItemSet the frequent (k-1)-itemsets
* @return
*/
public Set<Set<String>> aprioriGen(int m, Set<Set<String>> freqMItemSet) {
Set<Set<String>> candFreqKItemSet = new HashSet<Set<String>>();
Iterator<Set<String>> it = freqMItemSet.iterator();
Set<String> originalItemSet = null;
while(it.hasNext()) {
originalItemSet = it.next();
Iterator<Set<String>> itr = this.getIterator(originalItemSet, freqMItemSet);
while(itr.hasNext()) {
Set<String> identicalSet = new HashSet<String>(); // elements shared by the two itemsets (set intersection)
identicalSet.addAll(originalItemSet);
Set<String> set = itr.next();
identicalSet.retainAll(set); // what is left in identicalSet are the elements common to identicalSet and set
if(identicalSet.size() == m-1) { // the two (k-1)-itemsets share k-2 items
Set<String> differentSet = new HashSet<String>(); // elements in which the two itemsets differ (set difference)
differentSet.addAll(originalItemSet);
differentSet.removeAll(set); // since k-2 items are shared, exactly one element remains, i.e. differentSet has size 1
differentSet.addAll(set); // build one candidate k-itemset (set has size k-1, differentSet now has size k)
candFreqKItemSet.add(differentSet); // add it to the candidate k-itemset collection
}
}
}
return candFreqKItemSet;
}

/**
* Given one element (an itemset) of the frequent k-itemset collection, return an iterator over that collection positioned just after the element
* @param itemSet
* @param freqKItemSet the frequent k-itemsets
* @return
*/
private Iterator<Set<String>> getIterator(Set<String> itemSet, Set<Set<String>> freqKItemSet) {
Iterator<Set<String>> it = freqKItemSet.iterator();
while(it.hasNext()) {
if(itemSet.equals(it.next())) {
break;
}
}
return it;
}

/**
* Compute the frequent k-itemsets from the frequent (k-1)-itemsets by calling the aprioriGen method
*
* @param k
* @param freqMItemSet the frequent (k-1)-itemsets
* @return
*/
public Map<Set<String>, Float> getFreqKItemSet(int k, Set<Set<String>> freqMItemSet) {
Map<Set<String>, Integer> candFreqKItemSetMap = new HashMap<Set<String>, Integer>();
// call aprioriGen to obtain the candidate frequent k-itemsets
Set<Set<String>> candFreqKItemSet = this.aprioriGen(k-1, freqMItemSet);

// scan the transaction database
Iterator<Map.Entry<Integer, Set<String>>> it = txDatabase.entrySet().iterator();
// count the supports
while(it.hasNext()) {
Map.Entry<Integer, Set<String>> entry = it.next();
Iterator<Set<String>> kit = candFreqKItemSet.iterator();
while(kit.hasNext()) {
Set<String> kSet = kit.next();
Set<String> set = new HashSet<String>();
set.addAll(kSet);
set.removeAll(entry.getValue()); // set difference between the candidate k-itemset and the transaction's items
if(set.isEmpty()) { // if the copy is empty the transaction contains the candidate, so increment its support count
if(candFreqKItemSetMap.get(kSet) == null) {
Integer value = 1;
candFreqKItemSetMap.put(kSet, value);
}
else {
Integer value = 1+candFreqKItemSetMap.get(kSet);
candFreqKItemSetMap.put(kSet, value);
}
}
}
}
// compute the supports, build the frequent k-itemsets and return them
return support(candFreqKItemSetMap);
}

/**
* Derive the frequent k-itemsets from the candidate frequent k-itemsets
*
* @param candFreqKItemSetMap candidate k-itemsets (with their support counts)
*/
public Map<Set<String>, Float> support(Map<Set<String>, Integer> candFreqKItemSetMap) {
Map<Set<String>, Float> freqKItemSetMap = new HashMap<Set<String>, Float>();
Iterator<Map.Entry<Set<String>, Integer>> it = candFreqKItemSetMap.entrySet().iterator();
while(it.hasNext()) {
Map.Entry<Set<String>, Integer> entry = it.next();
// compute the support
Float supportRate = new Float(entry.getValue().toString())/new Float(txDatabaseCount);
if(supportRate<minSup) { // remove it if it does not meet the minimum support
it.remove();
}
else {
freqKItemSetMap.put(entry.getKey(), supportRate);
}
}
return freqKItemSetMap;
}

/**
* Mine all frequent itemsets
*/
public void mineFreqItemSet() {
// compute the frequent 1-itemsets
Set<Set<String>> freqKItemSet = this.getFreq1ItemSet().keySet();
freqItemSet.put(1, freqKItemSet);
// compute the frequent k-itemsets (k>1)
int k = 2;
while(true) {
Map<Set<String>, Float> freqKItemSetMap = this.getFreqKItemSet(k, freqKItemSet);
if(!freqKItemSetMap.isEmpty()) {
this.freqItemSet.put(k, freqKItemSetMap.keySet());
freqKItemSet = freqKItemSetMap.keySet();
}
else {
break;
}
k++;
}
}

/**
* <P>Mine the strong association rules
* <P>First mine all frequent itemsets, then mine the association rules on top of them
*/
public void mineAssociationRules() {
freqItemSet.remove(1); // drop the frequent 1-itemsets
Iterator<Map.Entry<Integer, Set<Set<String>>>> it = freqItemSet.entrySet().iterator();
while(it.hasNext()) {
Map.Entry<Integer, Set<Set<String>>> entry = it.next();
for(Set<String> itemSet : entry.getValue()) {
// mine association rules from each frequent itemset
mine(itemSet);
}
}
}

/**
* Run one round of rule mining for each frequent itemset taken from the collection freqItemSet
* @param itemSet one frequent itemset from the collection freqItemSet
*/
public void mine(Set<String> itemSet) {
int n = itemSet.size()/2; // by symmetry, proper subsets of at most half the size are enough as antecedents
for(int i=1; i<=n; i++) {
// get the proper subsets of the frequent itemset itemSet that will serve as rule antecedents
Set<Set<String>> properSubset = ProperSubsetCombination.getProperSubset(i, itemSet);
// for each antecedent subset, derive the corresponding consequent set and keep mining rules
for(Set<String> conditionSet : properSubset) {
Set<String> conclusionSet = new HashSet<String>();
conclusionSet.addAll(itemSet);
conclusionSet.removeAll(conditionSet); // remove the items that appear in the antecedent
confide(conditionSet, conclusionSet); // compute the confidences and record any strong rules
}
}
}

/**
* For a given antecedent itemset and its consequent itemset, count the rule's support and use the confidence to decide whether it is a strong rule
* @param conditionSet the antecedent (condition) frequent itemset
* @param conclusionSet the consequent (conclusion) frequent itemset
*/
public void confide(Set<String> conditionSet, Set<String> conclusionSet) {
// scan the transaction database
Iterator<Map.Entry<Integer, Set<String>>> it = txDatabase.entrySet().iterator();
// count the rule's supports
int conditionToConclusionCnt = 0; // count for the rule antecedent => consequent
int conclusionToConditionCnt = 0; // count for the rule consequent => antecedent
int supCnt = 0; // support count of the rule (both sides present)
while(it.hasNext()) {
Map.Entry<Integer, Set<String>> entry = it.next();
Set<String> txSet = entry.getValue();
Set<String> set1 = new HashSet<String>();
Set<String> set2 = new HashSet<String>();
set1.addAll(conditionSet);

set1.removeAll(txSet); // set difference: set1 - txSet
if(set1.isEmpty()) { // if empty, this transaction contains the antecedent conditionSet
// count it
conditionToConclusionCnt++;
}
set2.addAll(conclusionSet);
set2.removeAll(txSet); // set difference: set2 - txSet
if(set2.isEmpty()) { // if empty, this transaction contains the consequent conclusionSet
// count it
conclusionToConditionCnt++;

}
if(set1.isEmpty() && set2.isEmpty()) {
supCnt++;
}
}
// compute the confidences
Float conditionToConclusionConf = new Float(supCnt)/new Float(conditionToConclusionCnt);
if(conditionToConclusionConf>=minConf) {
if(assiciationRules.get(conditionSet) == null) { // no rule with this antecedent has been recorded yet
Set<Set<String>> conclusionSetSet = new HashSet<Set<String>>();
conclusionSetSet.add(conclusionSet);
assiciationRules.put(conditionSet, conclusionSetSet);
}
else {
assiciationRules.get(conditionSet).add(conclusionSet);
}
}
Float conclusionToConditionConf = new Float(supCnt)/new Float(conclusionToConditionCnt);
if(conclusionToConditionConf>=minConf) {
if(assiciationRules.get(conclusionSet) == null) { // no rule with this consequent as antecedent has been recorded yet
Set<Set<String>> conclusionSetSet = new HashSet<Set<String>>();
conclusionSetSet.add(conditionSet);
assiciationRules.put(conclusionSet, conclusionSetSet);
}
else {
assiciationRules.get(conclusionSet).add(conditionSet);
}
}
}

/**
* The map of frequent itemsets produced by the mining
*
* @return the mined frequent itemsets
*/
public Map<Integer, Set<Set<String>>> getFreqItemSet() {
return freqItemSet;
}

/**
* Get all mined strong association rules
* @return the collection of association rules
*/
public Map<Set<String>, Set<Set<String>>> getAssiciationRules() {
return assiciationRules;
}
}

(2) Helper class

ProperSubsetCombination is a helper class used during rule mining to generate the non-empty proper subsets of a frequent itemset. The implementation is as follows:

package org.shirdrn.datamining.association;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;

/**
* <B>Compute the non-empty proper subsets of a frequent itemset</B>
* <P>Generates the combinations of m elements (m in the closed interval 2~n/2) taken from a set of size n, i.e. the required non-empty proper subsets
*
* @author shirdrn
* @date 2009/07/22 22:56:23
* @msn shirdrn#hotmail.com(#→@)
* @qq 187071722
*/
public class ProperSubsetCombination {

private static String[] array;
private static BitSet startBitSet; // initial state of the bit set
private static BitSet endBitSet; // final state of the bit set, used to terminate the loop
private static Set<Set<String>> properSubset; // collection of proper subsets

/**
* Compute the non-empty proper subsets of a set
*
* @param n the size of the proper subsets to generate
* @param itemSet one frequent itemset
* @return the collection of non-empty proper subsets
*/
public static Set<Set<String>> getProperSubset(int n, Set<String> itemSet) {
String[] array = new String[itemSet.size()];
ProperSubsetCombination.array = itemSet.toArray(array);
properSubset = new HashSet<Set<String>>();
startBitSet = new BitSet();
endBitSet = new BitSet();

// initialize startBitSet: fill the leftmost n bits with 1
for (int i=0; i<n; i++) {
startBitSet.set(i, true);
}

// initialize endBitSet: fill the rightmost n bits with 1
for (int i=array.length-1; i>=array.length-n; i--) {
endBitSet.set(i, true);
}

// add the combination encoded by the initial startBitSet to the proper subset collection
get(startBitSet);

while(!startBitSet.equals(endBitSet)) {
int zeroCount = 0; // number of 0s to the left of the first "10" pattern
int oneCount = 0; // number of 1s to the left of the first "10" pattern
int pos = 0; // index at which the "10" pattern is found

// scan startBitSet to locate the first "10" pattern
for (int i=0; i<array.length; i++) {
if (!startBitSet.get(i)) {
zeroCount++;
}
if (startBitSet.get(i) && !startBitSet.get(i+1)) {
pos = i;
oneCount = i - zeroCount;
// turn the "10" into "01"
startBitSet.set(i, false);
startBitSet.set(i+1, true);
break;
}
}
// move all the 1s to the left of the "10" back to the far left
int counter = Math.min(zeroCount, oneCount);
int startIndex = 0;
int endIndex = 0;
if(pos>1 && counter>0) {
pos--;
endIndex = pos;
for (int i=0; i<counter; i++) {
startBitSet.set(startIndex, true);
startBitSet.set(endIndex, false);
startIndex = i+1;
pos--;
if(pos>0) {
endIndex = pos;
}
}
}
get(startBitSet);
}
return properSubset;
}

/**
* From the startBitSet obtained by one shift step, extract one proper subset
* @param bitSet
*/
private static void get(BitSet bitSet) {
Set<String> set = new HashSet<String>();
for(int i=0; i<array.length; i++) {
if(bitSet.get(i)) {
set.add(array[i]);
}
}
properSubset.add(set);
}
}
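As a cross-check of what ProperSubsetCombination computes (and assuming plain Python is acceptable for illustration), the non-empty proper subsets can also be produced very compactly with itertools; note that this sketch returns subsets of every size at once, whereas getProperSubset(n, itemSet) returns only those of size n:

from itertools import combinations

def proper_subsets(itemset):
    """All non-empty proper subsets of itemset, e.g. of {'A', 'B', 'C'}."""
    items = sorted(itemset)
    return [set(c) for k in range(1, len(items)) for c in combinations(items, k)]

print(proper_subsets({'A', 'B', 'C'}))
# e.g. [{'A'}, {'B'}, {'C'}, {'A', 'B'}, {'A', 'C'}, {'B', 'C'}]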

Test case

A simple test of the Apriori implementation above; the test case is shown below:

package org.shirdrn.datamining.association;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import org.shirdrn.datamining.association.AprioriAlgorithm;

import junit.framework.TestCase;

/**
* <B>Test class for the Apriori algorithm</B>
*
* @author shirdrn
* @date 2009/07/22 22:56:23
* @msn shirdrn#hotmail.com(#→@)
* @qq 187071722
*/
public class TestAprioriAlgorithm extends TestCase {

private AprioriAlgorithm apriori;
private Map<Integer, Set<String>> txDatabase;
private Float minSup = new Float("0.50");
private Float minConf = new Float("0.70");

@Override
protected void setUp() throws Exception {
create(); // build the transaction database
apriori = new AprioriAlgorithm(txDatabase, minSup, minConf);
}

/**
* Build the mock transaction database txDatabase
*/
public void create() {
txDatabase = new HashMap<Integer, Set<String>>();
Set<String> set1 = new TreeSet<String>();
set1.add("A");
set1.add("B");
set1.add("C");
set1.add("E");
txDatabase.put(1, set1);
Set<String> set2 = new TreeSet<String>();
set2.add("A");
set2.add("B");
set2.add("C");
txDatabase.put(2, set2);
Set<String> set3 = new TreeSet<String>();
set3.add("C");
set3.add("D");
txDatabase.put(3, set3);
Set<String> set4 = new TreeSet<String>();
set4.add("A");
set4.add("B");
set4.add("E");
txDatabase.put(4, set4);
}

/**
* Test mining the frequent 1-itemsets
*/
public void testFreq1ItemSet() {
System.out.println("Frequent 1-itemsets : " + apriori.getFreq1ItemSet());
}

/**
* Test the aprioriGen method that generates candidate frequent itemsets
*/
public void testAprioriGen() {
System.out.println(
"Candidate frequent 2-itemsets : " +
this.apriori.aprioriGen(1, this.apriori.getFreq1ItemSet().keySet())
);
}

/**
* Test mining the frequent 2-itemsets
*/
public void testGetFreq2ItemSet() {
System.out.println(
"Frequent 2-itemsets : " +
this.apriori.getFreqKItemSet(2, this.apriori.getFreq1ItemSet().keySet())
);
}

/**
* Test mining the frequent 3-itemsets
*/
public void testGetFreq3ItemSet() {
System.out.println(
"Frequent 3-itemsets : " +
this.apriori.getFreqKItemSet(
3,
this.apriori.getFreqKItemSet(2, this.apriori.getFreq1ItemSet().keySet()).keySet()
)
);
}

/**
* Test mining all frequent itemsets
*/
public void testGetFreqItemSet() {
this.apriori.mineFreqItemSet(); // mine the frequent itemsets
System.out.println("All frequent itemsets : " + this.apriori.getFreqItemSet());
}

/**
* Test mining all strong association rules
*/
public void testMineAssociationRules() {
this.apriori.mineFreqItemSet(); // mine the frequent itemsets first
this.apriori.mineAssociationRules();
System.out.println("Association rules : " + this.apriori.getAssiciationRules());
}
}

Test results:

Frequent 1-itemsets : {[E]=0.5, [A]=0.75, [B]=0.75, [C]=0.75}
Candidate frequent 2-itemsets : [[E, C], [A, B], [B, C], [A, C], [E, B], [E, A]]
Frequent 2-itemsets : {[A, B]=0.75, [B, C]=0.5, [A, C]=0.5, [E, B]=0.5, [E, A]=0.5}
Frequent 3-itemsets : {[E, A, B]=0.5, [A, B, C]=0.5}
All frequent itemsets : {1=[[E], [A], [B], [C]], 2=[[A, B], [B, C], [A, C], [E, B], [E, A]], 3=[[E, A, B], [A, B, C]]}
Association rules : {[E]=[[A], [B], [A, B]], [A]=[[B]], [B]=[[A]], [B, C]=[[A]], [A, C]=[[B]], [E, B]=[[A]], [E, A]=[[B]]}

From the test results, the complete set of frequent itemsets mined with the Apriori algorithm is:

{1=[[E], [A], [B], [C]], 2=[[A, B], [B, C], [A, C], [E, B], [E, A]], 3=[[E, A, B], [A, B, C]]}

and the complete set of strong association rules mined with the Apriori algorithm is:

{E}→{A}, {E}→{B}, {E}→{A,B}, {A}→{B}, {B}→{A}, {B,C}→{A}, {A,C}→{B}, {B,E}→{A}, {A,E}→{B}.

Ⅲ A MATLAB program for Apriori association rule mining, complete and with detailed comments

Below is the part of an Apriori program that derives the frequent k-itemsets starting from the frequent 2-itemsets. There are two problems with it:
1. The K in the while loop seems to stay fixed, i.e. it is always the number of frequent 2-itemsets. Once the frequent 3-itemsets have been obtained, shouldn't K change? How is that reflected?
2. The program has two large for loops, but as soon as one frequent 3-itemset is found the second for loop ends, even though there should be other frequent 3-itemsets. Shouldn't the for loop run unconditionally all the way up to its bound k? At the time k was 15, yet when the program finished i=2 and j=3, and j never went on to 4 and the rest up to k. What is the reason? Please help, this is urgent...
while( k>0)
le=length(candidate{1});
num=2;
nl=0;
for i=1:k-1
for j=i+1:k
x1=candidate{i}; %candidate initially holds the frequent 2-itemsets; this picks the i-th frequent itemset
x2=candidate{j};
c = intersect(x1, x2);
M=0;
r=1;
nn=0;
l1=0;
if (length(c)==le-1) & (sum(c==x1(1:le-1))==le-1)
houxuan=union(x1(1:le),x2(le));
%tree pruning: if any (K-1)-subset of a candidate is infrequent, prune the candidate away
sub_set=subset(houxuan);
%generate all (K-1)-subsets of this candidate
NN=length(sub_set);
%check whether all these (K-1)-subsets are frequent
while(r & M<NN)
M=M+1;
r=in(sub_set{M},candidate);
end
if M==NN
nl=nl+1;
%candidate k-itemset
cand{nl}=houxuan;
%record how many times each candidate k-itemset occurs
le=length(cand{1});
for i=1:m
s=cand{nl};
x=X(i,:);
if sum(x(s))==le
nn=nn+1;
end
end
end
end
%pick the frequent itemsets from the candidate set
if nn>=th
ll=ll+1;
candmid{nl}=cand{nl};
pfxj(nl).element=cand{nl};
pfxj(nl).time=nn;
disp('Frequent itemsets obtained:')
result=(candmid{nl});
disp(result);
end

end
end
end

Ⅳ The Apriori algorithm

Apriori was the first association rule mining algorithm and remains the classic one. It uses a level-wise, iterative search to find relationships among itemsets in the database and form rules; each iteration consists of a join step (a matrix-like combination operation) and a prune step (discarding intermediate results that cannot be frequent).
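A rough Python sketch of that join-and-prune iteration (for illustration only; it is not taken from any of the implementations quoted in this article):

from itertools import combinations

def apriori_gen(freq_prev):
    """Join frequent (k-1)-itemsets sharing k-2 items, then prune any
    candidate that has an infrequent (k-1)-subset."""
    prev = set(freq_prev)
    k = len(next(iter(prev))) + 1
    candidates = set()
    for a in prev:
        for b in prev:
            union = a | b
            if len(union) == k:                      # join step
                if all(frozenset(s) in prev          # prune step
                       for s in combinations(union, k - 1)):
                    candidates.add(union)
    return candidates

freq2 = {frozenset('AB'), frozenset('AC'), frozenset('BC'),
         frozenset('AE'), frozenset('BE')}
print(apriori_gen(freq2))  # {frozenset({'A','B','C'}), frozenset({'A','B','E'})}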

Ⅴ How to implement the Apriori algorithm in Java

from operator import and_
from itertools import combinations

class AprioriAssociationRule:
    def __init__(self, inputfile):
        self.transactions = []
        self.itemSet = set([])
        inf = open(inputfile, 'rb')
        for line in inf.readlines():
            elements = set(filter(lambda entry: len(entry) > 0, line.strip().split(',')))
            if len(elements) > 0:
                self.transactions.append(elements)
                for element in elements:
                    self.itemSet.add(element)
        inf.close()
        self.toRetItems = {}
        self.associationRules = []

    def getSupport(self, itemcomb):
        if type(itemcomb) != frozenset:
            itemcomb = frozenset([itemcomb])
        within_transaction = lambda transaction: reduce(and_, [(item in transaction) for item in itemcomb])
        count = len(filter(within_transaction, self.transactions))
        return float(count) / float(len(self.transactions))

    def runApriori(self, minSupport=0.15, minConfidence=0.6):
        itemCombSupports = filter(lambda freqpair: freqpair[1] >= minSupport,
                                  map(lambda item: (frozenset([item]), self.getSupport(item)), self.itemSet))
        currentLset = set(map(lambda freqpair: freqpair[0], itemCombSupports))
        k = 2
        while len(currentLset) > 0:
            currentCset = set([i.union(j) for i in currentLset for j in currentLset if len(i.union(j)) == k])
            currentItemCombSupports = filter(lambda freqpair: freqpair[1] >= minSupport,
                                             map(lambda item: (item, self.getSupport(item)), currentCset))
            currentLset = set(map(lambda freqpair: freqpair[0], currentItemCombSupports))
            itemCombSupports.extend(currentItemCombSupports)
            k += 1
        for key, supportVal in itemCombSupports:
            self.toRetItems[key] = supportVal
        self.calculateAssociationRules(minConfidence=minConfidence)

    def calculateAssociationRules(self, minConfidence=0.6):
        for key in self.toRetItems:
            subsets = [frozenset(item) for k in range(1, len(key)) for item in combinations(key, k)]
            for subset in subsets:
                confidence = self.toRetItems[key] / self.toRetItems[subset]
                if confidence > minConfidence:
                    self.associationRules.append([subset, key - subset, confidence])
In Scala it is also only about sixty-odd lines:
import scala.io.Source
import scala.collection.immutable.List
import scala.collection.immutable.Set
import java.io.File
import scala.collection.mutable.Map

class AprioriAlgorithm(inputFile: File) {
  var transactions: List[Set[String]] = List()
  var itemSet: Set[String] = Set()
  for (line <- Source.fromFile(inputFile).getLines()) {
    val elementSet = line.trim.split(',').toSet
    if (elementSet.size > 0) {
      transactions = transactions :+ elementSet
      itemSet = itemSet ++ elementSet
    }
  }
  var toRetItems: Map[Set[String], Double] = Map()
  var associationRules: List[(Set[String], Set[String], Double)] = List()

  def getSupport(itemComb: Set[String]): Double = {
    def withinTransaction(transaction: Set[String]): Boolean = itemComb
      .map(x => transaction.contains(x))
      .reduceRight((x1, x2) => x1 && x2)
    val count = transactions.filter(withinTransaction).size
    count.toDouble / transactions.size.toDouble
  }

  def runApriori(minSupport: Double = 0.15, minConfidence: Double = 0.6) = {
    var itemCombs = itemSet.map(word => (Set(word), getSupport(Set(word))))
      .filter(wordSupportPair => (wordSupportPair._2 > minSupport))
    var currentLSet: Set[Set[String]] = itemCombs.map(wordSupportPair => wordSupportPair._1).toSet
    var k: Int = 2
    while (currentLSet.size > 0) {
      val currentCSet: Set[Set[String]] = currentLSet.map(wordSet => currentLSet.map(wordSet1 => wordSet | wordSet1))
        .reduceRight((set1, set2) => set1 | set2)
        .filter(wordSet => (wordSet.size == k))
      val currentItemCombs = currentCSet.map(wordSet => (wordSet, getSupport(wordSet)))
        .filter(wordSupportPair => (wordSupportPair._2 > minSupport))
      currentLSet = currentItemCombs.map(wordSupportPair => wordSupportPair._1).toSet
      itemCombs = itemCombs | currentItemCombs
      k += 1
    }
    for (itemComb <- itemCombs) {
      toRetItems += (itemComb._1 -> itemComb._2)
    }
    calculateAssociationRule(minConfidence)
  }

  def calculateAssociationRule(minConfidence: Double = 0.6) = {
    toRetItems.keys.foreach(item =>
      item.subsets.filter(wordSet => (wordSet.size < item.size & wordSet.size > 0))
        .foreach(subset => {
          associationRules = associationRules :+ (subset, item diff subset,
            toRetItems(item).toDouble / toRetItems(subset).toDouble)
        })
    )
    associationRules = associationRules.filter(rule => rule._3 > minConfidence)
  }
}

I don't recommend Java; use a language like Python or Scala instead. In Python the code is only about 50 lines, and you can imagine how much more complex the equivalent Java looks, as the code above shows.

Ⅵ How to implement the Apriori algorithm in Python

class Apriori(object):
    def __init__(self, filename, min_support, item_start, item_end):
        self.filename = filename
        self.min_support = min_support  # minimum support
        self.min_confidence = 50
        self.line_num = 0  # number of item rows
        self.item_start = item_start  # which columns of each row hold the items
        self.item_end = item_end
        self.location = [[i] for i in range(self.item_end - self.item_start + 1)]
        self.support = self.sut(self.location)
        self.num = list(sorted(set([j for i in self.location for j in i])))  # record the items
        self.pre_support = []  # save the previous support, location, num
        self.pre_location = []
        self.pre_num = []
        self.item_name = []  # item names
        self.find_item_name()
        self.loop()
        self.confidence_sup()

    def deal_line(self, line):
        "Extract the needed items"
        return [i.strip() for i in line.split(' ') if i][self.item_start - 1:self.item_end]  # split on the data file's delimiter

    def find_item_name(self):
        "Extract item_name from the first line"
        with open(self.filename, 'r') as F:
            for index, line in enumerate(F.readlines()):
                if index == 0:
                    self.item_name = self.deal_line(line)
                    break

    def sut(self, location):
        """
        Input: [[1,2,3],[2,3,4],[1,3,5]...]
        Output: the support of each location set, e.g. [123,435,234...]
        """
        with open(self.filename, 'r') as F:
            support = [0] * len(location)
            for index, line in enumerate(F.readlines()):
                if index == 0: continue
                # extract each record
                item_line = self.deal_line(line)
                for index_num, i in enumerate(location):
                    flag = 0
                    for j in i:
                        if item_line[j] != 'T':
                            flag = 1
                            break
                    if not flag:
                        support[index_num] += 1
            self.line_num = index  # number of rows, excluding the first item_name row
        return support

    def select(self, c):
        "Return the candidate locations"
        stack = []
        for i in self.location:
            for j in self.num:
                if j in i:
                    if len(i) == c:
                        stack.append(i)
                else:
                    stack.append([j] + i)
        # deduplicate the nested list
        import itertools
        s = sorted([sorted(i) for i in stack])
        location = list(s for s, _ in itertools.groupby(s))
        return location

    def del_location(self, support, location):
        "Remove candidate sets that do not satisfy the conditions"
        # drop those below the minimum support
        for index, i in enumerate(support):
            if i < self.line_num * self.min_support / 100:
                support[index] = 0
        # prune by the second Apriori rule
        for index, j in enumerate(location):
            sub_location = [j[:index_loc] + j[index_loc + 1:] for index_loc in range(len(j))]
            flag = 0
            for k in sub_location:
                if k not in self.location:
                    flag = 1
                    break
            if flag:
                support[index] = 0
        # delete the useless locations
        location = [i for i, j in zip(location, support) if j != 0]
        support = [i for i in support if i != 0]
        return support, location

    def loop(self):
        "Iterate over the level-s frequent itemsets"
        s = 2
        while True:
            print '-' * 80
            print 'The', s - 1, 'loop'
            print 'location', self.location
            print 'support', self.support
            print 'num', self.num
            print '-' * 80
            # generate the next level of candidate sets
            location = self.select(s)
            support = self.sut(location)
            support, location = self.del_location(support, location)
            num = list(sorted(set([j for i in location for j in i])))
            s += 1
            if location and support and num:
                self.pre_num = self.num
                self.pre_location = self.location
                self.pre_support = self.support
                self.num = num
                self.location = location
                self.support = support
            else:
                break

    def confidence_sup(self):
        "Compute the confidence"
        if sum(self.pre_support) == 0:
            print 'min_support error'  # the very first iteration already failed
        else:
            for index_location, each_location in enumerate(self.location):
                del_num = [each_location[:index] + each_location[index + 1:] for index in range(len(each_location))]  # generate the previous-level frequent itemsets
                del_num = [i for i in del_num if i in self.pre_location]  # drop subsets that are not previous-level frequent itemsets
                del_support = [self.pre_support[self.pre_location.index(i)] for i in del_num if i in self.pre_location]  # look up their supports from the previous level
                # print del_num
                # print self.support[index_location]
                # print del_support
                for index, i in enumerate(del_num):  # compute the support and confidence of each association rule
                    index_support = 0
                    if len(self.support) != 1:
                        index_support = index
                    support = float(self.support[index_location]) / self.line_num * 100  # support
                    s = [j for index_item, j in enumerate(self.item_name) if index_item in i]
                    if del_support[index]:
                        confidence = float(self.support[index_location]) / del_support[index] * 100
                        if confidence > self.min_confidence:
                            print ','.join(s), '->>', self.item_name[each_location[index]], 'min_support:', str(support) + '%', 'min_confidence:', str(confidence) + '%'

def main():
    c = Apriori('basket.txt', 14, 3, 13)
    d = Apriori('simple.txt', 50, 2, 6)

if __name__ == '__main__':
    main()

Apriori(filename, min_support, item_start, item_end)

Parameter description

filename: file name (path)
min_support: minimum support
item_start: starting position of the items

item_end: ending position of the items

import apriori
c = apriori.Apriori('basket.txt', 11, 3, 13)


Output:

Ⅶ How to implement the Apriori algorithm in Java

Author: 何史提
Link: https://www.hu.com/question/22590018/answer/26646688
Source: Zhihu
Copyright belongs to the author. For commercial reproduction please contact the author for permission; for non-commercial reproduction please credit the source.

The idea behind the Apriori algorithm is actually very simple, but implementing it is surprisingly involved, because it inevitably uses higher-level data structures such as sets and hash tables, and there are many loops for reading the data.
I don't recommend Java; use a language like Python or Scala instead. In Python the code is only about 50 lines, and you can imagine how much more complex the equivalent Java looks. See below:

from operator import and_
from itertools import combinations

class AprioriAssociationRule:
    def __init__(self, inputfile):
        self.transactions = []
        self.itemSet = set([])
        inf = open(inputfile, 'rb')
        for line in inf.readlines():
            elements = set(filter(lambda entry: len(entry)>0, line.strip().split(',')))
            if len(elements)>0:
                self.transactions.append(elements)
                for element in elements:
                    self.itemSet.add(element)
        inf.close()
        self.toRetItems = {}
        self.associationRules = []

    def getSupport(self, itemcomb):
        if type(itemcomb) != frozenset:
            itemcomb = frozenset([itemcomb])
        within_transaction = lambda transaction: reduce(and_, [(item in transaction) for item in itemcomb])
        count = len(filter(within_transaction, self.transactions))
        return float(count)/float(len(self.transactions))

    def runApriori(self, minSupport=0.15, minConfidence=0.6):
        itemCombSupports = filter(lambda freqpair: freqpair[1]>=minSupport,
                                  map(lambda item: (frozenset([item]), self.getSupport(item)), self.itemSet))
        currentLset = set(map(lambda freqpair: freqpair[0], itemCombSupports))
        k = 2
        while len(currentLset)>0:
            currentCset = set([i.union(j) for i in currentLset for j in currentLset if len(i.union(j))==k])
            currentItemCombSupports = filter(lambda freqpair: freqpair[1]>=minSupport,
                                             map(lambda item: (item, self.getSupport(item)), currentCset))
            currentLset = set(map(lambda freqpair: freqpair[0], currentItemCombSupports))
            itemCombSupports.extend(currentItemCombSupports)
            k += 1
        for key, supportVal in itemCombSupports:
            self.toRetItems[key] = supportVal
        self.calculateAssociationRules(minConfidence=minConfidence)

    def calculateAssociationRules(self, minConfidence=0.6):
        for key in self.toRetItems:
            subsets = [frozenset(item) for k in range(1, len(key)) for item in combinations(key, k)]
            for subset in subsets:
                confidence = self.toRetItems[key] / self.toRetItems[subset]
                if confidence > minConfidence:
                    self.associationRules.append([subset, key-subset, confidence])
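A possible usage sketch for the class above (the input file name is hypothetical; like the class itself, this assumes Python 2, where reduce and the list-returning filter/map are built-ins):

# Hypothetical input file: one transaction per line, items separated by commas.
miner = AprioriAssociationRule('transactions.csv')
miner.runApriori(minSupport=0.15, minConfidence=0.6)
print(miner.toRetItems)        # {frozenset of items: support, ...}
print(miner.associationRules)  # [[antecedent, consequent, confidence], ...]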

Ⅷ What is the Apriori algorithm

The classic association rule mining algorithms include Apriori and FP-growth.

The Apriori algorithm scans the transaction database multiple times, and in each pass uses candidate frequent itemsets to produce the frequent itemsets; FP-growth instead uses a tree structure and obtains the frequent itemsets directly, without generating candidates, which greatly reduces the number of database scans and so improves efficiency. Apriori, however, scales out more easily and can be applied in areas such as parallel computation.

Extended reading:

The Apriori algorithm is a fundamental algorithm in association rule mining.

The Apriori algorithm splits the discovery of association rules into two steps:

First, iteratively retrieve all frequent itemsets in the transaction database, i.e. the itemsets whose support is not lower than the user-defined threshold;

Second, use the frequent itemsets to construct rules that meet the user's minimum confidence. Mining or identifying all frequent itemsets is the core of the algorithm and accounts for most of the total computation.
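As an illustrative aside (not part of the original answer), the same two-step workflow, frequent itemsets first and rules second, is exposed by the mlxtend library, assuming one-hot transaction data in a pandas DataFrame; the transactions below are the ones used in the Java test case earlier in this article:

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

transactions = [['A', 'B', 'C', 'E'], ['A', 'B', 'C'], ['C', 'D'], ['A', 'B', 'E']]

te = TransactionEncoder()
df = pd.DataFrame(te.fit(transactions).transform(transactions), columns=te.columns_)

# Step 1: frequent itemsets whose support is at least 0.5
frequent = apriori(df, min_support=0.5, use_colnames=True)
# Step 2: rules from those itemsets that meet the minimum confidence
rules = association_rules(frequent, metric="confidence", min_threshold=0.7)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])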

Ⅸ Urgently looking for the code of an Apriori algorithm implemented in C

http://www.csc.liv.ac.uk/~frans/Notes/KDD/AssocRuleMine/apriori.html

.h
====================================
/*----------------------------------------------------------------------
File : apriori.h
Contents: apriori algorithm for finding frequent item sets
(specialized version for FIMI 2003 workshop)
Author : Christian Borgelt
History : 15.08.2003 file created from normal apriori.c
16.08.2003 parameter for transaction filtering added
18.08.2003 dynamic filtering decision based on times added
21.08.2003 transaction sort changed to heapsort
20.09.2003 output file made optional
----------------------------------------------------------------------*/
/*
Modified by : Frédéric Flouvat
Modifications : store the positive and negative border into an
an input trie for ABS
process stastical informations on dataset to stop
the apriori classical iterations
Author : Frédéric Flouvat
----------------------------------------------------------------------*/
#ifndef APRIRORI_H
#define APRIRORI_H

#include <iostream>
using namespace std;
#define MAXIMAL

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#include <assert.h>

#include "tract.h"
#include "istree.h"
#include "Application.h"

/*----------------------------------------------------------------------
Preprocessor Definitions
----------------------------------------------------------------------*/
#define PRGNAME "fim/apriori"
#define DESCRIPTION "frequent item sets miner for FIMI 2003"
#define VERSION "version 1.7 (2003.12.02) " \
"(c) 2003 Christian Borgelt"

/* --- error codes --- */
#define E_OPTION (-5) /* unknown option */
#define E_OPTARG (-6) /* missing option argument */
#define E_ARGCNT (-7) /* too few/many arguments */
#define E_SUPP (-8) /* invalid minimum support */
#define E_NOTAS (-9) /* no items or transactions */
#define E_UNKNOWN (-18) /* unknown error */

#ifndef QUIET /* if not quiet version */
#define MSG(x) x /* print messages */
#else /* if quiet version */
#define MSG(x) /* suppress messages */
#endif

#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)
#define RECCNT(s) (tfs_reccnt(is_tfscan(s)) \
+ ((tfs_delim(is_tfscan(s)) == TFS_REC) ? 0 : 1))
#define BUFFER(s) tfs_buf(is_tfscan(s))

/*----------------------------------------------------------------------
Constants
----------------------------------------------------------------------*/
#ifndef QUIET /* if not quiet version */

/* --- error messages --- */
static const char *errmsgs[] = {
/* E_NONE 0 */ "no error\n",
/* E_NOMEM -1 */ "not enough memory\n",
/* E_FOPEN -2 */ "cannot open file %s\n",
/* E_FREAD -3 */ "read error on file %s\n",
/* E_FWRITE -4 */ "write error on file %s\n",
/* E_OPTION -5 */ "unknown option -%c\n",
/* E_OPTARG -6 */ "missing option argument\n",
/* E_ARGCNT -7 */ "wrong number of arguments\n",
/* E_SUPP -8 */ "invalid minimal support %d\n",
/* E_NOTAS -9 */ "no items or transactions to work on\n",
/* -10 to -15 */ NULL, NULL, NULL, NULL, NULL, NULL,
/* E_ITEMEXP -16 */ "file %s, record %d: item expected\n",
/* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n",
/* E_UNKNOWN -18 */ "unknown error\n"
};
#endif

/*----------------------------------------------------------------------
Global Variables
----------------------------------------------------------------------*/
#ifndef QUIET
static char *prgname; /* program name for error messages */
#endif
static ITEMSET *itemset = NULL; /* item set */
static TASET *taset = NULL; /* transaction set */
static TATREE *tatree = NULL; /* transaction tree */
static ISTREE *istree = NULL; /* item set tree */
static FILE *in = NULL; /* input file */
static FILE *out = NULL; /* output file */

extern "C" TATREE * apriori( char*fn_in, char*fn_out, int supp, int & level,
Trie * bdPapriori, Trie * bdn, set<Element> * relist, double ratioNfC, double & eps, int ismax,
vector< unsigned int > * stat, int & maxBdP, bool & generatedFk, bool verbose ) ;

#endif

.c
============================================
/*----------------------------------------------------------------------
File : apriori.c
Contents: apriori algorithm for finding frequent item sets
(specialized version for FIMI 2003 workshop)
Author : Christian Borgelt
History : 15.08.2003 file created from normal apriori.c
16.08.2003 parameter for transaction filtering added
18.08.2003 dynamic filtering decision based on times added
21.08.2003 transaction sort changed to heapsort
20.09.2003 output file made optional
----------------------------------------------------------------------*/
/*
Modified by : Frédéric Flouvat
Modifications : store the positive and negative border into an
an input trie for ABS
process stastical informations on dataset to stop
the apriori classical iterations
Author : Frédéric Flouvat
----------------------------------------------------------------------*/

#include "apriori.h"

/*----------------------------------------------------------------------
Main Functions
----------------------------------------------------------------------*/

static void error (int code, ...)
{ /* --- print an error message */
#ifndef QUIET /* if not quiet version */
va_list args; /* list of variable arguments */
const char *msg; /* error message */

assert(prgname); /* check the program name */
if (code < E_UNKNOWN) code = E_UNKNOWN;
if (code < 0) { /* if to report an error, */
msg = errmsgs[-code]; /* get the error message */
if (!msg) msg = errmsgs[-E_UNKNOWN];
fprintf(stderr, "\n%s: ", prgname);
va_start(args, code); /* get variable arguments */
vfprintf(stderr, msg, args);/* print error message */
va_end(args); /* end argument evaluation */
}
#endif
#ifndef NDEBUG /* if debug version */
if (istree) ist_delete(istree);
if (tatree) tat_delete(tatree);
if (taset) tas_delete(taset, 0);
if (itemset) is_delete(itemset);
if (in) fclose(in); /* clean up memory */
if (out) fclose(out); /* and close files */
#endif
exit(code); /* abort the program */
} /* error() */

/*--------------------------------------------------------------------*/

TATREE * apriori( char*fn_in, char*fn_out, int supp, int & level, Trie * bdPapriori,
Trie * bdn , set<Element> * relist , double ratioNfC, double & eps,int ismax,
vector< unsigned int > * stat, int & maxBdP, bool & generatedFk, bool verbose )
{
int i, k, n; /* loop variables, counters */
int tacnt = 0; /* number of transactions */
int max = 0; /* maximum transaction size */
int empty = 1; /* number of empty item sets */
int *map, *set; /* identifier map, item set */
char *usage; /* flag vector for item usage */
clock_t t, tt, tc, x; /* timer for measurements */

double actNfC = 1 ;
double avgNfC = 0 ;
int nbgen = 0 ;
int nbfreq = 0 ;
level = 1 ;
bool endApriori = false ; // boolean to stop the initial classial apriori approach
int bdnsize = 0 ; // number of itemsets found infrequent

/* --- create item set and transaction set --- */
itemset = is_create(); /* create an item set and */
if (!itemset) error(E_NOMEM); /* set the special characters */
taset = tas_create(itemset); /* create a transaction set */
if (!taset) error(E_NOMEM); /* to store the transactions */
if( verbose ) MSG(fprintf(stderr, "\n")); /* terminate the startup message */

/* --- read transactions --- */
if( verbose )MSG(fprintf(stderr, "reading %s ... ", fn_in));
t = clock(); /* start the timer and */
in = fopen(fn_in, "r"); /* open the input file */
if (!in) error(E_FOPEN, fn_in);
for (tacnt = 0; 1; tacnt++) { /* transaction read loop */
k = is_read(itemset, in); /* read the next transaction */
if (k < 0) error(k, fn_in, RECCNT(itemset), BUFFER(itemset));
if (k > 0) break; /* check for error and end of file */
k = is_tsize(itemset); /* update the maximal */
if (k > max) max = k; /* transaction size */
if (taset && (tas_add(taset, NULL, 0) != 0))
error(E_NOMEM); /* add the loaded transaction */
} /* to the transaction set */
fclose(in); in = NULL; /* close the input file */
n = is_cnt(itemset); /* get the number of items */
if( verbose ) MSG(fprintf(stderr, "[%d item(s),", n));
if( verbose ) MSG(fprintf(stderr, " %d transaction(s)] done ", tacnt));
if( verbose ) MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));

/* --- sort and recode items --- */
if( verbose ) MSG(fprintf(stderr, "sorting and recoding items ... "));
t = clock(); /* start the timer */
map = (int*)malloc(is_cnt(itemset) *sizeof(int));
if (!map) error(E_NOMEM); /* create an item identifier map */
n = is_recode(itemset, supp, 2, map); /* 2: sorting mode */
tas_recode(taset, map, n); /* recode the loaded transactions */
max = tas_max(taset); /* get the new maximal t.a. size */

// use in the other part of the implementation to have the corresponding
// identifiant to an internal id
stat->reserve( n+2 ) ;
stat->push_back( 0 ) ;
for(int j= 0; j< n ; j++ )
{
stat->push_back( 0 ) ;
relist->insert( Element( atoi( is_name( itemset, j ) ) ,j) );
}

if( verbose ) MSG(fprintf(stderr, "[%d item(s)] ", n));
if( verbose ) MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));

/* --- create a transaction tree --- */
if( verbose ) MSG(fprintf(stderr, "creating transaction tree ... "));
t = clock(); /* start the timer */
tatree = tat_create(taset,1); /* create a transaction tree */
if (!tatree) error(E_NOMEM); /* (compactify transactions) */
tt = clock() -t; /* note the construction time */
if( verbose ) MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));

/* --- create an item set tree --- */
if( verbose ) MSG(fprintf(stderr, "checking subsets of size 1"));
t = clock(); tc = 0; /* start the timer and */
istree = ist_create(n, supp); /* create an item set tree */
if (!istree) error(E_NOMEM);
for (k = n; --k >= 0; ) /* set single item frequencies */
ist_setcnt(istree, k, is_getfrq(itemset, k));
ist_settac(istree, tacnt); /* set the number of transactions */
usage = (char*)malloc(n *sizeof(char));
if (!usage) error(E_NOMEM); /* create a item usage vector */

/* --- check item subsets --- */
while (ist_height(istree) < max && ( ( ismax == -1 && endApriori == false )
|| ist_height(istree) < ismax )
)
{
nbgen = 0 ;
nbfreq = 0 ;

level ++ ;

i = ist_check(istree,usage);/* check current item usage */

if (i < max) max = i; /* update the maximum set size */
if (ist_height(istree) >= i) break;

k = ist_addlvl(istree, nbgen); /* while max. height is not reached, */

if (k < 0) error(E_NOMEM); /* add a level to the item set tree */
if (k != 0) break; /* if no level was added, abort */
if( verbose ) MSG(fprintf(stderr, " %d", ist_height(istree)));
if ((i < n) /* check item usage on current level */
&& (i *(double)tt < 0.1 *n *tc)) {
n = i; x = clock(); /* if items were removed and */
tas_filter(taset, usage); /* the counting time is long enough, */
tat_delete(tatree); /* remove unnecessary items */
tatree = tat_create(taset, 1);
if (!tatree) error(E_NOMEM);
tt = clock() -x; /* rebuild the transaction tree and */
} /* note the new construction time */
x = clock(); /* start the timer */

ist_countx(istree, tatree, nbfreq, istree->supp ); /* count the transaction tree */

tc = clock() -x; /* in the item set tree */

actNfC = 1-double(nbfreq)/double(nbgen) ;
avgNfC = avgNfC + actNfC ;

if( verbose )
{
cout<<" \t Fk : "<<nbfreq<<" Ck : "<<nbgen<<" NFk/Ck "<<actNfC<<" avg NFk/Ck "<<avgNfC/(level-1)<<endl;
}

bdnsize += nbgen - nbfreq ;

if( level >=4 && ( bdnsize / nbgen < 1.5 ) && ( bdnsize > 100 ) )
{
if( actNfC < ratioNfC )
{
eps = 0 ;
endApriori = true ;
}
else if( actNfC > 0.25 )
endApriori = true ;

}

} /* and note the new counting time */
if( verbose ) MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));

/* --- filter item sets --- */
t = clock(); /* start the timer */
#ifdef MAXIMAL /* filter maximal item sets */
if( verbose ) MSG(fprintf(stderr, "filtering maximal item sets ... "));

if( ratioNfC == 0 || nbgen < k+1 || ist_height(istree)>= max )
ist_filter2(istree, IST_MAXFRQ, 0);
else
ist_filter2(istree, IST_MAXFRQ, bdn);

if( verbose ) MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));
empty = (n <= 0) ? 1 : 0; /* check whether the empty item set */
#endif /* is maximal */
#ifdef CLOSED /* filter closed item sets */
if( verbose ) MSG(fprintf(stderr, "filtering closed item sets ... "));
ist_filter(istree, IST_CLOSED);
if( verbose ) MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));
for (k = n; --k >= 0; ) /* check for an item in all t.a. */
if (is_getfrq(itemset, k) == tacnt) break;
empty = (k <= 0) ? 1 : 0; /* check whether the empty item set */
#endif /* is closed */

/* --- print item sets --- */
for (i = ist_height(istree); --i >= 0; )
map[i] = 0; /* clear the item set counters */
if( verbose ) MSG(fprintf(stderr, "writing %s ... ", (fn_out) ? fn_out : "<none>"));
t = clock(); /* start the timer and */
if (fn_out) { /* if an output file is given, */
out = fopen(fn_out, "w"); /* open the output file */
if (!out) error(E_FOPEN, fn_out);
if (empty) fprintf(out, " (%d)\n", tacnt);
} /* report empty item set */
ist_init(istree); /* init. the item set extraction */
set = is_tract(itemset); /* get the transaction buffer */
for (n = empty; 1; n++) { /* extract item sets from the tree */

k = ist_set(istree, set, &supp);

if (k <= 0) break; /* get the next frequent item set */
map[k-1]++; /* count the item set */
if (fn_out) { /* if an output file is given */
for (i = 0; i < k; i++) { /* traverse the items */
fputs(is_name(itemset, set[i]), out);
fputc(' ', out); /* print the name of the next item */
} /* followed by a separator */
fprintf(out, "(%d)\n", supp);
} /* print the item set's support */
else
{
short unsigned * is = new short unsigned[k] ;

for (i = 0; i < k; i++) /* traverse the items */
{
is[i] = set[i] ;
}
if( k < level || nbgen < k+1 || ist_height(istree)>= max )
{
bdPapriori->insert(is, k ,supp ) ;

(*stat)[ 0 ] ++;
(*stat)[ k+1 ]++;

if( maxBdP < k )
maxBdP = k ;

}
else
{
generatedFk = true ;

}

delete[] is;

}
}
if (fn_out) { /* if an output file is given */
if (fflush(out) != 0) error(E_FWRITE, fn_out);
if (out != stdout) fclose(out);
out = NULL; /* close the output file */
}
if( verbose ) MSG(fprintf(stderr, "[%d set(s)] done ", n));
if( verbose ) MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));

/* --- print item set statistics --- */
k = ist_height(istree); /* find last nonzero counter */
if ((k > 0) && (map[k-1] <= 0)) k--;
if( verbose ){
printf("%d\n", empty); /* print the numbers of item sets */
for (i = 0; i < k; i++) printf("%d\n", map[i]);
}

/* --- clean up --- */
#ifndef NDEBUG /* if this is a debug version */
free(usage); /* delete the item usage vector */
free(map); /* and the identifier map */
ist_delete(istree); /* delete the item set tree, */

if (taset) tas_delete(taset, 0); /* the transaction set, */
is_delete(itemset); /* and the item set */
#endif

return tatree ;

}

