This is an assignment for Artificial Intelligence class.
This program takes an Excel spreadsheet of data as input and runs in two stages.
In the training stage, the program will compute the relevant log probabilities over the training set. 
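In the testing stage, it computes the classification attribute for each instance in the test set and tallies the accuracy, precision, and recall. Each log probability is the negative base-2 logarithm of the relevant frequency with a 0.1 Laplacian correction. That is:
$$lp(X.a_6 = v) = -\log_2 \frac{\#_T(X.a_6 = v) + 0.1}{|T| + 0.3}$$
$$lp(X.a_i = u \mid X.a_6 = v) = -\log_2 \frac{\#_T(X.a_i = u \wedge X.a_6 = v) + 0.1}{\#_T(X.a_6 = v) + 0.4}$$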
In the formulas above, $T$ is the training set; $|T|$ is the number of instances in $T$; and $\#_T(\varphi)$ is the number of instances in $T$ that satisfy condition $\varphi$. In the testing stage, for each instance $X.a_1 = u_1, \ldots, X.a_5 = u_5$ in the test set, compute the value of the sum
$$lp(X.a_6 = v) + \sum_{i=1}^{5} lp(X.a_i = u_i \mid X.a_6 = v)$$
for $v = 1, 2, 3$; choose the value of $v$ with the smallest sum; and compare it to the labeled value $X.a_6$.
A screenshot of actual output. All follows the format above.
Complete Python code below.
How to run:
python main.py (num of training set) (num of testing set) 
for example, the above result comes from:
python main.py 10 10
import math,sys
#read from excel
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
# Load the workbook once, at import time, from a path relative to the current
# working directory. The active worksheet is kept in the module-level name
# `data`; the counting helpers and readTestPoints() read cells from it.
wb=load_workbook(filename='prog4Data.xlsx')
data=wb.active
#print(data['A1'].value)
#numTraining=10
#numTesting=10
# NOTE(review): `category` is not referenced anywhere in the visible code.
category=[]
def getLp(numInstances,v):
    """Return the smoothed negative log2 probability of category ``v``.

    Computes ``-log2((#T(X.a6 = v) + 0.1) / (numInstances + 0.3))`` where
    ``#T(X.a6 = v)`` comes from ``getNumOfInCategory(v)``.

    Args:
        numInstances: Count used as the denominator before smoothing.
        v: Category value to look up in column 6.

    Returns:
        The negative base-2 log of the smoothed ratio, as a float.
    """
    # 0.1 is the Laplacian correction; the 0.3 in the denominator is
    # presumably 0.1 for each of the three categories (1, 2, 3).
    smoothedCount = getNumOfInCategory(v) + 0.1
    smoothedTotal = numInstances + 0.3
    return -math.log(smoothedCount / smoothedTotal, 2)
# lp(X.a1=1|X.a6=1))
def getBayesLp(numUVsatisfied,v):
    """Return the smoothed negative log2 conditional probability.

    Computes ``-log2((numUVsatisfied + 0.1) / (#T(X.a6 = v) + 0.4))`` where
    ``#T(X.a6 = v)`` comes from ``getNumOfInCategory(v)``.

    Args:
        numUVsatisfied: Number of training rows in category ``v`` that also
            have the attribute value of interest.
        v: Category value the probability is conditioned on.

    Returns:
        The negative base-2 log of the smoothed ratio, as a float.
    """
    # 0.1 is the Laplacian correction; the 0.4 in the denominator is
    # presumably 0.1 for each of four possible attribute values.
    numerator = numUVsatisfied + 0.1
    denominator = getNumOfInCategory(v) + 0.4
    return -math.log(numerator / denominator, 2)
# #T(category): number of instances in T that satisfy condition X
def getNumOfInCategory(whichCategory):
    """Count training rows whose column-6 value equals ``whichCategory``.

    Scans worksheet rows 2 through ``numTraining + 1`` of the module-level
    ``data`` sheet (row 1 is skipped).

    Args:
        whichCategory: Value compared against the cell in column 6.

    Returns:
        The number of matching rows in the scanned range.
    """
    firstRow = 2
    stopRow = numTraining + 2  # exclusive upper bound
    return sum(
        1
        for row in range(firstRow, stopRow)
        if data.cell(row=row, column=6).value == whichCategory
    )
    
def getNumSatisfyUV(indexOfAttribute, valueOfAttribute,whichCategory):
    """Count training rows matching both an attribute value and a category.

    Scans worksheet rows 2 through ``numTraining + 1`` of the module-level
    ``data`` sheet. A row counts when its column-6 value equals
    ``whichCategory`` and its ``indexOfAttribute`` column equals
    ``valueOfAttribute``.

    Args:
        indexOfAttribute: 1-based worksheet column of the attribute.
        valueOfAttribute: Value the attribute cell must equal.
        whichCategory: Value the column-6 cell must equal.

    Returns:
        The number of rows satisfying both conditions.
    """
    matches = 0
    for row in range(2, numTraining + 2):
        # Skip the attribute lookup entirely for rows of another category.
        if data.cell(row=row, column=6).value != whichCategory:
            continue
        if data.cell(row=row, column=indexOfAttribute).value == valueOfAttribute:
            matches += 1
    return matches
    
    
    
def getSumLp(u1,u2,u3,u4,u5,v):
    """Return the total negative log probability of category ``v``.

    The result is ``lp(X.a6 = v)`` plus ``lp(X.ai = ui | X.a6 = v)`` for each
    of the five attributes; a smaller sum means a more likely category.

    Args:
        u1, u2, u3, u4, u5: Attribute values of the instance, taken from
            worksheet columns 1 through 5 respectively.
        v: Candidate category value.

    Returns:
        The sum of the prior and the five conditional log probabilities.
    """
    # The prior is normalised by the size of the whole training set. The
    # previous code passed the category count here, which reduced the prior
    # to (n_v + 0.1) / (n_v + 0.3) and made it nearly identical for every
    # category.
    total = getLp(numTraining, v)
    for column, value in enumerate((u1, u2, u3, u4, u5), start=1):
        total += getBayesLp(getNumSatisfyUV(column, value, v), v)
    return total

def getSmallestSumLp(u1,u2,u3,u4,u5):
    """Return the category (1, 2 or 3) with the smallest ``getSumLp`` value.

    Args:
        u1, u2, u3, u4, u5: Attribute values of the instance to classify.

    Returns:
        The category whose sum is smallest; on ties the lowest-numbered
        category wins.
    """
    # Evaluated in order 1, 2, 3; dict insertion order keeps that order, and
    # min() returns the first minimal key, so ties resolve to the lowest one.
    totals = {label: getSumLp(u1, u2, u3, u4, u5, label) for label in (1, 2, 3)}
    return min(totals, key=totals.get)
    
    
def readTestPoints():
    """Classify the test rows and print accuracy, precision and recall.

    The test set is the ``numTesting`` worksheet rows ending at row 1001.
    Each row is classified with ``getSmallestSumLp`` from columns 1-5 and
    compared with the label in column 6. Precision and recall are measured
    for category 3.

    A ratio whose denominator is zero (no row guessed as 3, no row labeled 3,
    or an empty test set) is reported as 0.0 instead of raising
    ZeroDivisionError.
    """
    correct = 0
    labeled3 = 0
    guessed3 = 0
    both = 0
    # NOTE(review): 1002 hard-codes the row just past the data; this assumes
    # the sheet holds a header row plus 1000 instances -- confirm.
    for row in range(1002 - numTesting, 1002):
        label = data.cell(row=row, column=6).value
        attributes = [data.cell(row=row, column=col).value for col in range(1, 6)]
        guess = getSmallestSumLp(*attributes)

        if label == guess:
            correct += 1
        if label == 3:
            labeled3 += 1
        if guess == 3:
            guessed3 += 1
            if label == 3:
                both += 1

    # Small test sets routinely contain no predicted or actual category-3
    # rows, so every denominator is guarded.
    precision = both / guessed3 if guessed3 else 0.0
    recall = both / labeled3 if labeled3 else 0.0
    accuracy = correct / numTesting if numTesting else 0.0

    print('accuracy:')
    print(accuracy)
    print('precision:')
    print(precision)
    print('recall:')
    print(recall)
if __name__ == "__main__":
    # Two arguments are required. sys.argv[0] is the script name, so fewer
    # than three entries means one of the counts is missing.
    if len(sys.argv) < 3:
        print("please provide num of training and testing data")
        sys.exit(1)
    # Assigned at module level on purpose: the counting helpers and
    # readTestPoints() read these names as globals.
    numTraining = int(sys.argv[1])
    numTesting = int(sys.argv[2])

    # Training report, line 1: lp(X.a6=1), lp(X.a6=2), lp(X.a6=3).
    lps = [getLp(numTraining, label) for label in (1, 2, 3)]
    print('\t'.join('%1.4f' % lp for lp in lps))

    # One 3x4 table per attribute: a row per category (1-3), a column per
    # attribute value (1-4), each entry lp(X.ai=value | X.a6=category).
    for attribute in range(1, 6):
        table = [
            [
                getBayesLp(getNumSatisfyUV(attribute, value, label), label)
                for value in range(1, 5)
            ]
            for label in range(1, 4)
        ]
        for tableRow in table:
            print('\t'.join('%1.4f' % lp for lp in tableRow))
        print('\n')

    # Testing stage. Kept inside the guard so importing this module no
    # longer fails on the undefined numTraining / numTesting.
    readTestPoints()
#print('debug:')
#print(getNumOfInCategory(3))


Back to Top