955 lines
37 KiB
Python
955 lines
37 KiB
Python
#!/usr/bin/python
|
|
|
|
##############################################################################
|
|
## Copyright (c) 2007 by Scott R. Little
|
|
## University of Utah
|
|
##
|
|
## Permission to use, copy, modify and/or distribute, but not sell, this
|
|
## software and its documentation for any purpose is hereby granted
|
|
## without fee, subject to the following terms and conditions:
|
|
##
|
|
## 1. The above copyright notice and this permission notice must
|
|
## appear in all copies of the software and related documentation.
|
|
##
|
|
## 2. The name of University of Utah may not be used in advertising or
|
|
## publicity pertaining to distribution of the software without the
|
|
## specific, prior written permission of University of Utah.
|
|
##
|
|
## 3. THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
|
|
## EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
|
|
## WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
|
##
|
|
## IN NO EVENT SHALL UNIVERSITY OF UTAH OR THE AUTHORS OF THIS SOFTWARE BE
|
|
## LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES
|
|
## OF ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
|
|
## OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON
|
|
## ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
|
## OR PERFORMANCE OF THIS SOFTWARE.
|
|
##
|
|
##############################################################################
|
|
|
|
### TODO ###
|
|
#Provide a method to specify different numbers of thresholds for different variables?
|
|
#Use properties to aid in the threshold generation?
|
|
#Update genBins to use the partial results list
|
|
#Add a brief comment regarding how to add optimization and cost functions
|
|
|
|
import re, os.path, cText, copy, sys
|
|
from optparse import OptionParser
|
|
|
|
#Regular expressions
|
|
# All patterns are raw strings so that regex escapes (\s, \w, \., \() reach the
# regex engine untouched instead of relying on Python passing unknown string
# escapes through.

# Runs of double quotes.  NOTE(review): both patterns are identical and
# unanchored, so each one removes every run of quotes, not just the
# leading/trailing one their names suggest.
lQuoteR = re.compile(r'"+')
tQuoteR = re.compile(r'"+')
# Literal labels.
numVarsR = re.compile(r"Variables: ")
numPointsR = re.compile(r"Points: ")
# Whitespace: any run, leading run, trailing run, whitespace-only line, newlines.
spaceR = re.compile(r"\s+")
lSpaceR = re.compile(r"^\s+")
tSpaceR = re.compile(r"\s+$")
lineSpaceR = re.compile(r"^\s+$")
newLR = re.compile(r"\n+")
# A line that starts with a dot followed by a word character.
lDotR = re.compile(r"^\.\w")
# Dot-option keywords.  The leading dot is escaped so it only matches a literal
# '.', not an arbitrary character.
epsilonR = re.compile(r"\.epsilon")
lengthR = re.compile(r"\.length")
timeR = re.compile(r"\.time")
absoluteTimeR = re.compile(r"\.absoluteTime")
percentR = re.compile(r"\.percent")
inputR = re.compile(r"\.inputs")
outputR = re.compile(r"\.outputs")
dmvcR = re.compile(r"\.dmvc")
rateSamplingR = re.compile(r"\.rateSampling")
pathLengthR = re.compile(r"\.pathLength")
vaRateUpdateIntervalR = re.compile(r"\.vaRateUpdateInterval")
minDelayValR = re.compile(r"\.minDelayVal")
minRateValR = re.compile(r"\.minRateVal")
minDivisionValR = re.compile(r"\.minDivisionVal")
decPercentR = re.compile(r"\.decPercent")
minVarValR = re.compile(r"\.minVarVal")
maxVarValR = re.compile(r"\.maxVarVal")
falseR = re.compile(r"false",re.I) #pass the I flag to be case insensitive
trueR = re.compile(r"true",re.I) #pass the I flag to be case insensitive
# A line whose first character is '#'.
binCommentR = re.compile(r"^\#")
# Runs of parentheses, and one parenthesized group (shortest match).
lParenR = re.compile(r"\(+")
rParenR = re.compile(r"\)+")
rowsR = re.compile(r"\(.*?\)")
|
|
|
|
##############################################################################
|
|
# A class describing a single continuous variable of the modeled system.
|
|
##############################################################################
|
|
class Variable:
    "A continuous variable in the system being modeled."

    def __init__(self,nameStr):
        # Only the name is known at construction time; every flag starts out
        # as None (undecided) and is filled in later by other code.
        self.name = nameStr #the name of the variable
        self.dmvc = None    #True when the variable is a discrete multi-valued continuous (DMVC) variable
        self.input = None   #True when the variable is a model input
        self.output = None  #True when the variable is a model output
        self.type = None    #enumerated variable type (VOLTAGE, CURRENT) needed by Verilog-A

    def __str__(self):
        # Rendered as "<name>[1]" for a DMVC variable and "<name>[0]" otherwise
        # (an undecided None flag prints as 0).
        return self.name + "[" + ("1" if self.dmvc else "0") + "]"
|
|
## End Class Variable ########################################################
|
|
|
|
##############################################################################
|
|
# A class to hold the parameters specified in the threshold (.bins) file.
|
|
##############################################################################
|
|
class ThresholdParameters:
    "The parameters possibly specified in the thresholds (.bins) file."

    def __init__(self,numVars):
        # Defaults; each may be overridden by an option in the .bins file.
        self.epsilon = 0.1                #+/- tolerance within which two signal values count as equal
        self.length = 15                  #number of time points a value must persist to be considered constant
        self.time = 5e-6                  #amount of time a value must persist when absoluteTime is used
        self.absoluteTime = False         #False: DMVC detection counts time points; True: it uses absolute time
        self.percent = 0.8                #fraction of the whole trace that must be constant for a DMVC variable
        self.vaRateUpdateInterval = 1e-6  #how often the rate is added to the continuous variable in the Verilog-A output
        #number of constant values per variable; -1 means "not considered a DMVC variable"
        self.numValuesL = [-1] * numVars

    def __str__(self):
        return "epsilon:%s length:%s numValuesL:%s" % (
            str(self.epsilon), str(self.length), str(self.numValuesL))

    ############################################################################
    # Determine if two values are equal within the given epsilon.
    ############################################################################
    def epsilonEquiv(self,v1,v2):
        # The comparison is inclusive: a difference of exactly epsilon counts
        # as equivalent.
        return abs(v1-v2) <= self.epsilon
|
|
## End Class ThresholdParameters #############################################
|
|
|
|
##############################################################################
|
|
# A class for data required for the discovery of discrete multi-valued
|
|
# continuous variables.
|
|
##############################################################################
|
|
class DMVCpart:
    "Information about a single run of a constant value."

    # Class-wide counter used to hand out a unique id to every instance.
    numDMVCparts = 0

    def __init__(self):
        self.id = DMVCpart.numDMVCparts #unique numeric ID for each run
        DMVCpart.numDMVCparts += 1
        self.varInd = -1      #index into varsL of the variable that owns the run
        self.valueL = []      #the values observed during this run
        self.startPoint = -1  #index into datL of the first point of the run
        self.endPoint = -1    #index into datL of the last point of the run
        self.nextRun = None   #the next sequential run, if any

    def __str__(self):
        # NOTE: constVal() divides by len(valueL), so printing a part whose
        # value list is still empty raises ZeroDivisionError.
        nextStr = str(self.nextRun.id) if self.nextRun else "None"
        return "Part:%s Start:%s End:%s Val:%s Next:%s" % (
            str(self.id), str(self.startPoint), str(self.endPoint),
            str(self.constVal()), nextStr)

    ############################################################################
    # Calculate the constant value from the value list.  Currently it is an
    # average of all list values.
    ############################################################################
    def constVal(self):
        return sum(self.valueL)/float(len(self.valueL))

    ############################################################################
    # Calculate the delay for a given DMVC run.
    ############################################################################
    def calcDelay(self,datL):
        # Column 0 of each datL row is read as the time stamp.
        startT = datL[self.startPoint][0]
        endT = datL[self.endPoint][0]
        delay = endT-startT
        if self.nextRun:
            #Assume a constant rate of change between runs and credit this
            #run with half of the gap up to the start of the next run.
            gap = datL[self.nextRun.startPoint][0]-endT
            delay += (gap/2)
        return delay
|
|
## End Class DMVCpart ########################################################
|
|
|
|
##############################################################################
|
|
# Remove leading & trailing space as well as trailing new line characters.
|
|
##############################################################################
|
|
def cleanLine(line):
    """Return line without leading whitespace, newlines or trailing whitespace.

    The three substitutions run in that order; the newline pass removes every
    run of newline characters, wherever it occurs in the string.
    """
    return tSpaceR.sub("", newLR.sub("", lSpaceR.sub("", line)))
|
|
|
|
##############################################################################
|
|
# Remove the double-quote characters surrounding a variable name.
|
|
##############################################################################
|
|
def cleanName(name):
    """Return name with its double-quote characters removed.

    lQuoteR is applied first and tQuoteR second, each replacing every match
    with the empty string.
    """
    return tQuoteR.sub("", lQuoteR.sub("", name))
|
|
|
|
##############################################################################
|
|
# Creates a 2 dimensional array of lists rows x cols with each value
|
|
# initialized to initVal.
|
|
##############################################################################
|
|
def create2Darray(rows,cols,initVal):
    """Return a list of `rows` independent lists, each holding `cols` entries.

    Every entry refers to the same initVal object; the row lists themselves
    are distinct, so assigning to one cell does not affect other rows.
    """
    return [[initVal for _col in range(cols)] for _row in range(rows)]
|
|
|
|
##############################################################################
|
|
# Create the list of variables. All data files must have the same variables
|
|
# in the same order.
|
|
##############################################################################
|
|
def extractVars(datFile):
    """Build the list of Variable objects named in the first row of datFile.

    The file is read whole and the first parenthesized group, which must
    start at the very beginning of the file, is taken as a comma-separated
    list of (possibly quoted) variable names.

    Returns the list of Variable objects in file order.  The first variable
    has its dmvc flag forced to False (column 0 is read as the time stamp
    elsewhere in this file).

    Prints an error and calls sys.exit() when the file does not start with a
    parenthesized row.  An open() failure is propagated unchanged.
    """
    inputF = open(datFile, 'r')
    try:
        # Close the handle even if the read fails.
        contents = inputF.read()
    finally:
        inputF.close()

    rowsM = rowsR.match(contents)
    if rowsM is None:
        # Previously this fell through to an AttributeError on rowsM.group().
        cStr = cText.cSetFg(cText.RED)
        cStr += "ERROR:"
        cStr += cText.cSetAttr(cText.NONE)
        print(cStr+" No parenthesized row of variable names was found at the start of file: "+datFile)
        sys.exit()

    varsL = []
    for varStr in cleanRow(rowsM.group()).split(","):
        varsL.append(Variable(cleanName(varStr)))
    varsL[0].dmvc = False
    return varsL
|
|
|
|
##############################################################################
|
|
# Parse a .dat file ensuring that the varsL matches the global list.
|
|
##############################################################################
|
|
def parseDatFile(datFile,varsL):
    """Parse one data file and check its header against varsL.

    Every parenthesized group in the file is one row.  The first row holds
    the variable names, which must match varsL in number, order and name;
    each later row is a comma-separated list of numbers.

    Returns (datL, numPoints) where datL is a list of rows of floats.
    NOTE(review): despite its name, numPoints is the number of names in the
    header row, not the number of data rows; kept as-is for callers.

    Prints an error and calls sys.exit() on an empty file or a header
    mismatch.  open() failures and float() conversion errors propagate.
    """
    inputF = open(datFile, 'r')
    try:
        # Read everything up front so the handle is released on every path,
        # including the sys.exit() calls below.
        contents = inputF.read()
    finally:
        inputF.close()

    # cleanRow only strips parentheses, so cleaning each row once is enough.
    rowsL = [cleanRow(row) for row in rowsR.findall(contents)]
    if not rowsL:
        cStr = cText.cSetFg(cText.RED)
        cStr += "ERROR:"
        cStr += cText.cSetAttr(cText.NONE)
        print(cStr+" No parenthesized rows were found in file: "+datFile)
        sys.exit()

    varNamesL = [cleanName(nameStr) for nameStr in rowsL[0].split(",")]
    numPoints = len(varNamesL)

    if len(varNamesL) != len(varsL):
        cStr = cText.cSetFg(cText.RED)
        cStr += "ERROR:"
        cStr += cText.cSetAttr(cText.NONE)
        print(cStr + " Expected "+str(len(varsL))+" variables but received "+str(len(varNamesL))+" in file: "+datFile)
        sys.exit()
    for i in range(len(varNamesL)):
        if varNamesL[i] != varsL[i].name:
            cStr = cText.cSetFg(cText.RED)
            cStr += "ERROR:"
            cStr += cText.cSetAttr(cText.NONE)
            print(cStr+" Expected "+varsL[i].name+" in position "+str(i)+" but received "+varNamesL[i]+" in file: "+datFile)
            sys.exit()

    datL = []
    for row in rowsL[1:]:
        datL.append([float(s) for s in row.split(",")])
    return datL, numPoints
|
|
|
|
##############################################################################
|
|
# Parse the .bins (thresholds) file.
|
|
##############################################################################
|
|
def _binsError(msg):
    "Print msg behind a red ERROR: tag and terminate the program."
    cStr = cText.cSetFg(cText.RED)
    cStr += "ERROR:"
    cStr += cText.cSetAttr(cText.NONE)
    print(cStr+msg)
    sys.exit()


def _binsOptVal(line):
    "Return the first whitespace-separated argument of a .bins option line."
    return spaceR.split(line)[1]


def _flagBinsVars(line,varsL,attrStr,roleStr):
    "Set attribute attrStr to True on every variable named on the option line."
    nameL = spaceR.split(cleanLine(line))
    for nameStr in nameL[1:]:
        # Index 0 of varsL is never searched.
        for j in range(1,len(varsL)):
            if nameStr == varsL[j].name:
                setattr(varsL[j],attrStr,True)
                break
        else:
            _binsError(" "+nameStr+" was specified as "+roleStr+" in the .bins file, but wasn't found in the variable list.")


def _setBinsLimit(line,varsL,limitL,optStr):
    "Store the limit from a '<option> <value> <variable>' line in limitL."
    fieldL = spaceR.split(cleanLine(line))
    for j in range(1,len(varsL)):
        if fieldL[2] == varsL[j].name:
            limitL[j] = fieldL[1]  # kept as the raw string from the file
            return
    _binsError(" "+fieldL[2]+" was specified in a target for "+optStr+" in the .bins file, but wasn't found in the variable list.")


def parseBinsFile(binsFile,varsL):
    """Parse the .bins (thresholds) file.

    Blank lines and lines starting with '#' are skipped.  A line starting
    with a dot is an option; any other line is '<variable> <t1> <t2> ...',
    the thresholds of one variable, where a threshold containing '?' is kept
    as a string placeholder and all others are converted to float.

    Options .epsilon, .length, .time, .absoluteTime, .percent and
    .vaRateUpdateInterval are stored on the returned ThresholdParameters.
    Options .inputs, .outputs and .dmvc set the matching flag on the named
    variables in varsL.  Options .rateSampling, .pathLength, .minDelayVal,
    .minRateVal, .minDivisionVal and .decPercent assign the module globals of
    the same name (only when present in the file).  .minVarVal / .maxVarVal
    fill the globals minVarValL / maxVarValL (raw strings, one slot per
    variable, None when unset) and set the global limitExists.

    Returns (divisionsL, tParam).  divisionsL has one list per variable in
    varsL; entry 0 is always empty.

    Prints an error and calls sys.exit() for a missing file, an unknown
    option, an unknown variable name or a bad .absoluteTime value.  A
    warning is printed when the number of threshold lines differs from
    len(varsL)-1.
    """
    global pathLength
    global rateSampling
    global minDelayVal
    global minRateVal
    global minDivisionVal
    global decPercent
    global minVarValL
    global maxVarValL
    global limitExists

    minVarValL = [None] * len(varsL)
    maxVarValL = [None] * len(varsL)
    limitExists = False

    if not os.path.isfile(binsFile):
        _binsError(" the .bins file, "+binsFile+" was not found.")
    inputF = open(binsFile, 'r')
    try:
        # Read up front so the handle is closed before any sys.exit() below.
        linesL = inputF.readlines()
    finally:
        inputF.close()

    tParam = ThresholdParameters(len(varsL))
    # One slot per variable except index 0; slot j-1 belongs to varsL[j].
    divisionsStrL = [[] for _v in range(1,len(varsL))]
    numDivisions = 0

    for line in linesL:
        #Allow blank lines and comments
        if lineSpaceR.match(line) or binCommentR.match(line):
            continue
        if lDotR.match(line):
            if epsilonR.match(line):
                tParam.epsilon = abs(float(_binsOptVal(line)))
            elif lengthR.match(line):
                tParam.length = float(_binsOptVal(line))
            elif timeR.match(line):
                tParam.time = float(_binsOptVal(line))
            elif vaRateUpdateIntervalR.match(line):
                # Was split with vaRateUpdateIntervalR itself, which only
                # worked because float() tolerates surrounding whitespace.
                tParam.vaRateUpdateInterval = float(_binsOptVal(line))
            elif absoluteTimeR.match(line):
                flagStr = _binsOptVal(line)
                if trueR.match(flagStr):
                    tParam.absoluteTime = True
                elif falseR.match(flagStr):
                    tParam.absoluteTime = False
                else:
                    # Was absoluteTimeL[i] (the line number), not the value.
                    _binsError(" Attempted to set .absoluteTime with"+flagStr+" which is unrecognized. It was not set. Please use True or False.")
            elif percentR.match(line):
                tParam.percent = float(_binsOptVal(line))
            elif inputR.match(line):
                _flagBinsVars(line,varsL,"input","an input")
            elif outputR.match(line):
                _flagBinsVars(line,varsL,"output","an output")
            elif dmvcR.match(line):
                _flagBinsVars(line,varsL,"dmvc","dmvc")
            elif rateSamplingR.match(line):
                rateSampling = int(_binsOptVal(line))
            elif pathLengthR.match(line):
                pathLength = int(_binsOptVal(line))
            elif minDelayValR.match(line):
                minDelayVal = int(_binsOptVal(line))
            elif minRateValR.match(line):
                # Was int(minDelayValL[1]): the value of a different option.
                minRateVal = int(_binsOptVal(line))
            elif minDivisionValR.match(line):
                # Was int(minDelayValL[1]): the value of a different option.
                minDivisionVal = int(_binsOptVal(line))
            elif decPercentR.match(line):
                decPercent = int(_binsOptVal(line))
            elif minVarValR.match(line):
                limitExists = True
                _setBinsLimit(line,varsL,minVarValL,".minVarVal")
            elif maxVarValR.match(line):
                limitExists = True
                _setBinsLimit(line,varsL,maxVarValL,".maxVarVal")
            else:
                _binsError(" Unparseable dot option in the thresholds file: "+line)
        else:
            numDivisions += 1
            cLineL = cleanLine(line).split(" ")
            for j in range(1,len(varsL)):
                if cLineL[0] == varsL[j].name:
                    divisionsStrL[j-1] = cLineL[1:]
                    break
            else:
                # This message is deliberately printed without colour.
                print("ERROR:"+" Variable not included in the data file.")
                print("Line: "+line)
                sys.exit()

    # Entry 0 stays empty so divisionsL can be indexed like varsL.
    divisionsL = [[]]
    for sL in divisionsStrL:
        fL = []
        for s in sL:
            if (s.find("?") == -1):
                fL.append(float(s))
            else:
                fL.append(s)
        divisionsL.append(fL)

    if numDivisions != len(varsL)-1:
        cStr = cText.cSetFg(cText.RED)
        cStr += "WARNING:"
        cStr += cText.cSetAttr(cText.NONE)
        print(cStr+" There is not a threshold for every variable in the dat file.")
    return divisionsL, tParam
|
|
|
|
##############################################################################
|
|
# Remove leading and trailing parentheses
|
|
##############################################################################
|
|
def cleanRow(row):
    """Return row with its parentheses removed.

    Every run of '(' is deleted first, then every run of ')'; the patterns
    are not anchored, so parentheses inside the row are removed as well.
    """
    return rParenR.sub("", lParenR.sub("", row))
|
|
|
|
##############################################################################
|
|
# Reorder the datL so each row is a list of data values for the ith
|
|
# variable. Also build a list of the extreme values for each
|
|
# variable.
|
|
##############################################################################
|
|
def reorderDatL(varsL):
    """Collect the data of run-1.tsd, run-2.tsd, ... grouped by variable.

    Files are read in numeric order from the current directory until one
    cannot be opened.  Only that condition ends the loop: parse errors and
    the sys.exit() raised by parseDatFile on a header mismatch now propagate
    instead of being swallowed by a bare except (which silently truncated
    the data set).

    Returns (datValsL, datValsExtremaL).  datValsL[j] is the list of all
    values of variable j over all files, in file order.  datValsExtremaL[j]
    is the (min, max) tuple of datValsL[j] for j >= 1; entry 0, and every
    entry when no data was read, stays an empty list.
    """
    datValsL = [[] for _v in range(len(varsL))]
    datValsExtremaL = [[] for _v in range(len(varsL))]

    i = 1
    while True:
        datFile = "run-" + str(i) + ".tsd"
        try:
            datL,numPoints = parseDatFile(datFile,varsL)
        except IOError:
            # The first file that cannot be opened marks the end of the series.
            break
        for row in datL:
            for j in range(len(varsL)):
                datValsL[j].append(row[j])
        i += 1

    # The extrema only depend on the accumulated values, so compute them once.
    for j in range(1,len(varsL)):
        if datValsL[j]:
            datValsExtremaL[j] = (min(datValsL[j]),max(datValsL[j]))
    return datValsL, datValsExtremaL
|
|
|
|
##############################################################################
|
|
# Explore a potential DMVC run.  If the run is valid (currently this
|
|
# means long enough) then return the run. Else return None.
|
|
##############################################################################
|
|
def exploreRun(datL,i,j,tParam):
    """Grow a candidate constant run of variable j starting at row i.

    Rows are added while their value is epsilon-equivalent to the value at
    the start row.  The run is accepted when it is long enough: at least
    tParam.length points, or, when tParam.absoluteTime is set, a delay of at
    least tParam.time.

    Returns (run, end) for an accepted run and (None, end) otherwise, where
    end is the index of the last row examined as part of the run.  A
    DMVCpart is created, consuming an id, even when the run is rejected.
    """
    run = DMVCpart()
    run.varInd = j
    run.startPoint = i
    run.valueL.append(datL[i][j])

    nextInd = i+1
    while nextInd < len(datL) and tParam.epsilonEquiv(datL[run.startPoint][j],datL[nextInd][j]):
        run.valueL.append(datL[nextInd][j])
        nextInd += 1
    i = nextInd-1
    run.endPoint = i

    if tParam.absoluteTime:
        tooShort = run.calcDelay(datL) < tParam.time
    else:
        tooShort = ((run.endPoint-run.startPoint)+1) < tParam.length
    if tooShort:
        return None, i
    return run, i
|
|
|
|
##############################################################################
|
|
# Determine which variables should be considered multi-valued
|
|
# continuous variables. Marks varsL[i].dmvc for DMVC variables and
|
|
# returns a list of valid DMVC runs varsL long. Empty lists exist for
|
|
# non-DMVC places and lists of valid runs are present for DMVC
|
|
# variables.
|
|
##############################################################################
|
|
def findDMVC(datL,varsL,tParam):
    """Find the constant runs of each variable and mark DMVC variables.

    datL is row-oriented here: datL[i][j] is the value of variable j at
    point i, and column 0 is read as the time stamp.

    Variables whose dmvc flag is already False are skipped.  For the others
    the trace is scanned for runs accepted by exploreRun, and consecutive
    runs are chained through nextRun.  The variable is marked dmvc = True
    when the runs cover at least tParam.percent of the trace, measured in
    points, or in time when tParam.absoluteTime is set; otherwise its runs
    are discarded.

    Returns runL, a list as long as varsL holding the accepted runs of each
    variable (empty for non-DMVC variables).
    """
    runL = []
    for j in range(len(varsL)):
        runL.append([])
        if varsL[j].dmvc == False:
            continue

        prevRun = None
        mark = 0
        for i in range(len(datL)-1):
            if i < mark:
                # Still inside the run explored last.
                continue
            if tParam.epsilonEquiv(datL[i][j],datL[i+1][j]):
                tempRun,mark = exploreRun(datL,i,j,tParam)
                if tempRun != None:
                    # Link every run to its successor.  The old test
                    # "len(runL[j]) > 1" was off by one and left the first
                    # run of a variable without a nextRun.
                    if prevRun is not None:
                        prevRun.nextRun = tempRun
                    prevRun = tempRun
                    runL[j].append(tempRun)

        #determine if a high enough percentage of the run is constant
        if not tParam.absoluteTime:
            numPoints = 0
            for run in runL[j]:
                numPoints += (run.endPoint-run.startPoint) + 1
            constFrac = numPoints/float(len(datL))
        else:
            absTime = 0.0
            for run in runL[j]:
                absTime += run.calcDelay(datL)
            constFrac = absTime/(datL[len(datL)-1][0]-datL[0][0])

        if constFrac < tParam.percent:
            runL[j] = [] #clear the runs if they don't meet the percentage requirement
        else:
            varsL[j].dmvc = True
    #return runL for processing during the graph building
    return runL
|
|
|
|
##############################################################################
|
|
# Create an initial set of divisions based upon the number of bins and
|
|
# the extreme values for each variable. These initial bins are
|
|
# evenly spaced.
|
|
##############################################################################
|
|
def initDivisionsL(datValsExtremaL,varsL,divisionsL):
    """Replace every "?" placeholder threshold with an evenly spaced value.

    For each variable from index 1 on, the spacing is the variable's range
    (max - min from datValsExtremaL) divided by numThresholds+1, where
    numThresholds is a module-level value.  Placeholder j of a variable
    becomes min + spacing*j.  Each variable is printed as it is processed.

    divisionsL is modified in place and also returned.

    NOTE(review): with j starting at 0 a placeholder in the first slot is
    set to the minimum itself; confirm that (j+1) was not intended.
    """
    for i in range(1,len(varsL)):
        print(varsL[i])
        lowVal = datValsExtremaL[i][0]
        highVal = datValsExtremaL[i][1]
        interval = float(abs(highVal-lowVal) / (numThresholds+1))
        varDivL = divisionsL[i]
        for j in range(len(varDivL)):
            if varDivL[j] == "?":
                varDivL[j] = lowVal+(interval*j)
    return divisionsL
|
|
|
|
##############################################################################
|
|
# Generate the bin encoding for each data point given the divisions.
|
|
##############################################################################
|
|
def genBins(datL,divisionsL):
    """Return the bin index of every data point of every variable.

    datL is variable-oriented here: datL[i][j] is point j of variable i.
    The bin of a point is the index of the first threshold in divisionsL[i]
    that is >= the value, or len(divisionsL[i]) when the value exceeds all
    of them (n thresholds give n+1 bins).

    The result has len(divisionsL) rows and len(datL[0]) columns.  Row 0 is
    never filled and stays -1, as does any row with no thresholds.
    """
    numPoints = len(datL[0])
    binsL = [[-1 for _p in range(numPoints)] for _v in range(len(divisionsL))]
    for i in range(1,len(divisionsL)):
        thresholdL = divisionsL[i]
        for j in range(numPoints):
            binInd = -1
            for k in range(len(thresholdL)):
                if datL[i][j] <= thresholdL[k]:
                    binInd = k
                    break
                #still above threshold k: provisionally the next bin up,
                #which ends as the highest bin if no threshold is large enough
                binInd = k+1
            binsL[i][j] = binInd
    return binsL
|
|
|
|
##############################################################################
|
|
# Determine if two bins are equivalent and return a Boolean.
|
|
##############################################################################
|
|
def equalBins(a,b,binsL,divisionsL):
    """Return True when points a and b fall in the same bin for every variable.

    Only variables 1 .. len(divisionsL)-1 are compared; row 0 of binsL is
    ignored.
    """
    return not any(binsL[i][a] != binsL[i][b] for i in range(1,len(divisionsL)))
|
|
|
|
##############################################################################
|
|
# Generate the rates for each data point given the bin encodings.
|
|
##############################################################################
|
|
def genRates(divisionsL,datL,binsL):
    # Compute a rate of change for the data points of every variable.
    #
    # datL is variable-oriented (datL[j][i] is point i of variable j) and
    # datL[0] supplies the time value of each point.  binsL is the bin
    # encoding of the same points.  The result has len(divisionsL) rows and
    # len(datL[0]) columns; a cell holds (delta value)/(delta time) or the
    # placeholder '-' where no rate was calculated.  Row 0 is never filled.
    #
    # Behaviour is controlled by the module-level names placeRates,
    # rateSampling and pathLength, which are defined outside this function.
    #
    #Function notes: Rates can be calculated based on transitions or places. If rates are calculated based on places they are calculated based on the change in the bin for the entire line. If rates are calculated based on transitions they are calculated based on the change in the bin for each variable. These two methods have different results and it appears that place based rates are more stable. To help "smooth" out the rates there are several ways to modify the rate calculation. One way is to change the rateSampling variable. This variable determines how long the bin must remain constant before a rate is calculated for that bin. It can be a numerical value or "inf." The "inf" setting only calculates the rate once per bin change. You can invalidate bin changes of short length using the pathLength variable. Any run of consecutive bins shorter than pathLength will not have its rate calculated. The rate is also only calculated if the time values differ for the two points as I have seen examples where this is a problem.
    ratesL = create2Darray(len(divisionsL),len(datL[0]),'-')
    if placeRates:
        #Place based rate calculation
        if rateSampling == "inf":
            # One rate per stretch of identical bin encodings: mark is
            # advanced to the first point whose encoding differs from point
            # i, and the rate is taken between i and mark-1.
            mark = 0
            for i in range(len(datL[0])):
                if i < mark:
                    # Point i belongs to a stretch that was already handled.
                    continue
                while mark < len(datL[0]) and equalBins(i,mark,binsL,divisionsL):
                    mark += 1
                # Skip zero-duration stretches (division by zero) and
                # stretches shorter than pathLength points.
                if datL[0][mark-1] != datL[0][i] and (mark-i) >= pathLength:
                    for j in range(1,len(divisionsL)):
                        ratesL[j][i] = (datL[j][mark-1]-datL[j][i])/(datL[0][mark-1]-datL[0][i])
        else:
            # Fixed window: the rate at i is taken between i and
            # i+rateSampling when points i .. i+rateSampling-1 share one bin
            # encoding.
            # NOTE(review): point i+rateSampling itself is not part of the
            # bin check - confirm this is intended.
            for i in range(len(datL[0])-rateSampling):
                calcRate = True
                for k in range(rateSampling):
                    if not equalBins(i,i+k,binsL,divisionsL):
                        calcRate = False
                        break
                if calcRate and datL[0][i+rateSampling] != datL[0][i]:
                    for j in range(1,len(divisionsL)):
                        ratesL[j][i] = (datL[j][i+rateSampling]-datL[j][i])/(datL[0][i+rateSampling]-datL[0][i])
    else:
        cStr = cText.cSetFg(cText.YELLOW)
        cStr += "WARNING:"
        cStr += cText.cSetAttr(cText.NONE)
        print cStr+"this feature has not been tested."
        #Transition based rate calculation
        # NOTE(review): the function notes describe this mode as per-variable,
        # but both branches below still call equalBins, which compares the
        # bins of all variables - confirm.
        if rateSampling == "inf":
            for j in range(1,len(divisionsL)):
                mark = 0
                for i in range(len(datL[0])):
                    if i < mark:
                        continue
                    while mark < len(datL[0]) and equalBins(i,mark,binsL,divisionsL):
                        mark = mark + 1
                    # Unlike the place based branch, pathLength is not
                    # consulted here.
                    if datL[0][mark-1] != datL[0][i]:
                        ratesL[j][i] = (datL[j][mark-1]-datL[j][i])/(datL[0][mark-1]-datL[0][i])
        else:
            for i in range(len(datL[0])-rateSampling):
                for j in range(1,len(divisionsL)):
                    calcRate = True
                    for k in range(rateSampling):
                        if not equalBins(i,i+k,binsL,divisionsL):
                            calcRate = False
                            break
                    if calcRate and datL[0][i+rateSampling] != datL[0][i]:
                        ratesL[j][i] = (datL[j][i+rateSampling]-datL[j][i])/(datL[0][i+rateSampling]-datL[0][i])
    return ratesL
|
|
|
|
##############################################################################
|
|
# Return the minimum rate for a given rate list.
|
|
##############################################################################
|
|
def minRate(ratesL):
    """Return the smallest rate in ratesL, ignoring '-' placeholders.

    Returns "-" when the list holds no rate at all.
    """
    realRatesL = [rate for rate in ratesL if rate != '-']
    if not realRatesL:
        return "-"
    return min(realRatesL)
|
|
|
|
##############################################################################
|
|
# Return the maximum rate for a given rate list.
|
|
##############################################################################
|
|
def maxRate(ratesL):
    """Return the largest rate in ratesL, ignoring '-' placeholders.

    Returns "-" when the list holds no rate at all.
    """
    realRatesL = [rate for rate in ratesL if rate != '-']
    if not realRatesL:
        return "-"
    return max(realRatesL)
|
|
|
|
##############################################################################
|
|
# Give a score for the even distribution of points for all
|
|
# variables. 0 is the optimal score.
|
|
##############################################################################
|
|
def pointDistCost(datValsL,divisionsL,resL=None,updateVar=-1):
    """Score how evenly the points of all variables spread over their bins.

    The score is the sum of pointDistCostVar over variables
    1 .. len(divisionsL)-1; 0 is the optimal score.

    resL is an optional caller-owned list of per-variable scores that lets
    the total be updated incrementally:
      updateVar == 0  - append len(divisionsL) zeros to resL, then store the
                        score of every variable in resL[1:] and return the
                        total;
      updateVar  > 0  - recompute only resL[updateVar] and return the sum of
                        resL (resL must already have been filled);
      updateVar  < 0  - ignore resL and compute the total from scratch.

    The default for resL used to be a shared mutable list ([]), which kept
    growing across calls that relied on it; a fresh list is now created per
    call when none is passed.
    """
    if resL is None:
        resL = []
    total = 0
    if updateVar == 0:
        resL.extend([0] * len(divisionsL))
        #Fill up resL
        for i in range(1,len(divisionsL)):
            points = pointDistCostVar(datValsL[i],divisionsL[i])
            total += points
            resL[i] = points
    elif updateVar > 0:
        #Incrementally calculate a total change
        resL[updateVar] = pointDistCostVar(datValsL[updateVar],
                                           divisionsL[updateVar])
        total = sum(resL)
    else:
        #Do a full calculation from scratch
        for i in range(1,len(divisionsL)):
            total += pointDistCostVar(datValsL[i],divisionsL[i])
    return total
|
|
|
|
##############################################################################
|
|
# Give a score for the even distribution of points for an individual
|
|
# variable. 0 is the optimal score.
|
|
##############################################################################
|
|
def pointDistCostVar(datValsL,divisionsL):
    """Score how evenly the points of one variable spread over its bins.

    The len(divisionsL)+1 bins are delimited by the thresholds in
    divisionsL; a value goes into the bin of the first threshold that is >=
    the value, or into the top bin when it exceeds all thresholds.  The
    score is the sum over all bins of |points in bin - optimal points per
    bin|, where the optimum is the point count divided by the bin count
    using integer division (written // so the Python 2 result of the
    original int/int expression is kept explicitly).  0 is the optimal score.
    """
    numBins = len(divisionsL)+1
    optPointsPerBin = len(datValsL)//numBins
    pointsPerBinL = [0] * numBins
    for val in datValsL:
        binInd = len(divisionsL)  # top bin unless a threshold catches the value
        for j in range(len(divisionsL)):
            if val <= divisionsL[j]:
                binInd = j
                break
        pointsPerBinL[binInd] += 1
    return sum(abs(points - optPointsPerBin) for points in pointsPerBinL)
|
|
|
|
##############################################################################
|
|
# Give a score for the range of rates for all variables. 0 is the
|
|
# optimal score.
|
|
##############################################################################
|
|
def rateRangeCost(datValsL,divisionsL,resL=[],updateVar=-1):
    """Score the spread of the rates of all variables; 0 is optimal.

    Bins are generated with genBins and rates with genRates; for every
    variable index from 1 upwards the absolute difference between maxRate
    and minRate of that variable's rates is added to the score.

    resL and updateVar are accepted so the signature matches the other cost
    function, but they are not used here: the score is always computed from
    scratch.
    """
    binsL = genBins(datValsL,divisionsL)
    ratesL = genRates(divisionsL,datValsL,binsL)
    #print "ratesL:"+str(ratesL)
    spreadL = []
    for varIdx in range(1,len(divisionsL)):
        highest = maxRate(ratesL[varIdx])
        lowest = minRate(ratesL[varIdx])
        spreadL.append(abs(highest-lowest))
    return sum(spreadL)
|
|
|
|
##############################################################################
|
|
# Look for the optimal thresholds using a greedy algorithm.
|
|
##############################################################################
|
|
def _greedyMoveDistance(divisionsL,datValsExtremaL,i,j,moveLeft):
    """Return half the gap between threshold j of variable i and its neighbour.

    The neighbour is the adjacent threshold in the direction of the move, or
    the matching entry of datValsExtremaL[i] (index 0 on the left, index 1 on
    the right) when the threshold is the outermost one.
    """
    current = divisionsL[i][j]
    if moveLeft:
        # NOTE(review): for j == 0 with a "?" threshold the original code
        # falls back to divisionsL[i][j-1], i.e. the last threshold of the
        # variable.  That is preserved here; confirm it is intended.
        if j == 0 and current != "?":
            neighbour = datValsExtremaL[i][0]
        else:
            neighbour = divisionsL[i][j-1]
    elif j == len(divisionsL[i])-1:
        neighbour = datValsExtremaL[i][1]
    else:
        neighbour = divisionsL[i][j+1]
    return abs(current - neighbour)/2


def greedyOpt(divisionsL,datValsL,datValsExtremaL,initDivL):
    """Look for better thresholds using a greedy algorithm.

    Every threshold whose initDivL entry is not "?" is moved half way towards
    its left neighbour; if that does not lower the cost it is moved half way
    towards its right neighbour instead.  A move is kept only when it lowers
    the cost.  Sweeps over all thresholds are repeated until the module level
    `iterations` budget of cost evaluations is used up.

    The module level `costFunc` is used as the cost function.  It is called
    as costFunc(datValsL, divisionsL, resL, updateVar), where resL is a list
    of partial results owned by this function and updateVar is 0 for the
    initial call and the index of the modified variable afterwards.

    The divisionsL passed in is not modified; candidates are deep copies.
    Returns the best divisions list found.
    """
    resL = [] #Used to keep partial results for cost functions
    bestCost = costFunc(datValsL,divisionsL,resL,0)
    numMoves = 0
    print("Starting optimization...")
    while numMoves < iterations:
        movesBeforeSweep = numMoves
        for i in range(1,len(divisionsL)):
            for j in range(len(divisionsL[i])):
                if initDivL[i][j] != "?":
                    #Try a move to the left first, then one to the right
                    for moveLeft in (True, False):
                        distance = _greedyMoveDistance(divisionsL,datValsExtremaL,i,j,moveLeft)
                        newDivisionsL = copy.deepcopy(divisionsL)
                        if moveLeft:
                            newDivisionsL[i][j] -= distance
                        else:
                            newDivisionsL[i][j] += distance
                        #The cost function may overwrite its partial results
                        #for variable i; remember them so that a rejected
                        #move does not corrupt later incremental costs.
                        savedResL = list(resL)
                        newCost = costFunc(datValsL,newDivisionsL,resL,i)
                        numMoves += 1
                        if numMoves % 500 == 0:
                            print(str(numMoves)+"/"+str(iterations))
                        if newCost < bestCost:
                            bestCost = newCost
                            divisionsL = newDivisionsL
                            break
                        resL[:] = savedResL
                if numMoves >= iterations:
                    return divisionsL
        if numMoves == movesBeforeSweep:
            #No threshold was eligible for a move, so further sweeps cannot
            #make progress; stop instead of looping forever.
            break
    return divisionsL
|
|
|
|
##############################################################################
|
|
# Write the DMVC variables and the thresholds of each variable to the .bins file.
|
|
##############################################################################
|
|
def writeBinsFile(varsL,divisionsL,binsFile):
    """Write the thresholds to binsFile, overwriting any existing file.

    If any variable has dmvc == True, the first line is ".dmvc " followed by
    the names of those variables, each followed by a space.  After that one
    line is written per variable with index >= 1 that has at least one
    threshold: the variable name followed by its thresholds, space separated.

    Arguments:
    varsL      -- variable objects providing .name and .dmvc
    divisionsL -- per-variable lists of thresholds, indexed like varsL
    binsFile   -- path of the file to write
    """
    outputF = open(binsFile, 'w')
    try:
        dmvcNamesL = [var.name for var in varsL if var.dmvc == True]
        if dmvcNamesL:
            outputF.write(".dmvc ")
            for name in dmvcNamesL:
                outputF.write(name + " ")
            outputF.write("\n")
        for i in range(1,len(varsL)):
            if len(divisionsL[i]) > 0:
                outputF.write(varsL[i].name)
                for div in divisionsL[i]:
                    outputF.write(" "+str(div))
                outputF.write("\n")
    finally:
        #Close the file even if a write or an index error interrupts us
        outputF.close()
|
|
|
|
##############################################################################
|
|
##############################################################################
|
|
def main():
    """Command line entry point.

    Parses the options, selects the optimization and cost functions, reads
    the data from "run-1.tsd" and the initial thresholds from the .bins file,
    runs the optimization and writes the resulting thresholds back to the
    .bins file.

    The module level numThresholds, iterations, optFunc and costFunc are
    updated from the command line options.
    """
    global numThresholds
    global iterations
    global optFunc
    global costFunc

    usage = "usage: %prog [options] datFile1 ... datFileN"
    parser = OptionParser(usage=usage)
    parser.set_defaults(binsFile=None,numThresholds=None,costF="p",optF="g")
    parser.add_option("-b", "--bins", action="store", dest="binsFile", help="The name of the .bins file to be created. If this is not provided the basename of the first input data file is used.")
    parser.add_option("-t", "--thresholds", action="store", dest="numThresholds", help="The number of thresholds to create during autogeneration.")
    parser.add_option("-i", "--iterations", action="store", dest="iterations", help="The number of iterations of the optimization algorithm to run.")
    parser.add_option("-c", "--cost", action="store", dest="costF", help="The cost function to use: r - Minimize the distance between rates; p - Average the number of points in each bin.")
    parser.add_option("-o", "--optimization", action="store", dest="optF", help="The optimization function to use: g - Greedy algorithm.")

    (options, args) = parser.parse_args()

    def _reportError(message):
        #Print a red "ERROR:" prefix followed by the message, show the help
        #text and terminate the script.
        cStr = cText.cSetFg(cText.RED)
        cStr += "ERROR:"
        cStr += cText.cSetAttr(cText.NONE)
        print(cStr + message)
        parser.print_help()
        sys.exit()

    #The data file read below is hard-coded, so positional arguments are
    #optional; they are only used to derive the default .bins file name.
    tsdFile = "run-1.tsd"
    if len(args) > 0:
        datFileL = args
    else:
        datFileL = [tsdFile]

    if not options.binsFile:
        baseFileL = os.path.splitext(datFileL[0])
        binsFile = baseFileL[0]+".bins"
    else:
        binsFile = options.binsFile

    if options.numThresholds:
        numThresholds = int(options.numThresholds)

    if options.iterations:
        iterations = int(options.iterations)

    if options.optF == "g":
        optFunc = greedyOpt
    else:
        _reportError(options.optF + " is not a valid option for the optimization function.")

    if options.costF == "r":
        costFunc = rateRangeCost
    elif options.costF == "p":
        costFunc = pointDistCost
    else:
        _reportError(options.costF + " is not a valid option for the cost function.")

    varsL = extractVars(tsdFile)
    datValsL, datValsExtremaL = reorderDatL(varsL)
    #The initial thresholds come from the .bins file; without it the steps
    #below have nothing to work with, so fail with a clear message.
    if not os.path.isfile(binsFile):
        _reportError(binsFile + " does not exist; an initial .bins file is required.")
    initDivL, tParam = parseBinsFile(binsFile,varsL)
    #The return value is not used here; the call is kept as in the original.
    dmvcRunL = findDMVC(datValsL,varsL,tParam)
    divisionsL = initDivisionsL(datValsExtremaL,varsL,initDivL)
    print("Iterations: "+str(iterations))
    print("Optimization function: "+optFunc.__name__)
    print("Cost function: "+costFunc.__name__)
    print("Initial divisionsL:"+str(divisionsL))
    print("Initial score:"+str(costFunc(datValsL,divisionsL)))
    divisionsL = optFunc(divisionsL,datValsL,datValsExtremaL,initDivL)
    print("Final divisionsL:"+str(divisionsL))
    print("Final score:"+str(costFunc(datValsL,divisionsL)))
    writeBinsFile(varsL,divisionsL,binsFile)
|
|
|
|
##############################################################################
|
|
##############################################################################
|
|
|
|
###########
|
|
# Globals #
|
|
###########
|
|
#Default number of thresholds to create; main() overrides it when the
#-t/--thresholds option is given.
numThresholds = 2

#Default number of iterations; main() overrides it when the
#-i/--iterations option is given.
iterations = 10000

#Number of points between the sampling of different rates.  "inf" samples
#once per threshold.
rateSampling = "inf"

#For "inf" rate sampling: the number of time points a "run" must persist
#before its rate is calculated.  Another parameter to help smooth the data.
pathLength = 10

#True: rates are calculated based on places.  False: rates are calculated
#based on transitions, which has very little infrastructure and is not well
#tested.
placeRates = True

#The optimization function to use; assigned by main().
optFunc = None

#The cost function used by the optimization function; assigned by main().
costFunc = None


if __name__ == "__main__":
    main()
|