iBioSim/bin/autogenT.py

#!/usr/bin/python

##############################################################################
##   Copyright (c) 2007 by Scott R. Little
##   University of Utah
##
##   Permission to use, copy, modify and/or distribute, but not sell, this
##   software and its documentation for any purpose is hereby granted
##   without fee, subject to the following terms and conditions:
##
##   1.  The above copyright notice and this permission notice must
##   appear in all copies of the software and related documentation.
##
##   2.  The name of University of Utah may not be used in advertising or
##   publicity pertaining to distribution of the software without the
##   specific, prior written permission of University of Utah.
##
##   3.  THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
##   EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
##   WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
##
##   IN NO EVENT SHALL UNIVERSITY OF UTAH OR THE AUTHORS OF THIS SOFTWARE BE
##   LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES
##   OF ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
##   OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON
##   ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE
##   OR PERFORMANCE OF THIS SOFTWARE.
##
##############################################################################

### TODO ###
#Provide a method to specify different numbers of thresholds for different variables?
#Use properties to aid in the threshold generation?
#Update genBins to use the partial results list
#Add a brief comment regarding how to add optimization and cost functions

import re, os.path, cText, copy, sys
from optparse import OptionParser

#Regular expressions
#All patterns are raw strings so that the backslashes reach the regex engine
#untouched.  The leading dot of every .bins option keyword is escaped so that
#it only matches a literal "." rather than any character.
#NOTE: lQuoteR and tQuoteR are identical; each matches any run of double
#quotes wherever it occurs in the string.
lQuoteR = re.compile(r'"+')
tQuoteR = re.compile(r'"+')
numVarsR = re.compile(r"Variables: ")
numPointsR = re.compile(r"Points: ")
spaceR = re.compile(r"\s+")
lSpaceR = re.compile(r"^\s+")
tSpaceR = re.compile(r"\s+$")
lineSpaceR = re.compile(r"^\s+$")
newLR = re.compile(r"\n+")
lDotR = re.compile(r"^\.\w")
epsilonR = re.compile(r"\.epsilon")
lengthR = re.compile(r"\.length")
timeR = re.compile(r"\.time")
absoluteTimeR = re.compile(r"\.absoluteTime")
percentR = re.compile(r"\.percent")
inputR = re.compile(r"\.inputs")
outputR = re.compile(r"\.outputs")
dmvcR = re.compile(r"\.dmvc")
rateSamplingR = re.compile(r"\.rateSampling")
pathLengthR = re.compile(r"\.pathLength")
vaRateUpdateIntervalR = re.compile(r"\.vaRateUpdateInterval")
minDelayValR = re.compile(r"\.minDelayVal")
minRateValR = re.compile(r"\.minRateVal")
minDivisionValR = re.compile(r"\.minDivisionVal")
decPercentR = re.compile(r"\.decPercent")
minVarValR = re.compile(r"\.minVarVal")
maxVarValR = re.compile(r"\.maxVarVal")
falseR = re.compile(r"false",re.I) #pass the I flag to be case insensitive
trueR = re.compile(r"true",re.I) #pass the I flag to be case insensitive
binCommentR = re.compile(r"^#")
lParenR = re.compile(r"\(+")
rParenR = re.compile(r"\)+")
#Non-greedy: each match runs from a "(" to the nearest following ")"
rowsR = re.compile(r"\(.*?\)")

##############################################################################
# A class describing a single variable (one data column) of the model.
##############################################################################
class Variable:
	"A continuous variable in the system being modeled."
	def __init__(self,nameStr):
		#Name of the variable
		self.name = nameStr
		#Boolean: is the variable discrete multi-valued continuous (DMVC)?
		#None means that it has not been decided yet.
		self.dmvc = None
		#Boolean: the variable is a model input
		self.input = None
		#Boolean: the variable is a model output
		self.output = None
		#Enumerated type (VOLTAGE, CURRENT) of the variable, needed by Verilog-A
		self.type = None
	def __str__(self):
		#Render as name[1] for a DMVC variable and name[0] otherwise
		if self.dmvc:
			flagStr = "1"
		else:
			flagStr = "0"
		return self.name+"["+flagStr+"]"
## End Class Variable ########################################################

##############################################################################
# Container for the tunable parameters that a threshold (.bins) file may set.
##############################################################################
class ThresholdParameters:
	"The parameters possibly specified in the thresholds (.bins) file."
	def __init__(self,numVars):
		#One entry per variable holding its number of constant values; -1
		#means the variable is not considered a DMVC variable
		self.numValuesL = [-1]*numVars
		#Defaults, used unless overridden
		#+/- tolerance within which two signal values count as equivalent
		self.epsilon = 0.1
		#number of time points a value must persist to be considered constant
		self.length = 15
		#amount of time that must pass to be considered constant when
		#absoluteTime is in use
		self.time = 5e-6
		#False: count time points to determine DMVC; True: use absolute time
		self.absoluteTime = False
		#fraction of the whole trace that must be constant for a variable to
		#qualify as DMVC
		self.percent = 0.8
		#how often the rate is added to the continuous variable in the
		#Verilog-A model output
		self.vaRateUpdateInterval = 1e-6
	def __str__(self):
		partsL = ["epsilon:"+str(self.epsilon),
							" length:"+str(self.length),
							" numValuesL:"+str(self.numValuesL)]
		return "".join(partsL)
	############################################################################
	# Report whether v1 and v2 differ by no more than epsilon.
	############################################################################
	def epsilonEquiv(self,v1,v2):
		return abs(v1-v2) <= self.epsilon
## End Class ThresholdParameters #############################################

##############################################################################
# Bookkeeping for one run of (nearly) constant values, used while discovering
# discrete multi-valued continuous variables.
##############################################################################
class DMVCpart:
	"Information about a single run of a constant value."
	numDMVCparts = 0
	def __init__(self):
		#Hand out the next unique numeric ID and advance the class-wide counter
		self.id = DMVCpart.numDMVCparts
		DMVCpart.numDMVCparts += 1
		#index into the varsL array of the variable owning the run
		self.varInd = -1
		#the values collected for this run
		self.valueL = []
		#indices into datL of the first and last points of the run
		self.startPoint = -1
		self.endPoint = -1
		#reference to the next sequential run (None when there is none)
		self.nextRun = None
	def __str__(self):
		if self.nextRun:
			nextStr = str(self.nextRun.id)
		else:
			nextStr = "None"
		retStr = "Part:"+str(self.id)
		retStr += " Start:"+str(self.startPoint)
		retStr += " End:"+str(self.endPoint)
		retStr += " Val:"+str(self.constVal())
		retStr += " Next:"+nextStr
		return retStr
	############################################################################
	# The constant value of the run: the arithmetic mean of valueL.  Raises
	# ZeroDivisionError when valueL is empty.
	############################################################################
	def constVal(self):
		total = 0
		for val in self.valueL:
			total += val
		return total/float(len(self.valueL))
	############################################################################
	# The delay of the run: the time (column 0 of datL) between its end and
	# start points, plus half of the gap to the next run when one exists.
	############################################################################
	def calcDelay(self,datL):
		startTime = datL[self.startPoint][0]
		endTime = datL[self.endPoint][0]
		delay = endTime-startTime
		#Assuming a constant rate of change between runs, split the time
		#between this run and the next one evenly
		if self.nextRun:
			nextStartTime = datL[self.nextRun.startPoint][0]
			delay += ((nextStartTime-endTime)/2)
		return delay
## End Class DMVCpart ########################################################

##############################################################################
# Strip leading whitespace, every run of new line characters and trailing
# whitespace from a line.
##############################################################################
def cleanLine(line):
	cleaned = lSpaceR.sub("",line)
	cleaned = newLR.sub("",cleaned)
	return tSpaceR.sub("",cleaned)

##############################################################################
# Strip the double quote characters matched by lQuoteR and tQuoteR from a
# variable name.
##############################################################################
def cleanName(name):
	return tQuoteR.sub("",lQuoteR.sub("",name))

##############################################################################
# Build a list of rows lists, each holding cols references to initVal.
# Every row is a distinct list object.
##############################################################################
def create2Darray(rows,cols,initVal):
	return [[initVal for c in range(cols)] for r in range(rows)]

##############################################################################
# Create the list of variables from the header row of a data file.  All data
# files must have the same variables in the same order.
#
# datFile: path of the data file; it must begin with a parenthesized,
#          comma separated row of (optionally quoted) variable names.
# Returns: a list with one Variable per name.  The first variable is marked
#          as non-DMVC (dmvc = False).
# Raises:  ValueError when the file does not start with a parenthesized row.
##############################################################################
def extractVars(datFile):
	inputF = open(datFile, 'r')
	try:
		contents = inputF.read()
	finally:
		#Release the handle even if the read fails
		inputF.close()
	rowsM = rowsR.match(contents)
	if rowsM is None:
		raise ValueError("No parenthesized header row found at the start of "+datFile)
	varsL = []
	for varStr in cleanRow(rowsM.group()).split(","):
		varsL.append(Variable(cleanName(varStr)))
	#str.split always yields at least one element, so varsL[0] exists
	varsL[0].dmvc = False
	return varsL

##############################################################################
# Parse a .dat file ensuring that the varsL matches the global list.
##############################################################################
def parseDatFile(datFile,varsL):
	inputF = open(datFile, 'r')
	linesL = inputF.read()
	rowsL = rowsR.findall(linesL)
	for i in range(len(rowsL)):
		rowsL[i] = cleanRow(rowsL[i])
	numPoints = -1
	varNames = cleanRow(rowsL[0])
	varNamesL = []
	varNamesL = varNames.split(",")
	for i in range(len(varNamesL)):
		varNamesL[i] = cleanName(varNamesL[i])
	numPoints = len(varNamesL)
	if len(varNamesL) == len(varsL):
		for i in range(len(varNamesL)):
			if varNamesL[i] != varsL[i].name:
				cStr = cText.cSetFg(cText.RED)
				cStr += "ERROR:"
				cStr += cText.cSetAttr(cText.NONE)
				print cStr+" Expected "+varsL[i].name+" in position "+str(i)+" but received "+varNamesL[i]+" in file: "+datFile
				sys.exit()
	else:
		cStr = cText.cSetFg(cText.RED)
		cStr += "ERROR:"
		cStr += cText.cSetAttr(cText.NONE)
		print cStr + " Expected "+str(len(varsL))+" variables but received "+str(len(varNamesL))+" in file: "+datFile
		sys.exit()

	datL = []
	for i in range(1,len(rowsL)):
		valStrL = cleanRow(rowsL[i]).split(",")
		valL = []
		for s in valStrL:
			valL.append(float(s))
		datL.append(valL)
	inputF.close()
	return datL, numPoints

##############################################################################
# Parse the .bins (thresholds) file.
##############################################################################
def parseBinsFile(binsFile,varsL):
	global pathLength
	global rateSampling
	global minDelayVal
	global minRateVal
	global minDivisionVal
	global decPercent
	global minVarValL
	global maxVarValL
	global limitExists

	minVarValL = []
	maxVarValL = []
	limitExists = False

	for i in range(len(varsL)):
		minVarValL.append(None)
		maxVarValL.append(None)

	if not os.path.isfile(binsFile):
		cStr = cText.cSetFg(cText.RED)
		cStr += "ERROR:"
		cStr += cText.cSetAttr(cText.NONE)
		print cStr+" the .bins file, "+binsFile+" was not found."
		sys.exit()
	inputF = open(binsFile, 'r')
	linesL = inputF.readlines()
	tParam = ThresholdParameters(len(varsL))
	divisionsStrL = []
	numDivisions = 0
	for i in range(1,len(varsL)):
		divisionsStrL.append([])
	for i in range(len(linesL)):
		#Allow blank lines and comments
		if lineSpaceR.match(linesL[i]) or binCommentR.match(linesL[i]):
			continue
		if lDotR.match(linesL[i]):
			if epsilonR.match(linesL[i]):
				epsilonL = spaceR.split(linesL[i])
				tParam.epsilon = abs(float(epsilonL[1]))
			elif lengthR.match(linesL[i]):
				lengthL = spaceR.split(linesL[i])
				tParam.length = float(lengthL[1])
			elif timeR.match(linesL[i]):
				timeL = spaceR.split(linesL[i])
				tParam.time = float(timeL[1])
			elif vaRateUpdateIntervalR.match(linesL[i]):
				vaRateUpdateIntervalL = vaRateUpdateIntervalR.split(linesL[i])
				tParam.vaRateUpdateInterval = float(vaRateUpdateIntervalL[1])
			elif absoluteTimeR.match(linesL[i]):
				absoluteTimeL = spaceR.split(linesL[i])
				if trueR.match(absoluteTimeL[1]):
					tParam.absoluteTime = True
				elif falseR.match(absoluteTimeL[1]):
					tParam.absoluteTime = False
				else:
					cStr = cText.cSetFg(cText.RED)
					cStr += "ERROR:"
					cStr += cText.cSetAttr(cText.NONE)
					print cStr+" Attempted to set .absoluteTime with"+absoluteTimeL[i]+" which is unrecognized.  It was not set.  Please use True or False."
					sys.exit()
			elif percentR.match(linesL[i]):
				percentL = spaceR.split(linesL[i])
				tParam.percent = float(percentL[1])
			elif inputR.match(linesL[i]):
				cLine = cleanLine(linesL[i])
				inputL = spaceR.split(cLine)
				for i in range(1,len(inputL)):
					found = False
					for j in range(1,len(varsL)):
						if inputL[i] == varsL[j].name:
							#print varsL[j].name+" is an input."
							varsL[j].input = True
							found = True
							break
					if not found:
						cStr = cText.cSetFg(cText.RED)
						cStr += "ERROR:"
						cStr += cText.cSetAttr(cText.NONE)
						print cStr+" "+inputL[i]+" was specified as an input in the .bins file, but wasn't found in the variable list."
						sys.exit()
			elif outputR.match(linesL[i]):
				cLine = cleanLine(linesL[i])
				outputL = spaceR.split(cLine)
				for i in range(1,len(outputL)):
					found = False
					for j in range(1,len(varsL)):
						if outputL[i] == varsL[j].name:
							#print varsL[j].name+" is an output."
							varsL[j].output = True
							found = True
							break
					if not found:
						cStr = cText.cSetFg(cText.RED)
						cStr += "ERROR:"
						cStr += cText.cSetAttr(cText.NONE)
						print cStr+" "+outputL[i]+" was specified as an output in the .bins file, but wasn't found in the variable list."
						sys.exit()
			elif dmvcR.match(linesL[i]):
				cLine = cleanLine(linesL[i])
				dmvcL = spaceR.split(cLine)
				outputL = spaceR.split(cLine)
				for i in range(1,len(dmvcL)):
					found = False
					for j in range(1,len(varsL)):
						if outputL[i] == varsL[j].name:
							#print varsL[j].name+" is dmvc."
							varsL[j].dmvc = True
							found = True
							break
					if not found:
						cStr = cText.cSetFg(cText.RED)
						cStr += "ERROR:"
						cStr += cText.cSetAttr(cText.NONE)
						print cStr+" "+outputL[i]+" was specified as dmvc in the .bins file, but wasn't found in the variable list."
						sys.exit()
			elif rateSamplingR.match(linesL[i]):
				rateSamplingL = spaceR.split(linesL[i])
				rateSampling = int(rateSamplingL[1])
			elif pathLengthR.match(linesL[i]):
				pathLengthL = spaceR.split(linesL[i])
				pathLength = int(pathLengthL[1])
			elif minDelayValR.match(linesL[i]):
				minDelayValL = spaceR.split(linesL[i])
				minDelayVal = int(minDelayValL[1])
			elif minRateValR.match(linesL[i]):
				minRateValL = spaceR.split(linesL[i])
				minRateVal = int(minDelayValL[1])
			elif minDivisionValR.match(linesL[i]):
				minDivisionValL = spaceR.split(linesL[i])
				minDivisionVal = int(minDelayValL[1])
			elif decPercentR.match(linesL[i]):
				decPercentL = spaceR.split(linesL[i])
				decPercent = int(decPercentL[1])
			elif minVarValR.match(linesL[i]):
				limitExists = True
				cLine = cleanLine(linesL[i])
				inputL = spaceR.split(cLine)
				found = False
				for i in range(1,len(varsL)):
					if inputL[2] == varsL[i].name:
						minVarValL[i] = inputL[1]
						found = True
						break
				if not found:
					cStr = cText.cSetFg(cText.RED)
					cStr += "ERROR:"
					cStr += cText.cSetAttr(cText.NONE)
					print cStr+" "+inputL[2]+" was specified in a target for .minVarVal in the .bins file, but wasn't found in the variable list."
					sys.exit()
			elif maxVarValR.match(linesL[i]):
				limitExists = True
				cLine = cleanLine(linesL[i])
				inputL = spaceR.split(cLine)
				found = False
				for i in range(1,len(varsL)):
					if inputL[2] == varsL[i].name:
						maxVarValL[i] = inputL[1]
						found = True
						break
				if not found:
					cStr = cText.cSetFg(cText.RED)
					cStr += "ERROR:"
					cStr += cText.cSetAttr(cText.NONE)
					print cStr+" "+inputL[2]+" was specified in a target for .maxVarVal in the .bins file, but wasn't found in the variable list."
					sys.exit()
			else:
				cStr = cText.cSetFg(cText.RED)
				cStr += "ERROR:"
				cStr += cText.cSetAttr(cText.NONE)
				print cStr+" Unparseable dot option in the thresholds file: "+linesL[i]
				sys.exit()
		else:
			numDivisions += 1
			cLineL = cleanLine(linesL[i]).split(" ")
			found = False
			for j in range(1,len(varsL)):
				if cLineL[0] == varsL[j].name:
					divisionsStrL[j-1] = cLineL[1:]
					found = True
					break
			if not found:
				#cStr = cText.cSetFg(cText.RED)
				cStr = "ERROR:"
				#cStr += cText.cSetAttr(cText.NONE)
				print cStr+" Variable not included in the data file."
				print "Line: "+linesL[i]
				sys.exit()
	divisionsL = [[]]
	for sL in divisionsStrL:
		fL = []
		for s in sL:
			if (s.find("?") == -1):
				fL.append(float(s))
			else:
				fL.append(s)
		divisionsL.append(fL)
		#print len(fL)
	inputF.close()
	if numDivisions != len(varsL)-1:
		cStr = cText.cSetFg(cText.RED)
		cStr += "WARNING:"
		cStr += cText.cSetAttr(cText.NONE)
		print cStr+" There is not a threshold for every variable in the dat file."
		#sys.exit()
	#print "divisionsL:"+str(divisionsL)
	return divisionsL, tParam

##############################################################################
# Strip every run of opening and closing parentheses from a row.
##############################################################################
def cleanRow(row):
	return rParenR.sub("",lParenR.sub("",row))

##############################################################################
# Read run-1.tsd, run-2.tsd, ... from the current directory until the next
# numbered file does not exist, and reorder the data so each row is the list
# of values for the ith variable.  Also build a list of the extreme values
# for each variable.
#
# varsL:   the expected variables (passed on to parseDatFile).
# Returns: (datValsL, datValsExtremaL).  datValsL[i] holds every value of
#          varsL[i] over all files.  datValsExtremaL[i] is a (min, max) tuple
#          for i >= 1; it stays an empty list for index 0 and for variables
#          without data.
# Errors raised while parsing an existing file are no longer swallowed.
##############################################################################
def reorderDatL(varsL):
	datValsL = [[] for v in varsL]
	datValsExtremaL = [[] for v in varsL]

	fileNum = 1
	while True:
		datFile = "run-" + str(fileNum) + ".tsd"
		#The first missing file number ends the sequence
		if not os.path.isfile(datFile):
			break
		datL,numPoints = parseDatFile(datFile,varsL)
		for row in datL:
			for j in range(len(varsL)):
				datValsL[j].append(row[j])
		fileNum += 1
	#The extrema only depend on the complete data, so compute them once
	for j in range(1,len(varsL)):
		if datValsL[j]:
			datValsExtremaL[j] = (min(datValsL[j]),max(datValsL[j]))
	return datValsL, datValsExtremaL

##############################################################################
# Explore a potential DMVC run of variable j starting at data point i.  The
# run is extended while the following value stays within epsilon of the value
# at the start point.  Returns (run, end) when the run is long enough and
# (None, end) otherwise, where end is the index of the last point of the run.
##############################################################################
def exploreRun(datL,i,j,tParam):
	run = DMVCpart()
	run.startPoint = i
	run.varInd = j
	run.valueL.append(datL[i][j])
	nextInd = i+1
	while nextInd < len(datL) and tParam.epsilonEquiv(datL[run.startPoint][j],datL[nextInd][j]):
		run.valueL.append(datL[nextInd][j])
		nextInd += 1
	i = nextInd-1
	run.endPoint = i
	#Length is measured in elapsed time when absoluteTime is set and in
	#number of points otherwise
	if tParam.absoluteTime:
		tooShort = run.calcDelay(datL) < tParam.time
	else:
		tooShort = ((run.endPoint-run.startPoint)+1) < tParam.length
	if tooShort:
		return None, i
	return run, i

##############################################################################
# Determine which variables should be considered multi-valued
# continuous variables.  Marks varsL[i].dmvc for DMVC variables and
# returns a list of valid DMVC runs varsL long.  Empty lists exist for
# non-DMVC places and lists of valid runs are present for DMVC
# variables.
#
# datL:   list of data points; column 0 is used as the time of each point.
# varsL:  the variables; those whose dmvc flag is False are skipped.
# tParam: ThresholdParameters supplying epsilon, percent and absoluteTime.
# Consecutive runs of a variable are chained through their nextRun
# reference.  A variable qualifies when its runs cover at least
# tParam.percent of the trace, measured in points or, with absoluteTime, in
# time.  An empty or zero-duration trace counts as a coverage of 0.
##############################################################################
def findDMVC(datL,varsL,tParam):
	prevRun = None
	runL = []
	for j in range(len(varsL)):
		runL.append([])
		if varsL[j].dmvc != False:
			mark = 0
			for i in range(len(datL)-1):
				#Skip the points consumed by the previous exploration
				if i < mark:
					continue
				if tParam.epsilonEquiv(datL[i][j],datL[i+1][j]):
					tempRun,mark = exploreRun(datL,i,j,tParam)
					if tempRun is not None:
						#Link every run of this variable, including the first,
						#to its successor
						if runL[j]:
							prevRun.nextRun = tempRun
						prevRun = tempRun
						runL[j].append(tempRun)
			#determine if a high enough percentage of the run is constant
			if not tParam.absoluteTime:
				numPoints = 0
				for run in runL[j]:
					numPoints += (run.endPoint-run.startPoint) + 1
				if len(datL) > 0:
					coverage = numPoints/float(len(datL))
				else:
					coverage = 0.0
			else:
				absTime = 0.0
				for run in runL[j]:
					absTime += run.calcDelay(datL)
				totalTime = 0.0
				if len(datL) > 0:
					totalTime = datL[len(datL)-1][0]-datL[0][0]
				if totalTime != 0:
					coverage = absTime/totalTime
				else:
					coverage = 0.0
			if coverage < tParam.percent:
				runL[j] = [] #clear the runs if they don't meet the percentage requirement
			else:
				varsL[j].dmvc = True
	#return runL for processing during the graph building
	return runL

##############################################################################
# Create an initial set of divisions based upon the number of bins and
# the extreme values for each variable.  These initial bins are
# evenly spaced.
##############################################################################
def initDivisionsL(datValsExtremaL,varsL,divisionsL):
	#divisionsL = []
	#for i in range(len(varsL)):
	#	divisionsL.append([])
	for i in range(1,len(varsL)):
		print varsL[i]
		#print "i:"+str(i)+" "+str(datValsExtremaL[i])
		interval = float(abs(datValsExtremaL[i][1]-datValsExtremaL[i][0]) / (numThresholds+1))
		#print "interval:"+str(interval)
		for j in range(0,len(divisionsL[i])):
			if (divisionsL[i][j] == "?"):
				divisionsL[i][j] = datValsExtremaL[i][0]+(interval*j)
	return divisionsL

##############################################################################
# Generate the bin encoding for each data point given the divisions.
# datL[i][j] is the jth value of variable i and divisionsL[i] the thresholds
# of variable i.  binsL[i][j] is the index of the first threshold that is
# >= the value, or the number of thresholds when the value exceeds them all.
# Row 0 and variables without thresholds keep the initial value -1.
##############################################################################
def genBins(datL,divisionsL):
	numPoints = len(datL[0])
	binsL = create2Darray(len(divisionsL),numPoints,-1)
	for i in range(1,len(divisionsL)):
		boundsL = divisionsL[i]
		for j in range(numPoints):
			for k, bound in enumerate(boundsL):
				if (datL[i][j] <= bound):
					binsL[i][j] = k
					break
				#Not below this boundary: provisionally the next bin up, which
				#also covers the highest bin (2 boundaries need 3 bins)
				binsL[i][j] = k+1
	return binsL

##############################################################################
# Report whether data points a and b fall into the same bin for every
# variable from index 1 up to len(divisionsL)-1.
##############################################################################
def equalBins(a,b,binsL,divisionsL):
	varInd = 1
	while varInd < len(divisionsL):
		if binsL[varInd][a] != binsL[varInd][b]:
			return False
		varInd += 1
	return True

##############################################################################
# Generate the rates for each data point given the bin encodings.
#
# divisionsL: per-variable threshold lists; only its length is used here
#             (directly and through equalBins).
# datL:       per-variable value lists; datL[0] supplies the time values
#             used as the denominator of every rate.
# binsL:      the bin encoding of every point, as consumed by equalBins.
# Returns:    ratesL, a len(divisionsL) x len(datL[0]) array in which
#             ratesL[j][i] is the rate of variable j computed starting at
#             point i, or '-' where no rate was calculated.
# Reads the globals placeRates, rateSampling and pathLength.
##############################################################################
def genRates(divisionsL,datL,binsL):
	#Function notes:
	#Rates can be calculated based on transitions or places.  If rates are
	#calculated based on places they are calculated based on the change in
	#the bin for the entire line.  If rates are calculated based on
	#transitions they are calculated based on the change in the bin for each
	#variable.  These two methods have different results and it appears that
	#place based rates are more stable.
	#To help "smooth" out the rates there are several ways to modify the rate
	#calculation.  One way is to change the rateSampling variable.  This
	#variable determines how long the bin must remain constant before a rate
	#is calculated for that bin.  It can be a numerical value or "inf".  The
	#"inf" setting only calculates the rate once per bin change.  You can
	#invalidate bin changes of short length using the pathLength variable.
	#Any run of consecutive bins shorter than pathLength will not have its
	#rate calculated.  The rate is also only calculated if the time values
	#differ for the two points as I have seen examples where this is a
	#problem.
	ratesL = create2Darray(len(divisionsL),len(datL[0]),'-')
	if placeRates:
		#Place based rate calculation
		if rateSampling == "inf":
			#mark is advanced to the first point whose bins differ from point
			#i; points i..mark-1 form one run and later points inside the
			#run are skipped
			mark = 0
			for i in range(len(datL[0])):
				if i < mark:
					continue
				while mark < len(datL[0]) and equalBins(i,mark,binsL,divisionsL):
					mark += 1
				#One rate per run: the slope from its first to its last point,
				#stored at the first point
				if datL[0][mark-1] != datL[0][i] and (mark-i) >= pathLength:
					for j in range(1,len(divisionsL)):
						ratesL[j][i] = (datL[j][mark-1]-datL[j][i])/(datL[0][mark-1]-datL[0][i])
		else:
			for i in range(len(datL[0])-rateSampling):
				#Only calculate a rate when points i..i+rateSampling-1 share
				#the bins of point i
				calcRate = True
				for k in range(rateSampling):
					if not equalBins(i,i+k,binsL,divisionsL):
						calcRate = False
						break
				#The rate is the slope between point i and point i+rateSampling
				if calcRate and datL[0][i+rateSampling] != datL[0][i]:
					for j in range(1,len(divisionsL)):
						ratesL[j][i] = (datL[j][i+rateSampling]-datL[j][i])/(datL[0][i+rateSampling]-datL[0][i])
	else:
		cStr = cText.cSetFg(cText.YELLOW)
		cStr += "WARNING:"
		cStr += cText.cSetAttr(cText.NONE)
		print cStr+"this feature has not been tested."
		#Transition based rate calculation
		#NOTE(review): equalBins compares the bins of all variables, so the
		#runs found here are the same for every j; the function notes
		#describe transition rates as per variable - confirm the intent.
		#Unlike the place based branch, pathLength is not checked here.
		if rateSampling == "inf":
			for j in range(1,len(divisionsL)):
				mark = 0
				for i in range(len(datL[0])):
					if i < mark:
						continue
					while mark < len(datL[0]) and equalBins(i,mark,binsL,divisionsL):
						mark = mark + 1
					if datL[0][mark-1] != datL[0][i]:
						ratesL[j][i] = (datL[j][mark-1]-datL[j][i])/(datL[0][mark-1]-datL[0][i])
		else:
			for i in range(len(datL[0])-rateSampling):
				for j in range(1,len(divisionsL)):
					calcRate = True
					for k in range(rateSampling):
						if not equalBins(i,i+k,binsL,divisionsL):
							calcRate = False
							break
					if calcRate and datL[0][i+rateSampling] != datL[0][i]:
						ratesL[j][i] = (datL[j][i+rateSampling]-datL[j][i])/(datL[0][i+rateSampling]-datL[0][i])
	return ratesL

##############################################################################
# Return the smallest rate in a rate list, ignoring the '-' placeholders.
# Returns "-" when the list holds no rate at all.
##############################################################################
def minRate(ratesL):
	#Only the entries that are not the '-' placeholder take part
	cmpL = [rate for rate in ratesL if rate != '-']
	if not cmpL:
		return "-"
	return min(cmpL)

##############################################################################
# Return the largest rate in a rate list, ignoring the '-' placeholders.
# Returns "-" when the list holds no rate at all.
##############################################################################
def maxRate(ratesL):
	#Only the entries that are not the '-' placeholder take part
	cmpL = [rate for rate in ratesL if rate != '-']
	if not cmpL:
		return "-"
	return max(cmpL)

##############################################################################
# Give a score for the even distribution of points for all
# variables. 0 is the optimal score.
#
# datValsL:   per-variable value lists.
# divisionsL: per-variable threshold lists; index 0 is skipped.
# resL:       optional list of per-variable partial scores, updated in place
#             so that later calls can be incremental.
# updateVar:  0  -> (re)initialize resL and calculate every variable;
#             >0 -> recalculate only that variable and sum resL;
#             <0 -> full calculation that does not touch resL.
# Returns:    the sum of the per-variable scores.
##############################################################################
def pointDistCost(datValsL,divisionsL,resL=None,updateVar=-1):
	#A fresh list per call instead of a default list shared between calls
	if resL is None:
		resL = []
	total = 0
	if updateVar == 0:
		#Reset in place so the caller's list is reused and repeated
		#initialization does not keep growing it
		resL[:] = [0]*len(divisionsL)
		#Fill up resL
		for i in range(1,len(divisionsL)):
			points = pointDistCostVar(datValsL[i],divisionsL[i])
			total += points
			resL[i] = points
	elif updateVar > 0:
		#Incrementally calculate a total change
		resL[updateVar] = pointDistCostVar(datValsL[updateVar],
																			 divisionsL[updateVar])
		for points in resL:
			total += points
	else:
		#Do a full calculation from scratch
		for i in range(1,len(divisionsL)):
			total += pointDistCostVar(datValsL[i],divisionsL[i])
	return total

##############################################################################
# Give a score for the even distribution of points for an individual
# variable. 0 is the optimal score.  The score is the sum over all bins of
# the distance between the number of points in the bin and the optimal
# number of points per bin.
##############################################################################
def pointDistCostVar(datValsL,divisionsL):
	#n thresholds delimit n+1 bins
	numBins = len(divisionsL)+1
	#Both operands are integers, so under Python 2 this division truncates
	optPointsPerBin = len(datValsL)/numBins
	pointsPerBinL = [0]*numBins
	for val in datValsL:
		for j, bound in enumerate(divisionsL):
			if val <= bound:
				pointsPerBinL[j] += 1
				break
		else:
			#Above every threshold: the value belongs to the top bin
			pointsPerBinL[numBins-1] += 1

	score = 0
	for points in pointsPerBinL:
		score += abs(points - optPointsPerBin)
	return score

##############################################################################
# Give a score for the range of rates for all variables.  0 is the
# optimal score.
#
# datValsL:   per-variable value lists (passed to genBins and genRates).
# divisionsL: per-variable threshold lists; index 0 is skipped.
# resL, updateVar: accepted for compatibility with the other cost functions
#             and not used.
# Returns:    the sum over all variables of (largest rate - smallest rate).
#             Variables for which no rate was calculated contribute nothing.
##############################################################################
def rateRangeCost(datValsL,divisionsL,resL=None,updateVar=-1):
	total = 0
	binsL = genBins(datValsL,divisionsL)
	ratesL = genRates(divisionsL,datValsL,binsL)
	for i in range(1,len(divisionsL)):
		maxR = maxRate(ratesL[i])
		minR = minRate(ratesL[i])
		#maxRate/minRate return "-" when the variable has no rates at all;
		#subtracting those placeholders would raise a TypeError
		if maxR == "-" or minR == "-":
			continue
		total += abs(maxR-minR)
	return total

##############################################################################
# Look for the optimal thresholds using a greedy algorithm.
##############################################################################
def greedyOpt(divisionsL,datValsL,datValsExtremaL,initDivL):
	"""Look for better thresholds with a greedy, one-threshold-at-a-time search.

	Every threshold divisionsL[i][j] (i >= 1) whose initDivL[i][j] is not "?"
	is first moved left by half the distance to its lower neighbour (the
	previous threshold, or datValsExtremaL[i][0] for the first one).  If
	that does not lower the cost it is moved right by half the distance to
	its upper neighbour (the next threshold, or datValsExtremaL[i][1] for
	the last one).  A move is kept only when the module-level costFunc
	returns a strictly lower cost than the best seen so far.

	The search stops when the module-level iterations budget of cost
	evaluations is used up, or as soon as a complete pass keeps no move.

	divisionsL itself is not modified; candidates are deep copies.

	Returns the best list of divisions found.
	"""
	resL = [] #Used to keep partial results for cost functions
	#updateVar == 0 on the first call; later calls pass the moved variable
	bestCost = costFunc(datValsL,divisionsL,resL,0)
	numMoves = 0
	print("Starting optimization...")
	while numMoves < iterations:
		improved = False
		for i in range(1,len(divisionsL)):
			for j in range(len(divisionsL[i])):
				if initDivL[i][j] == "?":
					continue
				for direction in (-1,1):
					#Enforce the budget before every evaluation, not just
					#after a failed right move.
					if numMoves >= iterations:
						return divisionsL
					if direction < 0:
						if j == 0:
							neighbor = datValsExtremaL[i][0]
						else:
							neighbor = divisionsL[i][j-1]
					else:
						if j == len(divisionsL[i])-1:
							neighbor = datValsExtremaL[i][1]
						else:
							neighbor = divisionsL[i][j+1]
					distance = abs(neighbor - divisionsL[i][j])/2
					#The cost function may overwrite its partial results for
					#variable i while scoring the candidate; keep a snapshot
					#so a rejected candidate does not leave stale values.
					savedResL = list(resL)
					newDivisionsL = copy.deepcopy(divisionsL)
					newDivisionsL[i][j] += direction*distance
					newCost = costFunc(datValsL,newDivisionsL,resL,i)
					numMoves += 1
					if numMoves % 500 == 0:
						print(str(numMoves)+"/"+str(iterations))
					if newCost < bestCost:
						bestCost = newCost
						divisionsL = newDivisionsL
						improved = True
						#An accepted left move skips the right move
						break
					resL[:] = savedResL
		if not improved:
			#Nothing changed during this pass (or nothing was eligible to
			#move), so every further pass would repeat the same rejected
			#candidates.  This also prevents an endless loop when no
			#threshold can be moved at all.
			break
	return divisionsL

##############################################################################
# Write the .dmvc variable list and the thresholds of each variable to the
# .bins file.
##############################################################################
def writeBinsFile(varsL,divisionsL,binsFile):
	"""Write the .dmvc variable list and the thresholds to binsFile.

	varsL is a list of objects with name and dmvc attributes.  If any
	variable has dmvc == True, the first line is ".dmvc " followed by the
	names of those variables, each followed by a space.  After that, every
	variable from index 1 on that has at least one division gets a line
	with its name followed by its divisions, separated by spaces.

	binsFile is opened for writing (truncating an existing file) and is
	closed even when a write fails.
	"""
	outputF = open(binsFile, 'w')
	try:
		dmvcNamesL = [var.name for var in varsL if var.dmvc == True]
		if dmvcNamesL:
			outputF.write(".dmvc ")
			for name in dmvcNamesL:
				outputF.write(name + " ")
			outputF.write("\n")
		#Index 0 is skipped for the threshold lines (but not for .dmvc)
		for i in range(1,len(varsL)):
			if len(divisionsL[i]) > 0:
				outputF.write(varsL[i].name)
				for div in divisionsL[i]:
					outputF.write(" "+str(div))
				outputF.write("\n")
	finally:
		outputF.close()

##############################################################################
##############################################################################
def main():
	"""Parse the command line, optimize the thresholds and write the .bins file.

	Sets the module-level numThresholds, iterations, optFunc and costFunc
	from the command line options, reads the variables from the data file,
	reads the initial divisions from the .bins file, runs the selected
	optimization function and writes the resulting divisions back to the
	.bins file.

	Exits with status 1 (after printing the help text) when an unknown
	optimization or cost function is requested, or when the .bins file
	holding the initial divisions does not exist.
	"""
	global numThresholds
	global iterations
	global optFunc
	global costFunc

	def fail(msg):
		#Print a coloured error message plus the usage text and stop
		cStr = cText.cSetFg(cText.RED)
		cStr += "ERROR:"
		cStr += cText.cSetAttr(cText.NONE)
		print(cStr + msg)
		parser.print_help()
		sys.exit(1)

	usage = "usage: %prog [options] datFile1 ... datFileN"
	parser = OptionParser(usage=usage)
	parser.set_defaults(binsFile=None,numThresholds=None,costF="p",optF="g")
	parser.add_option("-b", "--bins", action="store", dest="binsFile", help="The name of the .bins file to be created.  If this is not provided the basename of the first input data file is used.")
	parser.add_option("-t", "--thresholds", action="store", dest="numThresholds", help="The number of thresholds to create during autogeneration.")
	parser.add_option("-i", "--iterations", action="store", dest="iterations", help="The number of iterations of the optimization algorithm to run.")
	parser.add_option("-c", "--cost", action="store", dest="costF", help="The cost function to use: r - Minimize the distance between rates; p - Average the number of points in each bin.")
	parser.add_option("-o", "--optimization", action="store", dest="optF", help="The optimization function to use: g - Greedy algorithm.")

	(options, args) = parser.parse_args()

	#NOTE(review): the data file name is hard-coded and the positional
	#arguments are only used to derive the default .bins name -- confirm
	#whether the arguments should also select the data file that is read.
	dataFile = "run-1.tsd"

	if not options.binsFile:
		#Use the first data file given on the command line, falling back to
		#the data file that is actually read.
		if args:
			firstDatFile = args[0]
		else:
			firstDatFile = dataFile
		binsFile = os.path.splitext(firstDatFile)[0]+".bins"
	else:
		binsFile = options.binsFile

	if options.numThresholds:
		numThresholds = int(options.numThresholds)

	if options.iterations:
		iterations = int(options.iterations)

	if options.optF == "g":
		optFunc = greedyOpt
	else:
		fail(options.optF + " is not a valid option for the optimization function.")

	if options.costF == "r":
		costFunc = rateRangeCost
	elif options.costF == "p":
		costFunc = pointDistCost
	else:
		fail(options.costF + " is not a valid option for the cost function.")

	varsL = extractVars(dataFile)
	datValsL, datValsExtremaL = reorderDatL(varsL)
	#The initial divisions and tParam only come from the .bins file, so the
	#remaining steps cannot run without it.
	if not os.path.isfile(binsFile):
		fail(binsFile + " does not exist; a .bins file with the initial divisions is required.")
	initDivL, tParam = parseBinsFile(binsFile,varsL)
	dmvcRunL = findDMVC(datValsL,varsL,tParam)
	divisionsL = initDivisionsL(datValsExtremaL,varsL,initDivL)
	print("Iterations: "+str(iterations))
	print("Optimization function: "+optFunc.__name__)
	print("Cost function: "+costFunc.__name__)
	print("Initial divisionsL:"+str(divisionsL))
	print("Initial score:"+str(costFunc(datValsL,divisionsL)))
	divisionsL = optFunc(divisionsL,datValsL,datValsExtremaL,initDivL)
	print("Final divisionsL:"+str(divisionsL))
	print("Final score:"+str(costFunc(datValsL,divisionsL)))
	writeBinsFile(varsL,divisionsL,binsFile)

##############################################################################
##############################################################################

###########
# Globals #
###########
#The default number of thresholds to create.  It can be overridden from the
#command line (-t/--thresholds).
numThresholds = 2
#The default number of iterations of the optimization algorithm.  It can be
#overridden from the command line (-i/--iterations).
iterations = 10000
#How many points should exist between the sampling of different rates.
#"inf" samples once per threshold.
rateSampling = "inf"
#For "inf" rate sampling, the number of time points that a "run" must persist
#for the rate to be calculated.  This is just another parameter to help with
#the data smoothing.
pathLength = 10
#When True the script calculates rates based on places.  When False it
#calculates rates based on transitions, although there is very little
#infrastructure for transition based rates and it isn't well tested.
placeRates = True
#The optimization function that will be used; main() sets it from the
#-o/--optimization option.
optFunc = None
#The cost function that will be used in the optimization function; main()
#sets it from the -c/--cost option.
costFunc = None


#Run the command line entry point only when executed as a script.
if __name__ == "__main__":
	main()