tools/refactoring/stringmanipulation.py - src - Git at Google

 import string

 # Removes the first occurrence of match_string from remove_string. The search
 # ignores case.
 # Returns a two-element list [success, updated_string]. success is True when
 # an occurrence was found and cut out; otherwise it is False and
 # updated_string is the unmodified input.
 def removefirstoccurance(remove_string, match_string):
     needle = match_string.lower()
     start = remove_string.lower().find(needle)
     if start < 0:
         return [False, remove_string]
     end = start + len(needle)
     return [True, remove_string[:start] + remove_string[end:]]

 # Returns remove_string with all occurrences of match_string removed. The
 # search ignores case. Removal is repeated until nothing is left to remove,
 # so an occurrence that only forms once an earlier removal joins two
 # fragments is removed as well (removing 'ab' from 'aabb' yields '').
 # An empty match_string removes nothing.
 def removealloccurances(remove_string, match_string):
     # The empty string is "found" in every string, so without this guard the
     # loop below would never terminate.
     if len(match_string) == 0:
         return remove_string
     needle = match_string.lower()
     while True:
         start = remove_string.lower().find(needle)
         if start == -1:
             return remove_string
         remove_string = remove_string[:start] + remove_string[start + len(needle):]

 # Strips match_string from the very start of remove_string, ignoring case.
 # Returns a two-element list [success, new_string]. When remove_string does
 # not begin with match_string, success is False and new_string is the
 # unmodified input.
 def removeprefix(remove_string, match_string):
     prefix = match_string.lower()
     if not remove_string.lower().startswith(prefix):
         return [False, remove_string]
     return [True, remove_string[len(prefix):]]

 # Removes every repetition of match_string found at the start of
 # remove_string (ignoring case) and returns the remainder.
 # An empty match_string removes nothing.
 def removeallprefix(remove_string, match_string):
     # Every string starts with the empty string, so without this guard the
     # loop below would never terminate.
     if len(match_string) == 0:
         return remove_string
     prefix = match_string.lower()
     while remove_string.lower().startswith(prefix):
         remove_string = remove_string[len(prefix):]
     return remove_string

 # Returns True if extensionstring is a well-formed file extension: a leading
 # '.' followed by at least one character, none of which is another '.'.
 def isextension(extensionstring):
     if len(extensionstring) < 2:
         return False
     if extensionstring[0] != '.':
         return False
     # Inspect everything after the leading dot, including the last character.
     # (A slice ending at len - 1 stops one short and lets '.h.' or '..' pass.)
     return '.' not in extensionstring[1:]

 # Returns the index at which the last occurrence of match_string starts in
 # original_string, or -1 if match_string does not occur. Case-sensitive.
 def findlastoccurance(original_string, match_string):
     # str.rfind searches from the right-hand end. A hand-written forward scan
     # over shrinking slices is easy to get wrong because find() on a slice
     # reports positions relative to that slice, not to the whole string.
     return original_string.rfind(match_string)

 # Replaces original_extension in original_string with new_extension.
 # Everything from the index reported by findlastoccurance() for
 # original_extension up to the end of the string is replaced.
 # original_string is returned unchanged when either extension is rejected by
 # isextension() or when original_extension is not found.
 def changeextension(original_string, original_extension, new_extension):
     both_valid = isextension(original_extension) and isextension(new_extension)
     if not both_valid:
         return original_string
     cut = findlastoccurance(original_string, original_extension)
     if cut == -1:
         return original_string
     return original_string[:cut] + new_extension

 # Returns the index of the first capital letter in original_string, i.e. the
 # first character that changes when lower-cased, or -1 if there is none.
 def findfirstcapitalletter(original_string):
     for position, character in enumerate(original_string):
         if character != character.lower():
             return position
     return -1


 # Converts a CamelCase name to lower_case_with_underscores: every capital
 # letter after the first character becomes '_' followed by its lower-case
 # form. The first character is only lower-cased, so the result never starts
 # with an added underscore. An empty string is returned as is.
 def lowercasewithunderscore(original_string):
     if len(original_string) == 0:
         return original_string
     # One pass over the string; the first character is lower-cased even when
     # it is the only character.
     pieces = [original_string[0].lower()]
     for character in original_string[1:]:
         lowered = character.lower()
         if lowered != character:
             pieces.append('_')
         pieces.append(lowered)
     return ''.join(pieces)

 # Removes duplicate rows from my_table, a sequence of [old_string, new_string]
 # pairs. The first occurrence of each pair is kept, in input order.
 # Two rows that agree on exactly one of their two strings contradict each
 # other: both rows are printed and the program is terminated (SystemExit).
 # The exception is an empty new_string, which rows with different
 # old_strings may share.
 # Returns a new list of [old_string, new_string] lists.
 def removeduplicates(my_table):
     # sys.exit() is used instead of quit(): quit() is injected by the site
     # module for interactive use and is not guaranteed to exist.
     import sys
     new_table = []
     for old_string1, new_string1 in my_table:
         duplicate = False
         for old_string2, new_string2 in new_table:
             same_old = (old_string1 == old_string2)
             same_new = (new_string1 == new_string2)
             if same_old and same_new:
                 duplicate = True
                 break
             if same_old or (same_new and new_string1 != ''):
                 # print(x) with a single argument gives identical output on
                 # Python 2 and Python 3.
                 print('missmatching set, terminating program')
                 print(old_string1)
                 print(new_string1)
                 print(old_string2)
                 print(new_string2)
                 sys.exit()
         if not duplicate:
             new_table.append([old_string1, new_string1])
     return new_table

 # Returns a new list holding only those [old_string, new_string] rows of
 # my_table whose two strings differ.
 def removenochange(my_table):
     return [[before, after] for before, after in my_table if before != after]

 # Sorts my_table in place so that rows whose first string is longest come
 # first, and returns that same list. Replacing longer strings first matters
 # because a shorter string can be contained in a longer one: GIPS is a
 # substring of GIPSVE, so if GIPS were replaced first GIPSVE would never be
 # found. Rows with equally long first strings keep their relative order.
 def ordertablesizefirst(my_table):
     # list.sort copes with empty first strings and empty tables, which a
     # "largest length seen so far > 0" selection loop does not.
     my_table.sort(key=lambda row: len(row[0]), reverse=True)
     return my_table

 # Case-insensitive substring search between two strings (assumed to contain
 # no whitespace). Whichever string is shorter is searched for inside the
 # other one.
 # Returns the index in the longer string at which the shorter one starts, or
 # -1 when there is no match or when either string is empty. A match at the
 # very start returns 0, so compare the result against -1 instead of testing
 # its truth value.
 def issubstring(string1, string2):
     if len(string1) == 0 or len(string2) == 0:
         return -1
     if len(string1) < len(string2):
         haystack, needle = string2, string1
     else:
         haystack, needle = string1, string2
     wanted = needle.lower()
     width = len(needle)
     for start in range(len(haystack)):
         if haystack[start:start + width].lower() == wanted:
             return start
     return -1

 #not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n']
 #def ispartofword( char ):
 #    for item in not_part_of_word_table:
 #        if(char == item):
 #            return False
 #    return True

 # Returns True if char may be part of a word: a letter, a digit or an
 # underscore. Anything else returns False.
 def ispartofword(char):
     return char.isalnum() or char == '_'

 # Locates the word that the character at current_index belongs to. A word is
 # a run of characters accepted by ispartofword(). Trailing whitespace of line
 # is ignored.
 # Returns [start_index, length] of that word, or [] when current_index is
 # negative, lies beyond the stripped line, or does not point at a word
 # character (a space, a tab, punctuation, ...).
 def getword(line, current_index):
     if current_index < 0:
         return []
     line = line.rstrip()
     if current_index >= len(line):
         return []
     # Reject every non-word character here, not only ' '. Otherwise the scans
     # below would report a start beyond current_index and a length of -1.
     if not ispartofword(line[current_index]):
         return []
     # Walk left while the preceding character still belongs to the word.
     start_pos = current_index
     while start_pos > 0 and ispartofword(line[start_pos - 1]):
         start_pos -= 1
     # Walk right to one past the last character of the word.
     end_pos = current_index + 1
     while end_pos < len(line) and ispartofword(line[end_pos]):
         end_pos += 1
     return [start_pos, end_pos - start_pos]

 # Filters my_table, a sequence of [string1, string2] rows, against
 # complement_to_table, a plain collection of strings.
 # Returns a new list with the rows of my_table whose string1 does not equal
 # (ignoring case) any string in complement_to_table. Row order is preserved.
 def complement(my_table, complement_to_table):
     def is_listed(name):
         return any(name.lower() == other.lower() for other in complement_to_table)

     return [row for row in my_table if not is_listed(row[0])]

 # Strips every repetition of remove_string found at the start of line
 # (case-sensitive) and returns the remainder. Returns '' when line consists
 # of nothing but repetitions of remove_string. An empty remove_string strips
 # nothing.
 def removestringfromhead(line, remove_string):
     # Comparing against an empty string always "matches"; without this guard
     # the whole line would be discarded.
     if len(remove_string) == 0:
         return line
     start = 0
     # Skip one whole remove_string per step. Advancing a single character at
     # a time would leave part of a multi-character match behind.
     while line.startswith(remove_string, start):
         start += len(remove_string)
     return line[start:]

 # Returns line with a trailing '//' comment removed: everything from the
 # first '//' to the end of the line is dropped. The search is purely textual,
 # so a '//' inside a string literal is cut as well. A line without '//' is
 # returned unchanged.
 def removeccomment(line):
     code, _marker, _comment = line.partition('//')
     return code

 # Collapses every run of whitespace in line into a single space and drops
 # leading and trailing whitespace.
 def whitespacestoonespace(line):
     words = line.split()
     return ' '.join(words)

 # Rewrites runs of capital letters (abbreviations) inside a name so that only
 # the first letter of each run stays capital, e.g. 'GIPSVoice' becomes
 # 'GipsVoice'. The last capital of a run is kept capital when a lower-case
 # letter follows it, because it starts the next word. Underscores and digits
 # are copied verbatim and end the current run. The first character is never
 # changed. An empty string is returned as is.
 def fixabbreviations(original_string):
     # Guard: reading original_string[0] below would raise IndexError.
     if len(original_string) == 0:
         return original_string
     # "Capital" here means "unchanged by upper()".
     previouswascapital = (original_string[0].upper() == original_string[0])
     new_string = original_string[0]
     for character in original_string[1:]:
         if character == '_' or character.isdigit():
             new_string += character
             previouswascapital = False
             continue
         currentiscapital = (character.upper() == character)
         letter_to_add = character
         if previouswascapital and currentiscapital:
             # Still inside a run of capitals: demote this letter.
             letter_to_add = letter_to_add.lower()
         if previouswascapital and not currentiscapital:
             # The run ended one letter ago; that letter begins a new word, so
             # restore its capital form.
             new_string = new_string[:-1] + new_string[-1].upper()
         previouswascapital = currentiscapital
         new_string += letter_to_add
     return new_string

 # Returns old_string with every non-overlapping occurrence of replace_string
 # replaced by replace_with_string (case-sensitive, scanning left to right).
 # old_string is returned unchanged when replace_string is empty.
 def replaceoccurances(old_string, replace_string, replace_with_string):
     # str.replace('') would insert the replacement between all characters,
     # so an empty search string is treated as "nothing to replace".
     if len(replace_string) == 0:
         return old_string
     # str.replace consumes the whole match before searching on. A manual
     # character loop cannot do that by assigning to its own loop index.
     return old_string.replace(replace_string, replace_with_string)
	import string

	# Removes the first occurrence of match_string from remove_string. The search
	# ignores case.
	# Returns a two-element list [success, updated_string]. success is True when
	# an occurrence was found and cut out; otherwise it is False and
	# updated_string is the unmodified input.
	def removefirstoccurance(remove_string, match_string):
	    needle = match_string.lower()
	    start = remove_string.lower().find(needle)
	    if start < 0:
	        return [False, remove_string]
	    end = start + len(needle)
	    return [True, remove_string[:start] + remove_string[end:]]

	# Returns remove_string with all occurrences of match_string removed. The
	# search ignores case. Removal is repeated until nothing is left to remove,
	# so an occurrence that only forms once an earlier removal joins two
	# fragments is removed as well (removing 'ab' from 'aabb' yields '').
	# An empty match_string removes nothing.
	def removealloccurances(remove_string, match_string):
	    # The empty string is "found" in every string, so without this guard the
	    # loop below would never terminate.
	    if len(match_string) == 0:
	        return remove_string
	    needle = match_string.lower()
	    while True:
	        start = remove_string.lower().find(needle)
	        if start == -1:
	            return remove_string
	        remove_string = remove_string[:start] + remove_string[start + len(needle):]

	# Strips match_string from the very start of remove_string, ignoring case.
	# Returns a two-element list [success, new_string]. When remove_string does
	# not begin with match_string, success is False and new_string is the
	# unmodified input.
	def removeprefix(remove_string, match_string):
	    prefix = match_string.lower()
	    if not remove_string.lower().startswith(prefix):
	        return [False, remove_string]
	    return [True, remove_string[len(prefix):]]

	# Removes every repetition of match_string found at the start of
	# remove_string (ignoring case) and returns the remainder.
	# An empty match_string removes nothing.
	def removeallprefix(remove_string, match_string):
	    # Every string starts with the empty string, so without this guard the
	    # loop below would never terminate.
	    if len(match_string) == 0:
	        return remove_string
	    prefix = match_string.lower()
	    while remove_string.lower().startswith(prefix):
	        remove_string = remove_string[len(prefix):]
	    return remove_string

	# Returns True if extensionstring is a well-formed file extension: a leading
	# '.' followed by at least one character, none of which is another '.'.
	def isextension(extensionstring):
	    if len(extensionstring) < 2:
	        return False
	    if extensionstring[0] != '.':
	        return False
	    # Inspect everything after the leading dot, including the last character.
	    # (A slice ending at len - 1 stops one short and lets '.h.' or '..' pass.)
	    return '.' not in extensionstring[1:]

	# Returns the index at which the last occurrence of match_string starts in
	# original_string, or -1 if match_string does not occur. Case-sensitive.
	def findlastoccurance(original_string, match_string):
	    # str.rfind searches from the right-hand end. A hand-written forward scan
	    # over shrinking slices is easy to get wrong because find() on a slice
	    # reports positions relative to that slice, not to the whole string.
	    return original_string.rfind(match_string)

	# Replaces original_extension in original_string with new_extension.
	# Everything from the index reported by findlastoccurance() for
	# original_extension up to the end of the string is replaced.
	# original_string is returned unchanged when either extension is rejected by
	# isextension() or when original_extension is not found.
	def changeextension(original_string, original_extension, new_extension):
	    both_valid = isextension(original_extension) and isextension(new_extension)
	    if not both_valid:
	        return original_string
	    cut = findlastoccurance(original_string, original_extension)
	    if cut == -1:
	        return original_string
	    return original_string[:cut] + new_extension

	# Returns the index of the first capital letter in original_string, i.e. the
	# first character that changes when lower-cased, or -1 if there is none.
	def findfirstcapitalletter(original_string):
	    for position, character in enumerate(original_string):
	        if character != character.lower():
	            return position
	    return -1


	# Converts a CamelCase name to lower_case_with_underscores: every capital
	# letter after the first character becomes '_' followed by its lower-case
	# form. The first character is only lower-cased, so the result never starts
	# with an added underscore. An empty string is returned as is.
	def lowercasewithunderscore(original_string):
	    if len(original_string) == 0:
	        return original_string
	    # One pass over the string; the first character is lower-cased even when
	    # it is the only character.
	    pieces = [original_string[0].lower()]
	    for character in original_string[1:]:
	        lowered = character.lower()
	        if lowered != character:
	            pieces.append('_')
	        pieces.append(lowered)
	    return ''.join(pieces)

	# Removes duplicate rows from my_table, a sequence of [old_string, new_string]
	# pairs. The first occurrence of each pair is kept, in input order.
	# Two rows that agree on exactly one of their two strings contradict each
	# other: both rows are printed and the program is terminated (SystemExit).
	# The exception is an empty new_string, which rows with different
	# old_strings may share.
	# Returns a new list of [old_string, new_string] lists.
	def removeduplicates(my_table):
	    # sys.exit() is used instead of quit(): quit() is injected by the site
	    # module for interactive use and is not guaranteed to exist.
	    import sys
	    new_table = []
	    for old_string1, new_string1 in my_table:
	        duplicate = False
	        for old_string2, new_string2 in new_table:
	            same_old = (old_string1 == old_string2)
	            same_new = (new_string1 == new_string2)
	            if same_old and same_new:
	                duplicate = True
	                break
	            if same_old or (same_new and new_string1 != ''):
	                # print(x) with a single argument gives identical output on
	                # Python 2 and Python 3.
	                print('missmatching set, terminating program')
	                print(old_string1)
	                print(new_string1)
	                print(old_string2)
	                print(new_string2)
	                sys.exit()
	        if not duplicate:
	            new_table.append([old_string1, new_string1])
	    return new_table

	# Returns a new list holding only those [old_string, new_string] rows of
	# my_table whose two strings differ.
	def removenochange(my_table):
	    return [[before, after] for before, after in my_table if before != after]

	# Sorts my_table in place so that rows whose first string is longest come
	# first, and returns that same list. Replacing longer strings first matters
	# because a shorter string can be contained in a longer one: GIPS is a
	# substring of GIPSVE, so if GIPS were replaced first GIPSVE would never be
	# found. Rows with equally long first strings keep their relative order.
	def ordertablesizefirst(my_table):
	    # list.sort copes with empty first strings and empty tables, which a
	    # "largest length seen so far > 0" selection loop does not.
	    my_table.sort(key=lambda row: len(row[0]), reverse=True)
	    return my_table

	# Case-insensitive substring search between two strings (assumed to contain
	# no whitespace). Whichever string is shorter is searched for inside the
	# other one.
	# Returns the index in the longer string at which the shorter one starts, or
	# -1 when there is no match or when either string is empty. A match at the
	# very start returns 0, so compare the result against -1 instead of testing
	# its truth value.
	def issubstring(string1, string2):
	    if len(string1) == 0 or len(string2) == 0:
	        return -1
	    if len(string1) < len(string2):
	        haystack, needle = string2, string1
	    else:
	        haystack, needle = string1, string2
	    wanted = needle.lower()
	    width = len(needle)
	    for start in range(len(haystack)):
	        if haystack[start:start + width].lower() == wanted:
	            return start
	    return -1

	#not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n']
	#def ispartofword( char ):
	# for item in not_part_of_word_table:
	# if(char == item):
	# return False
	# return True

	# Returns True if char may be part of a word: a letter, a digit or an
	# underscore. Anything else returns False.
	def ispartofword(char):
	    return char.isalnum() or char == '_'

	# Locates the word that the character at current_index belongs to. A word is
	# a run of characters accepted by ispartofword(). Trailing whitespace of line
	# is ignored.
	# Returns [start_index, length] of that word, or [] when current_index is
	# negative, lies beyond the stripped line, or does not point at a word
	# character (a space, a tab, punctuation, ...).
	def getword(line, current_index):
	    if current_index < 0:
	        return []
	    line = line.rstrip()
	    if current_index >= len(line):
	        return []
	    # Reject every non-word character here, not only ' '. Otherwise the scans
	    # below would report a start beyond current_index and a length of -1.
	    if not ispartofword(line[current_index]):
	        return []
	    # Walk left while the preceding character still belongs to the word.
	    start_pos = current_index
	    while start_pos > 0 and ispartofword(line[start_pos - 1]):
	        start_pos -= 1
	    # Walk right to one past the last character of the word.
	    end_pos = current_index + 1
	    while end_pos < len(line) and ispartofword(line[end_pos]):
	        end_pos += 1
	    return [start_pos, end_pos - start_pos]

	# Filters my_table, a sequence of [string1, string2] rows, against
	# complement_to_table, a plain collection of strings.
	# Returns a new list with the rows of my_table whose string1 does not equal
	# (ignoring case) any string in complement_to_table. Row order is preserved.
	def complement(my_table, complement_to_table):
	    def is_listed(name):
	        return any(name.lower() == other.lower() for other in complement_to_table)

	    return [row for row in my_table if not is_listed(row[0])]

	# Strips every repetition of remove_string found at the start of line
	# (case-sensitive) and returns the remainder. Returns '' when line consists
	# of nothing but repetitions of remove_string. An empty remove_string strips
	# nothing.
	def removestringfromhead(line, remove_string):
	    # Comparing against an empty string always "matches"; without this guard
	    # the whole line would be discarded.
	    if len(remove_string) == 0:
	        return line
	    start = 0
	    # Skip one whole remove_string per step. Advancing a single character at
	    # a time would leave part of a multi-character match behind.
	    while line.startswith(remove_string, start):
	        start += len(remove_string)
	    return line[start:]

	# Returns line with a trailing '//' comment removed: everything from the
	# first '//' to the end of the line is dropped. The search is purely textual,
	# so a '//' inside a string literal is cut as well. A line without '//' is
	# returned unchanged.
	def removeccomment(line):
	    code, _marker, _comment = line.partition('//')
	    return code

	# Collapses every run of whitespace in line into a single space and drops
	# leading and trailing whitespace.
	def whitespacestoonespace(line):
	    words = line.split()
	    return ' '.join(words)

	# Rewrites runs of capital letters (abbreviations) inside a name so that only
	# the first letter of each run stays capital, e.g. 'GIPSVoice' becomes
	# 'GipsVoice'. The last capital of a run is kept capital when a lower-case
	# letter follows it, because it starts the next word. Underscores and digits
	# are copied verbatim and end the current run. The first character is never
	# changed. An empty string is returned as is.
	def fixabbreviations(original_string):
	    # Guard: reading original_string[0] below would raise IndexError.
	    if len(original_string) == 0:
	        return original_string
	    # "Capital" here means "unchanged by upper()".
	    previouswascapital = (original_string[0].upper() == original_string[0])
	    new_string = original_string[0]
	    for character in original_string[1:]:
	        if character == '_' or character.isdigit():
	            new_string += character
	            previouswascapital = False
	            continue
	        currentiscapital = (character.upper() == character)
	        letter_to_add = character
	        if previouswascapital and currentiscapital:
	            # Still inside a run of capitals: demote this letter.
	            letter_to_add = letter_to_add.lower()
	        if previouswascapital and not currentiscapital:
	            # The run ended one letter ago; that letter begins a new word, so
	            # restore its capital form.
	            new_string = new_string[:-1] + new_string[-1].upper()
	        previouswascapital = currentiscapital
	        new_string += letter_to_add
	    return new_string

	# Returns old_string with every non-overlapping occurrence of replace_string
	# replaced by replace_with_string (case-sensitive, scanning left to right).
	# old_string is returned unchanged when replace_string is empty.
	def replaceoccurances(old_string, replace_string, replace_with_string):
	    # str.replace('') would insert the replacement between all characters,
	    # so an empty search string is treated as "nothing to replace".
	    if len(replace_string) == 0:
	        return old_string
	    # str.replace consumes the whole match before searching on. A manual
	    # character loop cannot do that by assigning to its own loop index.
	    return old_string.replace(replace_string, replace_with_string)