| import string |
| |
| # returns a list, [success, updated_string], where the updated string has |
| # one less (the first) occurrence of match_string |
def removefirstoccurance(remove_string, match_string):
    """Cut the first case-insensitive occurrence of match_string out of remove_string.

    The search runs on lower-cased copies of both arguments; the characters
    are removed from the original, case-preserved string.

    Returns a two-element list [found, result]. When nothing matches,
    found is False and result is remove_string unchanged.
    """
    needle = match_string.lower()
    start = remove_string.lower().find(needle)
    if start < 0:
        return [False, remove_string]
    stop = start + len(needle)
    return [True, remove_string[:start] + remove_string[stop:]]
| |
| # returns a string with all occurrences of match_string removed |
def removealloccurances(remove_string, match_string):
    """Return remove_string with every occurrence of match_string removed.

    Matching is case-insensitive. Removal is repeated until no occurrence is
    left, so a match that only appears after an earlier removal joined two
    pieces together is removed as well ("aabb" without "ab" becomes "").

    An empty match_string matches everywhere without removing anything, which
    would loop forever; in that case remove_string is returned unchanged.
    """
    if not match_string:
        return remove_string
    needle = match_string.lower()
    width = len(needle)
    result = remove_string
    while True:
        start = result.lower().find(needle)
        if start == -1:
            return result
        result = result[:start] + result[start + width:]
| |
| # removes an occurrence of match_string only if it's first in the string |
| # returns a list [success, new_string] |
def removeprefix(remove_string, match_string):
    """Strip match_string from the front of remove_string, ignoring case.

    Returns a two-element list [removed, result]. removed is False, and
    result is remove_string unchanged, when remove_string does not start
    with match_string.
    """
    needle = match_string.lower()
    if not remove_string.lower().startswith(needle):
        return [False, remove_string]
    return [True, remove_string[len(needle):]]
| |
| # removes multiple occurrences of match_string as long as they are first in |
| # the string |
def removeallprefix(remove_string, match_string):
    """Strip every leading repetition of match_string from remove_string.

    Matching is case-insensitive. Occurrences that are not at the very start
    of the string are left alone.

    An empty match_string is a prefix of every string and removing it changes
    nothing, which would loop forever; in that case remove_string is returned
    unchanged.
    """
    if not match_string:
        return remove_string
    needle = match_string.lower()
    width = len(needle)
    result = remove_string
    while result.lower().startswith(needle):
        result = result[width:]
    return result
| |
| # returns true if extensionstring is a correct extension |
def isextension(extensionstring):
    """Return True if extensionstring looks like a file extension.

    A valid extension has at least two characters, starts with '.', and
    contains no further '.' anywhere after the first character. The final
    character is checked too, so ".." and ".h." are rejected.
    """
    if len(extensionstring) < 2:
        return False
    if extensionstring[0] != '.':
        return False
    return '.' not in extensionstring[1:]
| |
| # returns the index of the start of the last occurrence of match_string |
def findlastoccurance(original_string, match_string):
    """Return the index where the last occurrence of match_string starts.

    The search is case-sensitive. Returns -1 when match_string does not
    occur in original_string.
    """
    # str.rfind reports an absolute index into original_string and
    # considers the whole string, including its final character.
    return original_string.rfind(match_string)
| |
| # changes extension from original_extension to new_extension |
def changeextension(original_string, original_extension, new_extension):
    """Swap original_extension for new_extension in original_string.

    original_string is returned unchanged when either extension fails
    isextension() or when original_extension does not occur in it.

    NOTE(review): the string is cut at the last occurrence of
    original_extension and new_extension is appended there, so any text that
    followed the old extension is dropped as well.
    """
    both_valid = isextension(original_extension) and isextension(new_extension)
    if not both_valid:
        return original_string
    cut_at = findlastoccurance(original_string, original_extension)
    if cut_at == -1:
        return original_string
    return original_string[:cut_at] + new_extension
| |
| # wanted to do this with str.find, however that didn't seem to work, so do it manually |
| # returns the index of the first capital letter, or -1 if there is none |
def findfirstcapitalletter(original_string):
    """Return the index of the first character that changes when lower-cased.

    Returns -1 when no character of original_string is affected by lower().
    """
    for position, character in enumerate(original_string):
        if character != character.lower():
            return position
    return -1
| |
| |
| # replaces capital letters with an underscore and the lower case letter (except |
| # the very first letter, which gets no underscore in front of it) |
def lowercasewithunderscore( original_string ):
    """Convert a CamelCase name to lower_case_with_underscores.

    Every capital letter after the first character becomes '_' followed by
    its lower-case form. The first character is lower-cased without a
    leading underscore; this also applies to a one-character string.
    An empty string is returned unchanged.
    """
    if not original_string:
        return original_string
    pieces = [original_string[0].lower()]
    for character in original_string[1:]:
        lowered = character.lower()
        # A character that differs from its lower-case form is a capital.
        if lowered != character:
            pieces.append('_')
        pieces.append(lowered)
    return ''.join(pieces)
| |
| # my_table is a list of [old_string, new_string] pairs; duplicate pairs are dropped |
def removeduplicates(my_table):
    """Return a new table holding each [old_string, new_string] pair once.

    Each incoming pair is compared with the pairs already kept:
      * same old string and same new string -> duplicate, skipped;
      * only one of the two strings is the same -> the table is
        inconsistent: both pairs are printed and the program is terminated
        through quit().
    Two empty new strings only count as "the same" when the old strings
    match too, so several different old strings may all map to ''.
    """
    unique_rows = []
    for old_name, new_name in my_table:
        is_duplicate = False
        for kept_old, kept_new in unique_rows:
            same_old = (old_name == kept_old)
            same_new = (new_name == kept_new) and (new_name != '' or same_old)
            if same_old and same_new:
                is_duplicate = True
                break
            if same_old or same_new:
                print('missmatching set, terminating program')
                print(old_name)
                print(new_name)
                print(kept_old)
                print(kept_new)
                quit()
        if not is_duplicate:
            unique_rows.append([old_name, new_name])
    return unique_rows
| |
def removenochange(my_table):
    """Return a new table without the pairs whose old and new strings are equal.

    Every kept pair is copied into a fresh two-element list.
    """
    return [[before, after] for before, after in my_table if before != after]
| |
# Order the table by the size of the old string, longest first (can be used to
# replace bigger strings first, which is useful since smaller strings can be
# inside the bigger string). E.g. GIPS is a substring of GIPSVE; if we remove
# GIPS first, GIPSVE will never be removed.
def ordertablesizefirst(my_table):
    """Sort my_table in place by len(entry[0]), longest first, and return it.

    Entries whose first string is empty are valid and simply sort last.
    Entries of equal length keep their relative order (list.sort is stable).
    """
    my_table.sort(key=lambda entry: len(entry[0]), reverse=True)
    return my_table
| |
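| # returns the index at which the shorter of the two strings occurs in the |
| # longer one (case-insensitive), or -1 if it does not occur or either string |
| # is empty; assumes neither has whitespaces |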
def issubstring(string1, string2):
    """Locate the shorter argument inside the longer one, ignoring case.

    Returns the index in the longer string where the shorter one first
    occurs, or -1 when it does not occur or when either argument is empty.
    When both have the same length, string2 is searched for in string1.

    Note that a match at the very start yields 0, which is falsy; compare
    the result against -1 rather than testing its truth value.
    """
    if len(string1) == 0 or len(string2) == 0:
        return -1
    if len(string1) < len(string2):
        shorter, longer = string1, string2
    else:
        shorter, longer = string2, string1

    target = shorter.lower()
    width = len(shorter)
    for start in range(len(longer)):
        if longer[start:start + width].lower() == target:
            return start
    return -1
| |
| #not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n'] |
| #def ispartofword( char ): |
| # for item in not_part_of_word_table: |
| # if(char == item): |
| # return False |
| # return True |
| |
| # must be numerical, _ or character |
def ispartofword(char):
    """Return True if char can be part of a word: alphanumeric or '_'."""
    # isalnum() already covers everything isalpha() accepts.
    return char.isalnum() or char == '_'
| |
| # returns the index of the first letter in the word that the current_index |
| # is pointing to and the size of the word |
def getword(line, current_index):
    """Locate the word that current_index points into.

    Trailing whitespace is stripped from line before looking. A word is a
    run of characters accepted by ispartofword().

    Returns [start, size], where start is the index of the word's first
    character and size its length. Returns [] when current_index is negative,
    lies beyond the stripped line, or points at a character that is not part
    of a word (a space, tab, punctuation, ...), so size is never negative.
    """
    if current_index < 0:
        return []
    line = line.rstrip()
    if current_index >= len(line):
        return []
    if not ispartofword(line[current_index]):
        return []

    # Walk left while the preceding character still belongs to the word.
    start_pos = current_index
    while start_pos > 0 and ispartofword(line[start_pos - 1]):
        start_pos -= 1

    # Walk right to one past the last character of the word.
    end_pos = current_index + 1
    while end_pos < len(line) and ispartofword(line[end_pos]):
        end_pos += 1

    return [start_pos, end_pos - start_pos]
| |
| # my table is a tuple [string1,string2] complement_to_table is just a list |
| # of strings to compare to string1 |
def complement(my_table, complement_to_table):
    """Return the rows of my_table whose first string is not listed.

    A row is dropped when its first element equals, ignoring case, any
    string in complement_to_table. Kept rows are the original row objects,
    in their original order.
    """
    return [
        row
        for row in my_table
        if not any(row[0].lower() == other.lower()
                   for other in complement_to_table)
    ]
| |
def removestringfromhead(line, remove_string):
    """Strip every leading repetition of remove_string from line.

    The comparison is case-sensitive and advances one whole remove_string at
    a time, so multi-character strings are handled correctly
    ("ababc" without leading "ab" is "c"). Returns '' when line consists of
    nothing but repetitions of remove_string.

    An empty remove_string removes nothing; line is returned unchanged.
    """
    if not remove_string:
        return line
    step = len(remove_string)
    start = 0
    while line.startswith(remove_string, start):
        start += step
    return line[start:]
| |
def removeccomment(line):
    """Return line truncated just before the first '//'.

    The line is returned whole when it contains no '//'. The search is purely
    textual: a '//' inside a string literal is cut as well.
    """
    code_part, _, _ = line.partition('//')
    return code_part
| |
def whitespacestoonespace(line):
    """Collapse every run of whitespace in line to a single space.

    Leading and trailing whitespace is removed entirely.
    """
    words = line.split()
    return ' '.join(words)
| |
def fixabbreviations(original_string):
    """Turn runs of capitals into a single leading capital (GIPSVoice -> GipsVoice).

    The first character is copied as is. After that, a character counts as
    "capital" when upper() leaves it unchanged:
      * a capital that follows a capital is lower-cased;
      * a non-capital that follows a capital re-capitalises the character
        emitted just before it, because that one starts a new word;
      * '_' and digits are copied unchanged and end the current run.

    An empty string is returned unchanged.
    """
    if not original_string:
        return original_string

    first = original_string[0]
    previous_was_capital = (first.upper() == first)
    new_string = first
    for character in original_string[1:]:
        if character == '_' or character.isdigit():
            new_string += character
            previous_was_capital = False
            continue
        current_is_capital = (character.upper() == character)
        letter_to_add = character
        if previous_was_capital:
            if current_is_capital:
                letter_to_add = character.lower()
            else:
                # The previous letter was the start of this word, not the
                # tail of an abbreviation: restore its capital.
                new_string = new_string[:-1] + new_string[-1].upper()
        previous_was_capital = current_is_capital
        new_string += letter_to_add
    return new_string
| |
def replaceoccurances(old_string, replace_string, replace_with_string):
    """Return old_string with each replace_string swapped for replace_with_string.

    Matching is case-sensitive and non-overlapping, scanning left to right.
    The whole matched text is consumed, so none of it leaks into the result.
    An empty replace_string matches nothing; old_string is returned unchanged.
    """
    if not replace_string:
        return old_string
    return old_string.replace(replace_string, replace_with_string)