260be3a6cb98c5f933001b83f21674a8fbb97748 gperez2 Mon May 8 10:38:47 2023 -0700 Added indexing by position then concatenating terms after a diff --git src/utils/qa/searchedTermsCron.py src/utils/qa/searchedTermsCron.py index d836ff4..0366687 100755 --- src/utils/qa/searchedTermsCron.py +++ src/utils/qa/searchedTermsCron.py @@ -62,44 +62,49 @@ hgSearch=bash(" zcat /hive/data/inside/wwwstats/"+node+"/"+year+"/"+log+" | grep 'hgSearch' | tr '?' '\t' | cut -f 2 | grep 'search' | uniq") for i in hgSearch: f.write(i+'\n') else: for log in latestLogs: hgSearch=bash(" zcat /hive/data/inside/wwwstats/"+node+"/"+year+"/"+log+" | grep 'hgSearch' | tr '?' '\t' | cut -f 2 | grep 'search' | uniq") for i in hgSearch: f.write(i+'\n') f.close() #Remove duplicates with the same hgsid and save the list to a variable search_lines= bash("cat /hive/users/qateam/searchedTermsCronArchive/"+datetime.now().strftime("%Y-%m")+"/hgSearchTrimLogs | sort | uniq ") #For loop that removes hgsid and counts the search term searches_count = {} -for term in search_lines: - if len(term.split('&'))>2: - term=term.split('&') - term.pop(1) - term.reverse() - term=str(term)[1:-1] + +#Added indexing by position then concatenating terms after a '&' line split +for line in search_lines: + if len(line.split('&'))>2: + terms=line.split('&') + term=terms[0] + db=terms[2] term=term.split('=') term=term[1:3] term = str(term)[1:-1] - term = term.replace("\"", "").replace(", 'search", "").replace("\'", "").replace(",", "") - if term.lower() in searches_count: - searches_count[term.lower()] += 1 + db=db.split('=') + db=db[1:3] + db=str(db)[1:-1] + searchTerm=db+" "+term + searchTerm=searchTerm.replace("\'", "") + if searchTerm.lower() in searches_count: + searches_count[searchTerm.lower()] += 1 else: - searches_count[term.lower()] = 1 + searches_count[searchTerm.lower()] = 1 #Sort the count values from largest to smallest and stores to a list sorted_searches_counts= sorted(searches_count.values(), reverse=True) #Make a dictionary with the count values from largest to smallest sorted_searches_dict = {} for i in sorted_searches_counts: for k in searches_count.keys(): if searches_count[k] == i: sorted_searches_dict[k] = searches_count[k] #Write the sorted count values and search terms to a file file_searches = open('/hive/users/qateam/searchedTermsCronArchive/'+datetime.now().strftime("%Y-%m")+'/searchCount.txt', 'w') file_searches.write("count"+'\t'+"db term"+'\n') file_searches.write("--------------------"'\n')