src/utils/qa/searchedTermsCron.py 260be3a6cb98c5f933001b83f21674a8fbb97748

260be3a6cb98c5f933001b83f21674a8fbb97748
gperez2
  Mon May 8 10:38:47 2023 -0700
Added indexing by position then concatenating terms after a '&' line split

diff --git src/utils/qa/searchedTermsCron.py src/utils/qa/searchedTermsCron.py
index d836ff4..0366687 100755
--- src/utils/qa/searchedTermsCron.py
+++ src/utils/qa/searchedTermsCron.py
@@ -62,44 +62,49 @@
                 hgSearch=bash(" zcat /hive/data/inside/wwwstats/"+node+"/"+year+"/"+log+" | grep 'hgSearch' | tr '?' '\t' | cut -f 2 | grep 'search' | uniq")
                 for i in hgSearch:
                     f.write(i+'\n')
         else:
             for log in latestLogs:
                 hgSearch=bash(" zcat /hive/data/inside/wwwstats/"+node+"/"+year+"/"+log+" | grep 'hgSearch' | tr '?' '\t' | cut -f 2 | grep 'search' | uniq")
                 for i in hgSearch:
                     f.write(i+'\n')
 f.close()
 
 #Remove duplicates with the same hgsid and save the list to a variable 
 search_lines= bash("cat /hive/users/qateam/searchedTermsCronArchive/"+datetime.now().strftime("%Y-%m")+"/hgSearchTrimLogs | sort | uniq ")
 
 #For loop that removes hgsid and counts the search term 
 searches_count = {}
-for term in search_lines:
-    if len(term.split('&'))>2:
-        term=term.split('&')
-        term.pop(1)
-        term.reverse()
-        term=str(term)[1:-1]
+
+#Added indexing by position then concatenating terms after a '&' line split
+for line in search_lines:
+    if len(line.split('&'))>2:
+        terms=line.split('&')
+        term=terms[0]
+        db=terms[2]
         term=term.split('=')
         term=term[1:3]
         term = str(term)[1:-1]
-        term = term.replace("\"", "").replace(", 'search", "").replace("\'", "").replace(",", "")
-        if term.lower() in searches_count:
-            searches_count[term.lower()] += 1
+        db=db.split('=')
+        db=db[1:3]
+        db=str(db)[1:-1]
+        searchTerm=db+" "+term
+        searchTerm=searchTerm.replace("\'", "")
+        if searchTerm.lower() in searches_count:
+            searches_count[searchTerm.lower()] += 1
         else:
-            searches_count[term.lower()] = 1
+            searches_count[searchTerm.lower()] = 1
 
 #Sort the count values from largest to smallest and stores to a list           
 sorted_searches_counts= sorted(searches_count.values(), reverse=True)
 
 #Make a dictionary with the count values from largest to smallest 
 sorted_searches_dict = {}
 for i in sorted_searches_counts:
     for k in searches_count.keys():
         if searches_count[k] == i:
             sorted_searches_dict[k] = searches_count[k]
 
 #Write the sorted count values and search terms to a file
 file_searches = open('/hive/users/qateam/searchedTermsCronArchive/'+datetime.now().strftime("%Y-%m")+'/searchCount.txt', 'w')
 file_searches.write("count"+'\t'+"db term"+'\n')
 file_searches.write("--------------------"'\n')