Introduced a num_keywords argument here to adjust the number of top keywords to be presented by the Visualizer.
SarthakJShetty · Nov 2, 2020 · cce974d · cce974d
1 parent 7189d7f
commit cce974d
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 8 deletions.
diff --git a/pyResearchInsights/NLP_Engine.py b/pyResearchInsights/NLP_Engine.py
@@ -141,7 +141,7 @@ def lemmatization(status_logger_name, textual_data, allowed_postags=['NOUN', 'AD
 
 	return texts_out
 
-def nlp_engine_main(abstracts_log_name, status_logger_name, num_topics = None):
+def nlp_engine_main(abstracts_log_name, status_logger_name, num_topics = None, num_keywords = None):
 	nlp_engine_main_start_status_key = "Initiating the NLP Engine"
 	status_logger(status_logger_name, nlp_engine_main_start_status_key)
 
@@ -150,6 +150,11 @@ def nlp_engine_main(abstracts_log_name, status_logger_name, num_topics = None):
 		'''If the user has not provided this argument then set to 10'''
 		num_topics = 10
 
+	'''Declaring the number of keywords to be presented by the Visualizer'''
+	if num_keywords == None:
+		'''If the user has not provided this argument then set to 20'''
+		num_keywords = 20
+
 	'''Extracts the data from the .txt file and puts them into a Pandas dataframe buckets'''
 	textual_dataframe = data_reader(abstracts_log_name, status_logger_name)
 	'''Rids the symbols and special characters from the textual_data'''
@@ -185,6 +190,6 @@ def nlp_engine_main(abstracts_log_name, status_logger_name, num_topics = None):
 	status_logger(status_logger_name, nlp_engine_main_end_status_key)
 
 	'''Importing the visualizer_main function to view the LDA Model built by the NLP_engine_main() function'''
-	visualizer_main(lda_model, corpus, id2word, textual_data_lemmatized, num_topics, abstracts_log_name, status_logger_name)
+	visualizer_main(lda_model, corpus, id2word, textual_data_lemmatized, num_topics, num_keywords, abstracts_log_name, status_logger_name)
 
 	return 0
diff --git a/pyResearchInsights/Visualizer.py b/pyResearchInsights/Visualizer.py
@@ -39,15 +39,15 @@ def visualizer_generator(lda_model, corpus, id2word, logs_folder_name, status_lo
 	visualizer_generator_end_status_key = "Prepared the topic modeling visualization"+" "+logs_folder_name+"/"+"Data_Visualization_Topic_Modelling.html"
 	status_logger(status_logger_name, visualizer_generator_end_status_key)		
 
-def topic_builder(lda_model, topic_order, num_topics, textual_data_lemmatized, logs_folder_name, status_logger_name):
+def topic_builder(lda_model, topic_order, num_topics, num_keywords, textual_data_lemmatized, logs_folder_name, status_logger_name):
 	'''We generate histograms here to present the frequency and weights of the keywords of each topic and save them to the disc for further analysis'''
 	topic_builder_start_status_key = "Preparing the frequency and weights vs keywords charts"
 	status_logger(status_logger_name, topic_builder_start_status_key)
 
 	'''Setting the colormaps here to generate the num_topics charts that proceed'''
 	colorchart = cm.get_cmap('plasma', num_topics)
 
-	topics = lda_model.show_topics(num_topics = -1, num_words = 20, formatted=False)
+	topics = lda_model.show_topics(num_topics = -1, num_words = num_keywords, formatted=False)
 	data_flat = [w for w_list in textual_data_lemmatized for w in w_list]
 	counter = Counter(data_flat)
 
@@ -62,8 +62,8 @@ def topic_builder(lda_model, topic_order, num_topics, textual_data_lemmatized, l
 
 	for topic in topic_order:
 		'''Progressively generating the figures comprising the weights and frequencies for each keyword in each topic'''
-		fig, ax = plt.subplots(1,1, figsize=[20, 15])
-		x_axis = [x_axis_element for x_axis_element in range(0, 20)]
+		fig, ax = plt.subplots(1, 1, figsize=[20, 15])
+		x_axis = [x_axis_element for x_axis_element in range(0, num_keywords)]
 
 		'''Creating the x_axis labels here, which is the topic keywords'''
 		x_axis_labels = [element for element in df.loc[df.topic_id==topic, 'word']]
@@ -182,7 +182,7 @@ def trends_histogram(abstracts_log_name, logs_folder_name, trend_keywords, statu
 	trends_histogram_end_status_key = "Generated the trends graph"+" "+logs_folder_name+"/"+"Data_Visualization_Trends_Graph"+"_"+trend_keywords[0]+".png"
 	status_logger(status_logger_name, trends_histogram_end_status_key)
 
-def	visualizer_main(lda_model, corpus, id2word, textual_data_lemmatized, num_topics, abstracts_log_name, status_logger_name):
+def	visualizer_main(lda_model, corpus, id2word, textual_data_lemmatized, num_topics, num_keywords, abstracts_log_name, status_logger_name):
 	visualizer_main_start_status_key = "Entering the visualizer_main() code"
 	status_logger(status_logger_name, visualizer_main_start_status_key)
 
@@ -201,7 +201,7 @@ def	visualizer_main(lda_model, corpus, id2word, textual_data_lemmatized, num_top
 	topic_order = visualizer_generator(lda_model, corpus, id2word, logs_folder_name, status_logger_name)
 
 	'''We generate histograms here to present the frequency and weights of the keywords of each topic'''
-	topic_builder(lda_model, topic_order, num_topics, textual_data_lemmatized, logs_folder_name, status_logger_name)
+	topic_builder(lda_model, topic_order, num_topics, num_keywords, textual_data_lemmatized, logs_folder_name, status_logger_name)
 
 	visualizer_main_end_status_key = "Exiting the visualizer_main() code"
 	status_logger(status_logger_name, visualizer_main_end_status_key)