# Create a 2x4 grid of axes (eight panels) on a 24x12 figure.
# Subplot spacing is deliberately not set by hand here: the tight_layout()
# call issued once every panel has been drawn recomputes hspace/wspace, so
# any plt.subplots_adjust(...) made at this point would be overwritten.
fig, axs = plt.subplots(2, 4, figsize=(24, 12))
# Plot 1: 20-bin histogram of the 'score' column (top-left panel)
score_hist_ax = axs[0, 0]
data['score'].plot.hist(bins=20, ax=score_hist_ax)
score_hist_ax.set(title='Score Histogram', xlabel='Score', ylabel='Frequency')
# Hide the top and right frame lines for a cleaner look.
score_hist_ax.spines[['top', 'right']].set_visible(False)
# Plot 2: horizontal bars with the number of rows per 'Sentiment' value
sentiment_counts_ax = axs[0, 1]
sentiment_counts = data.groupby('Sentiment').size()
sentiment_counts.plot.barh(color=sns.color_palette('Dark2'), ax=sentiment_counts_ax)
sentiment_counts_ax.set_title('Sentiment Counts')
# Hide the top and right frame lines for a cleaner look.
sentiment_counts_ax.spines[['top', 'right']].set_visible(False)
# Plot 3: horizontal bars with the number of rows per 'Sub Category' value
sub_category_counts_ax = axs[0, 2]
sub_category_counts = data.groupby('Sub Category').size()
sub_category_counts.plot.barh(color=sns.color_palette('Dark2'), ax=sub_category_counts_ax)
sub_category_counts_ax.set_title('Sub Category Counts')
# Hide the top and right frame lines for a cleaner look.
sub_category_counts_ax.spines[['top', 'right']].set_visible(False)
# Plot 4: annotated heatmap of 'Sub Category' (rows) against 'Sentiment' (columns)
# One value_counts() Series per Sentiment group becomes one column of the table;
# combinations that never occur come out as NaN and are replaced with 0.
sub_category_counts_by_sentiment = {
    sentiment: sentiment_rows['Sub Category'].value_counts()
    for sentiment, sentiment_rows in data.groupby('Sentiment')
}
df_2dhist = pd.DataFrame(sub_category_counts_by_sentiment).fillna(0)
sentiment_heatmap_ax = axs[0, 3]
sns.heatmap(
    df_2dhist,
    annot=True,
    fmt=".0f",
    cmap='viridis',
    cbar=False,
    ax=sentiment_heatmap_ax,
)
sentiment_heatmap_ax.set(
    xlabel='Sentiment',
    ylabel='Sub Category',
    title='Sub Category vs Sentiment Heatmap',
)
# Plot 5: annotated heatmap of 'Sub Category_test' (rows) against 'Sub Category' (columns)
# One value_counts() Series per Sub Category group becomes one column of the table;
# combinations that never occur come out as NaN and are replaced with 0.
test_counts_by_sub_category = {
    sub_category: sub_category_rows['Sub Category_test'].value_counts()
    for sub_category, sub_category_rows in data.groupby('Sub Category')
}
df_2dhist_test = pd.DataFrame(test_counts_by_sub_category).fillna(0)
sub_category_heatmap_ax = axs[1, 0]
sns.heatmap(
    df_2dhist_test,
    annot=True,
    fmt=".0f",
    cmap='viridis',
    cbar=False,
    ax=sub_category_heatmap_ax,
)
sub_category_heatmap_ax.set(
    xlabel='Sub Category',
    ylabel='Sub Category_test',
    title='Sub Category vs Sub Category_test Heatmap',
)
# Plot 6: violin plot of 'score' (x) grouped by 'Sentiment' (y), with an inner box plot
# NOTE(review): newer seaborn releases reportedly warn when `palette` is passed
# without `hue`; confirm against the installed version before changing this call.
score_violin_ax = axs[1, 1]
sns.violinplot(
    data=data,
    x='score',
    y='Sentiment',
    inner='box',
    palette='Dark2',
    ax=score_violin_ax,
)
score_violin_ax.set_title('Score by Sentiment')
# Remove all four frame lines around this panel.
sns.despine(ax=score_violin_ax, left=True, bottom=True, right=True, top=True)
# Plot 7: scatter of 'thumbsUpCount' (y) against 'score' (x)
thumbs_scatter_ax = axs[1, 2]
data.plot.scatter(
    x='score',
    y='thumbsUpCount',
    s=32,
    alpha=0.8,
    color='coral',
    ax=thumbs_scatter_ax,
)
thumbs_scatter_ax.set_title('Score vs ThumbsUpCount')
# Hide the top and right frame lines for a cleaner look.
thumbs_scatter_ax.spines[['top', 'right']].set_visible(False)
# Plot 8: 'thumbsUpCount' drawn as a line against the DataFrame index
thumbs_line_ax = axs[1, 3]
data['thumbsUpCount'].plot.line(color='teal', ax=thumbs_line_ax)
thumbs_line_ax.set(
    title='ThumbsUpCount over Time',
    xlabel='Index',
    ylabel='ThumbsUpCount',
)
# Hide the top and right frame lines for a cleaner look.
thumbs_line_ax.spines[['top', 'right']].set_visible(False)
# Let matplotlib recompute the padding between panels so titles and axis
# labels do not overlap, then display the finished figure.
fig.tight_layout()
plt.show()