| \n", " | text | \n", "movie_name | \n", "category_name | \n", "
|---|---|---|---|
| 0 | \n", "A senior at an elite college (Katie Holmes), a... | \n", "abandon | \n", "Plot | \n", "
| 1 | \n", "Will Lightman is a hip Londoner who one day re... | \n", "about_a_boy | \n", "Plot | \n", "
| 2 | \n", "Warren Schmidt (Nicholson) is forced to deal w... | \n", "about_schmidt | \n", "Plot | \n", "
| 3 | \n", "An account of screenwriter Charlie Kaufman's (... | \n", "adaptation | \n", "Plot | \n", "
| 4 | \n", "Ali G unwittingly becomes a pawn in the evil C... | \n", "ali_g_indahouse | \n", "Plot | \n", "
| \n", " | Positive freq | \n", "Negative freq | \n", "pos_precision | \n", "pos_freq_pct | \n", "pos_hmean | \n", "
|---|---|---|---|---|---|
| term | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
| the | \n", "2346 | \n", "2288 | \n", "0.506258 | \n", "0.024735 | \n", "0.047166 | \n", "
| a | \n", "1775 | \n", "1613 | \n", "0.523908 | \n", "0.018715 | \n", "0.036139 | \n", "
| and | \n", "1637 | \n", "1179 | \n", "0.581321 | \n", "0.017260 | \n", "0.033524 | \n", "
| of | \n", "1480 | \n", "1235 | \n", "0.545120 | \n", "0.015604 | \n", "0.030340 | \n", "
| to | \n", "942 | \n", "1010 | \n", "0.482582 | \n", "0.009932 | \n", "0.019463 | \n", "
| it | \n", "826 | \n", "801 | \n", "0.507683 | \n", "0.008709 | \n", "0.017124 | \n", "
| is | \n", "818 | \n", "726 | \n", "0.529793 | \n", "0.008625 | \n", "0.016973 | \n", "
| s | \n", "808 | \n", "749 | \n", "0.518947 | \n", "0.008519 | \n", "0.016763 | \n", "
| in | \n", "676 | \n", "622 | \n", "0.520801 | \n", "0.007127 | \n", "0.014062 | \n", "
| that | \n", "617 | \n", "602 | \n", "0.506153 | \n", "0.006505 | \n", "0.012846 | \n", "
| \n", " | Positive freq | \n", "Negative freq | \n", "pos_precision | \n", "pos_freq_pct | \n", "pos_hmean | \n", "pos_precision_normcdf | \n", "pos_freq_pct_normcdf | \n", "pos_scaled_f_score | \n", "
|---|---|---|---|---|---|---|---|---|
| term | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| the best | \n", "65 | \n", "7 | \n", "0.902778 | \n", "0.000685 | \n", "0.001370 | \n", "0.800329 | \n", "0.999876 | \n", "0.889042 | \n", "
| entertaining | \n", "58 | \n", "13 | \n", "0.816901 | \n", "0.000612 | \n", "0.001222 | \n", "0.744974 | \n", "0.999443 | \n", "0.853648 | \n", "
| heart | \n", "45 | \n", "11 | \n", "0.803571 | \n", "0.000474 | \n", "0.000948 | \n", "0.735715 | \n", "0.993981 | \n", "0.845567 | \n", "
| our | \n", "42 | \n", "11 | \n", "0.792453 | \n", "0.000443 | \n", "0.000885 | \n", "0.727863 | \n", "0.990311 | \n", "0.839043 | \n", "
| ride | \n", "29 | \n", "6 | \n", "0.828571 | \n", "0.000306 | \n", "0.000611 | \n", "0.752939 | \n", "0.943992 | \n", "0.837710 | \n", "
| summer | \n", "29 | \n", "6 | \n", "0.828571 | \n", "0.000306 | \n", "0.000611 | \n", "0.752939 | \n", "0.943992 | \n", "0.837710 | \n", "
| flaws | \n", "19 | \n", "1 | \n", "0.950000 | \n", "0.000200 | \n", "0.000401 | \n", "0.827414 | \n", "0.844483 | \n", "0.835861 | \n", "
| moore | \n", "19 | \n", "1 | \n", "0.950000 | \n", "0.000200 | \n", "0.000401 | \n", "0.827414 | \n", "0.844483 | \n", "0.835861 | \n", "
| hilarious | \n", "28 | \n", "6 | \n", "0.823529 | \n", "0.000295 | \n", "0.000590 | \n", "0.749514 | \n", "0.937188 | \n", "0.832910 | \n", "
| delivers | \n", "25 | \n", "5 | \n", "0.833333 | \n", "0.000264 | \n", "0.000527 | \n", "0.756150 | \n", "0.912886 | \n", "0.827159 | \n", "
| \n", " | Positive freq | \n", "Negative freq | \n", "pos_precision | \n", "pos_freq_pct | \n", "pos_hmean | \n", "pos_precision_normcdf | \n", "pos_freq_pct_normcdf | \n", "pos_scaled_f_score | \n", "
|---|---|---|---|---|---|---|---|---|
| term | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| bill perfectly | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| shoplifts | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| encyclopedia | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| homo eroticism | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| trade homo | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| rough trade | \n", "0 | \n", "2 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| on rough | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| machismo in | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| gang machismo | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| teen gang | \n", "0 | \n", "1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "
| \n", " | Positive freq | \n", "Negative freq | \n", "pos_precision | \n", "pos_freq_pct | \n", "pos_hmean | \n", "pos_precision_normcdf | \n", "pos_freq_pct_normcdf | \n", "pos_scaled_f_score | \n", "neg_precision | \n", "neg_freq_pct | \n", "neg_scaled_f_score | \n", "scaled_f_score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| term | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| the best | \n", "65 | \n", "7 | \n", "0.902778 | \n", "0.000685 | \n", "0.001370 | \n", "0.800329 | \n", "0.999876 | \n", "0.889042 | \n", "0.097222 | \n", "0.000077 | \n", "0.304090 | \n", "0.778085 | \n", "
| entertaining | \n", "58 | \n", "13 | \n", "0.816901 | \n", "0.000612 | \n", "0.001222 | \n", "0.744974 | \n", "0.999443 | \n", "0.853648 | \n", "0.183099 | \n", "0.000142 | \n", "0.382666 | \n", "0.707296 | \n", "
| heart | \n", "45 | \n", "11 | \n", "0.803571 | \n", "0.000474 | \n", "0.000948 | \n", "0.735715 | \n", "0.993981 | \n", "0.845567 | \n", "0.196429 | \n", "0.000120 | \n", "0.387529 | \n", "0.691134 | \n", "
| our | \n", "42 | \n", "11 | \n", "0.792453 | \n", "0.000443 | \n", "0.000885 | \n", "0.727863 | \n", "0.990311 | \n", "0.839043 | \n", "0.207547 | \n", "0.000120 | \n", "0.395904 | \n", "0.678086 | \n", "
| summer | \n", "29 | \n", "6 | \n", "0.828571 | \n", "0.000306 | \n", "0.000611 | \n", "0.752939 | \n", "0.943992 | \n", "0.837710 | \n", "0.171429 | \n", "0.000066 | \n", "0.352312 | \n", "0.675421 | \n", "
| ride | \n", "29 | \n", "6 | \n", "0.828571 | \n", "0.000306 | \n", "0.000611 | \n", "0.752939 | \n", "0.943992 | \n", "0.837710 | \n", "0.171429 | \n", "0.000066 | \n", "0.352312 | \n", "0.675421 | \n", "
| flaws | \n", "19 | \n", "1 | \n", "0.950000 | \n", "0.000200 | \n", "0.000401 | \n", "0.827414 | \n", "0.844483 | \n", "0.835861 | \n", "0.050000 | \n", "0.000011 | \n", "0.255388 | \n", "0.671722 | \n", "
| moore | \n", "19 | \n", "1 | \n", "0.950000 | \n", "0.000200 | \n", "0.000401 | \n", "0.827414 | \n", "0.844483 | \n", "0.835861 | \n", "0.050000 | \n", "0.000011 | \n", "0.255388 | \n", "0.671722 | \n", "
| hilarious | \n", "28 | \n", "6 | \n", "0.823529 | \n", "0.000295 | \n", "0.000590 | \n", "0.749514 | \n", "0.937188 | \n", "0.832910 | \n", "0.176471 | \n", "0.000066 | \n", "0.355781 | \n", "0.665820 | \n", "
| delivers | \n", "25 | \n", "5 | \n", "0.833333 | \n", "0.000264 | \n", "0.000527 | \n", "0.756150 | \n", "0.912886 | \n", "0.827159 | \n", "0.166667 | \n", "0.000055 | \n", "0.345030 | \n", "0.654317 | \n", "
| \n", " | Positive freq | \n", "Negative freq | \n", "pos_precision | \n", "pos_freq_pct | \n", "pos_hmean | \n", "pos_precision_normcdf | \n", "pos_freq_pct_normcdf | \n", "pos_scaled_f_score | \n", "neg_precision | \n", "neg_freq_pct | \n", "neg_scaled_f_score | \n", "scaled_f_score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| term | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| bad | \n", "17 | \n", "105 | \n", "0.139344 | \n", "0.000179 | \n", "0.000358 | \n", "0.213805 | \n", "0.815360 | \n", "0.338776 | \n", "0.860656 | \n", "0.001149 | \n", "0.880301 | \n", "-0.760603 | \n", "
| worst | \n", "2 | \n", "23 | \n", "0.080000 | \n", "0.000021 | \n", "0.000042 | \n", "0.178665 | \n", "0.513404 | \n", "0.265081 | \n", "0.920000 | \n", "0.000252 | \n", "0.864072 | \n", "-0.728143 | \n", "
| nor | \n", "3 | \n", "25 | \n", "0.107143 | \n", "0.000032 | \n", "0.000063 | \n", "0.194262 | \n", "0.536341 | \n", "0.285218 | \n", "0.892857 | \n", "0.000274 | \n", "0.863366 | \n", "-0.726733 | \n", "
| a bad | \n", "2 | \n", "22 | \n", "0.083333 | \n", "0.000021 | \n", "0.000042 | \n", "0.180537 | \n", "0.513404 | \n", "0.267136 | \n", "0.916667 | \n", "0.000241 | \n", "0.858329 | \n", "-0.716659 | \n", "
| neither | \n", "3 | \n", "24 | \n", "0.111111 | \n", "0.000032 | \n", "0.000063 | \n", "0.196610 | \n", "0.536341 | \n", "0.287741 | \n", "0.888889 | \n", "0.000263 | \n", "0.858224 | \n", "-0.716448 | \n", "
| instead | \n", "7 | \n", "32 | \n", "0.179487 | \n", "0.000074 | \n", "0.000148 | \n", "0.239703 | \n", "0.626152 | \n", "0.346687 | \n", "0.820513 | \n", "0.000350 | \n", "0.853228 | \n", "-0.706456 | \n", "
| obvious | \n", "3 | \n", "23 | \n", "0.115385 | \n", "0.000032 | \n", "0.000063 | \n", "0.199158 | \n", "0.536341 | \n", "0.290460 | \n", "0.884615 | \n", "0.000252 | \n", "0.852596 | \n", "-0.705191 | \n", "
| mess | \n", "1 | \n", "19 | \n", "0.050000 | \n", "0.000011 | \n", "0.000021 | \n", "0.162372 | \n", "0.490422 | \n", "0.243969 | \n", "0.950000 | \n", "0.000208 | \n", "0.850818 | \n", "-0.701637 | \n", "
| boring | \n", "0 | \n", "17 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.137436 | \n", "0.467472 | \n", "0.212421 | \n", "1.000000 | \n", "0.000186 | \n", "0.848754 | \n", "-0.697508 | \n", "
| the only | \n", "3 | \n", "22 | \n", "0.120000 | \n", "0.000032 | \n", "0.000063 | \n", "0.201931 | \n", "0.536341 | \n", "0.293398 | \n", "0.880000 | \n", "0.000241 | \n", "0.846446 | \n", "-0.692891 | \n", "