#!/bin/bash
# Find standard deviation of packages per category and other fun statistics
# Questions? Ask Donnie Berkholz
#
# Usage: SCRIPT [PORTDIR]
#   PORTDIR  Tree to analyse. Defaults to the value reported by
#            "portageq envvar PORTDIR".
#
# "set -e" is deliberately not used: every failure is checked explicitly and
# reported through the return status of main, which is the last command and
# therefore the exit status of the script.

# Multiple of standard deviation to use for category size estimate
readonly SD_OPERATOR="/"
readonly SD_OPERAND="2"

# Top-level directories of the tree that are not package categories.
# NOTE(review): other non-category directories (e.g. metadata, licenses) may
# exist in a real tree -- confirm whether they should be listed here too.
readonly -a NON_CATEGORIES=(scripts profiles eclass)

# Print a diagnostic to stderr, prefixed with the script name.
err() {
  printf '%s\n' "${0##*/}: $*" >&2
}

# Color setup. Colors are enabled only when stdout is a terminal, so piped or
# captured output stays free of escape sequences.
# Globals written: bold unbold red green yellow blue white
setup_colors() {
  bold="" unbold="" red="" green="" yellow="" blue="" white=""
  if [[ -t 1 ]]; then
    bold=$'\033[1m'
    unbold=$'\033[0m'
    red=$'\033[40;31m'
    green=$'\033[40;32m'
    yellow=$'\033[40;33m'
    blue=$'\033[40;34m'
    white=$'\033[40;37m'
  fi
}

# Succeed when directory name $1 is not listed in NON_CATEGORIES.
is_category_name() {
  local name=$1 skip
  for skip in "${NON_CATEGORIES[@]}"; do
    [[ "$name" == "$skip" ]] && return 1
  done
  return 0
}

# Print the number of immediate, non-hidden subdirectories of directory $1.
# Regular files (e.g. metadata.xml) are not counted.
count_subdirs() {
  local dir=$1 entry n=0
  for entry in "$dir"/*/; do
    # An unmatched glob stays literal and is not a directory; skip it.
    [[ -d "$entry" ]] || continue
    n=$((n + 1))
  done
  printf '%s\n' "$n"
}

# Print floor(sqrt($1)) for a non-negative integer, using integer Newton
# iteration. Returns 1 without output for a negative argument.
isqrt() {
  local n=$1 x y
  if (( n < 0 )); then
    return 1
  fi
  if (( n < 2 )); then
    printf '%s\n' "$n"
    return 0
  fi
  x=$n
  y=$(( (x + 1) / 2 ))
  # The sequence decreases strictly until it reaches floor(sqrt(n)).
  while (( y < x )); do
    x=$y
    y=$(( (x + n / x) / 2 ))
  done
  printf '%s\n' "$x"
}

# Gather the per-category package counts of the tree and print the report.
# Arguments: $1 - optional tree directory (default: portageq's PORTDIR)
# Outputs:   report on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the tree cannot be found or has no categories
main() {
  local portdir=${1:-}
  local entry count deviation
  local -a counts=()
  local total=0 categories=0 sigma=0
  local average variance standard_deviation minimum maximum range

  if [[ -z "$portdir" ]]; then
    if ! command -v portageq >/dev/null 2>&1; then
      err "portageq not found; pass the tree directory as the first argument"
      return 1
    fi
    portdir=$(portageq envvar PORTDIR)
  fi
  if [[ -z "$portdir" || ! -d "$portdir" ]]; then
    err "cannot find portage tree '${portdir}'"
    return 1
  fi

  # How many categories, and how many packages in each?
  for entry in "$portdir"/*/; do
    [[ -d "$entry" ]] || continue
    entry=${entry%/}
    # Compare the basename: the glob yields full paths.
    is_category_name "${entry##*/}" || continue
    count=$(count_subdirs "$entry")
    counts+=("$count")
    total=$((total + count))
  done

  categories=${#counts[@]}
  if (( categories == 0 )); then
    err "no categories found in '${portdir}'"
    return 1
  fi

  # Average packages per category (integer division)
  average=$((total / categories))

  # Get sigma( ( x(i) - x(bar) )^2 )
  for count in "${counts[@]}"; do
    deviation=$((count - average))
    sigma=$((sigma + deviation * deviation))
  done

  # Get standard deviation (population variance, truncated to an integer)
  variance=$((sigma / categories))
  standard_deviation=$(isqrt "$variance") || return 1

  # Suggested category range. SD_OPERATOR/SD_OPERAND are expanded before the
  # arithmetic is evaluated, e.g. "average - standard_deviation / 2".
  minimum=$((average - standard_deviation ${SD_OPERATOR} ${SD_OPERAND}))
  maximum=$((average + standard_deviation ${SD_OPERATOR} ${SD_OPERAND}))
  range="${minimum} to ${maximum}"

  setup_colors

  printf '\n'
  printf '%s\n' "${blue}Statistics for ${bold}${white}${portdir}:${unbold}"
  printf '%s\n' "${yellow}Total packages = ${red}${total}"
  printf '%s\n' "${yellow}Total categories = ${red}${categories}"
  printf '%s\n' "${yellow}Average packages per category = ${red}${average}"
  printf '%s\n' "${yellow}Standard deviation of packages per category = ${red}${standard_deviation}"
  printf '%s\n' "${bold}${green}Suggested category size within (standard deviation ${SD_OPERATOR} ${SD_OPERAND}) of average: ${red}${range}${unbold}"
  printf '%s\n' "${bold}Split categories with more than ${maximum} packages, and do not create categories with fewer than ${minimum} packages.${unbold}"
  printf '\n'
}

main "$@"