Word count (Python)

From LiteratePrograms

Jump to: navigation, search
Other implementations: Assembly Intel x86 Linux | C | C++ | Forth | Haskell | J | Lua | Perl | Python | Python, functional | Rexx

This program is a code dump.
Code dumps are articles with little or no documentation or rearrangement of code. Please help to turn it into a literate program. Also make sure that the source of this code consents to its release under the MIT or public domain license.

An implementation of the UNIX wc tool, in Python.

The wc tool counts characters, words and lines in text files or stdin. When invoked without any options, it will print all three values. These options are supported:

  • -c - Only count characters
  • -w - Only count words
  • -l - Only count lines

If the tool is invoked without any file name parameters, it will use stdin.

<<wc.py>>=
#!/usr/bin/env python

from optparse import OptionParser
from string import split
import sys

# Command-line interface: three independent "only count ..." switches.
parser = OptionParser(usage="usage: %prog [options] [file1 file2 ...]")

# (short flag, long flag, destination attribute, help text), listed in the
# order in which the options are registered with the parser.
for short_flag, long_flag, dest_name, help_text in (
        ("-c", "--char", "characters", "Only count characters"),
        ("-w", "--words", "words", "Only count words"),
        ("-l", "--lines", "lines", "Only count lines")):
    parser.add_option(short_flag, long_flag,
                      dest=dest_name,
                      action="store_true",
                      default=False,
                      help=help_text)

options, args = parser.parse_args()

# When no switch was given, report all three counts.
if not options.characters and not options.words and not options.lines:
    options.characters = options.words = options.lines = True

def get_count(data):
    """Count the lines, words and characters in a sequence of text lines.

    data is a sequence of strings, one entry per line.  The callers in
    this script pass the result of readlines(), so any line terminators
    present in the entries are included in the character count.  Words
    are runs of non-whitespace characters.

    Returns a (lines, words, chars) tuple of decimal strings.
    """
    lines = str(len(data))
    # str.split() with no arguments splits on runs of whitespace, just like
    # the deprecated string.split() helper it replaces.
    words = str(sum(len(line.split()) for line in data))
    chars = str(sum(len(line) for line in data))
    return lines, words, chars

def print_count(lines, words, chars, filename):
    """Write one tab-separated result row to standard output.

    lines, words and chars are counts already formatted as strings, and
    filename is the label that ends the row.  Only the columns enabled in
    the module-level `options` are written, always in the order lines,
    words, characters.  The row starts with a tab, every written count is
    followed by a tab, and the row ends with a newline.
    """
    columns = [""]  # the empty first field produces the leading tab
    if options.lines:
        columns.append(lines)
    if options.words:
        columns.append(words)
    if options.characters:
        columns.append(chars)
    columns.append(filename)
    # One explicit write produces the same bytes as the former sequence of
    # trailing-comma print statements, without relying on print's
    # soft-space rules or on Python 2-only print syntax.
    sys.stdout.write("\t".join(columns) + "\n")

if args:
    # glob is only needed when file arguments were given; import it once
    # here rather than on every pass through the loop.
    import glob

    total_lines = 0
    total_words = 0
    total_chars = 0
    file_count = 0

    for file_string in args:
        # Arguments containing '*' are expanded with glob; anything else is
        # taken as a literal file name.
        if '*' in file_string:
            file_list = glob.glob(file_string)
        else:
            file_list = [file_string]

        for file_name in file_list:
            # Close each file as soon as it has been read so that handles
            # are not left open for the rest of the run.
            handle = open(file_name)
            try:
                data = handle.readlines()
            finally:
                handle.close()

            lines, words, chars = get_count(data)
            print_count(lines, words, chars, file_name)

            # get_count returns strings, so convert back for the totals.
            total_lines += int(lines)
            total_words += int(words)
            total_chars += int(chars)
            file_count += 1

    # A summary row is printed only when two or more files were counted.
    if file_count >= 2:
        print_count(str(total_lines), str(total_words), str(total_chars), "total")

else:
    # No file arguments: count standard input and print an empty label.
    data = sys.stdin.readlines()

    lines, words, chars = get_count(data)
    print_count(lines, words, chars, "")
Download code
Personal tools