#!/usr/bin/env python

import sys, optparse

def main(Infile=sys.stdin, verbose=True):
    """Check that every line read from Infile is valid UTF-8.

    Infile is iterated line by line and each line is decoded as UTF-8.
    If Infile exposes a ``buffer`` attribute (as text-mode wrappers such as
    Python 3's ``sys.stdin`` do), that underlying byte stream is read
    instead, so the raw bytes are what gets validated.

    With verbose true, a 'Lines processed:' header, every 50th line number
    and a final summary are written to stdout.

    On the first undecodable line a report is written to stdout (regardless
    of verbose) giving the line number, the byte range within the line, the
    absolute byte offset in the input, the offending line and a row of
    carets under the bad bytes; the process then exits with status 255.

    Works on Python 2.6+ and Python 3: output goes through
    ``sys.stdout.write`` rather than the print statement.
    """
    emit = sys.stdout.write
    stream = getattr(Infile, 'buffer', Infile)

    checked = 0   # number of lines that decoded cleanly
    offset = 0    # total bytes consumed so far, including the current line
    sep = ''      # separator placed before the next progress counter

    if verbose:
        emit('Lines processed:\n')

    for lineno, line in enumerate(stream, 1):
        offset += len(line)
        if verbose and not lineno % 50:
            # Counters are space separated with no trailing space, matching
            # what ``print l,`` produced on Python 2.
            emit('%s%i' % (sep, lineno))
            sep = ' '
        try:
            line.decode('utf8')
        except UnicodeDecodeError as e:
            # offset already includes this line, so step back to its start
            # and forward to the first bad byte.
            bad_at = offset - (len(line) - e.start)

            shown = line.rstrip()
            if not isinstance(shown, str):
                # Python 3: bytes cannot be written to a text stream, so
                # show undecodable bytes as U+FFFD.
                shown = shown.decode('utf8', 'replace')

            # Everything before e.start decoded successfully. Indent by the
            # number of characters (not bytes) so the carets do not drift
            # right after multi-byte characters on a UTF-8 display.
            indent = ' ' * len(line[:e.start].decode('utf8', 'replace'))
            carets = '^' * (e.end - e.start)

            emit('\n')
            emit('Error in %s on line %i, bytes %i-%i, byte-offset %i:\n'
                 % (e.encoding, lineno, e.start, e.end, bad_at))
            emit(shown + '\n')
            emit(indent + carets + '\n')
            sys.exit(255)
        checked = lineno

    if verbose:
        emit('\n')
        emit('Input was %i lines of valid utf8\n' % checked)

if __name__ == '__main__':
    # Command-line entry point: validate each file named on the command
    # line in turn, or standard input when no file is given.
    usage = "usage: %prog [-s] [file_to_test]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-s', '--silent', action="store_false",
            dest="verbose", default=True,
            help="don't print status messages to stdout")
    (options, args) = parser.parse_args()
    if args:
        for f in args:
            try:
                # Binary mode keeps the bytes exactly as stored, so the
                # reported byte offsets are not skewed by newline
                # translation. The with-block closes the file even when
                # main() exits on an invalid line.
                with open(f, 'rb') as F:
                    main(Infile=F, verbose=options.verbose)
            except IOError as e:
                # Covers both failure to open and read errors inside main().
                sys.stdout.write('%s\n' % e)
                sys.exit(1)
    else:
        main(Infile=sys.stdin, verbose=options.verbose)
