
# Generates random walk graphs and stock graphs to see if people can tell the difference.
# Lawrence Kesteloot, July 18, 2009.
# http://www.teamten.com/lawrence/writings/are_stocks_a_random_walk.html

import random
import urllib
import sys

# The number of days we're going to plot. There are roughly this many
# weekdays in six months (26 weeks * 5 weekdays). Both the random walks and
# the stock excerpts are this many points long.
NUM_DAYS = 130

# The symbols we have. read_chart() opens "<symbol>.csv" for each of these,
# so a historical-data file must exist for every entry.
SYMBOLS = [ "AIG", "CIT", "IBM", "MMM", "SWY", "YHOO" ]

def get_random_chart(seed):
    """Build a synthetic "price" series from a seeded random walk.

    The module-level random generator is re-seeded with ``seed`` so that the
    same seed always reproduces the same series.

    Returns a list of NUM_DAYS floats. The first is a uniform draw from
    [0, 1); each following point differs from the previous one by a uniform
    step in [-0.5, 0.5).
    """
    random.seed(seed)

    # Draw the starting point first and the steps second, one step per day.
    # The last step is never applied to a reported point, but it is still
    # drawn.
    point = random.random()
    steps = [random.random() - 0.5 for _ in range(NUM_DAYS)]

    walk = []
    for step in steps:
        walk.append(point)
        point += step

    return walk

def normalize(values):
    """Rescale numbers linearly so they span the range 0.0 to 1.0.

    The smallest input maps to 0.0 and the largest to 1.0.

    Args:
        values: An iterable of numbers.

    Returns:
        A new list of floats, one per input value. An empty input gives an
        empty list. If every value is the same there is no range to scale
        by, so every output is 0.0.
    """
    # Copy so the input can be any iterable and is safe to traverse twice.
    values = list(values)
    if not values:
        return []

    low = min(values)
    # float() keeps the division below from truncating when the inputs are
    # all integers (Python 2 integer division).
    span = float(max(values) - low)
    if span == 0:
        return [0.0] * len(values)

    return [(value - low) / span for value in values]

def get_daily_average(line):
    """Return the single value used to represent one day of trading.

    Args:
        line: One data row of the CSV file, laid out as
            ``Date,Open,High,Low,Close,Volume,Adj Close``.

    Returns:
        The day's opening price as a float. Despite the function's name this
        is not an average: averaging open and close made a graph that was
        much too smooth, so only the open is used.

    Raises:
        IndexError: If the row has no Open column.
        ValueError: If the Open column is not a number.
    """
    # Only the second column is needed, so stop splitting after it. The
    # other columns are deliberately not parsed: a bad value in a column we
    # never use should not reject the row.
    open_field = line.split(",", 2)[1]
    return float(open_field)

def read_chart(symbol, seed):
    """Pick a reproducible random NUM_DAYS-long excerpt of a symbol's history.

    Reads ``<symbol>.csv``. The file is expected to start with one header
    line, followed by one row per day in reverse chronological order.

    Args:
        symbol: Ticker symbol; also the base name of the CSV file to read.
        seed: Seed for the module-level random generator, so the same seed
            always selects the same excerpt.

    Returns:
        A tuple ``(first_day, values)``. ``first_day`` is the date field of
        the earliest day in the excerpt, and ``values`` is the list of
        per-day values (see get_daily_average) in chronological order.

    Raises:
        ValueError: If the file holds fewer than NUM_DAYS data rows.
    """
    # Read all lines at once; the with-block closes the file even on error.
    with open(symbol + ".csv") as csv_file:
        lines = csv_file.readlines()

    # Cut out the header line.
    lines = lines[1:]

    # Number of days available.
    total = len(lines)
    if total < NUM_DAYS:
        raise ValueError("%s.csv has %d data rows, need at least %d"
                % (symbol, total, NUM_DAYS))

    # Pick out our period of interest. Three values are drawn and thrown
    # away before the offset is chosen (presumably to get past the first
    # outputs after seeding); they are kept so that a given seed selects
    # the same excerpt it always has.
    random.seed(seed)
    random.random()
    random.random()
    random.random()
    first = random.randint(0, total - NUM_DAYS)  # randint() is inclusive

    # Get the subsequence, and reverse it because the data file is in
    # reverse chronological order.
    lines = lines[first:first + NUM_DAYS]
    lines.reverse()

    # Get the date of the first day.
    first_day = lines[0].split(",")[0]

    # Pick out the value we want.
    return first_day, [get_daily_average(line) for line in lines]

def _write_values(filename, string_values):
    """Write the already-formatted values to ``filename``, one per line."""
    with open(filename, "w") as out:
        for text in string_values:
            out.write(text + "\n")


def _fetch_chart_png(label, string_values):
    """Request a line chart of the values and return the response body."""
    # NOTE(review): the Google Image Charts endpoint has been deprecated;
    # confirm it still responds before relying on the PNG output.
    # NOTE(review): urllib.urlencode/urllib.urlopen are the Python 2 names;
    # a Python 3 port needs urllib.parse and urllib.request instead.
    params = {}
    params["cht"] = "lc"        # Lines.
    params["chs"] = "616x476"   # 8.5x11 ratio, under 300,000 pixels.
    params["chl"] = label
    params["chd"] = "t:" + ",".join(string_values)
    params["chds"] = "0,1"      # Min and max of data values.
    params["chco"] = "000000"   # Black lines for nice printing.

    google = urllib.urlopen("http://chart.apis.google.com/chart",
            urllib.urlencode(params))
    try:
        return google.read()
    finally:
        # Close the connection even if the read fails.
        google.close()


def main():
    """Generate the anonymized set of real and random charts.

    For every symbol in SYMBOLS one real excerpt is read, and an equal
    number of random walks is generated. All series are normalized,
    shuffled, and written out under the labels A, B, C, ... as
    ``<label>.csv`` (one value per line) and ``<label>.png`` (the chart).
    The file ``DETAILS`` records the seeds, the shuffle order, and which
    label corresponds to which series.

    Seeds start at 1 and increase by one per series, so reruns produce the
    same output.
    """
    # All our results: real charts first, then the random ones.
    results = []

    # Our seed.
    seed = 1

    # Our notes. The with-block closes the file even if a later step fails.
    with open("DETAILS", "w") as details:
        # Get the real charts.
        for symbol in SYMBOLS:
            first_day, values = read_chart(symbol, seed)
            results.append(normalize(values))
            details.write("Symbol: %s, seed = %d, first day = %s\n"
                    % (symbol, seed, first_day))
            seed += 1

        # Make up as many random charts as there are real ones.
        for _ in SYMBOLS:
            results.append(normalize(get_random_chart(seed)))
            details.write("Random, seed = %d\n" % (seed,))
            seed += 1

        # Shuffle. shuffle() needs a real list to reorder in place.
        order = list(range(len(results)))
        random.seed(seed)
        random.shuffle(order)
        details.write("Order: %s\n" % (order,))

        # Output anonymized results.
        for index, results_index in enumerate(order):
            label = chr(index + 65)     # 65 is "A".
            values = results[results_index]

            # Convert values to strings once, for both the CSV and the chart.
            string_values = [str(value) for value in values]
            _write_values(label + ".csv", string_values)

            # Indices below len(SYMBOLS) are the real charts.
            if results_index < len(SYMBOLS):
                symbol = SYMBOLS[results_index]
            else:
                symbol = "random (%d)" % (results_index - len(SYMBOLS))
            details.write("Key: %s is %s\n" % (label, symbol))

            # Chart the data.
            sys.stderr.write("Making chart for %s\n" % (label,))
            image_data = _fetch_chart_png(label, string_values)
            with open(label + ".png", "wb") as image:
                image.write(image_data)

# Only generate the charts when run as a script, so that importing this
# module does not write files or make network requests.
if __name__ == "__main__":
    main()

