Archive for May, 2008

Improved Labix dateutil.rruleset

I’ve been using Labix’s python dateutil.rruleset object in a program to manage collections of recurring dates, represented by dateutil.rrule objects. I discovered that the rruleset does not work properly if you remove a date from the set and then subsequently add the date back in. Using the standard implementation (actually, I was using Brian Beck’s heap-based re-implementation) of dateutil.rrulset, once a date is marked as excluded (either with exrule or exdate), it cannot be included.

As a simple example involving a single date, you can see the following in ipython:

In [1]: import dateutil.rrule as R
In [2]: import datetime as DT
In [3]: s = R.rruleset()
In [4]: s.exdate(DT.datetime(2000,1,1))
In [5]: s.rdate(DT.datetime(2000,1,1))
In [6]: list(s)
Out[6]: []

The correct output in the above example, as I see it, is the single date that we added on the 5th line. To achieve this behavior I completely rewrote the rruleset class within the dateutil.rrule.py file of the distribution. I then reinstalled the modified dateutil.rrule package using setup.py.

The new code is below. To use it yourself, simple cut out the existing rruleset class code and paste in this code.

class rruleset(rrulebase):

    class _genrule:
        """keep a iterator that we step through, holding at each
        value (as the current 'min') until we advance."""
        def __init__(self, op, rule):
            self.op   = op
            self.rule = rule
            self.reset()

        def reset(self):
            """reset the iterator and clear out the current minimum."""
            self._min  = None
            self.iter = iter(self.rule)

        def copyAndReset(self):
            """so that multiple iterators can step through the rules concurrently."""
            c = rruleset._genrule(self.op, self.rule)
            c.reset()
            return c

        def advance(self):
            """mark the minimum as consumed so we can move to the next one."""
            self._min = None

        def _getMin(self):
            """Either get the cached min or produce a new min."""
            if self._min is not None:
                return self._min
            else:
                self._min = self.iter.next()
                return self._min

        def __repr__(self):
            return "genrule op: %s _min: %s" % (self.op, self._min)

        min = property(_getMin)

    class _allrules:
        """keep a list of genrule objects that we'll iterate through."""
        def __init__(self):
            self.rules = []

        def append(self, genrule):
            """add a new rules to our list of rules"""
            self.rules.append(genrule)
            self.reset()

        def reset(self):
            """reset every rule in the list"""
            for r in self.rules:
                r.reset()

        def copyAndReset(self):
            """create a copy of our list of rules and reset them all"""
            c = rruleset._allrules()
            c.rules = [r.copyAndReset() for r in self.rules]
            c.reset()
            return c

        def advanceAllWithMin(self, min):
            """advance the iterator for each rule that is currently at the given min."""
            for r in self.rules:
                try:
                    if r.min == min:
                        r.advance()
                except:
                    pass

        def popMinDate(self):
            """pop off a list of minimums from our list of rules.  some of the
            iterators in our rules list might overlap.  we want to abide by the
            last (most recently added) rule in the list.  So if there are two rules,
            and the first rule includes a date, but the second rule excludes that
            date, then we want to exclude the date."""
            mins = []
            foundAddRule = False
            for r in self.rules:
                try:
                    record = {'min': r.min, 'genrule': r}
                    if r.op == 'add':
                        foundAddRule = True
                    if (not mins) or (r.min < mins[0]['min']):
                        # we encounter a new minimum, so clear the list
                        mins = []
                        mins.append(record)
                    elif r.min == mins[0]['min']:
                        # current minimum matches minimum for this rule
                        mins.append(record)
                except:
                    pass
            if not foundAddRule:
                raise IndexError
            if mins:
                self.advanceAllWithMin(mins[-1]['min'])
                return mins[-1]
            else:
                # done iterating -- all rules exhausted.
                raise IndexError

    def __init__(self, cache=False):
        rrulebase.__init__(self, cache)
        self._allrules = rruleset._allrules()

    def rrule(self, rrule):
        self._allrules.append(rruleset._genrule('add', rrule))

    def rdate(self, rdate):
        self.rdates([rdate])

    def rdates(self, rdates):
        self._allrules.append(rruleset._genrule('add', sorted(rdates)))

    def exrule(self, exrule):
        self._allrules.append(rruleset._genrule('ex', exrule))

    def exdate(self, exdate):
        self.exdates([exdate])

    def exdates(self, exdates):
        self._allrules.append(rruleset._genrule('ex', sorted(exdates)))

    def _iter(self):
        allrules = self._allrules.copyAndReset()
        total = 0
        try:
            while True:
                # this loop will exit when popMinDate raises an exception
                minRecord = allrules.popMinDate()
                if minRecord['genrule'].op == "add":
                    total += 1
                    yield minRecord['min']
        except:
            self._len = total

Leave a Comment

Moody’s should open source their rating software

I just read this article in the Financial Times about how a Moody’s software error gave erroneous top ratings to structured debt products. The article claims that the error was in their computer models, and that for some debt products the proper rating might have been up to four notches lower.

I’m not an open source zealot, but this situation is rather glaring. If a publicly traded rating agency is going to publicly rate public debt instruments, its sensible that their computer models should be public. We’re not talking about the secret formula to Coke or Heinz ketchup. This is a formula behind a highly influential rating that strongly effects the market and our economy.

If Moody’s opened up their source, then the onus for correctness would spread out into their user base. Users who really depended on the results could comb the source code and flag errors. Then again, users who really care about ratings are better off not trusting the stiff, antiquated rating methods of Moody’s.

Leave a Comment

Monads Demystified

Haskell’s monads are an enigma. They hold the key to shorter, more modular Haskell programs, but at first they’re hard to understand. For sure, many monadic tutorials exist, but I had to study them for weeks before it all clicked. I know others who struggled just as much.

I had such a hard time partly because I didn’t understand some fundamental concepts as well as I had thought. Unfortunately, it took me a while to realize this because I already had experience writing real Haskell code. It didn’t occur to me that I had to revisit basic concepts like types and functions. I also think that other tutorials don’t spend enough time breaking down the syntactic sugar that makes Haskell’s monads so terse.

This writeup focuses on the concepts that confused me. The intended audience is people who have studied Haskell, yet have struggled with monads. Although some fundemental concepts are explained, this is not a comprehensive introduction to Haskell.

Before we dive in, I want to acknowledge a few people and resources that I found most helpful during my studies. Paul Hudak’s writing is exceptionally lucid. His original A Gentle Introduction to Haskell is a tremendous resource. Cale Gibbard wrote my two favorite monadic introductions: Monads as Containers and Monads as Computation. In addition, Jeff Newbern’s All About Monads contains a well written Catalog of Standard Monads. Finally, the experts on the #haskell IRC channel are always helpful.

Back to Basics

Before we talk about the syntax or semantics of monads, and before we step through any examples, we go back to basics. When I started learning about monads, I overestimated my understanding of Haskell’s type system. This section highlights the concepts that I overlooked.

Every monad is nothing more than a paramaterized type that implements a few functions. Let’s review paramaterized types by exploring the details of (Maybe a).

> data Maybe a = Nothing
>              | Just a

It’s straightforward to see that (Maybe a) can be used to represent values that either exist or are missing. It’s also intuitive that the variable a is a place holder for the type that may or may not be there (perhaps an Int). Fair enough, but there’s more.

First let’s break down the type’s name. Although it is often called the “maybe type” in conversation, its proper name is (Maybe a). The word Maybe is a type constructor and the character a is a type variable.

The type constructor Maybe is a function that accepts the given type variable a and returns the type (Maybe a). I found this notion confusing because you cannot call the Maybe function within regular code. For example, ghci prints out:

Prelude> Maybe 4
:1:0: Not in scope: data constructor `Maybe'
Prelude> :t Maybe
:1:0: Not in scope: data constructor `Maybe'
Prelude> :kind Maybe
Maybe :: * -> *

The last statement, using :kind, gives an indication that Maybe is a unary type constructor. We won’t stop to discuss notion of a kinds (partly because I don’t understand them well enough myself). We can safely skip that topic. The error messages are printed because the Maybe function is only valid within the context of type definitions, type class definitions, and type signatures. For example, you might write:

> catMaybes :: [Maybe a] -> [a]

Within this type signature, Maybe is a function being applied to the type variable a, to produce a type of (Maybe a). (Incidentally, the type (Maybe a) is passed in turn to the type constructor [].) This is code, but it is within the scope of type signatures. We’ll return to this distinction later when we replace particular type constructors like Maybe with a variable that represents any parametric type constructor.

The right hand side of the type definition lists one or more value constructors, separated by the pipe symbol. For (Maybe a), we have Nothing and Just a. Even though these value constructors are defined within the type definition, they are valid in regular Haskell code, as we can see with ghci:

Prelude> :t Nothing
Nothing :: Maybe a
Prelude> :t Just
Just :: a -> Maybe a
Prelude> Just 5
Just 5

The ghci output says that the function named Just takes some value x of type a and returns a value of type (Maybe a), which by definition equals Just x. We introduce the value variable x here to distinguish it from a, which is a type variable. Similarly, Nothing is a nullary function of type (Maybe a).

Note that unlike the type constructor Maybe, which produces a type named (Maybe a), the value constructors Nothing and Just produce values that have the type (Maybe a). One consequence of this distinction is the tendency for value constructors to be named after its type constructor. This is not a conflict because value constructors and type constructors are in different namespaces.

Finally, be sure to review the notion of Haskell type classes and instances. Type classes allow us to define a particular interface, and then instances allow us to declare that a type implements that interface. For the purposes of monads, we will be examining the type classes Functor, and Monad.

Functors

For whatever reason, I had it in my mind that I would put off learning about functors until I understood monads. I wasn’t sure what a functor was, but I also wasn’t sure what a monad was, and I wanted to take things one at a time. In retrospect, this was a foolish mistake because the concepts are related, and functors are much simpler than monads.

A functor is simply a fancy name for a type that supports the map function, which happens to be called fmap in the Functor class. In ghci you can see:

Prelude> :t map
map :: (a -> b) -> [a] -> [b]
Prelude> :t fmap
fmap :: (Functor f) => (a -> b) -> f a -> f b

As the types show, the map function only operates over lists, whereas fmap is generalized to operate over any parametric type. The idea is that we want to apply some function to every item within a collection. The Functor class is defined as follows:

> class Functor f where
>     fmap     :: (a -> b) -> f a -> f b

This type class definition is written in the language for types. The variable f is a placeholder for any parametric type constructor. We could instantiate the variable with any parametric type constructor, including Maybe. For example:

> instance Functor Maybe where
>     fmap f Nothing  = Nothing
>     fmap f (Just x) = Just (f x)

A minor point — do not let the reuse of the variable f confuse you. In the Functor class definition, f is a placeholder for a type constructor. Within this instance defintion, f represents the function that we want to apply to whatever is within the Just x value.

Let’s see what happens in ghci:

Prelude> fmap (* 2) (Just 4)
Just 8
Prelude> fmap (* 2) Nothing
Nothing
Prelude> fmap odd (Just 4)
Just False
Prelude> fmap odd Nothing
Nothing

For the list type, we simply have:

> instance Functor [] where
>     fmap = map

To be precise, there are a two semantic rules that every proper fmap implementation must abide by:

> fmap id      = id
> fmap (f . g) = fmap f . fmap g

Together, these rules ensure that fmap doesn’t modify the shape or order of its input. Most sensible definitions of fmap abide by these rules, so don’t think about them too hard.

The important takeway from this section is the generalization of map to any parametric type using the variable f within the type signature of fmap. This allows us to make any parametric type (such as [a]) implement fmap, satisfying the common need of applying a function to every item in some collection. In the case of (Maybe a), we apply the function to zero or one values. For lists (the [a] type), we apply the function to zero or more values.

The Monad Type Class

Now we turn to the Monad class. Perhaps surprisingly, we’ll hold off discussing how monads can improve your Haskell code. Instead, we simply introduce the methods of the Monad class, just as we did for the Functor class. We also introduce Haskell’s special monadic syntactic sugar.

Here is the Monad class definition:

> infixl 1  >>, >>=
> class  Monad m  where
>     return   :: a -> m a
>     (>>=)    :: m a -> (a -> m b) -> m b
>     (>>)     :: m a -> m b -> m b
>     fail     :: String -> m a
>     m >> k   =  m >>= \_ -> k

The type class’s two important functions are return and (>>=), which is conversationally called “bind”. The function (>>) is defined in terms of bind, and exists to simplify syntax. We’ll discuss fail later, but it’s typically used to handle exceptional results.

Don’t fall in the trap of trying to think about what each of these functions “typically do.” The meaning is different for each instance of the Monad class. We will examine particular implementations shortly.

First, let’s examine return. Despite its name, it has nothing to do with the standard flow control keyword in other languages. In Haskell it is not a keyword, and it is not a flow control operator. Instead, return is a generalized value constructor, similar to how fmap was a genearlized version of map. We can show this in ghci:

Prelude> :t Just
Just :: a -> Maybe a
Prelude> :t return
return :: (Monad m) => a -> m a

Both functions take a value of some type, call it a, and return a value of a type that is parameterized over a. The Just function always returns a value of type (Maybe a), whereas the return function can generate a value of any parameterized type that is an instance of the Monad class. It is an alias for one or more of the type’s value constructors. We’ll give use cases for return later, but for now understand that if you need to create a monad, you often use return instead of explicitly using the type’s value constructors.

Next, consider bind, or (>>=). It is very similar to the fmap function, as we can see in ghci:

Prelude> :t fmap
fmap :: (Functor f) => (a -> b) -> f a -> f b
Prelude> :t (>>=)
(>>=) :: (Monad m) => m a -> (a -> m b) -> m b
Prelude> :t (flip (>>=))
(flip (>>=)) :: (Monad m) => (a -> m b) -> m a -> m b

Both functions operate over a function and a parameterized type. The first two arguments are flipped, but that’s a small matter of syntax. Also, the functions have slightly different signatures, but the intent looks familiar.

Just like with Functor, there are also some rules that any well defined instance of the Monad class must define.

> return a >>= k                 = k a
> m        >>= return            = m
> xs       >>= return . f        = fmap f xs
> m        >>= (\x -> k x >>= h) = (m >>= k) >>= h

As with the fmap rules, don’t think about these too hard. The first two rules say that return doesn’t tinker its argument. The third rule relates bind to fmap, as we hinted above. The third rule is a sort of associativity.

Haskell provides special syntactic sugar for the bind operator within its do block. Haskell performs the following two translations for us:

> do e1 ; e2         = e1 >>  e2
> do p <- e1 ; e2    = e1 >>= \p -> e2

In the second case, if p does not pattern match with e2, then fail is called. These two translations allow us to write monadic code that implicitly calls the bind operator. One effect of this notation is that the resulting code resembles typical more familiar imperative code, as we’ll see in the next section when we look at examples.

Basic Example Monads

Let’s make these monadic functions concrete by looking at two basic monads: (Maybe a) and [a]. For some time, I was confused when familiar types like these were referred to as monads. It wasn’t clear to me that any parametric type which implements the Monad class is always a monad. The monadic functionality only shines we apply a monadic operator, such as bind, on a value with the monadic type.

Monadic Maybe a

Starting with (Maybe a), we define the two key monadic operators as follows:

> instance Monad Maybe where
>     return         = Just
>     Nothing  >>= f = Nothing
>     (Just x) >>= f = f x
>     fail _         = Nothing

The return function is simply an alias for the value constructor Just. The binding operator always returns Nothing if the first argument is Nothing. Otherwise, we apply the given function f to the value x paramaterized by Just in the first argument. Due to its type, the function f must return either Just x' or Nothing.

It’s helpful to think of this instance of the bind operator passing zero or more values from one function to the next. As the type signature of (>>=) dictates, each function must produce a value of type (Maybe a). Consider the following example.

Suppose we want to define a decrement operator over the natural numbers (all integers greater than or equal to zero). To seamlessly support inputs less than or equal to zero, we define our function as follows:

> decrementNat x | x <= 0    = Nothing
>                | otherwise = Just (x-1)

Note that decrementNat has the desirable signature a -> Maybe a. Now we can use the monadic bind function to perform multiple decrements. If we load the above function from a module Foo in ghci, we can write:

*Foo> Nothing >>= decrementNat
Nothing
*Foo> Nothing >>= decrementNat >>= decrementNat
Nothing
*Foo> Just 2 >>= decrementNat
Just 1
*Foo> Just 2 >>= decrementNat >>= decrementNat
Just 0
*Foo> Just 2 >>= decrementNat >>= decrementNat >>= decrementNat
Nothing

We can also use the syntactic sugar of do, to write a function such as:

> decrementNat3 x = do x1 <- decrementNat x
>                      x2 <- decrementNat x1
>                      decrementNat x2

Then, once again, in ghci, we can write:

*Foo> decrementNat3 2
Nothing
*Foo> decrementNat3 5
Just 2

Notice that once the bind function (as defined for the (Maybe a) type) encounters a Nothing, it forever generates a Nothing. It short circuits. Otherwise, the incrementally smaller value is passed along the chain.

Monadic List

The list type is also an instance of the Monad type class, defined as follows:

> instance Monad [] where
>     return x = [x]
>     m >>= f  = concatMap f m
>     fail _   = []

Once again, return is an alias for the value constructor []. The bind operator is best conceptualized in two steps. First, a 2d list (a list of lists) is created because the function f is applied to every element in the list m. Remember that f creates a list. Then all of those lists are concatenated together.

Consider a simple expression such as:

Prelude> [1,2] >>= (\x -> return $ odd x)
[True,False]

Alternatively, you could write:

> myodd = do x <- [1,2]
>            return $ odd x

What exactly is going on here? Remember that for lists, the bind operator is concatMap, and return is the list constructor. We might also write it as:

Prelude> concatMap (return . odd) [1,2]
[True,False]

So the short monadic bind expression applies the function \x -> return $ odd x to both 1 and 2 to create [True] and [False], which are then concatenated together.

We can expand this example to produce all pairs of a list l.

> allpairs l = do x <- l
>                 y <- l
>                 return (x, y)

Note that in this case there are two chained calls to concatMap.

To be continued. . .

These are the basic ideas behind monads. We have not covered the type class MonadPlus. We have also not explored many common instances of the Monad class, most notably (IO a) and (State a). Perhaps I’ll tackle those topics in a subsequent posts.

Comments (1)

Let Ichan Destroy Yahoo!

Two days ago Carl Icahn wrote a brilliantly pointed letter to Roy Bostock, Yahoo’s chairman of the board. The letter is a masterpiece in swashbuckling rhetoric. My favorite sentnece sentence was:

During the past week, a number of shareholders have asked me to lead a proxy fight to attempt to remove the current board and to establish a new board which would attempt to negotiate a successful merger with Microsoft, something that in my opinion the current board has completely botched.

I think Icahn is making the right move. Yahoo has been spinning its wheels for years now. Jerry Yang, Yahoo’s founder and CEO, is widely known to possess a quixotic fixation on his beloved company. He exudes a righteous attitude (Yahoo is too good for Microsoft), and he’s consistently failed to demonstrate business acumen (what has Yahoo done well lately).

Bostock’s reply certainly doesn’t provide any comfort. It’s a long winded effort to claim that really, they honestly did try to strike a fair deal with Microsoft. He assumes that Icahn and other disgruntled shareholders overlooked all the events that he enumerates.

Yesterday John Dvorak wrote an article that evaluates Icahn’s proposed slate of new board members. He concludes that Icahn would destroy Yahoo because the proposed members were corporate hawks instead of savvy technologists. First, board members cannot inject good technology into Yahoo. That is exactly their problem right now, with Yang at the helm. Second, I think Yahoo is destined to fail as a standalone company because its attitude troubles discourage the innovative atmosphere that it would need to turn around.

So let Icahn install its corporate raiders and chop up Yahoo or otherwise package it for sale. Yahoo certainly contains a lot of value, so why not capture it? If the new board succeeds, the shareholders will reap a nice premium, and Yahoo’s technical gemstones will live on under a new brand.

With all this said, I don’t necessarily think Microsoft would benefit from purchasing Yahoo.  But if Steve Balmer wants to buy Yahoo, then the Yahoo board should find the best way to strike a deal.

Leave a Comment

Remarks on Language Dabbling Considered Wasteful

Today I saw this well-written post by

I expected to disagree with Gustav because I have written production-quality software in many languages, including Ruby, Python, Perl, C, C++, C#, Java, and Erlang. I’m currently learning Haskell in my spare time (and I’m loving it). I think that my efforts in all of these languages has kept me nimble. I’ve met a lot of one trick ponies who I think would benefit from the change in perspective that a new language brings.

Instead, I think Gustav makes several valid points. In fact, in a subsequent prefix to his post, he writes that in the past seven years he’s added two languages to his core set, and dropped one; so he’s had fairly decent turnover. He hasn’t been a Java horse for the past 10 years.

Even so, I would like to make a few quick points:

  1. The real challenge is finding an efficient and productive way to keep learning and to stay in an innovative frame of mind. If you fall into the trap of punching the clock every day without staying aware of new approaches and techniques, then you’ll quickly fall behind.
  2. Just as with exercise routines or diets, motivating forces are highly personal. I happen to think that I learn new languages rather quickly, so my learning curve is relatively flat. I also get a lot of pleasure and inspiration when I work in a new language, which carries over to concurrent projects in my old languages. Those benefits are personal though — I can’t speak for others.
  3. Even a developer who prefers to maintain a small proficiency set should also have a willingness to tackle new projects in a new language. I never let a language get in the way of cooperation. I’m certainly quick to object to uncommented code or poorly organized components, but I ultimately do not care what language is picked. I have seen far too many people who immediately jump to the question of “what language will we use?” when the better question is “how should we solve this problem?”

Leave a Comment

Accelerating Wall Street

Today I attended a conference in New York called Accelerating Wall Street. It was an opportunity for technologists like myself to hear about how others in the financial industry are handling the ever growing volume and speed of order and information flow on the electronic markets. The main topics of discussion were:

  • the true meaning of “ultra low latency” today, versus a couple years ago,
  • efforts to utilize multiple cores,
  • exploring complex event processing (CEP), and
  • hardware acceleration.

As I expected, due to Wall Street’s highly competitive and secretive nature, the participants did not share many actual experiences or plans. Instead, most of the conversations were high level and forward looking. A number of intelligent and influential people were there though, so I did leave with some fresh ideas.

First, technology on Wall Street is changing extremely rapidly. You cannot build up a system to implement a trading strategy, and then sit back, light your cigar, and count the money as it rolls in. To succeed you must continually improve your system and look for new opportunities. You must be highly agile so opportunities don’t disappear while you build your solution. For example, two years ago the highest latency traders were concerned about measuring and saving milliseconds, and now they’re concerned about 100s of microseconds. Similarly, be cautious of anybody touting results or best practices from even one or two years ago — they are likely outdated and largely irrelevant.

As a result, only a few elite firms can compete in the latency race. To win the race, you must optimize all of your system components and all of their interactions. At the conference, several stories were tossed around about shops that focused on microseconds within a system that included a hop that takes 10s of milliseconds. In a sense, this realization questions the relevance of the conference’s topic.

Recently I have researched some of the major complex event systems. I have a hard time differentiating the various offerings or identifying the best product. This experience was echoed by others at the conference. Further, all of the participants who spoke directly about their experience using CEP said that they used Esper, which is open source. Another common observation, which I share, was that CEP products need to offer tighter integration with messaging bus products.

Finally, I didn’t hear any lucid comments about multi-core processors or grid computing. Instead of taking the fresh development perspective that’s needed to utilize parallel processing, Wall Street seems to be stuck in the mode of optimizing their legacy techniques and products. For example, some attendees discussed using hardware acceleration for Java components. The general mindset is still, “well, I code it up using threads in Java, and if that’s too slow I either recode it in C++ or accelerate it with special hardware.” The firms that manage to embrace more modern techniques for developing concurrent software systems will be the ultimate winners.

Edit: here is a post from one of the panelists at the conference.

Leave a Comment

WSJ: New Breed of Business Gurus Rises

Today’s Wall Street Journal has an article titled “New Breed of Business Gurus Rises.” The article discusses a “ranking of influential business thinkers” that “was compiled by Thomas H. Davenport” and “H. James Wilson.” The two researchers use a technique that merges Google search results, Lexis Nexis citations, and academic citations. The top five, from a pre-selected list of 110 people, are Gary Hamel, Thomas L. Friedman, Bill Gates, Malcolm Gladwell, and Howard Gardner.

I would like to know more about the methodology behind the ranking, but I’m skeptical about its quality. For example, Thomas Friedman writes a popular newspaper column that touches on many topics outside of management. I don’t see how a count of his Google hits has much bearing on his management ideas. The article says that the book “What’s the Big Idea?”, written by Davenport and Wilson, contains more information on the methodology, so perhaps the authors account for such issues.

Not to pick the article apart, but it uses this quote to emphasize how managers are tapping into new sources of ideas: “Susan Flygare, a sales-strategy executive at Blue Cross and Blue Shield of Minnesota who has attended speeches by a Harvard-educated stand-up comic, as well as by Messrs. Gladwell and Hamel.” Unless Mrs. Flygare is hinting she saw Conan O’Brien, the h-bomb seems irrelevant.

I am interested in the general topic of mining citations and internet references for influential people within a specific domain of expertise. It would be fun to create a website that returned an ordered list of gurus for any given domain or topic. Perhaps a site like that exists. If so, please point me to it.

Leave a Comment

Another approach to infinite set operations

Brent wrote a nice reply to my last post about infinite set operations. I agree that the arbitrary halting threshold embedded in my solution is imperfect. This post further expands upon Brent’s suggested approach.

What if we redefine the difference and intersection operations as functions that emit information about the comparisons that they perform as they move through both inputs? In this case we can use (Maybe a), producing a Nothing when we reject an item, and a (Just a) when an item is accepted.

This stream of (Maybe a) values could then be filtered by another function to produce a result appropriate for the application at hand.

> module InfiniteSets2 where
> import List
> import Maybe

First we redefine the difference function.

> infDiff2 :: (Eq a, Ord a, Enum a) => [a] -> [a] -> [Maybe a]
> infDiff2 [] _          = []
> infDiff2 (x:xs) []     = (Just x):(infDiff2 xs [])
> infDiff2 (x:xs) (y:ys) | x == y = Nothing  : (infDiff2 xs ys)
>                        | x <  y = (Just x) : (infDiff2 xs (y:ys))
>                        | otherwise = infDiff2 (x:xs) ys

And here is the intersection function.

> infIntersect2 :: (Eq a, Ord a, Enum a) => [a] -> [a] -> [Maybe a]
> infIntersect2 [] _     = []
> infIntersect2 _  []    = []
> infIntersect2 (x:xs) (y:ys) | x == y    = (Just x) : (infIntersect2 xs ys)
>                             | x <  y    = Nothing : (infIntersect2 xs (y:ys))
>                             | otherwise = Nothing : (infIntersect2 (x:xs) ys)

We can rewrite our old infDiff function, here renamed to nDiff, as
follows.

> nDiff n l1 l2 = f $ map (catMaybes . (take n)) (tails $ infDiff2 l1 l2)
>     where f []      = []
>           f ([]:xs) = []
>           f (x:xs)  = (head x) : (f xs)

And here are some example calls.

> test1 = take 20 $ infDiff2 [1,2,3,5,10,20,50,100] [1,3,4,6,10,15,30,50]
> test2 = take 20 $ infDiff2 [3,5..] [3,5..]
> test3 = take 20 $ infDiff2 ([1,2] ++ [3,5..]) [3,5..]
> test4 = take 20 $ infDiff2 [1,3..] [2,4..]
> test5 = take 20 $ infDiff2 ([1,2] ++ [3,5..]) [3,5..25]

> test6 = take 20 $ infIntersect2 [1,2,3,5,10] [1,3,4,6,10]
> test7 = take 20 $ infIntersect2 [3,5..] [3,5..]
> test8 = take 20 $ infIntersect2 [3,5..] [2,4..]

> testNDiff1 = take 40 $ nDiff 5 ([1,2] ++ [3,5..]) [3,5..]

Leave a Comment

Follow

Get every new post delivered to your Inbox.