Wednesday, 29 January 2014

Data Scope

Data Scope is a command-line application I implemented in C/C++ to allow me to quickly access the tick data in my Mongo database. The results are printed to the Linux shell, which I find quite effective for investigating high-frequency data.

The application works well but I have a list of ways that it can be extended/improved both in terms of functionality and efficiency.

Usage

Start Data Scope using the command datascope.


-bash$  datascope
Data Scope:   

The help command lists the available Data Scope commands.

Data Scope: help
Command: bars           Description: OHLC bars.
Command: vap            Description: Volume at price.
Command: vapt           Description: Volume at price per bar.
Command: svpinapt       Description: Signed volume at price per bar.
Command: mktapt         Description: Market at price per bar.
Command: load           Description: Load trade and quote data.
Command: showtaqcached  Description: List cached TAQ.
Command: showbarscached Description: List cached bars.
Command: help           Description: List of valid commands.
Data Scope: 

The following command shows the help file for the bars command.

Data Scope: bars --help
Available options
-----------------
-h, --help
    Example Usage: --key=CL:A:20130101 --start=23:00:00:000 --end=24:00:00:000 --barsize==5:m

--key  [required]
    Format: MATURITY:DATE - EG CL:A:20130101

--start  [required]
    Format: HH:MM:SS

--end  [required]
    Format: HH:MM:SS

--barsize  [required]
    1:h,1:m,1:s,1:ms

Return codes
-----------------
    0     Success
    1     Error

Data Scope: 

I then call the bars command to generate 5 minute bars between 13:00 and 15:00 on 20130102 for the front month of the future symbol CL (A denotes front month).


Data Scope: bars --key=CL:A:20130102 --start=13:00:00:000 --end=15:00:00:000 --barsize==5:m

The following figure shows the color formatted result which is printed to the shell.


As a second example I print 5 second bars between 13:00 and 13:01 on the same day.


Data Scope: bars --key=CL:A:20130102 --start=13:00:00:000 --end=13:01:00:000 --barsize==5:s


The next command I illustrate is vapt which shows the evolution of volume at price over bars of a particular size. The following command call generates the volume at price over 2 minute bars between 23:00 and 24:00.


Data Scope: vapt --key=CL:A:20130101 --start=23:00:00:000 --end=24:00:00:000 --barsize==2:m


The last command I illustrate is mktapt which shows the size at the best bid and offer prices at discrete intervals. The following command call shows the market at 2 minute intervals.


Data Scope: mktapt --key=CL:A:20130101 --start=23:00:00:000 --end=24:00:00:000 --barsize==2:m

As a second example of this command I show the market at 200 millisecond intervals between 23:00:00 and 23:00:05.

Data Scope: mktapt  --key=CL:A:20130101 --start=23:00:00:000 --end=23:00:05:000 --barsize==200:ms


Implementation in C/C++

 

#ifndef _MAIN_CPP_
#define _MAIN_CPP_

#include <stdio.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/errno.h>

#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include <readline/readline.h>
#include <readline/history.h>

#include <boost/lexical_cast.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <boost/log/trivial.hpp>

//The following includes exist in BobLib, which is a C++ static library I implemented
#include "timeseries.h"
#include "trade.h"
#include "snapshot.h"
#include "simulator.h"
#include "utility.h"
#include "readwrite.h"
#include "barsoneday.h"
#include "argvparser.h"
#include "boblibtypes.h"

using namespace BobLib;
using namespace boost::posix_time;
using boost::bad_lexical_cast;
using boost::lexical_cast;
using namespace std;


// Prototypes for the helpers defined further down in this file.
// Every int-returning helper here returns 0 on success and 1 on failure.
int load_data(char* arg);
int timeStringToTimeDuration(string timeStr, time_duration& ); // expects HH:MM:SS:ZZZ (four colon-separated fields)
int barsizeStringToTimeDuration(string barsizeStr,time_duration& ); // expects COUNT:UNIT, e.g. 1:s (units: h, m, s, ms)
int dateStringToDate(string date_str, boost::gregorian::date& ); // expects YYYYMMDD
int createBarsOneDay(BarsOneDay& bars,char* arg);
void print_details(BarsOneDay* bar);
void initLog();

// CL:A:20130101  
std::map > trade_map;
std::map > quote_map;

/* Declared here but not referenced anywhere in this listing. */
extern char *xmalloc ();

/* Command handlers, one per entry in the commands[] table. Each receives the
   text that follows the command word on the input line (see execute_line).
   NOTE(review): PARAMS is not defined in this file; presumably it is supplied
   by the readline headers -- confirm. */
int com_bars PARAMS((char *));
int com_vap PARAMS((char *));
int com_vapt PARAMS((char *));
int com_svpinapt PARAMS((char *));
int com_mktapt PARAMS((char *));
int com_load PARAMS((char *));
int com_showloaded PARAMS((char *));
int com_help PARAMS((char *));

/* A structure which contains information on the commands this program
   can understand. The text members are const because the commands[] table
   initialises them from string literals, which must not be written to. */

typedef struct {
      const char *name;       /* User printable name of the function. */
      rl_icpfunc_t *func;     /* Function to call to do the job. */
      const char *doc;        /* Documentation for this function.  */
} COMMAND;


/* Dispatch table used by find_command() and com_help(): command word,
   handler, and the description printed by `help`. The all-NULL entry marks
   the end of the table. */
COMMAND commands[] = {
    { "bars",       com_bars,       "Command: Bars" },
    { "vap",        com_vap,        "Command: Volume at price" },
    { "vapt",       com_vapt,       "Command: Volume at price per bar" },
    { "svpinapt",   com_svpinapt,   "Command: Signed volume at price per bar" },
    { "mktapt",     com_mktapt,     "Command: Market at price per bar" },
    { "load",       com_load,       "Command: Load trade and quote data" },
    { "showloaded", com_showloaded, "Command: List loaded days" },
    { "help",       com_help,       "Command: Help" },
    { NULL,         NULL,           NULL }
};

/* Forward declarations for the line-handling helpers defined below.
   valid_argument is defined in this file but not called in this listing. */
char *stripwhite (char*);
COMMAND *find_command (char*);
int execute_line ( char *line);
int valid_argument ( char *caller,char *arg);

/* The name of this program, as taken from argv[0].
   NOTE(review): nothing in this listing assigns it. */
char *progname;

/* When non-zero, this global means the user is done using this program.
   NOTE(review): nothing in this listing sets it, so the read loop in main
   only ends when readline returns NULL. */
int done;


/* Copy of argc, stored by main; not read elsewhere in this listing. */
int numargs = 0;



int main(int argc, char **argv)
{
    void initLog();


    numargs = argc;
   
   //3. READLINE START
    
    char *line, *s;

    /* Loop reading and executing lines until the user quits. */
     for ( ; done == 0; )
     {
         line = readline ("Data Scope: ");
         if (!line)
            break;
        
           /* Remove leading and trailing whitespace from the line.
                    Then, if there is anything left, add it to the history list
                             and execute it. */
          s = stripwhite (line);

          if (*s)
          {
                add_history (s);
                execute_line (s);
          }
          free (line);
     }
     exit (0);
}


/* Split LINE into a command word and the rest, look the word up in the
   command table and run its handler on the rest. LINE is modified in place
   (a NUL is written after the command word).
   Returns the handler's result, or -1 when the word is not a known command. */
int execute_line ( char *line)
{
    /* `register` dropped: the storage class was removed in C++17. */
    int i = 0;

    /* Isolate the command word. */
    while (line[i] && whitespace (line[i]))
        i++;
    char *word = line + i;

    while (line[i] && !whitespace (line[i]))
        i++;

    /* Terminate the word only if more text follows; otherwise i already
       sits on the terminating NUL. */
    if (line[i])
        line[i++] = '\0';

    COMMAND *command = find_command (word);
    if (!command)
    {
        fprintf (stderr, "%s: No such command for DataScope.\n", word);
        return (-1);
    }

    /* Get argument to command, if any. */
    while (whitespace (line[i]))
        i++;

    /* Call the function. */
    return ((*(command->func)) (line + i));
}

/* Look up NAME as the name of a command, and return a pointer to that
   command.  Return a NULL pointer if NAME isn't a command name (or is
   itself NULL). */

COMMAND * find_command (char *name)
{
    /* strcmp on a null pointer is undefined; treat it as "not found". */
    if (!name)
        return ((COMMAND *)NULL);

    /* `register` dropped: the storage class was removed in C++17. */
    for (int i = 0; commands[i].name; i++)
    {
        if (strcmp (name, commands[i].name) == 0)
            return (&commands[i]);
    }

    return ((COMMAND *)NULL);
}


/* Strip whitespace from the start and end of STRING.  Return a pointer
   into STRING. The trailing whitespace is removed by writing a NUL into
   STRING itself; the leading whitespace is skipped by the returned pointer. */

char * stripwhite( char *str){
    /* `register` dropped: the storage class was removed in C++17. */
    char *head = str;
    while (whitespace (*head))
        head++;

    /* Nothing but whitespace: return the (empty) tail. */
    if (*head == 0)
        return (head);

    char *tail = head + strlen (head) - 1;
    while (tail > head && whitespace (*tail))
        tail--;

    *++tail = '\0';
    return head;
}

int createBarsOneDay(BarsOneDay& bars,char* arg)
{
    ArgvParser cmd;
    cmd.addErrorCode(0, "Success");
    cmd.addErrorCode(1, "Error");
    cmd.setHelpOption("h", "help", "Usage: --key=CL:A:20130101 --start=23:00:00:000 --end=24:00:00:000 --barsize==5:m");
    cmd.defineOption("key","Format: MATURITY:DATE - EG CL:A:20130101" ,  ArgvParser::OptionRequired | ArgvParser::OptionRequiresValue);
    cmd.defineOption("start","Format: HH:MM:SS", ArgvParser::OptionRequired | ArgvParser::OptionRequiresValue);
    cmd.defineOption("end","Format: HH:MM:SS", ArgvParser::OptionRequired | ArgvParser::OptionRequiresValue);
    cmd.defineOption("barsize","1:h,1:m,1:s,1:ms ", ArgvParser::OptionRequired | ArgvParser::OptionRequiresValue);
    cmd.defineOption("shift","1:h,1:m,1:s,1:ms " );

    int result = cmd.parse(arg);
    if (result != ArgvParser::NoParserError){
        cout << cmd.parseErrorDescription(result) << endl;
        return 1;
    }                                         
   
    //Parse Parameters
    string key_str,start_str,end_str,barsize_str;
    
    if (cmd.foundOption("key")){
      key_str  = cmd.optionValue("key");
    }
    if (cmd.foundOption("start")){
        start_str  = cmd.optionValue("start");
    }
    if (cmd.foundOption("end")){
        end_str  = cmd.optionValue("end");
    }
    if (cmd.foundOption("barsize")){
        barsize_str  = cmd.optionValue("barsize");
    }
    string shift_str = "";
    bool shift = false;
    if (cmd.foundOption("shift")){
        shift=true; 
        shift_str  = cmd.optionValue("shift");
    }
    
      
    //Param: start time
    time_duration td_start;
   
    if(timeStringToTimeDuration(start_str, td_start))
        return 1;
    
    //Param: end time    
    time_duration td_end;
   
    if(timeStringToTimeDuration(end_str, td_end))
        return 1;
  
   //Param: shift
    if(shift){
    time_duration td_shift; 
    if(barsizeStringToTimeDuration(shift_str,td_shift))
        return 1;
    
    td_start = td_start + td_shift;
    td_end = td_end + td_shift;  
   }

   //Param: barsize
    time_duration td_barsize; 

    if(barsizeStringToTimeDuration(barsize_str,td_barsize))
        return 1;
   
    //date (extract from key)
   
    std::vector array;
    boost::split(array,key_str, boost::is_any_of(":"));
 
    if(array.size()!=3){
        cout << "invalid key - use format SYMBOL:MATURITY:DATE" < trades = trade_map[key_str];
    
    if ( quote_map.find( key_str) == quote_map.end() ) {
        cout << "no quotes found for key " << key_str << endl;
        return 1;
    }
    std::vector quotes = quote_map[key_str];

    
    ptime pt_start(boost_date,td_start);
    ptime pt_end(boost_date,td_end);

    if(pt_end <= pt_start){
        cout << "end time must be greater than start time" << endl;
    }


    //extract subset of trades
    int tStart=0;
    int tEnd= trades.size()-1;
    //find start
    while( trades[tStart].datetime() < pt_start)
        ++tStart;
    while( trades[tEnd].datetime() > pt_end)
        --tEnd;
    std::vector trades_subset;
    for(int i=tStart; i<=tEnd;i++){
        trades_subset.push_back(trades[i]);
    }

    //extract subset of quotes
    tStart=0;
    tEnd= quotes.size()-1;
    //find start
    while( quotes[tStart].datetime() < pt_start)
        ++tStart;
    while( quotes[tEnd].datetime() > pt_end)
        --tEnd;
    std::vector quotes_subset;
    for(int i=tStart; i<=tEnd;i++){
        quotes_subset.push_back(quotes[i]);
    }

       cout << "num trades: " << trades_subset.size() << "  num quotes: " << quotes_subset.size() << endl;
    //if((trades_subset.size()==0)&&(quotes_subset.size()==0)){
    //    cout << "no trades or quotes between " << to_simple_string(td_start)<<" and "<< to_simple_string(td_end) << endl;
   // }
  
   if(pt_start <= quotes[0].datetime()) 
   {
        BarsOneDay bars1( boost_date,td_start,td_end,td_barsize,1);
        bars1.build(trades_subset,quotes_subset);
        bars = bars1;
   }
   else{

       //find valid Quote before pt_start - use this to seed BarsOneDay
       tStart=0;
       Quote seedQuote;
       while( quotes[tStart].datetime() < pt_start){
           seedQuote = quotes[tStart];
           ++tStart;
        }
    
        cout << "seed quote: " << to_simple_string(seedQuote.datetime());
        BarsOneDay bars1( boost_date,td_start,td_end,td_barsize,1); 
        bars1.build(trades_subset,quotes_subset,seedQuote);
        bars = bars1;
    }


    return 0;
}

/* Write one line to stdout with the start and end time of BAR. */
void print_details(BarsOneDay* bar)
{
    BarsOneDay& day = *bar;

    cout << "start ";
    cout << day.startTime();
    cout << "   end ";
    cout << day.endTime();
    cout << endl;
}

int com_bars(char *arg){
    BarsOneDay bars;
    
    if(createBarsOneDay(bars,arg)){
        return 1;
    } 
    
    cout  << endl;
    print_details(&bars);
    bars.coutBarStats();
    cout  << endl;
    
    return 0;
}

int com_vap(char *arg){
    BarsOneDay bars;
    
    if(createBarsOneDay(bars,arg)){
        return 1;
    } 
    
    cout  << endl;
    print_details(&bars);
    bars.coutVolumeAtPrice();
    cout  << endl;
    
    return 0;
}

int com_vapt(char *arg){
    BarsOneDay bars;
    
    if(createBarsOneDay(bars,arg)){
        return 1;
    } 
    
    cout  << endl;
    print_details(&bars);
    bars.coutVolumeAtPricePerBar();
    cout  << endl;
    
    return 0;
}

int com_svpinapt(char *arg){
    BarsOneDay bars;
    
    if(createBarsOneDay(bars,arg)){
        return 1;
    } 
    
    cout  << endl;
    print_details(&bars);
    bars.coutSVPINAtPricePerBar();
    cout  << endl;
    
    return 0;
}

int com_mktapt(char *arg){
    BarsOneDay bars;
    
    if(createBarsOneDay(bars,arg)){
        return 1;
    } 
    
    cout  << endl;
    print_details(&bars);
    bars.coutMktAtPricePerBar();
    cout  << endl;
    
    return 0;
}

/* `load` command: forward ARG to load_data() and normalise its result to
   0 (success) or 1 (failure). */
int com_load(char *arg)
{
    const bool failed = (load_data(arg) != 0);
    return failed ? 1 : 0;
}

/* Print HEADER (preceded by a blank line), then every key held in CACHE one
   per line, then a trailing blank line. Templated on the map type so it
   serves both the trade and the quote cache. */
template <typename TickMap>
static void printLoadedKeys(const char *header, const TickMap& cache)
{
    cout << endl << header << endl;
    for (typename TickMap::const_iterator it = cache.begin(); it != cache.end(); ++it)
    {
        cout << it->first << endl;
    }
    cout << endl;
}

/* `showloaded` command: list the keys currently held in trade_map and
   quote_map. ARG is unused. Returns 1 when nothing at all is loaded,
   otherwise 0.
   NOTE(review): the trade-listing loop and its heading were missing from the
   published listing; they are rebuilt here from the quote-listing loop that
   was still visible -- confirm the heading text. */
int com_showloaded(char* arg){

    (void)arg;

    // Only bail out when BOTH caches are empty. The original used ||, which
    // claimed nothing was loaded even when trades had been cached but the
    // matching quote load had failed.
    if(trade_map.empty() && quote_map.empty()){
        cout << "No Keys Have Been Loaded!" << endl ;
        return 1;
    }

    //show loaded trades
    printLoadedKeys("Keys Loaded - Trades: ", trade_map);

    //show loaded quotes
    printLoadedKeys("Keys Loaded - Quotes: ", quote_map);

    return 0;
}



/* Print out help for ARG, or for all of the commands if ARG is empty.
   When ARG names no command, print the list of valid command names
   instead. Always returns 0. */

int com_help (char* arg)
{
    /* `register` dropped from the loop counters: removed in C++17. */
    int printed = 0;

    for (int i = 0; commands[i].name; i++)
    {
        if (!*arg || (strcmp (arg, commands[i].name) == 0))
        {
             printf ("%s\t\t%s.\n", commands[i].name, commands[i].doc);
             printed++;
        }
    }

    if (!printed)
    {
        /* Spelling of "Possibilities" corrected in the message. */
        printf ("No commands match `%s'.  Possibilities are:\n", arg);

        for (int i = 0; commands[i].name; i++)
        {
            /* Print in six columns. */
            if (printed == 6)
            {
                printed = 0;
                printf ("\n");
            }

            printf ("%s\t", commands[i].name);
            printed++;
        }

        /* Finish the last, partially filled row. */
        if (printed)
            printf ("\n");
    }
    return 0;
}


/* Check that ARG is present (neither a null pointer nor an empty string).
   Returns 1 when it is; otherwise reports the problem on stderr, prefixed
   with CALLER, and returns 0. */

int valid_argument ( char *caller,char *arg)
{
    const bool present = (arg != NULL) && (*arg != '\0');
    if (present)
        return 1;

    fprintf (stderr, "%s: Argument required.\n", caller);
    return 0;
}




int load_data(char* arg)
{
    string data_dir = "/home/cs/rbradley/data/futures/";
    
    ArgvParser cmd;
    cmd.addErrorCode(0, "Success");
    cmd.addErrorCode(1, "Error");
    cmd.setHelpOption("h", "help", "Usage: --key=CL:A:20130101-CL:A:20130102 ");
    cmd.defineOption("key","Format: SYMBOL:MATURITY:DATE-SYMBOL:MATURITY:DATA " ,  ArgvParser::OptionRequired | ArgvParser::OptionRequiresValue);

    int result = cmd.parse(arg);
    if (result != ArgvParser::NoParserError){
        cout << cmd.parseErrorDescription(result) << endl;
        return 1;
    }                                         
    string key_str;
    
    if (cmd.foundOption("key")){
      key_str  = cmd.optionValue("key");
    }

    std::vector keys;
    boost::split(keys,key_str, boost::is_any_of("-"));

    for(unsigned int i=0; i array;
        boost::split(array, key, boost::is_any_of(":"));

        if(array.size()!= 3){
            cout << "invalid key - use format SYMBOL:MATURITY:DATE " << endl;
            continue;
        }

        string symbol = array[0];
        string maturity = array[1];
        string date = array[2];

        std::vector trades;
        time_t tstart = time(0);
        cout << "loading trades for " << key <<" ... " << endl;
        bool tradesLoaded = ReadWrite::load_TW(data_dir,symbol,maturity,date,trades,100);
        
        if(tradesLoaded){
            cout << "trades loaded for "<< key << " in " << difftime(time(0), tstart) << " seconds \n" ;
            trade_map[key] = trades;
        }
        else{
            continue;
        }
        //Load Quotes
        std::vector quotes;
        tstart = time(0);
        cout << "loading quotes for " << key << " ... " << endl;
        bool quotesLoaded = ReadWrite::load_TW(data_dir,symbol,maturity,date,quotes,100);
        
        if(quotesLoaded){
            cout << "quotes loaded for "<< key << " in " << difftime(time(0), tstart) << " seconds \n" ;
            quote_map[key] = quotes;
        }
        else{
            continue;
        }
    }
    return 0; 
}

/* Parse TIMESTR, formatted HH:MM:SS:ZZZ (ZZZ = milliseconds), into TD.
   Accepted ranges: hours 0-24, minutes 0-59, seconds 0-59, milliseconds
   0-999; hour 24 is only accepted as exactly 24:00:00:000.
   Returns 0 on success. On failure prints a message to stdout, leaves TD
   untouched and returns 1. */
int timeStringToTimeDuration(string timeStr, time_duration& td) //HH:MM:SS:ZZZ
{
    // Used for --start and --end alike, so the message does not say "start".
    static const char *const kBadTime = "invalid time - use format HH:MM:SS:ZZZ";

    std::vector<std::string> parts;
    boost::split(parts, timeStr, boost::is_any_of(":"));
    if(parts.size()!=4){
        cout << kBadTime << endl;
        return 1;
    }

    int hour=0,minute=0,second=0,millisecond=0;
    try{
        hour        = boost::lexical_cast<int>(parts[0]);
        minute      = boost::lexical_cast<int>(parts[1]);
        second      = boost::lexical_cast<int>(parts[2]);
        millisecond = boost::lexical_cast<int>(parts[3]);
    }
    catch(const bad_lexical_cast &){
        cout << kBadTime << endl;
        return 1;
    }

    //check number ranges
    const bool inRange =
        (hour >= 0 && hour <= 24) &&
        (minute >= 0 && minute <= 59) &&
        (second >= 0 && second <= 59) &&
        (millisecond >= 0 && millisecond <= 999);

    // 24 is only meaningful as the end-of-day marker 24:00:00:000.
    const bool pastEndOfDay =
        (hour == 24) && (minute != 0 || second != 0 || millisecond != 0);

    if(!inRange || pastEndOfDay){
        cout << kBadTime << endl;
        return 1;
    }

    td = hours(hour)+minutes(minute)+seconds(second)+milliseconds(millisecond);
    return 0;
}

/* Parse BARSIZESTR, formatted COUNT:UNIT with UNIT one of h, m, s or ms
   (for example 5:m or 200:ms), into TD.
   Returns 0 on success. On failure prints a message to stdout, leaves TD
   untouched and returns 1.
   The sign of COUNT is deliberately not checked here: the same parser
   handles the --shift option in createBarsOneDay. */
int barsizeStringToTimeDuration(string barsizeStr,time_duration& td)
{
    static const char *const kBadBarsize = "invalid barsize - use format 1:s";

    std::vector<std::string> parts;
    boost::split(parts, barsizeStr, boost::is_any_of(":"));

    if(parts.size()!=2){
        cout << kBadBarsize << endl;
        return 1;
    }

    // The count is parsed the same way whatever the unit, so do it once.
    int count = 0;
    try{
        count = boost::lexical_cast<int>(parts[0]);
    }
    catch(const bad_lexical_cast &){
        cout << kBadBarsize << endl;
        return 1;
    }

    const std::string& unit = parts[1];
    if (unit == "h"){
        td = hours(count);
    }
    else if (unit == "m"){
        td = minutes(count);
    }
    else if (unit == "s"){
        td = seconds(count);
    }
    else if (unit == "ms"){
        td = milliseconds(count);
    }
    else{
        cout << kBadBarsize << endl;
        return 1;
    }

    return 0;
}


/* Parse DATE_STR, formatted YYYYMMDD, into D.
   Returns 0 on success. On failure (wrong length, non-numeric field, or a
   combination that is not a real calendar date) prints a message to stdout,
   leaves D untouched and returns 1. */
int dateStringToDate(string date_str, boost::gregorian::date& d)
{
    static const char *const kBadDate = "invalid date - use format YYYYMMDD";

    if(date_str.size()!=8){
        cout << kBadDate << endl;
        return 1;
    }

    try{
        const int year  = boost::lexical_cast<int>(date_str.substr(0,4));
        const int month = boost::lexical_cast<int>(date_str.substr(4,2));
        const int day   = boost::lexical_cast<int>(date_str.substr(6,2));

        d = boost::gregorian::date(year,month,day);
    }
    catch(const bad_lexical_cast &){
        cout << kBadDate << endl;
        return 1;
    }
    catch(const std::out_of_range &){
        // The gregorian::date constructor rejects values such as month 13 or
        // day 31 in a 30-day month with exceptions derived from
        // std::out_of_range; without this handler a typo in the key would
        // terminate the whole shell.
        cout << kBadDate << endl;
        return 1;
    }

    return 0;
}

/* Configure logging: add a file log sink and install a severity filter.
   NOTE(review): the `logging`, `keywords` and `sinks` namespace aliases, and
   the header that declares add_file_log, are not visible in this listing --
   presumably Boost.Log aliases declared elsewhere; confirm. */
void initLog()
{
    // File sink settings, as passed below: files are named from the pattern
    // ./logs/datascope_%N.log, rotation is requested at a size of
    // 10 * 1024 * 1024 and at the time point (0, 0, 0), and each record is
    // formatted as "[%TimeStamp%]: %Message%".
    logging::add_file_log
    (
        keywords::file_name = "./logs/datascope_%N.log",                                        
        keywords::rotation_size = 10 * 1024 * 1024,                                   
        keywords::time_based_rotation = sinks::file::rotation_at_time_point(0, 0, 0), 
        keywords::format = "[%TimeStamp%]: %Message%"                                 
    );
    
    // Only records with severity info or higher pass the filter.
    logging::core::get()->set_filter
    (
        logging::trivial::severity >= logging::trivial::info
    );
}


No comments:

Post a Comment