Source code for pecos.graphics

"""
The graphics module contains functions to format scatter and time series 
plots for reporting.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import textwrap
import os
import logging

try:
    from nose.tools import nottest as _nottest
except ImportError:
    def _nottest(afunction):
        return afunction
        
logger = logging.getLogger(__name__)

def plot_scatter(x,y,xaxis_min=None, xaxis_max=None, yaxis_min=None, yaxis_max=None):
    """
    Create a scatter plot on the current matplotlib axes.

    If x and y have the same number of columns, then the columns of x are
    plotted against the corresponding columns of y, in order.  If x (or y)
    has 1 column, then that column of data is plotted against all the
    columns in y (or x).  If the column counts differ and neither input has
    exactly 1 column, nothing is plotted.

    If plotting raises an exception, the text 'Insufficient Data' is drawn
    on the axes instead of propagating the error.

    Parameters
    ----------
    x : pd.DataFrame
        x data

    y : pd.DataFrame
        y data

    xaxis_min : float (optional)
        X-axis minimum, default = limit chosen by matplotlib

    xaxis_max : float (optional)
        X-axis maximum, default = limit chosen by matplotlib

    yaxis_min : float (optional)
        Y-axis minimum, default = limit chosen by matplotlib

    yaxis_max : float (optional)
        Y-axis maximum, default = limit chosen by matplotlib
    """

    ax = plt.gca()

    try:
        n_x = x.shape[1]
        n_y = y.shape[1]

        # Collect the (x, y) series pairs to draw.
        if n_x == n_y:
            pairs = [(x.iloc[:,i], y.iloc[:,i]) for i in range(n_x)]
        elif n_x == 1:
            pairs = [(x, y[col]) for col in y.columns]
        elif n_y == 1:
            pairs = [(x[col], y) for col in x.columns]
        else:
            pairs = []

        # Successive plot calls add to the same axes by default, so no
        # plt.hold() call is needed (that function was removed from
        # matplotlib and calling it raised inside this try block).
        for x_data, y_data in pairs:
            plt.plot(x_data, y_data, '.', markersize=3)

        if pairs:
            # Rotate once, after all data is drawn.
            plt.xticks(rotation='vertical')
    except Exception:
        logger.debug("plot_scatter could not plot the data", exc_info=True)
        plt.text(0.3,0.5,'Insufficient Data', fontsize=8)

    # Format axis: any limit the caller did not supply falls back to the
    # limit matplotlib selected for the plotted data.
    xmin_plt, xmax_plt = plt.xlim()
    ymin_plt, ymax_plt = plt.ylim()
    if xaxis_min is None:
        xaxis_min = xmin_plt
    if xaxis_max is None:
        xaxis_max = xmax_plt
    if yaxis_min is None:
        yaxis_min = ymin_plt
    if yaxis_max is None:
        yaxis_max = ymax_plt
    plt.xlim((xaxis_min, xaxis_max))
    plt.ylim((yaxis_min, yaxis_max))
    ax.tick_params(axis='both', labelsize=8)

    # Shrink the axes and shift them up, leaving room below for the
    # vertical tick labels.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0+0.15, box.width, box.height*0.75])
    
def plot_timeseries(data, tfilter=None, test_results_group=None, xaxis_min=None, xaxis_max=None, yaxis_min=None, yaxis_max=None):
    """
    Create a time series plot using each column in the DataFrame.

    The plot is drawn on the current matplotlib axes.  Periods where the
    time filter is False are shaded, and data points that fall inside the
    time span of a test result are marked with '+' (red for flags containing
    "Data <" or "Data >", green otherwise).  If any step raises an
    exception, the text 'Insufficient Data' is drawn on the axes instead of
    propagating the error.

    Parameters
    ----------
    data : pd.DataFrame or pd.Series
        Data, indexed by time

    tfilter : pd.Series (optional)
        Boolean values used to include time filter in the plot

    test_results_group : pd.DataFrame (optional)
        Test results for a particular variable.  To group test results by variable, use
        grouped = pm.test_results.groupby(['System Name', 'Variable Name']).
        Must contain an 'Error Flag' column; the columns at positions 2 and 3
        are used as the start and end of each flagged time span.

    xaxis_min : float (optional)
        X-axis minimum, default = limit chosen by matplotlib

    xaxis_max : float (optional)
        X-axis maximum, default = limit chosen by matplotlib

    yaxis_min : float (optional)
        Y-axis minimum, default = data minimum less 10% of the data range

    yaxis_max : float (optional)
        Y-axis maximum, default = data maximum plus 10% of the data range
    """

    ax = plt.gca()

    try:
        # plot timeseries
        if isinstance(data, pd.Series):
            data.plot(ax=ax, grid=False, legend=False, color='k', fontsize=8, rot=90, label='Data', x_compat=True)
        else:
            data.plot(ax=ax, grid=False, legend=False, fontsize=8, rot=90, label='Data')

        if tfilter is not None:
            # Positions where the filter value changes (position 0 is always
            # included because the shifted value there is NaN), plus the
            # final position, delimit runs of constant filter value.
            temp = np.where(tfilter - tfilter.shift())
            temp = np.append(temp[0],len(tfilter)-1)
            labelled = False
            for start, stop in zip(temp[:-1], temp[1:]):
                # start/stop are positions, so index positionally with iloc.
                if tfilter.iloc[start] == 0:
                    span_kwargs = {'facecolor': 'k', 'alpha': 0.2}
                    if not labelled:
                        # Label only the first span so the legend gets a
                        # single 'Time filter' entry.
                        span_kwargs['label'] = 'Time filter'
                        labelled = True
                    ax.axvspan(data.index[start], data.index[stop], **span_kwargs)

        # add errors
        # Treat an empty results table the same as no results.
        if test_results_group is not None and getattr(test_results_group, 'empty', False):
            test_results_group = None
        if test_results_group is not None:
            key2 = test_results_group['Error Flag']
            grouped2 = test_results_group.groupby(key2)

            for error_flag in key2.unique():
                test_results_group2 = grouped2.get_group(error_flag)

                # Legend entry: wrapped error flag followed by the wrapped
                # list of test result index values.
                error_label = '\n'.join(textwrap.wrap(error_flag, 30))
                warning_label = '\n'.join(textwrap.wrap('Warning ' + str(test_results_group2.index.values).strip('[]'), 30))
                error_label = error_label + '\n' + warning_label

                # Mask of data timestamps inside any flagged time span.
                date_idx2 = np.zeros(len(data.index), dtype=bool)
                for row2 in range(len(test_results_group2.index)):
                    date_idx2 = date_idx2 | ((data.index >= test_results_group2.iloc[row2,2]) & (data.index <= test_results_group2.iloc[row2,3]))

                if not date_idx2.any():
                    continue

                data2 = data[date_idx2]
                # These flags are not marked on the plot.
                if error_flag in ['Duplicate timestamp', 'Missing data', 'Corrupt data', 'Nonmonotonic timestamp']:
                    continue

                # Range violations are drawn in red, everything else in green.
                if "Data <" in error_flag or "Data >" in error_flag:
                    marker_color = 'r'
                else:
                    marker_color = 'g'
                try:
                    ax.scatter(data2.index, data2.values, c=marker_color, marker='+', label=error_label)
                except Exception:
                    # Fall back to marking only the first flagged point.
                    ax.scatter(data2.index[0], data2.values[0], c=marker_color, marker='+', label=error_label)

        # Format axis
        xmin_plt, xmax_plt = plt.xlim()
        ymin_plt, ymax_plt = plt.ylim()
        if tfilter is not None:
            # Scale the y-axis to the data that passes the time filter.
            ymin_plt = np.nanmin(data[tfilter].values)
            ymax_plt = np.nanmax(data[tfilter].values)
        if np.abs(ymin_plt - ymax_plt) < 0.01:
            # Nearly constant data: keep the limits matplotlib selected.
            ymin_plt, ymax_plt = plt.ylim()
    except Exception:
        logger.debug("plot_timeseries could not plot the data", exc_info=True)
        plt.text(0.3,0.5,'Insufficient Data', fontsize=8)
        xmin_plt, xmax_plt = plt.xlim()
        ymin_plt, ymax_plt = plt.ylim()

    # Format axis: pad the default y limits by 10% of the range.
    y_range = (ymax_plt - ymin_plt)
    if xaxis_min is None:
        xaxis_min = xmin_plt
    if xaxis_max is None:
        xaxis_max = xmax_plt
    if yaxis_min is None:
        yaxis_min = ymin_plt-y_range/10
    if yaxis_max is None:
        yaxis_max = ymax_plt+y_range/10
    plt.xlim((xaxis_min, xaxis_max))
    plt.ylim((yaxis_min, yaxis_max))
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.tick_params(axis='both', labelsize=8)
    plt.xlabel('Time', fontsize=8)

    # Shrink the axes and shift them up, leaving room below for the
    # rotated tick labels and the x-axis label.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0+0.15, box.width, box.height*0.75])

def plot_colorblock(values, vmin=0, vmax=1, nColors=12, colors=[(0.75, 0.15, 0.15), (1, 0.75, 0.15), (0.15, 0.75, 0.15)]):
    """
    Draw a colorblock image of ``values`` with the axes hidden.

    The colormap is interpolated through ``colors`` and quantized to
    ``nColors`` levels; by default it runs red to yellow to green in 12
    steps.  Useful for dashboards that show a simple color indicator in
    each cell.

    Parameters
    ----------
    values : 2D np.array
        Values to plot in the colorblock

    vmin : float (optional)
        Colormap minimum, default = 0

    vmax : float (optional)
        Colormap maximum, default = 1

    nColors : int (optional)
        Number of colors in the colormap, default = 12

    colors : list (optional)
        List of colors, colors can be specified in any way understandable by matplotlib.colors.ColorConverter.to_rgb().
        Default is red to yellow to green.
    """
    from matplotlib.colors import LinearSegmentedColormap

    # Build the discrete palette from the anchor colors.
    palette = LinearSegmentedColormap.from_list('custom', colors, N=nColors)

    image = plt.imshow(values, vmin=vmin, vmax=vmax, aspect='equal', cmap=palette)

    # Hide the frame and both axes so only the colored cells remain.
    plt.axis('off')
    for axis in (image.axes.get_xaxis(), image.axes.get_yaxis()):
        axis.set_visible(False)
    
@_nottest
def plot_test_results(filename_root, pm, image_format='png', dpi=500, figsize=(7.0, 2.5)):
    """
    Create test results graphics which highlight data points that
    failed a quality control test.

    One figure is saved per (system name, variable name) group in
    pm.test_results.  Groups whose error flags are all timestamp or
    missing/corrupt data flags are skipped.

    Parameters
    ----------
    filename_root : string
        Filename root, with full path.
        Each graphics filename is appended with an integer, starting at 0.
        For example, filename_root = 'C:\\\\pecos\\\\results\\\\test' will generate a file named
        'C:\\\\pecos\\\\results\\\\test0.png'.
        The directory 'C:\\\\pecos\\\\results' must exist.

    pm : PerformanceMonitoring object
        Contains data (pm.df), test results (pm.test_results) and the
        time filter (pm.tfilter)

    image_format : string  (optional)
        Image format, default = 'png'

    dpi : int (optional)
        Resolution of the saved image, default = 500

    figsize : tuple (optional)
        Figure size passed to matplotlib, default = (7.0, 2.5)

    Returns
    -------
    test_results_graphics : list of strings
        Absolute filenames of the graphics that were saved
    """

    filename_root = os.path.abspath(filename_root)

    # Collect file names
    test_results_graphics = []

    if pm.test_results.empty:
        return test_results_graphics

    # Error flags that are not marked on a time series plot.
    non_plottable_flags = ['Duplicate timestamp', 'Missing data', 'Corrupt data', 'Missing timestamp', 'Nonmonotonic timestamp']

    graphic = 0

    tfilter = pm.tfilter

    grouped = pm.test_results.groupby(['System Name', 'Variable Name'])

    for name, test_results_group in grouped:
        # NOTE(review): the variable name is compared to a single space while
        # the system name is compared to an empty string -- confirm both
        # sentinels are intended.
        if name[1] == ' ':
            continue
        elif name[0] == '':
            col_name = str(name[1])
        else:
            col_name = str(name[0]) + ":" + str(name[1])

        # Skip the group when every one of its flags is non-plottable.
        if test_results_group['Error Flag'].isin(non_plottable_flags).all():
            continue

        logger.info("Creating graphic for %s", col_name)
        plt.figure(figsize = figsize)
        try:
            plot_timeseries(pm.df[col_name], tfilter, test_results_group = test_results_group)

            # Narrow the axes to leave room for the legend on the right.
            ax = plt.gca()
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width*0.65, box.height])
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=8)
            plt.title(col_name, fontsize=8)

            filename = filename_root + str(graphic) + '.' + image_format
            plt.savefig(filename, format=image_format, dpi=dpi)
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close()

        test_results_graphics.append(filename)
        graphic = graphic + 1

    return test_results_graphics