Examples

This document demonstrates various ggsql features with runnable examples using CSV files.

Basic Visualizations

Simple Scatter Plot

SELECT x, y FROM 'data.csv'
VISUALISE x, y
DRAW point
VISUALISE bill_len AS x, bill_dep AS y, species AS color FROM ggsql:penguins
DRAW point

Line Chart with Date Scale

SELECT sale_date, revenue FROM 'sales.csv'
WHERE category = 'Electronics'
VISUALISE sale_date AS x, revenue AS y
DRAW line
SCALE x SETTING type => 'date'
LABEL 
    title => 'Electronics Revenue Over Time', 
    x => 'Date', 
    y => 'Revenue ($)'
SELECT * FROM ggsql:airquality
VISUALISE Date AS x
DRAW line MAPPING Ozone AS y, 'Ozone' AS color
DRAW line MAPPING Temp AS y, 'Temp' AS color

Bar Chart by Category

SELECT category, SUM(revenue) as total
FROM 'sales.csv'
GROUP BY category
VISUALISE category AS x, total AS y, category AS fill
DRAW bar
LABEL 
    title => 'Total Revenue by Category', 
    x => 'Category', 
    y => 'Total Revenue ($)'

Line chart with multiple lines with same aesthetics

SELECT * FROM 'sales.csv'
VISUALISE sale_date AS x, revenue AS y
DRAW line
    PARTITION BY category

Statistical Transformations

Statistical transformations automatically compute aggregations for certain geom types.

Histogram

When using DRAW histogram, ggsql automatically bins continuous data and counts occurrences. You only need to specify the x aesthetic:

SELECT revenue FROM 'sales.csv'
VISUALISE revenue AS x
DRAW histogram
LABEL
    title => 'Revenue Distribution',
    x => 'Revenue ($)',
    y => 'Count'

Bar with Automatic Count

When using DRAW bar without a y aesthetic, ggsql automatically counts occurrences of each x value:

SELECT category FROM 'sales.csv'
VISUALISE category AS x
DRAW bar
LABEL
    title => 'Sales Count by Category',
    x => 'Category',
    y => 'Count'

Bar with Weighted Count

You can also specify a weight aesthetic to sum values instead of counting:

SELECT category, revenue FROM 'sales.csv'
VISUALISE category AS x, revenue AS weight
DRAW bar
LABEL
    title => 'Total Revenue by Category',
    x => 'Category',
    y => 'Revenue ($)'

Histogram Settings

Control histogram binning with SETTING options:

Custom number of bins:

SELECT revenue FROM 'sales.csv'
VISUALISE revenue AS x
DRAW histogram 
    SETTING bins => 10
LABEL
    title => 'Revenue Distribution (10 bins)',
    x => 'Revenue ($)',
    y => 'Count'

Explicit bin width (overrides bins):

SELECT revenue FROM 'sales.csv'
VISUALISE revenue AS x
DRAW histogram 
    SETTING binwidth => 500
LABEL
    title => 'Revenue Distribution (500 bin width)',
    x => 'Revenue ($)',
    y => 'Count'

Left-closed intervals (default is right-closed (a, b]):

SELECT revenue FROM 'sales.csv'
VISUALISE revenue AS x
DRAW histogram 
    SETTING bins => 8, closed => 'left'
LABEL
    title => 'Revenue Distribution (left-closed intervals)',
    x => 'Revenue ($)',
    y => 'Count'

Histogram Remapping

Histogram computes several statistics: bin, bin_end, count, and density. By default, count is mapped to y. Use REMAPPING to show density (proportion) instead:

SELECT revenue FROM 'sales.csv'
VISUALISE revenue AS x
DRAW histogram 
    REMAPPING density AS y
LABEL
    title => 'Revenue Density Distribution',
    x => 'Revenue ($)',
    y => 'Density'

Bar Width Setting

Control bar width with the width setting (0-1 scale, default 0.9):

SELECT category FROM 'sales.csv'
VISUALISE category AS x
DRAW bar 
    SETTING width => 0.5
LABEL
    title => 'Sales Count (Narrow Bars)',
    x => 'Category',
    y => 'Count'

Bar Remapping

Bar computes count and proportion statistics. By default, count is mapped to y. Use REMAPPING to show proportions instead:

SELECT category FROM 'sales.csv'
VISUALISE category AS x
DRAW bar 
    REMAPPING proportion AS y
LABEL
    title => 'Sales Proportion by Category',
    x => 'Category',
    y => 'Proportion'

Combine with weight to show weighted proportions:

SELECT category, revenue FROM 'sales.csv'
VISUALISE category AS x, revenue AS weight
DRAW bar 
    REMAPPING proportion AS y
LABEL
    title => 'Revenue Share by Category',
    x => 'Category',
    y => 'Share of Total Revenue'

Multiple Layers

Line with Points

SELECT date, value FROM 'timeseries.csv'
VISUALISE date AS x, value AS y
DRAW line 
    SETTING  color => 'blue'
DRAW point 
    SETTING size => 30, color => 'red'
SCALE x SETTING type => 'date'
LABEL 
    title => 'Time Series with Points', 
    x => 'Date', 
    y => 'Value'

Colored Lines by Category

SELECT date, value, category FROM 'metrics.csv'
VISUALISE date AS x, value AS y, category AS color
DRAW line
SCALE x SETTING type => 'date'
LABEL 
    title => 'Metrics by Category', 
    x => 'Date', 
    y => 'Value'

Faceting

Facet Wrap by Region

SELECT sale_date, revenue, region FROM 'sales.csv'
WHERE category = 'Electronics'
VISUALISE sale_date AS x, revenue AS y
DRAW line
SCALE x SETTING type => 'date'
FACET WRAP region
LABEL 
    title => 'Electronics Sales by Region', 
    x => 'Date', 
    y => 'Revenue ($)'

Facet Grid

SELECT
    DATE_TRUNC('month', sale_date) as month,
    region,
    category,
    SUM(revenue) as total_revenue,
    SUM(quantity) * 100 as total_quantity_scaled
FROM 'sales.csv'
GROUP BY DATE_TRUNC('month', sale_date), region, category
VISUALISE month AS x
DRAW line 
    MAPPING total_revenue AS y
    SETTING color => 'steelblue'
DRAW point 
    MAPPING total_revenue AS y
    SETTING size => 30, color => 'darkblue'
DRAW line 
    MAPPING total_quantity_scaled AS y
    SETTING color => 'coral'
DRAW point 
    MAPPING total_quantity_scaled AS y
    SETTING size => 30, color => 'orangered'
SCALE x SETTING type => 'date'
FACET region BY category
LABEL 
    title => 'Monthly Revenue and Quantity by Region and Category', 
    x => 'Month', 
    y => 'Value'

Coordinate Transformations

Flipped Coordinates (Horizontal Bar Chart)

SELECT region, SUM(revenue) as total
FROM 'sales.csv'
GROUP BY region
ORDER BY total DESC
VISUALISE region AS x, total AS y, region AS fill
DRAW bar
COORD flip
LABEL 
    title => 'Total Revenue by Region', 
    x => 'Region', 
    y => 'Total Revenue ($)'

Cartesian with Axis Limits

SELECT x, y FROM 'data.csv'
VISUALISE x, y
DRAW point 
    SETTING size => 4, color => 'blue'
COORD cartesian 
    SETTING xlim => [0, 60], ylim => [0, 70]
LABEL 
    title => 'Scatter Plot with Custom Axis Limits', 
    x => 'X', 
    y => 'Y'

Pie Chart with Polar Coordinates

SELECT category, SUM(revenue) as total
FROM 'sales.csv'
GROUP BY category
VISUALISE total AS y, category AS fill
DRAW bar
COORD polar
LABEL 
    title => 'Revenue Distribution by Category'

Constant Mappings

Constants can be used in both the VISUALISE clause (global) and MAPPING clauses (per-layer) to set fixed aesthetic values.

Different Constants Per Layer

Each layer can have its own constant value, creating a legend showing all values:

WITH monthly AS (
    SELECT
        DATE_TRUNC('month', sale_date) as month,
        category,
        SUM(revenue) as revenue
    FROM 'sales.csv'
    GROUP BY DATE_TRUNC('month', sale_date), category
)
VISUALISE month AS x, revenue AS y
DRAW line 
    MAPPING 'Electronics' AS color FROM monthly 
    FILTER category = 'Electronics'
DRAW line 
    MAPPING 'Clothing' AS color FROM monthly 
    FILTER category = 'Clothing'
DRAW line 
    MAPPING 'Furniture' AS color FROM monthly 
    FILTER category = 'Furniture'
SCALE x SETTING type => 'date'
LABEL 
    title => 'Revenue by Category (Constant Colors)', 
    x => 'Month', 
    y => 'Revenue ($)'

Mixed Constants and Columns

When mixing constant and column mappings for the same aesthetic, the axis/legend label uses the first non-constant column name:

SELECT date, value, category FROM 'metrics.csv'
VISUALISE date AS x
DRAW line 
    MAPPING value AS y, category AS color
DRAW point 
    MAPPING 120 AS y 
    SETTING size => 20, color => 'blue'
SCALE x SETTING type => 'date'
LABEL 
    title => 'Metrics with Threshold Line', 
    x => 'Date'

Numeric Constants

Numbers work as constants too:

SELECT x, y FROM 'data.csv'
VISUALISE x, y
DRAW point 
    SETTING color => 'blue', size => 100
DRAW point 
    SETTING color => 'red', size => 50 
    FILTER y > 50
LABEL 
    title => 'Scatter Plot with Constant Sizes'

Layer filtering

Filter one layer

SELECT date, value FROM 'timeseries.csv'
VISUALISE date AS x, value AS y
DRAW line 
    SETTING color => 'blue'
DRAW point 
    SETTING color => 'red', size => 30 
    FILTER value < 130
SCALE x SETTING type => 'date'
LABEL 
    title => 'Time Series with Points', 
    x => 'Date', 
    y => 'Value'

Layer ordering

ORDER BY in a layer

Use ORDER BY to ensure data is sorted correctly within a layer. This is especially important for line charts where the order of points affects the visual path:

WITH unordered_data AS (
    SELECT * FROM (VALUES
        (150.0, '2023-03-01'::DATE),
        (100.0, '2023-01-01'::DATE),
        (120.0, '2023-05-01'::DATE),
        (200.0, '2023-02-01'::DATE),
        (180.0, '2023-04-01'::DATE)
    ) AS t(value, date)
)
VISUALISE
DRAW path 
    MAPPING date AS x, value AS y FROM unordered_data 
    ORDER BY date
DRAW point 
    MAPPING date AS x, value AS y FROM unordered_data 
    SETTING size => 40, color => 'red'
SCALE x SETTING type => 'date'
LABEL 
    title => 'Line Chart with ORDER BY', 
    x => 'Date', 
    y => 'Value'

Combining FILTER and ORDER BY

The ORDER BY clause can be combined with FILTER and other layer options:

SELECT date, value, category FROM 'metrics.csv'
VISUALISE
DRAW path 
    MAPPING date AS x, value AS y, category AS color 
    FILTER category != 'Support' 
    ORDER BY value
DRAW point 
    MAPPING date AS x, value AS y, category AS color 
    SETTING size => 20 
    FILTER category != 'Support' 
SCALE x SETTING type => 'date'
LABEL 
    title => 'Sales and Marketing Metrics (Ordered)', 
    x => 'Date', 
    y => 'Value'

Common Table Expressions (CTEs)

Simple CTE with VISUALISE FROM

WITH monthly_sales AS (
    SELECT
        DATE_TRUNC('month', sale_date) as month,
        SUM(revenue) as total_revenue
    FROM 'sales.csv'
    GROUP BY DATE_TRUNC('month', sale_date)
)
VISUALISE month AS x, total_revenue AS y FROM monthly_sales
DRAW line
DRAW point
SCALE x SETTING type => 'date'
LABEL 
    title => 'Monthly Revenue Trends', 
    x => 'Month', 
    y => 'Revenue ($)'

Multiple CTEs

WITH daily_sales AS (
    SELECT sale_date, region, SUM(revenue) as revenue
    FROM 'sales.csv'
    GROUP BY sale_date, region
),
regional_totals AS (
    SELECT region, SUM(revenue) as total
    FROM daily_sales
    GROUP BY region
)
VISUALISE region AS x, total AS y, region AS fill FROM regional_totals
DRAW bar
COORD flip
LABEL 
    title => 'Total Revenue by Region', 
    x => 'Region', 
    y => 'Total Revenue ($)'

Layer-Specific Data Sources (MAPPING FROM)

Layers can pull data from different sources using MAPPING FROM. This enables overlaying data from different CTEs or tables.

Comparing Actuals vs Targets

Each layer can reference a different CTE using MAPPING ... FROM cte_name:

WITH actuals AS (
    SELECT
        DATE_TRUNC('month', sale_date) as month,
        SUM(revenue) as value
    FROM 'sales.csv'
    GROUP BY DATE_TRUNC('month', sale_date)
),
targets AS (
    SELECT * FROM (VALUES
        ('2023-01-01'::DATE, 5000.0),
        ('2023-02-01'::DATE, 5500.0),
        ('2023-03-01'::DATE, 6000.0),
        ('2023-04-01'::DATE, 6500.0),
        ('2023-05-01'::DATE, 7000.0),
        ('2023-06-01'::DATE, 7500.0),
        ('2023-07-01'::DATE, 8000.0),
        ('2023-08-01'::DATE, 8500.0),
        ('2023-09-01'::DATE, 9000.0),
        ('2023-10-01'::DATE, 9500.0),
        ('2023-11-01'::DATE, 10000.0),
        ('2023-12-01'::DATE, 10500.0)
    ) AS t(month, value)
)
VISUALISE
DRAW line 
    MAPPING month AS x, value AS y, 'Actual' AS color FROM actuals
DRAW point 
    MAPPING month AS x, value AS y, 'Actual' AS color FROM actuals 
    SETTING size => 30
DRAW line 
    MAPPING month AS x, value AS y, 'Target' AS color FROM targets
SCALE x SETTING type => 'date'
LABEL 
    title => 'Revenue: Actual vs Target', 
    x => 'Month', 
    y => 'Revenue ($)'

CTE Chain: Raw → Filtered → Aggregated

CTEs can reference other CTEs, creating a data transformation pipeline:

WITH raw_data AS (
    SELECT sale_date, revenue, category, region
    FROM 'sales.csv'
),
electronics_only AS (
    SELECT * FROM raw_data
    WHERE category = 'Electronics'
),
monthly_electronics AS (
    SELECT
        DATE_TRUNC('month', sale_date) as month,
        region,
        SUM(revenue) as total
    FROM electronics_only
    GROUP BY DATE_TRUNC('month', sale_date), region
)
VISUALISE month AS x, total AS y, region AS color FROM monthly_electronics
DRAW line
DRAW point
SCALE x SETTING type => 'date'
LABEL 
    title => 'Electronics Revenue by Region (CTE Chain)', 
    x => 'Month', 
    y => 'Revenue ($)'

Layer FROM with FILTER

Combine FROM with FILTER to get filtered subsets from a CTE:

WITH all_sales AS (
    SELECT
        DATE_TRUNC('month', sale_date) as month,
        category,
        SUM(revenue) as revenue
    FROM 'sales.csv'
    GROUP BY DATE_TRUNC('month', sale_date), category
)
VISUALISE
DRAW line 
    MAPPING month AS x, revenue AS y, 'All Categories' AS color FROM all_sales
DRAW line 
    MAPPING month AS x, revenue AS y, 'Electronics' AS color FROM all_sales 
    FILTER category = 'Electronics'
DRAW line 
    MAPPING month AS x, revenue AS y, 'Clothing' AS color FROM all_sales 
    FILTER category = 'Clothing'
SCALE x SETTING type => 'date'
LABEL 
    title => 'Revenue by Category (Filtered Layers)', 
    x => 'Month', 
    y => 'Revenue ($)'

Multiple File Sources

Layers can also reference different CSV files directly:

VISUALISE
DRAW line 
    MAPPING date AS x, value AS y, 'Time Series' AS color FROM 'timeseries.csv'
DRAW point 
    MAPPING x AS x, y AS y, 'Scatter' AS color FROM 'data.csv'
LABEL 
    title => 'Data from Multiple Files', 
    x => 'X', 
    y => 'Y'

Advanced Examples

Complete Regional Sales Analysis

SELECT
    sale_date,
    region,
    SUM(quantity) as total_quantity
FROM 'sales.csv'
WHERE sale_date >= '2023-01-01'
GROUP BY sale_date, region
ORDER BY sale_date
VISUALISE sale_date AS x, total_quantity AS y, region AS color
DRAW line
DRAW point
SCALE x SETTING type => 'date'
FACET WRAP region
LABEL 
    title => 'Sales Trends by Region', 
    x => 'Date', 
    y => 'Total Quantity'

Multi-Category Analysis

SELECT
    category,
    region,
    SUM(revenue) as total_revenue
FROM 'sales.csv'
GROUP BY category, region
VISUALISE category AS x, total_revenue AS y, region AS fill
DRAW bar
LABEL 
    title => 'Revenue by Category and Region', 
    x => 'Category', 
    y => 'Revenue ($)'