SQL Nodes

using FunSQL

using FunSQL:
    Agg, Append, As, Asc, Bind, CrossJoin, Define, Desc, Fun, From, Get,
    Group, Highlight, Iterate, Join, LeftJoin, Limit, Lit, Order, Over,
    Partition, SQLNode, SQLTable, Select, Sort, Var, Where, With,
    WithExternal, ID, render

We start with specifying the database model.

const concept =
    SQLTable(:concept, columns = [:concept_id, :vocabulary_id, :concept_code, :concept_name])

const location =
    SQLTable(:location, columns = [:location_id, :city, :state])

const person =
    SQLTable(:person, columns = [:person_id, :gender_concept_id, :year_of_birth, :month_of_birth, :day_of_birth, :birth_datetime, :location_id])

const visit_occurrence =
    SQLTable(:visit_occurrence, columns = [:visit_occurrence_id, :person_id, :visit_start_date, :visit_end_date])

const measurement =
    SQLTable(:measurement, columns = [:measurement_id, :person_id, :measurement_concept_id, :measurement_date])

const observation =
    SQLTable(:observation, columns = [:observation_id, :person_id, :observation_concept_id, :observation_date])

In FunSQL, a SQL query is generated from a tree of SQLNode objects. The nodes are created using constructors with familiar SQL names and connected together using the chain (|>) operator.

q = From(person) |>
    Where(Fun.">"(Get.year_of_birth, 2000)) |>
    Select(Get.person_id)
#-> (…) |> Select(…)

Displaying a SQLNode object shows how it was constructed.

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
    q3 = q2 |> Select(Get.person_id)
    q3
end
=#

Each node wraps a concrete node object, which can be accessed using the indexing operator.

q[]
#-> ((…) |> Select(…))[]

display(q[])
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
    q3 = q2 |> Select(Get.person_id)
    q3[]
end
=#

The SQL query is generated using the function render().

print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 2000)
=#

Ill-formed queries are detected.

q = From(person) |> Agg.count() |> Select(Get.person_id)
render(q)
#=>
ERROR: FunSQL.IllFormedError in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Agg.count() |> Select(Get.person_id)
    q2
end
=#

q = From(person) |> Fun.current_date()
#=>
ERROR: FunSQL.RebaseError in:
Fun.current_date()
=#

@funsql

The @funsql macro provides alternative notation for specifying FunSQL queries.

q = @funsql begin
    from(person)
    filter(year_of_birth > 2000)
    select(person_id)
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
    q3 = q2 |> Select(Get.person_id)
    q3
end
=#

We can combine @funsql notation with regular Julia code.

q = @funsql begin
    from(person)
    $(Where(Get.year_of_birth .> 2000))
    select(person_id)
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
    q3 = q2 |> Select(Get.person_id)
    q3
end
=#

q = From(:person) |>
    @funsql(filter(year_of_birth > 2000)) |>
    Select(Get.person_id)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
    q3 = q2 |> Select(Get.person_id)
    q3
end
=#

The @funsql notation allows us to encapsulate query fragments into query functions.

@funsql adults() = from(person).filter(2020 - year_of_birth >= 16)

display(@funsql adults())
#=>
let q1 = From(:person),
    q2 = q1 |> Where(Fun.">="(Fun."-"(2020, Get.year_of_birth), 16))
    q2
end
=#

Query functions defined with @funsql can accept parameters.

@funsql concept_by_code(v, c) =
    begin
        from(concept)
        filter(vocabulary_id == $v && concept_code == $c)
    end

display(@funsql concept_by_code("SNOMED", "22298006"))
#=>
let q1 = From(:concept),
    q2 = q1 |>
         Where(Fun.and(Fun."="(Get.vocabulary_id, "SNOMED"),
                       Fun."="(Get.concept_code, "22298006")))
    q2
end
=#

Query functions support ... notation.

@funsql concept_by_code(v, cs...) =
    begin
        from(concept)
        filter(vocabulary_id == $v && in(concept_code, $(cs...)))
    end

display(@funsql concept_by_code("Visit", "IP", "ER"))
#=>
let q1 = From(:concept),
    q2 = q1 |>
         Where(Fun.and(Fun."="(Get.vocabulary_id, "Visit"),
                       Fun.in(Get.concept_code, "IP", "ER")))
    q2
end
=#

Query functions support keyword arguments and default values.

@funsql age(yob = year_of_birth; at = fun(`EXTRACT(YEAR FROM CURRENT_DATE) `)) =
    ($at - $yob)

q = @funsql begin
    from(person)
    define(
        age => age(),
        age_in_2000 => age(at = 2000))
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |>
         Define(Fun."-"(Fun."EXTRACT(YEAR FROM CURRENT_DATE) "(),
                        Get.year_of_birth) |>
                As(:age),
                Fun."-"(2000, Get.year_of_birth) |> As(:age_in_2000))
    q2
end
=#

A parameter of a query function accepts a type declaration.

@funsql concept(c::String, v::String = "SNOMED") =
    concept_by_code($v, $c)

@funsql concept(id::Int) =
    from(concept).filter(concept_id == $id)

display(@funsql concept("22298006"))
#=>
let q1 = From(:concept),
    q2 = q1 |>
         Where(Fun.and(Fun."="(Get.vocabulary_id, "SNOMED"),
                       Fun."="(Get.concept_code, "22298006")))
    q2
end
=#

display(@funsql concept(4329847))
#=>
let q1 = From(:concept),
    q2 = q1 |> Where(Fun."="(Get.concept_id, 4329847))
    q2
end
=#

A single @funsql macro can wrap multiple definitions.

@funsql begin
    SNOMED(codes...) = concept_by_code("SNOMED", $(codes...))

    `MYOCARDIAL INFARCTION`() = SNOMED("22298006")
end

display(@funsql `MYOCARDIAL INFARCTION`())
#=>
let q1 = From(:concept),
    q2 = q1 |>
         Where(Fun.and(Fun."="(Get.vocabulary_id, "SNOMED"),
                       Fun."="(Get.concept_code, "22298006")))
    q2
end
=#

A query function may have a docstring.

@funsql begin
    "SNOMED concept set with the given `codes`"
    SNOMED

    "Visit concept set with the given `codes`"
    Visit(codes...) = concept_by_code("Visit", $(codes...))
end

@doc funsql_SNOMED
#-> SNOMED concept set with the given `codes`

@doc funsql_Visit
#-> Visit concept set with the given `codes`

An ill-formed @funsql query triggers an error.

@funsql for p in person; end
#=>
ERROR: LoadError: FunSQL.TransliterationError: ill-formed @funsql notation:
quote
    for p = person
    end
end
in expression starting at …
=#

Literals

A SQL value is created with Lit() constructor.

e = Lit("SQL is fun!")
#-> Lit("SQL is fun!")

In a SELECT clause, bare literal expressions get an alias "_".

q = Select(e)

print(render(q))
#=>
SELECT 'SQL is fun!' AS "_"
=#

Values of certain Julia data types are automatically converted to SQL literals when they are used in the context of a SQL node.

using Dates

q = Select("null" => missing,
           "boolean" => true,
           "integer" => 42,
           "text" => "SQL is fun!",
           "date" => Date(2000))

Such plain literals could also be used in @funsql notation.

q = @funsql select(null => missing,
                   boolean => true,
                   integer => 42,
                   text => "SQL is fun!",
                   date => $(Date(2000)))

display(q)
#=>
Select(missing |> As(:null),
       true |> As(:boolean),
       42 |> As(:integer),
       "SQL is fun!" |> As(:text),
       Dates.Date("2000-01-01") |> As(:date))
=#

Attributes

To reference a table attribute, we use the Get constructor.

e = Get(:person_id)
#-> Get.person_id

Alternatively, use shorthand notation.

Get.person_id
#-> Get.person_id

Get."person_id"
#-> Get.person_id

Get[:person_id]
#-> Get.person_id

Get["person_id"]
#-> Get.person_id

Hierarchical notation is supported.

e = Get.p.person_id
#-> Get.p.person_id

Get.p |> Get.person_id
#-> Get.p.person_id

In the context where a SQL node is expected, a bare symbol is automatically converted to a reference.

q = Select(:person_id)

display(q)
#-> Select(Get.person_id)

@funsql macro translates an identifier to a symbol. In suitable context, this symbol will be translated to a column reference.

@funsql person_id
#-> :person_id

@funsql notation supports hierarchical references.

@funsql p.person_id
#-> Get.p.person_id

Use backticks to represent a name that is not a valid identifier.

@funsql `person_id`
#-> :person_id

@funsql `p`.`person_id`
#-> Get.p.person_id

Get can also create bound references.

q = From(person)

e = Get(over = q, :year_of_birth)
#-> (…) |> Get.year_of_birth

display(e)
#=>
let person = SQLTable(:person, …),
    q1 = From(person)
    q1.year_of_birth
end
=#

q.person_id
#-> (…) |> Get.person_id

q."person_id"
#-> (…) |> Get.person_id

q[:person_id]
#-> (…) |> Get.person_id

q["person_id"]
#-> (…) |> Get.person_id

Get is used for dereferencing an alias created with As.

q = From(person) |>
    As(:p) |>
    Select(Get.p.person_id)

print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#

This is particularly useful when you need to disambiguate the output of Join.

q = From(person) |>
    As(:p) |>
    Join(From(location) |> As(:l),
         on = Get.p.location_id .== Get.l.location_id) |>
    Select(Get.p.person_id, Get.l.state)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "location_1"."state"
FROM "person" AS "person_1"
JOIN "location" AS "location_1" ON ("person_1"."location_id" = "location_1"."location_id")
=#

When Get refers to an unknown attribute, an error is reported.

q = Select(Get.person_id)

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `person_id` in:
Select(Get.person_id)
=#

q = From(person) |>
    As(:p) |>
    Select(Get.q.person_id)

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `q` in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> As(:p) |> Select(Get.q.person_id)
    q2
end
=#

An attribute defined in a Join shadows any previously defined attributes with the same name.

q = person |>
    Join(person, true) |>
    Select(Get.person_id)

print(render(q))
#=>
SELECT "person_2"."person_id"
FROM "person" AS "person_1"
CROSS JOIN "person" AS "person_2"
=#

An incomplete hierarchical reference, as well as an unexpected hierarchical reference, will result in an error.

q = person |>
    As(:p) |>
    Select(Get.p)

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: incomplete reference `p` in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> As(:p) |> Select(Get.p)
    q2
end
=#

q = person |>
    Select(Get.person_id.year_of_birth)

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: unexpected reference after `person_id` in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Select(Get.person_id.year_of_birth)
    q2
end
=#

A reference bound to any node other than Get will cause an error.

q = (qₚ = From(person)) |> Select(qₚ.person_id)

print(render(q))
#=>
ERROR: FunSQL.IllFormedError in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Select(q1.person_id)
    q2
end
=#

Any expression could be given a name and attached to a query using the Define constructor.

q = From(person) |>
    Define(:age => Fun.now() .- Get.birth_datetime)
#-> (…) |> Define(…)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age))
    q2
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "person_1"."gender_concept_id",
  "person_1"."year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  "person_1"."birth_datetime",
  "person_1"."location_id",
  (now() - "person_1"."birth_datetime") AS "age"
FROM "person" AS "person_1"
=#

This expression could be referred to by name as if it were a regular table attribute.

print(render(q |> Where(Get.age .> "16 years")))
#=>
SELECT
  "person_2"."person_id",
  "person_2"."gender_concept_id",
  "person_2"."year_of_birth",
  "person_2"."month_of_birth",
  "person_2"."day_of_birth",
  "person_2"."birth_datetime",
  "person_2"."location_id",
  "person_2"."age"
FROM (
  SELECT
    "person_1"."person_id",
    "person_1"."gender_concept_id",
    "person_1"."year_of_birth",
    "person_1"."month_of_birth",
    "person_1"."day_of_birth",
    "person_1"."birth_datetime",
    "person_1"."location_id",
    (now() - "person_1"."birth_datetime") AS "age"
  FROM "person" AS "person_1"
) AS "person_2"
WHERE ("person_2"."age" > '16 years')
=#

A Define node can be created using @funsql notation.

q = @funsql from(person).define(age => 2000 - year_of_birth)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Define(Fun."-"(2000, Get.year_of_birth) |> As(:age))
    q2
end
=#

Define does not create a nested query if the definition is a literal or a simple reference.

q = From(person) |>
    Define(:year => Get.year_of_birth,
           :threshold => 2000) |>
    Where(Get.year .>= Get.threshold)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "person_1"."gender_concept_id",
  "person_1"."year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  "person_1"."birth_datetime",
  "person_1"."location_id",
  "person_1"."year_of_birth" AS "year",
  2000 AS "threshold"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" >= 2000)
=#

Define can be used to override an existing field.

q = From(person) |>
    Define(:person_id => Get.year_of_birth, :year_of_birth => Get.person_id)

print(render(q))
#=>
SELECT
  "person_1"."year_of_birth" AS "person_id",
  "person_1"."gender_concept_id",
  "person_1"."person_id" AS "year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  "person_1"."birth_datetime",
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

Define allows you to insert columns at the beginning or at the end of the column list.

q = From(person) |>
    Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
           before = true)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |>
         Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age),
                Get.birth_datetime,
                before = true)
    q2
end
=#

print(render(q))
#=>
SELECT
  (now() - "person_1"."birth_datetime") AS "age",
  "person_1"."birth_datetime",
  "person_1"."person_id",
  "person_1"."gender_concept_id",
  "person_1"."year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

q = From(person) |>
    Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
           after = true)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |>
         Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age),
                Get.birth_datetime,
                after = true)
    q2
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "person_1"."gender_concept_id",
  "person_1"."year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  "person_1"."location_id",
  (now() - "person_1"."birth_datetime") AS "age",
  "person_1"."birth_datetime"
FROM "person" AS "person_1"
=#

It can also insert columns in front of or right after a specified column.

q = From(person) |>
    Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
           before = :year_of_birth)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "person_1"."gender_concept_id",
  (now() - "person_1"."birth_datetime") AS "age",
  "person_1"."birth_datetime",
  "person_1"."year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

q = From(person) |>
    Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
           after = :birth_datetime)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "person_1"."gender_concept_id",
  "person_1"."year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  (now() - "person_1"."birth_datetime") AS "age",
  "person_1"."birth_datetime",
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

It is an error to set both before and after or to refer to a non-existent column.

q = From(person) |>
    Define(before = true, after = true)

print(render(q))
#=>
ERROR: DomainError with (before = true, after = true):
only one of `before` and `after` could be set
=#

q = Define(before = :person_id)

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `person_id` in:
let q1 = Define(before = :person_id)
    q1
end
=#

Define has no effect if none of the defined fields are used in the query.

q = From(person) |>
    Define(:age => 2020 .- Get.year_of_birth) |>
    Select(Get.person_id, Get.year_of_birth)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "person_1"."year_of_birth"
FROM "person" AS "person_1"
=#

Define can be used after Select.

q = From(person) |>
    Select(Get.person_id, Get.year_of_birth) |>
    Define(:age => 2020 .- Get.year_of_birth)

print(render(q))
#=>
SELECT
  "person_2"."person_id",
  "person_2"."year_of_birth",
  (2020 - "person_2"."year_of_birth") AS "age"
FROM (
  SELECT
    "person_1"."person_id",
    "person_1"."year_of_birth"
  FROM "person" AS "person_1"
) AS "person_2"
=#

Define requires that all definitions have a unique alias.

From(person) |>
Define(:age => Fun.now() .- Get.birth_datetime,
       :age => Fun.current_timestamp() .- Get.birth_datetime)
#=>
ERROR: FunSQL.DuplicateLabelError: `age` is used more than once in:
Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age),
       Fun."-"(Fun.current_timestamp(), Get.birth_datetime) |> As(:age))
=#

Variables

A query variable is created with the Var constructor.

e = Var(:YEAR)
#-> Var.YEAR

Alternatively, use shorthand notation.

Var.YEAR
#-> Var.YEAR

Var."YEAR"
#-> Var.YEAR

Var[:YEAR]
#-> Var.YEAR

Var["YEAR"]
#-> Var.YEAR

A variable could be created with @funsql notation.

@funsql :YEAR
#-> Var.YEAR

Unbound query variables are serialized as query parameters.

q = From(person) |>
    Where(Get.year_of_birth .> Var.YEAR)

sql = render(q)

print(sql)
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > :YEAR)
=#

sql.vars
#-> [:YEAR]

Query variables could be bound using the Bind constructor.

q0(person_id) =
    From(visit_occurrence) |>
    Where(Get.person_id .== Var.PERSON_ID) |>
    Bind(:PERSON_ID => person_id)

q0(1)
#-> (…) |> Bind(…)

display(q0(1))
#=>
let visit_occurrence = SQLTable(:visit_occurrence, …),
    q1 = From(visit_occurrence),
    q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID))
    q2 |> Bind(1 |> As(:PERSON_ID))
end
=#

print(render(q0(1)))
#=>
SELECT
  "visit_occurrence_1"."visit_occurrence_id",
  "visit_occurrence_1"."person_id",
  "visit_occurrence_1"."visit_start_date",
  "visit_occurrence_1"."visit_end_date"
FROM "visit_occurrence" AS "visit_occurrence_1"
WHERE ("visit_occurrence_1"."person_id" = 1)
=#

A Bind node can be created with @funsql notation.

q = @funsql begin
    from(visit_occurrence)
    filter(person_id == :PERSON_ID)
    bind(:PERSON_ID => person_id)
end

display(q)
#=>
let q1 = From(:visit_occurrence),
    q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID))
    q2 |> Bind(Get.person_id |> As(:PERSON_ID))
end
=#

Bind lets us create correlated subqueries.

q = From(person) |>
    Where(Fun.exists(q0(Get.person_id)))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE (EXISTS (
  SELECT NULL AS "_"
  FROM "visit_occurrence" AS "visit_occurrence_1"
  WHERE ("visit_occurrence_1"."person_id" = "person_1"."person_id")
))
=#

When an argument to Bind is an aggregate, it must be evaluated in a nested subquery.

q0(person_id, date) =
    From(observation) |>
    Where(Fun.and(Get.person_id .== Var.PERSON_ID,
                  Get.observation_date .>= Var.DATE)) |>
    Bind(:PERSON_ID => person_id, :DATE => date)

q = From(visit_occurrence) |>
    Group(Get.person_id) |>
    Where(Fun.exists(q0(Get.person_id, Agg.max(Get.visit_start_date))))

print(render(q))
#=>
SELECT "visit_occurrence_2"."person_id"
FROM (
  SELECT
    "visit_occurrence_1"."person_id",
    max("visit_occurrence_1"."visit_start_date") AS "max"
  FROM "visit_occurrence" AS "visit_occurrence_1"
  GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_occurrence_2"
WHERE (EXISTS (
  SELECT NULL AS "_"
  FROM "observation" AS "observation_1"
  WHERE
    ("observation_1"."person_id" = "visit_occurrence_2"."person_id") AND
    ("observation_1"."observation_date" >= "visit_occurrence_2"."max")
))
=#

An empty Bind can be created.

Bind(args = [])
#-> Bind(args = [])

Bind requires that all variables have a unique name.

Bind(:PERSON_ID => 1, :PERSON_ID => 2)
#=>
ERROR: FunSQL.DuplicateLabelError: `PERSON_ID` is used more than once in:
Bind(1 |> As(:PERSON_ID), 2 |> As(:PERSON_ID))
=#

Functions and Operators

A function or an operator invocation is created with the Fun constructor.

Fun.">"
#-> Fun.:(">")

e = Fun.">"(Get.year_of_birth, 2000)
#-> Fun.:(">")(…)

display(e)
#-> Fun.">"(Get.year_of_birth, 2000)

Alternatively, Fun nodes are created by broadcasting. Common Julia operators are replaced with their SQL equivalents.

#? VERSION >= v"1.7"
e = Get.location.state .== "IL" .|| Get.location.zip .!= "60615"
#-> Fun.or(…)

#? VERSION >= v"1.7"
display(e)
#-> Fun.or(Fun."="(Get.location.state, "IL"), Fun."<>"(Get.location.zip, "60615"))

#? VERSION >= v"1.7"
e = .!(e .&& Get.year_of_birth .> 1950 .&& Get.year_of_birth .< 1990)
#-> Fun.not(…)

#? VERSION >= v"1.7"
display(e)
#=>
Fun.not(Fun.and(Fun.or(Fun."="(Get.location.state, "IL"),
                       Fun."<>"(Get.location.zip, "60615")),
                Fun.and(Fun.">"(Get.year_of_birth, 1950),
                        Fun."<"(Get.year_of_birth, 1990))))
=#

A vector of arguments could be passed directly.

Fun.">"(args = SQLNode[Get.year_of_birth, 2000])
#-> Fun.:(">")(…)

Fun nodes can be generated in @funsql notation.

e = @funsql fun(>, year_of_birth, 2000)

display(e)
#-> Fun.">"(Get.year_of_birth, 2000)

In order to generate Fun nodes using regular function and operator calls, we need to declare these functions and operators in advance.

e = @funsql concat(location.city, ", ", location.state)

display(e)
#-> Fun.concat(Get.location.city, ", ", Get.location.state)

e = @funsql 1950 < year_of_birth < 1990

display(e)
#-> Fun.and(Fun."<"(1950, Get.year_of_birth), Fun."<"(Get.year_of_birth, 1990))

e = @funsql location.state != "IL" || location.zip != 60615

display(e)
#-> Fun.or(Fun."<>"(Get.location.state, "IL"), Fun."<>"(Get.location.zip, 60615))

e = @funsql location.state == "IL" && location.zip == 60615

display(e)
#-> Fun.and(Fun."="(Get.location.state, "IL"), Fun."="(Get.location.zip, 60615))

In @funsql notation, use backticks to represent a name that is not a valid identifier.

e = @funsql fun(`SUBSTRING(? FROM ? FOR ?)`, city, 1, 1)

display(e)
#-> Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city, 1, 1)

q = @funsql `from`(person).`filter`(year_of_birth <= 1964)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Where(Fun."<="(Get.year_of_birth, 1964))
    q2
end
=#

In @funsql notation, an if statement is converted to a CASE expression.

e = @funsql year_of_birth <= 1964 ? "Boomers" : "Millenials"

display(e)
#-> Fun.case(Fun."<="(Get.year_of_birth, 1964), "Boomers", "Millenials")

e = @funsql year_of_birth <= 1964 ? "Boomers" :
            year_of_birth <= 1980 ? "Generation X" : "Millenials"

display(e)
#=>
Fun.case(Fun."<="(Get.year_of_birth, 1964),
         "Boomers",
         Fun."<="(Get.year_of_birth, 1980),
         "Generation X",
         "Millenials")
=#

e = @funsql if year_of_birth <= 1964; "Boomers"; end

display(e)
#-> Fun.case(Fun."<="(Get.year_of_birth, 1964), "Boomers")

e = @funsql begin
    if year_of_birth <= 1964
        "Boomers"
    elseif year_of_birth <= 1980
        "Generation X"
    end
end

display(e)
#=>
Fun.case(Fun."<="(Get.year_of_birth, 1964),
         "Boomers",
         Fun."<="(Get.year_of_birth, 1980),
         "Generation X")
=#

e = @funsql begin
    if year_of_birth <= 1964
        "Boomers"
    elseif year_of_birth <= 1980
        "Generation X"
    elseif year_of_birth <= 1996
        "Millenials"
    else
        "Generation Z"
    end
end

display(e)
#=>
Fun.case(Fun."<="(Get.year_of_birth, 1964),
         "Boomers",
         Fun."<="(Get.year_of_birth, 1980),
         "Generation X",
         Fun."<="(Get.year_of_birth, 1996),
         "Millenials",
         "Generation Z")
=#

In a SELECT clause, the function name becomes the column alias.

q = From(location) |>
    Select(Fun.coalesce(Get.city, "N/A"))

print(render(q))
#=>
SELECT coalesce("location_1"."city", 'N/A') AS "coalesce"
FROM "location" AS "location_1"
=#

When the name contains only symbol characters, or when it starts or ends with a space character, it is interpreted as an operator.

q = From(location) |>
    Select(Fun." || "(Get.city, ", ", Get.state))

print(render(q))
#=>
SELECT ("location_1"."city" || ', ' || "location_1"."state") AS "_"
FROM "location" AS "location_1"
=#

The function name containing ? serves as a template.

q = From(location) |>
    Select(Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city, 1, 1))

print(render(q))
#=>
SELECT SUBSTRING("location_1"."city" FROM 1 FOR 1) AS "_"
FROM "location" AS "location_1"
=#

The number of arguments to a function must coincide with the number of placeholders in the template.

Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city)
#=>
ERROR: FunSQL.InvalidArityError: `SUBSTRING(? FROM ? FOR ?)` expects 3 arguments, got 1 in:
Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city)
=#

Some common functions also validate the number of arguments.

Fun.case()
#=>
ERROR: FunSQL.InvalidArityError: `case` expects at least 2 arguments, got 0 in:
Fun.case()
=#

Fun.is_null(Get.city, Get.state)
#=>
ERROR: FunSQL.InvalidArityError: `is_null` expects 1 argument, got 2 in:
Fun.is_null(Get.city, Get.state)
=#

Fun.count(Get.city, Get.state)
#=>
ERROR: FunSQL.InvalidArityError: `count` expects from 0 to 1 argument, got 2 in:
Fun.count(Get.city, Get.state)
=#

A function invocation may include a nested query.

p = From(person) |>
    Where(Get.year_of_birth .> 1950)

q = Select(Fun.exists(p))

print(render(q))
#=>
SELECT (EXISTS (
  SELECT NULL AS "_"
  FROM "person" AS "person_1"
  WHERE ("person_1"."year_of_birth" > 1950)
)) AS "exists"
=#

p = From(concept) |>
    Where(Fun.and(Get.vocabulary_id .== "Gender",
                  Get.concept_code .== "F")) |>
    Select(Get.concept_id)

q = From(person) |>
    Where(Fun.in(Get.gender_concept_id, p))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."gender_concept_id" IN (
  SELECT "concept_1"."concept_id"
  FROM "concept" AS "concept_1"
  WHERE
    ("concept_1"."vocabulary_id" = 'Gender') AND
    ("concept_1"."concept_code" = 'F')
))
=#

FunSQL can simplify logical expressions.

q = From(person) |>
    Where(Fun.and())

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

q = From(person) |>
    Select(Get.person_id) |>
    Where(Fun.and())

print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#

q = From(person) |>
    Where(Fun.and(Get.year_of_birth .> 1950))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1950)
=#

q = From(person) |>
    Where(foldl(Fun.and, [Get.year_of_birth .> 1950, Get.year_of_birth .< 1960, Get.year_of_birth .!= 1955], init = Fun.and()))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE
  ("person_1"."year_of_birth" > 1950) AND
  ("person_1"."year_of_birth" < 1960) AND
  ("person_1"."year_of_birth" <> 1955)
=#

q = From(person) |>
    Where(Fun.or())

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE FALSE
=#

q = From(person) |>
    Where(Fun.or(Get.year_of_birth .> 1950))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1950)
=#

q = From(person) |>
    Where(Fun.or(Fun.or(Fun.or(), Get.year_of_birth .> 1950), Get.year_of_birth .< 1960))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE
  ("person_1"."year_of_birth" > 1950) OR
  ("person_1"."year_of_birth" < 1960)
=#

#? VERSION >= v"1.7"
q = From(person) |>
    Where(Get.year_of_birth .> 1950 .|| Get.year_of_birth .< 1960 .|| Get.year_of_birth .!= 1955)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE
  ("person_1"."year_of_birth" > 1950) OR
  ("person_1"."year_of_birth" < 1960) OR
  ("person_1"."year_of_birth" <> 1955)
=#

q = From(person) |>
    Where(Fun.not(false))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

Append

The Append constructor creates a subquery that concatenates the output of multiple queries.

q = From(measurement) |>
    Define(:date => Get.measurement_date) |>
    Append(From(observation) |>
           Define(:date => Get.observation_date))
#-> (…) |> Append(…)

display(q)
#=>
let measurement = SQLTable(:measurement, …),
    observation = SQLTable(:observation, …),
    q1 = From(measurement),
    q2 = q1 |> Define(Get.measurement_date |> As(:date)),
    q3 = From(observation),
    q4 = q3 |> Define(Get.observation_date |> As(:date)),
    q5 = q2 |> Append(q4)
    q5
end
=#

print(render(q |> Select(Get.person_id, Get.date)))
#=>
SELECT
  "union_1"."person_id",
  "union_1"."date"
FROM (
  SELECT
    "measurement_1"."person_id",
    "measurement_1"."measurement_date" AS "date"
  FROM "measurement" AS "measurement_1"
  UNION ALL
  SELECT
    "observation_1"."person_id",
    "observation_1"."observation_date" AS "date"
  FROM "observation" AS "observation_1"
) AS "union_1"
=#

Append can also be specified without the over node.

q = Append(From(measurement) |>
           Define(:date => Get.measurement_date),
           From(observation) |>
           Define(:date => Get.observation_date)) |>
    Select(Get.person_id, Get.date)

print(render(q))
#=>
SELECT
  "union_1"."person_id",
  "union_1"."date"
FROM (
  SELECT
    "measurement_1"."person_id",
    "measurement_1"."measurement_date" AS "date"
  FROM "measurement" AS "measurement_1"
  UNION ALL
  SELECT
    "observation_1"."person_id",
    "observation_1"."observation_date" AS "date"
  FROM "observation" AS "observation_1"
) AS "union_1"
=#

An Append node can be created using @funsql notation.

q = @funsql begin
    from(measurement).define(date => measurement_date)
    append(from(observation).define(date => observation_date))
end

display(q)
#=>
let q1 = From(:measurement),
    q2 = q1 |> Define(Get.measurement_date |> As(:date)),
    q3 = From(:observation),
    q4 = q3 |> Define(Get.observation_date |> As(:date)),
    q5 = q2 |> Append(q4)
    q5
end
=#

Append will automatically assign unique aliases to the exported columns.

q = From(measurement) |>
    Define(:concept_id => Get.measurement_concept_id) |>
    Group(Get.person_id) |>
    Define(:count => 1, :count_2 => 2) |>
    Append(From(observation) |>
           Define(:concept_id => Get.observation_concept_id) |>
           Group(Get.person_id) |>
           Define(:count => 10, :count_2 => 20)) |>
    Select(Get.person_id, :agg_count => Agg.count(), Get.count_2, Get.count)

print(render(q))
#=>
SELECT
  "union_1"."person_id",
  "union_1"."count" AS "agg_count",
  "union_1"."count_2",
  "union_1"."count_3" AS "count"
FROM (
  SELECT
    "measurement_1"."person_id",
    count(*) AS "count",
    2 AS "count_2",
    1 AS "count_3"
  FROM "measurement" AS "measurement_1"
  GROUP BY "measurement_1"."person_id"
  UNION ALL
  SELECT
    "observation_1"."person_id",
    count(*) AS "count",
    20 AS "count_2",
    10 AS "count_3"
  FROM "observation" AS "observation_1"
  GROUP BY "observation_1"."person_id"
) AS "union_1"
=#

Append will not put duplicate expressions into the SELECT clauses of the nested subqueries.

q = From(person) |>
    Join(From(measurement) |>
         Define(:date => Get.measurement_date) |>
         Append(From(observation) |>
                Define(:date => Get.observation_date)) |>
         As(:assessment),
         on = Get.person_id .== Get.assessment.person_id) |>
    Where(Get.assessment.date .> Fun.current_timestamp()) |>
    Select(Get.person_id, Get.assessment.date)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "assessment_1"."date"
FROM "person" AS "person_1"
JOIN (
  SELECT
    "measurement_1"."measurement_date" AS "date",
    "measurement_1"."person_id"
  FROM "measurement" AS "measurement_1"
  UNION ALL
  SELECT
    "observation_1"."observation_date" AS "date",
    "observation_1"."person_id"
  FROM "observation" AS "observation_1"
) AS "assessment_1" ON ("person_1"."person_id" = "assessment_1"."person_id")
WHERE ("assessment_1"."date" > CURRENT_TIMESTAMP)
=#

q = From(measurement) |>
    Define(:date => Get.measurement_date) |>
    Append(From(observation) |>
    Define(:date => Get.observation_date)) |>
    Group(Get.date) |>
    Define(Agg.count())

print(render(q))
#=>
SELECT
  "union_1"."date",
  count(*) AS "count"
FROM (
  SELECT "measurement_1"."measurement_date" AS "date"
  FROM "measurement" AS "measurement_1"
  UNION ALL
  SELECT "observation_1"."observation_date" AS "date"
  FROM "observation" AS "observation_1"
) AS "union_1"
GROUP BY "union_1"."date"
=#

Append aligns the columns of its subqueries.

q = From(measurement) |>
    Select(Get.person_id, :date => Get.measurement_date) |>
    Append(From(observation) |>
           Select(:date => Get.observation_date, Get.person_id))

print(render(q))
#=>
SELECT
  "measurement_1"."person_id",
  "measurement_1"."measurement_date" AS "date"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT
  "observation_2"."person_id",
  "observation_2"."date"
FROM (
  SELECT
    "observation_1"."observation_date" AS "date",
    "observation_1"."person_id"
  FROM "observation" AS "observation_1"
) AS "observation_2"
=#

Arguments of Append may contain ORDER BY or LIMIT clauses, which must be wrapped in a nested subquery.

q = From(measurement) |>
    Order(Get.measurement_date) |>
    Select(Get.person_id, :date => Get.measurement_date) |>
    Append(From(observation) |>
           Define(:date => Get.observation_date) |>
           Limit(1))

print(render(q))
#=>
SELECT
  "measurement_2"."person_id",
  "measurement_2"."date"
FROM (
  SELECT
    "measurement_1"."person_id",
    "measurement_1"."measurement_date" AS "date"
  FROM "measurement" AS "measurement_1"
  ORDER BY "measurement_1"."measurement_date"
) AS "measurement_2"
UNION ALL
SELECT
  "observation_2"."person_id",
  "observation_2"."date"
FROM (
  SELECT
    "observation_1"."person_id",
    "observation_1"."observation_date" AS "date"
  FROM "observation" AS "observation_1"
  FETCH FIRST 1 ROW ONLY
) AS "observation_2"
=#

An Append without any queries can be created explicitly.

q = Append(args = [])
#-> Append(args = [])

print(render(q))
#=>
SELECT NULL AS "_"
WHERE FALSE
=#

Without an explicit Select, the output of Append includes the common columns of the nested queries.

q = Append(measurement, observation)

print(render(q))
#=>
SELECT "measurement_1"."person_id"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT "observation_1"."person_id"
FROM "observation" AS "observation_1"
=#

Iterate

The Iterate constructor creates an iteration query. In the argument of Iterate, the From(^) node refers to the output of the previous iteration. We could use Iterate and From(^) to create a factorial table.

q = Define(:n => 1, :f => 1) |>
    Iterate(From(^) |>
            Define(:n => Get.n .+ 1, :f => Get.f .* (Get.n .+ 1)) |>
            Where(Get.n .<= 10))
#-> (…) |> Iterate(…)

display(q)
#=>
let q1 = Define(1 |> As(:n), 1 |> As(:f)),
    q2 = From(^),
    q3 = q2 |>
         Define(Fun."+"(Get.n, 1) |> As(:n),
                Fun."*"(Get.f, Fun."+"(Get.n, 1)) |> As(:f)),
    q4 = q3 |> Where(Fun."<="(Get.n, 10)),
    q5 = q1 |> Iterate(q4)
    q5
end
=#

print(render(q))
#=>
WITH RECURSIVE "__1" ("n", "f") AS (
  SELECT
    1 AS "n",
    1 AS "f"
  UNION ALL
  SELECT
    "__3"."n",
    "__3"."f"
  FROM (
    SELECT
      ("__2"."n" + 1) AS "n",
      ("__2"."f" * ("__2"."n" + 1)) AS "f"
    FROM "__1" AS "__2"
  ) AS "__3"
  WHERE ("__3"."n" <= 10)
)
SELECT
  "__4"."n",
  "__4"."f"
FROM "__1" AS "__4"
=#

An Iterate node can be created using @funsql notation.

q = @funsql begin
    define(n => 1, f => 1)
    iterate(define(n => n + 1, f => f * (n + 1)).filter(n <= 10))
end

display(q)
#=>
let q1 = Define(1 |> As(:n), 1 |> As(:f)),
    q2 = Define(Fun."+"(Get.n, 1) |> As(:n),
                Fun."*"(Get.f, Fun."+"(Get.n, 1)) |> As(:f)),
    q3 = q2 |> Where(Fun."<="(Get.n, 10)),
    q4 = q1 |> Iterate(q3)
    q4
end
=#

The From(^) node in front of the iterator query can be omitted.

q = Define(:n => 1, :f => 1) |>
    Iterate(Define(:n => Get.n .+ 1, :f => Get.f .* (Get.n .+ 1)) |>
            Where(Get.n .<= 10))

print(render(q))
#=>
WITH RECURSIVE "__1" ("n", "f") AS (
  SELECT
    1 AS "n",
    1 AS "f"
  UNION ALL
  SELECT
    "__3"."n",
    "__3"."f"
  FROM (
    SELECT
      ("__2"."n" + 1) AS "n",
      ("__2"."f" * ("__2"."n" + 1)) AS "f"
    FROM "__1" AS "__2"
  ) AS "__3"
  WHERE ("__3"."n" <= 10)
)
SELECT
  "__4"."n",
  "__4"."f"
FROM "__1" AS "__4"
=#

An Iterate node may use a CTE.

q = Define(:n => 1, :f => 1) |>
    Iterate(Define(:n => Get.n .+ 1, :f => Get.f .* (Get.n .+ 1)) |>
            CrossJoin(From(:threshold)) |>
            Where(Get.n .<= Get.threshold)) |>
    With(:threshold => Define(:threshold => 10))

print(render(q))
#=>
WITH RECURSIVE "threshold_1" ("threshold") AS (
  SELECT 10 AS "threshold"
),
"__1" ("n", "f") AS (
  SELECT
    1 AS "n",
    1 AS "f"
  UNION ALL
  SELECT
    "__3"."n",
    "__3"."f"
  FROM (
    SELECT
      ("__2"."n" + 1) AS "n",
      ("__2"."f" * ("__2"."n" + 1)) AS "f",
      "threshold_2"."threshold"
    FROM "__1" AS "__2"
    CROSS JOIN "threshold_1" AS "threshold_2"
  ) AS "__3"
  WHERE ("__3"."n" <= "__3"."threshold")
)
SELECT
  "__4"."n",
  "__4"."f"
FROM "__1" AS "__4"
=#

It is an error to use From(^) outside of Iterate.

q = From(^)

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: self-reference outside of Iterate in:
let q1 = From(^)
    q1
end
=#

The set of columns produced by Iterate is the intersection of the columns produced by the base query and the iterator query.

q = Define(:k => 0, :m => 0) |>
    Iterate(As(:previous) |>
            Where(Get.previous.m .< 10) |>
            Define(:m => Get.previous.m .+ 1, :n => 0))

print(render(q))
#=>
WITH RECURSIVE "previous_1" ("m") AS (
  SELECT 0 AS "m"
  UNION ALL
  SELECT ("previous_2"."m" + 1) AS "m"
  FROM "previous_1" AS "previous_2"
  WHERE ("previous_2"."m" < 10)
)
SELECT "previous_3"."m"
FROM "previous_1" AS "previous_3"
=#

Iterate aligns the columns of its subqueries.

q = Select(:n => 1, :f => 1) |>
    Iterate(Where(Get.n .< 10) |>
            Select(:f => (Get.n .+ 1) .* Get.f,
                   :n => Get.n .+ 1))

print(render(q))
#=>
WITH RECURSIVE "__1" ("n", "f") AS (
  SELECT
    1 AS "n",
    1 AS "f"
  UNION ALL
  SELECT
    "__3"."n",
    "__3"."f"
  FROM (
    SELECT
      (("__2"."n" + 1) * "__2"."f") AS "f",
      ("__2"."n" + 1) AS "n"
    FROM "__1" AS "__2"
    WHERE ("__2"."n" < 10)
  ) AS "__3"
)
SELECT
  "__4"."n",
  "__4"."f"
FROM "__1" AS "__4"
=#

As

An alias to an expression can be added with the As constructor.

e = 42 |> As(:integer)
#-> (…) |> As(:integer)

display(e)
#-> 42 |> As(:integer)

print(render(Select(e)))
#=>
SELECT 42 AS "integer"
=#

As node can be created with @funsql.

e = @funsql (42).as(integer)

display(e)
#-> 42 |> As(:integer)

The => shorthand is supported by @funsql.

e = @funsql integer => 42

display(e)
#-> :integer => 42

As is also used to create an alias for a subquery.

q = From(person) |>
    As(:p) |>
    Select(Get.p.person_id)

print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#

As blocks the default output columns.

q = From(person) |> As(:p)

print(render(q))
#=>
SELECT NULL AS "_"
FROM "person" AS "person_1"
=#

From

The From constructor creates a subquery that selects columns from the given table.

q = From(person)
#-> From(…)

display(q)
#-> From(SQLTable(:person, …))

By default, From selects all columns from the table.

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "person_1"."gender_concept_id",
  "person_1"."year_of_birth",
  "person_1"."month_of_birth",
  "person_1"."day_of_birth",
  "person_1"."birth_datetime",
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

From adds the schema qualifier when the table has the schema.

const pg_database =
    SQLTable(qualifiers = [:pg_catalog], :pg_database, columns = [:oid, :datname])

q = From(pg_database)

print(render(q))
#=>
SELECT
  "pg_database_1"."oid",
  "pg_database_1"."datname"
FROM "pg_catalog"."pg_database" AS "pg_database_1"
=#

In a suitable context, a SQLTable object is automatically converted to a From subquery.

print(render(person))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

From and other subqueries generate a correct SELECT clause when the table has no columns.

empty = SQLTable(:empty, columns = Symbol[])

q = From(empty) |>
    Where(false) |>
    Select(args = [])

display(q)
#=>
let empty = SQLTable(:empty, …),
    q1 = From(empty),
    q2 = q1 |> Where(false),
    q3 = q2 |> Select(args = [])
    q3
end
=#

print(render(q))
#=>
SELECT NULL AS "_"
FROM "empty" AS "empty_1"
WHERE FALSE
=#

When From takes a Tables-compatible argument, it generates a VALUES query.

using DataFrames

df = DataFrame(name = ["SQL", "Julia", "FunSQL"],
               year = [1974, 2012, 2021])

q = From(df)
#-> From(…)

display(q)
#-> From((name = ["SQL", …], year = [1974, …]))

print(render(q))
#=>
SELECT
  "values_1"."name",
  "values_1"."year"
FROM (
  VALUES
    ('SQL', 1974),
    ('Julia', 2012),
    ('FunSQL', 2021)
) AS "values_1" ("name", "year")
=#

SQLite does not support column aliases with AS clause.

print(render(q, dialect = :sqlite))
#=>
SELECT
  "values_1"."column1" AS "name",
  "values_1"."column2" AS "year"
FROM (
  VALUES
    ('SQL', 1974),
    ('Julia', 2012),
    ('FunSQL', 2021)
) AS "values_1"
=#

Only columns that are used in the query will be serialized.

q = From(df) |>
    Select(Get.name)

print(render(q))
#=>
SELECT "values_1"."name"
FROM (
  VALUES
    ('SQL'),
    ('Julia'),
    ('FunSQL')
) AS "values_1" ("name")
=#

A column of NULLs will be added if no actual columns are used.

q = From(df) |>
    Group() |>
    Select(Agg.count())

print(render(q))
#=>
SELECT count(*) AS "count"
FROM (
  VALUES
    (NULL),
    (NULL),
    (NULL)
) AS "values_1" ("_")
=#

Since VALUES clause requires at least one row of data, a different representation is used when the source table is empty.

q = From(df[1:0, :])

print(render(q))
#=>
SELECT
  NULL AS "name",
  NULL AS "year"
WHERE FALSE
=#

The source table must have at least one column.

q = From(df[1:0, 1:0])
#=>
ERROR: DomainError with 0×0 DataFrame:
a table with at least one column is expected
=#

From can accept a table-valued function. Since the output type of the function is not known to FunSQL, you must manually specify the names of the output columns.

q = From(Fun.generate_series(0, 100, 10), columns = [:value])
#-> From(…, columns = [:value])

display(q)
#-> From(Fun.generate_series(0, 100, 10), columns = [:value])

print(render(q))
#=>
SELECT "generate_series_1"."value"
FROM generate_series(0, 100, 10) AS "generate_series_1" ("value")
=#

WITH ORDINALITY annotation adds an extra column that enumerates the output rows.

q = From(Fun."? WITH ORDINALITY"(Fun.generate_series(0, 100, 10)),
         columns = [:value, :index])

print(render(q))
#=>
SELECT
  "__1"."value",
  "__1"."index"
FROM generate_series(0, 100, 10) WITH ORDINALITY AS "__1" ("value", "index")
=#

A From node can be created with @funsql notation.

q = @funsql from(person)

display(q)
#-> From(:person)

q = @funsql from(nothing)

display(q)
#-> From(nothing)

q = @funsql from(^)

display(q)
#-> From(^)

q = @funsql from($person)

display(q)
#-> From(SQLTable(:person, …))

q = @funsql from($df)

display(q)
#-> From((name = ["SQL", …], year = [1974, …]))

funsql_generate_series = FunSQL.FunClosure(:generate_series)

q = @funsql from(generate_series(0, 100, 10), columns = [value])

display(q)
#-> From(Fun.generate_series(0, 100, 10), columns = [:value])

When From with a tabular function is attached to the right branch of a Join node, the function may use data from the left branch of Join, even without being wrapped in a Bind node.

q = From(Fun.regexp_split_to_table("(10,20)-(30,40)-(50,60)", "-"),
         columns = [:point]) |>
    CrossJoin(From(Fun.regexp_matches(Get.point, "(\\d+),(\\d+)"),
                   columns = [:captures])) |>
    Select(:x => Fun."CAST(?[1] AS INTEGER)"(Get.captures),
           :y => Fun."CAST(?[2] AS INTEGER)"(Get.captures))

print(render(q))
#=>
SELECT
  CAST("regexp_matches_1"."captures"[1] AS INTEGER) AS "x",
  CAST("regexp_matches_1"."captures"[2] AS INTEGER) AS "y"
FROM regexp_split_to_table('(10,20)-(30,40)-(50,60)', '-') AS "regexp_split_to_table_1" ("point")
CROSS JOIN regexp_matches("regexp_split_to_table_1"."point", '(\d+),(\d+)') AS "regexp_matches_1" ("captures")
=#

All the columns of a tabular function must have distinct names.

From(Fun."? WITH ORDINALITY"(Fun.generate_series(0, 100, 10)),
     columns = [:index, :index])
#=>
ERROR: FunSQL.DuplicateLabelError: `index` is used more than once in:
let q1 = From(Fun."? WITH ORDINALITY"(Fun.generate_series(0, 100, 10)),
              columns = [:index, :index])
    q1
end
=#

From(nothing) will generate a unit dataset with one row.

q = From(nothing)

display(q)
#-> From(nothing)

print(render(q))
#=>
SELECT NULL AS "_"
=#

With, Over, and WithExternal

We can create a temporary dataset using With and refer to it with From.

q = From(:male) |>
    With(From(person) |>
         Where(Get.gender_concept_id .== 8507) |>
         As(:male))

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(:male),
    q2 = From(person),
    q3 = q2 |> Where(Fun."="(Get.gender_concept_id, 8507)),
    q4 = q1 |> With(q3 |> As(:male))
    q4
end
=#

print(render(q))
#=>
WITH "male_1" ("person_id", …, "location_id") AS (
  SELECT
    "person_1"."person_id",
    ⋮
    "person_1"."location_id"
  FROM "person" AS "person_1"
  WHERE ("person_1"."gender_concept_id" = 8507)
)
SELECT
  "male_2"."person_id",
  ⋮
  "male_2"."location_id"
FROM "male_1" AS "male_2"
=#

With definitions can be annotated as materialized or not materialized:

q = From(:male) |>
    With(From(person) |>
         Where(Get.gender_concept_id .== 8507) |>
         As(:male),
         materialized = true)
#-> (…) |> With(…, materialized = true)

print(render(q))
#=>
WITH "male_1" ( … ) AS MATERIALIZED (
  ⋮
)
SELECT
  ⋮
FROM "male_1" AS "male_2"
=#

q = From(:male) |>
    With(From(person) |>
         Where(Get.gender_concept_id .== 8507) |>
         As(:male),
         materialized = false)

print(render(q))
#=>
WITH "male_1" ( … ) AS NOT MATERIALIZED (
  ⋮
)
SELECT
  ⋮
FROM "male_1" AS "male_2"
=#

With can take more than one definition.

q = Select(:male_count => From(:male) |> Group() |> Select(Agg.count()),
           :female_count => From(:female) |> Group() |> Select(Agg.count())) |>
    With(:male => From(person) |> Where(Get.gender_concept_id .== 8507),
         :female => From(person) |> Where(Get.gender_concept_id .== 8532))

print(render(q))
#=>
WITH "male_1" ("_") AS (
  SELECT NULL AS "_"
  FROM "person" AS "person_1"
  WHERE ("person_1"."gender_concept_id" = 8507)
),
"female_1" ("_") AS (
  SELECT NULL AS "_"
  FROM "person" AS "person_2"
  WHERE ("person_2"."gender_concept_id" = 8532)
)
SELECT
  (
    SELECT count(*) AS "count"
    FROM "male_1" AS "male_2"
  ) AS "male_count",
  (
    SELECT count(*) AS "count"
    FROM "female_1" AS "female_2"
  ) AS "female_count"
=#

With can shadow the previous With definition.

q = From(:cohort) |>
    With(:cohort => From(:cohort) |> Where(Get.gender_concept_id .== 8507)) |>
    With(:cohort => From(:cohort) |> Where(Get.year_of_birth .>= 1950)) |>
    With(:cohort => From(person)) |>
    Select(Get.person_id)

print(render(q))
#=>
WITH "cohort_1" ("person_id", "gender_concept_id", "year_of_birth") AS (
  SELECT
    "person_1"."person_id",
    "person_1"."gender_concept_id",
    "person_1"."year_of_birth"
  FROM "person" AS "person_1"
),
"cohort_3" ("person_id", "gender_concept_id") AS (
  SELECT
    "cohort_2"."person_id",
    "cohort_2"."gender_concept_id"
  FROM "cohort_1" AS "cohort_2"
  WHERE ("cohort_2"."year_of_birth" >= 1950)
),
"cohort_5" ("person_id") AS (
  SELECT "cohort_4"."person_id"
  FROM "cohort_3" AS "cohort_4"
  WHERE ("cohort_4"."gender_concept_id" = 8507)
)
SELECT "cohort_6"."person_id"
FROM "cohort_5" AS "cohort_6"
=#

A With node can be created using @funsql.

q = @funsql begin
    from(male)
    with(male => from(person).filter(gender_concept_id == 8507),
         materialized = false)
end

display(q)
#=>
let q1 = From(:male),
    q2 = From(:person),
    q3 = q2 |> Where(Fun."="(Get.gender_concept_id, 8507)),
    q4 = q1 |> With(q3 |> As(:male), materialized = false)
    q4
end
=#

A dataset defined by With must have an explicit label assigned to it.

q = From(:person) |>
    With(From(person))

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: table reference `person` requires As in:
let person = SQLTable(:person, …),
    q1 = From(:person),
    q2 = From(person),
    q3 = q1 |> With(q2)
    q3
end
=#

Datasets defined by With must have a unique label.

From(:p) |>
With(:p => From(person),
     :p => From(person))
#=>
ERROR: FunSQL.DuplicateLabelError: `p` is used more than once in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = From(person),
    q3 = With(q1 |> As(:p), q2 |> As(:p))
    q3
end
=#

It is an error for From to refer to an undefined dataset.

q = From(:p)

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `p` in:
let q1 = From(:p)
    q1
end
=#

A variant of With called Over exchanges the positions of the definition and the query that uses it.

q = From(person) |>
    Where(Get.gender_concept_id .== 8507) |>
    As(:male) |>
    Over(From(:male))
#-> (…) |> Over(…)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Where(Fun."="(Get.gender_concept_id, 8507)),
    q3 = From(:male),
    q4 = q2 |> As(:male) |> Over(q3)
    q4
end
=#

print(render(q))
#=>
WITH "male_1" ("person_id", …, "location_id") AS (
  SELECT
    "person_1"."person_id",
    ⋮
    "person_1"."location_id"
  FROM "person" AS "person_1"
  WHERE ("person_1"."gender_concept_id" = 8507)
)
SELECT
  "male_2"."person_id",
  ⋮
  "male_2"."location_id"
FROM "male_1" AS "male_2"
=#

An Over node can be created using @funsql.

q = @funsql begin
    male => from(person).filter(gender_concept_id == 8507)
    over(from(male), materialized = true)
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Where(Fun."="(Get.gender_concept_id, 8507)),
    q3 = From(:male),
    q4 = q2 |> As(:male) |> Over(q3, materialized = true)
    q4
end
=#

A variant of With called WithExternal can be used to prepare a definition for a CREATE TABLE AS or SELECT INTO statement.

with_external_handler((tbl, def)) =
    println("CREATE TEMP TABLE ",
            render(ID(tbl.qualifiers, tbl.name)),
            " (", join([render(ID(c.name)) for (n, c) in tbl.columns], ", "), ") AS\n",
            render(def), ";\n")

q = From(:male) |>
    WithExternal(From(person) |>
                 Where(Get.gender_concept_id .== 8507) |>
                 As(:male),
                 qualifiers = [:tmp],
                 handler = with_external_handler)
#-> (…) |> WithExternal(…, qualifiers = [:tmp], handler = with_external_handler)

print(render(q))
#=>
CREATE TEMP TABLE "tmp"."male" ("person_id", …, "location_id") AS
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."gender_concept_id" = 8507);

SELECT
  "male_1"."person_id",
  ⋮
  "male_1"."location_id"
FROM "tmp"."male" AS "male_1"
=#

Datasets defined by WithExternal must have a unique label.

From(:p) |>
WithExternal(:p => From(person),
             :p => From(person))
#=>
ERROR: FunSQL.DuplicateLabelError: `p` is used more than once in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = From(person),
    q3 = WithExternal(q1 |> As(:p), q2 |> As(:p))
    q3
end
=#

Group

The Group constructor creates a subquery that summarizes the rows partitioned by the given keys.

q = From(person) |>
    Group(Get.year_of_birth)
#-> (…) |> Group(…)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Group(Get.year_of_birth)
    q2
end
=#

print(render(q))
#=>
SELECT DISTINCT "person_1"."year_of_birth"
FROM "person" AS "person_1"
=#

A Group node can be created using @funsql notation.

q = @funsql from(person).group(year_of_birth)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Group(Get.year_of_birth)
    q2
end
=#

Partitions created by Group are summarized using aggregate expressions.

Agg.count
#-> Agg.count

q = From(person) |>
    Group(Get.year_of_birth) |>
    Select(Get.year_of_birth, Agg.count())

print(render(q))
#=>
SELECT
  "person_1"."year_of_birth",
  count(*) AS "count"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
=#

Aggregate functions can be created with @funsql.

e = @funsql agg(min, year_of_birth)

display(e)
#-> Agg.min(Get.year_of_birth)

e = @funsql min(year_of_birth)

display(e)
#-> Agg.min(Get.year_of_birth)

e = @funsql count(filter = year_of_birth > 1950)

display(e)
#-> Agg.count(filter = Fun.">"(Get.year_of_birth, 1950))

e = @funsql visit_group.count()

display(e)
#-> Get.visit_group |> Agg.count()

e = @funsql `count`()

display(e)
#-> Agg.count()

e = @funsql visit_group.`count`()

display(e)
#-> Get.visit_group |> Agg.count()

e = @funsql `visit_group`.`count`()

display(e)
#-> Get.visit_group |> Agg.count()

Group will create a single instance of an aggregate function even if it is used more than once.

q = From(person) |>
    Join(:visit_group => From(visit_occurrence) |>
                         Group(Get.person_id),
         on = Get.person_id .== Get.visit_group.person_id) |>
    Where(Agg.count(over = Get.visit_group) .>= 2) |>
    Select(Get.person_id, Agg.count(over = Get.visit_group))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "visit_group_1"."count"
FROM "person" AS "person_1"
JOIN (
  SELECT
    count(*) AS "count",
    "visit_occurrence_1"."person_id"
  FROM "visit_occurrence" AS "visit_occurrence_1"
  GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_group_1" ON ("person_1"."person_id" = "visit_group_1"."person_id")
WHERE ("visit_group_1"."count" >= 2)
=#

Group creates a nested subquery when this is necessary to avoid duplicating the group key expression.

q = From(person) |>
    Group(:age => 2000 .- Get.year_of_birth)

print(render(q))
#=>
SELECT DISTINCT (2000 - "person_1"."year_of_birth") AS "age"
FROM "person" AS "person_1"
=#

q = From(person) |>
    Group(:age => 2000 .- Get.year_of_birth) |>
    Select(Agg.count())

print(render(q))
#=>
SELECT count(*) AS "count"
FROM "person" AS "person_1"
GROUP BY (2000 - "person_1"."year_of_birth")
=#

q = From(person) |>
    Group(:age => 2000 .- Get.year_of_birth) |>
    Define(Agg.count())

print(render(q))
#=>
SELECT
  "person_2"."age",
  count(*) AS "count"
FROM (
  SELECT (2000 - "person_1"."year_of_birth") AS "age"
  FROM "person" AS "person_1"
) AS "person_2"
GROUP BY "person_2"."age"
=#

Group could be used consequently.

q = From(measurement) |>
    Group(Get.measurement_concept_id) |>
    Group(Agg.count()) |>
    Select(Get.count, :size => Agg.count())

print(render(q))
#=>
SELECT
  "measurement_2"."count",
  count(*) AS "size"
FROM (
  SELECT count(*) AS "count"
  FROM "measurement" AS "measurement_1"
  GROUP BY "measurement_1"."measurement_concept_id"
) AS "measurement_2"
GROUP BY "measurement_2"."count"
=#

Group accepts an empty list of keys.

q = From(person) |>
    Group() |>
    Select(Agg.count(), Agg.min(Get.year_of_birth), Agg.max(Get.year_of_birth))

print(render(q))
#=>
SELECT
  count(*) AS "count",
  min("person_1"."year_of_birth") AS "min",
  max("person_1"."year_of_birth") AS "max"
FROM "person" AS "person_1"
=#

Group with no keys and no aggregates creates a trivial subquery.

q = From(person) |>
    Group()

print(render(q))
#-> SELECT NULL AS "_"

A SELECT DISTINCT query must include all the keys even when they are not used downstream.

q = From(person) |>
    Group(Get.year_of_birth) |>
    Group() |>
    Select(Agg.count())

print(render(q))
#=>
SELECT count(*) AS "count"
FROM (
  SELECT DISTINCT "person_1"."year_of_birth"
  FROM "person" AS "person_1"
) AS "person_2"
=#

Group allows specifying the grouping sets, either with grouping mode indicators :cube or :rollup, or by explicit enumeration.

q = From(person) |>
    Group(Get.year_of_birth, sets = :cube)
    Define(Agg.count())

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Group(Get.year_of_birth, sets = :CUBE)
    q2
end
=#

print(render(q))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
GROUP BY CUBE("person_1"."year_of_birth")
=#

q = From(person) |>
    Group(Get.year_of_birth, sets = [[1], Int[]])
    Define(Agg.count())

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Group(Get.year_of_birth, sets = [[1], []])
    q2
end
=#

print(render(q))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
GROUP BY GROUPING SETS(("person_1"."year_of_birth"), ())
=#

Group allows specifying grouping sets using names of the grouping keys.

q = From(person) |>
    Group(Get.year_of_birth, Get.gender_concept_id,
          sets = ([:year_of_birth], ["gender_concept_id"]))
    Define(Agg.count())

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |>
         Group(Get.year_of_birth, Get.gender_concept_id, sets = [[1], [2]])
    q2
end
=#

Group will report when a grouping set refers to an unknown key.

From(person) |>
Group(Get.year_of_birth, sets = [[:gender_concept_id], []])
#=>
ERROR: FunSQL.InvalidGroupingSetsError: `gender_concept_id` is not a valid key
=#

Group complains about out-of-bound or incomplete grouping sets.

From(person) |>
Group(Get.year_of_birth, sets = [[1, 2], [1], []])
#=>
ERROR: FunSQL.InvalidGroupingSetsError: `2` is out of bounds in:
let q1 = Group(Get.year_of_birth, sets = [[1, 2], [1], []])
    q1
end
=#

From(person) |>
Group(Get.year_of_birth, Get.gender_concept_id,
      sets = [[1], []])
#=>
ERROR: FunSQL.InvalidGroupingSetsError: missing keys `[:year_of_birth]` in:
let q1 = Group(Get.year_of_birth, Get.gender_concept_id, sets = [[1], []])
    q1
end
=#

Group allows specifying the name of a group field.

q = From(person) |>
    Group(Get.year_of_birth, name = :person) |>
    Define(Get.person |> Agg.count())

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Group(Get.year_of_birth, name = :person),
    q3 = q2 |> Define(Get.person |> Agg.count())
    q3
end
=#

print(render(q))
#=>
SELECT
  "person_1"."year_of_birth",
  count(*) AS "count"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
=#

Group requires all keys to have unique aliases.

q = From(person) |>
    Group(Get.person_id, Get.person_id)
#=>
ERROR: FunSQL.DuplicateLabelError: `person_id` is used more than once in:
Group(Get.person_id, Get.person_id)
=#

The name of group field must also be unique.

q = From(person) |>
    Group(:group => Get.year_of_birth, name = :group)
#=>
ERROR: FunSQL.DuplicateLabelError: `group` is used more than once in:
Group(Get.year_of_birth |> As(:group), name = :group)
=#

Group ensures that each aggregate expression gets a unique alias.

q = From(person) |>
    Join(:visit_group => From(visit_occurrence) |>
                         Group(Get.person_id),
         on = Get.person_id .== Get.visit_group.person_id) |>
    Select(Get.person_id,
           :max_visit_start_date =>
               Get.visit_group |> Agg.max(Get.visit_start_date),
           :max_visit_end_date =>
               Get.visit_group |> Agg.max(Get.visit_end_date))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "visit_group_1"."max" AS "max_visit_start_date",
  "visit_group_1"."max_2" AS "max_visit_end_date"
FROM "person" AS "person_1"
JOIN (
  SELECT
    max("visit_occurrence_1"."visit_start_date") AS "max",
    max("visit_occurrence_1"."visit_end_date") AS "max_2",
    "visit_occurrence_1"."person_id"
  FROM "visit_occurrence" AS "visit_occurrence_1"
  GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_group_1" ON ("person_1"."person_id" = "visit_group_1"."person_id")
=#

Aggregate expressions can be applied to a filtered portion of a partition.

e = Agg.count(filter = Get.year_of_birth .> 1950)
#-> Agg.count(filter = (…))

display(e)
#-> Agg.count(filter = Fun.">"(Get.year_of_birth, 1950))

q = From(person) |> Group() |> Select(e)

print(render(q))
#=>
SELECT (count(*) FILTER (WHERE ("person_1"."year_of_birth" > 1950))) AS "count"
FROM "person" AS "person_1"
=#

It is an error for an aggregate expression to be used without Group.

q = From(person) |> Select(Agg.count())

print(render(q))
#=>
ERROR: FunSQL.ReferenceError: aggregate expression requires Group or Partition in:
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Select(Agg.count())
    q2
end
=#

Group in a Join expression shadows any previous applications of Group.

qₚ = From(person)
qᵥ = From(visit_occurrence) |> Group(:visit_person_id => Get.person_id)
qₘ = From(measurement) |> Group(:measurement_person_id => Get.person_id)

q = qₚ |>
    Join(qᵥ, on = Get.person_id .== Get.visit_person_id, left = true) |>
    Join(qₘ, on = Get.person_id .== Get.measurement_person_id, left = true) |>
    Select(Get.person_id, :count => Fun.coalesce(Agg.count(), 0))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  coalesce("measurement_2"."count", 0) AS "count"
FROM "person" AS "person_1"
LEFT JOIN (
  SELECT DISTINCT "visit_occurrence_1"."person_id" AS "visit_person_id"
  FROM "visit_occurrence" AS "visit_occurrence_1"
) AS "visit_occurrence_2" ON ("person_1"."person_id" = "visit_occurrence_2"."visit_person_id")
LEFT JOIN (
  SELECT
    count(*) AS "count",
    "measurement_1"."person_id" AS "measurement_person_id"
  FROM "measurement" AS "measurement_1"
  GROUP BY "measurement_1"."person_id"
) AS "measurement_2" ON ("person_1"."person_id" = "measurement_2"."measurement_person_id")
=#

It is still possible to use an aggregate in the context of a Join when the corresponding Group could be determined unambiguously.

qₚ = From(person)
qᵥ = From(visit_occurrence) |> Group(:visit_person_id => Get.person_id)

q = qₚ |>
    Join(qᵥ, on = Get.person_id .== Get.visit_person_id, left = true) |>
    Select(Get.person_id, :count => Fun.coalesce(Agg.count(), 0))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  coalesce("visit_occurrence_2"."count", 0) AS "count"
FROM "person" AS "person_1"
LEFT JOIN (
  SELECT
    count(*) AS "count",
    "visit_occurrence_1"."person_id" AS "visit_person_id"
  FROM "visit_occurrence" AS "visit_occurrence_1"
  GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_occurrence_2" ON ("person_1"."person_id" = "visit_occurrence_2"."visit_person_id")
=#

Partition

The Partition constructor creates a subquery that partitions the rows by the given keys.

q = From(person) |>
    Partition(Get.year_of_birth, order_by = [Get.month_of_birth, Get.day_of_birth])
#-> (…) |> Partition(…, order_by = […])

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |>
         Partition(Get.year_of_birth,
                   order_by = [Get.month_of_birth, Get.day_of_birth])
    q2
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

A Partition node can be created with @funsql notation.

q = @funsql begin
    from(person)
    partition(year_of_birth, order_by = [month_of_birth, day_of_birth])
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |>
         Partition(Get.year_of_birth,
                   order_by = [Get.month_of_birth, Get.day_of_birth])
    q2
end
=#

Calculations across the rows of the partitions are performed by window functions.

q = From(person) |>
    Partition(Get.gender_concept_id) |>
    Select(Get.person_id, Agg.row_number())

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Partition(Get.gender_concept_id),
    q3 = q2 |> Select(Get.person_id, Agg.row_number())
    q3
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  (row_number() OVER (PARTITION BY "person_1"."gender_concept_id")) AS "row_number"
FROM "person" AS "person_1"
=#

q = From(visit_occurrence) |>
    Partition(Get.person_id) |>
    Where(Get.visit_start_date .- Agg.min(Get.visit_start_date, filter = Get.visit_start_date .< Get.visit_end_date) .> 30) |>
    Select(Get.person_id, Get.visit_start_date)

print(render(q))
#=>
SELECT
  "visit_occurrence_2"."person_id",
  "visit_occurrence_2"."visit_start_date"
FROM (
  SELECT
    "visit_occurrence_1"."person_id",
    "visit_occurrence_1"."visit_start_date",
    (min("visit_occurrence_1"."visit_start_date") FILTER (WHERE ("visit_occurrence_1"."visit_start_date" < "visit_occurrence_1"."visit_end_date")) OVER (PARTITION BY "visit_occurrence_1"."person_id")) AS "min"
  FROM "visit_occurrence" AS "visit_occurrence_1"
) AS "visit_occurrence_2"
WHERE (("visit_occurrence_2"."visit_start_date" - "visit_occurrence_2"."min") > 30)
=#

A partition may specify the window frame.

q = From(person) |>
    Group(Get.year_of_birth) |>
    Partition(order_by = [Get.year_of_birth],
              frame = (mode = :range, start = -1, finish = 1)) |>
    Select(Get.year_of_birth, Agg.avg(Agg.count()))

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Group(Get.year_of_birth),
    q3 = q2 |>
         Partition(order_by = [Get.year_of_birth],
                   frame = (mode = :RANGE, start = -1, finish = 1)),
    q4 = q3 |> Select(Get.year_of_birth, Agg.avg(Agg.count()))
    q4
end
=#

print(render(q))
#=>
SELECT
  "person_1"."year_of_birth",
  (avg(count(*)) OVER (ORDER BY "person_1"."year_of_birth" RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING)) AS "avg"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
=#

A window frame can be specified in @funsql notation.

q = @funsql partition(order_by = [year_of_birth], frame = groups)

display(q)
#-> Partition(order_by = [Get.year_of_birth], frame = :GROUPS)

q = @funsql partition(order_by = [year_of_birth], frame = (mode = range, start = -1, finish = 1))

display(q)
#=>
Partition(order_by = [Get.year_of_birth],
          frame = (mode = :RANGE, start = -1, finish = 1))
=#

q = @funsql partition(; order_by = [year_of_birth], frame = (mode = range, start = -Inf, finish = Inf, exclude = current_row))

display(q)
#=>
Partition(
    order_by = [Get.year_of_birth],
    frame =
        (mode = :RANGE, start = -Inf, finish = Inf, exclude = :CURRENT_ROW))
=#

Partition may assign an explicit name to the partition.

q = From(person) |>
    Group(Get.gender_concept_id) |>
    Partition(name = :all) |>
    Define(:pct => 100 .* Agg.count() ./ (Get.all |> Agg.sum(Agg.count())))

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Group(Get.gender_concept_id),
    q3 = q2 |> Partition(name = :all),
    q4 = q3 |>
         Define(Fun."/"(Fun."*"(100, Agg.count()),
                        Get.all |> Agg.sum(Agg.count())) |>
                As(:pct))
    q4
end
=#

print(render(q))
#=>
SELECT
  "person_2"."gender_concept_id",
  ((100 * "person_2"."count") / (sum("person_2"."count") OVER ())) AS "pct"
FROM (
  SELECT
    "person_1"."gender_concept_id",
    count(*) AS "count"
  FROM "person" AS "person_1"
  GROUP BY "person_1"."gender_concept_id"
) AS "person_2"
=#

This name may shadow an existing column.

q = From(location) |>
    Partition(Get.location_id, name = :location_id)

print(render(q))
#=>
SELECT
  "location_1"."city",
  "location_1"."state"
FROM "location" AS "location_1"
=#

It is common to use several Partition nodes in a row like in the following example which calculates non-overlapping visits.

q = From(visit_occurrence) |>
    Partition(Get.person_id,
              order_by = [Get.visit_start_date],
              frame = (mode = :rows, start = -Inf, finish = -1)) |>
    Define(:boundary => Agg.max(Get.visit_end_date)) |>
    Define(:gap => Get.visit_start_date .- Get.boundary) |>
    Define(:new => Fun.case(Get.gap .<= 0, 0, 1)) |>
    Partition(Get.person_id,
              order_by = [Get.visit_start_date, .- Get.new],
              frame = :rows) |>
    Define(:group => Agg.sum(Get.new)) |>
    Group(Get.person_id, Get.group) |>
    Define(:start_date => Agg.min(Get.visit_start_date),
           :end_date => Agg.max(Get.visit_end_date)) |>
    Select(Get.person_id, Get.start_date, Get.end_date)

print(render(q))
#=>
SELECT
  "visit_occurrence_3"."person_id",
  min("visit_occurrence_3"."visit_start_date") AS "start_date",
  max("visit_occurrence_3"."visit_end_date") AS "end_date"
FROM (
  SELECT
    "visit_occurrence_2"."person_id",
    (sum("visit_occurrence_2"."new") OVER (PARTITION BY "visit_occurrence_2"."person_id" ORDER BY "visit_occurrence_2"."visit_start_date", (- "visit_occurrence_2"."new") ROWS UNBOUNDED PRECEDING)) AS "group",
    "visit_occurrence_2"."visit_start_date",
    "visit_occurrence_2"."visit_end_date"
  FROM (
    SELECT
      "visit_occurrence_1"."person_id",
      "visit_occurrence_1"."visit_start_date",
      "visit_occurrence_1"."visit_end_date",
      (CASE WHEN (("visit_occurrence_1"."visit_start_date" - (max("visit_occurrence_1"."visit_end_date") OVER (PARTITION BY "visit_occurrence_1"."person_id" ORDER BY "visit_occurrence_1"."visit_start_date" ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING))) <= 0) THEN 0 ELSE 1 END) AS "new"
    FROM "visit_occurrence" AS "visit_occurrence_1"
  ) AS "visit_occurrence_2"
) AS "visit_occurrence_3"
GROUP BY
  "visit_occurrence_3"."person_id",
  "visit_occurrence_3"."group"
=#

Join

The Join constructor creates a subquery that correlates two nested subqueries.

q = From(person) |>
    Join(:location => From(location),
         on = Get.location_id .== Get.location.location_id,
         left = true)
#-> (…) |> Join(…)

display(q)
#=>
let person = SQLTable(:person, …),
    location = SQLTable(:location, …),
    q1 = From(person),
    q2 = From(location),
    q3 = q1 |>
         Join(q2 |> As(:location),
              Fun."="(Get.location_id, Get.location.location_id),
              left = true)
    q3
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
LEFT JOIN "location" AS "location_1" ON ("person_1"."location_id" = "location_1"."location_id")
=#

LEFT JOIN is commonly used and has its own constructor.

q = From(person) |>
    LeftJoin(:location => From(location),
             on = Get.location_id .== Get.location.location_id)

display(q)
#=>
let person = SQLTable(:person, …),
    location = SQLTable(:location, …),
    q1 = From(person),
    q2 = From(location),
    q3 = q1 |>
         Join(q2 |> As(:location),
              Fun."="(Get.location_id, Get.location.location_id),
              left = true)
    q3
end
=#

Various Join nodes can be created with @funsql notation.

q = @funsql begin
    from(person)
    join(location => from(location),
         on = location_id == location.location_id,
         left = true)
end

display(q)
#=>
let q1 = From(:person),
    q2 = From(:location),
    q3 = q1 |>
         Join(q2 |> As(:location),
              Fun."="(Get.location_id, Get.location.location_id),
              left = true)
    q3
end
=#

q = @funsql begin
    from(person)
    left_join(location => from(location),
              location_id == location.location_id)
end

display(q)
#=>
let q1 = From(:person),
    q2 = From(:location),
    q3 = q1 |>
         Join(q2 |> As(:location),
              Fun."="(Get.location_id, Get.location.location_id),
              left = true)
    q3
end
=#

q = @funsql begin
    from(person)
    cross_join(other => from(person))
end

display(q)
#=>
let q1 = From(:person),
    q2 = From(:person),
    q3 = q1 |> Join(q2 |> As(:other), true)
    q3
end
=#

Nested subqueries that are combined with Join may fail to collapse.

q = From(person) |>
    Where(Get.year_of_birth .> 1970) |>
    Join(:location => From(location) |>
                      Where(Get.state .== "IL"),
         on = (Get.location_id .== Get.location.location_id)) |>
    Select(Get.person_id, Get.location.city)

print(render(q))
#=>
SELECT
  "person_2"."person_id",
  "location_2"."city"
FROM (
  SELECT
    "person_1"."person_id",
    "person_1"."location_id"
  FROM "person" AS "person_1"
  WHERE ("person_1"."year_of_birth" > 1970)
) AS "person_2"
JOIN (
  SELECT
    "location_1"."city",
    "location_1"."location_id"
  FROM "location" AS "location_1"
  WHERE ("location_1"."state" = 'IL')
) AS "location_2" ON ("person_2"."location_id" = "location_2"."location_id")
=#

An outer Join does not collapse its branches when doing so may change the values of unmatched rows.

join_q(; left = false, right = false) =
    From(:cohort1) |> Define(:n_cohort => 1) |> As(:cohort1) |>
    Join(From(:cohort2) |> Define(:n_cohort => 1) |> As(:cohort2),
         on = Get.cohort1.person_id .== Get.cohort2.person_id,
         left = left,
         right = right) |>
    Select(:person_id => Fun.coalesce(Get.cohort1.person_id, Get.cohort2.person_id),
           :n_cohort => Fun.coalesce(Get.cohort1.n_cohort, 0) .+ Fun.coalesce(Get.cohort2.n_cohort, 0)) |>
    With(:cohort1 => From(person) |> Where(Get.year_of_birth .> 1970),
         :cohort2 => From(person) |> Where(Get.year_of_birth .< 1990))

print(render(join_q()))
#=>
WITH "cohort1_1" ("person_id") AS (
  SELECT "person_1"."person_id"
  FROM "person" AS "person_1"
  WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
  SELECT "person_2"."person_id"
  FROM "person" AS "person_2"
  WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
  coalesce("cohort1_2"."person_id", "cohort2_2"."person_id") AS "person_id",
  (coalesce(1, 0) + coalesce(1, 0)) AS "n_cohort"
FROM "cohort1_1" AS "cohort1_2"
JOIN "cohort2_1" AS "cohort2_2" ON ("cohort1_2"."person_id" = "cohort2_2"."person_id")
=#

print(render(join_q(left = true)))
#=>
WITH "cohort1_1" ("person_id") AS (
  SELECT "person_1"."person_id"
  FROM "person" AS "person_1"
  WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
  SELECT "person_2"."person_id"
  FROM "person" AS "person_2"
  WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
  coalesce("cohort1_2"."person_id", "cohort2_3"."person_id") AS "person_id",
  (coalesce(1, 0) + coalesce("cohort2_3"."n_cohort", 0)) AS "n_cohort"
FROM "cohort1_1" AS "cohort1_2"
LEFT JOIN (
  SELECT
    "cohort2_2"."person_id",
    1 AS "n_cohort"
  FROM "cohort2_1" AS "cohort2_2"
) AS "cohort2_3" ON ("cohort1_2"."person_id" = "cohort2_3"."person_id")
=#

print(render(join_q(right = true)))
#=>
WITH "cohort1_1" ("person_id") AS (
  SELECT "person_1"."person_id"
  FROM "person" AS "person_1"
  WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
  SELECT "person_2"."person_id"
  FROM "person" AS "person_2"
  WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
  coalesce("cohort1_3"."person_id", "cohort2_2"."person_id") AS "person_id",
  (coalesce("cohort1_3"."n_cohort", 0) + coalesce(1, 0)) AS "n_cohort"
FROM (
  SELECT
    "cohort1_2"."person_id",
    1 AS "n_cohort"
  FROM "cohort1_1" AS "cohort1_2"
) AS "cohort1_3"
RIGHT JOIN "cohort2_1" AS "cohort2_2" ON ("cohort1_3"."person_id" = "cohort2_2"."person_id")
=#

print(render(join_q(left = true, right = true)))
#=>
WITH "cohort1_1" ("person_id") AS (
  SELECT "person_1"."person_id"
  FROM "person" AS "person_1"
  WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
  SELECT "person_2"."person_id"
  FROM "person" AS "person_2"
  WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
  coalesce("cohort1_3"."person_id", "cohort2_3"."person_id") AS "person_id",
  (coalesce("cohort1_3"."n_cohort", 0) + coalesce("cohort2_3"."n_cohort", 0)) AS "n_cohort"
FROM (
  SELECT
    "cohort1_2"."person_id",
    1 AS "n_cohort"
  FROM "cohort1_1" AS "cohort1_2"
) AS "cohort1_3"
FULL JOIN (
  SELECT
    "cohort2_2"."person_id",
    1 AS "n_cohort"
  FROM "cohort2_1" AS "cohort2_2"
) AS "cohort2_3" ON ("cohort1_3"."person_id" = "cohort2_3"."person_id")
=#

Join can be applied to correlated subqueries.

ql(person_id) =
    From(visit_occurrence) |>
    Where(Get.person_id .== Var.PERSON_ID) |>
    Partition(order_by = [Get.visit_start_date]) |>
    Where(Agg.row_number() .== 1) |>
    Bind(:PERSON_ID => person_id)

print(render(ql(1)))
#=>
SELECT
  "visit_occurrence_2"."visit_occurrence_id",
  "visit_occurrence_2"."person_id",
  "visit_occurrence_2"."visit_start_date",
  "visit_occurrence_2"."visit_end_date"
FROM (
  SELECT
    "visit_occurrence_1"."visit_occurrence_id",
    "visit_occurrence_1"."person_id",
    "visit_occurrence_1"."visit_start_date",
    "visit_occurrence_1"."visit_end_date",
    (row_number() OVER (ORDER BY "visit_occurrence_1"."visit_start_date")) AS "row_number"
  FROM "visit_occurrence" AS "visit_occurrence_1"
  WHERE ("visit_occurrence_1"."person_id" = 1)
) AS "visit_occurrence_2"
WHERE ("visit_occurrence_2"."row_number" = 1)
=#

q = From(person) |>
    Join(:visit => ql(Get.person_id), on = true) |>
    Select(Get.person_id,
           Get.visit.visit_occurrence_id,
           Get.visit.visit_start_date)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  "visit_1"."visit_occurrence_id",
  "visit_1"."visit_start_date"
FROM "person" AS "person_1"
CROSS JOIN LATERAL (
  SELECT
    "visit_occurrence_2"."visit_occurrence_id",
    "visit_occurrence_2"."visit_start_date"
  FROM (
    SELECT
      "visit_occurrence_1"."visit_occurrence_id",
      "visit_occurrence_1"."visit_start_date",
      (row_number() OVER (ORDER BY "visit_occurrence_1"."visit_start_date")) AS "row_number"
    FROM "visit_occurrence" AS "visit_occurrence_1"
    WHERE ("visit_occurrence_1"."person_id" = "person_1"."person_id")
  ) AS "visit_occurrence_2"
  WHERE ("visit_occurrence_2"."row_number" = 1)
) AS "visit_1"
=#

The LATERAL keyword is omitted when the join branch is reduced to a function call.

q = From(concept) |>
Join(
    From(Fun.string_to_table(Get.concept_name, " "), columns = [:word]),
    on = true) |>
Group(Get.word)

print(render(q))
#=>
SELECT DISTINCT "string_to_table_1"."word"
FROM "concept" AS "concept_1"
CROSS JOIN string_to_table("concept_1"."concept_name", ' ') AS "string_to_table_1" ("word")
=#

Some database backends require LATERAL even in this case.

print(render(q, dialect = :spark))
#=>
SELECT DISTINCT `string_to_table_1`.`word`
FROM `concept` AS `concept_1`
CROSS JOIN LATERAL string_to_table(`concept_1`.`concept_name`, ' ') AS `string_to_table_1` (`word`)
=#

An optional Join is omitted when the output contains no data from its right branch.

q = From(person) |>
    LeftJoin(:location => From(location),
             on = Get.location_id .== Get.location.location_id,
             optional = true)

display(q)
#=>
let person = SQLTable(:person, …),
    location = SQLTable(:location, …),
    q1 = From(person),
    q2 = From(location),
    q3 = q1 |>
         Join(q2 |> As(:location),
              Fun."="(Get.location_id, Get.location.location_id),
              left = true,
              optional = true)
    q3
end
=#

print(render(q |> Select(Get.year_of_birth)))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
=#

print(render(q |> Select(Get.year_of_birth, Get.location.state)))
#=>
SELECT
  "person_1"."year_of_birth",
  "location_1"."state"
FROM "person" AS "person_1"
LEFT JOIN "location" AS "location_1" ON ("person_1"."location_id" = "location_1"."location_id")
=#

Order

The Order constructor creates a subquery for sorting the data.

q = From(person) |>
    Order(Get.year_of_birth)
#-> (…) |> Order(…)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Order(Get.year_of_birth)
    q2
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."year_of_birth"
=#

An Order node can be created with @funsql notation.

q = @funsql begin
    from(person)
    order(year_of_birth)
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Order(Get.year_of_birth)
    q2
end
=#

Order is often used together with Limit.

q = From(person) |>
    Order(Get.year_of_birth) |>
    Limit(10) |>
    Order(Get.person_id)

print(render(q))
#=>
SELECT
  "person_2"."person_id",
  ⋮
  "person_2"."location_id"
FROM (
  SELECT
    "person_1"."person_id",
    ⋮
    "person_1"."location_id"
  FROM "person" AS "person_1"
  ORDER BY "person_1"."year_of_birth"
  FETCH FIRST 10 ROWS ONLY
) AS "person_2"
ORDER BY "person_2"."person_id"
=#

An Order without columns to sort by is a no-op.

q = From(person) |>
    Order(by = [])
#-> (…) |> Order(by = [])

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

It is possible to specify ascending or descending order of the sort column.

q = From(person) |>
    Order(Get.year_of_birth |> Desc(nulls = :first),
          Get.person_id |> Asc())

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |>
         Order(Get.year_of_birth |> Desc(nulls = :NULLS_FIRST),
               Get.person_id |> Asc())
    q2
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY
  "person_1"."year_of_birth" DESC NULLS FIRST,
  "person_1"."person_id" ASC
=#

A generic Sort constructor could also be used for this purpose.

q = From(person) |>
    Order(Get.year_of_birth |> Sort(:desc, nulls = :first),
          Get.person_id |> Sort(:asc))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY
  "person_1"."year_of_birth" DESC NULLS FIRST,
  "person_1"."person_id" ASC
=#

Sort decorations can be created with @funsql.

q = @funsql begin
    from(person)
    order(year_of_birth.desc(nulls = first), person_id.asc())
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |>
         Order(Get.year_of_birth |> Desc(nulls = :NULLS_FIRST),
               Get.person_id |> Asc())
    q2
end
=#

q = @funsql begin
    from(person)
    order(year_of_birth.sort(desc, nulls = first), person_id.sort(asc))
end

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |>
         Order(Get.year_of_birth |> Desc(nulls = :NULLS_FIRST),
               Get.person_id |> Asc())
    q2
end
=#

Limit

The Limit constructor creates a subquery that takes a fixed-size slice of the dataset.

q = From(person) |>
    Order(Get.person_id) |>
    Limit(10)
#-> (…) |> Limit(10)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Order(Get.person_id),
    q3 = q2 |> Limit(10)
    q3
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."person_id"
FETCH FIRST 10 ROWS ONLY
=#

Both the offset and the limit can be specified.

q = From(person) |>
    Order(Get.person_id) |>
    Limit(100, 10)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Order(Get.person_id),
    q3 = q2 |> Limit(100, 10)
    q3
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."person_id"
OFFSET 100 ROWS
FETCH NEXT 10 ROWS ONLY
=#

q = From(person) |>
    Order(Get.person_id) |>
    Limit(101:110)

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."person_id"
OFFSET 100 ROWS
FETCH NEXT 10 ROWS ONLY
=#

q = From(person) |>
    Limit(offset = 100) |>
    Limit(limit = 10)

print(render(q))
#=>
SELECT
  "person_2"."person_id",
  ⋮
  "person_2"."location_id"
FROM (
  SELECT
    "person_1"."person_id",
    ⋮
    "person_1"."location_id"
  FROM "person" AS "person_1"
  OFFSET 100 ROWS
) AS "person_2"
FETCH FIRST 10 ROWS ONLY
=#

q = From(person) |>
    Limit()

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
=#

A Limit node can be created with @funsql notation.

q = @funsql from(person).order(person_id).limit(10)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Order(Get.person_id),
    q3 = q2 |> Limit(10)
    q3
end
=#

q = @funsql from(person).order(person_id).limit(100, 10)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Order(Get.person_id),
    q3 = q2 |> Limit(100, 10)
    q3
end
=#

q = @funsql from(person).order(person_id).limit(101:110)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Order(Get.person_id),
    q3 = q2 |> Limit(100, 10)
    q3
end
=#

Select

The Select constructor creates a subquery that fixes the output columns.

q = From(person) |>
    Select(Get.person_id)
#-> (…) |> Select(…)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Select(Get.person_id)
    q2
end
=#

print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#

A Select node can be created with @funsql notation.

q = @funsql from(person).select(person_id)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Select(Get.person_id)
    q2
end
=#

Select does not have to be the last subquery in a chain, but it always creates a complete subquery.

q = From(person) |>
    Select(Get.year_of_birth) |>
    Where(Fun.">"(Get.year_of_birth, 2000))

print(render(q))
#=>
SELECT "person_2"."year_of_birth"
FROM (
  SELECT "person_1"."year_of_birth"
  FROM "person" AS "person_1"
) AS "person_2"
WHERE ("person_2"."year_of_birth" > 2000)
=#

Select requires all columns in the list to have unique aliases.

q = From(person) |>
    Select(Get.person_id, Get.person_id)
#=>
ERROR: FunSQL.DuplicateLabelError: `person_id` is used more than once in:
Select(Get.person_id, Get.person_id)
=#

Where

The Where constructor creates a subquery that filters by the given condition.

q = From(person) |>
    Where(Fun.">"(Get.year_of_birth, 2000))
#-> (…) |> Where(…)

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000))
    q2
end
=#

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 2000)
=#

A Where node can be created with @funsql notation.

q = @funsql from(person).filter(year_of_birth > 2000)

display(q)
#=>
let q1 = From(:person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000))
    q2
end
=#

Several Where operations in a row are collapsed to a single WHERE clause.

q = From(person) |>
    Where(Fun.">"(Get.year_of_birth, 2000)) |>
    Where(Fun."<"(Get.year_of_birth, 2020)) |>
    Where(Fun."<>"(Get.year_of_birth, 2010))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE
  ("person_1"."year_of_birth" > 2000) AND
  ("person_1"."year_of_birth" < 2020) AND
  ("person_1"."year_of_birth" <> 2010)
=#

q = From(person) |>
    Where(Get.year_of_birth .!= 2010) |>
    Where(Fun.and(Get.year_of_birth .> 2000, Get.year_of_birth .< 2020))

print(render(q))
#=>
SELECT
  "person_1"."person_id",
  ⋮
  "person_1"."location_id"
FROM "person" AS "person_1"
WHERE
  ("person_1"."year_of_birth" <> 2010) AND
  ("person_1"."year_of_birth" > 2000) AND
  ("person_1"."year_of_birth" < 2020)
=#

Where that follows Group subquery is transformed to a HAVING clause.

q = From(person) |>
    Group(Get.year_of_birth) |>
    Where(Agg.count() .> 10)

print(render(q))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
HAVING (count(*) > 10)
=#

q = From(person) |>
    Group(Get.gender_concept_id) |>
    Where(Agg.count(filter = Get.year_of_birth .== 2010) .> 10) |>
    Where(Agg.count(filter = Get.year_of_birth .== 2000) .< 100) |>
    Where(Fun.and(Agg.count(filter = Get.year_of_birth .== 1933) .!= 33,
                  Agg.count(filter = Get.year_of_birth .== 1966) .!= 66))

print(render(q))
#=>
SELECT "person_1"."gender_concept_id"
FROM "person" AS "person_1"
GROUP BY "person_1"."gender_concept_id"
HAVING
  ((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2010))) > 10) AND
  ((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2000))) < 100) AND
  ((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 1933))) <> 33) AND
  ((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 1966))) <> 66)
=#

q = From(person) |>
    Group(Get.gender_concept_id) |>
    Where(Fun.or(Agg.count(filter = Get.year_of_birth .== 2010) .> 10,
                 Agg.count(filter = Get.year_of_birth .== 2000) .< 100))

print(render(q))
#=>
SELECT "person_1"."gender_concept_id"
FROM "person" AS "person_1"
GROUP BY "person_1"."gender_concept_id"
HAVING
  ((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2010))) > 10) OR
  ((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2000))) < 100)
=#

Highlighting

To highlight a node on the output, wrap it with Highlight.

q = From(person) |>
    Highlight(:underline) |>
    Where(Fun.">"(Get.year_of_birth |> Highlight(:bold), 2000) |>
          Highlight(:white)) |>
    Select(Get.person_id) |>
    Highlight(:green)
#-> (…) |> Highlight(:green)

When the query is displayed on a color terminal, the affected node is highlighted.

display(q)
#=>
let person = SQLTable(:person, …),
    q1 = From(person),
    q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
    q3 = q2 |> Select(Get.person_id)
    q3
end
=#

The Highlight node does not otherwise affect processing of the query.

print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 2000)
=#

A Highlight node can be created with @funsql notation.

q = @funsql from(person).highlight(red)

display(q)
#=>
let q1 = From(:person)
    q1
end
=#

Debugging

Enable debug logging to get some insight on how FunSQL translates a query object into SQL. Set the JULIA_DEBUG environment variable to the name of a translation stage and render() will print the result of this stage.

Consider the following query.

q = From(person) |>
    Where(Get.year_of_birth .<= 2000) |>
    Join(:location => From(location) |>
                      Where(Get.state .== "IL"),
         on = (Get.location_id .== Get.location.location_id)) |>
    Join(:visit_group => From(visit_occurrence) |>
                         Group(Get.person_id),
         on = (Get.person_id .== Get.visit_group.person_id),
         left = true) |>
    Select(Get.person_id,
           :max_visit_start_date =>
               Get.visit_group |> Agg.max(Get.visit_start_date))

At the first stage of the translation, render() resolves table references and determines node types.

#? VERSION >= v"1.7"    # https://github.com/JuliaLang/julia/issues/26798
withenv("JULIA_DEBUG" => "FunSQL.resolve") do
    render(q)
end;
#=>
┌ Debug: FunSQL.resolve
│ let person = SQLTable(:person, …),
│     location = SQLTable(:location, …),
│     visit_occurrence = SQLTable(:visit_occurrence, …),
│     q1 = FromTable(table = person),
│     q2 = Resolved(RowType(:person_id => ScalarType(),
│                           :gender_concept_id => ScalarType(),
│                           :year_of_birth => ScalarType(),
│                           :month_of_birth => ScalarType(),
│                           :day_of_birth => ScalarType(),
│                           :birth_datetime => ScalarType(),
│                           :location_id => ScalarType()),
│                   over = q1) |>
│          Where(Resolved(ScalarType(),
│                         over = Fun."<="(Resolved(ScalarType(),
│                                                  over = Get.year_of_birth),
│                                         Resolved(ScalarType(), over = 2000)))),
⋮
│     WithContext(over = Resolved(RowType(:person_id => ScalarType(),
│                                         :max_visit_start_date => ScalarType()),
│                                 over = q9),
│                 catalog = SQLCatalog(dialect = SQLDialect(), cache = nothing))
│ end
└ @ FunSQL …
=#

Next, render() determines, for each tabular node, the data that it must produce.

#? VERSION >= v"1.7"
withenv("JULIA_DEBUG" => "FunSQL.link") do
    render(q)
end;
#=>
┌ Debug: FunSQL.link
│ let person = SQLTable(:person, …),
│     location = SQLTable(:location, …),
│     visit_occurrence = SQLTable(:visit_occurrence, …),
│     q1 = FromTable(table = person),
│     q2 = Get.person_id,
│     q3 = Get.person_id,
│     q4 = Get.location_id,
│     q5 = Get.year_of_birth,
│     q6 = Linked([q2, q3, q4, q5], 3, over = q1),
⋮
│     WithContext(over = q33,
│                 catalog = SQLCatalog(dialect = SQLDialect(), cache = nothing))
│ end
└ @ FunSQL …
=#

On the next stage, the query object is converted to a SQL syntax tree.

#? VERSION >= v"1.7"
withenv("JULIA_DEBUG" => "FunSQL.translate") do
    render(q)
end;
#=>
┌ Debug: FunSQL.translate
│ WITH_CONTEXT(
│     over = ID(:person) |>
│            AS(:person_1) |>
│            FROM() |>
│            WHERE(FUN("<=", ID(:person_1) |> ID(:year_of_birth), LIT(2000))) |>
│            SELECT(ID(:person_1) |> ID(:person_id),
│                   ID(:person_1) |> ID(:location_id)) |>
│            AS(:person_2) |>
│            FROM() |>
│            JOIN(ID(:location) |>
│                 AS(:location_1) |>
│                 FROM() |>
│                 WHERE(FUN("=", ID(:location_1) |> ID(:state), LIT("IL"))) |>
│                 SELECT(ID(:location_1) |> ID(:location_id)) |>
│                 AS(:location_2),
│                 FUN("=",
│                     ID(:person_2) |> ID(:location_id),
│                     ID(:location_2) |> ID(:location_id))) |>
│            JOIN(ID(:visit_occurrence) |>
│                 AS(:visit_occurrence_1) |>
│                 FROM() |>
│                 GROUP(ID(:visit_occurrence_1) |> ID(:person_id)) |>
│                 SELECT(AGG("max",
│                            ID(:visit_occurrence_1) |> ID(:visit_start_date)) |>
│                        AS(:max),
│                        ID(:visit_occurrence_1) |> ID(:person_id)) |>
│                 AS(:visit_group_1),
│                 FUN("=",
│                     ID(:person_2) |> ID(:person_id),
│                     ID(:visit_group_1) |> ID(:person_id)),
│                 left = true) |>
│            SELECT(ID(:person_2) |> ID(:person_id),
│                   ID(:visit_group_1) |> ID(:max) |> AS(:max_visit_start_date)),
│     columns = [SQLColumn(:person_id), SQLColumn(:max_visit_start_date)])
└ @ FunSQL …
=#

Finally, the SQL tree is serialized into SQL.

#? VERSION >= v"1.7"
withenv("JULIA_DEBUG" => "FunSQL.serialize") do
    render(q)
end;
#=>
┌ Debug: FunSQL.serialize
│ SQLString(
│     """
│     SELECT
│       "person_2"."person_id",
│       "visit_group_1"."max" AS "max_visit_start_date"
│     FROM (
│       SELECT
│         "person_1"."person_id",
│         "person_1"."location_id"
│       FROM "person" AS "person_1"
│       WHERE ("person_1"."year_of_birth" <= 2000)
│     ) AS "person_2"
│     JOIN (
│       SELECT "location_1"."location_id"
│       FROM "location" AS "location_1"
│       WHERE ("location_1"."state" = 'IL')
│     ) AS "location_2" ON ("person_2"."location_id" = "location_2"."location_id")
│     LEFT JOIN (
│       SELECT
│         max("visit_occurrence_1"."visit_start_date") AS "max",
│         "visit_occurrence_1"."person_id"
│       FROM "visit_occurrence" AS "visit_occurrence_1"
│       GROUP BY "visit_occurrence_1"."person_id"
│     ) AS "visit_group_1" ON ("person_2"."person_id" = "visit_group_1"."person_id")""",
│     columns = [SQLColumn(:person_id), SQLColumn(:max_visit_start_date)])
└ @ FunSQL …
=#