From a367bb9fc9674f9ea492e8dbd787bcfda136ca4e Mon Sep 17 00:00:00 2001 From: Judith Silverman Date: Wed, 17 Jun 2026 09:08:29 -0700 Subject: [PATCH 1/3] IRSA-7700: Add support for SQL WINDOW function --- src/Query/ADQL_parser.hxx | 4 + .../ADQL_parser/ADQL_parser/init_factor.cxx | 24 ++++- .../ADQL_parser/init_reserved_words.cxx | 5 +- .../Numeric_Value_Function.hxx | 3 +- .../Numeric_Value_Function/empty.cxx | 1 + .../Numeric_Value_Function/ostream.cxx | 1 + src/Query/Query_Specification/Select.hxx | 1 + .../Whitelisted_Function_Wrap.hxx | 1 + .../Query_Specification/Window_Function.hxx | 45 +++++++++ .../Window_Function_Wrap.hxx | 10 ++ test/parse_adql.cxx | 95 +++++++++++++++++++ wscript | 2 + 12 files changed, 187 insertions(+), 5 deletions(-) create mode 100644 src/Query/Query_Specification/Window_Function.hxx create mode 100644 src/Query/Query_Specification/Window_Function_Wrap.hxx diff --git a/src/Query/ADQL_parser.hxx b/src/Query/ADQL_parser.hxx index 5b1f98d..c5a1469 100644 --- a/src/Query/ADQL_parser.hxx +++ b/src/Query/ADQL_parser.hxx @@ -553,4 +553,8 @@ struct ADQL_parser : boost::spirit::qi::grammar reference_position; + + boost::spirit::qi::rule + window_function; }; diff --git a/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx b/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx index 88632d3..f06253d 100644 --- a/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx +++ b/src/Query/ADQL_parser/ADQL_parser/init_factor.cxx @@ -256,6 +256,19 @@ void ADQL_parser::init_factor() { -('(' > unsigned_integer > ')') >> ')']; cast_function.name("cast_function"); + window_function = + hold[whitelisted_function_name >> lit('(') >> lit(')') >> + &(ascii::no_case[lit("OVER")])][at_c<0>(_val) = _1] >> + lexeme[ascii::no_case[lit("OVER")] >> &boost::spirit::qi::space] >> + lit('(') >> + lexeme[ascii::no_case[lit("PARTITION")] >> &boost::spirit::qi::space] >> + lexeme[ascii::no_case[lit("BY")] >> &boost::spirit::qi::space] >> + (value_expression % lit(','))[at_c<1>(_val) = _1] >> + lexeme[ascii::no_case[lit("ORDER")] >> &boost::spirit::qi::space] >> + lexeme[ascii::no_case[lit("BY")] >> &boost::spirit::qi::space] >> + (value_expression % lit(','))[at_c<2>(_val) = _1] >> lit(')'); + window_function.name("window_function"); + position_function %= hold[ascii::no_case["POSITION"] >> '(' >> character_string_literal >> &no_skip[boost::spirit::qi::space] >> ascii::no_case["IN"] >> @@ -263,10 +276,16 @@ void ADQL_parser::init_factor() { position_function.name("position_function"); // FIXME: numeric_value_function should have - // numeric_geometry_function + // numeric_geometry_function. + + // Note that window_function must come before whitelisted_function + // since ROW_NUMBER would otherwise be consumed by the whitelisted + // function rule first. + numeric_value_function %= trig_function | math_function | cast_function | position_function | non_predicate_geometry_function | - whitelisted_function | sql_no_arg_function; + window_function | whitelisted_function | + sql_no_arg_function; numeric_value_function.name("numeric_value_function"); // Flipped the order here, because a value_expression can match a @@ -304,6 +323,7 @@ void ADQL_parser::init_factor() { BOOST_SPIRIT_DEBUG_NODE(whitelisted_function_name); BOOST_SPIRIT_DEBUG_NODE(whitelisted_function_param); BOOST_SPIRIT_DEBUG_NODE(whitelisted_function); + BOOST_SPIRIT_DEBUG_NODE(window_function); BOOST_SPIRIT_DEBUG_NODE(cast_function); BOOST_SPIRIT_DEBUG_NODE(position_function); BOOST_SPIRIT_DEBUG_NODE(numeric_value_function); diff --git a/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx b/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx index 534bf34..05aac9c 100644 --- a/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx +++ b/src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx @@ -82,8 +82,9 @@ void ADQL_parser::init_reserved_words() { ascii::no_case["PRIVILEGES"] | ascii::no_case["PRIOR"] | ascii::no_case["PRIMARY"] | ascii::no_case["PRESERVE"] | ascii::no_case["PREPARE"] | ascii::no_case["PRECISION"] | - ascii::no_case["POSITION"] | ascii::no_case["PARTIAL"] | - ascii::no_case["PAD"] | ascii::no_case["OVERLAPS"] | + ascii::no_case["POSITION"] | ascii::no_case["PARTITION"] | + ascii::no_case["PARTIAL"] | ascii::no_case["PAD"] | + ascii::no_case["OVERLAPS"] | ascii::no_case["OVER"] | ascii::no_case["OUTPUT"] | ascii::no_case["OUTER"] | ascii::no_case["ORDER"] | ascii::no_case["OR"]; diff --git a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx index c479746..e70fd2d 100644 --- a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx +++ b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function.hxx @@ -4,6 +4,7 @@ #include "../../Non_Predicate_Geometry_Function.hxx" #include "../../Whitelisted_Function_Wrap.hxx" +#include "../../Window_Function_Wrap.hxx" #include "Numeric_Value_Function/Cast_Function.hxx" #include "Numeric_Value_Function/Math_Function.hxx" #include "Numeric_Value_Function/Position_Function.hxx" @@ -14,7 +15,7 @@ class Numeric_Value_Function { public: typedef boost::variant + Window_Function_Wrap, Whitelisted_Function_Wrap, std::string> Variant; Variant variant; bool empty() const; diff --git a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx index 15d1149..a439c5a 100644 --- a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx +++ b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/empty.cxx @@ -1,5 +1,6 @@ #include "../../../../empty_variant.hxx" #include "../../../Whitelisted_Function.hxx" +#include "../../../Window_Function.hxx" #include "../Numeric_Value_Function.hxx" bool ADQL::Numeric_Value_Function::empty() const { return empty_variant(variant); } diff --git a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx index 8977afa..b570e62 100644 --- a/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx +++ b/src/Query/Query_Specification/Factor/Numeric_Primary/Numeric_Value_Function/ostream.cxx @@ -1,4 +1,5 @@ #include "../../../Whitelisted_Function.hxx" +#include "../../../Window_Function.hxx" #include "../Numeric_Value_Function.hxx" namespace ADQL { diff --git a/src/Query/Query_Specification/Select.hxx b/src/Query/Query_Specification/Select.hxx index 9c98702..a1a03f3 100644 --- a/src/Query/Query_Specification/Select.hxx +++ b/src/Query/Query_Specification/Select.hxx @@ -4,6 +4,7 @@ #include "Non_As.hxx" #include "Value_Expression_Primary.hxx" #include "Whitelisted_Function.hxx" +#include "Window_Function.hxx" namespace ADQL { class Select { diff --git a/src/Query/Query_Specification/Whitelisted_Function_Wrap.hxx b/src/Query/Query_Specification/Whitelisted_Function_Wrap.hxx index 5f9d786..b3a8583 100644 --- a/src/Query/Query_Specification/Whitelisted_Function_Wrap.hxx +++ b/src/Query/Query_Specification/Whitelisted_Function_Wrap.hxx @@ -6,5 +6,6 @@ namespace ADQL { class Whitelisted_Function; typedef boost::recursive_wrapper Whitelisted_Function_Wrap; + std::ostream &operator<<(std::ostream &os, const Whitelisted_Function_Wrap &s); } // namespace ADQL diff --git a/src/Query/Query_Specification/Window_Function.hxx b/src/Query/Query_Specification/Window_Function.hxx new file mode 100644 index 0000000..f476ddf --- /dev/null +++ b/src/Query/Query_Specification/Window_Function.hxx @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include + +#include +#include + +#include "Value_Expression.hxx" + +namespace ADQL { +class Window_Function { +public: + std::string function_name; + std::vector partition_by; + std::vector order_by; + bool empty() const { return function_name.empty(); } +}; + +inline std::ostream &operator<<(std::ostream &os, + const ADQL::Window_Function &window_function) { + os << window_function.function_name << "() OVER (PARTITION BY "; + for (auto p = window_function.partition_by.begin(); + p != window_function.partition_by.end();) { + os << *p; + ++p; + if (p != window_function.partition_by.end()) os << ", "; + } + os << " ORDER BY "; + for (auto o = window_function.order_by.begin(); + o != window_function.order_by.end();) { + os << *o; + ++o; + if (o != window_function.order_by.end()) os << ", "; + } + os << ")"; + return os; +} +} // namespace ADQL + +BOOST_FUSION_ADAPT_STRUCT(ADQL::Window_Function, + (std::string, function_name) + (std::vector, partition_by) + (std::vector, order_by)) diff --git a/src/Query/Query_Specification/Window_Function_Wrap.hxx b/src/Query/Query_Specification/Window_Function_Wrap.hxx new file mode 100644 index 0000000..874cfd5 --- /dev/null +++ b/src/Query/Query_Specification/Window_Function_Wrap.hxx @@ -0,0 +1,10 @@ +#pragma once +#include +#include + +namespace ADQL { +class Window_Function; +typedef boost::recursive_wrapper Window_Function_Wrap; +bool empty(const Window_Function_Wrap &s); +std::ostream &operator<<(std::ostream &os, const Window_Function_Wrap &s); +} // namespace ADQL diff --git a/test/parse_adql.cxx b/test/parse_adql.cxx index fc492ee..909d678 100644 --- a/test/parse_adql.cxx +++ b/test/parse_adql.cxx @@ -811,6 +811,97 @@ int main(int argc, char *argv[]) { "tap_ancillary.irsa_directory_datatypes v ON d.collection=v.collection " "WHERE d.semantics like '%primary%' ORDER BY " "LOWER(facility_name),d.collection,instrument", + + // IRSA-7700: support for WINDOW/PARTITION/OVER + "SELECT ROW_NUMBER() Over (PARTITION BY facility, instrument ORDER BY " + "dist) FROM my_table", + + "SELECT * FROM (SELECT SIA2_MINIMAL_JOIN.*, ROW_NUMBER() OVER (PARTITION " + "BY SIA2_MINIMAL_JOIN.facility, SIA2_MINIMAL_JOIN.instrument, " + "SIA2_MINIMAL_JOIN.upload_row_id ORDER BY " + "SIA2_MINIMAL_JOIN.dist_to_point_meters) as rownum FROM SIA2_MINIMAL_JOIN) " + "ranked WHERE rownum = 1 AND DISTANCE(POINT('ICRS', ranked.ra, " + "ranked.dec), POINT('ICRS', 1.0, 2.0)) < 0.1", + + "SELECT * FROM (SELECT SIA2_MINIMAL_JOIN.*, ROW_NUMBER() OVER (PARTITION " + "BY SIA2_MINIMAL_JOIN.facility, SIA2_MINIMAL_JOIN.instrument, " + "SIA2_MINIMAL_JOIN.upload_row_id ORDER BY " + "SIA2_MINIMAL_JOIN.dist_to_point_meters) as rownum, DISTANCE(POINT('ICRS', " + "SIA2_MINIMAL_JOIN.ra, SIA2_MINIMAL_JOIN.dec), POINT('ICRS', 1.0, 2.0)) as " + "dist FROM SIA2_MINIMAL_JOIN) ranked WHERE rownum = 1", + + "WITH SIA2_MINIMAL_JOIN AS (SELECT o.obsid as obsid, p.planeid as " + "planeid, o.telescope_name as facility_name, o.instrument_name as " + "instrument_name, CAST ('1' AS BIGINT) as upload_row_id FROM " + "(caom.simulated_observation o JOIN caom.simulated_plane p ON o.obsid = " + "p.obsid) ) SELECT coord1(p.pt) as s_ra, coord2(p.pt) as s_dec, " + "SIA2_CLOSEST_SUBQUERY.dist_to_point_meters / 111194.68229846345 as " + "min_dist_to_point FROM (SELECT " + "obsid,planeid,facility_name,instrument_name,upload_row_id FROM (SELECT " + "SIA2_MINIMAL_JOIN, ROW_NUMBER() OVER (PARTITION BY " + "SIA2_MINIMAL_JOIN.facility, SIA2_MINIMAL_JOIN.instrument, " + "SIA2_MINIMAL_JOIN.upload_row_id ORDER BY " + "SIA2_MINIMAL_JOIN.dist_to_point_meters) as rownum FROM SIA2_MINIMAL_JOIN) " + "ranked WHERE rownum = 1) SIA2_CLOSEST_SUBQUERY, ((caom.observation o JOIN " + "caom.plane p ON o.obsid = p.obsid) JOIN caom.artifact a ON p.planeid = " + "a.planeid) WHERE SIA2_CLOSEST_SUBQUERY.obsid = o.obsid AND " + "SIA2_CLOSEST_SUBQUERY.planeid = p.planeid AND " + "(SIA2_CLOSEST_SUBQUERY.facility IS NULL OR SIA2_CLOSEST_SUBQUERY.facility " + "= o.telescope_name) AND (SIA2_CLOSEST_SUBQUERY.instrument IS NULL OR " + "SIA2_CLOSEST_SUBQUERY.instrument = o.instrument_name)", + + "WITH SIA2_MINIMAL_JOIN AS (SELECT o.obsid as obsid, p.planeid as planeid, " + "CAST ('1' AS BIGINT) as upload_row_id FROM (caom.simulated_observation o " + "JOIN caom.simulated_plane p ON o.obsid = p.obsid) WHERE " + "(((p.dataproducttype = 'image') OR (p.dataproducttype = 'cube'))) ) " + "SELECT coord1(p.pt) as s_ra, coord2(p.pt) as s_dec, o.telescope_name as " + "facility_name, o.instrument_name as instrument_name, a.producttype as " + "dataproduct_subtype, p.calibrationlevel as calib_level, p.dataproducttype " + "as dataproduct_type, p.energy_bandpassname as energy_bandpassname, " + "p.energy_emband as energy_emband, o.observationid as obs_id, " + "p.position_resolution as s_resolution, p.energy_bounds_lower as em_min, " + "p.energy_bounds_upper as em_max, p.energy_resolvingpower as em_res_power, " + "o.proposal_title as proposal_title, CASE WHEN position('http' IN a.uri) > " + "0 AND position('ipac.caltech.edu' IN a.uri) = 0 THEN a.uri ELSE " + "'HOST:PORT/' || strip_url_prefix(a.uri, 'https.*edu/') END as access_url, " + "a.contenttype as access_format, CAST (CEIL(a.contentlength/1000.0) AS " + "BIGINT) as access_estsize, p.time_exposure as t_exptime, CASE WHEN " + "(p.poly IS NOT NULL) THEN poly_to_region(p.poly) ELSE pt_to_region(p.pt) " + "END as s_region, o.collection as obs_collection, o.intent as obs_intent, " + "o.algorithm_name as algorithm_name, o.telescope_keywords as " + "facility_keywords, o.instrument_keywords as instrument_keywords, " + "o.environment_photometric as environment_photometric, o.proposal_id as " + "proposal_id, o.proposal_pi as proposal_pi, o.proposal_project as " + "proposal_project, o.target_name as target_name, o.target_type as " + "target_type, o.target_standard as target_standard, o.target_moving as " + "target_moving, o.target_keywords as target_keywords, p.datarelease as " + "obs_release_date, p.position_dimension_naxis1 as s_xel1, " + "p.position_dimension_naxis2 as s_xel2, p.position_samplesize as " + "s_pixel_scale, p.position_timedependent as position_timedependent, " + "p.time_bounds_lower as t_min, p.time_bounds_upper as t_max, " + "p.time_resolution as t_resolution, p.time_dimension as t_xel, " + "'ivo://irsa.ipac/' || o.collection || '?' || o.observationid || '/' || " + "p.productID as obs_publisher_did, " + "sqrt(p.position_dimension_naxis1*p.position_dimension_naxis2)*p.position_" + "samplesize/3600 as s_fov, p.energy_dimension as em_xel, " + "p.polarization_states as pol_states, p.polarization_dimension as pol_xel, " + "SIA2_cloud_access_column(o.collection, a.uri) as cloud_access, NULL::char " + "as o_ucd, SIA2_CLOSEST_SUBQUERY.upload_row_id as upload_row_id, CASE " + "a.producttype WHEN 'science' THEN 'aaa' WHEN 'calibration' THEN 'aab' " + "ELSE producttype END as irsa_temp_producttype, " + "SIA2_CLOSEST_SUBQUERY.dist_to_point_meters / 111194.68229846345 as " + "min_dist_to_point FROM (SELECT * FROM (SELECT SIA2_MINIMAL_JOIN.*, " + "ROW_NUMBER() OVER (PARTITION BY SIA2_MINIMAL_JOIN.facility, " + "SIA2_MINIMAL_JOIN.instrument, SIA2_MINIMAL_JOIN.upload_row_id ORDER BY " + "SIA2_MINIMAL_JOIN.dist_to_point_meters) as rownum FROM SIA2_MINIMAL_JOIN) " + "ranked WHERE rownum = 1) SIA2_CLOSEST_SUBQUERY, ((caom.observation o JOIN " + "caom.plane p ON o.obsid = p.obsid) JOIN caom.artifact a ON p.planeid = " + "a.planeid) WHERE SIA2_CLOSEST_SUBQUERY.obsid = o.obsid AND " + "SIA2_CLOSEST_SUBQUERY.planeid = p.planeid AND " + "(SIA2_CLOSEST_SUBQUERY.facility IS NULL OR SIA2_CLOSEST_SUBQUERY.facility " + "= o.telescope_name) AND (SIA2_CLOSEST_SUBQUERY.instrument IS NULL OR " + "SIA2_CLOSEST_SUBQUERY.instrument = o.instrument_name)", + #endif // RUN_ALL }; @@ -921,6 +1012,9 @@ int main(int argc, char *argv[]) { "SELECT bar FROM foo where (ST(ST(ST(ST(ST(ST(ST(ST(ST())))))))))", "SELECT * FROM my_table1 where sys_context('USERENV','DB_NAME')='wise1'", + "SELECT DISTINCT ON (collection) ra, dec, collection FROM my_table ORDER " + "BY collection, dist", + // IRSA-7735: retire support for table() function "WITH temp (collection, multi_type) AS (SELECT collection,mytype " "FROM table(tap_ancillary.DCE_DATATYPE('irsa_directory'))) " @@ -937,6 +1031,7 @@ int main(int argc, char *argv[]) { "WHERE semantics like '%primary%' AND " "irsa_directory.collection=temp.collection " "ORDER BY facility_name,irsa_directory.collection,instrument_name", + "SELECT RANK() OVER (PARTITION BY collection ORDER BY dist) FROM my_table", }; int result(0); diff --git a/wscript b/wscript index 669f347..14d3d25 100644 --- a/wscript +++ b/wscript @@ -114,6 +114,8 @@ def build(ctx): 'src/Query/Query_Specification/Value_Expression_Primary/Case_Expression/Case_Abbreviation/Nullif/ostream.cxx', 'src/Query/Query_Specification/Value_Expression_Primary/Case_Expression/Case_Abbreviation/Coalesce/ostream.cxx', 'src/Query/Query_Specification/Whitelisted_Function_Wrap/ostream.cxx', + 'src/Query/Query_Specification/Window_Function_Wrap/empty.cxx', + 'src/Query/Query_Specification/Window_Function_Wrap/ostream.cxx', 'src/Query/Query_Specification/Value_Expression/empty.cxx', 'src/Query/Query_Specification/Value_Expression/ostream.cxx', 'src/Query/Query_Specification/Value_Expression_Wrap/ostream.cxx', From 6f5ea1d8fc9a9733c81651a1c4a03376f8c61a74 Mon Sep 17 00:00:00 2001 From: Judith Silverman Date: Wed, 24 Jun 2026 08:28:49 -0700 Subject: [PATCH 2/3] IRSA-7700: Add support for WINDOW Take II Belatedly git-add new file --- .../Query_Specification/Window_Function_Wrap/empty.cxx | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 src/Query/Query_Specification/Window_Function_Wrap/empty.cxx diff --git a/src/Query/Query_Specification/Window_Function_Wrap/empty.cxx b/src/Query/Query_Specification/Window_Function_Wrap/empty.cxx new file mode 100644 index 0000000..fd7ff02 --- /dev/null +++ b/src/Query/Query_Specification/Window_Function_Wrap/empty.cxx @@ -0,0 +1,6 @@ +#include "../Window_Function.hxx" +#include "../Window_Function_Wrap.hxx" + +namespace ADQL { +bool empty(const Window_Function_Wrap &s) { return s.get().empty(); } +} // namespace ADQL From fc966bb580e7cfae95c45237a1dc863a2e02a787 Mon Sep 17 00:00:00 2001 From: Judith Silverman Date: Wed, 24 Jun 2026 09:07:42 -0700 Subject: [PATCH 3/3] IRSA-7700: Add support for WINDOW Take III Belatedly git-add another new file --- .../Query_Specification/Window_Function_Wrap/ostream.cxx | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/Query/Query_Specification/Window_Function_Wrap/ostream.cxx diff --git a/src/Query/Query_Specification/Window_Function_Wrap/ostream.cxx b/src/Query/Query_Specification/Window_Function_Wrap/ostream.cxx new file mode 100644 index 0000000..e7803d2 --- /dev/null +++ b/src/Query/Query_Specification/Window_Function_Wrap/ostream.cxx @@ -0,0 +1,9 @@ +#include "../Window_Function.hxx" +#include "../Window_Function_Wrap.hxx" + +namespace ADQL { +std::ostream &operator<<(std::ostream &os, + const ADQL::Window_Function_Wrap &wrap) { + return os << wrap.get(); +} +} // namespace ADQL