Selecting substring with different starting location

(I’m not sure if I’m using the correct title… please let me know if you think it should be changed to something else)

Hi, I’m trying to select substring with different starting location.

I have a table like this,

Req:

WITH 
  Req (COURSE_NUM, MAJOR) AS
(
VALUES
  ('A001', 'CS1            ')  
, ('A002', 'CS2 CS1 CS3 CS4')
, ('A003', 'CS2            ')
, ('B001', 'CS3 CS1        ')  
, ('B002', 'CS2            ')
)
SELECT * FROM Req

COURSE_NUM   MAJOR
----------  -------
A001        CS1            
A002        CS2 CS1 CS3 CS4
A003        CS2            
B001        CS3 CS1        
B002        CS2            

I’m trying to get a table like this:

MAJOR  COURSE_NUM
-----  -----------
CS1     A001
CS1     A002
CS1     B001
CS2     A002
CS2     A003
CS2     B002
CS3     A002
CS3     B002
CS4     A002

After googling around, I found CHARINDEX() might be a solution. So I tried DB2 equivalent LOCATE():

Select SUBSTR(MAJOR, LOCATE('CS1', MAJOR), 3)
      ,COURSE_NUM
FROM Req
WHERE MAJOR LIKE '%CS1%'

The above query would give me a result like this:

MAJOR  COURSE_NUM
-----  -----------
CS1     A001
CS1     A002
CS1     B001

This is okay for one major. What if I want all majors? I also tried using a host variable in LOCATE() and in LIKE clause. But it raised an error:

SQLCODE = -171, ERROR:  THE DATA TYPE, LENGTH, OR VALUE OF
ARGUMENT 1 OF LOCATE IS INVALID                           

I’m not sure if the error was caused by my syntax or other restrictions.

Is there any way or workaround to accomplish this?
Please let me know if you need more information. Thanks!

Db2 for LUW

WITH 
  Req (COURSE_NUM, MAJOR) AS
(
VALUES
  ('A001', 'CS1            ')  
, ('A002', 'CS2 CS1 CS3 CS4')
, ('A003', 'CS2            ')
, ('B001', 'CS3 CS1        ')  
, ('B002', 'CS2            ')
)
SELECT T.TOKEN, R.COURSE_NUM
FROM Req R, XMLTABLE
(
'for $id in tokenize($s, " +") return <i>{string($id)}</i>' 
PASSING TRIM (R.MAJOR) as "s"
COLUMNS 
  TOKEN VARCHAR (10) PATH '.'
) T
ORDER BY T.TOKEN, R.COURSE_NUM;

DB2 for all pratforms using the REGEXP_SUBSTR function

WITH 
  Req (COURSE_NUM, MAJOR) AS
(
          SELECT 'A001', 'CS1            ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A002', 'CS2 CS1 CS3 CS4' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A003', 'CS2            ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B001', 'CS3 CS1        ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B002', 'CS2            ' FROM SYSIBM.SYSDUMMY1
)
, T (OCCURENCE, COURSE_NUM, MAJOR, TOKEN) AS
(
SELECT 
  1 AS OCCURENCE, COURSE_NUM, MAJOR AS MAJOR
, REGEXP_SUBSTR (MAJOR, '[^ ]+', 1, 1) TOKEN
FROM Req
  UNION ALL
SELECT 
  OCCURENCE + 1, COURSE_NUM, MAJOR 
, REGEXP_SUBSTR (MAJOR, '[^ ]+', 1, OCCURENCE + 1) TOKEN
FROM T
WHERE REGEXP_SUBSTR (MAJOR, '[^ ]+', 1, OCCURENCE + 1) IS NOT NULL
)
SELECT TOKEN, COURSE_NUM 
FROM T
ORDER BY TOKEN, COURSE_NUM;

DB2 for all pratforms using the LOCATE function

WITH 
  Req (COURSE_NUM, MAJOR) AS
(
          SELECT 'A001', 'CS1            ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A002', 'CS2 CS1 CS3 CS4' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'A003', 'CS2            ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B001', 'CS3 CS1        ' FROM SYSIBM.SYSDUMMY1
UNION ALL SELECT 'B002', 'CS2            ' FROM SYSIBM.SYSDUMMY1
)
, T (COURSE_NUM, MAJOR, TOKEN) AS
(
SELECT 
  COURSE_NUM
, LTRIM (SUBSTR (MAJOR, NULLIF (LOCATE (' ', MAJOR), 0) + 1)) MAJOR
, SUBSTR (MAJOR, 1, COALESCE (NULLIF (LOCATE (' ', MAJOR), 0) - 1, LENGTH (MAJOR))) TOKEN
FROM (SELECT COURSE_NUM, TRIM (MAJOR) MAJOR FROM Req) R
  UNION ALL
SELECT 
  COURSE_NUM
, LTRIM (SUBSTR (MAJOR, NULLIF (LOCATE (' ', MAJOR), 0) + 1)) MAJOR
, SUBSTR (MAJOR, 1, COALESCE (NULLIF (LOCATE (' ', MAJOR), 0) - 1, LENGTH (MAJOR))) TOKEN
FROM T
WHERE MAJOR IS NOT NULL
)
SELECT TOKEN, COURSE_NUM 
FROM T
ORDER BY TOKEN, COURSE_NUM;

DB2 has a newish function systools.split() to do this:

select req.course_num, element
from req r cross join
     table(systools.split(major, ' ')) ;

Leave a Comment