diff --git a/SQL_production/TM_2023A_data.sql b/SQL_production/TM_2023A_data.sql
new file mode 100644
index 0000000..f05f1ae
--- /dev/null
+++ b/SQL_production/TM_2023A_data.sql
@@ -0,0 +1,434 @@
+
+-----------------------------------------------------------------------------
+-----------------------------------------------------------------------------
+----						Data for TMs			                     ----
+----                                                                     ----
+----  This code creates the data download tables and the front end data  ----
+----                                                                     ----
+---- It has been updated for 2022 to include size in the breakdowns and  ----
+----  exam cohort - this made a difference to VRQ3 sports studies where  ----
+----  different gnumbers were classed as either applied general or tech  ----
+----   level. The front end has also had an update on 2021 to include    ----
+---- qualification type to disaggregate A and AS level and to give more  ----
+----  detail to study skills. Also removed a lot of the unusual manual   ----
+---- grade changes.                                                      ----
+----                                                                     ----
+-----------------------------------------------------------------------------
+-----------------------------------------------------------------------------
+
+-- declare the year and all the names of the base tables to be updated to appropriate tables
+DECLARE @RPYEAR AS INTEGER = 2023
+
+If object_Id('tempDB..#subj_tab') is not null drop table #subj_tab
+select * into #subj_tab from QRD.dbo.Subje01_2023_10_31
+If object_Id('tempDB..#tab2') is not null drop table #tab2
+select * into #tab2 from QRD.dbo.Table2_2023_10_31
+If object_Id('tempDB..#tab3') is not null drop table #tab3
+select * into #tab3 from QRD.dbo.Table3_2023_10_31
+If object_Id('tempDB..#tab4') is not null drop table #tab4
+select * into #tab4 from QRD.dbo.Table4_2023_10_31
+If object_Id('tempDB..#subj') is not null drop table #subj
+--select distinct SUBLEVNO, SUBJ, MAPPING into #subj from [L3VA].[U2022].[QUAL_SUBJ_LOOKUP]
+select distinct SUBJ, MAPPING into #subj from [L3VA].[U2023].[SUBJCODES_lookup]
+
+If object_Id('tempDB..#raw_inst') is not null drop table #raw_inst
+select * into #raw_inst from KS5_RESTRICTED.[Outputs].[RAW_Inst_POST16_202223A]
+If object_Id('tempDB..#raw_LA') is not null drop table #raw_LA
+select * into #raw_LA from KS5_RESTRICTED.[Outputs].[RAW_LEA_POST16_202223A]
+If object_Id('tempDB..#exam') is not null drop table #exam
+select * into #exam from [KS5_RESTRICTED].[Outputs].[Exam_PT_POST16_202223A]
+If object_Id('tempDB..#CSCP_lookup') is not null drop table #CSCP_lookup
+select * into #CSCP_lookup from KS5_RESTRICTED.Internal.CSCP_subject_lookup
+If object_Id('tempDB..#indicator') is not null drop table #indicator
+select * into #indicator from KS5_RESTRICTED.[Outputs].[PupilIndicators_POST16_202223A]
+If object_Id('tempDB..#PRIOR') is not null drop table #PRIOR
+select * into #PRIOR from KS5_STATISTICS_RESTRICTED.[EES_2023A].[PRIORS_KS4end2022]
+If object_Id('tempDB..#Allocations') is not null drop table #Allocations
+select PUPILID, END_KS into #Allocations from KS5_RESTRICTED.[Outputs].[PupilAllocations_POST16_202223A]
+
+----------------------------------------------------------
+--
+--Most of this initial set up code is not important for TMs it's taken from the performance tables data download
+--
+----------------------------------------------------------
+
+--calculating the original PTQ INCLUDE ignoring COVID impacted to use to flag the COVID impacted results
+--the spec for PTQ INCLUDE doesn't include removing # but it appears to be the case when QAing with RM - i think there is something somewhere about # quals
+If object_Id('tempDB..#QRD_PTQINCLUDE') is not null drop table #QRD_PTQINCLUDE
+select distinct a.QUID, case when a.syllabus_ref = '***' then '' else a.syllabus_ref end as syllabus_ref, a.Wolf_Included_1618 as Wolf_Included_1618_original, KS5_Academic, KS5_Subset,
+case when syllabus_ref != 'KNO' and (App1618 = 1 and ((KS5_Equivalences = 1 and KS5_Academic = 1) or Wolf_Included_1618 in (1,2,3)) and QUID != '#') then 1 else 0 end PTQ_INCLUDE_original
+into #QRD_PTQINCLUDE
+from #subj_tab as a
+left join #tab4 as b
+on a.Qual_Type = b.Qual_Type
+
+--get the asize and gsize of the different qualications
+If object_Id('tempDB..#QRD_SIZE') is not null drop table #QRD_SIZE
+select a.QUID, 
+max(coalesce(b.GCSE_Equivalent, c.GCSE_Equivalent)) as GSIZE,
+max(coalesce(b.AL_Equivalent, c.AL_Equivalent)) as ASIZE
+into #QRD_SIZE
+from #subj_tab as a
+left join #tab2 as b
+on a.Qual_Type = b.Qual_Type
+left join #tab3 as c
+on a.QUID = c.QUID
+group by a.QUID
+
+
+--Code from Ops to create the school type lookup
+If object_Id('tempDB..#insti_data') is not null drop table #insti_data
+  select distinct r.LEALAB, URN, schname, TAB1618,
+case when b.nftype = 20 then 'Sponsored Academy'
+when b.nftype = 21 then 'Community School'
+when b.nftype = 22 then 'Voluntary Aided School'
+when b.nftype = 23 then 'Voluntary Controlled School'
+when b.nftype = 24 then 'Foundation School'
+when b.nftype = 25 then 'City Technology College'
+when b.nftype = 26 then 'Community Special School'
+when b.nftype = 27 then 'Foundation Special School'
+when b.nftype = 28 then 'Non-Maintained Special School'
+when b.nftype = 29 then 'Independent School Approved for SEN pupils'
+when b.nftype = 30 then 'Independent School'
+when b.nftype = 31 and b.FESITYPE=1 then 'Agriculture and Horticulture College'
+when b.nftype = 31 and b.FESITYPE=2 then 'Art, Design and Performing Art College'
+when b.nftype = 31 and b.FESITYPE=3 then 'General Further Education College'
+when b.nftype = 31 and b.FESITYPE=4 then 'General Further Education College (Special)'
+when b.nftype = 31 and b.FESITYPE=5 then 'Sixth Form College'
+when b.nftype = 31 and b.FESITYPE=8 then 'Specialist Designated College'
+when b.nftype = 31 and b.FESITYPE=9 then 'Tertiary College'
+when b.nftype = 32 then 'Community hospital school'
+when b.nftype = 33 then 'Foundation hospital school'
+when b.nftype = 34 then 'Pupil Referral Unit'
+when b.nftype = 35 then 'Sixth Form Centre / Consortia'
+when b.nftype = 36 then 'Ministry of Defence Funded College'
+when b.nftype = 38 then 'Special College'
+when b.nftype = 41 then 'European Schools'
+when b.nftype = 42 then 'Playing for Success Centres'
+when b.nftype = 43 then 'Offshore Schools'
+when b.nftype = 44 then 'Service Childrens Education'
+when b.nftype = 45 then 'Higher Education Institutions'
+when b.nftype = 46 then 'Welsh Establishment'
+when b.nftype = 47 then 'LA Nursery School'
+when b.nftype = 48 then 'Other Independent Special School'
+when b.nftype = 49 then 'Early Years setting'
+when b.nftype = 50 then 'Sponsored Special Academy'
+when b.nftype = 51 then 'Converter Academy'
+when b.nftype = 52 then 'Free School - Mainstream'
+when b.nftype = 53 then 'Free School - Special'
+when b.nftype = 54 then 'British Overseas School'
+when b.nftype = 55 then 'Special Converter Academy'
+when b.nftype = 56 then 'Alternative Provision'
+when b.nftype = 57 then 'University Technical College'
+when b.nftype = 58 then 'Studio School'
+when b.nftype = 59 then 'Free School - 16-19'
+when b.nftype = 60 then 'International Schools'
+when b.nftype = 63 then 'Academy 16-19 Converter'
+when b.nftype = 64 then 'Academy 16-19 Sponsor Led'
+when b.nftype = 97 then 'Alternative provision'
+when b.nftype = 98 then 'Legacy types'
+when b.nftype = 99 then 'Secure Unit'
+else b.NFTYPE end as [School or college type]
+into #insti_data
+from #raw_inst as b 
+left join #raw_LA as r 
+on b.LEA = r.LEA
+
+
+/* Get data from exam file. Join on the front end friendly subject names - currently in an internal table on KS5_RESTRICTED
+but ultimately we want this to end up in the QRD. Join on the calculated PTQ INCLUDE, ASIZE and GSIZE
+Grade X has been removed for TMs
+*/
+--DECLARE @RPYEAR AS INTEGER = 2022
+If object_Id('tempDB..#examprep') is not null drop table #examprep
+select LAESTAB, URN, PUPILID, a.GNUMBER, a.BRDSUBNO, _QualificationTypeCode as QUAL_TYPE, SUBLEVNO, AMDEXAM, EXAMYEAR, d.ASIZE, d.GSIZE, MAPPING,
+SEASON, GRADE, POINTS_1618, POTENTIAL_LEVEL, c.subject_user_engagement as [Subject], coalesce(PTQ_INCLUDE_original, 0) as PTQ_INCLUDE_original, PTQ_INCLUDE,
+DISC_ALL, DISC_0_1, DISC_1_2, DISC_0_2, DISC_SINGLE, DISCS_ALL, DISCS_0_1, DISCS_1_2, DISCS_0_2, DISCS_SINGLE, DISCB_ALL, DISCB_0_1, DISCB_1_2, DISCB_0_2, DISCB_SINGLE,
+DISC_ALL_FULL, DISC_0_1_FULL, DISC_1_2_FULL, DISC_0_2_FULL, DISC_SINGLE_FULL, _Wolf_Included_1618, EXAMNO
+into #examprep
+from #exam as a
+left join #QRD_PTQINCLUDE as b
+on a.GNUMBER = b.QUID and a.BRDSUBNO = b.Syllabus_Ref
+left join #CSCP_lookup as c
+on a.MAPPING = c.DISC_CODE
+left join #QRD_SIZE as d
+on a.GNUMBER = d.QUID
+where GRADE != 'X' -- added to stop messing up AS level discounting
+
+
+/*
+select results for the appropriate provider based on COND and without discounting
+make changes to the grade:
+	- including adding covid impact result based on ptq include - this can be removed once COVID impacted results are out of scope of the data
+	- setting pending and no result to no result but now seperating Fail out from no result so they are reported seperately
+	- making changes to IB where the cumulative points is given with a letter grade eg. 32D - we remove the letter grade. In the cases of a fail eg. 32F, the result is listed
+	as no result - using points to identify this
+	- sublevno 129 is an IB combined certificate - the grades that seem to exist are K and J where K is awarded and J is unawareded. This has been set as a pass for K and no result for J
+	in previous code and then later in the code the pass was set to no result. We are not sure why this was done and does not appear correct and so I have left as pass and fail.
+*/
+--DECLARE @RPYEAR AS INTEGER = 2022
+If object_Id('tempDB..#examcut') is not null drop table #examcut
+select b.*, Qual_Description, z.GPTSPE_1, z.PTSPE_1,
+case when PTQ_INCLUDE = 0 and PTQ_INCLUDE_original = 1 then 'COVID result' -- this doesn't matter as I have added the PTQ_INCLUDE = 1 filter to the where statement so this is redundent
+when GRADE IN ('Q','R','X') then 'No result'
+when GRADE IN ('F','U','N') then 'Fail'
+when SUBLEVNO = 130 and POINTS_1618 > 0 then LEFT(GRADE, 2)
+when SUBLEVNO = 130 and POINTS_1618 = 0 then 'Fail'
+when sublevno=129 then (case when grade='K' then 'Pass' when grade in ('J','F','Q','U','X') then 'Fail' end)
+when grade = 'D*' then '*D'
+	   when grade = 'PM' then 'MP'
+	   when grade = 'MD' then 'DM'
+	   when grade = 'D**' then '**D' 
+	   when grade = 'DD*' then '*DD'
+	   when grade = 'MDD' then 'DDM'
+	   when grade = 'MMD' then 'DMM'
+	   when grade = 'PMM' then 'MMP'
+	   when grade = 'PPM' then 'MPP'
+else GRADE end GRADE_UPDATE,
+case when _Wolf_Included_1618 IN (1,2,3) then case when PTSPE_1 <1 then '<1'
+when PTSPE_1 >=1 and PTSPE_1 <2 then '1-<2'
+when PTSPE_1 >=2 and PTSPE_1 <3 then '2-<3'
+when PTSPE_1 >=3 and PTSPE_1 <4 then '3-<4'
+when PTSPE_1 >=4 and PTSPE_1 <5 then '4-<5'
+when PTSPE_1 >=5 and PTSPE_1 <6 then '5-<6'
+when PTSPE_1 >=6 and PTSPE_1 <7 then '6-<7'
+when PTSPE_1 >=6 and PTSPE_1 <7 then '6-<7'
+when PTSPE_1 >=7 and PTSPE_1 <8 then '7-<8'
+when PTSPE_1 >=8 and PTSPE_1 <9 then '8-<9'
+when PTSPE_1 >=9 then '9>=' end 
+else case when GPTSPE_1 <1 then '<1'
+when GPTSPE_1 >=1 and GPTSPE_1 <2 then '1-<2'
+when GPTSPE_1 >=2 and GPTSPE_1 <3 then '2-<3'
+when GPTSPE_1 >=3 and GPTSPE_1 <4 then '3-<4'
+when GPTSPE_1 >=4 and GPTSPE_1 <5 then '4-<5'
+when GPTSPE_1 >=5 and GPTSPE_1 <6 then '5-<6'
+when GPTSPE_1 >=6 and GPTSPE_1 <7 then '6-<7'
+when GPTSPE_1 >=6 and GPTSPE_1 <7 then '6-<7'
+when GPTSPE_1 >=7 and GPTSPE_1 <8 then '7-<8'
+when GPTSPE_1 >=8 and GPTSPE_1 <9 then '8-<9'
+when GPTSPE_1 >=9 then '9>=' end end as PRIOR_BAND,
+CASE WHEN (COND =1) THEN DISC_ALL  -- not moved, discount over all years, otherwise			
+		WHEN (COND =2) THEN DISC_0_1  -- else didn't move in RY and RY-1 and exam is in RY or RY-1
+		WHEN (COND =3) THEN DISC_1_2-- else didn't move in RY-1 and RY-2 and exam is in RY-1 or RY-2
+		WHEN (COND =4) THEN DISC_0_2-- else same inst in RY and RY-2 and exam is in RY or RY-2
+		WHEN (COND =5) THEN DISC_SINGLE-- else exam is in RY
+		WHEN (COND =6) THEN DISC_SINGLE-- else exam is in RY-1
+		WHEN (COND =7) THEN DISC_SINGLE-- else exam is in RY-2
+	ELSE 1 END AS DISC, -- points discounting flag
+	CASE WHEN (COND =1) THEN DISCS_ALL
+		WHEN (COND =2) THEN DISCS_0_1
+		WHEN (COND =3) THEN DISCS_1_2
+		WHEN (COND =4) THEN DISCS_0_2
+		WHEN (COND =5) THEN DISCS_SINGLE
+		WHEN (COND =6)  THEN DISCS_SINGLE
+		WHEN (COND =7) THEN DISCS_SINGLE
+	ELSE 1 END AS DISCS,-- size discounting flag
+	CASE WHEN (COND =1) THEN DISCB_ALL
+		WHEN (COND =2) THEN DISCB_0_1
+		WHEN (COND =3) THEN DISCB_1_2
+		WHEN (COND =4) THEN DISCB_0_2
+		WHEN (COND =5) THEN DISCB_SINGLE
+		WHEN (COND =6) THEN DISCB_SINGLE
+		WHEN (COND =7) THEN DISCB_SINGLE
+	ELSE 1 END AS DISCB,-- subset discounting flag,
+	CASE WHEN (COND =1) THEN DISC_ALL_FULL  -- not moved, discount over all years, otherwise			
+		WHEN (COND =2) THEN DISC_0_1_FULL   -- else didn't move in RY and RY-1 and exam is in RY or RY-1
+		WHEN (COND =3) THEN DISC_1_2_FULL -- else didn't move in RY-1 and RY-2 and exam is in RY-1 or RY-2
+		WHEN (COND =4) THEN DISC_0_2_FULL -- else same inst in RY and RY-2 and exam is in RY or RY-2
+		WHEN (COND =5) THEN DISC_SINGLE_FULL -- else exam is in RY
+		WHEN (COND =6) THEN DISC_SINGLE_FULL -- else exam is in RY-1
+		WHEN (COND =7) THEN DISC_SINGLE_FULL -- else exam is in RY-2
+	ELSE 1 END AS DISC_FULL 
+into #examcut
+from #indicator as a
+left join #PRIOR z
+on a.PUPILID = z.pupilid and a.KS4_YEAR_CALC = z.KS4_YEAR_CALC
+LEFT join #Allocations y
+on a.PUPILID = y.PUPILID
+left join #examprep as b
+on a.PUPILID = b.PUPILID and a.URN = b.URN
+left join #tab4 as c
+on b.SUBLEVNO = c.Qual_Number
+where [TRIGGER] = 1 and RECTYPE = 1 and COND IN (1,2,3,4,5,6,7) and NAT1618 = 1 and END_KS = 1 --??
+  and PTQ_INCLUDE = 1 --and PTQ_INCLUDE_original = 1 -- remove COVID impacted quals for 2022 TMs
+  AND (AMDEXAM NOT IN ('TO','CL', 'NR','D','W') OR AMDEXAM IS NULL) 
+and ((COND = 1 and ((EXAMYEAR IN (@RPYEAR, (@RPYEAR - 1), (@RPYEAR - 2)) and SEASON = 'S') or (EXAMYEAR IN ((@RPYEAR - 1), (@RPYEAR - 2), (@RPYEAR - 3)) and SEASON = 'W')) )
+OR (COND = 2 and ((EXAMYEAR IN (@RPYEAR, (@RPYEAR - 1)) and SEASON = 'S') or (EXAMYEAR IN ((@RPYEAR - 1), (@RPYEAR - 2)) and SEASON = 'W')))
+OR (COND = 3 and ((EXAMYEAR IN ((@RPYEAR - 1), (@RPYEAR - 2)) and SEASON = 'S') or (EXAMYEAR IN ((@RPYEAR - 2), (@RPYEAR - 3)) and SEASON = 'W')) )
+OR (COND = 4 and ((EXAMYEAR IN (@RPYEAR, (@RPYEAR - 2)) and SEASON = 'S') or (EXAMYEAR IN ((@RPYEAR - 1), (@RPYEAR - 3)) and SEASON = 'W')) )
+OR (COND = 5 and ((EXAMYEAR = @RPYEAR and SEASON = 'S') or (EXAMYEAR = (@RPYEAR - 1) and SEASON = 'W')) )
+OR (COND = 6 and ((EXAMYEAR = (@RPYEAR - 1) and SEASON = 'S') or (EXAMYEAR = (@RPYEAR - 2) and SEASON = 'W')) )
+OR (COND = 7 and ((EXAMYEAR = (@RPYEAR - 2) and SEASON = 'S') or (EXAMYEAR = (@RPYEAR - 3) and SEASON = 'W')) ))
+
+
+--pull all the grades from the QRD tables 2 and 3 for the qualifications to be reported
+If object_Id('tempDB..#grade_struc') is not null drop table #grade_struc
+select a.*, b.Grade as grade2, c.Grade as grade3, b.points as points2, c.Points as points3,
+coalesce(c.grade, b.grade) as QRD_grade, coalesce(c.points, b.points) as QRD_points,
+coalesce(c.last_input_year, b.last_input_year) as last_input_year
+into #grade_struc
+from (
+select distinct GNUMBER, QUAL_TYPE, SUBLEVNO	
+from #examcut) as a
+left join #tab2 as b
+on a.QUAL_TYPE = b.Qual_Type
+left join #tab3 as c
+on a.GNUMBER = c.QUID
+
+--altering some of the grades for no results and fails and the ordering of distinction, merit and pass
+--eg MDD becomes DDM
+--DECLARE @RPYEAR AS INTEGER = 2022
+If object_Id('tempDB..#grade_struc_update') is not null drop table #grade_struc_update
+select distinct GNUMBER, QRD_points, QUAL_TYPE,
+case when QRD_grade IN ('Q','R','X') then 'No result'
+when QRD_grade IN ('F','U','N') then 'Fail'
+when SUBLEVNO = 130 and QRD_points > 0 then LEFT(QRD_grade, 2)
+when SUBLEVNO = 130 and QRD_points = 0 then 'Fail'
+when sublevno=129 then (case when QRD_grade='K' then 'Pass' when QRD_grade in ('J','F','Q','U','X') then 'Fail' else QRD_grade end)
+ when QRD_grade = 'D*' then '*D'
+	   when QRD_grade = 'PM' then 'MP'
+	   when QRD_grade = 'MD' then 'DM'
+	   when QRD_grade = 'D**' then '**D' 
+	   when QRD_grade = 'DD*' then '*DD'
+	   when QRD_grade = 'MDD' then 'DDM'
+	   when QRD_grade = 'MMD' then 'DMM'
+	   when QRD_grade = 'PMM' then 'MMP'
+	   when QRD_grade = 'PPM' then 'MPP'	
+else QRD_grade end as GRADE_update
+into #grade_struc_update
+from #grade_struc
+where last_input_year = @RPYEAR
+
+--concatenate the grades to make a grade structure but remove fail and no result in contrast to previous years
+If object_Id('tempDB..#generic_gradeStruc') is not null drop table #generic_gradeStruc
+Select Main.GNUMBER, Main.qual_type,
+       Left(Main.gradeStructure,Len(Main.gradeStructure)-1) As "gradeStructure"
+into #generic_gradeStruc
+From
+    (
+        Select distinct ST2.GNUMBER, ST2.qual_type,
+            (
+                Select ST1.GRADE_update + ',' AS [text()]
+                From #grade_struc_update ST1
+                Where ST1.GNUMBER = ST2.GNUMBER and ST1.GRADE_update NOT IN ('No result', 'Fail')
+                ORDER BY ST1.GNUMBER, QRD_points desc, GRADE_update
+                For XML PATH ('')
+            ) gradeStructure
+        From #grade_struc_update ST2
+    ) [Main]
+
+--create the exam cohort flag 
+If object_Id('tempDB..#all_dat') is not null drop table #all_dat
+select a.*, b.gradeStructure,
+case when KS5_Academic = 1 and KS5_Subset = 1 and SUBLEVNO != 101 then 'A level'
+when KS5_Academic = 1 then 'Other academic'
+when _Wolf_Included_1618 = 1 then 'Tech level'
+when _Wolf_Included_1618 = 2 then 'Applied general'
+when _Wolf_Included_1618 = 3 then 'Technical certificate' end as Qualification
+into #all_dat
+from #examcut as a
+left join #generic_gradeStruc as b
+on a.GNUMBER = b.GNUMBER
+left join #QRD_PTQINCLUDE as y
+on a.GNUMBER = y.QUID and a.BRDSUBNO = y.Syllabus_Ref
+
+
+--national, qualification and subject
+If object_Id('tempDB..#nat_subj') is not null drop table #nat_subj
+select Qual_Description, a.SUBLEVNO, Potential_Level, ASIZE, GSIZE, a.MAPPING, SUBJ, [Subject], gradeStructure, PRIOR_BAND,  GRADE_UPDATE as GRADE, --Qualification, 
+count(PUPILID) as total_students
+into #nat_subj
+from #all_dat as a
+left join #insti_data as d
+on a.URN = d.URN
+left join #subj e
+on a.MAPPING = e.MAPPING --and a.SUBLEVNO = e.SUBLEVNO
+where --TAB1618 = 1
+ ((Qualification IN ('Other academic', 'Tech level', 'Applied general') and DISCS = 0) OR
+(Qualification = 'A level' and (DISCS=0 OR (DISCS=1 AND DISC=1 AND DISCB=0))) OR
+(Qualification = 'Technical certificate' and DISC_FULL = 0))
+group by Qual_Description, a.SUBLEVNO, Potential_Level, ASIZE, GSIZE, a.MAPPING, SUBJ, [Subject], gradeStructure, PRIOR_BAND, GRADE_UPDATE --Qualification, 
+order by Qual_Description, SUBLEVNO, ASIZE, GSIZE, [Subject], GRADE_UPDATE, PRIOR_BAND
+
+--create national data again but for AS so we can ignore usual discounting and implement AS only discounting
+If object_Id('tempDB..#nat_subj_AS') is not null drop table #nat_subj_AS
+select 'GCE AS level (All)' as Qual_Description, 699 as SUBLEVNO, Potential_Level, ASIZE, GSIZE, a.MAPPING, SUBJ, [Subject], gradeStructure, PRIOR_BAND,  GRADE_UPDATE as GRADE, --Qualification, 
+count(PUPILID) as total_students
+into #nat_subj_AS
+from (select *, ROW_NUMBER() OVER (PARTITION BY LAESTAB, PUPILID, MAPPING, SUBLEVNO
+ORDER BY POINTS_1618 DESC, EXAMYEAR DESC, SEASON DESC, EXAMNO) AS MAINEXAM
+from #all_dat
+where SUBLEVNO = 121) as a
+left join #insti_data as d
+on a.URN = d.URN
+left join /*(
+select distinct MAPPING, SUBJ
+from*/ #subj
+/*where SUBLEVNO IN (111, 121))*/ e
+on a.MAPPING = e.MAPPING --and a.SUBLEVNO = e.SUBLEVNO
+where MAINEXAM = 1 --TAB1618 = 1
+group by Qual_Description, a.SUBLEVNO, Potential_Level, ASIZE, GSIZE, a.MAPPING, SUBJ, [Subject], gradeStructure, PRIOR_BAND, GRADE_UPDATE --Qualification, 
+order by Qual_Description, SUBLEVNO, ASIZE, GSIZE, [Subject], GRADE_UPDATE, PRIOR_BAND
+
+-- bind the 2 tables together
+If object_Id('tempDB..#bind_nat_dat') is not null drop table #bind_nat_dat
+select *
+into #bind_nat_dat
+from 
+(
+select  Qual_Description, SUBLEVNO, Potential_Level, ASIZE, GSIZE, MAPPING, [Subject], gradeStructure, PRIOR_BAND, SUBJ, GRADE, total_students
+from #nat_subj
+where GRADE NOT IN ('COVID result', 'No result')
+union
+select  Qual_Description, SUBLEVNO, Potential_Level, ASIZE, GSIZE, MAPPING, [Subject], gradeStructure, PRIOR_BAND, SUBJ, GRADE, total_students
+from #nat_subj_AS	
+where GRADE NOT IN ('COVID result', 'No result')) as a
+
+--filter out quals where there are 15 or fewer entries and output final table
+If object_Id('tempDB..#TM_data') is not null drop table #TM_data
+select b.*
+into #TM_data
+from (
+select SUBLEVNO, Potential_Level, ASIZE, GSIZE, MAPPING, gradeStructure
+from #bind_nat_dat
+group by SUBLEVNO, Potential_Level, ASIZE, GSIZE, MAPPING, gradeStructure
+having SUM(total_students) > 15) a
+left join #bind_nat_dat b
+on a.SUBLEVNO = b.SUBLEVNO and a.POTENTIAL_LEVEL = b.POTENTIAL_LEVEL 
+and a.ASIZE = b.ASIZE and a.GSIZE = b.GSIZE and a.MAPPING = b.MAPPING and a.gradeStructure = b.gradeStructure
+
+
+select * from #TM_data
+where PRIOR_BAND is NULL
+
+-- 11913 rows
+-- up to 11914 rows with grade structure added in
+-- 955 NULLS in prior band
+
+select * from #TM_data
+where [Subject] is NULL
+-- should be zero
+
+
+--drop table [KS5_STATISTICS_RESTRICTED].[TM_2022].[TM_data_2023A]
+
+
+select * 
+into [KS5_STATISTICS_RESTRICTED].[TM_2023].[TM_data_2023A]
+from #TM_data
+
+
+-- Note
+-- JB, 05/01/2023
+
+-- One of the columns had a character length that caused errors in R, 
+-- this was identified in SQL using 
+
+-- use KS5_STATISTICS_RESTRICTED
+-- select COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH
+-- from INFORMATION_SCHEMA.COLUMNS 
+-- where TABLE_SCHEMA = 'TM_2022' 
+-- and TABLE_NAME = 'TM_data_2022A'
+
+-- and then corrected using
+-- alter table [TM_2023].[TM_data_2023A] alter column gradeStructure varchar (200) not NULL;
diff --git a/background_scripts/data_processing.R b/background_scripts/data_processing.R
index 3174862..cdd70b0 100644
--- a/background_scripts/data_processing.R
+++ b/background_scripts/data_processing.R
@@ -17,144 +17,54 @@ library(readr)
 rm(list=ls())
 
 # -----------------------------------------------------------------------------------------------------------------------------
-# ---- Things to change between runs ----
+# ---- Source functions ----
 # -----------------------------------------------------------------------------------------------------------------------------
 
-tm_file = "[KS5_STATISTICS_RESTRICTED].[TM_2023].[TM_data_2023U]"
+source("./background_scripts/data_processing_func.R")
 
+# -----------------------------------------------------------------------------------------------------------------------------
+# ---- Things to change between runs ----
+# -----------------------------------------------------------------------------------------------------------------------------
 
 ancillary_save_path <- "//lonnetapp01/DSGA2/!!Secure Data/SFR/2023/KS5/November_2023/06_ancillary/"
 current_year <- "2023U"
 
 # -----------------------------------------------------------------------------------------------------------------------------
-# ---- Reading in the data from SQL tables ----
+# ---- Thing to add - Reading in the data from SQL tables and running function ----
 # -----------------------------------------------------------------------------------------------------------------------------
 
-
 # establish connection to server
-con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server",
-                      server = "VMT1PR-DHSQL02")
+con <- DBI::dbConnect(odbc::odbc(), driver = "SQL Server", server = "VMT1PR-DHSQL02")
 
 
-# Select data from SQL tables
+# Select data from SQL tables - need to add the current year for unamended run and change from U to A for the amended run
 
-tm_data_raw <- tbl(con, sql(paste("select * from", tm_file))) %>% collect()
+tm_data_raw_2023 <- tbl(con, sql("select * from [KS5_STATISTICS_RESTRICTED].[TM_2023].[TM_data_2023U]")) %>% collect()
+tm_data_raw_2022 <- tbl(con, sql("select * from [KS5_STATISTICS_RESTRICTED].[TM_2022].[TM_data_2022A]")) %>% collect()
 
 # disconnect
 DBI::dbDisconnect(con)
 
 
+# add current year to the function call to produce the processed data - only needs updating for unamended runs as 
+# the SQL update above deals with version
 
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- SORTING NAs ----
-# -----------------------------------------------------------------------------------------------------------------------------
-
-colSums(is.na(tm_data_raw)) %>% as.data.frame() 
-# NAs in prior band which need removing
-# NAs in subj because no subj code was assigned within the SQL production code
-# SUBJ is assigned by joining on the QUAL_SUBJ_LOOKUP, 
-# but some subjects are filtered out of the L3VA process if they are not entered in 5 or more institutions
-# for the TMs we don't mind this rule, so we've decided to leave them in but we will have to create new SUBJ codes
-# to ensure I'm not overwriting any existing SUBJ codes, I'm generating 3 digit random numbers (SUBJ is usually 5 digits)
-
-
-tm_data_subj_na <- tm_data_raw %>% 
-  filter(!(is.na(PRIOR_BAND)),
-         is.na(SUBJ)) %>%
-  group_by(Subject) %>%
-  mutate(SUBJ = sample(100:900,1))
-
-tm_data <- tm_data_raw %>%
-  filter(!(is.na(PRIOR_BAND)),
-         !(is.na(SUBJ))) %>%
-  bind_rows(tm_data_subj_na)
-  
-colSums(is.na(tm_data)) %>% as.data.frame() 
-
-
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- NUMBERS & PERCENTAGES CALCULATED ---- 
-# -----------------------------------------------------------------------------------------------------------------------------
-
-
-student_numbers <- tm_data %>% 
-  mutate(GRADE = case_when(GRADE == "Fail" & SUBLEVNO != 130 ~ "U",
-                           TRUE ~ GRADE),
-         SIZE = case_when(ASIZE == 0 ~ GSIZE, 
-                          TRUE ~ ASIZE),
-         QUAL_ID = paste0(SUBLEVNO, SUBJ, SIZE, gradeStructure),
-         ROW_ID = paste0(SUBLEVNO, SUBJ, SIZE, PRIOR_BAND, gradeStructure)) %>%
-  mutate(across(c(everything(), -total_students), ~as.character(.))) %>%
-  arrange(ROW_ID) %>% 
-  pivot_wider(names_from = GRADE, values_from = total_students)
-
-
-
-student_percentages <- student_numbers %>% 
-  janitor::adorn_percentages() %>%
-  mutate_if(is.numeric, function(x){round(x*100, 2)}) %>%
-  mutate_if(is.numeric, ~paste0(.x, "%"))
-
+processed_data_2023 <- TM_data_prod_func(tm_data_raw_2023, 2023)
+processed_data_2022 <- TM_data_prod_func(tm_data_raw_2022, 2022)
 
 
+# This needs updating to make sure it is the current year data as it is used to produce the ancillary data
+# should only need updating for unamended run as with the above
 
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- LOOKUP & FULL GRADE OPTIONS CALCULATED ---- 
-# -----------------------------------------------------------------------------------------------------------------------------
-
-
-qual_lookup <- tm_data %>%
-  select(Qual_Description, SUBLEVNO, Subject, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
-  distinct() %>%
-  mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
-                          TRUE ~ ASIZE))
-
-
-grade_lookup_u <- tm_data %>%
-  select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
-  distinct() %>%
-  mutate(GRADE = case_when(SUBLEVNO == 130 ~ "Fail",
-                           TRUE ~ "U")) 
-  
-grade_lookup_sep <- tm_data %>%
-  select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
-  distinct() %>%
-  mutate(GRADE = gradeStructure) %>%
-  separate_rows(. , GRADE, sep = ",") 
-
-grade_lookup <- bind_rows(grade_lookup_u, grade_lookup_sep) %>%
-  arrange(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure, GRADE) %>%
-  mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
-                          TRUE ~ ASIZE))
-
-
-
-# would like to sort numeric and character grades differently so that all grades go from low - high
-# for numeric this is ascending, for character this is descending
-# will need to split again based on numeric and character and then re-combine again
-grades_char <- grade_lookup %>%
-  mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
-  filter(char_grade_check == TRUE) %>% 
-  arrange(desc(GRADE))
-
-grades_num <- grade_lookup %>%
-  mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
-  filter(char_grade_check == FALSE) %>%
-  arrange(GRADE)
-
-grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
-  arrange(SUBLEVNO)
-
+current_year_data <- processed_data_2023
 
 
 # -----------------------------------------------------------------------------------------------------------------------------
 # ---- ANCILLARY DATA FOR EES ---- 
 # -----------------------------------------------------------------------------------------------------------------------------
 
-ancillary_data_numbers <- student_numbers %>%
+ancillary_data_numbers <- current_year_data$student_numbers %>%
+  select(-ReportYr) %>% 
   pivot_longer(!c(Qual_Description, SUBLEVNO, Potential_Level, ASIZE, GSIZE, MAPPING, Subject, gradeStructure, PRIOR_BAND, SUBJ, SIZE, QUAL_ID, ROW_ID),
                names_to = "grade",
                values_to = "count") %>%
@@ -168,10 +78,11 @@ ancillary_data_numbers <- student_numbers %>%
          grade,
          count) %>%
   filter(!is.na(count)) %>%
-  arrange(qualification_code, subject_code, size, grade_structure, prior_attainment_band, grade)
+  arrange(qualification_code, subject_code, size, grade_structure, prior_attainment_band, grade, .locale = "en")
 
 
-ancillary_data_percentages <- student_percentages %>%
+ancillary_data_percentages <- current_year_data$student_percentages %>%
+  select(-ReportYr) %>% 
   pivot_longer(!c(Qual_Description, SUBLEVNO, Potential_Level, ASIZE, GSIZE, MAPPING, Subject, gradeStructure, PRIOR_BAND, SUBJ, SIZE, QUAL_ID, ROW_ID),
                names_to = "grade",
                values_to = "percentage") %>%
@@ -185,7 +96,7 @@ ancillary_data_percentages <- student_percentages %>%
          grade,
          percentage) %>%
   filter(percentage != "NA%") %>%
-  arrange(qualification_code, subject_code, size, grade_structure, prior_attainment_band, grade)
+  arrange(qualification_code, subject_code, size, grade_structure, prior_attainment_band, grade, .locale = "en")
 
 
 ancillary_data <- ancillary_data_numbers %>%
@@ -196,44 +107,26 @@ ancillary_data <- ancillary_data_numbers %>%
 write_csv(ancillary_data, paste0(ancillary_save_path, 'tm_numbers_percentages_', current_year, '.csv'))
 
 
-
 # -----------------------------------------------------------------------------------------------------------------------------
-# ---- add academic year to data ---- 
+# ---- Saving Data ---- 
 # -----------------------------------------------------------------------------------------------------------------------------
 
-student_numbers <- student_numbers %>% 
-  mutate(ReportYr = 2023) %>% 
-  select(ReportYr, everything())
+# bind all the processed data
 
-student_percentages <- student_percentages %>% 
-  mutate(ReportYr = 2023) %>% 
-  select(ReportYr, everything())
+student_numbers <- lapply(ls(pattern = "processed_data_", .GlobalEnv), function(x){
+  bind_rows(mget(x, .GlobalEnv)[[1]]["student_numbers"])}) %>% bind_rows()
 
-qual_lookup <- qual_lookup %>% 
-  mutate(ReportYr = 2023) %>% 
-  select(ReportYr, everything())
+student_percentages <- lapply(ls(pattern = "processed_data_", .GlobalEnv), function(x){
+  bind_rows(mget(x, .GlobalEnv)[[1]]["student_percentages"])}) %>% bind_rows() 
 
-grades_ordered_lookup <- grades_ordered_lookup %>% 
-  mutate(ReportYr = 2023) %>% 
-  select(ReportYr, everything())
+qual_lookup <- lapply(ls(pattern = "processed_data_", .GlobalEnv), function(x){
+  bind_rows(mget(x, .GlobalEnv)[[1]]["qual_lookup"])}) %>% bind_rows()
 
+grades_ordered_lookup <- lapply(ls(pattern = "processed_data_", .GlobalEnv), function(x){
+  bind_rows(mget(x, .GlobalEnv)[[1]]["grades_ordered_lookup"])}) %>% bind_rows()
 
-# grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)
 
-
-# -----------------------------------------------------------------------------------------------------------------------------
-# ---- Saving Data ---- 
-# -----------------------------------------------------------------------------------------------------------------------------
-
-student_numbers_old <- read_rds("./data/all_student_numbers.rds")
-student_percentages_old <- read_rds("./data/all_student_percentages.rds")
-qual_lookup_old <- read_rds("./data/qual_lookup.rds")
-grades_ordered_lookup_old <- read_rds("./data/grade_lookup.rds")
-
-student_numbers <- bind_rows(student_numbers, student_numbers_old)
-student_percentages <- bind_rows(student_percentages, student_percentages_old)
-qual_lookup <- bind_rows(qual_lookup, qual_lookup_old)
-grades_ordered_lookup <- bind_rows(grades_ordered_lookup, grades_ordered_lookup_old)
+# save as rds files
 
 saveRDS(student_numbers, "./data/all_student_numbers.rds")
 saveRDS(student_percentages, "./data/all_student_percentages.rds")
@@ -241,4 +134,3 @@ saveRDS(student_percentages, "./data/all_student_percentages.rds")
 saveRDS(qual_lookup, "./data/qual_lookup.rds")
 saveRDS(grades_ordered_lookup, "./data/grade_lookup.rds")
 
-
diff --git a/background_scripts/data_processing_func.R b/background_scripts/data_processing_func.R
new file mode 100644
index 0000000..6c9c554
--- /dev/null
+++ b/background_scripts/data_processing_func.R
@@ -0,0 +1,124 @@
+
+
+TM_data_prod_func <- function(sql_data, ReportYear) {
+  
+  # -----------------------------------------------------------------------------------------------------------------------------
+  # ---- SORTING NAs ----
+  # -----------------------------------------------------------------------------------------------------------------------------
+  
+  colSums(is.na(sql_data)) %>% as.data.frame() 
+  # NAs in prior band which need removing
+  # NAs in subj because no subj code was assigned within the SQL production code
+  # SUBJ is assigned by joining on the QUAL_SUBJ_LOOKUP, 
+  # but some subjects are filtered out of the L3VA process if they are not entered in 5 or more institutions
+  # for the TMs we don't mind this rule, so we've decided to leave them in but we will have to create new SUBJ codes
+  # to ensure I'm not overwriting any existing SUBJ codes, I'm generating 3 digit random numbers (SUBJ is usually 5 digits)
+  
+  
+  tm_data_subj_na <- sql_data %>% 
+    filter(!(is.na(PRIOR_BAND)),
+           is.na(SUBJ)) %>%
+    group_by(Subject) %>%
+    mutate(SUBJ = sample(100:900,1))
+  
+  tm_data <- sql_data %>%
+    filter(!(is.na(PRIOR_BAND)),
+           !(is.na(SUBJ))) %>%
+    bind_rows(tm_data_subj_na)
+  
+  colSums(is.na(tm_data)) %>% as.data.frame() 
+  
+  # -----------------------------------------------------------------------------------------------------------------------------
+  # ---- NUMBERS & PERCENTAGES CALCULATED ---- 
+  # -----------------------------------------------------------------------------------------------------------------------------
+  
+  student_numbers <- tm_data %>% 
+    mutate(GRADE = case_when(GRADE == "Fail" & SUBLEVNO != 130 ~ "U",
+                             TRUE ~ GRADE),
+           SIZE = case_when(ASIZE == 0 ~ GSIZE, 
+                            TRUE ~ ASIZE),
+           QUAL_ID = paste0(SUBLEVNO, SUBJ, SIZE, gradeStructure),
+           ROW_ID = paste0(SUBLEVNO, SUBJ, SIZE, PRIOR_BAND, gradeStructure)) %>%
+    mutate(across(c(everything(), -total_students), ~as.character(.))) %>%
+    arrange(ROW_ID, .locale = "en") %>% 
+    pivot_wider(names_from = GRADE, values_from = total_students)
+  
+  
+  student_percentages <- student_numbers %>% 
+    janitor::adorn_percentages() %>%
+    mutate_if(is.numeric, function(x){round(x*100, 2)}) %>%
+    mutate_if(is.numeric, ~paste0(.x, "%"))
+  
+  # -----------------------------------------------------------------------------------------------------------------------------
+  # ---- LOOKUP & FULL GRADE OPTIONS CALCULATED ---- 
+  # -----------------------------------------------------------------------------------------------------------------------------
+  
+  
+  qual_lookup <- tm_data %>%
+    select(Qual_Description, SUBLEVNO, Subject, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
+    distinct() %>%
+    mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
+                            TRUE ~ ASIZE))
+  
+  
+  grade_lookup_u <- tm_data %>%
+    select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
+    distinct() %>%
+    mutate(GRADE = case_when(SUBLEVNO == 130 ~ "Fail",
+                             TRUE ~ "U")) 
+  
+  grade_lookup_sep <- tm_data %>%
+    select(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure) %>%
+    distinct() %>%
+    mutate(GRADE = gradeStructure) %>%
+    separate_rows(. , GRADE, sep = ",") 
+  
+  grade_lookup <- bind_rows(grade_lookup_u, grade_lookup_sep) %>%
+    arrange(SUBLEVNO, SUBJ, ASIZE, GSIZE, gradeStructure, GRADE) %>%
+    mutate(SIZE = case_when(ASIZE == 0 ~ GSIZE, 
+                            TRUE ~ ASIZE))
+  
+  
+  # would like to sort numeric and character grades differently so that all grades go from low - high
+  # for numeric this is ascending, for character this is descending
+  # will need to split again based on numeric and character and then re-combine again
+  grades_char <- grade_lookup %>%
+    mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
+    filter(char_grade_check == TRUE) %>% 
+    arrange(desc(GRADE))
+  
+  grades_num <- grade_lookup %>%
+    mutate(char_grade_check = is.na(suppressWarnings(as.numeric(grade_lookup$GRADE)))) %>%  # gives extra column - True is character, False is number
+    filter(char_grade_check == FALSE) %>%
+    arrange(GRADE)
+  
+  grades_ordered_lookup <- bind_rows(grades_char, grades_num) %>%
+    arrange(SUBLEVNO)
+  
+  # -----------------------------------------------------------------------------------------------------------------------------
+  # ---- add academic year to data ---- 
+  # -----------------------------------------------------------------------------------------------------------------------------
+  
+  student_numbers <- student_numbers %>% 
+    mutate(ReportYr = ReportYear) %>% 
+    select(ReportYr, everything())
+  
+  student_percentages <- student_percentages %>% 
+    mutate(ReportYr = ReportYear) %>% 
+    select(ReportYr, everything())
+  
+  qual_lookup <- qual_lookup %>% 
+    mutate(ReportYr = ReportYear) %>% 
+    select(ReportYr, everything())
+  
+  grades_ordered_lookup <- grades_ordered_lookup %>% 
+    mutate(ReportYr = ReportYear) %>% 
+    select(ReportYr, everything())
+  
+  
+  # grade_list %>% filter(SUBLEVNO == 253, SUBJ == 20596, ASIZE == 1)
+  
+  return(list(student_numbers = student_numbers, student_percentages = student_percentages, 
+              qual_lookup = qual_lookup, grades_ordered_lookup = grades_ordered_lookup))
+  
+}
diff --git a/ui.R b/ui.R
index 60b6b3a..f2d52f9 100644
--- a/ui.R
+++ b/ui.R
@@ -80,7 +80,7 @@ ui <- function(input, output, session) {
       ),
     shinyjs::useShinyjs(),
     customDisconnectMessage(),
-    useShinydashboard(),
+    #useShinydashboard(),
     tags$head(includeHTML(("google-analytics.html"))),
     tags$head(
       tags$link(