From d9d24821b736410c38cbcd976e9032937337f09f Mon Sep 17 00:00:00 2001 From: Michael Gordeev Date: Mon, 10 Feb 2020 20:00:18 +0300 Subject: [PATCH] Cleanup and optimization of the parser (not finished) --- .../Activities/ExportActivity.cs | 2 +- GUT.Schedule/GUTSchedule.Droid/Calendar.cs | 4 +- .../Models/CabinetExportParameters.cs | 8 + .../GUTSchedule/Models/CabinetSubject.cs | 46 --- GUT.Schedule/GUTSchedule/Models/Data.cs | 20 -- GUT.Schedule/GUTSchedule/Models/DataSet.cs | 16 - .../Models/DefaultExportParameters.cs | 10 + .../GUTSchedule/Models/ExportParameters.cs | 10 + GUT.Schedule/GUTSchedule/Models/Occupation.cs | 17 ++ GUT.Schedule/GUTSchedule/Models/Subject.cs | 62 ---- GUT.Schedule/GUTSchedule/Parser.cs | 278 ++++++++++++------ 11 files changed, 238 insertions(+), 235 deletions(-) create mode 100644 GUT.Schedule/GUTSchedule/Models/CabinetExportParameters.cs delete mode 100644 GUT.Schedule/GUTSchedule/Models/CabinetSubject.cs delete mode 100644 GUT.Schedule/GUTSchedule/Models/Data.cs delete mode 100644 GUT.Schedule/GUTSchedule/Models/DataSet.cs create mode 100644 GUT.Schedule/GUTSchedule/Models/DefaultExportParameters.cs create mode 100644 GUT.Schedule/GUTSchedule/Models/ExportParameters.cs create mode 100644 GUT.Schedule/GUTSchedule/Models/Occupation.cs delete mode 100644 GUT.Schedule/GUTSchedule/Models/Subject.cs diff --git a/GUT.Schedule/GUTSchedule.Droid/Activities/ExportActivity.cs b/GUT.Schedule/GUTSchedule.Droid/Activities/ExportActivity.cs index a8f8419..857efc7 100644 --- a/GUT.Schedule/GUTSchedule.Droid/Activities/ExportActivity.cs +++ b/GUT.Schedule/GUTSchedule.Droid/Activities/ExportActivity.cs @@ -54,7 +54,7 @@ namespace GUTSchedule.Droid.Activities } else { - List schedule = await Parser.LoadSchedule(); + List schedule = await Parser.LoadSchedule(); schedule = schedule.FindAll(i => i.StartTime.Date >= Data.StartDate && i.StartTime.Date <= Data.EndDate); // Filtering schedule according to export range diff --git 
a/GUT.Schedule/GUTSchedule.Droid/Calendar.cs b/GUT.Schedule/GUTSchedule.Droid/Calendar.cs index 75f7c15..987752b 100644 --- a/GUT.Schedule/GUTSchedule.Droid/Calendar.cs +++ b/GUT.Schedule/GUTSchedule.Droid/Calendar.cs @@ -40,11 +40,11 @@ namespace GUTSchedule.Droid cursor.Close(); } - public static void Export(IEnumerable schedule) + public static void Export(IEnumerable schedule) { DataSet data = Data.DataSet; - foreach (Subject item in schedule) + foreach (Occupation item in schedule) { ContentValues eventValues = new ContentValues(); diff --git a/GUT.Schedule/GUTSchedule/Models/CabinetExportParameters.cs b/GUT.Schedule/GUTSchedule/Models/CabinetExportParameters.cs new file mode 100644 index 0000000..5c19a26 --- /dev/null +++ b/GUT.Schedule/GUTSchedule/Models/CabinetExportParameters.cs @@ -0,0 +1,8 @@ +namespace GUTSchedule.Models +{ + public class CabinetExportParameters : ExportParameters + { + public string Email { get; set; } + public string Password { get; set; } + } +} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Models/CabinetSubject.cs b/GUT.Schedule/GUTSchedule/Models/CabinetSubject.cs deleted file mode 100644 index 52254bc..0000000 --- a/GUT.Schedule/GUTSchedule/Models/CabinetSubject.cs +++ /dev/null @@ -1,46 +0,0 @@ -using System; - -namespace GUTSchedule.Models -{ - public class CabinetSubject - { - public string Name { get; set; } - public string Type { get; set; } - public string Cabinet { get; set; } - public string Order { get; set; } - public DateTime StartTime { get; set; } - public DateTime EndTime { get; set; } - public string Opponent { get; set; } - public bool ProfessorSchedule { get; set; } - - public CabinetSubject(string name, string type, string cabinet, string opponent, int year, int month, int day, string schedule, bool profSchedule) - { - Name = name; - Type = type; - Cabinet = cabinet; - Opponent = opponent; - ProfessorSchedule = profSchedule; - - string[] time = schedule.Split('-'); - - StartTime = new 
DateTime(year, month, day, int.Parse(time[0].Split('.')[0]), int.Parse(time[0].Split('.')[1]), 0); - EndTime = new DateTime(year, month, day, int.Parse(time[1].Split('.')[0]), int.Parse(time[1].Split('.')[1]), 0); - switch (time[0]) - { - case "09.00": Order = "1"; break; - case "10.45": Order = "2"; break; - case "13.00": Order = "3"; break; - case "14.45": Order = "4"; break; - case "16.30": Order = "5"; break; - case "18.15": Order = "6"; break; - case "20.00": Order = "7"; break; - case "10.30": Order = "2"; break; //Расписание для пар по физ-ре - case "12.00": Order = "3"; break; - case "13.30": Order = "4"; break; - case "15.00": Order = "5"; break; - case "18.00": Order = "7"; break; - default: Order = ""; break; - } - } - } -} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Models/Data.cs b/GUT.Schedule/GUTSchedule/Models/Data.cs deleted file mode 100644 index f435839..0000000 --- a/GUT.Schedule/GUTSchedule/Models/Data.cs +++ /dev/null @@ -1,20 +0,0 @@ -using GUTSchedule.Models; -using System; -using System.Collections.Generic; - -namespace GUTSchedule -{ - public static class Data - { - public static List<(string Id, string Name)> Faculties { get; set; } - public static List<(string Id, string Name)> Groups { get; set; } - public static int FirstWeekDay { get; set; } - public static DateTime StartDate { get; set; } = DateTime.Today; - public static DateTime EndDate { get; set; } = DateTime.Today.AddDays(7); - - /// - /// Export parameters - /// - public static DataSet DataSet { get; set; } - } -} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Models/DataSet.cs b/GUT.Schedule/GUTSchedule/Models/DataSet.cs deleted file mode 100644 index 90ea0fc..0000000 --- a/GUT.Schedule/GUTSchedule/Models/DataSet.cs +++ /dev/null @@ -1,16 +0,0 @@ -using System.Net.Http; - -namespace GUTSchedule.Models -{ - public class DataSet - { - public string Calendar { get; set; } - public string Faculty { get; set; } - public int Course { get; 
set; } - public string Group { get; set; } - public int Reminder { get; set; } - public bool AddGroupToTitle { get; set; } - public HttpClient HttpClient { get; set; } - public bool? IsProfessor { get; set; } - } -} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Models/DefaultExportParameters.cs b/GUT.Schedule/GUTSchedule/Models/DefaultExportParameters.cs new file mode 100644 index 0000000..0a83b91 --- /dev/null +++ b/GUT.Schedule/GUTSchedule/Models/DefaultExportParameters.cs @@ -0,0 +1,10 @@ +namespace GUTSchedule.Models +{ + public class DefaultExportParameters : ExportParameters + { + public string FacultyId { get; set; } + public string GroupId { get; set; } + public string Course { get; set; } + public bool Session { get; set; } + } +} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Models/ExportParameters.cs b/GUT.Schedule/GUTSchedule/Models/ExportParameters.cs new file mode 100644 index 0000000..1078c5b --- /dev/null +++ b/GUT.Schedule/GUTSchedule/Models/ExportParameters.cs @@ -0,0 +1,10 @@ +using System; + +namespace GUTSchedule.Models +{ + public abstract class ExportParameters + { + public DateTime StartDate { get; set; } + public DateTime EndDate { get; set; } + } +} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Models/Occupation.cs b/GUT.Schedule/GUTSchedule/Models/Occupation.cs new file mode 100644 index 0000000..9d6aaec --- /dev/null +++ b/GUT.Schedule/GUTSchedule/Models/Occupation.cs @@ -0,0 +1,17 @@ +using System; + +namespace GUTSchedule.Models +{ + public class Occupation + { + public string Name { get; set; } + public string Type { get; set; } + public string Cabinet { get; set; } + public string Order { get; set; } + public DateTime StartTime { get; set; } + public DateTime EndTime { get; set; } + public string Opponent { get; set; } + + public string Group { get; set; } + } +} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Models/Subject.cs 
b/GUT.Schedule/GUTSchedule/Models/Subject.cs deleted file mode 100644 index c15a184..0000000 --- a/GUT.Schedule/GUTSchedule/Models/Subject.cs +++ /dev/null @@ -1,62 +0,0 @@ -using System; -using System.Collections.Generic; - -namespace GUTSchedule.Models -{ - public class Subject - { - public string Name { get; set; } - public string Type { get; set; } - public string Professor { get; set; } - public string[] Cabinets { get; set; } - public string Order { get; set; } - public DateTime StartTime { get; set; } - public DateTime EndTime { get; set; } - public string Group { get; set; } - - public static List GetSubject(string name, string type, string professor, string place, int order, string[] weeks, int weekday, string group) - { - List subjects = new List(); - string[] cabinets = place.Replace("ауд.: ", "").Replace("; Б22", "").Split(';'); - string pair = order < 10 ? order.ToString() : $"Ф{order - 81}"; - - foreach (string week in weeks) - subjects.Add(new Subject(name, type, professor, cabinets, pair, int.Parse(week), weekday, group)); - - return subjects; - } - - public Subject(string name, string type, string prof, string[] cabs, string order, int week, int weekday, string group) - { - Name = name; - Type = type; - Professor = prof; - Cabinets = cabs; - Order = order; - Group = group; - - StartTime = Extensions.GetDateFromWeeks(week, weekday); - string rawTime; - switch (order) - { - case "1": rawTime = "9:00"; break; - case "2": rawTime = "10:45"; break; - case "3": rawTime = "13:00"; break; - case "4": rawTime = "14:45"; break; - case "5": rawTime = "16:30"; break; - case "6": rawTime = "18:15"; break; - case "7": rawTime = "20:00"; break; - case "Ф1": rawTime = "9:00"; break; //Расписание для пар по физ-ре - case "Ф2": rawTime = "10:30"; break; - case "Ф3": rawTime = "12:00"; break; - case "Ф4": rawTime = "13:30"; break; - case "Ф5": rawTime = "15:00"; break; - case "Ф6": rawTime = "16:30"; break; - case "Ф7": rawTime = "18:00"; break; - default: rawTime = 
"9:00"; break; - } - StartTime = StartTime.Add(TimeSpan.Parse(rawTime)); - EndTime = StartTime + TimeSpan.FromMinutes(order.Contains("Ф") ? 90 : 95); - } - } -} \ No newline at end of file diff --git a/GUT.Schedule/GUTSchedule/Parser.cs b/GUT.Schedule/GUTSchedule/Parser.cs index f4b09ae..cc97a5a 100644 --- a/GUT.Schedule/GUTSchedule/Parser.cs +++ b/GUT.Schedule/GUTSchedule/Parser.cs @@ -11,108 +11,119 @@ using System.Threading.Tasks; namespace GUTSchedule { + public enum ScheduleType + { + Default = 1, + Session = 2 + } + public static class Parser { - public static async Task> LoadSchedule() + public static async Task VaildateAuthorization(string email, string password) { - List schedule = new List(); + if (string.IsNullOrWhiteSpace(email)) + throw new ArgumentNullException(nameof(email)); + if (string.IsNullOrWhiteSpace(password)) + throw new ArgumentNullException(nameof(password)); + HttpClient client = new HttpClient(); - Dictionary requestBody = new Dictionary - { - { "group_el", "0" }, - { "kurs", Data.DataSet.Course.ToString() }, - { "type_z", "1" }, - { "faculty", Data.DataSet.Faculty }, - { "group", Data.DataSet.Group }, - { "ok", "Показать" }, - { "schet", GetCurrentSemester() } - }; - HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, "https://cabinet.sut.ru/raspisanie_all_new") - { - Content = new FormUrlEncodedContent(requestBody) - }; - request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/x-www-form-urlencoded"); + + await client.GetAsync("https://cabs.itut.ru/cabinet/"); + + HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, "https://cabs.itut.ru/cabinet/lib/autentificationok.php"); + request.SetContent( + ("users", email), + ("parole", password)); HttpResponseMessage response = await client.SendAsync(request); + string responseContent = await response.GetString(); - IHtmlDocument doc = new HtmlParser().ParseDocument(await response.Content.ReadAsStringAsync()); + if 
(!response.IsSuccessStatusCode) + throw new HttpRequestException($"{response.StatusCode} ({response.ReasonPhrase}): {responseContent}"); - string groupName = Data.Groups.First(i => i.Id == Data.DataSet.Group).Name; - - IHtmlCollection pairs = doc.QuerySelectorAll(".pair"); - foreach (IElement item in pairs) + if (!responseContent.StartsWith("1", StringComparison.OrdinalIgnoreCase)) { - string name, type, professor, place; - int order, weekday; - string[] weeks; + Dictionary<string, string> responseQuery = new Dictionary<string, string>(); + foreach (string i in responseContent.Split('&')) + responseQuery.Add(i.Split('=')[0], i.Split('=')[1]); - name = item.QuerySelector(".subect strong")?.TextContent ?? "Неизвестный предмет (см. Расписание)"; - type = item.QuerySelector(".type").TextContent.Replace("(", "").Replace(")", ""); - professor = item.QuerySelector(".teacher")?.GetAttribute("title").Replace(";", "") ?? ""; - place = item.QuerySelector(".aud")?.TextContent ?? "СПбГУТ"; - order = int.Parse(item.GetAttribute("pair")) - 1; - weeks = item.QuerySelector(".weeks").TextContent.Replace("(", "").Replace("н)", "").Replace(" ", "").Split(','); - weekday = int.Parse(item.GetAttribute("weekday")); + throw new System.Security.VerificationException(responseQuery["error"].Replace("|", "; ")); + } + } - schedule.AddRange(Subject.GetSubject(name, type, professor, place, order, weeks, weekday, groupName)); + public static async Task<List<Occupation>> GetSchedule(ExportParameters exportParameters) + { + List<Occupation> schedule = new List<Occupation>(); + + if (exportParameters is CabinetExportParameters) + { + + } + else if (exportParameters is DefaultExportParameters arg) + { + if (arg.Session) + schedule.AddRange(await GetSessionSchedule()); + else + { + int offsetDay = int.Parse(await new HttpClient().GetStringAsync("https://xfox111.net/schedule_offset.txt")); + schedule.AddRange(await GetRegularSchedule(offsetDay, arg.FacultyId, arg.Course, arg.GroupId)); + } + } + else + throw new ArgumentException("Invalid argument instance", 
nameof(exportParameters)); + + // Merge duplicating entries + schedule = schedule.OrderByDescending(i => i.StartTime).ThenBy(i => i.Name, StringComparer.Ordinal).ThenBy(i => i.Type, StringComparer.Ordinal).ToList(); // LINQ does not sort in place: keep the result so that duplicates end up adjacent + for (int k = 1; k < schedule.Count; k++) + if (schedule[k - 1].StartTime == schedule[k].StartTime && + schedule[k - 1].Name == schedule[k].Name && + schedule[k - 1].Type == schedule[k].Type) + { + schedule[k - 1].Opponent += "\n" + schedule[k].Opponent; + schedule[k - 1].Cabinet += "; " + schedule[k].Cabinet; + schedule.RemoveAt(k--); + } + + return schedule.FindAll(i => i.StartTime.Date >= exportParameters.StartDate && i.StartTime.Date <= exportParameters.EndDate); + } + + public static async Task<List<(string id, string name)>> GetFaculties(ScheduleType scheduleType) => + await GetList( + ("choice", "1"), + ("kurs", "0"), + ("type_z", ((int)scheduleType).ToString()), + ("schet", GetCurrentSemester())); + + public static async Task<List<(string id, string name)>> GetGroups(ScheduleType scheduleType, string facultyId, string course = "0") => + await GetList( + ("choice", "1"), + ("kurs", course), + ("type_z", ((int)scheduleType).ToString()), + ("schet", GetCurrentSemester()), + ("faculty", facultyId)); + + private static async Task<List<(string id, string name)>> GetList(params (string key, string value)[] parameters) + { + List<(string id, string name)> list = new List<(string, string)>(); + using (HttpClient client = new HttpClient()) + { + HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, "https://cabinet.sut.ru/raspisanie_all_new.php"); + request.SetContent(parameters); + request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/x-www-form-urlencoded"); + + HttpResponseMessage response = await client.SendAsync(request); + string responseBody = await response.Content.ReadAsStringAsync(); + if (string.IsNullOrWhiteSpace(responseBody)) + return list; + + foreach (string s in responseBody.Remove(responseBody.Length - 1).Split(';')) + list.Add((s.Split(',')[0], s.Split(',')[1])); } - return schedule; + return list; } - public static async Task LoadFaculties() - { - Data.Faculties = new List<(string, string)>(); - 
HttpClient client = new HttpClient(); - Dictionary requestBody = new Dictionary - { - { "choice", "1" }, - { "kurs", "0" }, - { "type_z", "1" }, - { "schet", GetCurrentSemester() } - }; - HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, "https://cabinet.sut.ru/raspisanie_all_new.php") - { - Content = new FormUrlEncodedContent(requestBody) - }; - request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/x-www-form-urlencoded"); - - HttpResponseMessage response = await client.SendAsync(request); - string responseBody = await response.Content.ReadAsStringAsync(); - if (string.IsNullOrWhiteSpace(responseBody)) - throw new NullReferenceException("Расписание на текущий семестр еще не объявлено"); - - foreach (string s in responseBody.Split(';')) - try { Data.Faculties.Add((s.Split(',')[0], s.Split(',')[1])); } - catch { } - } - - public static async Task LoadGroups(string facultyId, int course) - { - HttpClient client = new HttpClient(); - Dictionary requestBody = new Dictionary - { - { "choice", "1" }, - { "kurs", course.ToString() }, - { "type_z", "1" }, - { "faculty", facultyId }, - { "schet", GetCurrentSemester() } - }; - HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, "https://cabinet.sut.ru/raspisanie_all_new.php") - { - Content = new FormUrlEncodedContent(requestBody) - }; - request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/x-www-form-urlencoded"); - - HttpResponseMessage response = await client.SendAsync(request); - string responseBody = await response.Content.ReadAsStringAsync(); - Data.Groups = new List<(string, string)>(); - foreach (string s in responseBody.Split(';')) - try { Data.Groups.Add((s.Split(',')[0], s.Split(',')[1])); } - catch { } - } - - static string GetCurrentSemester() + private static string GetCurrentSemester() { DateTime now = DateTime.Today; @@ -122,7 +133,98 @@ namespace GUTSchedule return $"205.{now.Year - 2001}{now.Year - 2000}/2"; } - public static 
async Task> GetCabinetSchedule(HttpClient client, DateTime date, bool checkProfSchedule) + private static DateTime[] GetDatesFromWeeks(int offsetDay, int weekday, string[] weeks) + { + List<DateTime> dates = new List<DateTime>(); + foreach(string rawWeek in weeks) + { + int week = int.Parse(rawWeek); + DateTime date = new DateTime(DateTime.Today.Year, DateTime.Today.Month >= 8 ? 9 : 2, offsetDay); + + date = date.AddDays((week - 1) * 7); // week 1 starts at the offset day itself + date = date.AddDays(weekday - 1); // 'weekday' must not be decremented here: it is reused for every week of the loop + + dates.Add(date); + } + + return dates.ToArray(); + } + + private static async Task<List<Occupation>> GetRegularSchedule(int offsetDay, string facultyId, string course, string groupId) + { + if (string.IsNullOrWhiteSpace(facultyId)) + throw new ArgumentNullException(nameof(facultyId)); + if (string.IsNullOrWhiteSpace(course)) + throw new ArgumentNullException(nameof(course)); + if (string.IsNullOrWhiteSpace(groupId)) + throw new ArgumentNullException(nameof(groupId)); + + List<Occupation> schedule = new List<Occupation>(); + using (HttpClient client = new HttpClient()) + { + HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, "https://cabinet.sut.ru/raspisanie_all_new"); + request.SetContent( + ("group_el", "0"), + ("kurs", course), + ("type_z", "1"), + ("faculty", facultyId), + ("group", groupId), + ("ok", "Показать"), + ("schet", GetCurrentSemester())); + + request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/x-www-form-urlencoded"); + + HttpResponseMessage response = await client.SendAsync(request); + string responseContent = await response.Content.ReadAsStringAsync(); + if (string.IsNullOrWhiteSpace(responseContent)) + return schedule; + + IHtmlDocument doc = new HtmlParser().ParseDocument(responseContent); + + string groupName = doc.QuerySelector("#group")?.Children.FirstOrDefault(i => i.HasAttribute("selected"))?.TextContent; + + IHtmlCollection<IElement> pairs = doc.QuerySelectorAll(".pair"); + foreach (IElement item in pairs) + { + DateTime[] dates = GetDatesFromWeeks( + offsetDay, + int.Parse(item.GetAttribute("weekday")), 
+ item.QuerySelector(".weeks").TextContent.Replace("(", "").Replace("н)", "").Replace(" ", "").Split(',')); + + foreach(DateTime date in dates) // TODO(review): 'date' is never stored in the new Occupation (StartTime and EndTime stay unset) + { + schedule.Add(new Occupation + { + Name = item.QuerySelector(".subect").TextContent, + Type = item.QuerySelector(".type").TextContent, + Group = groupName + }); + } + string name, type, professor, place; + int order, weekday; + string[] weeks; + + name = item.QuerySelector(".subect")?.TextContent ?? "Неизвестный предмет (см. Расписание)"; + type = item.QuerySelector(".type").TextContent.Replace("(", "").Replace(")", ""); + professor = item.QuerySelector(".teacher")?.GetAttribute("title").Replace(";", "") ?? ""; + place = item.QuerySelector(".aud")?.TextContent ?? "СПбГУТ"; + order = int.Parse(item.GetAttribute("pair")) - 1; + weeks = item.QuerySelector(".weeks").TextContent.Replace("(", "").Replace("н)", "").Replace(" ", "").Split(','); + weekday = int.Parse(item.GetAttribute("weekday")); + + schedule.AddRange(Occupation.GetSubject(name, type, professor, place, order, weeks, weekday, groupName)); // TODO(review): the Occupation model added by this patch declares no GetSubject; leftover from the removed Subject class + } + } + + return schedule; + } + + private static async Task<List<Occupation>> GetSessionSchedule() + { + throw new NotImplementedException(); // session schedule parsing is not written yet; an empty body does not compile for a value-returning method + } + + private static async Task> GetCabinetSchedule(HttpClient client, DateTime date, bool checkProfSchedule) { HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, $"https://cabs.itut.ru/cabinet/project/cabinet/forms/{(checkProfSchedule ? "pr_" : "")}raspisanie_kalendar.php"); request.SetContent(