Top > 気まぐれ日記 > 2005-08-11

番組表取得 *

番組表がなければ始まらないので、ONTV JAPANから番組表を取ってくるコードを書く。
(デバッグしていないので、参考にする人は注意...タグや文字実体参照も未対応 :(

htmlのパーサはmshtmlを使うのが簡単だけど、入っていない環境も多いので使わず。

C#だとXMLを吐いた方が何かと便利なので、XMLで。
将来的にはXMLTVに準拠したほうが何かと便利なのか?

/// <summary>
/// Downloads the ONTV JAPAN programme grid for one channel and writes it to
/// standard output as an XML document (a "tv" root holding one "channel"
/// element followed by one "programme" element per recognised listing).
/// </summary>
/// <param name="args">
/// args[0] is the channel number (a non-negative integer). It is zero-padded
/// to four digits to build the "ch" query parameter.
/// </param>
/// <remarks>
/// Diagnostics go to standard error so that standard output stays valid XML.
/// A failed download is reported and an otherwise empty document is emitted.
/// HTML tags other than img and character entity references are not decoded.
/// </remarks>
static void Main(string[] args)
{
    int ch = -1;
    if(args.Length > 0)
    {
        try
        {
            ch = Convert.ToInt32(args[0]);
        }
        catch(FormatException)
        {
            Console.Error.WriteLine("Invalid channel number: " + args[0]);
        }
        catch(OverflowException)
        {
            Console.Error.WriteLine("Channel number out of range: " + args[0]);
        }
    }
    if(ch < 0)
    {
        Console.Error.WriteLine("A non-negative channel number is required as the first argument.");
        return;
    }

    // Invariant culture keeps the emitted dates/times identical on every machine.
    System.Globalization.CultureInfo inv = System.Globalization.CultureInfo.InvariantCulture;

    string chname = "";
    string chid   = string.Format("{0:0000}", ch);
    Uri uri = new Uri(string.Format("http://www.ontvjapan.com/program/gridChannel.php?ch={0}", chid));
    string htmldata = "";
    try
    {
        // WebClient is disposable; release it as soon as the page is fetched.
        using(WebClient wc = new WebClient())
        {
            wc.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows XP)");
            byte[] data = wc.DownloadData(uri.AbsoluteUri);
            htmldata = Encoding.GetEncoding("euc-jp").GetString(data);
        }
    }
    catch(Exception ex)
    {
        // Best effort: report the failure and fall through with empty HTML,
        // which yields a document containing only the channel element.
        Console.Error.WriteLine("Failed to download " + uri.AbsoluteUri + ": " + ex.Message);
    }

    // The channel name is whatever follows the first '/' inside <title>.
    Match titlemc = Regex.Match(htmldata, "<title.*?>.*?/(.*?)</title>"
                                , RegexOptions.IgnoreCase | RegexOptions.Singleline);
    if(titlemc.Success) chname = titlemc.Groups[1].Value.Trim();

    // Groups: 1 = detail href, 2 = hsid (15 digits, starts with yyyyMMdd),
    // 3 = start HH:mm, 4 = end HH:mm, 5 = genre, 6 = title, 8 = subtitle,
    // 9 = corner, 10 = iEPG href.
    Regex programmeRegex = new Regex(
        "<span\\sclass=\"style_title\">" +
        ".*?<a.*?href=\"(.*?hsid=(\\d{15}?))\".*?title=\"(\\d{2}:\\d{2})-(\\d{2}:\\d{2})(\\s.*?)\">(.*?)</a>" +
        ".*?</span>" +
        "(.*?<span\\sclass=\"style_subtitle\">(.*?)</span>|)" +
        ".*?<span\\sclass=\"style_corner\">(.*?)</span>" +
        ".*?<a\\shref=\"(.*?)\">"
        , RegexOptions.IgnoreCase);

    XmlTextWriter writer = new XmlTextWriter(Console.Out);
    try
    {
        writer.Formatting = Formatting.Indented;
        writer.WriteStartDocument(true);
        writer.WriteStartElement("tv");
        writer.WriteStartElement("channel");
        writer.WriteAttributeString("id", chid);
        writer.WriteElementString("name", chname);
        writer.WriteEndElement();

        // Every table cell is a candidate; only cells matching the programme pattern are emitted.
        Match tdmc = Regex.Match(htmldata, "<td.*?>(.*?)</td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        while (tdmc.Success)
        {
            Match pgmc = programmeRegex.Match(tdmc.Groups[1].Value);
            if(pgmc.Success)
            {
                string hsid      = pgmc.Groups[2].Value.Trim();
                string startText = pgmc.Groups[3].Value;
                string endText   = pgmc.Groups[4].Value;

                int pgyear   = Convert.ToInt32(hsid.Substring(0, 4));
                int pgmonth  = Convert.ToInt32(hsid.Substring(4, 2));
                int pgdate   = Convert.ToInt32(hsid.Substring(6, 2));
                int pgsthour = Convert.ToInt32(startText.Substring(0, 2));
                int pgstmin  = Convert.ToInt32(startText.Substring(3, 2));
                int pgedhour = Convert.ToInt32(endText.Substring(0, 2));
                int pgedmin  = Convert.ToInt32(endText.Substring(3, 2));

                DateTime pgstart, pgend;
                try
                {
                    // Build the values directly instead of formatting and re-parsing
                    // a string, which depended on the current culture.
                    pgstart = new DateTime(pgyear, pgmonth, pgdate, pgsthour, pgstmin, 0);
                    // The end time must use the END minute (the original reused the start minute).
                    pgend   = new DateTime(pgyear, pgmonth, pgdate, pgedhour, pgedmin, 0);
                }
                catch(ArgumentOutOfRangeException)
                {
                    Console.Error.WriteLine("Skipping programme with invalid date/time (hsid=" + hsid + ").");
                    tdmc = tdmc.NextMatch();
                    continue;
                }

                // Start times before 05:00 are treated as belonging to the next calendar day.
                if(pgsthour < 5)
                {
                    pgstart = pgstart.AddDays(1);
                    pgend   = pgend.AddDays(1);
                }
                // A programme that crosses midnight ends on the following day.
                if(pgstart > pgend)
                {
                    pgend = pgend.AddDays(1);
                }

                string pggenre    = pgmc.Groups[5].Value.Trim();
                string pgtitle    = StripImgTags(pgmc.Groups[6].Value);
                string pgsubtitle = StripImgTags(pgmc.Groups[8].Value);
                string pgcorner   = StripImgTags(pgmc.Groups[9].Value);
                // Resolve the (possibly relative) iEPG link against the page URI.
                string pgiepg     = (new Uri(uri, pgmc.Groups[10].Value)).AbsoluteUri;

                writer.WriteStartElement("programme");
                writer.WriteAttributeString("channel", chid);
                writer.WriteAttributeString("start", pgstart.ToString("yyyy-MM-ddTHH:mm:00Z", inv));
                writer.WriteElementString("year",  pgstart.ToString("yyyy", inv));
                writer.WriteElementString("month", pgstart.ToString("MM", inv));
                writer.WriteElementString("date",  pgstart.ToString("dd", inv));
                writer.WriteElementString("start", pgstart.ToString("HH:mm", inv));
                writer.WriteElementString("end",   pgend.ToString("HH:mm", inv));
                writer.WriteElementString("genre", pggenre);
                writer.WriteElementString("program-title",    pgtitle);
                writer.WriteElementString("program-subtitle", pgsubtitle);
                writer.WriteElementString("program-corner",   pgcorner);
                writer.WriteElementString("iepg",  pgiepg);
                writer.WriteEndElement();
            }
            tdmc = tdmc.NextMatch();
        }
        writer.WriteEndElement();
        writer.WriteEndDocument();
    }
    finally
    {
        // Close even when parsing throws so buffered output is flushed.
        writer.Close();
    }
}

/// <summary>
/// Removes every img tag from an HTML fragment and trims surrounding whitespace.
/// </summary>
/// <param name="html">HTML fragment captured from the programme cell.</param>
/// <returns>The fragment without img tags, trimmed.</returns>
private static string StripImgTags(string html)
{
    return Regex.Replace(html, "<img.*?>", "", RegexOptions.IgnoreCase).Trim();
}

意味もなくIEに偽装。

C# *

実はまだデスクトップにVS2003入れてないのでノートで作成...重いよ :(