(* Simple Time Series Conversational Engine Mathematica package Copyright (C) 2014 Anton Antonov This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Written by Anton Antonov, antononcube@gmail.com, Windermere, Florida, USA. *) (* In order to run this package the packages FunctionalParsers.m and QuantileRegression.m has to be loaded. These packages are provided by the project MathematicaForPrediction at GitHub. *) Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/FunctionalParsers.m"] Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/QuantileRegression.m"] (* ::Section:: *) (*Grammar*) timeSeriesCode = timeSeriesEBNFCode = " = 'find' | 'compute' | 'calculate' | 'show' ; = ( ( 'temperature' | 'pressure' | 'wind' , 'speed' ) \[LeftTriangle] ( 'of' | 'for' ) ) , <@ TSWeatherSpec[#]& ; = [ 'the' ] \[RightTriangle] , ( 'temperature' | 'pressure' | 'wind' , 'speed' ) <@ TSWeatherSpec[Reverse[#]]& ; = | ; = '_LetterString' | '_LetterString' , [ ',' ] , '_LetterString' , [ [ ',' ] , '_LetterString' ] <@ TSCitySpec[Flatten[{#}]]& ; = '_String' <@ TSCompanySpec ; = ( [ 'the' ] \[RightTriangle] ( [ 'stock' ] \[RightTriangle] 'price' | 'trade' \[RightTriangle] 'volume' ) \[LeftTriangle] [ 'of' | 'for' ] ) , <@ TSFinancialData ; = [ 'the' ] \[RightTriangle] , ( [ 'stock' ] \[RightTriangle] 'price' | 'trade' \[RightTriangle] 'volume' ) <@ TSFinancialData[Reverse[#]]& ; = | ; = [ 'the' ] \[RightTriangle] 'last' , [ 'loaded' ] , ( 'data' | 'file' ) <@ TSPastData[Flatten[{#}]]& ; = | | ; = [ ( '1' | 'one' | 'a' ) ] , 'regression' , 'quantile' | 'quantile' , 'regression' <@ TSBSplineQRegression[1]& ; = [ 'Range[1,40]' ] \[LeftTriangle] ( 'regression' , 'quantiles' ) <@ TSBSplineQRegression[#]& ; = | ; = [ 'the' ] \[RightTriangle] [ ( 'top' | 'bottom' | 'largest' | 'smallest' | 'all' ) ] , 'outliers' <@ TSOutliers[Flatten[#]]& ; = ( 'least' , 'squares' , [ 'fit' ] , [ 'with' | 'of' ] ) \[RightTriangle] '_String' <@ TSLeastSquaresFit[#]& ; = | | ; = [ ] \[RightTriangle] <@ TSOperateCommand[#]& ; = , ( 'for' | 'on' | 'in' | 'over' | 'of' ) \[RightTriangle] <@ TSOperateOnDataCommand[#]& ; = ( 'load' , [ 'data' ] , 'file' ) \[RightTriangle] ( '_String' ) <@ TSLoadFile[#]& ; = ( 'load' , [ 'the' ] , [ 'data' ] ) \[RightTriangle] <@ TSLoadData[#]& ; = 'start' , 'over' | 'clear' <@ TSStartOver[Flatten[{#}]]& ; = 'clear' , ( 'plots' | 'plots' | 'graphics' ) <@ TSClearGraphics ; = 'what' , ( ( 'operations' , 'are' | [ 'are' ] , [ 'the' ] , 'operations' ) , [ 'implemented' | 'in' ] ) | [ 'what' ] , ( 'operation' | 'operations' ) , ( 'I' , 'can' | 'to' ) , ( 'use' | 'do' ) <@ TSWhatOperations[Flatten[{#}]]& ; = 'help' | [ 'all' ] , 'commands' <@ TSHelp[Flatten[{#}]]& ; = | ; = [ 'plot' | 'plots' ] , 'joined' | 'Joined' , '->' , 'True' | 'Joined->True' <@ TSPlotJoined ; = [ 'plot' | 'plots' ] , ( 'not' | 'non' ) , 'joined' | 'Joined' , '->' , 'False' | 'Joined->False' <@ TSPlotNotJoined ; = 'plot' , 'data' <@ TSPlotData ; = | | ; = | | | | | | | ; "; (* ::Section:: *) (*Parser generation*) tokens = ToTokens[timeSeriesCode]; res = GenerateParsersFromEBNF[tokens]; Print["Leaf count of the parsers genereated for the EBNF grammar: ", res // LeafCount] (* ::Section:: *) (*Interpreters*) Clear["TS*"] TSLoadFileInterpreter[parsed_String] := Block[{t, cn, mess, fname = parsed}, If[StringMatchQ[fname, ("'" | "\"") ~~ __ ~~ ("'" | "\"")], fname = StringTake[fname, {2, -2}] ]; (* parsed is assumed to be a file name of say a CSV file, each line of which is a pair of numbers *) t = ReadList[fname, {Word, Word}, WordSeparators -> {",", " ", "\t", "\n"}]; If[MatrixQ[t], If[!NumberQ[t[[1, 1]]] && StringMatchQ[t[[1, 1]], WordCharacter..] && !NumberQ[t[[1, 2]]] && StringMatchQ[t[[1, 2]], WordCharacter..], cn = First[t];t = Rest[t], cn = ToString /@ Range[1, Length[t[[1]]]]; ]; mess = "DataLoaded"; TSMESSAGE = "Data file loaded."; TSDATA = ToExpression[t]; , mess = "DataNotLoaded"; TSMESSAGE = "Data file NOT loaded."; cn = {};TSDATA = {} ]; mess ]; Clear[TSMakeProperty]; TSMakeProperty[s : {_String..}] := StringJoin[Map[TSMakeProperty, s]]; TSMakeProperty[s_String] := If[StringLength[s] > 0, ToUpperCase[StringTake[s, 1]] <> ToLowerCase[StringTake[s, {2, -1}]], s]; TSLoadDataInterpreter[parsed_] := Block[{citySpec, companySpec, prop, pos, t, messPart = ""}, (* parsed is assumed to be weather data specification *) prop = TSMakeProperty[parsed[[1, 1]]]; Which[ MemberQ[{"Price", "Volume"}, prop], companySpec = Cases[parsed, TSCompanySpec[x__] :> x, \[Infinity]][[1]]; If[Length[companySpec] == 1, companySpec = First[companySpec]];t = FinancialData[companySpec, prop, {DatePlus[Take[Date[], 3], {-365, "Day"}], Take[Date[], 3]}]; messPart = "Financial", True, citySpec = Cases[parsed, TSCitySpec[x__] :> x, \[Infinity]][[1]]; If[Length[citySpec] == 1, citySpec = First[citySpec]]; t = WeatherData[citySpec, prop, {DatePlus[Take[Date[], 3], {-60, "Day"}], Take[Date[], 3]}]; t = t["Path"] /. Quantity[x_, _] :> x; messPart = "Weather" ]; pos = Position[t[[All, 2]], Missing["NotAvailable"]]; If[Length[pos] > 0, t[[All, 2]] = Fold[ReplacePart[#1, #2 -> Mean[#1[[{#2 - 1, #2 + 1}]]]]&, t[[All, 2]], Flatten[pos]] ]; t[[All, 1]] = AbsoluteTime /@ t[[All, 1]]; If[MatrixQ[t], TSDATA = t; TSGRAPHICS = None; TSMESSAGE = messPart <> " data loaded."; "DataLoaded", (*ELSE*) TSMESSAGE = messPart <> " data NOT loaded."; "DataNotLoaded" ] ]; Clear[TSFindOutliers] TSFindOutliers[type_String, dataArg_?MatrixQ, nsplines_Integer : 5] := Block[{tfunc = None, bfunc = None, outliers = {}, data}, data = N@Select[dataArg, VectorQ[#, NumberQ]&]; If[ type == "top" || type == "largest" || type == "all", tfunc = QuantileRegression[data, nsplines, {.98}][[1]]; outliers = Join[outliers, Select[data, tfunc[#[[1]]] <= #[[2]]&]] ]; If[ type == "bottom" || type == "smallest" || type == "all", bfunc = QuantileRegression[data, nsplines, {.02}][[1]]; outliers = Join[outliers, Select[data, bfunc[#[[1]]] >= #[[2]]&]] ]; {outliers, tfunc, bfunc} ]; Clear[GetVariableName] GetVariableName[fexpr_] := Block[{}, ToExpression[Flatten[Cases[fexpr, s_Symbol /; !NumericQ[s], \[Infinity]]][[1]]] ]; TSOperateOnDataCommandInterpreter[parsed_] := Block[{op, dataSpec, data, n, res, mess = "ok", op1}, op = Cases[parsed, TSOperateCommand[x___] :> x, \[Infinity]][[1]]; Print["TSOperateOnDataCommandInterpreter:: parsed=", parsed]; Print["TSOperateOnDataCommandInterpreter:: op=", op]; dataSpec = parsed[[2]]; Which[ MatchQ[dataSpec, TSPastData[___]], data = TSDATA, MatchQ[dataSpec, TSWeatherSpec[___]], TSLoadDataInterpreter[dataSpec]; data = TSDATA, True, TSRESULT = {}; Return["UnknownDataSpec"] ]; If[!MatrixQ[data], TSRESULT = {}; TSMESSAGE = "The data should be a numerical matrix."; Return["WrongData"] ]; data = N@Select[data, VectorQ[#, NumberQ]&]; If[Length[data] == 0 || !MatrixQ[data, NumberQ], TSRESULT = {}; TSMESSAGE = "The data should be a numerical matrix."; Return["WrongData"] ]; Which[ MatchQ[op, TSBSplineQRegression[___]], n = op[[1]] /. {x_ /; NumberQ[ToExpression[x]] :> ToExpression[x], _ -> 5}; res = QuantileRegression[data, 10, Rescale[Range[0, n + 1], {0, n + 1}, {0, 1}][[2 ;; -2]], InterpolationOrder -> 2, Method -> {LinearProgramming, Method -> "CLP"}]; TSMESSAGE = "Found regression quantiles.", MatchQ[op, TSLeastSquaresFit[___]], op1 = ToExpression[StringReplace[op[[1]], {"table[" -> "Table[", "log[" -> "Log[", "sin[" -> "Sin[", "cos[" -> "Cos[", "sqrt[" -> "Sqrt["}]]; res = Fit[data, op1, GetVariableName[op1]]; If[Head[res] === Fit, res = None; TSMESSAGE = "Did NOT fit least squares.", TSMESSAGE = "Fitted least squares." ], MatchQ[op, TSOutliers[___]], n = DeleteCases[op[[1]], "outliers"]; If[Length[n] == 0, n = {"all"}]; res = TSFindOutliers[n[[1]], data, 7]; TSMESSAGE = "Found outliers.", True, TSMESSAGE = "Not clear what to do for " <> ToString[op]; Return["UnknownOperation"] ]; TSRESULT = Head[op][res]; "OperationOnDataCompleted" ]; TSOperateCommandInterpreter[parsed_] := Block[{}, Print["TSOperateCommandInterpreter::", parsed]; TSOperateOnDataCommandInterpreter[{TSOperateCommand[parsed], TSPastData[{"last", "loaded", "data"}]}] ]; TSStartOverInterpreter[parsed_] := Block[{}, TSDATA = {}; TSRESULT = {}; TSGRAPHICS = None; TSPLOTJOINED = False; TSMESSAGE = "Cleaned data and graphics."; "StartOver" ]; TSWhatOperationsInerpreter[parsed_] := Block[{}, TSRESULT = "The operations are:\n(all|top|bottom|largest|smallest) outliers,\nfind quantile(s),\nfind least squares fit ."; TSMESSAGE = "See note."; "Note" ]; TSHelpInterpreter[parsed_] := Block[{}, TSWhatOperationsInerpreter[parsed]; TSRESULT = "The data commands are:\nload data file ,\nload data (temperature|pressure|wind speed) (of|for) \nload data (company price|trade volume) (of|for) \nplot joined|plot not joined\nplot data\nclear graphics\nstart over|clear." <> "\n" <> TSRESULT; TSMESSAGE = "See note."; "Note" ]; TSClearGraphicsInterpreter[parsed_] := Block[{}, TSGRAPHICS = None; TSMESSAGE = "Graphics cleared."; "None" ]; TSPlotJoinedInterpreter[parsed_] := Block[{}, TSPLOTJOINED = True; TSGRAPHICS = None; TSMESSAGE = "Using joined plots from now on."; "None" ]; TSPlotNotJoinedInterpreter[parsed_] := Block[{}, TSPLOTJOINED = False; TSGRAPHICS = None; TSMESSAGE = "Using non-joined plots from now on."; "None" ]; TSPlotDataInterpreter[parsed_] := Block[{}, TSGRAPHICS = None; TSRESULT = None; TSMESSAGE = "Plotted data."; "OperationOnDataCompleted" ]; (* ::Section:: *) (*Visualize results*) Clear[VisualizeResults] VisualizeResults[contextDataRules_, isize_ : 800] := Block[{data = TSDATA /. (contextDataRules), ds, outliers, tfunc, bfunc, gr1, gr2, gr3, opts}, data = N@Select[data, VectorQ[#, NumberQ]&]; gr1 = DateListPlot[data, Joined -> TrueQ[TSPLOTJOINED /. contextDataRules]]; Which[ Head[TSRESULT /. (contextDataRules)] === TSBSplineQRegression , Block[{x}, gr2 = Plot[Through[First[TSRESULT /. (contextDataRules)][x]], Evaluate[{x, Min[data[[All, 1]]], Max[data[[All, 1]]]}], PlotStyle -> Darker[Red]] ] , Head[TSRESULT /. (contextDataRules)] === TSLeastSquaresFit, gr2 = Plot[First[TSRESULT /. (contextDataRules)], Evaluate[{GetVariableName[First[TSRESULT /. (contextDataRules)]], Min[data[[All, 1]]], Max[data[[All, 1]]]}], PlotStyle -> Green], Head[TSRESULT /. (contextDataRules)] === TSOutliers, {outliers, tfunc, bfunc} = (TSRESULT /. (contextDataRules))[[1]]; ds = (TSDATA /. contextDataRules)[[All, 1]]; opts = {Joined -> {False, True, True}, PlotStyle -> {{PointSize[0.007], Red}, {Thickness[0.001], Lighter[Blue]}, {Thickness[0.001], Lighter[Blue]}}, PerformanceGoal -> "Speed"}; gr2 = Which[ Head[tfunc] === Symbol, DateListPlot[{outliers, Transpose[{ds, bfunc /@ ds}]}, opts], Head[bfunc] === Symbol, DateListPlot[{outliers, Transpose[{ds, tfunc /@ ds}]}, opts], True, DateListPlot[{outliers, Transpose[{ds, tfunc /@ ds}], Transpose[{ds, bfunc /@ ds}]}, opts] ]; outliers = Tooltip[#, DateString[#[[1]], {"Year", ".", "Month", ".", "Day"}] <> ", " <> ToString[#[[2]]]]& /@ outliers; gr3 = ListPlot[outliers, PlotStyle -> {Red, PointSize[0.007]}] ]; If[Head[TSGRAPHICS /. contextDataRules] === Graphics, gr1 = TSGRAPHICS /. contextDataRules; ]; Print["VisualizeResults :", Head[TSGRAPHICS /. contextDataRules]]; Which[ Head[gr2] === Symbol, Show[{gr1}, PlotRange -> All, ImageSize -> isize], Head[gr3] === Symbol, Show[{gr1, gr2}, PlotRange -> All, ImageSize -> isize], True, Show[{gr1, gr2, gr3}, PlotRange -> All, ImageSize -> isize] ] ]; (* ::Section:: *) (*Interpretation Interface*) states = Sort@{"WaitingForARequest", "DataAvailable", "OperationOnDataCompleted", "WaitingForADataSpec", "WaitingForAnOpSpec"} messages = Sort@{"StarOver", "WrongData", "UnknownDataSpec", "UnknownOperation", "DataLoaded", "DataNotLoaded", "None", "Note", "Help"} contextFunctionRules = {TSHelp -> TSHelpInterpreter, TSWhatOperations -> TSWhatOperationsInerpreter, TSStartOver -> TSStartOverInterpreter, TSClearGraphics -> TSClearGraphicsInterpreter, TSLoadFile -> TSLoadFileInterpreter, TSLoadData -> TSLoadDataInterpreter, TSOperateOnDataCommand -> TSOperateOnDataCommandInterpreter, TSPlotJoined -> TSPlotJoinedInterpreter, TSPlotNotJoined -> TSPlotNotJoinedInterpreter, TSPlotData -> TSPlotDataInterpreter}; (* ::Text:: *) (*Should not be included : TSOperateCommand->TSOperateCommandInterpreter*) contextDataRules = {TSDATA -> {}, TSRESULT -> {}, TSMESSAGE -> "", TSGRAPHICS -> None, TSPLOTJOINED -> False}; FirstLetterToLowerCase[s_String] := If[StringLength[s] > 0, ToLowerCase[StringTake[s, 1]] <> StringTake[s, {2, -1}], s]; {viWidth} = {650}; {textOffset, rx, ry} = {0.01, 0.01, 0.004}; Magnify[ DynamicModule[{input = "", fsmState = "WaitingForARequest", fsmMessage = {}, fsmContext = {}, speechMessage = "", visualMessage = ""}, (* Clear the data symbols and assign to context *) Clear[TSDATA, TSRESULT, TSMESSAGE, TSGRAPHICS]; fsmContext = {"data" -> {TSDATA -> {}, TSRESULT -> {}, TSMESSAGE -> "", TSGRAPHICS -> None, TSPLOTJOINED -> False}, "functions" -> contextFunctionRules}; ColumnForm[{ Panel[ Row[ {Style["speech input : ", Blue], InputField[Dynamic[input], String, ImageSize -> viWidth - 50] }] , ImageSize -> {viWidth, 60}], Dynamic[ t = ParseShortest[pTSCOMMAND][ParseToTokens[FirstLetterToLowerCase[input]]]; Print["t=", t]; If[StringLength[input] > 0 && ListQ[t] && (Length[t] == 0 || ListQ[t[[1]]] && Length[t[[1, 1]]] > 0), fsmContext = {"data" -> Append[DeleteCases[fsmContext[[1, 2]], Rule[TSMESSAGE, ___]], Rule[TSMESSAGE, "Unknown input."]], "functions" -> contextFunctionRules}, (*ELSE*) t = InterpretWithContext[ParseShortest[pTSCOMMAND][ParseToTokens[FirstLetterToLowerCase[input]]], fsmContext]; If[ListQ[t] && Length[t] > 0 && ListQ[t[[1]]] && Length[t[[1]]] > 0 && TrueQ[Head[t[[1, 1, 2]]] === TSOperateCommand], t = InterpretWithContext[ParseShortest[pTSCOMMAND][ParseToTokens[FirstLetterToLowerCase[input] <> " for last loaded data"]], fsmContext] ] ]; PRINT["ListQ[t]&&Length[t]\[Equal]2 : ", ListQ[t] && Length[t] == 2]; If[ListQ[t] && Length[t] == 2, fsmMessage = If[Length[#] > 0, #[[1]], None]&[Flatten[t[[1]]]]; fsmContext = {"data" -> t[[2]], "functions" -> contextFunctionRules}; ]; PRINT[fsmMessage]; speechMessage = TSMESSAGE /. ("data" /. fsmContext); PRINT[speechMessage]; (*Print[ColumnForm/@fsmContext];*) ColumnForm[ {Panel[ Column[ {Style["spoken", Blue], If[StringQ[speechMessage], Speak[speechMessage];speechMessage, Null] }], ImageSize -> {viWidth, 50}], Panel[ Column[{Style["shown", Blue], Which[ TrueQ[fsmMessage == "OperationOnDataCompleted"], visualMessage = VisualizeResults["data" /. fsmContext, 600]; fsmContext = {"data" -> Append[DeleteCases[fsmContext[[1, 2]], Rule[TSGRAPHICS, ___]], Rule[TSGRAPHICS, visualMessage]], "functions" -> contextFunctionRules}; visualMessage, TrueQ[fsmMessage == "Note"], visualMessage = TSRESULT /. ("data" /. fsmContext), True, None ] }], ImageSize -> {viWidth, 450}]}], TrackedSymbols :> {input}] }] ], 1.4]