<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.0 20120330//EN" "JATS-archivearticle1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta />
    <article-meta>
      <fpage>22</fpage>
      <lpage>29</lpage>
    </article-meta>
  </front>
  <body>
    <sec id="sec-1">
      <title>-</title>
      <p>© . .
© . .</p>
      <p>. . .
!&amp;.
$
!&amp;
'
: « &amp;
% !&amp;</p>
      <p>#!,
! % $
+ )
$</p>
      <p>% !&amp;
» (
#!,</p>
      <p>+
. 1).</p>
      <p>#,</p>
      <p>XVII
DAMDID/RCDL’2015 «
$!
&amp;
!
#,
'
», «
!&amp;</p>
      <p>«
% )
%
!: $
"
$
$
' !&amp;
&amp;
!&amp;»,</p>
      <p>#
#
$
!&amp;
), . .
» «
+
$!
»,
13-16
$
"
$
CRL
)
#
[15].</p>
      <p>
        ) !
Drools [
        <xref ref-type="bibr" rid="ref17">8</xref>
        ]
1 $
(Drools Rule Language) [
        <xref ref-type="bibr" rid="ref17">8</xref>
        ]
&amp; Drools Expert [
        <xref ref-type="bibr" rid="ref17">8</xref>
        ].
      </p>
      <p>+
!
*
'</p>
      <p>%
$ .
!
# $ % !&amp;</p>
      <p>% !) $!
3 CRL (Cells Rule Language).</p>
      <p>!) $!</p>
      <p>CRL
#,
- &amp;
!&amp;.</p>
      <p>!&amp;.
' !&amp;
# ,
!&amp;
&amp;
!&amp;.</p>
      <p>)
%
6
$!
' !</p>
      <p>CRL
$
#
),
!
!
)
# )
# [ 1, 5, 6, 14, 15].</p>
      <p>$
' !&amp;
!
%
$
!
$
#</p>
      <p>%$
+ &amp;
)
%
&amp;
!)
'
#
$
'</p>
      <p>#.
! %
!&amp;
!</p>
      <p>:
$!</p>
      <p>.</p>
      <p>DRL</p>
      <p>%
$
"
$
%$
#.
3.
#
!:</p>
      <p>
        ! .
! [
        <xref ref-type="bibr" rid="ref12">3, 16, 17</xref>
        ]
) $
      </p>
      <p>. (
!
2
$
#
, &amp;
.
!
#
# .</p>
      <p>+
"
% CRL
!&amp;
4.</p>
      <p>!
!
$
!
!&amp;. 6</p>
      <p>!)
TANGO [16]</p>
      <p>%$
!) " )
!&amp;
!
!&amp; $</p>
      <p>!
,
6
»
# . 6
%
! $
#!,
%
)
#!,</p>
      <p>"
».
&amp;
!
%
&amp;
!
(
#
' )
"
.
$ ' !
.
&amp; !&amp;
) ' )
' ) ,</p>
      <p>!
!
$
&amp;
&amp;
%
!
!
#!,
!:</p>
      <p>,
# ),
)
).</p>
      <p>,
!
! %</p>
      <p>:
$
' !&amp;
#!.
'
$
1. !: cl 3 !) cr 3 !) # ,
rt 3 &amp; rb 3 ;
2. ! ) style, ! $
!&amp;: font 3 * " , horzAlignment 3
$ % vertAlignment 3
% ! , # fgColor 3
bgColor 3 $ , !
# ' ! &amp; # (leftBorder 3 ) ,
topBorder 3 &amp; ) , rightBorder 3
&amp; ) , bottomBorder 3 &amp; ) );
3. text 3 , indent 3
( ' ' );
4. ' ! !: entries 3 &amp; )
labels 3 , !&amp; $ )
' ) .</p>
      <p>$ %$ % &amp;
' !&amp; !&amp; "</p>
      <p>, $ + )
«JSR 94: Java Rule Engine API» [10].
!) " - $
.</p>
      <p>(label) 3</p>
      <p>'
$ ' ;
cell 3
(entry),</p>
      <p>$ ' )
): value 3</p>
      <p>' )
#
,
'
!</p>
      <p>$
"
"
(«
&amp;
&amp;
$ '
4 CRL</p>
      <p>CRL
' %
!
+
'
' !
# "
4.1 '
?
$
&amp;
"
)
,
$
!
#
)
# ,
http://cells.icc.ru/pub/crl.</p>
      <p>#
$! ,
$ CRL
9 # . )
cell $cell : constraints
entry $entry : constraints
label $label : constraints
category $category : constraints</p>
      <p>!&amp; $ %
# &amp; &amp; CRL ,
! ! ! * " ,</p>
      <p>%
! %</p>
      <p>$
$
,
‘$’.</p>
      <p>$
!</p>
      <p>'
*
% ) '</p>
      <p>'
'</p>
      <p>MVEL.
+% $
),
&amp;
!
!
!
!&amp;
'
) ‘ ,’,
a a b b
c d c d
e g 1 1 1 2
e h 3 4 4 5
f g 6 4 4 5
! ( )
$</p>
      <p>! ( ) ' ) .
,</p>
      <p>% ),
% ' )</p>
      <p>!
# '
, '
«*</p>
      <p>,
$
&amp;
- &amp;</p>
      <p>$
&amp; !</p>
      <p>' .
!&amp; . 2, ' )</p>
      <p>' :
(‘a’,...,‘d’)</p>
      <p>&amp;
&amp;
,
!
!
» ,
! %</p>
      <p>:
!
.
)
» «
# $
(‘1’,...,‘6’), $
(‘e’,...,‘h’),
%
,
$
$
»,
$</p>
      <p>) ' )</p>
      <p>CRL
( , +
' ) $c, ) !</p>
      <p>! $corner,
@RowHeading, ! +
$ :
when
cell $corner : cl==1, rt==1, blank
3
% - ) ' ) ,
&amp;
!&amp;
' &amp;,</p>
      <p>"
!
!
,
!
&amp;
!
«
&amp;
% &amp; $
' &amp;,
!
.
!'
#
%
+</p>
      <p>C1 C2 C3
a = 1 b = 2 c = 3
d = 4 e = 5 f = 6
g = 7 h = 8 i = 9
. 3. J' )</p>
      <p>« '=$ ' »,
«$ ' » &amp;
# $
! 3 ( ).</p>
      <p>)
«</p>
      <p>'»
3 ( );
:
'
$
«
«
,
'=$ '
'»
!&amp;</p>
      <p>#
) &amp;</p>
      <p>«
!
$ '
»
%
)
# &amp;
&amp;
. 3, ,</p>
      <p>,
.</p>
      <p>),
&amp;
».</p>
      <p>'»
&amp; '
«$ ' $
when
cell $cell : rt&gt;1, $t : text
a
c 1
d 3
b
2
4
'</p>
      <p>),
$!' )</p>
      <p>.
' ) ,
' )
«$ '</p>
      <p>» 3
$
) $ '
:
&amp;
' )
$
' )
!
$
#</p>
      <p>F
CRL
I
!
$cell,</p>
      <p>"
new entry $cell
new label $cell
$ % , $</p>
      <p>:
&amp;
%
!)
$
, &amp;</p>
      <p>,
) '</p>
      <p>&amp;
!
,
$
$ !
!&amp;
«\d+» (
%$
$</p>
      <p>:
+
%
&amp; :
when cell $c : text matches "\\d+"
then new entry $c</p>
      <p>cell $c : rt&gt;$corner.rb, cr&lt;=$corner.cr
then mark @RowHeading -&gt; $c
( #
' $ &amp;</p>
      <p>) ' )
+ ) " $ '
$ 3 !
entry_value label_value
new entry entry_value -&gt; $cell
new label label_value -&gt; $cell
' , $ '
. 1
)
$cell.</p>
      <p>&amp;
!
when
cell $c : cl==1 || rt==1, !blank,</p>
      <p>$t : text
then
new label extract($t, "[ - ]+") -&gt; $c
new label extract($t, "[a-z]+") -&gt; $c</p>
      <p>CRL " # extract $
$t ' ) $c ' %,
,
,</p>
      <p>#!. V
A
a1 a2</p>
      <p>$!
' , $
category_name,
$label.</p>
      <p>a b
c
,
,
! %
% )
)
,
!
#</p>
      <p>.
! %
!
CRL</p>
      <p>)
%
#
:
when
cell $corner : cl==1, rt==1, $t : text
label $label : cell.cl &gt; $corner.cr
then set category token($t, 0) -&gt; $label
U %, CRL " # token $ +
$ $t, $ $ ' ) &amp;</p>
      <p>$corner. $ % %$
'
$label.</p>
      <p>,</p>
      <p>#
,
'
&amp;
&amp;
)
$</p>
      <p>% &amp;
* )
,
,
,
when
cell $c1 : cl==1, $l1 : label
cell $c2 : cl==1, rt&gt;$c1.rt,</p>
      <p>indent==$c1.indent+4, $l2 : label
no cells : cl==1, rt&gt;$c1.rt,</p>
      <p>rt&lt;$c2.rt, indent==$c1.indent
then set parent label $l1 -&gt; $l2
,
*
!
)</p>
      <p>.
%, '
! &amp;
# &amp;,
%
' ,
'</p>
      <p>.</p>
      <p>+ &amp;
!&amp;, '
!)
,</p>
      <p>,
,
)
$
(</p>
      <p>.
- )</p>
      <p>).
$ !
*</p>
      <p>.
) $</p>
      <p>'
#
,
!
,
!,
$
&amp;
,
,
!
,
' )
' ,
, '
!
!,
*
$
&gt;
$
,</p>
      <p>,
add label label_value
from $category -&gt; $entry
' , !</p>
      <p>$ '
$category. 6
$ &gt;</p>
      <p>$
$entry.</p>
      <p>add label label_value
from category_name -&gt; $entry
!</p>
      <p>category_name. V
%$
label_value.</p>
      <p>,
$
$
F
'
$
#
&amp;
&amp;
F</p>
      <p>$
,
,</p>
      <p>,
,
'
!&amp; $
!&amp;</p>
      <p>#
$
%
#
when
cell@RowHeading $c1 : $l1 : label
cell@RowHeading $c2 : cl==$c1.cl,</p>
      <p>cr==$c1.cr, $l2 : label
then group $l1 -&gt; $l2
$ % $ ! !
+</p>
      <p>! ,
. 2: {‘e’, ‘f’} {‘g’, ‘h’}.</p>
      <p>$ % ,
$
label_value</p>
      <p>)
)
$!</p>
      <p>. U
% &amp;
! $ '
%$
!</p>
      <p>!
), !</p>
      <p>+ )
'
$ '
#!</p>
      <p>'
) “ tons”
$
'
%
)
#
"
:
1
3
c
d
* u
** v
. 5. (
! %
(‘ u’ ‘v’) 3 ( );
from "Footnote" -&gt; $e
‘v’
,
$
,
,
#,
$
%
)
#,
(\*+) $
%
$ %
$ '
#</p>
      <p>,
$
&amp;
5 +
'</p>
      <p>(
&amp;
$
&amp;
$
$
$
$!
J$!
CRL
CRL
%$
"</p>
      <p>#
+ )
F % )*
, '
!
$</p>
      <p>!&amp;. J$!
"
$</p>
      <p>$
#
!</p>
      <p>$!
% !&amp;
$!
!
$
&amp;
#</p>
      <p>%
# ),
!&amp;
"
8
"
)
(
ETL</p>
      <p># &amp;,
# &amp;.</p>
      <p># )
!&amp;,
$
)
The paper discusses issues of transforming
information from arbitrary tables presented in
spreadsheets into a structured form. These tables
contain no relationships describing their semantics.
However, the information from an arbitrary table can
be loaded into a database by standard ETL tools only
after its semantic relationships are recovered. We
suggest the CRL rule language for table analysis and
interpretation. It allows developing a simple program to
recover the missing semantic relationships. Particular
sets of the rules can be developed for different types of
tables to provide the transformation step in unstructured
tabular data integration.</p>
    </sec>
  </body>
  <back>
    <ref-list>
      <ref id="ref1">
        <mixed-citation>
          <source>Table-processing paradigms: a research survey // Int. J. on Document Analysis and Recognition</source>
          .
          <year>2006</year>
          . Vol.
          <volume>8</volume>
          , No 2. pp.
          <fpage>66</fpage>
          -
          <lpage>86</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref2">
        <mixed-citation>
          <source>of the 22nd Int. Conf. on Pattern Recognition.</source>
        </mixed-citation>
      </ref>
      <ref id="ref3">
        <mixed-citation>
          <string-name>
            <surname>Stockholm</surname>
          </string-name>
          , Sweden.
          <year>2014</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref4">
        <mixed-citation>
          <string-name>
            <surname>Embley</surname>
            <given-names>D.W.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Tao</surname>
            <given-names>C.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Liddle</surname>
            <given-names>S.W.</given-names>
          </string-name>
          <article-title>Automating the Extraction of Data from HTML Tables with Unknown Structure</article-title>
          //
          <source>Data &amp; Knowledge Engineering</source>
          .
          <year>2005</year>
          . Vol.
          <volume>54</volume>
          , No 1. pp.
          <fpage>3</fpage>
          -
          <lpage>28</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref5">
        <mixed-citation>
          <string-name>
            <given-names>Gatterbauer W.</given-names>
            ,
            <surname>Bohunsky</surname>
          </string-name>
          <string-name>
            <given-names>P.</given-names>
            ,
            <surname>Herzog</surname>
          </string-name>
          <string-name>
            <given-names>M.</given-names>
            ,
            <surname>Krüpl</surname>
          </string-name>
          <string-name>
            <given-names>B.</given-names>
            ,
            <surname>Pollak</surname>
          </string-name>
          <string-name>
            <given-names>B.</given-names>
            <surname>Towards</surname>
          </string-name>
          Domain-Independent
          <source>Information Extraction from Web Tables // In Proc. of the 16th Int. Conf. on World Wide Web</source>
          . New York, US.
          <year>2007</year>
          . pp.
          <fpage>71</fpage>
          -
          <lpage>80</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref6">
        <mixed-citation>
          2001. pp.
          <fpage>27</fpage>
          -
          <lpage>30</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref7">
        <mixed-citation>
          <source>JavaBeans Specification</source>
          <volume>1</volume>
          .01 Final Release, http://www.oracle.com/technetwork/java/javase/tech/spec-136004.html JBoss Drools, http://www.drools.org
          <source>JSR</source>
          <volume>94</volume>
          :
          <string-name>
            <surname>Java Rule Engine</surname>
            <given-names>API</given-names>
          </string-name>
          , https://jcp.org/en/jsr/detail?id=94 Kim Y.-S.,
          <string-name>
            <surname>Lee K.-H. Extracting</surname>
          </string-name>
          <article-title>Logical Structures from</article-title>
          HTML Tables // Computer Standards &amp; Interfaces.
          <year>2008</year>
          . Vol.
          <volume>30</volume>
          , No 5. pp.
        </mixed-citation>
      </ref>
      <ref id="ref8">
        <mixed-citation>
          <string-name>
            <given-names>Nagy G.</given-names>
            ,
            <surname>Embley</surname>
          </string-name>
          <string-name>
            <surname>D.W.</surname>
          </string-name>
          , Seth S.
          <article-title>End-to-End Conversion of HTML Tables for Populating a Relational Database //</article-title>
          <source>In Proc. of the 11th IAPR Int. Workshop on Document Analysis Systems.</source>
        </mixed-citation>
      </ref>
      <ref id="ref9">
        <mixed-citation>
          <string-name>
            <surname>IEEE.</surname>
          </string-name>
          <year>2014</year>
          . pp.
          <fpage>222</fpage>
          -
          <lpage>226</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref10">
        <mixed-citation>
          <string-name>
            <given-names>Pivk A.</given-names>
            ,
            <surname>Cimiano</surname>
          </string-name>
          <string-name>
            <given-names>P.</given-names>
            ,
            <surname>Sure</surname>
          </string-name>
          <string-name>
            <given-names>Y.</given-names>
            ,
            <surname>Gams</surname>
          </string-name>
          <string-name>
            <given-names>M.</given-names>
            ,
            <surname>Rajkovic</surname>
          </string-name>
          <string-name>
            <given-names>V.</given-names>
            ,
            <surname>Studer</surname>
          </string-name>
          <string-name>
            <surname>R</surname>
          </string-name>
          .
          <article-title>Transforming Arbitrary Tables into Logical Form with TARTAR // Data</article-title>
          &amp; Knowledge
          <string-name>
            <surname>Engineering</surname>
          </string-name>
          .
          <year>2007</year>
          . Vol.
          <volume>60</volume>
          , No 3. pp.
          <fpage>567</fpage>
          -
          <lpage>595</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref11">
        <mixed-citation>
          <string-name>
            <given-names>Pivk A.</given-names>
            ,
            <surname>Cimiano</surname>
          </string-name>
          <string-name>
            <given-names>P.</given-names>
            ,
            <surname>Sure</surname>
          </string-name>
          <string-name>
            <surname>Y</surname>
          </string-name>
          . From Tables to Frames // Web Semantics: Science,
          <source>Services and Agents on the World Wide Web</source>
          .
          <year>2005</year>
          . Vol.
        </mixed-citation>
      </ref>
      <ref id="ref12">
        <mixed-citation>
          3, No 2
          <issue>-3</issue>
          . pp.
          <fpage>132</fpage>
          -
          <lpage>146</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref13">
        <mixed-citation>
          <string-name>
            <surname>e Silva</surname>
            <given-names>A.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Jorge</surname>
            <given-names>A.</given-names>
          </string-name>
          ,
          <string-name>
            <surname>Torgo</surname>
            <given-names>L</given-names>
          </string-name>
          .
          <article-title>Design of an end-to-end method to extract information from tables //</article-title>
          <source>International Journal on Document Analysis and Recognition</source>
          .
          <year>2006</year>
          . Vol.
          <volume>8</volume>
          , No 2.
        </mixed-citation>
      </ref>
      <ref id="ref14">
        <mixed-citation>
          pp.
          <fpage>144</fpage>
          -
          <lpage>171</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref15">
        <mixed-citation>
          2015. Vol.
          <volume>42</volume>
          , No 2. pp.
          <fpage>929</fpage>
          -
          <lpage>937</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref16">
        <mixed-citation>
          <string-name>
            <given-names>Tijerino Y.A.</given-names>
            ,
            <surname>Embley</surname>
          </string-name>
          <string-name>
            <given-names>D.W.</given-names>
            ,
            <surname>Lonsdale</surname>
          </string-name>
          <string-name>
            <given-names>D.W.</given-names>
            ,
            <surname>Ding</surname>
          </string-name>
          <string-name>
            <given-names>Y.</given-names>
            ,
            <surname>Nagy</surname>
          </string-name>
          <string-name>
            <surname>G</surname>
          </string-name>
          . Towards Ontology Generation from Tables // World Wide Web:
          <source>Internet and Web Information Systems</source>
          .
          <year>2005</year>
          .
        </mixed-citation>
      </ref>
      <ref id="ref17">
        <mixed-citation>
          Vol.
          <volume>8</volume>
          , No 3. pp.
          <fpage>261</fpage>
          -
          <lpage>285</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref18">
        <mixed-citation>
          <source>Understanding Tables on the Web // In Proc. of the 31st Int. Conf. on Conceptual Modeling.</source>
        </mixed-citation>
      </ref>
      <ref id="ref19">
        <mixed-citation>
          <source>Springer-Verlag. Florence, Italy</source>
          .
          <year>2012</year>
          . pp.
          <fpage>141</fpage>
          -
          <lpage>155</lpage>
          .
        </mixed-citation>
      </ref>
      <ref id="ref20">
        <mixed-citation>
          <string-name>
            <surname>Wang</surname>
            <given-names>X.</given-names>
          </string-name>
          <article-title>Tabular Abstraction, Editing, and Formatting</article-title>
          .
          <source>PhD Thesis</source>
          . University of Waterloo, Waterloo, Ontario, Canada.
          <year>1996</year>
          .
        </mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>