=Paper=
{{Paper
|id=Vol-1536/paper13
|storemode=property
|title=
Комбинированный подход к кросс-языковой идентификации сущностей
(A Combined Approach to Cross-Language Identity Resolution)
|pdfUrl=https://ceur-ws.org/Vol-1536/paper13.pdf
|volume=Vol-1536
|dblpUrl=https://dblp.org/rec/conf/rcdl/ApanovichM15
}}
== Комбинированный подход к кросс-языковой идентификации сущностей (A Combined Approach to Cross-Language Identity Resolution) ==
- © .. © . . . .. ! "" , # apanovich@iis.nsk.su mag@iis.nsk.su *+ $( $( $(, $ / *& ) &, '&'& ) & " $ % $ . " $ & '", % $ ) '" (' ) *+ +$ $( $( + LOD %" SpringerLink +$ $( $( + (http://link.springer.com/) -$ ( - !#. $( + ". - + +/$( ( *+ $( $( $(, $ 2 / *& ) &, '&'& ) & . /$ % " +$ + " $( $( *+ * $ ' " % %" SpringerLink $ '", *, ) $ ( - !#. " owl:sameAs. 1 ( * * , $ -$ ( - !# - + , ' $( ) & % $, +& + ( ( . / , ! "" ( - !#) -$ ( *+& -$ ( - +$ $ , * !# [8] $ -$$( +$( , $ $ , *+& 2$( (Linked Open Data, LOD) [10]. 2 +$ $ ( / $( . < , ) +$( 2$( / / + * , $ * +$ *+ * +$ $ $ (1), $ " " $(, $ " %" (2), '" (3) elibrary.ru? = *) $( (4). [1] ) & ) & $ $ " , , / &, / ' + $( + %" %" + +) , / + * , $ $, + &' ( $ -$$ ( - !#. '" % elibrary.ru / $* * +" RDF-$( RKBExplorer.com. 9$ ) ( ) " + +, +$ $( 10-15 , +$ $( + + ", " *" +" + ) " ( , * ( " . # *$( , , ) * / * elibrary.ru ) * ( +$( $ & " ( &", .. , 9.. @(, . - + +/$( ( .. < ( ( ", & %" $ +/ ( $ " "" . XVII ! = SpringerLink $ DAMDID/RCDL’2015 « $ % * . - $(, " », #, 13-16 ) + $( , 2015 " , * 91 / & ( - !#. - $ & $ * ( +$( $(, / $ $ PDF , " $ $ ( ) ". / $ $ & +$ . ) " $, SpringerLink / + " " ) & - & ) & '" + " + , $ ( -$$( +$( $( + * ), ) " . SILK[6]. = , *+$ VIAF DBLP, * (, %" $ ( [5, 7]. / & + , *+$ WorldCat.org [4], – / $ & + & * +$ +$ $( ( + ". [12, , A &' *$( 13]. , ( 9 < G, Q' " + * ( %$( . ) '" WorldCat.org ( -$$( +$( $(, + + $( 2$(. 2$ WorldCat.org $ RDF- + " . ( +$ $( , + " ( / *+ schema.org + + $ $( , BiblioGraph (http://BiblioGraph.net). $ ) ". Q = $ WordlCat.org + , % %" $ / * VIAF, ( , / $ , % $ +$ &, ) +$ $( , - T , . # , '" WorldCat ' * +/$ $ Identities, (http://www.worldcat.org/wcidentities/lccn- ) . " $, + n80162678), /' ) + $( , $&' ( ", $ " + *, + $( $ (/ ' & / +, + . - / ) & ( ( * +) *( .. ) . % $ ) .. ( ) ( % ", ) + # . % * ) ( , (' ( , 1989 2012 $( $(, +$ , $ $ «$ + . # $ .. » ( .. ) & $ 1988 ). ) ", + $( +/, $ $( , ) $ +$ ) '" & $. Q *)$ & +$ $ « $ "$», + $ +$( ( ( / + * & % . ' ' . 
) $( " + $( ' ' & " OCLC $ $ ) , " < G (viaf.org), , & &' + ) , , ++$ ) , , -+ ( ( + (, +) (, [9, 14]. ( + (. # , +$ $( +$ $( % ) *+& * $ $ / $ (' , $, +*$ *$. @, , +$ $ / , , , " , +$ , ) +" " . % $ http://viaf.org/viaf/5347110, , *+ ) '" $ $(, , $( $" (, &' " " * $ , $ ) " $'$ ) ", , ) ". ( % $ $ . " ' %" $, * $ SpringerLink. ! * * ) , $( + +$ * (http://viaf.org/viaf/196995053), / ( * , +$ « $ $ ) " ) "» +" $). , $ % -' ( $ («! ( …»), $ ) '" &' " : $ $ / . @ *, + & ) , " " / 92 x +$ ( , / * , /& ) $ % ( - , + + . !# ( , " ), <) , & +/$ $ & $, +$ $ ' ) , / ) " %" + , " (W) ( SpringerLink. / . ( x * ( - !# *+& / $ +$ $ + +) ", , / % %" SpringerLink +. + + & +$ $ + , + . # , ' + ) + 30 0.05. +) ' Google $" +/ ' (Google translator.com). X /" "" $ , (/ * SpringerLink + $ & / . $ + / + ' $ & &' . $, +$ " / = Temperature * SpringForce(d) * $ ( - !#. W SpringForceK ; ' Jaro- Winkler [3]. *, ) = Temperature * ElectricForce(d) / + " +) " W * ElectricForceK; /& , , SpringForce(D) = 2 * log(D); + " " / +) , & ElectricForce(D) = 1 / d2, - % , W – +$ $ ' ". $( ( / . *( $ +$ ', *+ * +/* * +$ ( , - * ( " ) , !#). + &'" " $, * x 2 ) * ) , " $, ' $ +" $. +) , + $ + ( - !#. 3 $ " % x "$ * + & ) '" $ * ( .. , ) $ $. @$ ) " % ", $( $ +, ( .. , ) ( & * , - !#, &' ( $" . +'$ . !+*$ $ $ %" $ ' " ( * %" +/ : ' " elibrary.ru. tf-idf [16] " Q $( SpringerLink, + , / LDA (Latent - $( * + *$" + Dirichlet Allocation) [2]. A " ) ) ( ( LDA, / $ +) * ( )), $ ' ' * ) . ) <* — Z". @/ +*$ $ " $ ( , ( $ + 100 ( 3000 ) ") & ' + , 80% , ) [15]. ) ( $ ) x 2 ( ", $( + $, * * ) $. $ (/ + / ' &' ( $ +$ 70% . , + + # ! . 1 + $ $, NewgroupN, N-% $" ''" + $ “.. ” * + " $. ) %" x 2 /" $ " + SpringerLink.com. $ " +, + + + $( ( - !# $ + $ $ + . ( +$ $ $ % , 93 +$ (" . " ( 5 " / Andrei +$ +$ $ $ P. Ershov, * – Andrei $ +" $. ), + Ershov, 84 - A.P. Ershov, , +/&' " ) , $ * – A.P. Yershov. # +( & $ + $ . $ ) ( "$( ". + % ( 91 & ) , $( ) " . . * $ +. $ / ) / 21 ) , *$ * & ) , $( / ' * +$ .. $ +. + “.. ” $. 
SpringerLink.com $ " 91 ) . + ! . 1. ) , ) $ /' .. . * , $( $ +, +$ $ ) . " * ) 19 $ $, * ) " , 66 ) " / * , & *$( .. $(. +* $, % ) /* ( $( % * * - !#. % ( (, , * ) * *& * $" + +$ ( ) " . . , / * ) " + ( +$( , ) % ( SpringerLink * ) " ( A *. ' . . , /$( % $ $ (' ( % ( .. . $( % ) , + &' ( A * # " " $, ( ) & +*$ ' " $ $(, " . @/ % $ $, $ & + + ) " + (& *, +& $( ' , / 93%. ) , *+ + $( " ) -) . 4 & '( " (, *" $ $ + $ ) ) $ $ ! $ " / ) " ' !TT ( _ 14-07-00386). + %" ) . * $ $ . = $ + , 94 * Business Conference, San Francisco, June 5, 2012. http://mes-semantics.com/wp- content/uploads/2012/09/Becker-etal-LDIF [1] Apanovich Z.V., Marchuk A.G. Experiments on SemTechSanFrancisco.pdf. using the LOD cloud datasets to enrich the content [11] Steyvers M., Griffiths T. Probabilistic Topic of a scientific knowledge base, P.Klinov and D.Mouromtsev (Eds.) KESW 2013, CCIS 394, Models Handbook of Latent Semantic Analysis. Springer Verlag Berlin Heidelberg 2013, pp. 1-14. 2007. [12] 9( . 9., #( . ., T . . - [2] Blei D. M., Ng A., Jordan M. Latent Dirichlet allocation Journal of Machine Learning Research + $ ( +) (3) 2003 pp. 993-1022. $( / // . # X. .: . ( . – 2008. – @. 6, $. 1. – . [3] Cohen W. W., Ravikumar P. D., Fienberg S. E.: A 3–9. Comparison of String Distance Metrics for Name- [13] <+ . . +$ Matching Tasks. IIWeb 2003, pp. 73-78. / . . <+ , . [4] Godby C. J., Denenberg R. Common Ground: `.@ ", -. . < // Exploring Compatibilities Between the Linked =$ : $ Data Models of the Library of Congress and $ ( , %$ ) OCLC (RCDL’2012) : . XIV . . ., http://www.oclc.org/research/publications/2015/oc *- ", 15–18 . 2012 . – lcresearch-loc-linked-data-2015.html. *- " : +- «X [5] Hickey, T. B., Toves J. A.. 2014. "Managing », 2012. – . 360–369. Ambiguity In VIAF" D-Lib Magazine 20 [14] jq 2.. Z +: !+ (July/August). doi:10.1045/july2014- *+ )" .. hickey.http://www.dlib.org/dlib/july14/hickey/07h . X, .9: , N2, ickey.html. 2000, .115-126. [6] Isele R., Jentzsch A., Bizer Ch. 
Silk Server - Adding missing Links while consuming Linked Data// 1st International Workshop on Consuming Linked Data (COLD 2010), Shanghai, November 2010. [7] Ley M.: DBLP - Some Lessons Learned. PVLDB 2(2), 2009, pp. 1493-1500. [8] Marchuk A.G., Marchuk P.A. Specific features of digital libraries construction with linked content. Proc. of the RCDL’2010 Conf. – 2010. – P. 19–23. – (In Russian). [9] Rogov A.A., Sidorov Yu. Vl. Statistical and Information-calculating Support of the Authorship Attribution of the Literary Works. Computer Data Analysis and Modeling: Robustness and Computer Intensive Methods: Proc. of the Sixth International Conference (September 10-14, 2001, Minsk). Vol.2: K-S/ Edited by Prof. Dr. S. Aivazian, Prof. Dr. Yu. Kharin and Prof. Dr. H. Rieder. Minsk: BSU, 2001. – P. 187-192. [10] Schultz A. et al. How to integrate LINKED DATA into your application //Semantic technology & [15] http://snowball.tartarus.org/ [16] http://www.codeproject.com/Articles/12098/Term-frequency-Inverse-document-frequency-implemen A Combined Approach to Cross-Language Identity Resolution Zinaida V. Apanovich, Alexander G. Marchuk This paper describes experiments on the cross-language identity resolution problem that arises when the English-language LOD datasets are used to populate the content of a Russian scholarly knowledge base. One possible approach is the combined use of structured and text data, containing additional information and facilitating the identity resolution. The dataset of the Open Archive of the Russian Academy of Sciences and SpringerLink e-library are used as test examples. 95